LLVM  17.0.0git
TargetTransformInfo.h
1 //===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This pass exposes codegen information to IR-level passes. Every
10 /// transformation that uses codegen information is broken into three parts:
11 /// 1. The IR-level analysis pass.
12 /// 2. The IR-level transformation interface which provides the needed
13 /// information.
14 /// 3. Codegen-level implementation which uses target-specific hooks.
15 ///
16 /// This file defines #2, which is the interface that IR-level transformations
17 /// use for querying the codegen.
18 ///
19 //===----------------------------------------------------------------------===//
20 
21 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
22 #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
23 
24 #include "llvm/ADT/SmallBitVector.h"
25 #include "llvm/IR/FMF.h"
26 #include "llvm/IR/InstrTypes.h"
27 #include "llvm/IR/PassManager.h"
28 #include "llvm/Pass.h"
29 #include "llvm/Support/AtomicOrdering.h"
30 #include "llvm/Support/BranchProbability.h"
31 #include "llvm/Support/InstructionCost.h"
32 #include <functional>
33 #include <optional>
34 #include <utility>
35 
36 namespace llvm {
37 
38 namespace Intrinsic {
39 typedef unsigned ID;
40 }
41 
42 class AssumptionCache;
43 class BlockFrequencyInfo;
44 class DominatorTree;
45 class BranchInst;
46 class CallBase;
47 class Function;
48 class GlobalValue;
49 class InstCombiner;
50 class OptimizationRemarkEmitter;
51 class InterleavedAccessInfo;
52 class IntrinsicInst;
53 class LoadInst;
54 class Loop;
55 class LoopInfo;
56 class LoopVectorizationLegality;
57 class ProfileSummaryInfo;
58 class RecurrenceDescriptor;
59 class SCEV;
60 class ScalarEvolution;
61 class StoreInst;
62 class SwitchInst;
63 class TargetLibraryInfo;
64 class Type;
65 class User;
66 class Value;
67 class VPIntrinsic;
68 struct KnownBits;
69 
70 /// Information about a load/store intrinsic defined by the target.
71 struct MemIntrinsicInfo {
72  /// This is the pointer that the intrinsic is loading from or storing to.
73  /// If this is non-null, then analysis/optimization passes can assume that
74  /// this intrinsic is functionally equivalent to a load/store from this
75  /// pointer.
76  Value *PtrVal = nullptr;
77 
78  // Ordering for atomic operations.
79  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
80 
81  // Same Id is set by the target for corresponding load/store intrinsics.
82  unsigned short MatchingId = 0;
83 
84  bool ReadMem = false;
85  bool WriteMem = false;
86  bool IsVolatile = false;
87 
88  bool isUnordered() const {
89  return (Ordering == AtomicOrdering::NotAtomic ||
90  Ordering == AtomicOrdering::Unordered) &&
91  !IsVolatile;
92  }
93 };
94 
95 /// Attributes of a target dependent hardware loop.
96 struct HardwareLoopInfo {
97  HardwareLoopInfo() = delete;
98  HardwareLoopInfo(Loop *L) : L(L) {}
99  Loop *L = nullptr;
100  BasicBlock *ExitBlock = nullptr;
101  BranchInst *ExitBranch = nullptr;
102  const SCEV *ExitCount = nullptr;
103  IntegerType *CountType = nullptr;
104  Value *LoopDecrement = nullptr; // Decrement the loop counter by this
105  // value in every iteration.
106  bool IsNestingLegal = false; // Can a hardware loop be a parent to
107  // another hardware loop?
108  bool CounterInReg = false; // Should loop counter be updated in
109  // the loop via a phi?
110  bool PerformEntryTest = false; // Generate the intrinsic which also performs
111  // icmp ne zero on the loop counter value and
112  // produces an i1 to guard the loop entry.
113  bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,
114  DominatorTree &DT, bool ForceNestedLoop = false,
115  bool ForceHardwareLoopPHI = false);
116  bool canAnalyze(LoopInfo &LI);
117 };
118 
119 class IntrinsicCostAttributes {
120  const IntrinsicInst *II = nullptr;
121  Type *RetTy = nullptr;
122  Intrinsic::ID IID;
123  SmallVector<Type *, 4> ParamTys;
124  SmallVector<const Value *, 4> Arguments;
125  FastMathFlags FMF;
126  // If ScalarizationCost is UINT_MAX, the cost of scalarizing the
127  // arguments and the return value will be computed based on types.
128  InstructionCost ScalarizationCost = InstructionCost::getInvalid();
129 
130 public:
131  IntrinsicCostAttributes(
132  Intrinsic::ID Id, const CallBase &CI,
133  InstructionCost ScalarCost = InstructionCost::getInvalid(),
134  bool TypeBasedOnly = false);
135 
136  IntrinsicCostAttributes(
137  Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys,
138  FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr,
139  InstructionCost ScalarCost = InstructionCost::getInvalid());
140 
141  IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
142  ArrayRef<const Value *> Args);
143 
144  IntrinsicCostAttributes(
145  Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args,
146  ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(),
147  const IntrinsicInst *I = nullptr,
148  InstructionCost ScalarCost = InstructionCost::getInvalid());
149 
150  Intrinsic::ID getID() const { return IID; }
151  const IntrinsicInst *getInst() const { return II; }
152  Type *getReturnType() const { return RetTy; }
153  FastMathFlags getFlags() const { return FMF; }
154  InstructionCost getScalarizationCost() const { return ScalarizationCost; }
155  const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }
156  const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }
157 
158  bool isTypeBasedOnly() const {
159  return Arguments.empty();
160  }
161 
162  bool skipScalarizationCost() const { return ScalarizationCost.isValid(); }
163 };
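// Usage sketch (illustrative, not part of this header): a common way to use
// this class is to describe an existing intrinsic call site and then ask the
// cost model what it would cost. costOfIntrinsicCall is a hypothetical
// helper; it assumes a TargetTransformInfo obtained elsewhere (e.g. from
// TargetIRAnalysis) and leaves the scalarization cost at its invalid default
// so it is derived from the argument types.
static InstructionCost costOfIntrinsicCall(const TargetTransformInfo &TTI,
                                           const IntrinsicInst &Call) {
  IntrinsicCostAttributes Attrs(Call.getIntrinsicID(), Call);
  return TTI.getIntrinsicInstrCost(Attrs,
                                   TargetTransformInfo::TCK_RecipThroughput);
}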
164 
165 enum class PredicationStyle { None, Data, DataAndControlFlow };
166 
167 class TargetTransformInfo;
168 typedef TargetTransformInfo TTI;
169 
170 /// This pass provides access to the codegen interfaces that are needed
171 /// for IR-level transformations.
172 class TargetTransformInfo {
173 public:
174  /// Construct a TTI object using a type implementing the \c Concept
175  /// API below.
176  ///
177  /// This is used by targets to construct a TTI wrapping their target-specific
178  /// implementation that encodes appropriate costs for their target.
179  template <typename T> TargetTransformInfo(T Impl);
180 
181  /// Construct a baseline TTI object using a minimal implementation of
182  /// the \c Concept API below.
183  ///
184  /// The TTI implementation will reflect the information in the DataLayout
185  /// provided if non-null.
186  explicit TargetTransformInfo(const DataLayout &DL);
187 
188  // Provide move semantics.
189  TargetTransformInfo(TargetTransformInfo &&Arg);
190  TargetTransformInfo &operator=(TargetTransformInfo &&RHS);
191 
192  // We need to define the destructor out-of-line to define our sub-classes
193  // out-of-line.
194  ~TargetTransformInfo();
195 
196  /// Handle the invalidation of this information.
197  ///
198  /// When used as a result of \c TargetIRAnalysis this method will be called
199  /// when the function this was computed for changes. When it returns false,
200  /// the information is preserved across those changes.
201  bool invalidate(Function &, const PreservedAnalyses &,
202  FunctionAnalysisManager::Invalidator &) {
203  // FIXME: We should probably in some way ensure that the subtarget
204  // information for a function hasn't changed.
205  return false;
206  }
207 
208  /// \name Generic Target Information
209  /// @{
210 
211  /// The kind of cost model.
212  ///
213  /// There are several different cost models that can be customized by the
214  /// target. The normalization of each cost model may be target specific.
215  /// e.g. TCK_SizeAndLatency should be comparable to target thresholds such as
216  /// those derived from MCSchedModel::LoopMicroOpBufferSize etc.
217  enum TargetCostKind {
218  TCK_RecipThroughput, ///< Reciprocal throughput.
219  TCK_Latency, ///< The latency of instruction.
220  TCK_CodeSize, ///< Instruction code size.
221  TCK_SizeAndLatency ///< The weighted sum of size and latency.
222  };
223 
224  /// Underlying constants for 'cost' values in this interface.
225  ///
226  /// Many APIs in this interface return a cost. This enum defines the
227  /// fundamental values that should be used to interpret (and produce) those
228  /// costs. The costs are returned as an int rather than a member of this
229  /// enumeration because it is expected that the cost of one IR instruction
230  /// may have a multiplicative factor to it or otherwise won't fit directly
231  /// into the enum. Moreover, it is common to sum or average costs which works
232  /// better as simple integral values. Thus this enum only provides constants.
233  /// Also note that the returned costs are signed integers to make it natural
234  /// to add, subtract, and test with zero (a common boundary condition). It is
235  /// not expected that 2^32 is a realistic cost to be modeling at any point.
236  ///
237  /// Note that these costs should usually reflect the intersection of code-size
238  /// cost and execution cost. A free instruction is typically one that folds
239  /// into another instruction. For example, reg-to-reg moves can often be
240  /// skipped by renaming the registers in the CPU, but they still are encoded
241  /// and thus wouldn't be considered 'free' here.
242  enum TargetCostConstants {
243  TCC_Free = 0, ///< Expected to fold away in lowering.
244  TCC_Basic = 1, ///< The cost of a typical 'add' instruction.
245  TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
246  };
247 
248  /// Estimate the cost of a GEP operation when lowered.
249  InstructionCost
250  getGEPCost(Type *PointeeType, const Value *Ptr,
251  ArrayRef<const Value *> Operands,
252  TargetCostKind CostKind = TCK_SizeAndLatency) const;
253 
254  /// \returns A value by which our inlining threshold should be multiplied.
255  /// This is primarily used to bump up the inlining threshold wholesale on
256  /// targets where calls are unusually expensive.
257  ///
258  /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
259  /// individual classes of instructions would be better.
260  unsigned getInliningThresholdMultiplier() const;
261 
262  /// \returns A value to be added to the inlining threshold.
263  unsigned adjustInliningThreshold(const CallBase *CB) const;
264 
265  /// \returns Vector bonus in percent.
266  ///
267  /// Vector bonuses: We want to more aggressively inline vector-dense kernels
268  /// and apply this bonus based on the percentage of vector instructions. A
269  /// bonus is applied if the vector instructions exceed 50% and half that
270  /// amount is applied if it exceeds 10%. Note that these bonuses are somewhat
271  /// arbitrary and evolved over time by accident as much as because they are
272  /// principled bonuses.
273  /// FIXME: It would be nice to base the bonus values on something more
274  /// scientific. A target may have no bonus on vector instructions.
275  int getInlinerVectorBonusPercent() const;
276 
277  /// \return the expected cost of a memcpy, which could e.g. depend on the
278  /// source/destination type and alignment and the number of bytes copied.
279  InstructionCost getMemcpyCost(const Instruction *I) const;
280 
281  /// \return The estimated number of case clusters when lowering \p 'SI'.
282  /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
283  /// table.
284  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
285  unsigned &JTSize,
286  ProfileSummaryInfo *PSI,
287  BlockFrequencyInfo *BFI) const;
288 
289  /// Estimate the cost of a given IR user when lowered.
290  ///
291  /// This can estimate the cost of either a ConstantExpr or Instruction when
292  /// lowered.
293  ///
294  /// \p Operands is a list of operands which can be a result of transformations
295  /// of the current operands. The number of operands on the list must be
296  /// equal to the number of current operands the IR user has, and their
297  /// order on the list must match the order of the current operands the
298  /// IR user has.
299  ///
300  /// The returned cost is defined in terms of \c TargetCostConstants, see its
301  /// comments for a detailed explanation of the cost values.
302  InstructionCost getInstructionCost(const User *U,
303  ArrayRef<const Value *> Operands,
304  TargetCostKind CostKind) const;
305 
306  /// This is a helper function which calls the three-argument
307  /// getInstructionCost with \p Operands which are the current operands U has.
308  InstructionCost getInstructionCost(const User *U,
309  TargetCostKind CostKind) const {
310  SmallVector<const Value *, 4> Operands(U->operand_values());
311  return getInstructionCost(U, Operands, CostKind);
312  }
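// Usage sketch (illustrative, not part of this header): because costs are
// plain signed integral values (see TargetCostConstants), they can simply be
// accumulated. The hypothetical helper below sums the size-and-latency cost
// of every instruction in a basic block, the kind of estimate an inliner or
// unroller might compute.
static InstructionCost estimateBlockCost(const TargetTransformInfo &TTI,
                                         const BasicBlock &BB) {
  InstructionCost Cost = 0;
  for (const Instruction &I : BB)
    Cost += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
  return Cost;
}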
313 
314  /// If a branch or a select condition is skewed in one direction by more than
315  /// this factor, it is very likely to be predicted correctly.
316  BranchProbability getPredictableBranchThreshold() const;
317 
318  /// Return true if branch divergence exists.
319  ///
320  /// Branch divergence has a significantly negative impact on GPU performance
321  /// when threads in the same wavefront take different paths due to conditional
322  /// branches.
323  bool hasBranchDivergence() const;
324 
325  /// Return true if the target prefers to use GPU divergence analysis to
326  /// replace the legacy version.
327  bool useGPUDivergenceAnalysis() const;
328 
329  /// Returns whether V is a source of divergence.
330  ///
331  /// This function provides the target-dependent information for
332  /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis
333  /// first builds the dependency graph, and then runs the reachability
334  /// algorithm starting with the sources of divergence.
335  bool isSourceOfDivergence(const Value *V) const;
336 
337  // Returns true for the target-specific set of operations
338  // that produce a uniform result even when taking
339  // non-uniform arguments.
340  bool isAlwaysUniform(const Value *V) const;
341 
342  /// Returns the address space ID for a target's 'flat' address space. Note
343  /// this is not necessarily the same as addrspace(0), which LLVM sometimes
344  /// refers to as the generic address space. The flat address space is a
345  /// generic address space that can be used to access multiple segments of memory
346  /// with different address spaces. Access of a memory location through a
347  /// pointer with this address space is expected to be legal but slower
348  /// compared to the same memory location accessed through a pointer with a
349  /// different address space.
350  //
351  /// This is for targets with different pointer representations which can
352  /// be converted with the addrspacecast instruction. If a pointer is converted
353  /// to this address space, optimizations should attempt to replace the access
354  /// with the source address space.
355  ///
356  /// \returns ~0u if the target does not have such a flat address space to
357  /// optimize away.
358  unsigned getFlatAddressSpace() const;
359 
360  /// Return any intrinsic address operand indexes which may be rewritten if
361  /// they use a flat address space pointer.
362  ///
363  /// \returns true if the intrinsic was handled.
364  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
365  Intrinsic::ID IID) const;
366 
367  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
368 
369  /// Return true if globals in this address space can have initializers other
370  /// than `undef`.
371  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const;
372 
373  unsigned getAssumedAddrSpace(const Value *V) const;
374 
375  bool isSingleThreaded() const;
376 
377  std::pair<const Value *, unsigned>
378  getPredicatedAddrSpace(const Value *V) const;
379 
380  /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
381  /// NewV, which has a different address space. This should happen for every
382  /// operand index that collectFlatAddressOperands returned for the intrinsic.
383  /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the
384  /// new value (which may be the original \p II with modified operands).
385  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
386  Value *NewV) const;
387 
388  /// Test whether calls to a function lower to actual program function
389  /// calls.
390  ///
391  /// The idea is to test whether the program is likely to require a 'call'
392  /// instruction or equivalent in order to call the given function.
393  ///
394  /// FIXME: It's not clear that this is a good or useful query API. Clients
395  /// should probably move to simpler cost metrics using the above.
396  /// Alternatively, we could split the cost interface into distinct code-size
397  /// and execution-speed costs. This would allow modelling the core of this
398  /// query more accurately as a call is a single small instruction, but
399  /// incurs significant execution cost.
400  bool isLoweredToCall(const Function *F) const;
401 
402  struct LSRCost {
403  /// TODO: Some of these could be merged. Also, a lexical ordering
404  /// isn't always optimal.
405  unsigned Insns;
406  unsigned NumRegs;
407  unsigned AddRecCost;
408  unsigned NumIVMuls;
409  unsigned NumBaseAdds;
410  unsigned ImmCost;
411  unsigned SetupCost;
412  unsigned ScaleCost;
413  };
414 
415  /// Parameters that control the generic loop unrolling transformation.
416  struct UnrollingPreferences {
417  /// The cost threshold for the unrolled loop. Should be relative to the
418  /// getInstructionCost values returned by this API, and the expectation is
419  /// that the unrolled loop's instructions when run through that interface
420  /// should not exceed this cost. However, this is only an estimate. Also,
421  /// specific loops may be unrolled even with a cost above this threshold if
422  /// deemed profitable. Set this to UINT_MAX to disable the loop body cost
423  /// restriction.
424  unsigned Threshold;
425  /// If complete unrolling will reduce the cost of the loop, we will boost
426  /// the Threshold by a certain percent to allow more aggressive complete
427  /// unrolling. This value provides the maximum boost percentage that we
428  /// can apply to Threshold (The value should be no less than 100).
429  /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
430  /// MaxPercentThresholdBoost / 100)
431  /// E.g. if complete unrolling reduces the loop execution time by 50%
432  /// then we boost the threshold by the factor of 2x. If unrolling is not
433  /// expected to reduce the running time, then we do not increase the
434  /// threshold.
435  unsigned MaxPercentThresholdBoost;
436  /// The cost threshold for the unrolled loop when optimizing for size (set
437  /// to UINT_MAX to disable).
438  unsigned OptSizeThreshold;
439  /// The cost threshold for the unrolled loop, like Threshold, but used
440  /// for partial/runtime unrolling (set to UINT_MAX to disable).
441  unsigned PartialThreshold;
442  /// The cost threshold for the unrolled loop when optimizing for size, like
443  /// OptSizeThreshold, but used for partial/runtime unrolling (set to
444  /// UINT_MAX to disable).
445  unsigned PartialOptSizeThreshold;
446  /// A forced unrolling factor (the number of concatenated bodies of the
447  /// original loop in the unrolled loop body). When set to 0, the unrolling
448  /// transformation will select an unrolling factor based on the current cost
449  /// threshold and other factors.
450  unsigned Count;
451  /// Default unroll count for loops with run-time trip count.
452  unsigned DefaultUnrollRuntimeCount;
453  // Set the maximum unrolling factor. The unrolling factor may be selected
454  // using the appropriate cost threshold, but may not exceed this number
455  // (set to UINT_MAX to disable). This does not apply in cases where the
456  // loop is being fully unrolled.
457  unsigned MaxCount;
458  /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
459  /// applies even if full unrolling is selected. This allows a target to fall
460  /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
461  unsigned FullUnrollMaxCount;
462  // Represents number of instructions optimized when "back edge"
463  // becomes "fall through" in unrolled loop.
464  // For now we count a conditional branch on a backedge and a comparison
465  // feeding it.
466  unsigned BEInsns;
467  /// Allow partial unrolling (unrolling of loops to expand the size of the
468  /// loop body, not only to eliminate small constant-trip-count loops).
469  bool Partial;
470  /// Allow runtime unrolling (unrolling of loops to expand the size of the
471  /// loop body even when the number of loop iterations is not known at
472  /// compile time).
473  bool Runtime;
474  /// Allow generation of a loop remainder (extra iterations after unroll).
475  bool AllowRemainder;
476  /// Allow emitting expensive instructions (such as divisions) when computing
477  /// the trip count of a loop for runtime unrolling.
478  bool AllowExpensiveTripCount;
479  /// Apply loop unroll on any kind of loop
480  /// (mainly to loops that fail runtime unrolling).
481  bool Force;
482  /// Allow using trip count upper bound to unroll loops.
483  bool UpperBound;
484  /// Allow unrolling of all the iterations of the runtime loop remainder.
485  bool UnrollRemainder;
486  /// Allow unroll and jam. Used to enable unroll and jam for the target.
487  bool UnrollAndJam;
488  /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
489  /// value above is used during unroll and jam for the outer loop size.
490  /// This value is used in the same manner to limit the size of the inner
491  /// loop.
492  unsigned UnrollAndJamInnerLoopThreshold;
493  /// Don't allow loop unrolling to simulate more than this number of
494  /// iterations when checking full unroll profitability.
495  unsigned MaxIterationsCountToAnalyze;
496  };
497 
498  /// Get target-customized preferences for the generic loop unrolling
499  /// transformation. The caller will initialize UP with the current
500  /// target-independent defaults.
501  void getUnrollingPreferences(Loop *L, ScalarEvolution &,
502  UnrollingPreferences &UP,
503  OptimizationRemarkEmitter *ORE) const;
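// Usage sketch (illustrative, not part of this header): the protocol is that
// the caller fills UnrollingPreferences with its own defaults and the target
// then adjusts whatever it cares about. Only a few fields are shown and the
// default values below are made up; the real defaults live in the loop
// unrolling pass. seedUnrollingPreferences is a hypothetical helper.
static TargetTransformInfo::UnrollingPreferences
seedUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                         const TargetTransformInfo &TTI,
                         OptimizationRemarkEmitter *ORE) {
  TargetTransformInfo::UnrollingPreferences UP;
  UP.Threshold = 150;        // Illustrative target-independent default.
  UP.PartialThreshold = 150; // Ditto.
  UP.Count = 0;              // Let the cost model pick an unroll factor.
  UP.Partial = false;
  UP.Runtime = false;
  TTI.getUnrollingPreferences(L, SE, UP, ORE); // Target may override fields.
  return UP;
}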
504 
505  /// Query the target whether it would be profitable to convert the given loop
506  /// into a hardware loop.
507  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
508  AssumptionCache &AC, TargetLibraryInfo *LibInfo,
509  HardwareLoopInfo &HWLoopInfo) const;
510 
511  /// Query the target whether it would be preferred to create a predicated
512  /// vector loop, which can avoid the need to emit a scalar epilogue loop.
513  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
514  AssumptionCache &AC, TargetLibraryInfo *TLI,
515  DominatorTree *DT,
516  LoopVectorizationLegality *LVL,
517  InterleavedAccessInfo *IAI) const;
518 
519  /// Query the target whether lowering of the llvm.get.active.lane.mask
520  /// intrinsic is supported and how the mask should be used. A return value
521  /// of PredicationStyle::Data indicates the mask is used as data only,
522  /// whereas PredicationStyle::DataAndControlFlow indicates we should also use
523  /// the mask for control flow in the loop. If unsupported the return value is
524  /// PredicationStyle::None.
525  PredicationStyle emitGetActiveLaneMask() const;
526 
527  // Parameters that control the loop peeling transformation
528  struct PeelingPreferences {
529  /// A forced peeling factor (the number of bodies of the original loop
530  /// that should be peeled off before the loop body). When set to 0, a
531  /// peeling factor is selected based on profile information and other factors.
532  unsigned PeelCount;
533  /// Allow peeling off loop iterations.
534  bool AllowPeeling;
535  /// Allow peeling off loop iterations for loop nests.
536  bool AllowLoopNestsPeeling;
537  /// Allow peeling based on profile. Used to enable peeling off all
538  /// iterations based on the provided profile.
539  /// If the value is true the peeling cost model can decide to peel only
540  /// some iterations and in this case it will set this to false.
541  bool PeelProfiledIterations;
542  };
543 
544  /// Get target-customized preferences for the generic loop peeling
545  /// transformation. The caller will initialize \p PP with the current
546  /// target-independent defaults with information from \p L and \p SE.
547  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
548  PeelingPreferences &PP) const;
549 
550  /// Targets can implement their own combinations for target-specific
551  /// intrinsics. This function will be called from the InstCombine pass every
552  /// time a target-specific intrinsic is encountered.
553  ///
554  /// \returns std::nullopt to not do anything target specific or a value that
555  /// will be returned from the InstCombiner. It is also possible to stop
556  /// further processing of the intrinsic by returning nullptr.
557  std::optional<Instruction *> instCombineIntrinsic(InstCombiner & IC,
558  IntrinsicInst & II) const;
559  /// Can be used to implement target-specific instruction combining.
560  /// \see instCombineIntrinsic
561  std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
562  InstCombiner & IC, IntrinsicInst & II, APInt DemandedMask,
563  KnownBits & Known, bool &KnownBitsComputed) const;
564  /// Can be used to implement target-specific instruction combining.
565  /// \see instCombineIntrinsic
566  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
567  InstCombiner & IC, IntrinsicInst & II, APInt DemandedElts,
568  APInt & UndefElts, APInt & UndefElts2, APInt & UndefElts3,
569  std::function<void(Instruction *, unsigned, APInt, APInt &)>
570  SimplifyAndSetOp) const;
571  /// @}
572 
573  /// \name Scalar Target Information
574  /// @{
575 
576  /// Flags indicating the kind of support for population count.
577  ///
578  /// Compared to the SW implementation, HW support is supposed to
579  /// significantly boost the performance when the population is dense, and it
580  /// may or may not degrade performance if the population is sparse. A HW
581  /// support is considered as "Fast" if it can outperform, or is on a par
582  /// with, SW implementation when the population is sparse; otherwise, it is
583  /// considered as "Slow".
584  enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };
585 
586  /// Return true if the specified immediate is legal add immediate, that
587  /// is the target has add instructions which can add a register with the
588  /// immediate without having to materialize the immediate into a register.
589  bool isLegalAddImmediate(int64_t Imm) const;
590 
591  /// Return true if the specified immediate is legal icmp immediate,
592  /// that is the target has icmp instructions which can compare a register
593  /// against the immediate without having to materialize the immediate into a
594  /// register.
595  bool isLegalICmpImmediate(int64_t Imm) const;
596 
597  /// Return true if the addressing mode represented by AM is legal for
598  /// this target, for a load/store of the specified type.
599  /// The type may be VoidTy, in which case only return true if the addressing
600  /// mode is legal for a load/store of any legal type.
601  /// If target returns true in LSRWithInstrQueries(), I may be valid.
602  /// TODO: Handle pre/postinc as well.
603  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
604  bool HasBaseReg, int64_t Scale,
605  unsigned AddrSpace = 0,
606  Instruction *I = nullptr) const;
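// Usage sketch (illustrative, not part of this header): asking whether a
// "base register + 4 * index register" address is directly supported for an
// i32 access. BaseGV is null because this made-up query has no global base;
// supportsScaledI32Addressing is a hypothetical helper.
static bool supportsScaledI32Addressing(const TargetTransformInfo &TTI,
                                        Type *I32Ty) {
  return TTI.isLegalAddressingMode(I32Ty, /*BaseGV=*/nullptr, /*BaseOffset=*/0,
                                   /*HasBaseReg=*/true, /*Scale=*/4);
}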
607 
608  /// Return true if LSR cost of C1 is lower than C2.
609  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
610  const TargetTransformInfo::LSRCost &C2) const;
611 
612  /// Return true if LSR major cost is number of registers. Targets which
613  /// implement their own isLSRCostLess and unset number of registers as major
614  /// cost should return false, otherwise return true.
615  bool isNumRegsMajorCostOfLSR() const;
616 
617  /// \returns true if LSR should not optimize a chain that includes \p I.
618  bool isProfitableLSRChainElement(Instruction *I) const;
619 
620  /// Return true if the target can fuse a compare and branch.
621  /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
622  /// calculation for the instructions in a loop.
623  bool canMacroFuseCmp() const;
624 
625  /// Return true if the target can save a compare for loop count, for example
626  /// hardware loop saves a compare.
627  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
628  DominatorTree *DT, AssumptionCache *AC,
629  TargetLibraryInfo *LibInfo) const;
630 
631  enum AddressingModeKind {
632  AMK_PreIndexed,
633  AMK_PostIndexed,
634  AMK_None
635  };
636 
637  /// Return the preferred addressing mode LSR should make efforts to generate.
638  AddressingModeKind getPreferredAddressingMode(const Loop *L,
639  ScalarEvolution *SE) const;
640 
641  /// Return true if the target supports masked store.
642  bool isLegalMaskedStore(Type *DataType, Align Alignment) const;
643  /// Return true if the target supports masked load.
644  bool isLegalMaskedLoad(Type *DataType, Align Alignment) const;
645 
646  /// Return true if the target supports nontemporal store.
647  bool isLegalNTStore(Type *DataType, Align Alignment) const;
648  /// Return true if the target supports nontemporal load.
649  bool isLegalNTLoad(Type *DataType, Align Alignment) const;
650 
651  /// \returns true if the target supports broadcasting a load to a vector of
652  /// type <NumElements x ElementTy>.
653  bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const;
654 
655  /// Return true if the target supports masked scatter.
656  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
657  /// Return true if the target supports masked gather.
658  bool isLegalMaskedGather(Type *DataType, Align Alignment) const;
659  /// Return true if the target forces scalarizing of llvm.masked.gather
660  /// intrinsics.
661  bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const;
662  /// Return true if the target forces scalarizing of llvm.masked.scatter
663  /// intrinsics.
664  bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const;
665 
666  /// Return true if the target supports masked compress store.
667  bool isLegalMaskedCompressStore(Type *DataType) const;
668  /// Return true if the target supports masked expand load.
669  bool isLegalMaskedExpandLoad(Type *DataType) const;
670 
671  /// Return true if this is an alternating opcode pattern that can be lowered
672  /// to a single instruction on the target. In X86 this is for the addsub
673  /// instruction which corresponds to a Shuffle + Fadd + FSub pattern in IR.
674  /// This function expects two opcodes: \p Opcode0 and \p Opcode1, which are
675  /// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0`
676  /// when \p Opcode0 is selected and `1` when \p Opcode1 is selected.
677  /// \p VecTy is the vector type of the instruction to be generated.
678  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
679  const SmallBitVector &OpcodeMask) const;
680 
681  /// Return true if we should be enabling ordered reductions for the target.
682  bool enableOrderedReductions() const;
683 
684  /// Return true if the target has a unified operation to calculate division
685  /// and remainder. If so, the additional implicit multiplication and
686  /// subtraction required to calculate a remainder from division are free. This
687  /// can enable more aggressive transformations for division and remainder than
688  /// would typically be allowed using throughput or size cost models.
689  bool hasDivRemOp(Type *DataType, bool IsSigned) const;
690 
691  /// Return true if the given instruction (assumed to be a memory access
692  /// instruction) has a volatile variant. If that's the case then we can avoid
693  /// addrspacecast to generic AS for volatile loads/stores. Default
694  /// implementation returns false, which prevents address space inference for
695  /// volatile loads/stores.
696  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
697 
698  /// Return true if target doesn't mind addresses in vectors.
699  bool prefersVectorizedAddressing() const;
700 
701  /// Return the cost of the scaling factor used in the addressing
702  /// mode represented by AM for this target, for a load/store
703  /// of the specified type.
704  /// If the AM is supported, the return value must be >= 0.
705  /// If the AM is not supported, it returns a negative value.
706  /// TODO: Handle pre/postinc as well.
707  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
708  int64_t BaseOffset, bool HasBaseReg,
709  int64_t Scale,
710  unsigned AddrSpace = 0) const;
711 
712  /// Return true if the loop strength reduce pass should make
713  /// Instruction* based TTI queries to isLegalAddressingMode(). This is
714  /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
715  /// immediate offset and no index register.
716  bool LSRWithInstrQueries() const;
717 
718  /// Return true if it's free to truncate a value of type Ty1 to type
719  /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
720  /// by referencing its sub-register AX.
721  bool isTruncateFree(Type *Ty1, Type *Ty2) const;
722 
723  /// Return true if it is profitable to hoist instruction in the
724  /// then/else to before if.
725  bool isProfitableToHoist(Instruction *I) const;
726 
727  bool useAA() const;
728 
729  /// Return true if this type is legal.
730  bool isTypeLegal(Type *Ty) const;
731 
732  /// Returns the estimated number of registers required to represent \p Ty.
733  unsigned getRegUsageForType(Type *Ty) const;
734 
735  /// Return true if switches should be turned into lookup tables for the
736  /// target.
737  bool shouldBuildLookupTables() const;
738 
739  /// Return true if switches should be turned into lookup tables
740  /// containing this constant value for the target.
741  bool shouldBuildLookupTablesForConstant(Constant *C) const;
742 
743  /// Return true if lookup tables should be turned into relative lookup tables.
744  bool shouldBuildRelLookupTables() const;
745 
746  /// Return true if the input function, which is cold at all call sites,
747  /// should use the coldcc calling convention.
748  bool useColdCCForColdCall(Function &F) const;
749 
750  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
751  /// are set if the demanded result elements need to be inserted and/or
752  /// extracted from vectors.
753  InstructionCost getScalarizationOverhead(VectorType *Ty,
754  const APInt &DemandedElts,
755  bool Insert, bool Extract,
756  TTI::TargetCostKind CostKind) const;
757 
758  /// Estimate the overhead of scalarizing an instruction's unique
759  /// non-constant operands. The (potentially vector) types to use for each
760  /// argument are passed via Tys.
761  InstructionCost
762  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
763  ArrayRef<Type *> Tys,
764  TTI::TargetCostKind CostKind) const;
765 
766  /// If target has efficient vector element load/store instructions, it can
767  /// return true here so that insertion/extraction costs are not added to
768  /// the scalarization cost of a load/store.
769  bool supportsEfficientVectorElementLoadStore() const;
770 
771  /// If the target supports tail calls.
772  bool supportsTailCalls() const;
773 
774  /// If target supports tail call on \p CB
775  bool supportsTailCallFor(const CallBase *CB) const;
776 
777  /// Don't restrict interleaved unrolling to small loops.
778  bool enableAggressiveInterleaving(bool LoopHasReductions) const;
779 
780  /// Returns options for expansion of memcmp. IsZeroCmp is
781  // true if this is the expansion of memcmp(p1, p2, s) == 0.
782  struct MemCmpExpansionOptions {
783  // Return true if memcmp expansion is enabled.
784  operator bool() const { return MaxNumLoads > 0; }
785 
786  // Maximum number of load operations.
787  unsigned MaxNumLoads = 0;
788 
789  // The list of available load sizes (in bytes), sorted in decreasing order.
790  SmallVector<unsigned, 8> LoadSizes;
791 
792  // For memcmp expansion when the memcmp result is only compared equal or
793  // not-equal to 0, allow up to this number of load pairs per block. As an
794  // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
795  // a0 = load2bytes &a[0]
796  // b0 = load2bytes &b[0]
797  // a2 = load1byte &a[2]
798  // b2 = load1byte &b[2]
799  // r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
800  unsigned NumLoadsPerBlock = 1;
801 
802  // Set to true to allow overlapping loads. For example, 7-byte compares can
803  // be done with two 4-byte compares instead of 4+2+1-byte compares. This
804  // requires all loads in LoadSizes to be doable in an unaligned way.
805  bool AllowOverlappingLoads = false;
806  };
807  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
808  bool IsZeroCmp) const;
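// Usage sketch (illustrative, not part of this header): how a caller such as
// the memcmp expansion pass might consume these options. The size check is
// deliberately simplified; canExpandMemCmpEq is a hypothetical helper.
static bool canExpandMemCmpEq(const TargetTransformInfo &TTI, uint64_t Size,
                              bool OptSize) {
  TargetTransformInfo::MemCmpExpansionOptions Options =
      TTI.enableMemCmpExpansion(OptSize, /*IsZeroCmp=*/true);
  if (!Options || Options.LoadSizes.empty())
    return false; // Expansion disabled for this target.
  // LoadSizes is sorted in decreasing order, so front() is the widest load.
  return Size <= uint64_t(Options.MaxNumLoads) * Options.LoadSizes.front();
}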
809 
810  /// Should the Select Optimization pass be enabled and run.
811  bool enableSelectOptimize() const;
812 
813  /// Enable matching of interleaved access groups.
814  bool enableInterleavedAccessVectorization() const;
815 
816  /// Enable matching of interleaved access groups that contain predicated
817  /// accesses or gaps and therefore vectorized using masked
818  /// vector loads/stores.
819  bool enableMaskedInterleavedAccessVectorization() const;
820 
821  /// Indicate that it is potentially unsafe to automatically vectorize
822  /// floating-point operations because vector and scalar floating-point
823  /// semantics may differ. For example, ARM NEON v7 SIMD math
824  /// does not support IEEE-754 denormal numbers, while depending on the
825  /// platform, scalar floating-point math does.
826  /// This applies to floating-point math operations and calls, not memory
827  /// operations, shuffles, or casts.
828  bool isFPVectorizationPotentiallyUnsafe() const;
829 
830  /// Determine if the target supports unaligned memory accesses.
831  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
832  unsigned AddressSpace = 0,
833  Align Alignment = Align(1),
834  unsigned *Fast = nullptr) const;
835 
836  /// Return hardware support for population count.
837  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
838 
839  /// Return true if the hardware has a fast square-root instruction.
840  bool haveFastSqrt(Type *Ty) const;
841 
842  /// Return true if the cost of the instruction is too high to speculatively
843  /// execute and should be kept behind a branch.
844  /// This normally just wraps around a getInstructionCost() call, but some
845  /// targets might report a low TCK_SizeAndLatency value that is incompatible
846  /// with the fixed TCC_Expensive value.
847  /// NOTE: This assumes the instruction passes isSafeToSpeculativelyExecute().
848  bool isExpensiveToSpeculativelyExecute(const Instruction *I) const;
849 
850  /// Return true if it is faster to check if a floating-point value is NaN
851  /// (or not-NaN) versus a comparison against a constant FP zero value.
852  /// Targets should override this if materializing a 0.0 for comparison is
853  /// generally as cheap as checking for ordered/unordered.
854  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
855 
856  /// Return the expected cost of supporting the floating point operation
857  /// of the specified type.
858  InstructionCost getFPOpCost(Type *Ty) const;
859 
860  /// Return the expected cost of materializing for the given integer
861  /// immediate of the specified type.
862  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
863  TargetCostKind CostKind) const;
864 
865  /// Return the expected cost of materialization for the given integer
866  /// immediate of the specified type for a given instruction. The cost can be
867  /// zero if the immediate can be folded into the specified instruction.
868  InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
869  const APInt &Imm, Type *Ty,
870  TargetCostKind CostKind,
871  Instruction *Inst = nullptr) const;
872  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
873  const APInt &Imm, Type *Ty,
874  TargetCostKind CostKind) const;
875 
876  /// Return the expected cost for the given integer when optimising
877  /// for size. This is different than the other integer immediate cost
878  /// functions in that it is subtarget agnostic. This is useful when you e.g.
879  /// target one ISA such as Aarch32 but smaller encodings could be possible
880  /// with another such as Thumb. This return value is used as a penalty when
881  /// the total costs for a constant is calculated (the bigger the cost, the
882  /// more beneficial constant hoisting is).
883  InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
884  const APInt &Imm, Type *Ty) const;
885  /// @}
886 
887  /// \name Vector Target Information
888  /// @{
889 
890  /// The various kinds of shuffle patterns for vector queries.
891  enum ShuffleKind {
892  SK_Broadcast, ///< Broadcast element 0 to all other elements.
893  SK_Reverse, ///< Reverse the order of the vector.
894  SK_Select, ///< Selects elements from the corresponding lane of
895  ///< either source operand. This is equivalent to a
896  ///< vector select with a constant condition operand.
897  SK_Transpose, ///< Transpose two vectors.
898  SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
899  SK_ExtractSubvector, ///< ExtractSubvector Index indicates start offset.
900  SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
901  ///< with any shuffle mask.
902  SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any
903  ///< shuffle mask.
904  SK_Splice ///< Concatenates elements from the first input vector
905  ///< with elements of the second input vector. Returning
906  ///< a vector of the same type as the input vectors.
907  ///< Index indicates start offset in first input vector.
908  };
909 
910  /// Additional information about an operand's possible values.
911  enum OperandValueKind {
912  OK_AnyValue, // Operand can have any value.
913  OK_UniformValue, // Operand is uniform (splat of a value).
914  OK_UniformConstantValue, // Operand is uniform constant.
915  OK_NonUniformConstantValue // Operand is a non uniform constant value.
916  };
917 
918  /// Additional properties of an operand's values.
919  enum OperandValueProperties {
920  OP_None = 0,
921  OP_PowerOf2 = 1,
922  OP_NegatedPowerOf2 = 2,
923  };
924 
925  // Describe the values an operand can take. We're in the process
926  // of migrating uses of OperandValueKind and OperandValueProperties
927  // to use this class, and then will change the internal representation.
928  struct OperandValueInfo {
929  OperandValueKind Kind = OK_AnyValue;
930  OperandValueProperties Properties = OP_None;
931 
932  bool isConstant() const {
933  return Kind == OK_UniformConstantValue || Kind == OK_NonUniformConstantValue;
934  }
935  bool isUniform() const {
936  return Kind == OK_UniformValue || Kind == OK_UniformConstantValue;
937  }
938  bool isPowerOf2() const {
939  return Properties == OP_PowerOf2;
940  }
941  bool isNegatedPowerOf2() const {
942  return Properties == OP_NegatedPowerOf2;
943  }
944 
945  OperandValueInfo getNoProps() const {
946  return {Kind, OP_None};
947  }
948  };
949 
950  /// \return the number of registers in the target-provided register class.
951  unsigned getNumberOfRegisters(unsigned ClassID) const;
952 
953  /// \return the target-provided register class ID for the provided type,
954  /// accounting for type promotion and other type-legalization techniques that
955  /// the target might apply. However, it specifically does not account for the
956  /// scalarization or splitting of vector types. Should a vector type require
957  /// scalarization or splitting into multiple underlying vector registers, that
958  /// type should be mapped to a register class containing no registers.
959  /// Specifically, this is designed to provide a simple, high-level view of the
960  /// register allocation later performed by the backend. These register classes
961  /// don't necessarily map onto the register classes used by the backend.
962  /// FIXME: It's not currently possible to determine how many registers
963  /// are used by the provided type.
964  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
965 
966  /// \return the target-provided register class name
967  const char *getRegisterClassName(unsigned ClassID) const;
968 
969  enum RegisterKind { RGK_Scalar, RGK_FixedWidthVector, RGK_ScalableVector };
970 
971  /// \return The width of the largest scalar or vector register type.
972  TypeSize getRegisterBitWidth(RegisterKind K) const;
973 
974  /// \return The width of the smallest vector register type.
975  unsigned getMinVectorRegisterBitWidth() const;
976 
977  /// \return The maximum value of vscale if the target specifies an
978  /// architectural maximum vector length, and std::nullopt otherwise.
979  std::optional<unsigned> getMaxVScale() const;
980 
981  /// \return the value of vscale to tune the cost model for.
982  std::optional<unsigned> getVScaleForTuning() const;
983 
984  /// \return True if the vectorization factor should be chosen to
985  /// make the vector of the smallest element type match the size of a
986  /// vector register. For wider element types, this could result in
987  /// creating vectors that span multiple vector registers.
988  /// If false, the vectorization factor will be chosen based on the
989  /// size of the widest element type.
990  /// \p K Register Kind for vectorization.
991  bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const;
992 
993  /// \return The minimum vectorization factor for types of given element
994  /// bit width, or 0 if there is no minimum VF. The returned value only
995  /// applies when shouldMaximizeVectorBandwidth returns true.
996  /// If IsScalable is true, the returned ElementCount must be a scalable VF.
997  ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;
998 
999  /// \return The maximum vectorization factor for types of given element
1000  /// bit width and opcode, or 0 if there is no maximum VF.
1001  /// Currently only used by the SLP vectorizer.
1002  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
1003 
1004  /// \return The minimum vectorization factor for the store instruction. Given
1005  /// the initial estimation of the minimum vector factor and store value type,
1006  /// it tries to find possible lowest VF, which still might be profitable for
1007  /// the vectorization.
1008  /// \param VF Initial estimation of the minimum vector factor.
1009  /// \param ScalarMemTy Scalar memory type of the store operation.
1010  /// \param ScalarValTy Scalar type of the stored value.
1011  /// Currently only used by the SLP vectorizer.
1012  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
1013  Type *ScalarValTy) const;
1014 
1015  /// \return True if it should be considered for address type promotion.
1016  /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
1017  /// profitable without finding other extensions fed by the same input.
1018  bool shouldConsiderAddressTypePromotion(
1019  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
1020 
1021  /// \return The size of a cache line in bytes.
1022  unsigned getCacheLineSize() const;
1023 
1024  /// The possible cache levels
1025  enum class CacheLevel {
1026  L1D, // The L1 data cache
1027  L2D, // The L2 data cache
1028 
1029  // We currently do not model L3 caches, as their sizes differ widely between
1030  // microarchitectures. Also, we currently do not have a use for L3 cache
1031  // size modeling yet.
1032  };
1033 
1034  /// \return The size of the cache level in bytes, if available.
1035  std::optional<unsigned> getCacheSize(CacheLevel Level) const;
1036 
1037  /// \return The associativity of the cache level, if available.
1038  std::optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
1039 
1040  /// \return How much before a load we should place the prefetch
1041  /// instruction. This is currently measured in number of
1042  /// instructions.
1043  unsigned getPrefetchDistance() const;
1044 
1045  /// Some HW prefetchers can handle accesses up to a certain constant stride.
1046  /// Sometimes prefetching is beneficial even below the HW prefetcher limit,
1047  /// and the arguments provided are meant to serve as a basis for deciding this
1048  /// for a particular loop.
1049  ///
1050  /// \param NumMemAccesses Number of memory accesses in the loop.
1051  /// \param NumStridedMemAccesses Number of the memory accesses that
1052  /// ScalarEvolution could find a known stride
1053  /// for.
1054  /// \param NumPrefetches Number of software prefetches that will be
1055  /// emitted as determined by the addresses
1056  /// involved and the cache line size.
1057  /// \param HasCall True if the loop contains a call.
1058  ///
1059  /// \return This is the minimum stride in bytes where it makes sense to start
1060  /// adding SW prefetches. The default is 1, i.e. prefetch with any
1061  /// stride.
1062  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1063  unsigned NumStridedMemAccesses,
1064  unsigned NumPrefetches, bool HasCall) const;
1065 
1066  /// \return The maximum number of iterations to prefetch ahead. If
1067  /// the required number of iterations is more than this number, no
1068  /// prefetching is performed.
1069  unsigned getMaxPrefetchIterationsAhead() const;
1070 
1071  /// \return True if prefetching should also be done for writes.
1072  bool enableWritePrefetching() const;
1073 
1074  /// \return if target want to issue a prefetch in address space \p AS.
1075  bool shouldPrefetchAddressSpace(unsigned AS) const;
1076 
1077  /// \return The maximum interleave factor that any transform should try to
1078  /// perform for this target. This number depends on the level of parallelism
1079  /// and the number of execution units in the CPU.
1080  unsigned getMaxInterleaveFactor(unsigned VF) const;
1081 
1082  /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
1083  static OperandValueInfo getOperandInfo(const Value *V);
1084 
1085  /// This is an approximation of reciprocal throughput of a math/logic op.
1086  /// A higher cost indicates less expected throughput.
1087  /// From Agner Fog's guides, reciprocal throughput is "the average number of
1088  /// clock cycles per instruction when the instructions are not part of a
1089  /// limiting dependency chain."
1090  /// Therefore, costs should be scaled to account for multiple execution units
1091  /// on the target that can process this type of instruction. For example, if
1092  /// there are 5 scalar integer units and 2 vector integer units that can
1093  /// calculate an 'add' in a single cycle, this model should indicate that the
1094  /// cost of the vector add instruction is 2.5 times the cost of the scalar
1095  /// add instruction.
1096  /// \p Args is an optional argument which holds the instruction operands
1097  /// values so the TTI can analyze those values searching for special
1098  /// cases or optimizations based on those values.
1099  /// \p CxtI is the optional original context instruction, if one exists, to
1100  /// provide even more information.
1101  InstructionCost getArithmeticInstrCost(
1102  unsigned Opcode, Type *Ty,
1103  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1104  TTI::OperandValueInfo Opd1Info = {TTI::OK_AnyValue, TTI::OP_None},
1105  TTI::OperandValueInfo Opd2Info = {TTI::OK_AnyValue, TTI::OP_None},
1106  ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
1107  const Instruction *CxtI = nullptr) const;
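// Usage sketch (illustrative, not part of this header): costing an existing
// binary operator while letting the target see what is known about its
// operands (uniform, constant, power of two, ...). costOfBinOp is a
// hypothetical helper; BO is an instruction the caller is analyzing.
static InstructionCost costOfBinOp(const TargetTransformInfo &TTI,
                                   const BinaryOperator &BO) {
  TargetTransformInfo::OperandValueInfo Op0Info =
      TargetTransformInfo::getOperandInfo(BO.getOperand(0));
  TargetTransformInfo::OperandValueInfo Op1Info =
      TargetTransformInfo::getOperandInfo(BO.getOperand(1));
  return TTI.getArithmeticInstrCost(BO.getOpcode(), BO.getType(),
                                    TargetTransformInfo::TCK_RecipThroughput,
                                    Op0Info, Op1Info,
                                    /*Args=*/std::nullopt, &BO);
}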
1108 
1109  /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
1110  /// The exact mask may be passed as Mask, or else the array will be empty.
1111  /// The index and subtype parameters are used by the subvector insertion and
1112  /// extraction shuffle kinds to show the insert/extract point and the type of
1113  /// the subvector being inserted/extracted. The operands of the shuffle can be
1114  /// passed through \p Args, which helps improve the cost estimation in some
1115  /// cases, like in broadcast loads.
1116  /// NOTE: For subvector extractions Tp represents the source type.
1117  InstructionCost
1118  getShuffleCost(ShuffleKind Kind, VectorType *Tp,
1119  ArrayRef<int> Mask = std::nullopt,
1120  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1121  int Index = 0, VectorType *SubTp = nullptr,
1122  ArrayRef<const Value *> Args = std::nullopt) const;
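// Usage sketch (illustrative, not part of this header): the cost of
// broadcasting lane 0 of a vector to all lanes. For SK_Broadcast the mask,
// index and subvector type can stay at their defaults; broadcastCost is a
// hypothetical helper and VecTy is created by the caller.
static InstructionCost broadcastCost(const TargetTransformInfo &TTI,
                                     VectorType *VecTy) {
  return TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy);
}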
1123 
1124  /// Represents a hint about the context in which a cast is used.
1125  ///
1126  /// For zext/sext, the context of the cast is the operand, which must be a
1127  /// load of some kind. For trunc, the context of the cast is the single
1128  /// user of the instruction, which must be a store of some kind.
1129  ///
1130  /// This enum allows the vectorizer to give getCastInstrCost an idea of the
1131  /// type of cast it's dealing with, as not every cast is equal. For instance,
1132  /// the zext of a load may be free, but the zext of an interleaving load can
1133  /// be (very) expensive!
1134  ///
1135  /// See \c getCastContextHint to compute a CastContextHint from a cast
1136  /// Instruction*. Callers can use it if they don't need to override the
1137  /// context and just want it to be calculated from the instruction.
1138  ///
1139  /// FIXME: This handles the types of load/store that the vectorizer can
1140  /// produce, which are the cases where the context instruction is most
1141  /// likely to be incorrect. There are other situations where that can happen
1142  /// too, which might be handled here but in the long run a more general
1143  /// solution of costing multiple instructions at the same times may be better.
1144  enum class CastContextHint : uint8_t {
1145  None, ///< The cast is not used with a load/store of any kind.
1146  Normal, ///< The cast is used with a normal load/store.
1147  Masked, ///< The cast is used with a masked load/store.
1148  GatherScatter, ///< The cast is used with a gather/scatter.
1149  Interleave, ///< The cast is used with an interleaved load/store.
1150  Reversed, ///< The cast is used with a reversed load/store.
1151  };
1152 
1153  /// Calculates a CastContextHint from \p I.
1154  /// This should be used by callers of getCastInstrCost if they wish to
1155  /// determine the context from some instruction.
1156  /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr,
1157  /// or if it's another type of cast.
1158  static CastContextHint getCastContextHint(const Instruction *I);
1159 
1160  /// \return The expected cost of cast instructions, such as bitcast, trunc,
1161  /// zext, etc. If there is an existing instruction that holds Opcode, it
1162  /// may be passed in the 'I' parameter.
1163  InstructionCost
1164  getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1165  TTI::CastContextHint CCH,
1166  TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
1167  const Instruction *I = nullptr) const;
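// Usage sketch (illustrative, not part of this header): costing an existing
// cast, letting getCastContextHint derive the load/store context from the
// instruction instead of supplying one manually. costOfCast is a hypothetical
// helper.
static InstructionCost costOfCast(const TargetTransformInfo &TTI,
                                  const CastInst &Cast) {
  TargetTransformInfo::CastContextHint Hint =
      TargetTransformInfo::getCastContextHint(&Cast);
  return TTI.getCastInstrCost(Cast.getOpcode(), Cast.getDestTy(),
                              Cast.getSrcTy(), Hint,
                              TargetTransformInfo::TCK_RecipThroughput, &Cast);
}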
1168 
1169  /// \return The expected cost of a sign- or zero-extended vector extract. Use
1170  /// Index = -1 to indicate that there is no information about the index value.
1171  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1172  VectorType *VecTy,
1173  unsigned Index) const;
1174 
1175  /// \return The expected cost of control-flow related instructions such as
1176  /// Phi, Ret, Br, Switch.
1177  InstructionCost
1178  getCFInstrCost(unsigned Opcode,
1179  TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
1180  const Instruction *I = nullptr) const;
1181 
1182  /// \returns The expected cost of compare and select instructions. If there
1183  /// is an existing instruction that holds Opcode, it may be passed in the
1184  /// 'I' parameter. The \p VecPred parameter can be used to indicate the select
1185  /// is using a compare with the specified predicate as condition. When vector
1186  /// types are passed, \p VecPred must be used for all lanes.
1187  InstructionCost
1188  getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
1189  CmpInst::Predicate VecPred,
1190  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1191  const Instruction *I = nullptr) const;
1192 
1193  /// \return The expected cost of vector Insert and Extract.
1194  /// Use -1 to indicate that there is no information on the index value.
1195  /// This is used when the instruction is not available; a typical use
1196  /// case is to provision the cost of vectorization/scalarization in
1197  /// vectorizer passes.
1198  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
1199  TTI::TargetCostKind CostKind,
1200  unsigned Index = -1, Value *Op0 = nullptr,
1201  Value *Op1 = nullptr) const;
1202 
1203  /// \return The expected cost of vector Insert and Extract.
1204  /// This is used when instruction is available, and implementation
1205  /// asserts 'I' is not nullptr.
1206  ///
1207  /// A typical suitable use case is cost estimation when vector instruction
1208  /// exists (e.g., from basic blocks during transformation).
1209  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
1210  TTI::TargetCostKind CostKind,
1211  unsigned Index = -1) const;
1212 
1213  /// \return The cost of replication shuffle of \p VF elements typed \p EltTy
1214  /// \p ReplicationFactor times.
1215  ///
1216  /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
1217  /// <0,0,0,1,1,1,2,2,2,3,3,3>
1218  InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
1219  int VF,
1220  const APInt &DemandedDstElts,
1221  TTI::TargetCostKind CostKind);
1222 
1223  /// \return The cost of Load and Store instructions.
1224  InstructionCost
1225  getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1226  unsigned AddressSpace,
1227  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1228  OperandValueInfo OpdInfo = {OK_AnyValue, OP_None},
1229  const Instruction *I = nullptr) const;
1230 
1231  /// \return The cost of VP Load and Store instructions.
1232  InstructionCost
1233  getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1234  unsigned AddressSpace,
1235  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1236  const Instruction *I = nullptr) const;
1237 
1238  /// \return The cost of masked Load and Store instructions.
1239  InstructionCost getMaskedMemoryOpCost(
1240  unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
1241  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1242 
1243  /// \return The cost of Gather or Scatter operation
1244  /// \p Opcode - is a type of memory access Load or Store
1245  /// \p DataTy - a vector type of the data to be loaded or stored
1246  /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
1247  /// \p VariableMask - true when the memory access is predicated with a mask
1248  /// that is not a compile-time constant
1249  /// \p Alignment - alignment of single element
1250  /// \p I - the optional original context instruction, if one exists, e.g. the
1251  /// load/store to transform or the call to the gather/scatter intrinsic
1252  InstructionCost getGatherScatterOpCost(
1253  unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1254  Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1255  const Instruction *I = nullptr) const;
1256 
1257  /// \return The cost of the interleaved memory operation.
1258  /// \p Opcode is the memory operation code
1259  /// \p VecTy is the vector type of the interleaved access.
1260  /// \p Factor is the interleave factor
1261  /// \p Indices is the indices for interleaved load members (as interleaved
1262  /// load allows gaps)
1263  /// \p Alignment is the alignment of the memory operation
1264  /// \p AddressSpace is address space of the pointer.
1265  /// \p UseMaskForCond indicates if the memory access is predicated.
1266  /// \p UseMaskForGaps indicates if gaps should be masked.
1267  InstructionCost getInterleavedMemoryOpCost(
1268  unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1269  Align Alignment, unsigned AddressSpace,
1270  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1271  bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
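// Usage sketch (illustrative, not part of this header): the cost of a
// factor-2 interleaved load where both members of the group are used (an
// even/odd de-interleave). WideVecTy is the type of the whole wide load; the
// alignment and address space are made up for the example, and
// interleavedLoadCost is a hypothetical helper.
static InstructionCost interleavedLoadCost(const TargetTransformInfo &TTI,
                                           VectorType *WideVecTy) {
  unsigned Indices[] = {0, 1}; // Both members of the group are live.
  return TTI.getInterleavedMemoryOpCost(Instruction::Load, WideVecTy,
                                        /*Factor=*/2, Indices, Align(16),
                                        /*AddressSpace=*/0);
}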
1272 
1273  /// A helper function to determine the type of reduction algorithm used
1274  /// for a given \p Opcode and set of FastMathFlags \p FMF.
1275  static bool requiresOrderedReduction(std::optional<FastMathFlags> FMF) {
1276  return FMF && !(*FMF).allowReassoc();
1277  }
1278 
1279  /// Calculate the cost of vector reduction intrinsics.
1280  ///
1281  /// This is the cost of reducing the vector value of type \p Ty to a scalar
1282  /// value using the operation denoted by \p Opcode. The FastMathFlags
1283  /// parameter \p FMF indicates what type of reduction we are performing:
1284  /// 1. Tree-wise. This is the typical 'fast' reduction performed that
1285  /// involves successively splitting a vector into half and doing the
1286  /// operation on the pair of halves until you have a scalar value. For
1287  /// example:
1288  /// (v0, v1, v2, v3)
1289  /// ((v0+v2), (v1+v3), undef, undef)
1290  /// ((v0+v2+v1+v3), undef, undef, undef)
1291  /// This is the default behaviour for integer operations, whereas for
1292  /// floating point we only do this if \p FMF indicates that
1293  /// reassociation is allowed.
1294  /// 2. Ordered. For a vector with N elements this involves performing N
1295  /// operations in lane order, starting with an initial scalar value, i.e.
1296  /// result = InitVal + v0
1297  /// result = result + v1
1298  /// result = result + v2
1299  /// result = result + v3
1300  /// This is only the case for FP operations and when reassociation is not
1301  /// allowed.
1302  ///
1303  InstructionCost getArithmeticReductionCost(
1304  unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
1305  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1306 
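// Usage sketch (illustrative, not part of this header): pricing an fadd
// reduction both ways. With reassociation allowed the cheap tree-wise form
// may be used; with default (strict) FP semantics requiresOrderedReduction()
// is true and the lane-by-lane ordered form is costed.
// orderedReductionPenalty is a hypothetical helper.
static InstructionCost orderedReductionPenalty(const TargetTransformInfo &TTI,
                                               VectorType *FloatVecTy) {
  FastMathFlags Reassoc;
  Reassoc.setAllowReassoc();
  InstructionCost TreeWise =
      TTI.getArithmeticReductionCost(Instruction::FAdd, FloatVecTy, Reassoc);
  InstructionCost Ordered = TTI.getArithmeticReductionCost(
      Instruction::FAdd, FloatVecTy, FastMathFlags());
  return Ordered - TreeWise;
}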
1307  InstructionCost getMinMaxReductionCost(
1308  VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
1309  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1310 
1311  /// Calculate the cost of an extended reduction pattern, similar to
1312  /// getArithmeticReductionCost of an Add reduction with multiply and optional
1313  /// extensions. This is the cost of:
1314  /// ResTy vecreduce.add(mul (A, B)).
1315  /// ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B))).
1316  InstructionCost getMulAccReductionCost(
1317  bool IsUnsigned, Type *ResTy, VectorType *Ty,
1318  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1319 
1320  /// Calculate the cost of an extended reduction pattern, similar to
1321  /// getArithmeticReductionCost of a reduction with an extension.
1322  /// This is the cost of:
1323  /// ResTy vecreduce.opcode(ext(Ty A)).
1324  InstructionCost getExtendedReductionCost(
1325  unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1326  std::optional<FastMathFlags> FMF,
1327  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1328 
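  //
  // The extended-reduction pattern typically appears in IR as an extend feeding
  // a reduction, e.g. for an add reduction (illustrative IR only):
  //
  //   %ext = zext <16 x i8> %a to <16 x i32>
  //   %sum = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %ext)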
1329  /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
1330  /// Three cases are handled: 1. scalar instruction 2. vector instruction
1331  /// 3. scalar instruction which is to be vectorized.
1332  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
1333  TTI::TargetCostKind CostKind) const;
1334 
1335  /// \returns The cost of Call instructions.
1336  InstructionCost getCallInstrCost(
1337  Function *F, Type *RetTy, ArrayRef<Type *> Tys,
1338  TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
1339 
1340  /// \returns The number of pieces into which the provided type must be
1341  /// split during legalization. Zero is returned when the answer is unknown.
1342  unsigned getNumberOfParts(Type *Tp) const;
1343 
1344  /// \returns The cost of the address computation. For most targets this can be
1345  /// merged into the instruction indexing mode. Some targets might want to
1346  /// distinguish between address computation for memory operations on vector
1347  /// types and scalar types. Such targets should override this function.
1348  /// The 'SE' parameter holds a pointer to the scalar evolution object, which
1349  /// is used to obtain the step value of 'Ptr' when the stride is constant.
1350  /// The 'Ptr' parameter holds the SCEV of the access pointer.
1351  InstructionCost getAddressComputationCost(Type *Ty,
1352  ScalarEvolution *SE = nullptr,
1353  const SCEV *Ptr = nullptr) const;
1354 
1355  /// \returns The cost, if any, of keeping values of the given types alive
1356  /// over a callsite.
1357  ///
1358  /// Some types may require the use of register classes that do not have
1359  /// any callee-saved registers, and so would require a spill and fill.
1360  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
1361 
1362  /// \returns True if the intrinsic is a supported memory intrinsic. Info
1363  /// will contain additional information: whether the intrinsic may read or
1364  /// write memory, its volatility, and the pointer it accesses. Info is
1365  /// undefined if false is returned.
1366  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
1367 
1368  /// \returns The maximum element size, in bytes, for an element
1369  /// unordered-atomic memory intrinsic.
1370  unsigned getAtomicMemIntrinsicMaxElementSize() const;
1371 
1372  /// \returns A value which is the result of the given memory intrinsic. New
1373  /// instructions may be created to extract the result from the given intrinsic
1374  /// memory operation. Returns nullptr if the target cannot create a result
1375  /// from the given intrinsic.
1376  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1377  Type *ExpectedType) const;
1378 
1379  /// \returns The type to use in a loop expansion of a memcpy call.
1380  Type *getMemcpyLoopLoweringType(
1381  LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
1382  unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
1383  std::optional<uint32_t> AtomicElementSize = std::nullopt) const;
1384 
1385  /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
1386  /// \param RemainingBytes The number of bytes to copy.
1387  ///
1388  /// Calculates the operand types to use when copying \p RemainingBytes of
1389  /// memory, where source and destination alignments are \p SrcAlign and
1390  /// \p DestAlign respectively.
1391  void getMemcpyLoopResidualLoweringType(
1392  SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1393  unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1394  unsigned SrcAlign, unsigned DestAlign,
1395  std::optional<uint32_t> AtomicCpySize = std::nullopt) const;
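  //
  // A minimal usage sketch (the TTI instance, context and address spaces are
  // assumed to exist in the caller; the resulting split is target-dependent):
  //
  //   SmallVector<Type *, 4> OpsOut;
  //   TTI.getMemcpyLoopResidualLoweringType(OpsOut, Ctx, /*RemainingBytes=*/13,
  //                                         SrcAS, DstAS, /*SrcAlign=*/8,
  //                                         /*DestAlign=*/8);
  //   // e.g. OpsOut might now hold { i64, i32, i8 }.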
1396 
1397  /// \returns True if the two functions have compatible attributes for inlining
1398  /// purposes.
1399  bool areInlineCompatible(const Function *Caller,
1400  const Function *Callee) const;
1401 
1402  /// \returns True if the caller and callee agree on how \p Types will be
1403  /// passed to or returned from the callee.
1405  /// \param Types List of types to check.
1406  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
1407  const ArrayRef<Type *> &Types) const;
1408 
1409  /// The type of load/store indexing.
1410  enum MemIndexedMode {
1411  MIM_Unindexed, ///< No indexing.
1412  MIM_PreInc, ///< Pre-incrementing.
1413  MIM_PreDec, ///< Pre-decrementing.
1414  MIM_PostInc, ///< Post-incrementing.
1415  MIM_PostDec ///< Post-decrementing.
1416  };
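  //
  // For illustration, these modes describe load/store forms that update the
  // base pointer as part of the access, analogous to *++p (pre-increment)
  // versus *p++ (post-increment) addressing in C++.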
1417 
1418  /// \returns True if the specified indexed load for the given type is legal.
1419  bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
1420 
1421  /// \returns True if the specified indexed store for the given type is legal.
1422  bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
1423 
1424  /// \returns The bitwidth of the largest vector type that should be used to
1425  /// load/store in the given address space.
1426  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
1427 
1428  /// \returns True if the load instruction is legal to vectorize.
1429  bool isLegalToVectorizeLoad(LoadInst *LI) const;
1430 
1431  /// \returns True if the store instruction is legal to vectorize.
1432  bool isLegalToVectorizeStore(StoreInst *SI) const;
1433 
1434  /// \returns True if it is legal to vectorize the given load chain.
1435  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
1436  unsigned AddrSpace) const;
1437 
1438  /// \returns True if it is legal to vectorize the given store chain.
1439  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
1440  unsigned AddrSpace) const;
1441 
1442  /// \returns True if it is legal to vectorize the given reduction kind.
1443  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
1444  ElementCount VF) const;
1445 
1446  /// \returns True if the given type is supported for scalable vectors
1447  bool isElementTypeLegalForScalableVector(Type *Ty) const;
1448 
1449  /// \returns The new vector factor value if the target doesn't support \p
1450  /// LoadSize loads or has a better vector factor.
1451  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1452  unsigned ChainSizeInBytes,
1453  VectorType *VecTy) const;
1454 
1455  /// \returns The new vector factor value if the target doesn't support \p
1456  /// StoreSize stores or has a better vector factor.
1457  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1458  unsigned ChainSizeInBytes,
1459  VectorType *VecTy) const;
1460 
1461  /// Flags describing the kind of vector reduction.
1462  struct ReductionFlags {
1463  ReductionFlags() = default;
1464  bool IsMaxOp =
1465  false; ///< If the op is a min/max kind, true if it's a max operation.
1466  bool IsSigned = false; ///< Whether the operation is a signed int reduction.
1467  bool NoNaN =
1468  false; ///< If op is an fp min/max, whether NaNs may be present.
1469  };
1470 
1471  /// \returns True if the target prefers reductions to be performed in the loop.
1472  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1473  ReductionFlags Flags) const;
1474 
1475  /// \returns True if the target prefers the reduction select to be kept in
1476  /// the loop when tail folding, i.e.
1477  /// loop:
1478  /// p = phi (0, s)
1479  /// a = add (p, x)
1480  /// s = select (mask, a, p)
1481  /// vecreduce.add(s)
1482  ///
1483  /// This is in contrast to the normal scheme of p = phi (0, a), which allows
1484  /// the select to be pulled out of the loop. If the select(.., add, ..) can
1485  /// be predicated by the target, this can lead to cleaner code generation.
1486  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1487  ReductionFlags Flags) const;
1488 
1489  /// Return true if the loop vectorizer should consider vectorizing an
1490  /// otherwise scalar epilogue loop.
1491  bool preferEpilogueVectorization() const;
1492 
1493  /// \returns True if the target wants to expand the given reduction intrinsic
1494  /// into a shuffle sequence.
1495  bool shouldExpandReduction(const IntrinsicInst *II) const;
1496 
1497  /// \returns the size cost of rematerializing a GlobalValue address relative
1498  /// to a stack reload.
1499  unsigned getGISelRematGlobalCost() const;
1500 
1501  /// \returns the lower bound of a trip count to decide on vectorization
1502  /// while tail-folding.
1503  unsigned getMinTripCountTailFoldingThreshold() const;
1504 
1505  /// \returns True if the target supports scalable vectors.
1506  bool supportsScalableVectors() const;
1507 
1508  /// \return true when scalable vectorization is preferred.
1509  bool enableScalableVectorization() const;
1510 
1511  /// \name Vector Predication Information
1512  /// @{
1513  /// Whether the target supports the %evl parameter of VP intrinsics
1514  /// efficiently in hardware, for the given opcode and type/alignment (see the
1515  /// LLVM Language Reference, "Vector Predication Intrinsics").
1516  /// Use of %evl is discouraged when that is not the case.
1517  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
1518  Align Alignment) const;
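  //
  // For reference, %evl is the explicit vector length operand of the VP
  // intrinsics (illustrative IR only):
  //
  //   %r = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %a, <8 x i32> %b,
  //                                          <8 x i1> %mask, i32 %evl)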
1519 
1520  struct VPLegalization {
1521  enum VPTransform {
1522  // keep the predicating parameter
1523  Legal = 0,
1524  // where legal, discard the predicate parameter
1525  Discard = 1,
1526  // transform into something else that is also predicating
1527  Convert = 2
1528  };
1529 
1530  // How to transform the EVL parameter.
1531  // Legal: keep the EVL parameter as it is.
1532  // Discard: Ignore the EVL parameter where it is safe to do so.
1533  // Convert: Fold the EVL into the mask parameter.
1534  VPTransform EVLParamStrategy;
1535 
1536  // How to transform the operator.
1537  // Legal: The target supports this operator.
1538  // Convert: Convert this to a non-VP operation.
1539  // The 'Discard' strategy is invalid.
1540  VPTransform OpStrategy;
1541 
1542  bool shouldDoNothing() const {
1543  return (EVLParamStrategy == Legal) && (OpStrategy == Legal);
1544  }
1545  VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)
1546  : EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {}
1547  };
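  //
  // For illustration, a legalization pass could query the strategy and bail out
  // when no rewrite is needed. A minimal sketch (the TTI instance and the
  // VPIntrinsic 'VPI' are assumed to exist in the caller):
  //
  //   TargetTransformInfo::VPLegalization VPLeg =
  //       TTI.getVPLegalizationStrategy(VPI);
  //   if (VPLeg.shouldDoNothing())
  //     return; // the target handles both %evl and the mask natively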
1548 
1549  /// \returns How the target needs this vector-predicated operation to be
1550  /// transformed.
1551  VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const;
1552  /// @}
1553 
1554  /// @}
1555 
1556 private:
1557  /// The abstract base class used to type erase specific TTI
1558  /// implementations.
1559  class Concept;
1560 
1561  /// The template model for the base class which wraps a concrete
1562  /// implementation in a type erased interface.
1563  template <typename T> class Model;
1564 
1565  std::unique_ptr<Concept> TTIImpl;
1566 };
1567 
1568 class TargetTransformInfo::Concept {
1569 public:
1570  virtual ~Concept() = 0;
1571  virtual const DataLayout &getDataLayout() const = 0;
1572  virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
1573  ArrayRef<const Value *> Operands,
1574  TTI::TargetCostKind CostKind) = 0;
1575  virtual unsigned getInliningThresholdMultiplier() = 0;
1576  virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
1577  virtual int getInlinerVectorBonusPercent() = 0;
1578  virtual InstructionCost getMemcpyCost(const Instruction *I) = 0;
1579  virtual unsigned
1580  getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
1581  ProfileSummaryInfo *PSI,
1582  BlockFrequencyInfo *BFI) = 0;
1583  virtual InstructionCost getInstructionCost(const User *U,
1584  ArrayRef<const Value *> Operands,
1585  TargetCostKind CostKind) = 0;
1586  virtual BranchProbability getPredictableBranchThreshold() = 0;
1587  virtual bool hasBranchDivergence() = 0;
1588  virtual bool useGPUDivergenceAnalysis() = 0;
1589  virtual bool isSourceOfDivergence(const Value *V) = 0;
1590  virtual bool isAlwaysUniform(const Value *V) = 0;
1591  virtual unsigned getFlatAddressSpace() = 0;
1592  virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
1593  Intrinsic::ID IID) const = 0;
1594  virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1595  virtual bool
1596  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0;
1597  virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
1598  virtual bool isSingleThreaded() const = 0;
1599  virtual std::pair<const Value *, unsigned>
1600  getPredicatedAddrSpace(const Value *V) const = 0;
1601  virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
1602  Value *OldV,
1603  Value *NewV) const = 0;
1604  virtual bool isLoweredToCall(const Function *F) = 0;
1605  virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
1606  UnrollingPreferences &UP,
1607  OptimizationRemarkEmitter *ORE) = 0;
1608  virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
1609  PeelingPreferences &PP) = 0;
1610  virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
1611  AssumptionCache &AC,
1612  TargetLibraryInfo *LibInfo,
1613  HardwareLoopInfo &HWLoopInfo) = 0;
1614  virtual bool
1615  preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
1616  AssumptionCache &AC, TargetLibraryInfo *TLI,
1617  DominatorTree *DT, LoopVectorizationLegality *LVL,
1618  InterleavedAccessInfo *IAI) = 0;
1620  virtual std::optional<Instruction *> instCombineIntrinsic(
1621  InstCombiner &IC, IntrinsicInst &II) = 0;
1622  virtual std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
1623  InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask,
1624  KnownBits & Known, bool &KnownBitsComputed) = 0;
1625  virtual std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
1626  InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts,
1627  APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
1628  std::function<void(Instruction *, unsigned, APInt, APInt &)>
1629  SimplifyAndSetOp) = 0;
1630  virtual bool isLegalAddImmediate(int64_t Imm) = 0;
1631  virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
1632  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
1633  int64_t BaseOffset, bool HasBaseReg,
1634  int64_t Scale, unsigned AddrSpace,
1635  Instruction *I) = 0;
1636  virtual bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
1637  const TargetTransformInfo::LSRCost &C2) = 0;
1638  virtual bool isNumRegsMajorCostOfLSR() = 0;
1639  virtual bool isProfitableLSRChainElement(Instruction *I) = 0;
1640  virtual bool canMacroFuseCmp() = 0;
1641  virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
1642  LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
1643  TargetLibraryInfo *LibInfo) = 0;
1644  virtual AddressingModeKind
1645  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const = 0;
1646  virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
1647  virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
1648  virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
1649  virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
1650  virtual bool isLegalBroadcastLoad(Type *ElementTy,
1651  ElementCount NumElements) const = 0;
1652  virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
1653  virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
1654  virtual bool forceScalarizeMaskedGather(VectorType *DataType,
1655  Align Alignment) = 0;
1656  virtual bool forceScalarizeMaskedScatter(VectorType *DataType,
1657  Align Alignment) = 0;
1658  virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
1659  virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
1660  virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
1661  unsigned Opcode1,
1662  const SmallBitVector &OpcodeMask) const = 0;
1663  virtual bool enableOrderedReductions() = 0;
1664  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
1665  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
1666  virtual bool prefersVectorizedAddressing() = 0;
1667  virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
1668  int64_t BaseOffset,
1669  bool HasBaseReg, int64_t Scale,
1670  unsigned AddrSpace) = 0;
1671  virtual bool LSRWithInstrQueries() = 0;
1672  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
1673  virtual bool isProfitableToHoist(Instruction *I) = 0;
1674  virtual bool useAA() = 0;
1675  virtual bool isTypeLegal(Type *Ty) = 0;
1676  virtual unsigned getRegUsageForType(Type *Ty) = 0;
1677  virtual bool shouldBuildLookupTables() = 0;
1678  virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
1679  virtual bool shouldBuildRelLookupTables() = 0;
1680  virtual bool useColdCCForColdCall(Function &F) = 0;
1681  virtual InstructionCost getScalarizationOverhead(VectorType *Ty,
1682  const APInt &DemandedElts,
1683  bool Insert, bool Extract,
1684  TargetCostKind CostKind) = 0;
1685  virtual InstructionCost
1686  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1687  ArrayRef<Type *> Tys,
1688  TargetCostKind CostKind) = 0;
1689  virtual bool supportsEfficientVectorElementLoadStore() = 0;
1690  virtual bool supportsTailCalls() = 0;
1691  virtual bool supportsTailCallFor(const CallBase *CB) = 0;
1692  virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
1693  virtual MemCmpExpansionOptions
1694  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
1695  virtual bool enableSelectOptimize() = 0;
1696  virtual bool enableInterleavedAccessVectorization() = 0;
1697  virtual bool enableMaskedInterleavedAccessVectorization() = 0;
1698  virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
1699  virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
1700  unsigned BitWidth,
1701  unsigned AddressSpace,
1702  Align Alignment,
1703  unsigned *Fast) = 0;
1704  virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
1705  virtual bool haveFastSqrt(Type *Ty) = 0;
1706  virtual bool isExpensiveToSpeculativelyExecute(const Instruction *I) = 0;
1707  virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
1708  virtual InstructionCost getFPOpCost(Type *Ty) = 0;
1709  virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
1710  const APInt &Imm, Type *Ty) = 0;
1711  virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
1712  TargetCostKind CostKind) = 0;
1713  virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
1714  const APInt &Imm, Type *Ty,
1715  TargetCostKind CostKind,
1716  Instruction *Inst = nullptr) = 0;
1717  virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
1718  const APInt &Imm, Type *Ty,
1719  TargetCostKind CostKind) = 0;
1720  virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
1721  virtual unsigned getRegisterClassForType(bool Vector,
1722  Type *Ty = nullptr) const = 0;
1723  virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
1724  virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0;
1725  virtual unsigned getMinVectorRegisterBitWidth() const = 0;
1726  virtual std::optional<unsigned> getMaxVScale() const = 0;
1727  virtual std::optional<unsigned> getVScaleForTuning() const = 0;
1728  virtual bool
1729  shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const = 0;
1730  virtual ElementCount getMinimumVF(unsigned ElemWidth,
1731  bool IsScalable) const = 0;
1732  virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
1733  virtual unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
1734  Type *ScalarValTy) const = 0;
1735  virtual bool shouldConsiderAddressTypePromotion(
1736  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1737  virtual unsigned getCacheLineSize() const = 0;
1738  virtual std::optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
1739  virtual std::optional<unsigned> getCacheAssociativity(CacheLevel Level)
1740  const = 0;
1741 
1742  /// \return How much before a load we should place the prefetch
1743  /// instruction. This is currently measured in number of
1744  /// instructions.
1745  virtual unsigned getPrefetchDistance() const = 0;
1746 
1747  /// \return Some HW prefetchers can handle accesses up to a certain
1748  /// constant stride. This is the minimum stride in bytes where it
1749  /// makes sense to start adding SW prefetches. The default is 1,
1750  /// i.e. prefetch with any stride. Sometimes prefetching is beneficial
1751  /// even below the HW prefetcher limit, and the arguments provided are
1752  /// meant to serve as a basis for deciding this for a particular loop.
1753  virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1754  unsigned NumStridedMemAccesses,
1755  unsigned NumPrefetches,
1756  bool HasCall) const = 0;
1757 
1758  /// \return The maximum number of iterations to prefetch ahead. If
1759  /// the required number of iterations is more than this number, no
1760  /// prefetching is performed.
1761  virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
1762 
1763  /// \return True if prefetching should also be done for writes.
1764  virtual bool enableWritePrefetching() const = 0;
1765 
1766  /// \return if target want to issue a prefetch in address space \p AS.
1767  virtual bool shouldPrefetchAddressSpace(unsigned AS) const = 0;
1768 
1769  virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
1770  virtual InstructionCost getArithmeticInstrCost(
1771  unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
1772  OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
1773  ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
1774 
1775  virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
1776  ArrayRef<int> Mask,
1777  TTI::TargetCostKind CostKind,
1778  int Index, VectorType *SubTp,
1779  ArrayRef<const Value *> Args) = 0;
1780  virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
1781  Type *Src, CastContextHint CCH,
1782  TTI::TargetCostKind CostKind,
1783  const Instruction *I) = 0;
1784  virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1785  VectorType *VecTy,
1786  unsigned Index) = 0;
1787  virtual InstructionCost getCFInstrCost(unsigned Opcode,
1788  TTI::TargetCostKind CostKind,
1789  const Instruction *I = nullptr) = 0;
1790  virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
1791  Type *CondTy,
1792  CmpInst::Predicate VecPred,
1793  TTI::TargetCostKind CostKind,
1794  const Instruction *I) = 0;
1795  virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
1796  TTI::TargetCostKind CostKind,
1797  unsigned Index, Value *Op0,
1798  Value *Op1) = 0;
1799  virtual InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
1800  TTI::TargetCostKind CostKind,
1801  unsigned Index) = 0;
1802 
1803  virtual InstructionCost
1804  getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
1805  const APInt &DemandedDstElts,
1806  TTI::TargetCostKind CostKind) = 0;
1807 
1808  virtual InstructionCost
1809  getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1810  unsigned AddressSpace, TTI::TargetCostKind CostKind,
1811  OperandValueInfo OpInfo, const Instruction *I) = 0;
1812  virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src,
1813  Align Alignment,
1814  unsigned AddressSpace,
1815  TTI::TargetCostKind CostKind,
1816  const Instruction *I) = 0;
1817  virtual InstructionCost
1818  getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1819  unsigned AddressSpace,
1820  TTI::TargetCostKind CostKind) = 0;
1821  virtual InstructionCost
1822  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
1823  bool VariableMask, Align Alignment,
1824  TTI::TargetCostKind CostKind,
1825  const Instruction *I = nullptr) = 0;
1826 
1827  virtual InstructionCost getInterleavedMemoryOpCost(
1828  unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1829  Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
1830  bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
1831  virtual InstructionCost
1832  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
1833  std::optional<FastMathFlags> FMF,
1834  TTI::TargetCostKind CostKind) = 0;
1835  virtual InstructionCost
1836  getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
1837  TTI::TargetCostKind CostKind) = 0;
1838  virtual InstructionCost getExtendedReductionCost(
1839  unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1840  std::optional<FastMathFlags> FMF,
1841  TTI::TargetCostKind CostKind) = 0;
1842  virtual InstructionCost getMulAccReductionCost(
1843  bool IsUnsigned, Type *ResTy, VectorType *Ty,
1844  TTI::TargetCostKind CostKind) = 0;
1845  virtual InstructionCost
1846  getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
1847  TTI::TargetCostKind CostKind) = 0;
1848  virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy,
1849  ArrayRef<Type *> Tys,
1850  TTI::TargetCostKind CostKind) = 0;
1851  virtual unsigned getNumberOfParts(Type *Tp) = 0;
1852  virtual InstructionCost
1853  getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr) = 0;
1854  virtual InstructionCost
1855  getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
1856  virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1857  MemIntrinsicInfo &Info) = 0;
1858  virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
1859  virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1860  Type *ExpectedType) = 0;
1861  virtual Type *getMemcpyLoopLoweringType(
1862  LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
1863  unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
1864  std::optional<uint32_t> AtomicElementSize) const = 0;
1865 
1866  virtual void getMemcpyLoopResidualLoweringType(
1867  SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1868  unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1869  unsigned SrcAlign, unsigned DestAlign,
1870  std::optional<uint32_t> AtomicCpySize) const = 0;
1871  virtual bool areInlineCompatible(const Function *Caller,
1872  const Function *Callee) const = 0;
1873  virtual bool areTypesABICompatible(const Function *Caller,
1874  const Function *Callee,
1875  const ArrayRef<Type *> &Types) const = 0;
1876  virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1877  virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1878  virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
1879  virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
1880  virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
1881  virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1882  Align Alignment,
1883  unsigned AddrSpace) const = 0;
1884  virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1885  Align Alignment,
1886  unsigned AddrSpace) const = 0;
1887  virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
1888  ElementCount VF) const = 0;
1889  virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0;
1890  virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1891  unsigned ChainSizeInBytes,
1892  VectorType *VecTy) const = 0;
1893  virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1894  unsigned ChainSizeInBytes,
1895  VectorType *VecTy) const = 0;
1896  virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1897  ReductionFlags) const = 0;
1898  virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1899  ReductionFlags) const = 0;
1900  virtual bool preferEpilogueVectorization() const = 0;
1901 
1902  virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
1903  virtual unsigned getGISelRematGlobalCost() const = 0;
1904  virtual unsigned getMinTripCountTailFoldingThreshold() const = 0;
1905  virtual bool enableScalableVectorization() const = 0;
1906  virtual bool supportsScalableVectors() const = 0;
1907  virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
1908  Align Alignment) const = 0;
1909  virtual VPLegalization
1910  getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
1911 };
1912 
1913 template <typename T>
1914 class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
1915  T Impl;
1916 
1917 public:
1918  Model(T Impl) : Impl(std::move(Impl)) {}
1919  ~Model() override = default;
1920 
1921  const DataLayout &getDataLayout() const override {
1922  return Impl.getDataLayout();
1923  }
1924 
1925  InstructionCost
1926  getGEPCost(Type *PointeeType, const Value *Ptr,
1927  ArrayRef<const Value *> Operands,
1928  TargetCostKind CostKind) override {
1929  return Impl.getGEPCost(PointeeType, Ptr, Operands, CostKind);
1930  }
1931  unsigned getInliningThresholdMultiplier() override {
1932  return Impl.getInliningThresholdMultiplier();
1933  }
1934  unsigned adjustInliningThreshold(const CallBase *CB) override {
1935  return Impl.adjustInliningThreshold(CB);
1936  }
1937  int getInlinerVectorBonusPercent() override {
1938  return Impl.getInlinerVectorBonusPercent();
1939  }
1940  InstructionCost getMemcpyCost(const Instruction *I) override {
1941  return Impl.getMemcpyCost(I);
1942  }
1943  InstructionCost getInstructionCost(const User *U,
1944  ArrayRef<const Value *> Operands,
1945  TargetCostKind CostKind) override {
1946  return Impl.getInstructionCost(U, Operands, CostKind);
1947  }
1948  BranchProbability getPredictableBranchThreshold() override {
1949  return Impl.getPredictableBranchThreshold();
1950  }
1951  bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
1952  bool useGPUDivergenceAnalysis() override {
1953  return Impl.useGPUDivergenceAnalysis();
1954  }
1955  bool isSourceOfDivergence(const Value *V) override {
1956  return Impl.isSourceOfDivergence(V);
1957  }
1958 
1959  bool isAlwaysUniform(const Value *V) override {
1960  return Impl.isAlwaysUniform(V);
1961  }
1962 
1963  unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
1964 
1965  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
1966  Intrinsic::ID IID) const override {
1967  return Impl.collectFlatAddressOperands(OpIndexes, IID);
1968  }
1969 
1970  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
1971  return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
1972  }
1973 
1974  bool
1975  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
1976  return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
1977  }
1978 
1979  unsigned getAssumedAddrSpace(const Value *V) const override {
1980  return Impl.getAssumedAddrSpace(V);
1981  }
1982 
1983  bool isSingleThreaded() const override { return Impl.isSingleThreaded(); }
1984 
1985  std::pair<const Value *, unsigned>
1986  getPredicatedAddrSpace(const Value *V) const override {
1987  return Impl.getPredicatedAddrSpace(V);
1988  }
1989 
1990  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
1991  Value *NewV) const override {
1992  return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
1993  }
1994 
1995  bool isLoweredToCall(const Function *F) override {
1996  return Impl.isLoweredToCall(F);
1997  }
1998  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
1999  UnrollingPreferences &UP,
2000  OptimizationRemarkEmitter *ORE) override {
2001  return Impl.getUnrollingPreferences(L, SE, UP, ORE);
2002  }
2003  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
2004  PeelingPreferences &PP) override {
2005  return Impl.getPeelingPreferences(L, SE, PP);
2006  }
2007  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
2008  AssumptionCache &AC, TargetLibraryInfo *LibInfo,
2009  HardwareLoopInfo &HWLoopInfo) override {
2010  return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
2011  }
2012  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
2013  AssumptionCache &AC, TargetLibraryInfo *TLI,
2014  DominatorTree *DT,
2015  LoopVectorizationLegality *LVL,
2016  InterleavedAccessInfo *IAI) override {
2017  return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL, IAI);
2018  }
2020  return Impl.emitGetActiveLaneMask();
2021  }
2022  std::optional<Instruction *>
2023  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {
2024  return Impl.instCombineIntrinsic(IC, II);
2025  }
2026  std::optional<Value *>
2027  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
2028  APInt DemandedMask, KnownBits &Known,
2029  bool &KnownBitsComputed) override {
2030  return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
2031  KnownBitsComputed);
2032  }
2033  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
2034  InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
2035  APInt &UndefElts2, APInt &UndefElts3,
2036  std::function<void(Instruction *, unsigned, APInt, APInt &)>
2037  SimplifyAndSetOp) override {
2038  return Impl.simplifyDemandedVectorEltsIntrinsic(
2039  IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
2040  SimplifyAndSetOp);
2041  }
2042  bool isLegalAddImmediate(int64_t Imm) override {
2043  return Impl.isLegalAddImmediate(Imm);
2044  }
2045  bool isLegalICmpImmediate(int64_t Imm) override {
2046  return Impl.isLegalICmpImmediate(Imm);
2047  }
2048  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
2049  bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
2050  Instruction *I) override {
2051  return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2052  AddrSpace, I);
2053  }
2054  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
2055  const TargetTransformInfo::LSRCost &C2) override {
2056  return Impl.isLSRCostLess(C1, C2);
2057  }
2058  bool isNumRegsMajorCostOfLSR() override {
2059  return Impl.isNumRegsMajorCostOfLSR();
2060  }
2061  bool isProfitableLSRChainElement(Instruction *I) override {
2062  return Impl.isProfitableLSRChainElement(I);
2063  }
2064  bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
2065  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
2066  DominatorTree *DT, AssumptionCache *AC,
2067  TargetLibraryInfo *LibInfo) override {
2068  return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
2069  }
2070  AddressingModeKind
2071  getPreferredAddressingMode(const Loop *L,
2072  ScalarEvolution *SE) const override {
2073  return Impl.getPreferredAddressingMode(L, SE);
2074  }
2075  bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
2076  return Impl.isLegalMaskedStore(DataType, Alignment);
2077  }
2078  bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
2079  return Impl.isLegalMaskedLoad(DataType, Alignment);
2080  }
2081  bool isLegalNTStore(Type *DataType, Align Alignment) override {
2082  return Impl.isLegalNTStore(DataType, Alignment);
2083  }
2084  bool isLegalNTLoad(Type *DataType, Align Alignment) override {
2085  return Impl.isLegalNTLoad(DataType, Alignment);
2086  }
2087  bool isLegalBroadcastLoad(Type *ElementTy,
2088  ElementCount NumElements) const override {
2089  return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
2090  }
2091  bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
2092  return Impl.isLegalMaskedScatter(DataType, Alignment);
2093  }
2094  bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
2095  return Impl.isLegalMaskedGather(DataType, Alignment);
2096  }
2097  bool forceScalarizeMaskedGather(VectorType *DataType,
2098  Align Alignment) override {
2099  return Impl.forceScalarizeMaskedGather(DataType, Alignment);
2100  }
2101  bool forceScalarizeMaskedScatter(VectorType *DataType,
2102  Align Alignment) override {
2103  return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
2104  }
2105  bool isLegalMaskedCompressStore(Type *DataType) override {
2106  return Impl.isLegalMaskedCompressStore(DataType);
2107  }
2108  bool isLegalMaskedExpandLoad(Type *DataType) override {
2109  return Impl.isLegalMaskedExpandLoad(DataType);
2110  }
2111  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
2112  const SmallBitVector &OpcodeMask) const override {
2113  return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
2114  }
2115  bool enableOrderedReductions() override {
2116  return Impl.enableOrderedReductions();
2117  }
2118  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
2119  return Impl.hasDivRemOp(DataType, IsSigned);
2120  }
2121  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
2122  return Impl.hasVolatileVariant(I, AddrSpace);
2123  }
2124  bool prefersVectorizedAddressing() override {
2125  return Impl.prefersVectorizedAddressing();
2126  }
2127  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
2128  int64_t BaseOffset, bool HasBaseReg,
2129  int64_t Scale,
2130  unsigned AddrSpace) override {
2131  return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2132  AddrSpace);
2133  }
2134  bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
2135  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
2136  return Impl.isTruncateFree(Ty1, Ty2);
2137  }
2138  bool isProfitableToHoist(Instruction *I) override {
2139  return Impl.isProfitableToHoist(I);
2140  }
2141  bool useAA() override { return Impl.useAA(); }
2142  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
2143  unsigned getRegUsageForType(Type *Ty) override {
2144  return Impl.getRegUsageForType(Ty);
2145  }
2146  bool shouldBuildLookupTables() override {
2147  return Impl.shouldBuildLookupTables();
2148  }
2149  bool shouldBuildLookupTablesForConstant(Constant *C) override {
2150  return Impl.shouldBuildLookupTablesForConstant(C);
2151  }
2152  bool shouldBuildRelLookupTables() override {
2153  return Impl.shouldBuildRelLookupTables();
2154  }
2155  bool useColdCCForColdCall(Function &F) override {
2156  return Impl.useColdCCForColdCall(F);
2157  }
2158 
2159  InstructionCost getScalarizationOverhead(VectorType *Ty,
2160  const APInt &DemandedElts,
2161  bool Insert, bool Extract,
2162  TargetCostKind CostKind) override {
2163  return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
2164  CostKind);
2165  }
2166  InstructionCost
2167  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
2168  ArrayRef<Type *> Tys,
2169  TargetCostKind CostKind) override {
2170  return Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind);
2171  }
2172 
2173  bool supportsEfficientVectorElementLoadStore() override {
2174  return Impl.supportsEfficientVectorElementLoadStore();
2175  }
2176 
2177  bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
2178  bool supportsTailCallFor(const CallBase *CB) override {
2179  return Impl.supportsTailCallFor(CB);
2180  }
2181 
2182  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
2183  return Impl.enableAggressiveInterleaving(LoopHasReductions);
2184  }
2185  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
2186  bool IsZeroCmp) const override {
2187  return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
2188  }
2189  bool enableInterleavedAccessVectorization() override {
2190  return Impl.enableInterleavedAccessVectorization();
2191  }
2192  bool enableSelectOptimize() override {
2193  return Impl.enableSelectOptimize();
2194  }
2195  bool enableMaskedInterleavedAccessVectorization() override {
2196  return Impl.enableMaskedInterleavedAccessVectorization();
2197  }
2198  bool isFPVectorizationPotentiallyUnsafe() override {
2199  return Impl.isFPVectorizationPotentiallyUnsafe();
2200  }
2201  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
2202  unsigned AddressSpace, Align Alignment,
2203  unsigned *Fast) override {
2204  return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
2205  Alignment, Fast);
2206  }
2207  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
2208  return Impl.getPopcntSupport(IntTyWidthInBit);
2209  }
2210  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
2211 
2212  bool isExpensiveToSpeculativelyExecute(const Instruction* I) override {
2213  return Impl.isExpensiveToSpeculativelyExecute(I);
2214  }
2215 
2216  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
2217  return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
2218  }
2219 
2220  InstructionCost getFPOpCost(Type *Ty) override {
2221  return Impl.getFPOpCost(Ty);
2222  }
2223 
2224  InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
2225  const APInt &Imm, Type *Ty) override {
2226  return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
2227  }
2228  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
2229  TargetCostKind CostKind) override {
2230  return Impl.getIntImmCost(Imm, Ty, CostKind);
2231  }
2232  InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
2233  const APInt &Imm, Type *Ty,
2234  TargetCostKind CostKind,
2235  Instruction *Inst = nullptr) override {
2236  return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
2237  }
2238  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
2239  const APInt &Imm, Type *Ty,
2240  TargetCostKind CostKind) override {
2241  return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
2242  }
2243  unsigned getNumberOfRegisters(unsigned ClassID) const override {
2244  return Impl.getNumberOfRegisters(ClassID);
2245  }
2246  unsigned getRegisterClassForType(bool Vector,
2247  Type *Ty = nullptr) const override {
2248  return Impl.getRegisterClassForType(Vector, Ty);
2249  }
2250  const char *getRegisterClassName(unsigned ClassID) const override {
2251  return Impl.getRegisterClassName(ClassID);
2252  }
2253  TypeSize getRegisterBitWidth(RegisterKind K) const override {
2254  return Impl.getRegisterBitWidth(K);
2255  }
2256  unsigned getMinVectorRegisterBitWidth() const override {
2257  return Impl.getMinVectorRegisterBitWidth();
2258  }
2259  std::optional<unsigned> getMaxVScale() const override {
2260  return Impl.getMaxVScale();
2261  }
2262  std::optional<unsigned> getVScaleForTuning() const override {
2263  return Impl.getVScaleForTuning();
2264  }
2265  bool shouldMaximizeVectorBandwidth(
2266  TargetTransformInfo::RegisterKind K) const override {
2267  return Impl.shouldMaximizeVectorBandwidth(K);
2268  }
2269  ElementCount getMinimumVF(unsigned ElemWidth,
2270  bool IsScalable) const override {
2271  return Impl.getMinimumVF(ElemWidth, IsScalable);
2272  }
2273  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
2274  return Impl.getMaximumVF(ElemWidth, Opcode);
2275  }
2276  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
2277  Type *ScalarValTy) const override {
2278  return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
2279  }
2280  bool shouldConsiderAddressTypePromotion(
2281  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
2282  return Impl.shouldConsiderAddressTypePromotion(
2283  I, AllowPromotionWithoutCommonHeader);
2284  }
2285  unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
2286  std::optional<unsigned> getCacheSize(CacheLevel Level) const override {
2287  return Impl.getCacheSize(Level);
2288  }
2289  std::optional<unsigned>
2290  getCacheAssociativity(CacheLevel Level) const override {
2291  return Impl.getCacheAssociativity(Level);
2292  }
2293 
2294  /// Return the preferred prefetch distance in terms of instructions.
2295  ///
2296  unsigned getPrefetchDistance() const override {
2297  return Impl.getPrefetchDistance();
2298  }
2299 
2300  /// Return the minimum stride necessary to trigger software
2301  /// prefetching.
2302  ///
2303  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
2304  unsigned NumStridedMemAccesses,
2305  unsigned NumPrefetches,
2306  bool HasCall) const override {
2307  return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
2308  NumPrefetches, HasCall);
2309  }
2310 
2311  /// Return the maximum prefetch distance in terms of loop
2312  /// iterations.
2313  ///
2314  unsigned getMaxPrefetchIterationsAhead() const override {
2315  return Impl.getMaxPrefetchIterationsAhead();
2316  }
2317 
2318  /// \return True if prefetching should also be done for writes.
2319  bool enableWritePrefetching() const override {
2320  return Impl.enableWritePrefetching();
2321  }
2322 
2323  /// \return if target want to issue a prefetch in address space \p AS.
2324  bool shouldPrefetchAddressSpace(unsigned AS) const override {
2325  return Impl.shouldPrefetchAddressSpace(AS);
2326  }
2327 
2328  unsigned getMaxInterleaveFactor(unsigned VF) override {
2329  return Impl.getMaxInterleaveFactor(VF);
2330  }
2331  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
2332  unsigned &JTSize,
2333  ProfileSummaryInfo *PSI,
2334  BlockFrequencyInfo *BFI) override {
2335  return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
2336  }
2337  InstructionCost getArithmeticInstrCost(
2338  unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
2339  OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
2340  ArrayRef<const Value *> Args,
2341  const Instruction *CxtI = nullptr) override {
2342  return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
2343  Args, CxtI);
2344  }
2345 
2346  InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
2347  ArrayRef<int> Mask,
2348  TTI::TargetCostKind CostKind, int Index,
2349  VectorType *SubTp,
2350  ArrayRef<const Value *> Args) override {
2351  return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
2352  }
2353  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
2354  CastContextHint CCH,
2355  TTI::TargetCostKind CostKind,
2356  const Instruction *I) override {
2357  return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
2358  }
2359  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
2360  VectorType *VecTy,
2361  unsigned Index) override {
2362  return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
2363  }
2364  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
2365  const Instruction *I = nullptr) override {
2366  return Impl.getCFInstrCost(Opcode, CostKind, I);
2367  }
2368  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
2369  CmpInst::Predicate VecPred,
2370  TTI::TargetCostKind CostKind,
2371  const Instruction *I) override {
2372  return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
2373  }
2374  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
2375  TTI::TargetCostKind CostKind,
2376  unsigned Index, Value *Op0,
2377  Value *Op1) override {
2378  return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
2379  }
2380  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
2381  TTI::TargetCostKind CostKind,
2382  unsigned Index) override {
2383  return Impl.getVectorInstrCost(I, Val, CostKind, Index);
2384  }
2385  InstructionCost
2386  getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
2387  const APInt &DemandedDstElts,
2388  TTI::TargetCostKind CostKind) override {
2389  return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
2390  DemandedDstElts, CostKind);
2391  }
2392  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2393  unsigned AddressSpace,
2394  TTI::TargetCostKind CostKind,
2395  OperandValueInfo OpInfo,
2396  const Instruction *I) override {
2397  return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind,
2398  OpInfo, I);
2399  }
2400  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2401  unsigned AddressSpace,
2402  TTI::TargetCostKind CostKind,
2403  const Instruction *I) override {
2404  return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2405  CostKind, I);
2406  }
2407  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
2408  Align Alignment, unsigned AddressSpace,
2409  TTI::TargetCostKind CostKind) override {
2410  return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2411  CostKind);
2412  }
2413  InstructionCost
2414  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2415  bool VariableMask, Align Alignment,
2416  TTI::TargetCostKind CostKind,
2417  const Instruction *I = nullptr) override {
2418  return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
2419  Alignment, CostKind, I);
2420  }
2421  InstructionCost getInterleavedMemoryOpCost(
2422  unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
2423  Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
2424  bool UseMaskForCond, bool UseMaskForGaps) override {
2425  return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
2426  Alignment, AddressSpace, CostKind,
2427  UseMaskForCond, UseMaskForGaps);
2428  }
2429  InstructionCost
2430  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
2431  std::optional<FastMathFlags> FMF,
2432  TTI::TargetCostKind CostKind) override {
2433  return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
2434  }
2435  InstructionCost
2436  getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
2437  TTI::TargetCostKind CostKind) override {
2438  return Impl.getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
2439  }
2440  InstructionCost getExtendedReductionCost(
2441  unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
2442  std::optional<FastMathFlags> FMF,
2443  TTI::TargetCostKind CostKind) override {
2444  return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
2445  CostKind);
2446  }
2447  InstructionCost getMulAccReductionCost(
2448  bool IsUnsigned, Type *ResTy, VectorType *Ty,
2449  TTI::TargetCostKind CostKind) override {
2450  return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind);
2451  }
2452  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
2453  TTI::TargetCostKind CostKind) override {
2454  return Impl.getIntrinsicInstrCost(ICA, CostKind);
2455  }
2456  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
2457  ArrayRef<Type *> Tys,
2458  TTI::TargetCostKind CostKind) override {
2459  return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
2460  }
2461  unsigned getNumberOfParts(Type *Tp) override {
2462  return Impl.getNumberOfParts(Tp);
2463  }
2464  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
2465  const SCEV *Ptr) override {
2466  return Impl.getAddressComputationCost(Ty, SE, Ptr);
2467  }
2468  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
2469  return Impl.getCostOfKeepingLiveOverCall(Tys);
2470  }
2471  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
2472  MemIntrinsicInfo &Info) override {
2473  return Impl.getTgtMemIntrinsic(Inst, Info);
2474  }
2475  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
2476  return Impl.getAtomicMemIntrinsicMaxElementSize();
2477  }
2478  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
2479  Type *ExpectedType) override {
2480  return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
2481  }
2482  Type *getMemcpyLoopLoweringType(
2483  LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
2484  unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
2485  std::optional<uint32_t> AtomicElementSize) const override {
2486  return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
2487  DestAddrSpace, SrcAlign, DestAlign,
2488  AtomicElementSize);
2489  }
2490  void getMemcpyLoopResidualLoweringType(
2491  SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
2492  unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
2493  unsigned SrcAlign, unsigned DestAlign,
2494  std::optional<uint32_t> AtomicCpySize) const override {
2495  Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
2496  SrcAddrSpace, DestAddrSpace,
2497  SrcAlign, DestAlign, AtomicCpySize);
2498  }
2499  bool areInlineCompatible(const Function *Caller,
2500  const Function *Callee) const override {
2501  return Impl.areInlineCompatible(Caller, Callee);
2502  }
2503  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
2504  const ArrayRef<Type *> &Types) const override {
2505  return Impl.areTypesABICompatible(Caller, Callee, Types);
2506  }
2507  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
2508  return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
2509  }
2510  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
2511  return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
2512  }
2513  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
2514  return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
2515  }
2516  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
2517  return Impl.isLegalToVectorizeLoad(LI);
2518  }
2519  bool isLegalToVectorizeStore(StoreInst *SI) const override {
2520  return Impl.isLegalToVectorizeStore(SI);
2521  }
2522  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
2523  unsigned AddrSpace) const override {
2524  return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
2525  AddrSpace);
2526  }
2527  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
2528  unsigned AddrSpace) const override {
2529  return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
2530  AddrSpace);
2531  }
2532  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
2533  ElementCount VF) const override {
2534  return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
2535  }
2536  bool isElementTypeLegalForScalableVector(Type *Ty) const override {
2537  return Impl.isElementTypeLegalForScalableVector(Ty);
2538  }
2539  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
2540  unsigned ChainSizeInBytes,
2541  VectorType *VecTy) const override {
2542  return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
2543  }
2544  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
2545  unsigned ChainSizeInBytes,
2546  VectorType *VecTy) const override {
2547  return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
2548  }
2549  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
2550  ReductionFlags Flags) const override {
2551  return Impl.preferInLoopReduction(Opcode, Ty, Flags);
2552  }
2553  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
2554  ReductionFlags Flags) const override {
2555  return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
2556  }
2557  bool preferEpilogueVectorization() const override {
2558  return Impl.preferEpilogueVectorization();
2559  }
2560 
2561  bool shouldExpandReduction(const IntrinsicInst *II) const override {
2562  return Impl.shouldExpandReduction(II);
2563  }
2564 
2565  unsigned getGISelRematGlobalCost() const override {
2566  return Impl.getGISelRematGlobalCost();
2567  }
2568 
2569  unsigned getMinTripCountTailFoldingThreshold() const override {
2570  return Impl.getMinTripCountTailFoldingThreshold();
2571  }
2572 
2573  bool supportsScalableVectors() const override {
2574  return Impl.supportsScalableVectors();
2575  }
2576 
2577  bool enableScalableVectorization() const override {
2578  return Impl.enableScalableVectorization();
2579  }
2580 
2581  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
2582  Align Alignment) const override {
2583  return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
2584  }
2585 
2586  VPLegalization
2587  getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
2588  return Impl.getVPLegalizationStrategy(PI);
2589  }
2590 };
2591 
2592 template <typename T>
2593 TargetTransformInfo::TargetTransformInfo(T Impl)
2594  : TTIImpl(new Model<T>(Impl)) {}
2595 
2596 /// Analysis pass providing the \c TargetTransformInfo.
2597 ///
2598 /// The core idea of the TargetIRAnalysis is to expose an interface through
2599 /// which LLVM targets can analyze and provide information about the middle
2600 /// end's target-independent IR. This supports use cases such as target-aware
2601 /// cost modeling of IR constructs.
2602 ///
2603 /// This is a function analysis because much of the cost modeling for targets
2604 /// is done in a subtarget specific way and LLVM supports compiling different
2605 /// functions targeting different subtargets in order to support runtime
2606 /// dispatch according to the observed subtarget.
2607 class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
2608 public:
2609  typedef TargetTransformInfo Result;
2610 
2611  /// Default construct a target IR analysis.
2612  ///
2613  /// This will use the module's datalayout to construct a baseline
2614  /// conservative TTI result.
2615  TargetIRAnalysis();
2616 
2617  /// Construct an IR analysis pass around a target-provided callback.
2618  ///
2619  /// The callback will be called with a particular function for which the TTI
2620  /// is needed and must return a TTI object for that function.
2621  TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
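  //
  // For illustration, a target typically routes its TTI through this callback
  // via TargetMachine::getTargetTransformInfo. A minimal sketch (the
  // TargetMachine pointer 'TM' is an assumption of the example):
  //
  //   TargetIRAnalysis TIRA(
  //       [TM](const Function &F) { return TM->getTargetTransformInfo(F); });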
2622 
2623  // Value semantics. We spell out the constructors for MSVC.
2624  TargetIRAnalysis(const TargetIRAnalysis &Arg)
2625  : TTICallback(Arg.TTICallback) {}
2626  TargetIRAnalysis(TargetIRAnalysis &&Arg)
2627  : TTICallback(std::move(Arg.TTICallback)) {}
2628  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
2629  TTICallback = RHS.TTICallback;
2630  return *this;
2631  }
2632  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
2633  TTICallback = std::move(RHS.TTICallback);
2634  return *this;
2635  }
2636 
2637  Result run(const Function &F, FunctionAnalysisManager &);
2638 
2639 private:
2640  friend AnalysisInfoMixin<TargetIRAnalysis>;
2641  static AnalysisKey Key;
2642 
2643  /// The callback used to produce a result.
2644  ///
2645  /// We use a completely opaque callback so that targets can provide whatever
2646  /// mechanism they desire for constructing the TTI for a given function.
2647  ///
2648  /// FIXME: Should we really use std::function? It's relatively inefficient.
2649  /// It might be possible to arrange for even stateful callbacks to outlive
2650  /// the analysis and thus use a function_ref which would be lighter weight.
2651  /// This may also be less error prone as the callback is likely to reference
2652  /// the external TargetMachine, and that reference needs to never dangle.
2653  std::function<Result(const Function &)> TTICallback;
2654 
2655  /// Helper function used as the callback in the default constructor.
2656  static Result getDefaultTTI(const Function &F);
2657 };
2658 
2659 /// Wrapper pass for TargetTransformInfo.
2660 ///
2661 /// This pass can be constructed from a TTI object which it stores internally
2662 /// and is queried by passes.
2663 class TargetTransformInfoWrapperPass : public ImmutablePass {
2664  TargetIRAnalysis TIRA;
2665  std::optional<TargetTransformInfo> TTI;
2666 
2667  virtual void anchor();
2668 
2669 public:
2670  static char ID;
2671 
2672  /// We must provide a default constructor for the pass but it should
2673  /// never be used.
2674  ///
2675  /// Use the constructor below or call one of the creation routines.
2676  TargetTransformInfoWrapperPass();
2677 
2678  explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
2679 
2680  TargetTransformInfo &getTTI(const Function &F);
2681 
2682 
2683 /// Create an analysis pass wrapper around a TTI object.
2684 ///
2685 /// This analysis pass just holds the TTI instance and makes it available to
2686 /// clients.
2687 ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
2688 
2689 } // namespace llvm
2690 
2691 #endif
llvm::TargetTransformInfo::getMinVectorRegisterBitWidth
unsigned getMinVectorRegisterBitWidth() const
Definition: TargetTransformInfo.cpp:655
llvm::TargetTransformInfo::OperandValueInfo::isNegatedPowerOf2
bool isNegatedPowerOf2() const
Definition: TargetTransformInfo.h:941
llvm::TargetTransformInfo::Concept::enableOrderedReductions
virtual bool enableOrderedReductions()=0
llvm::HardwareLoopInfo::LoopDecrement
Value * LoopDecrement
Definition: TargetTransformInfo.h:104
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:114
llvm::TargetTransformInfo::OperandValueInfo::Properties
OperandValueProperties Properties
Definition: TargetTransformInfo.h:930
llvm::TargetTransformInfo::Concept::areTypesABICompatible
virtual bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const =0
llvm::TargetTransformInfo::MemIndexedMode
MemIndexedMode
The type of load/store indexing.
Definition: TargetTransformInfo.h:1410
llvm::TargetTransformInfo::TCK_Latency
@ TCK_Latency
The latency of instruction.
Definition: TargetTransformInfo.h:219
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:718
llvm::TargetTransformInfo::UnrollingPreferences::MaxCount
unsigned MaxCount
Definition: TargetTransformInfo.h:457
llvm::ImmutablePass
ImmutablePass class - This class is used to provide information that does not need to be run.
Definition: Pass.h:279
llvm::TargetTransformInfo::getRegisterClassName
const char * getRegisterClassName(unsigned ClassID) const
Definition: TargetTransformInfo.cpp:646
AtomicOrdering.h
llvm::ElementCount
Definition: TypeSize.h:279
llvm::TargetTransformInfo::OK_UniformValue
@ OK_UniformValue
Definition: TargetTransformInfo.h:913
llvm::TargetTransformInfo::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:890
llvm::TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass
TargetTransformInfoWrapperPass()
We must provide a default constructor for the pass but it should never be used.
Definition: TargetTransformInfo.cpp:1217
llvm::TargetTransformInfo::Concept::enableMemCmpExpansion
virtual MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const =0
llvm::TargetTransformInfo::canMacroFuseCmp
bool canMacroFuseCmp() const
Return true if the target can fuse a compare and branch.
Definition: TargetTransformInfo.cpp:376
llvm::Function
Definition: Function.h:59
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
llvm::TargetTransformInfo::Concept::isLegalMaskedScatter
virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment)=0
Pass.h
llvm::TargetTransformInfo::getRegisterBitWidth
TypeSize getRegisterBitWidth(RegisterKind K) const
Definition: TargetTransformInfo.cpp:650
llvm::TargetTransformInfo::PopcntSupportKind
PopcntSupportKind
Flags indicating the kind of support for population count.
Definition: TargetTransformInfo.h:584
llvm::TargetTransformInfo::Concept::getIntImmCost
virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind)=0
llvm::TargetTransformInfo::getVPLegalizationStrategy
VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const
Definition: TargetTransformInfo.cpp:1162
llvm::TargetTransformInfo::AMK_PostIndexed
@ AMK_PostIndexed
Definition: TargetTransformInfo.h:633
llvm::TargetTransformInfo::requiresOrderedReduction
static bool requiresOrderedReduction(std::optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of Fas...
Definition: TargetTransformInfo.h:1275
llvm::TargetTransformInfoWrapperPass::getTTI
TargetTransformInfo & getTTI(const Function &F)
Definition: TargetTransformInfo.cpp:1230
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::TargetTransformInfo::getAddressComputationCost
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE=nullptr, const SCEV *Ptr=nullptr) const
Definition: TargetTransformInfo.cpp:997
InstCombiner
Machine InstCombiner
Definition: MachineCombiner.cpp:136
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::TargetTransformInfo::instCombineIntrinsic
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Targets can implement their own combinations for target-specific intrinsics.
Definition: TargetTransformInfo.cpp:313
llvm::TargetTransformInfo::PeelingPreferences::AllowPeeling
bool AllowPeeling
Allow peeling off loop iterations.
Definition: TargetTransformInfo.h:534
llvm::TargetTransformInfo::Concept::hasVolatileVariant
virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace)=0
llvm::TargetTransformInfo::getMaxVScale
std::optional< unsigned > getMaxVScale() const
Definition: TargetTransformInfo.cpp:659
llvm::TargetTransformInfo::Concept::isFPVectorizationPotentiallyUnsafe
virtual bool isFPVectorizationPotentiallyUnsafe()=0
llvm::TargetTransformInfo::Concept::isLegalMaskedExpandLoad
virtual bool isLegalMaskedExpandLoad(Type *DataType)=0
llvm::TargetTransformInfo::Concept::isAlwaysUniform
virtual bool isAlwaysUniform(const Value *V)=0
llvm::TargetTransformInfo::Concept::getMaxPrefetchIterationsAhead
virtual unsigned getMaxPrefetchIterationsAhead() const =0
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:172
llvm::TargetTransformInfo::MemCmpExpansionOptions::AllowOverlappingLoads
bool AllowOverlappingLoads
Definition: TargetTransformInfo.h:805
llvm::TargetTransformInfo::getScalingFactorCost
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
Definition: TargetTransformInfo.cpp:468
llvm::IntrinsicCostAttributes::getReturnType
Type * getReturnType() const
Definition: TargetTransformInfo.h:152
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:452
llvm::TargetTransformInfo::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
Definition: TargetTransformInfo.cpp:976
llvm::TargetTransformInfo::Concept::getRegisterClassForType
virtual unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const =0
llvm::TargetTransformInfo::RGK_Scalar
@ RGK_Scalar
Definition: TargetTransformInfo.h:969
llvm::TargetTransformInfo::Concept::enableInterleavedAccessVectorization
virtual bool enableInterleavedAccessVectorization()=0
llvm::LoopVectorizationLegality
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
Definition: LoopVectorizationLegality.h:241
llvm::TargetTransformInfo::getAssumedAddrSpace
unsigned getAssumedAddrSpace(const Value *V) const
Definition: TargetTransformInfo.cpp:273
llvm::IntrinsicCostAttributes::getInst
const IntrinsicInst * getInst() const
Definition: TargetTransformInfo.h:151
llvm::TargetTransformInfo::Concept::useGPUDivergenceAnalysis
virtual bool useGPUDivergenceAnalysis()=0
llvm::TargetTransformInfo::isLegalICmpImmediate
bool isLegalICmpImmediate(int64_t Imm) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
Definition: TargetTransformInfo.cpp:350
llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJamInnerLoopThreshold
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
Definition: TargetTransformInfo.h:492
llvm::TargetTransformInfo::simplifyDemandedVectorEltsIntrinsic
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Can be used to implement target-specific instruction combining.
Definition: TargetTransformInfo.cpp:325
llvm::TargetTransformInfo::Concept::getMinMaxReductionCost
virtual InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::isLegalBroadcastLoad
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
\Returns true if the target supports broadcasting a load to a vector of type <NumElements x ElementTy...
Definition: TargetTransformInfo.cpp:412
llvm::TargetIRAnalysis::operator=
TargetIRAnalysis & operator=(const TargetIRAnalysis &RHS)
Definition: TargetTransformInfo.h:2628
llvm::TargetIRAnalysis::TargetIRAnalysis
TargetIRAnalysis(TargetIRAnalysis &&Arg)
Definition: TargetTransformInfo.h:2626
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::HardwareLoopInfo::ExitBranch
BranchInst * ExitBranch
Definition: TargetTransformInfo.h:101
llvm::TargetTransformInfo::UnrollingPreferences::UnrollRemainder
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
Definition: TargetTransformInfo.h:485
llvm::TargetTransformInfo::Concept::isExpensiveToSpeculativelyExecute
virtual bool isExpensiveToSpeculativelyExecute(const Instruction *I)=0
llvm::TargetTransformInfo::getScalarizationOverhead
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind) const
Estimate the overhead of scalarizing an instruction.
Definition: TargetTransformInfo.cpp:516
llvm::TargetTransformInfo::UnrollingPreferences::Count
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
Definition: TargetTransformInfo.h:450
llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition: TargetTransformInfo.h:220
llvm::TargetTransformInfo::VPLegalization
Definition: TargetTransformInfo.h:1520
llvm::TargetTransformInfo::shouldBuildLookupTables
bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
Definition: TargetTransformInfo.cpp:499
llvm::TargetTransformInfo::LSRCost::NumIVMuls
unsigned NumIVMuls
Definition: TargetTransformInfo.h:408
llvm::TargetTransformInfo::Concept::isLegalToVectorizeReduction
virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const =0
llvm::TargetTransformInfo::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index=-1, Value *Op0=nullptr, Value *Op1=nullptr) const
Definition: TargetTransformInfo.cpp:901
llvm::HardwareLoopInfo::isHardwareLoopCandidate
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
Definition: TargetTransformInfo.cpp:106
llvm::TargetTransformInfo::UnrollingPreferences::Partial
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Definition: TargetTransformInfo.h:469
llvm::TargetTransformInfo::getMaximumVF
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const
Definition: TargetTransformInfo.cpp:677
llvm::TargetTransformInfo::Concept::getEstimatedNumberOfCaseClusters
virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)=0
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::TargetTransformInfo::useColdCCForColdCall
bool useColdCCForColdCall(Function &F) const
Return true if the input function which is cold at all call sites, should use coldcc calling conventi...
Definition: TargetTransformInfo.cpp:512
llvm::TargetTransformInfo::VPLegalization::Convert
@ Convert
Definition: TargetTransformInfo.h:1527
llvm::TargetTransformInfo::Concept::getGatherScatterOpCost
virtual InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:528
llvm::TargetTransformInfo::operator=
TargetTransformInfo & operator=(TargetTransformInfo &&RHS)
Definition: TargetTransformInfo.cpp:193
llvm::TargetTransformInfo::Concept::getPeelingPreferences
virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP)=0
llvm::HardwareLoopInfo::L
Loop * L
Definition: TargetTransformInfo.h:99
llvm::TargetTransformInfo::Concept::preferEpilogueVectorization
virtual bool preferEpilogueVectorization() const =0
llvm::TargetTransformInfo::isLegalNTLoad
bool isLegalNTLoad(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal load.
Definition: TargetTransformInfo.cpp:408
llvm::TargetTransformInfo::isLegalNTStore
bool isLegalNTStore(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal store.
Definition: TargetTransformInfo.cpp:403
llvm::TargetTransformInfo::UnrollingPreferences::FullUnrollMaxCount
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
Definition: TargetTransformInfo.h:461
ForceNestedLoop
static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))
llvm::TargetTransformInfo::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:881
Vector
So we should use XX3Form_Rcr to implement intrinsic Convert DP outs ins xscvdpsp No builtin are required Round &Convert QP DP(dword[1] is set to zero) No builtin are required Round to Quad Precision because you need to assign rounding mode in instruction Provide builtin(set f128:$vT,(int_ppc_vsx_xsrqpi f128:$vB))(set f128 yields< n x< ty > >< result > yields< ty >< result > No builtin are required Load Store Vector
Definition: README_P9.txt:497
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::IntrinsicCostAttributes::IntrinsicCostAttributes
IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, InstructionCost ScalarCost=InstructionCost::getInvalid(), bool TypeBasedOnly=false)
Definition: TargetTransformInfo.cpp:61
llvm::TargetTransformInfo::OP_PowerOf2
@ OP_PowerOf2
Definition: TargetTransformInfo.h:921
llvm::TargetTransformInfo::getPredictableBranchThreshold
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
Definition: TargetTransformInfo.cpp:234
llvm::TargetTransformInfo::getIntImmCodeSizeCost
InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) const
Return the expected cost for the given integer when optimising for size.
Definition: TargetTransformInfo.cpp:601
llvm::TargetTransformInfo::getInlinerVectorBonusPercent
int getInlinerVectorBonusPercent() const
Definition: TargetTransformInfo.cpp:207
llvm::TargetTransformInfo::getIntImmCostIntrin
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Definition: TargetTransformInfo.cpp:628
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
VectorType
Definition: ItaniumDemangle.h:1075
llvm::TargetTransformInfo::Concept::isTruncateFree
virtual bool isTruncateFree(Type *Ty1, Type *Ty2)=0
llvm::TargetTransformInfo::getIntImmCostInst
InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr) const
Return the expected cost of materialization for the given integer immediate of the specified type for...
Definition: TargetTransformInfo.cpp:618
llvm::FastMathFlags
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:21
llvm::TargetTransformInfo::Concept::getAtomicMemIntrinsicMaxElementSize
virtual unsigned getAtomicMemIntrinsicMaxElementSize() const =0
llvm::TargetTransformInfo::OperandValueInfo
Definition: TargetTransformInfo.h:928
llvm::TargetTransformInfo::enableInterleavedAccessVectorization
bool enableInterleavedAccessVectorization() const
Enable matching of interleaved access groups.
Definition: TargetTransformInfo.cpp:555
llvm::TargetTransformInfo::Concept::prefersVectorizedAddressing
virtual bool prefersVectorizedAddressing()=0
llvm::TargetTransformInfo::SK_PermuteSingleSrc
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
Definition: TargetTransformInfo.h:902
llvm::TargetTransformInfo::Concept::getOrCreateResultFromMemIntrinsic
virtual Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType)=0
llvm::TargetTransformInfo::Concept::getCostOfKeepingLiveOverCall
virtual InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys)=0
llvm::TargetTransformInfo::Concept::getRegisterBitWidth
virtual TypeSize getRegisterBitWidth(RegisterKind K) const =0
llvm::TargetTransformInfo::UnrollingPreferences::AllowExpensiveTripCount
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
Definition: TargetTransformInfo.h:478
llvm::TargetTransformInfo::preferInLoopReduction
bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags Flags) const
Definition: TargetTransformInfo.cpp:1147
llvm::TargetTransformInfo::CacheLevel
CacheLevel
The possible cache levels.
Definition: TargetTransformInfo.h:1025
llvm::TargetTransformInfo::Concept
Definition: TargetTransformInfo.h:1568
llvm::TargetTransformInfo::Concept::isLegalNTStore
virtual bool isLegalNTStore(Type *DataType, Align Alignment)=0
new
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM ID Predecessors according to mbb< bb27, 0x8b0a7c0 > Note ADDri is not a two address instruction its result reg1037 is an operand of the PHI node in bb76 and its operand reg1039 is the result of the PHI node We should treat it as a two address code and make sure the ADDri is scheduled after any node that reads reg1039 Use info(i.e. register scavenger) to assign it a free register to allow reuse the collector could move the objects and invalidate the derived pointer This is bad enough in the first but safe points can crop up unpredictably **array_addr i32 n y store obj * new
Definition: README.txt:125
llvm::TargetTransformInfo::LSRCost::Insns
unsigned Insns
TODO: Some of these could be merged.
Definition: TargetTransformInfo.h:405
llvm::IntrinsicCostAttributes::getScalarizationCost
InstructionCost getScalarizationCost() const
Definition: TargetTransformInfo.h:154
llvm::TargetTransformInfo::SK_Broadcast
@ SK_Broadcast
Broadcast element 0 to all other elements.
Definition: TargetTransformInfo.h:892
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::TargetTransformInfo::Concept::getAddressComputationCost
virtual InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr)=0
llvm::TargetTransformInfo::Concept::getIntImmCodeSizeCost
virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty)=0
llvm::TargetTransformInfo::canHaveNonUndefGlobalInitializerInAddressSpace
bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const
Return true if globals in this address space can have initializers other than undef.
Definition: TargetTransformInfo.cpp:268
llvm::TargetTransformInfo::Concept::isLegalNTLoad
virtual bool isLegalNTLoad(Type *DataType, Align Alignment)=0
llvm::IntrinsicCostAttributes::skipScalarizationCost
bool skipScalarizationCost() const
Definition: TargetTransformInfo.h:162
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::HardwareLoopInfo::IsNestingLegal
bool IsNestingLegal
Definition: TargetTransformInfo.h:106
llvm::TargetTransformInfo::LSRCost::AddRecCost
unsigned AddRecCost
Definition: TargetTransformInfo.h:407
llvm::IntrinsicCostAttributes::getFlags
FastMathFlags getFlags() const
Definition: TargetTransformInfo.h:153
llvm::Reloc::Model
Model
Definition: CodeGen.h:25
llvm::TargetTransformInfo::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const
Get target-customized preferences for the generic loop unrolling transformation.
Definition: TargetTransformInfo.cpp:335
llvm::TargetTransformInfo::getCacheAssociativity
std::optional< unsigned > getCacheAssociativity(CacheLevel Level) const
Definition: TargetTransformInfo.cpp:704
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::SmallBitVector
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
Definition: SmallBitVector.h:35
llvm::TargetTransformInfo::isLegalMaskedScatter
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const
Return true if the target supports masked scatter.
Definition: TargetTransformInfo.cpp:428
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:187
llvm::TargetTransformInfo::LSRCost::SetupCost
unsigned SetupCost
Definition: TargetTransformInfo.h:411
llvm::TargetTransformInfo::Concept::isLegalMaskedLoad
virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment)=0
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::TargetTransformInfo::Concept::canMacroFuseCmp
virtual bool canMacroFuseCmp()=0
llvm::TargetTransformInfo::Concept::isTypeLegal
virtual bool isTypeLegal(Type *Ty)=0
llvm::TargetTransformInfo::getGISelRematGlobalCost
unsigned getGISelRematGlobalCost() const
Definition: TargetTransformInfo.cpp:1170
llvm::IntrinsicCostAttributes::getArgTypes
const SmallVectorImpl< Type * > & getArgTypes() const
Definition: TargetTransformInfo.h:156
llvm::TargetTransformInfo::areInlineCompatible
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
Definition: TargetTransformInfo.cpp:1079
llvm::TargetTransformInfo::Concept::getMinimumVF
virtual ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const =0
llvm::TargetTransformInfo::isTypeLegal
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
Definition: TargetTransformInfo.cpp:491
llvm::HardwareLoopInfo::ExitCount
const SCEV * ExitCount
Definition: TargetTransformInfo.h:102
llvm::TargetTransformInfo::SK_PermuteTwoSrc
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
Definition: TargetTransformInfo.h:900
llvm::TargetTransformInfo::PeelingPreferences::PeelProfiledIterations
bool PeelProfiledIterations
Allow peeling basing on profile.
Definition: TargetTransformInfo.h:541
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::TargetTransformInfo::getMinimumVF
ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const
Definition: TargetTransformInfo.cpp:672
llvm::MemIntrinsicInfo::isUnordered
bool isUnordered() const
Definition: TargetTransformInfo.h:88
llvm::TargetTransformInfo::Concept::getPredictableBranchThreshold
virtual BranchProbability getPredictableBranchThreshold()=0
llvm::TargetTransformInfo::isProfitableLSRChainElement
bool isProfitableLSRChainElement(Instruction *I) const
Definition: TargetTransformInfo.cpp:372
llvm::TargetTransformInfo::Concept::useAA
virtual bool useAA()=0
llvm::TargetTransformInfo::getCastContextHint
static CastContextHint getCastContextHint(const Instruction *I)
Calculates a CastContextHint from I.
Definition: TargetTransformInfo.cpp:819
llvm::TargetTransformInfo::getOrCreateResultFromMemIntrinsic
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) const
Definition: TargetTransformInfo.cpp:1055
llvm::TargetTransformInfo::isLegalToVectorizeLoad
bool isLegalToVectorizeLoad(LoadInst *LI) const
Definition: TargetTransformInfo.cpp:1104
llvm::TargetTransformInfo::supportsTailCallFor
bool supportsTailCallFor(const CallBase *CB) const
If target supports tail call on CB.
Definition: TargetTransformInfo.cpp:537
llvm::MemIntrinsicInfo::Ordering
AtomicOrdering Ordering
Definition: TargetTransformInfo.h:79
llvm::TargetTransformInfo::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
Definition: TargetTransformInfo.cpp:796
llvm::TargetTransformInfo::Concept::useColdCCForColdCall
virtual bool useColdCCForColdCall(Function &F)=0
llvm::TargetTransformInfoWrapperPass::ID
static char ID
Definition: TargetTransformInfo.h:2670
llvm::TargetTransformInfo::TargetCostConstants
TargetCostConstants
Underlying constants for 'cost' values in this interface.
Definition: TargetTransformInfo.h:242
llvm::TargetTransformInfo::getPopcntSupport
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
Return hardware support for population count.
Definition: TargetTransformInfo.cpp:578
llvm::TargetIRAnalysis::TargetIRAnalysis
TargetIRAnalysis(const TargetIRAnalysis &Arg)
Definition: TargetTransformInfo.h:2624
llvm::TargetTransformInfo::OP_None
@ OP_None
Definition: TargetTransformInfo.h:920
llvm::TargetTransformInfo::ShuffleKind
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Definition: TargetTransformInfo.h:891
llvm::TargetTransformInfo::Concept::getScalarizationOverhead
virtual InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TargetCostKind CostKind)=0
llvm::TargetTransformInfo::getPreferredAddressingMode
AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
Return the preferred addressing mode LSR should make efforts to generate.
Definition: TargetTransformInfo.cpp:388
llvm::TargetTransformInfo::CastContextHint
CastContextHint
Represents a hint about the context in which a cast is used.
Definition: TargetTransformInfo.h:1144
llvm::TargetTransformInfo::Concept::getVPLegalizationStrategy
virtual VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const =0
llvm::User
Definition: User.h:44
llvm::TargetTransformInfo::Concept::allowsMisalignedMemoryAccesses
virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, unsigned *Fast)=0
llvm::TargetTransformInfo::useGPUDivergenceAnalysis
bool useGPUDivergenceAnalysis() const
Return true if the target prefers to use GPU divergence analysis to replace the legacy version.
Definition: TargetTransformInfo.cpp:242
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::TargetTransformInfo::UnrollingPreferences::Force
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
Definition: TargetTransformInfo.h:481
InstrTypes.h
llvm::TargetTransformInfo::Concept::getPrefetchDistance
virtual unsigned getPrefetchDistance() const =0
llvm::TargetTransformInfo::hasBranchDivergence
bool hasBranchDivergence() const
Return true if branch divergence exists.
Definition: TargetTransformInfo.cpp:238
llvm::TargetTransformInfo::isLegalToVectorizeReduction
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const
Definition: TargetTransformInfo.cpp:1124
llvm::TargetTransformInfo::getIntImmCost
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing for the given integer immediate of the specified type.
Definition: TargetTransformInfo.cpp:611
SI
@ SI
Definition: SIInstrInfo.cpp:7993
llvm::TargetTransformInfo::Concept::supportsEfficientVectorElementLoadStore
virtual bool supportsEfficientVectorElementLoadStore()=0
llvm::TargetTransformInfo::Concept::canSaveCmp
virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo)=0
llvm::TargetTransformInfo::getNumberOfParts
unsigned getNumberOfParts(Type *Tp) const
Definition: TargetTransformInfo.cpp:992
llvm::TargetTransformInfo::Concept::isFCmpOrdCheaperThanFCmpZero
virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)=0
llvm::TargetTransformInfo::Concept::isNumRegsMajorCostOfLSR
virtual bool isNumRegsMajorCostOfLSR()=0
llvm::TargetTransformInfo::supportsScalableVectors
bool supportsScalableVectors() const
Definition: TargetTransformInfo.cpp:1178
llvm::TargetTransformInfo::isIndexedLoadLegal
bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const
Definition: TargetTransformInfo.cpp:1090
llvm::TargetTransformInfo::CastContextHint::Interleave
@ Interleave
The cast is used with an interleaved load/store.
llvm::TargetTransformInfo::UnrollingPreferences::MaxIterationsCountToAnalyze
unsigned MaxIterationsCountToAnalyze
Don't allow loop unrolling to simulate more than this number of iterations when checking full unroll ...
Definition: TargetTransformInfo.h:495
llvm::TargetTransformInfo::Concept::getNumberOfRegisters
virtual unsigned getNumberOfRegisters(unsigned ClassID) const =0
llvm::TargetTransformInfo::Concept::getMemcpyLoopResidualLoweringType
virtual void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, std::optional< uint32_t > AtomicCpySize) const =0
llvm::dwarf::Index
Index
Definition: Dwarf.h:550
llvm::TargetTransformInfo::Concept::isLegalToVectorizeLoadChain
virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const =0
llvm::TargetTransformInfo::Concept::getInstructionCost
virtual InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind)=0
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJam
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
Definition: TargetTransformInfo.h:487
llvm::TargetTransformInfo::isLegalMaskedExpandLoad
bool isLegalMaskedExpandLoad(Type *DataType) const
Return true if the target supports masked expand load.
Definition: TargetTransformInfo.cpp:447
llvm::TargetTransformInfo::enableScalableVectorization
bool enableScalableVectorization() const
Definition: TargetTransformInfo.cpp:1182
llvm::TargetTransformInfo::Concept::supportsTailCalls
virtual bool supportsTailCalls()=0
llvm::TargetTransformInfo::Concept::isLegalMaskedGather
virtual bool isLegalMaskedGather(Type *DataType, Align Alignment)=0
llvm::TargetTransformInfo::Concept::getVScaleForTuning
virtual std::optional< unsigned > getVScaleForTuning() const =0
llvm::TargetTransformInfo::Concept::hasBranchDivergence
virtual bool hasBranchDivergence()=0
llvm::Instruction
Definition: Instruction.h:41
llvm::TargetTransformInfo::Concept::enableWritePrefetching
virtual bool enableWritePrefetching() const =0
llvm::TargetTransformInfo::MIM_PreDec
@ MIM_PreDec
Pre-decrementing.
Definition: TargetTransformInfo.h:1413
llvm::InterleavedAccessInfo
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:765
llvm::HardwareLoopInfo::PerformEntryTest
bool PerformEntryTest
Definition: TargetTransformInfo.h:110
llvm::TargetTransformInfo::Concept::getMaskedMemoryOpCost
virtual InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::isLegalMaskedLoad
bool isLegalMaskedLoad(Type *DataType, Align Alignment) const
Return true if the target supports masked load.
Definition: TargetTransformInfo.cpp:398
llvm::TargetTransformInfo::isSourceOfDivergence
bool isSourceOfDivergence(const Value *V) const
Returns whether V is a source of divergence.
Definition: TargetTransformInfo.cpp:246
llvm::TargetTransformInfo::Concept::getReplicationShuffleCost
virtual InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::Concept::getCacheSize
virtual std::optional< unsigned > getCacheSize(CacheLevel Level) const =0
llvm::TargetTransformInfo::CastContextHint::Reversed
@ Reversed
The cast is used with a reversed load/store.
llvm::TargetTransformInfo::getPrefetchDistance
unsigned getPrefetchDistance() const
Definition: TargetTransformInfo.cpp:708
llvm::TargetTransformInfo::getVScaleForTuning
std::optional< unsigned > getVScaleForTuning() const
Definition: TargetTransformInfo.cpp:663
llvm::HardwareLoopInfo::CounterInReg
bool CounterInReg
Definition: TargetTransformInfo.h:108
llvm::TargetTransformInfo::Concept::isIndexedStoreLegal
virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const =0
llvm::TargetTransformInfo::Concept::supportsScalableVectors
virtual bool supportsScalableVectors() const =0
llvm::TargetTransformInfo::Concept::isLegalToVectorizeLoad
virtual bool isLegalToVectorizeLoad(LoadInst *LI) const =0
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::TargetTransformInfo::Concept::isLegalToVectorizeStoreChain
virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const =0
llvm::TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const
Return true if it is faster to check if a floating-point value is NaN (or not-NaN) versus a compariso...
Definition: TargetTransformInfo.cpp:591
llvm::AnalysisManager::Invalidator
API to communicate dependencies between analyses during invalidation.
Definition: PassManager.h:661
Align
uint64_t Align
Definition: ELFObjHandler.cpp:82
llvm::TargetTransformInfo::Concept::getMemoryOpCost
virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, OperandValueInfo OpInfo, const Instruction *I)=0
llvm::TargetTransformInfo::RGK_FixedWidthVector
@ RGK_FixedWidthVector
Definition: TargetTransformInfo.h:969
llvm::TargetTransformInfo::OperandValueInfo::Kind
OperandValueKind Kind
Definition: TargetTransformInfo.h:929
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::TargetTransformInfo::OperandValueInfo::isUniform
bool isUniform() const
Definition: TargetTransformInfo.h:935
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::TargetTransformInfo::areTypesABICompatible
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const
Definition: TargetTransformInfo.cpp:1084
llvm::IntrinsicCostAttributes
Definition: TargetTransformInfo.h:119
BranchProbability.h
llvm::TargetTransformInfo::VPLegalization::VPLegalization
VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)
Definition: TargetTransformInfo.h:1545
llvm::TargetTransformInfo::Concept::getDataLayout
virtual const DataLayout & getDataLayout() const =0
llvm::TargetTransformInfo::hasVolatileVariant
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const
Return true if the given instruction (assumed to be a memory access instruction) has a volatile varia...
Definition: TargetTransformInfo.cpp:459
llvm::TargetTransformInfo::PSK_Software
@ PSK_Software
Definition: TargetTransformInfo.h:584
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:40
llvm::TargetTransformInfo::Concept::isElementTypeLegalForScalableVector
virtual bool isElementTypeLegalForScalableVector(Type *Ty) const =0
Operands
mir Rename Register Operands
Definition: MIRNamerPass.cpp:74
llvm::TargetTransformInfo::isLegalMaskedCompressStore
bool isLegalMaskedCompressStore(Type *DataType) const
Return true if the target supports masked compress store.
Definition: TargetTransformInfo.cpp:443
llvm::PredicationStyle::DataAndControlFlow
@ DataAndControlFlow
llvm::TargetTransformInfo::haveFastSqrt
bool haveFastSqrt(Type *Ty) const
Return true if the hardware has a fast square-root instruction.
Definition: TargetTransformInfo.cpp:582
llvm::createTargetTransformInfoWrapperPass
ImmutablePass * createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA)
Create an analysis pass wrapper around a TTI object.
Definition: TargetTransformInfo.cpp:1237
llvm::TargetTransformInfo::VPLegalization::EVLParamStrategy
VPTransform EVLParamStrategy
Definition: TargetTransformInfo.h:1534
llvm::TargetTransformInfo::SK_Reverse
@ SK_Reverse
Reverse the order of the vector.
Definition: TargetTransformInfo.h:893
llvm::TargetTransformInfo::Concept::getVectorInstrCost
virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)=0
llvm::TargetTransformInfo::preferPredicateOverEpilogue
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, LoopVectorizationLegality *LVL, InterleavedAccessInfo *IAI) const
Query the target whether it would be prefered to create a predicated vector loop, which can avoid the...
Definition: TargetTransformInfo.cpp:301
llvm::TargetTransformInfo::Concept::collectFlatAddressOperands
virtual bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const =0
llvm::TargetTransformInfo::VPLegalization::VPTransform
VPTransform
Definition: TargetTransformInfo.h:1521
llvm::TargetTransformInfo::getFlatAddressSpace
unsigned getFlatAddressSpace() const
Returns the address space ID for a target's 'flat' address space.
Definition: TargetTransformInfo.cpp:254
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
llvm::AtomicOrdering
AtomicOrdering
Atomic ordering for LLVM's memory model.
Definition: AtomicOrdering.h:56
llvm::TargetTransformInfo::CastContextHint::Normal
@ Normal
The cast is used with a normal load/store.
llvm::TargetTransformInfo::Concept::~Concept
virtual ~Concept()=0
llvm::TargetTransformInfo::Concept::getIntrinsicInstrCost
virtual InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)=0
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:301
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::TargetTransformInfo::Concept::preferPredicateOverEpilogue
virtual bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, LoopVectorizationLegality *LVL, InterleavedAccessInfo *IAI)=0
llvm::TargetTransformInfo::Concept::hasActiveVectorLength
virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const =0
llvm::TargetTransformInfo::SK_InsertSubvector
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
Definition: TargetTransformInfo.h:898
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::TargetTransformInfo::OperandValueInfo::isPowerOf2
bool isPowerOf2() const
Definition: TargetTransformInfo.h:938
llvm::TargetTransformInfo::Concept::isProfitableLSRChainElement
virtual bool isProfitableLSRChainElement(Instruction *I)=0
llvm::TargetTransformInfo::shouldBuildLookupTablesForConstant
bool shouldBuildLookupTablesForConstant(Constant *C) const
Return true if switches should be turned into lookup tables containing this constant value for the ta...
Definition: TargetTransformInfo.cpp:503
llvm::TargetTransformInfo::Concept::shouldMaximizeVectorBandwidth
virtual bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const =0
llvm::TargetTransformInfo::MIM_PostInc
@ MIM_PostInc
Post-incrementing.
Definition: TargetTransformInfo.h:1414
llvm::HardwareLoopInfo::HardwareLoopInfo
HardwareLoopInfo()=delete
Index
uint32_t Index
Definition: ELFObjHandler.cpp:83
llvm::TargetTransformInfo::LSRCost
Definition: TargetTransformInfo.h:402
llvm::TargetTransformInfo::collectFlatAddressOperands
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
Return any intrinsic address operand indexes which may be rewritten if they use a flat address space ...
Definition: TargetTransformInfo.cpp:258
llvm::TargetTransformInfo::VPLegalization::OpStrategy
VPTransform OpStrategy
Definition: TargetTransformInfo.h:1540
llvm::TargetTransformInfo::isLegalMaskedGather
bool isLegalMaskedGather(Type *DataType, Align Alignment) const
Return true if the target supports masked gather.
Definition: TargetTransformInfo.cpp:417
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2663
llvm::TargetTransformInfo::Concept::getInterleavedMemoryOpCost
virtual InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)=0
llvm::TargetTransformInfo::preferPredicatedReductionSelect
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, ReductionFlags Flags) const
Definition: TargetTransformInfo.cpp:1152
llvm::TargetTransformInfo::Concept::hasDivRemOp
virtual bool hasDivRemOp(Type *DataType, bool IsSigned)=0
llvm::TargetTransformInfo::Concept::isLSRCostLess
virtual bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)=0
llvm::TargetTransformInfo::getInterleavedMemoryOpCost
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, bool UseMaskForCond=false, bool UseMaskForGaps=false) const
Definition: TargetTransformInfo.cpp:964
llvm::TargetTransformInfo::ReductionFlags::NoNaN
bool NoNaN
If op is an fp min/max, whether NaNs may be present.
Definition: TargetTransformInfo.h:1467
llvm::TargetTransformInfo::Concept::shouldBuildLookupTables
virtual bool shouldBuildLookupTables()=0
llvm::TargetTransformInfo::OK_UniformConstantValue
@ OK_UniformConstantValue
Definition: TargetTransformInfo.h:914
llvm::TargetTransformInfo::forceScalarizeMaskedGather
bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const
Return true if the target forces scalarizing of llvm.masked.gather intrinsics.
Definition: TargetTransformInfo.cpp:433
llvm::TargetIRAnalysis::Result
TargetTransformInfo Result
Definition: TargetTransformInfo.h:2609
llvm::TargetTransformInfo::getMaxInterleaveFactor
unsigned getMaxInterleaveFactor(unsigned VF) const
Definition: TargetTransformInfo.cpp:731
llvm::TargetTransformInfo::Concept::simplifyDemandedUseBitsIntrinsic
virtual std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)=0
llvm::TargetTransformInfo::VPLegalization::shouldDoNothing
bool shouldDoNothing() const
Definition: TargetTransformInfo.h:1542
llvm::TargetTransformInfo::getRegisterClassForType
unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
Definition: TargetTransformInfo.cpp:641
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::TargetTransformInfo::Concept::getMaximumVF
virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const =0
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::TargetTransformInfo::preferEpilogueVectorization
bool preferEpilogueVectorization() const
Return true if the loop vectorizer should consider vectorizing an otherwise scalar epilogue loop.
Definition: TargetTransformInfo.cpp:1157
llvm::TargetTransformInfo::isLegalAltInstr
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const
Return true if this is an alternating opcode pattern that can be lowered to a single instruction on t...
Definition: TargetTransformInfo.cpp:422
llvm::AnalysisKey
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: PassManager.h:69
llvm::TargetTransformInfo::getExtendedReductionCost
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of an extended reduction pattern, similar to getArithmeticReductionCost of a reduc...
Definition: TargetTransformInfo.cpp:1028
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:416
llvm::TargetTransformInfo::getCostOfKeepingLiveOverCall
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const
Definition: TargetTransformInfo.cpp:1042
llvm::AtomicOrdering::Unordered
@ Unordered
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::TargetTransformInfo::OperandValueProperties
OperandValueProperties
Additional properties of an operand's values.
Definition: TargetTransformInfo.h:919
llvm::TargetTransformInfo::Concept::isLegalMaskedStore
virtual bool isLegalMaskedStore(Type *DataType, Align Alignment)=0
llvm::TargetTransformInfo::shouldConsiderAddressTypePromotion
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
Definition: TargetTransformInfo.cpp:687
llvm::TargetTransformInfo::Concept::enableSelectOptimize
virtual bool enableSelectOptimize()=0
llvm::TargetTransformInfo::Concept::getVPMemoryOpCost
virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I)=0
llvm::TargetTransformInfo::Concept::getTgtMemIntrinsic
virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)=0
llvm::TargetTransformInfo::getReplicationShuffleCost
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
Definition: TargetTransformInfo.cpp:925
llvm::TargetTransformInfo::isExpensiveToSpeculativelyExecute
bool isExpensiveToSpeculativelyExecute(const Instruction *I) const
Return true if the cost of the instruction is too high to speculatively execute and should be kept be...
Definition: TargetTransformInfo.cpp:586
llvm::TargetTransformInfo::PeelingPreferences::AllowLoopNestsPeeling
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
Definition: TargetTransformInfo.h:536
llvm::TargetTransformInfo::rewriteIntrinsicWithAddressSpace
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
Rewrite intrinsic call II such that OldV will be replaced with NewV, which has a different address sp...
Definition: TargetTransformInfo.cpp:286
llvm::TargetTransformInfo::isLSRCostLess
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const
Return true if LSR cost of C1 is lower than C2.
Definition: TargetTransformInfo.cpp:363
llvm::TargetTransformInfo::Concept::shouldExpandReduction
virtual bool shouldExpandReduction(const IntrinsicInst *II) const =0
llvm::TargetTransformInfo::Concept::getLoadVectorFactor
virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const =0
llvm::TargetTransformInfo::getGatherScatterOpCost
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:955
llvm::TargetTransformInfo::VPLegalization::Discard
@ Discard
Definition: TargetTransformInfo.h:1525
llvm::TargetTransformInfo::Concept::getCastInstrCost
virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I)=0
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1862
llvm::TargetTransformInfo::Concept::isLoweredToCall
virtual bool isLoweredToCall(const Function *F)=0
llvm::TargetTransformInfo::LSRWithInstrQueries
bool LSRWithInstrQueries() const
Return true if the loop strength reduce pass should make Instruction* based TTI queries to isLegalAdd...
Definition: TargetTransformInfo.cpp:477
llvm::TargetTransformInfo::Concept::getScalingFactorCost
virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace)=0
llvm::TargetTransformInfo::getMemcpyLoopResidualLoweringType
void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, std::optional< uint32_t > AtomicCpySize=std::nullopt) const
Definition: TargetTransformInfo.cpp:1069
llvm::TargetTransformInfo::getMinMaxReductionCost
InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Definition: TargetTransformInfo.cpp:1019
llvm::TargetTransformInfo::isLegalToVectorizeLoadChain
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
Definition: TargetTransformInfo.cpp:1112
Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:62
llvm::TargetTransformInfo::getGEPCost
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, TargetCostKind CostKind=TCK_SizeAndLatency) const
Estimate the cost of a GEP operation when lowered.
Definition: TargetTransformInfo.cpp:212
llvm::TargetTransformInfo::Concept::isSingleThreaded
virtual bool isSingleThreaded() const =0
llvm::TargetTransformInfo::isLegalAddImmediate
bool isLegalAddImmediate(int64_t Imm) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
Definition: TargetTransformInfo.cpp:346
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:167
Mode
SI Whole Quad Mode
Definition: SIWholeQuadMode.cpp:262
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:82
llvm::TargetTransformInfo::LSRCost::ScaleCost
unsigned ScaleCost
Definition: TargetTransformInfo.h:412
llvm::Length
@ Length
Definition: DWP.cpp:406
llvm::TargetTransformInfo::isLoweredToCall
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
Definition: TargetTransformInfo.cpp:291
llvm::TargetTransformInfo::OperandValueInfo::getNoProps
OperandValueInfo getNoProps() const
Definition: TargetTransformInfo.h:945
llvm::TargetTransformInfo::SK_Splice
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
Definition: TargetTransformInfo.h:904
llvm::TargetTransformInfo::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:862
llvm::TargetTransformInfo::Concept::shouldBuildRelLookupTables
virtual bool shouldBuildRelLookupTables()=0
llvm::TargetTransformInfo::PSK_SlowHardware
@ PSK_SlowHardware
Definition: TargetTransformInfo.h:584
llvm::TargetTransformInfo::Concept::getRegisterClassName
virtual const char * getRegisterClassName(unsigned ClassID) const =0
llvm::AnalysisInfoMixin
A CRTP mix-in that provides informational APIs needed for analysis passes.
Definition: PassManager.h:394
llvm::TargetTransformInfo::ReductionFlags::IsSigned
bool IsSigned
Whether the operation is a signed int reduction.
Definition: TargetTransformInfo.h:1466
llvm::TargetTransformInfo::OperandValueKind
OperandValueKind
Additional information about an operand's possible values.
Definition: TargetTransformInfo.h:911
llvm::MemIntrinsicInfo::ReadMem
bool ReadMem
Definition: TargetTransformInfo.h:84
llvm::TargetTransformInfo::Concept::getCmpSelInstrCost
virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I)=0
llvm::TargetTransformInfo::MemCmpExpansionOptions::MaxNumLoads
unsigned MaxNumLoads
Definition: TargetTransformInfo.h:787
InstructionCost.h
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::TargetTransformInfo::canSaveCmp
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const
Return true if the target can save a compare for loop count, for example a hardware loop saves a compar...
Definition: TargetTransformInfo.cpp:380
llvm::TargetTransformInfo::isTruncateFree
bool isTruncateFree(Type *Ty1, Type *Ty2) const
Return true if it's free to truncate a value of type Ty1 to type Ty2.
Definition: TargetTransformInfo.cpp:481
llvm::TargetTransformInfo::prefersVectorizedAddressing
bool prefersVectorizedAddressing() const
Return true if the target doesn't mind addresses in vectors.
Definition: TargetTransformInfo.cpp:464
llvm::TargetTransformInfo::MemCmpExpansionOptions
Returns options for expansion of memcmp; IsZeroCmp is true when the memcmp result is only compared with zero.
Definition: TargetTransformInfo.h:782
llvm::TargetTransformInfo::Concept::getOperandsScalarizationOverhead
virtual InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys, TargetCostKind CostKind)=0
llvm::TargetTransformInfo::TCC_Free
@ TCC_Free
Expected to fold away in lowering.
Definition: TargetTransformInfo.h:243
llvm::TargetTransformInfo::PeelingPreferences::PeelCount
unsigned PeelCount
A forced peeling factor (the number of bodies of the original loop that should be peeled off before t...
Definition: TargetTransformInfo.h:532
llvm::TargetTransformInfo::Concept::getMulAccReductionCost
virtual InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput)=0
llvm::TargetTransformInfo::supportsEfficientVectorElementLoadStore
bool supportsEfficientVectorElementLoadStore() const
If target has efficient vector element load/store instructions, it can return true here so that inser...
Definition: TargetTransformInfo.cpp:529
llvm::TargetTransformInfo::emitGetActiveLaneMask
PredicationStyle emitGetActiveLaneMask() const
Query the target whether lowering of the llvm.get.active.lane.mask intrinsic is supported and how the...
Definition: TargetTransformInfo.cpp:308
llvm::TargetTransformInfo::enableMemCmpExpansion
MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
Definition: TargetTransformInfo.cpp:547
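Hypothetical sketch, same assumptions as above: reading the expansion options for a memcmp whose result is only compared against zero. MaxNumLoads, LoadSizes and NumLoadsPerBlock referenced here are the MemCmpExpansionOptions fields listed elsewhere on this page.

  static unsigned widestMemCmpLoad(const TargetTransformInfo &TTI) {
    TargetTransformInfo::MemCmpExpansionOptions Options =
        TTI.enableMemCmpExpansion(/*OptSize=*/false, /*IsZeroCmp=*/true);
    if (!Options)                  // the target does not want an expansion
      return 0;
    // LoadSizes lists the load widths the target is willing to use,
    // typically widest first; MaxNumLoads bounds the whole expansion.
    return Options.LoadSizes.empty() ? 0 : Options.LoadSizes.front();
  }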
llvm::TargetTransformInfo::Concept::shouldConsiderAddressTypePromotion
virtual bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)=0
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:33
llvm::LoopInfo
Definition: LoopInfo.h:1108
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:221
llvm::TargetTransformInfo::Concept::haveFastSqrt
virtual bool haveFastSqrt(Type *Ty)=0
llvm::TargetTransformInfo::Concept::isNoopAddrSpaceCast
virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const =0
llvm::TargetTransformInfo::isElementTypeLegalForScalableVector
bool isElementTypeLegalForScalableVector(Type *Ty) const
Definition: TargetTransformInfo.cpp:1129
llvm::TargetTransformInfo::isLegalMaskedStore
bool isLegalMaskedStore(Type *DataType, Align Alignment) const
Return true if the target supports masked store.
Definition: TargetTransformInfo.cpp:393
llvm::TargetTransformInfo::getPredicatedAddrSpace
std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const
Definition: TargetTransformInfo.cpp:282
llvm::TargetTransformInfo::Concept::isIndexedLoadLegal
virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const =0
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
llvm::BranchProbability
Definition: BranchProbability.h:30
llvm::TargetTransformInfo::Concept::getMinTripCountTailFoldingThreshold
virtual unsigned getMinTripCountTailFoldingThreshold() const =0
llvm::TargetTransformInfo::Concept::getFlatAddressSpace
virtual unsigned getFlatAddressSpace()=0
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::TargetTransformInfo::Concept::emitGetActiveLaneMask
virtual PredicationStyle emitGetActiveLaneMask()=0
llvm::TargetTransformInfo::UnrollingPreferences::DefaultUnrollRuntimeCount
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
Definition: TargetTransformInfo.h:452
llvm::TargetTransformInfo::hasDivRemOp
bool hasDivRemOp(Type *DataType, bool IsSigned) const
Return true if the target has a unified operation to calculate division and remainder.
Definition: TargetTransformInfo.cpp:455
llvm::TargetTransformInfo::Concept::LSRWithInstrQueries
virtual bool LSRWithInstrQueries()=0
llvm::TargetTransformInfo::getNumberOfRegisters
unsigned getNumberOfRegisters(unsigned ClassID) const
Definition: TargetTransformInfo.cpp:637
llvm::TargetTransformInfo::shouldPrefetchAddressSpace
bool shouldPrefetchAddressSpace(unsigned AS) const
Definition: TargetTransformInfo.cpp:727
llvm::InstructionCost::isValid
bool isValid() const
Definition: InstructionCost.h:79
llvm::TargetTransformInfo::AddressingModeKind
AddressingModeKind
Definition: TargetTransformInfo.h:631
llvm::TargetTransformInfo::getMaxPrefetchIterationsAhead
unsigned getMaxPrefetchIterationsAhead() const
Definition: TargetTransformInfo.cpp:719
llvm::TargetTransformInfo::MIM_Unindexed
@ MIM_Unindexed
No indexing.
Definition: TargetTransformInfo.h:1411
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:433
llvm::TargetTransformInfo::OK_AnyValue
@ OK_AnyValue
Definition: TargetTransformInfo.h:912
llvm::TargetTransformInfo::getLoadVectorFactor
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
Definition: TargetTransformInfo.cpp:1133
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:177
llvm::TargetTransformInfo::SK_Transpose
@ SK_Transpose
Transpose two vectors.
Definition: TargetTransformInfo.h:897
llvm::TargetTransformInfo::CastContextHint::None
@ None
The cast is not used with a load/store of any kind.
llvm::TargetTransformInfo::isLegalToVectorizeStore
bool isLegalToVectorizeStore(StoreInst *SI) const
Definition: TargetTransformInfo.cpp:1108
llvm::TargetTransformInfo::CacheLevel::L2D
@ L2D
llvm::TargetTransformInfo::Concept::getAssumedAddrSpace
virtual unsigned getAssumedAddrSpace(const Value *V) const =0
llvm::TargetTransformInfo::MIM_PreInc
@ MIM_PreInc
Pre-incrementing.
Definition: TargetTransformInfo.h:1412
llvm::TargetTransformInfo::MemCmpExpansionOptions::LoadSizes
SmallVector< unsigned, 8 > LoadSizes
Definition: TargetTransformInfo.h:790
llvm::TargetIRAnalysis::TargetIRAnalysis
TargetIRAnalysis()
Default construct a target IR analysis.
Definition: TargetTransformInfo.cpp:1193
llvm::TargetTransformInfo::Concept::preferInLoopReduction
virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags) const =0
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:187
llvm::TargetTransformInfo::enableOrderedReductions
bool enableOrderedReductions() const
Return true if we should be enabling ordered reductions for the target.
Definition: TargetTransformInfo.cpp:451
llvm::TargetTransformInfo::Concept::forceScalarizeMaskedGather
virtual bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment)=0
llvm::TargetTransformInfo::OP_NegatedPowerOf2
@ OP_NegatedPowerOf2
Definition: TargetTransformInfo.h:922
llvm::TargetTransformInfo::Concept::getStoreMinimumVF
virtual unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const =0
llvm::TargetTransformInfo::Concept::supportsTailCallFor
virtual bool supportsTailCallFor(const CallBase *CB)=0
llvm::TargetTransformInfo::Concept::isLegalICmpImmediate
virtual bool isLegalICmpImmediate(int64_t Imm)=0
llvm::TargetTransformInfo::simplifyDemandedUseBitsIntrinsic
std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
Can be used to implement target-specific instruction combining.
Definition: TargetTransformInfo.cpp:318
llvm::TargetTransformInfo::getArithmeticReductionCost
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of vector reduction intrinsics.
Definition: TargetTransformInfo.cpp:1010
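Hypothetical sketch, same assumptions as above: the cost of reducing a vector with integer adds. FMF only matters for floating-point reductions, so std::nullopt is passed.

  static InstructionCost addReductionCost(const TargetTransformInfo &TTI,
                                          VectorType *VecTy) {
    return TTI.getArithmeticReductionCost(
        Instruction::Add, VecTy, std::nullopt,
        TargetTransformInfo::TCK_RecipThroughput);
  }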
llvm::TargetTransformInfo::getCacheLineSize
unsigned getCacheLineSize() const
Definition: TargetTransformInfo.cpp:693
llvm::TargetTransformInfo::getShuffleCost
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask=std::nullopt, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, int Index=0, VectorType *SubTp=nullptr, ArrayRef< const Value * > Args=std::nullopt) const
Definition: TargetTransformInfo.cpp:808
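Hypothetical sketch, same assumptions as above: costing a transpose shuffle. Mask, Index and SubTp keep their defaults; only the shuffle kind and the vector type matter here.

  static InstructionCost transposeCost(const TargetTransformInfo &TTI,
                                       VectorType *VecTy) {
    return TTI.getShuffleCost(TargetTransformInfo::SK_Transpose, VecTy);
  }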
llvm::TargetTransformInfo::supportsTailCalls
bool supportsTailCalls() const
If the target supports tail calls.
Definition: TargetTransformInfo.cpp:533
std
Definition: BitVector.h:851
llvm::TargetTransformInfo::enableMaskedInterleavedAccessVectorization
bool enableMaskedInterleavedAccessVectorization() const
Enable matching of interleaved access groups that contain predicated accesses or gaps and therefore v...
Definition: TargetTransformInfo.cpp:559
llvm::KnownBits
Definition: KnownBits.h:23
llvm::TargetTransformInfo::LSRCost::NumBaseAdds
unsigned NumBaseAdds
Definition: TargetTransformInfo.h:409
llvm::TargetIRAnalysis::operator=
TargetIRAnalysis & operator=(TargetIRAnalysis &&RHS)
Definition: TargetTransformInfo.h:2632
llvm::HardwareLoopInfo::ExitBlock
BasicBlock * ExitBlock
Definition: TargetTransformInfo.h:100
llvm::MemIntrinsicInfo::WriteMem
bool WriteMem
Definition: TargetTransformInfo.h:85
llvm::TargetTransformInfo::UnrollingPreferences::UpperBound
bool UpperBound
Allow using trip count upper bound to unroll loops.
Definition: TargetTransformInfo.h:483
llvm::TargetTransformInfo::getMulAccReductionCost
InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of an extended reduction pattern, similar to getArithmeticReductionCost of an Add ...
Definition: TargetTransformInfo.cpp:1035
llvm::VPIntrinsic
This is the common base class for vector predication intrinsics.
Definition: IntrinsicInst.h:483
llvm::TargetTransformInfo::isNoopAddrSpaceCast
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Definition: TargetTransformInfo.cpp:263
llvm::TargetTransformInfo::getOperandInfo
static OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
Definition: TargetTransformInfo.cpp:736
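Hypothetical sketch, same assumptions as above: classifying the operands of a binary operator before asking for its arithmetic cost, so that, for example, a power-of-two divisor can be costed more precisely. The trailing arguments of getArithmeticInstrCost are assumed to keep their defaults.

  static InstructionCost binOpCost(const TargetTransformInfo &TTI,
                                   const BinaryOperator *BO) {
    TargetTransformInfo::OperandValueInfo Op1Info =
        TargetTransformInfo::getOperandInfo(BO->getOperand(0));
    TargetTransformInfo::OperandValueInfo Op2Info =
        TargetTransformInfo::getOperandInfo(BO->getOperand(1));
    return TTI.getArithmeticInstrCost(BO->getOpcode(), BO->getType(),
                                      TargetTransformInfo::TCK_RecipThroughput,
                                      Op1Info, Op2Info);
  }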
llvm::TypeSize
Definition: TypeSize.h:314
llvm::TargetTransformInfo::getLoadStoreVecRegBitWidth
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
Definition: TargetTransformInfo.cpp:1100
llvm::TargetTransformInfo::UnrollingPreferences::AllowRemainder
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
Definition: TargetTransformInfo.h:475
llvm::TargetTransformInfo::Concept::enableAggressiveInterleaving
virtual bool enableAggressiveInterleaving(bool LoopHasReductions)=0
llvm::TargetTransformInfo::getMemcpyLoopLoweringType
Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, std::optional< uint32_t > AtomicElementSize=std::nullopt) const
Definition: TargetTransformInfo.cpp:1060
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
llvm::TargetTransformInfo::shouldMaximizeVectorBandwidth
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const
Definition: TargetTransformInfo.cpp:667
llvm::TargetTransformInfo::isFPVectorizationPotentiallyUnsafe
bool isFPVectorizationPotentiallyUnsafe() const
Indicate that it is potentially unsafe to automatically vectorize floating-point operations because t...
Definition: TargetTransformInfo.cpp:563
llvm::TargetTransformInfo::Concept::isLegalBroadcastLoad
virtual bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const =0
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
PassManager.h
Arguments
AMDGPU Lower Kernel Arguments
Definition: AMDGPULowerKernelArguments.cpp:240
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:234
llvm::User::operand_values
iterator_range< value_op_iterator > operand_values()
Definition: User.h:266
llvm::TargetTransformInfo::LSRCost::ImmCost
unsigned ImmCost
Definition: TargetTransformInfo.h:410
llvm::TargetTransformInfo::hasActiveVectorLength
bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const
Definition: TargetTransformInfo.cpp:1186
llvm::TargetTransformInfo::forceScalarizeMaskedScatter
bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const
Return true if the target forces scalarizing of llvm.masked.scatter intrinsics.
Definition: TargetTransformInfo.cpp:438
llvm::TargetTransformInfo::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo OpdInfo={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:934
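Hypothetical sketch, same assumptions as above: the throughput cost of a plain vector load from address space 0 with 16-byte alignment; the operand-info and instruction arguments keep their defaults.

  static InstructionCost vectorLoadCost(const TargetTransformInfo &TTI,
                                        VectorType *VecTy) {
    return TTI.getMemoryOpCost(Instruction::Load, VecTy, Align(16),
                               /*AddressSpace=*/0,
                               TargetTransformInfo::TCK_RecipThroughput);
  }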
llvm::TargetIRAnalysis::run
Result run(const Function &F, FunctionAnalysisManager &)
Definition: TargetTransformInfo.cpp:1199
llvm::TargetTransformInfo::getInstructionCost
InstructionCost getInstructionCost(const User *U, TargetCostKind CostKind) const
This is a helper function which calls the three-argument getInstructionCost with Operands which are t...
Definition: TargetTransformInfo.h:308
llvm::HardwareLoopInfo::HardwareLoopInfo
HardwareLoopInfo(Loop *L)
Definition: TargetTransformInfo.h:98
llvm::TargetTransformInfo::Concept::getFPOpCost
virtual InstructionCost getFPOpCost(Type *Ty)=0
llvm::TargetTransformInfo::getInliningThresholdMultiplier
unsigned getInliningThresholdMultiplier() const
Definition: TargetTransformInfo.cpp:198
llvm::TargetTransformInfo::Concept::getMaxVScale
virtual std::optional< unsigned > getMaxVScale() const =0
llvm::PredicationStyle
PredicationStyle
Definition: TargetTransformInfo.h:165
llvm::InstCombiner
The core instruction combiner logic.
Definition: InstCombiner.h:45
llvm::TargetTransformInfo::adjustInliningThreshold
unsigned adjustInliningThreshold(const CallBase *CB) const
Definition: TargetTransformInfo.cpp:203
llvm::TargetTransformInfo::getMaskedMemoryOpCost
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Definition: TargetTransformInfo.cpp:946
llvm::TargetTransformInfo::RGK_ScalableVector
@ RGK_ScalableVector
Definition: TargetTransformInfo.h:969
llvm::TargetTransformInfo::getMinTripCountTailFoldingThreshold
unsigned getMinTripCountTailFoldingThreshold() const
Definition: TargetTransformInfo.cpp:1174
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
llvm::HardwareLoopInfo
Attributes of a target dependent hardware loop.
Definition: TargetTransformInfo.h:96
llvm::InstructionCost::getInvalid
static InstructionCost getInvalid(CostType Val=0)
Definition: InstructionCost.h:73
llvm::TargetTransformInfo::getEstimatedNumberOfCaseClusters
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
Definition: TargetTransformInfo.cpp:218
llvm::RISCVMatInt::Imm
@ Imm
Definition: RISCVMatInt.h:23
llvm::TargetTransformInfo::TargetTransformInfo
TargetTransformInfo(T Impl)
Construct a TTI object using a type implementing the Concept API below.
Definition: TargetTransformInfo.h:2593
llvm::RecurrenceDescriptor
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
Definition: IVDescriptors.h:69
llvm::TargetTransformInfo::Concept::simplifyDemandedVectorEltsIntrinsic
virtual std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)=0
llvm::TargetTransformInfo::Concept::isLegalAltInstr
virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const =0
llvm::TargetTransformInfo::Concept::getCallInstrCost
virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::MemCmpExpansionOptions::NumLoadsPerBlock
unsigned NumLoadsPerBlock
Definition: TargetTransformInfo.h:800
llvm::TargetTransformInfo::Concept::getCacheAssociativity
virtual std::optional< unsigned > getCacheAssociativity(CacheLevel Level) const =0
llvm::IntrinsicCostAttributes::getID
Intrinsic::ID getID() const
Definition: TargetTransformInfo.h:150
llvm::TargetTransformInfo::Concept::getGISelRematGlobalCost
virtual unsigned getGISelRematGlobalCost() const =0
llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition: TargetTransformInfo.h:969
llvm::TargetTransformInfo::Concept::getIntImmCostInst
virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr)=0
llvm::TargetTransformInfo::Concept::getCFInstrCost
virtual InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
llvm::TargetTransformInfo::invalidate
bool invalidate(Function &, const PreservedAnalyses &, FunctionAnalysisManager::Invalidator &)
Handle the invalidation of this information.
Definition: TargetTransformInfo.h:201
llvm::TargetTransformInfo::Concept::getInlinerVectorBonusPercent
virtual int getInlinerVectorBonusPercent()=0
llvm::TargetTransformInfo::Concept::isLegalAddImmediate
virtual bool isLegalAddImmediate(int64_t Imm)=0
llvm::TargetTransformInfo::enableSelectOptimize
bool enableSelectOptimize() const
Should the Select Optimization pass be enabled and run.
Definition: TargetTransformInfo.cpp:551
SmallBitVector.h
llvm::TargetTransformInfo::UnrollingPreferences::Threshold
unsigned Threshold
The cost threshold for the unrolled loop.
Definition: TargetTransformInfo.h:424
llvm::TargetTransformInfo::ReductionFlags::ReductionFlags
ReductionFlags()=default
llvm::TargetTransformInfo::Concept::enableScalableVectorization
virtual bool enableScalableVectorization() const =0
llvm::TargetTransformInfo::Concept::getNumberOfParts
virtual unsigned getNumberOfParts(Type *Tp)=0
llvm::TargetTransformInfo::Concept::getPredicatedAddrSpace
virtual std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const =0
llvm::TargetTransformInfo::VPLegalization::Legal
@ Legal
Definition: TargetTransformInfo.h:1523
llvm::TargetTransformInfo::Concept::shouldBuildLookupTablesForConstant
virtual bool shouldBuildLookupTablesForConstant(Constant *C)=0
llvm::TargetTransformInfo::Concept::isProfitableToHoist
virtual bool isProfitableToHoist(Instruction *I)=0
llvm::TargetTransformInfo::getMinPrefetchStride
unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Some HW prefetchers can handle accesses up to a certain constant stride.
Definition: TargetTransformInfo.cpp:712
llvm::TargetTransformInfo::isIndexedStoreLegal
bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const
Definition: TargetTransformInfo.cpp:1095
llvm::TargetTransformInfo::isNumRegsMajorCostOfLSR
bool isNumRegsMajorCostOfLSR() const
Return true if LSR's major cost is the number of registers.
Definition: TargetTransformInfo.cpp:368
llvm::TargetTransformInfo::shouldExpandReduction
bool shouldExpandReduction(const IntrinsicInst *II) const
Definition: TargetTransformInfo.cpp:1166
llvm::TargetTransformInfo::getExtractWithExtendCost
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index) const
Definition: TargetTransformInfo.cpp:873
llvm::TargetTransformInfo::Concept::isLegalAddressingMode
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I)=0
llvm::TargetTransformInfo::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP) const
Get target-customized preferences for the generic loop peeling transformation.
Definition: TargetTransformInfo.cpp:341
llvm::TargetTransformInfo::getStoreVectorFactor
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
Definition: TargetTransformInfo.cpp:1140
llvm::TargetTransformInfo::MIM_PostDec
@ MIM_PostDec
Post-decrementing.
Definition: TargetTransformInfo.h:1415
llvm::HardwareLoopInfo::canAnalyze
bool canAnalyze(LoopInfo &LI)
Definition: TargetTransformInfo.cpp:51
llvm::TargetTransformInfo::isLegalToVectorizeStoreChain
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
Definition: TargetTransformInfo.cpp:1118
llvm::SmallVectorImpl< const Value * >
llvm::TargetTransformInfo::Concept::getArithmeticInstrCost
virtual InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, OperandValueInfo Opd1Info, OperandValueInfo Opd2Info, ArrayRef< const Value * > Args, const Instruction *CxtI=nullptr)=0
ForceHardwareLoopPHI
static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))
llvm::TargetTransformInfo::Concept::preferPredicatedReductionSelect
virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, ReductionFlags) const =0
llvm::MemIntrinsicInfo
Information about a load/store intrinsic defined by the target.
Definition: TargetTransformInfo.h:71
llvm::TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize
unsigned getAtomicMemIntrinsicMaxElementSize() const
Definition: TargetTransformInfo.cpp:1051
llvm::msgpack::Type
Type
MessagePack types as defined in the standard, with the exception of Integer being divided into a sign...
Definition: MsgPackReader.h:48
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to callin...
Definition: InstrTypes.h:1184
llvm::TargetTransformInfo::enableAggressiveInterleaving
bool enableAggressiveInterleaving(bool LoopHasReductions) const
Don't restrict interleaved unrolling to small loops.
Definition: TargetTransformInfo.cpp:541
llvm::TargetTransformInfo::Concept::areInlineCompatible
virtual bool areInlineCompatible(const Function *Caller, const Function *Callee) const =0
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::TargetTransformInfo::Concept::getShuffleCost
virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args)=0
llvm::TargetTransformInfo::UnrollingPreferences::OptSizeThreshold
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
Definition: TargetTransformInfo.h:438
llvm::TargetTransformInfo::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace=0, Align Alignment=Align(1), unsigned *Fast=nullptr) const
Determine if the target supports unaligned memory accesses.
Definition: TargetTransformInfo.cpp:568
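Hypothetical sketch, same assumptions as above: checking whether a misaligned 64-bit access in address space 0 is both legal and not slow on this target.

  static bool misalignedI64IsFast(const TargetTransformInfo &TTI,
                                  LLVMContext &Ctx) {
    unsigned Fast = 0;
    bool Allowed = TTI.allowsMisalignedMemoryAccesses(Ctx, /*BitWidth=*/64,
                                                      /*AddressSpace=*/0,
                                                      Align(1), &Fast);
    // Allowed means the access is legal; Fast != 0 means it is also fast.
    return Allowed && Fast != 0;
  }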
llvm::TargetTransformInfo::Concept::getMinVectorRegisterBitWidth
virtual unsigned getMinVectorRegisterBitWidth() const =0
llvm::TargetTransformInfo::getCallInstrCost
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency) const
Definition: TargetTransformInfo.cpp:984
llvm::MemIntrinsicInfo::MatchingId
unsigned short MatchingId
Definition: TargetTransformInfo.h:82
llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition: TargetTransformInfo.h:244
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3283
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
llvm::TargetTransformInfo::Concept::getPreferredAddressingMode
virtual AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const =0
llvm::IntrinsicCostAttributes::isTypeBasedOnly
bool isTypeBasedOnly() const
Definition: TargetTransformInfo.h:158
llvm::TargetTransformInfo::isProfitableToHoist
bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist an instruction in the then/else block to before the if.
Definition: TargetTransformInfo.cpp:485
llvm::TargetTransformInfo::Concept::getExtendedReductionCost
virtual InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput)=0
llvm::TargetTransformInfo::shouldBuildRelLookupTables
bool shouldBuildRelLookupTables() const
Return true if lookup tables should be turned into relative lookup tables.
Definition: TargetTransformInfo.cpp:508
llvm::TargetTransformInfo::Concept::getMaxInterleaveFactor
virtual unsigned getMaxInterleaveFactor(unsigned VF)=0
llvm::TargetTransformInfo::Concept::getStoreVectorFactor
virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const =0
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3139
llvm::TargetTransformInfo::Concept::getLoadStoreVecRegBitWidth
virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const =0
llvm::TargetTransformInfo::Concept::getRegUsageForType
virtual unsigned getRegUsageForType(Type *Ty)=0
llvm::TargetTransformInfo::~TargetTransformInfo
~TargetTransformInfo()
llvm::TargetTransformInfo::Concept::getCacheLineSize
virtual unsigned getCacheLineSize() const =0
llvm::AMDGPU::VGPRIndexMode::Id
Id
Definition: SIDefines.h:247
llvm::TargetTransformInfo::OperandValueInfo::isConstant
bool isConstant() const
Definition: TargetTransformInfo.h:932
llvm::TargetTransformInfo::isSingleThreaded
bool isSingleThreaded() const
Definition: TargetTransformInfo.cpp:277
llvm::TargetTransformInfo::Concept::canHaveNonUndefGlobalInitializerInAddressSpace
virtual bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const =0
llvm::TargetTransformInfo::Concept::adjustInliningThreshold
virtual unsigned adjustInliningThreshold(const CallBase *CB)=0
llvm::TargetTransformInfo::Concept::getIntImmCostIntrin
virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind)=0
llvm::TargetTransformInfo::OK_NonUniformConstantValue
@ OK_NonUniformConstantValue
Definition: TargetTransformInfo.h:915
llvm::TargetTransformInfo::getTgtMemIntrinsic
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
Definition: TargetTransformInfo.cpp:1046
llvm::TargetTransformInfo::getStoreMinimumVF
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const
Definition: TargetTransformInfo.cpp:682
llvm::TargetTransformInfo::getRegUsageForType
unsigned getRegUsageForType(Type *Ty) const
Returns the estimated number of registers required to represent Ty.
Definition: TargetTransformInfo.cpp:495
llvm::TargetTransformInfo::Concept::isLegalMaskedCompressStore
virtual bool isLegalMaskedCompressStore(Type *DataType)=0
llvm::TargetTransformInfo::useAA
bool useAA() const
Definition: TargetTransformInfo.cpp:489
llvm::TargetTransformInfo::Concept::getInliningThresholdMultiplier
virtual unsigned getInliningThresholdMultiplier()=0
llvm::HardwareLoopInfo::CountType
IntegerType * CountType
Definition: TargetTransformInfo.h:103
llvm::TargetTransformInfo::enableWritePrefetching
bool enableWritePrefetching() const
Definition: TargetTransformInfo.cpp:723
llvm::TargetTransformInfo::getFPOpCost
InstructionCost getFPOpCost(Type *Ty) const
Return the expected cost of supporting the floating point operation of the specified type.
Definition: TargetTransformInfo.cpp:595
llvm::PredicationStyle::None
@ None
llvm::TargetTransformInfo::Concept::getArithmeticReductionCost
virtual InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)=0
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::TargetTransformInfo::TCK_RecipThroughput
@ TCK_RecipThroughput
Reciprocal throughput.
Definition: TargetTransformInfo.h:218
llvm::IntrinsicCostAttributes::getArgs
const SmallVectorImpl< const Value * > & getArgs() const
Definition: TargetTransformInfo.h:155
llvm::TargetTransformInfo::Concept::shouldPrefetchAddressSpace
virtual bool shouldPrefetchAddressSpace(unsigned AS) const =0
llvm::TargetTransformInfo::AMK_None
@ AMK_None
Definition: TargetTransformInfo.h:634
llvm::TargetTransformInfo::SK_ExtractSubvector
@ SK_ExtractSubvector
ExtractSubvector Index indicates start offset.
Definition: TargetTransformInfo.h:899
llvm::TargetTransformInfo::AMK_PreIndexed
@ AMK_PreIndexed
Definition: TargetTransformInfo.h:632
llvm::AtomicOrdering::NotAtomic
@ NotAtomic
llvm::TargetTransformInfo::getCacheSize
std::optional< unsigned > getCacheSize(CacheLevel Level) const
Definition: TargetTransformInfo.cpp:699
llvm::Data
@ Data
llvm::TargetTransformInfo::Concept::isLegalToVectorizeStore
virtual bool isLegalToVectorizeStore(StoreInst *SI) const =0
llvm::TargetTransformInfo::getVPMemoryOpCost
InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
llvm::TargetTransformInfo::isAlwaysUniform
bool isAlwaysUniform(const Value *V) const
Definition: TargetTransformInfo.cpp:250
llvm::TargetTransformInfo::Concept::getMemcpyCost
virtual InstructionCost getMemcpyCost(const Instruction *I)=0
llvm::TargetTransformInfo::isLegalAddressingMode
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
Definition: TargetTransformInfo.cpp:354
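Hypothetical sketch, same assumptions as above: asking whether a base-register-plus-scaled-index form (no global base, no constant offset, scale of 4) is a legal addressing mode for an access of type AccessTy.

  static bool scaledIndexIsLegal(const TargetTransformInfo &TTI,
                                 Type *AccessTy) {
    return TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/nullptr,
                                     /*BaseOffset=*/0, /*HasBaseReg=*/true,
                                     /*Scale=*/4);
  }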
llvm::TargetTransformInfo::getMemcpyCost
InstructionCost getMemcpyCost(const Instruction *I) const
Definition: TargetTransformInfo.cpp:1004
llvm::TargetTransformInfo::UnrollingPreferences::MaxPercentThresholdBoost
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain perc...
Definition: TargetTransformInfo.h:435
llvm::TargetTransformInfo::Concept::forceScalarizeMaskedScatter
virtual bool forceScalarizeMaskedScatter(VectorType *DataType, Align Alignment)=0
llvm::TargetTransformInfo::Concept::getUnrollingPreferences
virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)=0
llvm::MemIntrinsicInfo::IsVolatile
bool IsVolatile
Definition: TargetTransformInfo.h:86
llvm::TargetTransformInfo::Concept::getMinPrefetchStride
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const =0
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:39