LLVM  14.0.0git
TargetTransformInfo.h
1 //===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This pass exposes codegen information to IR-level passes. Every
10 /// transformation that uses codegen information is broken into three parts:
11 /// 1. The IR-level analysis pass.
12 /// 2. The IR-level transformation interface which provides the needed
13 /// information.
14 /// 3. Codegen-level implementation which uses target-specific hooks.
15 ///
16 /// This file defines #2, which is the interface that IR-level transformations
17 /// use for querying the codegen.
18 ///
19 //===----------------------------------------------------------------------===//
20 
21 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
22 #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
23 
24 #include "llvm/IR/InstrTypes.h"
25 #include "llvm/IR/Operator.h"
26 #include "llvm/IR/PassManager.h"
27 #include "llvm/Pass.h"
30 #include "llvm/Support/DataTypes.h"
32 #include <functional>
33 #include <utility>
34 
35 namespace llvm {
36 
37 namespace Intrinsic {
38 typedef unsigned ID;
39 }
40 
41 class AssumptionCache;
42 class BlockFrequencyInfo;
43 class DominatorTree;
44 class BranchInst;
45 class CallBase;
46 class Function;
47 class GlobalValue;
48 class InstCombiner;
50 class IntrinsicInst;
51 class LoadInst;
52 class LoopAccessInfo;
53 class Loop;
54 class LoopInfo;
55 class ProfileSummaryInfo;
57 class SCEV;
58 class ScalarEvolution;
59 class StoreInst;
60 class SwitchInst;
61 class TargetLibraryInfo;
62 class Type;
63 class User;
64 class Value;
65 class VPIntrinsic;
66 struct KnownBits;
67 template <typename T> class Optional;
68 
69 /// Information about a load/store intrinsic defined by the target.
70 struct MemIntrinsicInfo {
71  /// This is the pointer that the intrinsic is loading from or storing to.
72  /// If this is non-null, then analysis/optimization passes can assume that
73  /// this intrinsic is functionally equivalent to a load/store from this
74  /// pointer.
75  Value *PtrVal = nullptr;
76 
 77  // Ordering for atomic operations.
 78  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
 79 
80  // Same Id is set by the target for corresponding load/store intrinsics.
81  unsigned short MatchingId = 0;
82 
83  bool ReadMem = false;
84  bool WriteMem = false;
85  bool IsVolatile = false;
86 
 87  bool isUnordered() const {
 88    return (Ordering == AtomicOrdering::NotAtomic ||
 89            Ordering == AtomicOrdering::Unordered) &&
 90           !IsVolatile;
 91  }
92 };
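/// Example (illustrative sketch): assuming a pass holds a TargetTransformInfo
/// reference \c TTI and visits a target intrinsic call \c II (an
/// IntrinsicInst *), it can ask whether the call behaves like an ordinary
/// memory access:
///   MemIntrinsicInfo Info;
///   if (TTI.getTgtMemIntrinsic(II, Info) && Info.isUnordered())
///     ; // may be treated like a plain load/store of Info.PtrVal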
93 
94 /// Attributes of a target-dependent hardware loop.
95 struct HardwareLoopInfo {
 96  HardwareLoopInfo() = delete;
 97  HardwareLoopInfo(Loop *L) : L(L) {}
98  Loop *L = nullptr;
99  BasicBlock *ExitBlock = nullptr;
100  BranchInst *ExitBranch = nullptr;
101  const SCEV *ExitCount = nullptr;
102  IntegerType *CountType = nullptr;
103  Value *LoopDecrement = nullptr; // Decrement the loop counter by this
104  // value in every iteration.
105  bool IsNestingLegal = false; // Can a hardware loop be a parent to
106  // another hardware loop?
107  bool CounterInReg = false; // Should loop counter be updated in
108  // the loop via a phi?
109  bool PerformEntryTest = false; // Generate the intrinsic which also performs
110  // icmp ne zero on the loop counter value and
111  // produces an i1 to guard the loop entry.
113  DominatorTree &DT, bool ForceNestedLoop = false,
114  bool ForceHardwareLoopPHI = false);
115  bool canAnalyze(LoopInfo &LI);
116 };
117 
118 class IntrinsicCostAttributes {
119  const IntrinsicInst *II = nullptr;
120  Type *RetTy = nullptr;
121  Intrinsic::ID IID;
122  SmallVector<Type *, 4> ParamTys;
124  FastMathFlags FMF;
125  // If ScalarizationCost is UINT_MAX, the cost of scalarizing the
126  // arguments and the return value will be computed based on types.
127  InstructionCost ScalarizationCost = InstructionCost::getInvalid();
128 
129 public:
131  Intrinsic::ID Id, const CallBase &CI,
133 
136  FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr,
138 
141 
145  const IntrinsicInst *I = nullptr,
147 
148  Intrinsic::ID getID() const { return IID; }
149  const IntrinsicInst *getInst() const { return II; }
150  Type *getReturnType() const { return RetTy; }
151  FastMathFlags getFlags() const { return FMF; }
152  InstructionCost getScalarizationCost() const { return ScalarizationCost; }
154  const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }
155 
156  bool isTypeBasedOnly() const {
157  return Arguments.empty();
158  }
159 
160  bool skipScalarizationCost() const { return ScalarizationCost.isValid(); }
161 };
162 
164 typedef TargetTransformInfo TTI;
165 
166 /// This pass provides access to the codegen interfaces that are needed
167 /// for IR-level transformations.
168 class TargetTransformInfo {
169 public:
170  /// Construct a TTI object using a type implementing the \c Concept
171  /// API below.
172  ///
173  /// This is used by targets to construct a TTI wrapping their target-specific
174  /// implementation that encodes appropriate costs for their target.
175  template <typename T> TargetTransformInfo(T Impl);
176 
177  /// Construct a baseline TTI object using a minimal implementation of
178  /// the \c Concept API below.
179  ///
180  /// The TTI implementation will reflect the information in the DataLayout
181  /// provided if non-null.
182  explicit TargetTransformInfo(const DataLayout &DL);
183 
184  // Provide move semantics.
187 
188  // We need to define the destructor out-of-line to define our sub-classes
189  // out-of-line.
191 
192  /// Handle the invalidation of this information.
193  ///
194  /// When used as a result of \c TargetIRAnalysis this method will be called
195  /// when the function this was computed for changes. When it returns false,
196  /// the information is preserved across those changes.
199  // FIXME: We should probably in some way ensure that the subtarget
200  // information for a function hasn't changed.
201  return false;
202  }
203 
204  /// \name Generic Target Information
205  /// @{
206 
207  /// The kind of cost model.
208  ///
209  /// There are several different cost models that can be customized by the
210  /// target. The normalization of each cost model may be target specific.
211  enum TargetCostKind {
212  TCK_RecipThroughput, ///< Reciprocal throughput.
213  TCK_Latency, ///< The latency of the instruction.
214  TCK_CodeSize, ///< Instruction code size.
215  TCK_SizeAndLatency ///< The weighted sum of size and latency.
216  };
217 
218  /// Query the cost of a specified instruction.
219  ///
220  /// Clients should use this interface to query the cost of an existing
221  /// instruction. The instruction must have a valid parent (basic block).
222  ///
223  /// Note, this method does not cache the cost calculation and it
224  /// can be expensive in some cases.
226  enum TargetCostKind kind) const {
227  InstructionCost Cost;
228  switch (kind) {
229  case TCK_RecipThroughput:
230  Cost = getInstructionThroughput(I);
231  break;
232  case TCK_Latency:
233  Cost = getInstructionLatency(I);
234  break;
235  case TCK_CodeSize:
236  case TCK_SizeAndLatency:
237  Cost = getUserCost(I, kind);
238  break;
239  }
240  return Cost;
241  }
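  /// Example (sketch): assuming a pass holds a TargetTransformInfo reference
  /// \c TTI and an instruction reference \c I, the reciprocal-throughput cost
  /// can be queried and compared against the cost constants below:
  ///   InstructionCost Cost =
  ///       TTI.getInstructionCost(&I, TargetTransformInfo::TCK_RecipThroughput);
  ///   bool Cheap = Cost.isValid() &&
  ///                *Cost.getValue() <= TargetTransformInfo::TCC_Basic;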
242 
243  /// Underlying constants for 'cost' values in this interface.
244  ///
245  /// Many APIs in this interface return a cost. This enum defines the
246  /// fundamental values that should be used to interpret (and produce) those
247  /// costs. The costs are returned as an int rather than a member of this
248  /// enumeration because it is expected that the cost of one IR instruction
249  /// may have a multiplicative factor to it or otherwise won't fit directly
250  /// into the enum. Moreover, it is common to sum or average costs which works
251  /// better as simple integral values. Thus this enum only provides constants.
252  /// Also note that the returned costs are signed integers to make it natural
253  /// to add, subtract, and test with zero (a common boundary condition). It is
254  /// not expected that 2^32 is a realistic cost to be modeling at any point.
255  ///
256  /// Note that these costs should usually reflect the intersection of code-size
257  /// cost and execution cost. A free instruction is typically one that folds
258  /// into another instruction. For example, reg-to-reg moves can often be
259  /// skipped by renaming the registers in the CPU, but they still are encoded
260  /// and thus wouldn't be considered 'free' here.
262  TCC_Free = 0, ///< Expected to fold away in lowering.
263  TCC_Basic = 1, ///< The cost of a typical 'add' instruction.
264  TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
265  };
266 
267  /// Estimate the cost of a GEP operation when lowered.
269  getGEPCost(Type *PointeeType, const Value *Ptr,
272 
273  /// \returns A value by which our inlining threshold should be multiplied.
274  /// This is primarily used to bump up the inlining threshold wholesale on
275  /// targets where calls are unusually expensive.
276  ///
277  /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
278  /// individual classes of instructions would be better.
279  unsigned getInliningThresholdMultiplier() const;
280 
281  /// \returns A value to be added to the inlining threshold.
282  unsigned adjustInliningThreshold(const CallBase *CB) const;
283 
284  /// \returns Vector bonus in percent.
285  ///
286  /// Vector bonuses: We want to more aggressively inline vector-dense kernels
287  /// and apply this bonus based on the percentage of vector instructions. A
288  /// bonus is applied if the vector instructions exceed 50% and half that
289  /// amount is applied if it exceeds 10%. Note that these bonuses are somewhat
290  /// arbitrary and evolved over time by accident as much as because they are
291  /// principled bonuses.
292  /// FIXME: It would be nice to base the bonus values on something more
293  /// scientific. A target may have no bonus on vector instructions.
294  int getInlinerVectorBonusPercent() const;
295 
296  /// \return the expected cost of a memcpy, which could e.g. depend on the
297  /// source/destination type and alignment and the number of bytes copied.
299 
300  /// \return The estimated number of case clusters when lowering \p 'SI'.
301  /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
302  /// table.
304  unsigned &JTSize,
305  ProfileSummaryInfo *PSI,
306  BlockFrequencyInfo *BFI) const;
307 
308  /// Estimate the cost of a given IR user when lowered.
309  ///
310  /// This can estimate the cost of either a ConstantExpr or Instruction when
311  /// lowered.
312  ///
313  /// \p Operands is a list of operands which can be a result of transformations
314  /// of the current operands. The number of operands on the list must equal
315  /// the number of current operands the IR user has. Their order on the
316  /// list must be the same as the order of the current operands the IR user
317  /// has.
318  ///
319  /// The returned cost is defined in terms of \c TargetCostConstants, see its
320  /// comments for a detailed explanation of the cost values.
322  TargetCostKind CostKind) const;
323 
324  /// This is a helper function which calls the two-argument getUserCost
325  /// with \p Operands which are the current operands U has.
328  return getUserCost(U, Operands, CostKind);
329  }
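  /// Example (sketch): assuming \c TTI, a GEP instruction \c GEP, and a
  /// SmallVector<const Value *, 4> \c NewOperands holding its prospective
  /// replacement operands, the size-and-latency cost of the rewritten GEP can
  /// be estimated as:
  ///   InstructionCost C = TTI.getUserCost(
  ///       GEP, NewOperands, TargetTransformInfo::TCK_SizeAndLatency);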
330 
331  /// If a branch or a select condition is skewed in one direction by more than
332  /// this factor, it is very likely to be predicted correctly.
334 
335  /// Return true if branch divergence exists.
336  ///
337  /// Branch divergence has a significantly negative impact on GPU performance
338  /// when threads in the same wavefront take different paths due to conditional
339  /// branches.
340  bool hasBranchDivergence() const;
341 
342  /// Return true if the target prefers to use GPU divergence analysis to
343  /// replace the legacy version.
344  bool useGPUDivergenceAnalysis() const;
345 
346  /// Returns whether V is a source of divergence.
347  ///
348  /// This function provides the target-dependent information for
349  /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis
350  /// first builds the dependency graph, and then runs the reachability
351  /// algorithm starting with the sources of divergence.
352  bool isSourceOfDivergence(const Value *V) const;
353 
354  // Returns true for the target-specific set of operations
355  // which produce a uniform result even when taking
356  // non-uniform arguments.
357  bool isAlwaysUniform(const Value *V) const;
358 
359  /// Returns the address space ID for a target's 'flat' address space. Note
360  /// this is not necessarily the same as addrspace(0), which LLVM sometimes
361  /// refers to as the generic address space. The flat address space is a
362  /// generic address space that can be used to access multiple segments of memory
363  /// with different address spaces. Access of a memory location through a
364  /// pointer with this address space is expected to be legal but slower
365  /// compared to the same memory location accessed through a pointer with a
366  /// different address space.
367  //
368  /// This is for targets with different pointer representations which can
369  /// be converted with the addrspacecast instruction. If a pointer is converted
370  /// to this address space, optimizations should attempt to replace the access
371  /// with the source address space.
372  ///
373  /// \returns ~0u if the target does not have such a flat address space to
374  /// optimize away.
375  unsigned getFlatAddressSpace() const;
376 
377  /// Return any intrinsic address operand indexes which may be rewritten if
378  /// they use a flat address space pointer.
379  ///
380  /// \returns true if the intrinsic was handled.
382  Intrinsic::ID IID) const;
383 
384  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
385 
386  /// Return true if globals in this address space can have initializers other
387  /// than `undef`.
388  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const;
389 
390  unsigned getAssumedAddrSpace(const Value *V) const;
391 
392  std::pair<const Value *, unsigned>
393  getPredicatedAddrSpace(const Value *V) const;
394 
395  /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
396  /// NewV, which has a different address space. This should happen for every
397  /// operand index that collectFlatAddressOperands returned for the intrinsic.
398  /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the
399  /// new value (which may be the original \p II with modified operands).
401  Value *NewV) const;
402 
403  /// Test whether calls to a function lower to actual program function
404  /// calls.
405  ///
406  /// The idea is to test whether the program is likely to require a 'call'
407  /// instruction or equivalent in order to call the given function.
408  ///
409  /// FIXME: It's not clear that this is a good or useful query API. Clients
410  /// should probably move to simpler cost metrics using the above.
411  /// Alternatively, we could split the cost interface into distinct code-size
412  /// and execution-speed costs. This would allow modelling the core of this
413  /// query more accurately as a call is a single small instruction, but
414  /// incurs significant execution cost.
415  bool isLoweredToCall(const Function *F) const;
416 
417  struct LSRCost {
418  /// TODO: Some of these could be merged. Also, a lexical ordering
419  /// isn't always optimal.
420  unsigned Insns;
421  unsigned NumRegs;
422  unsigned AddRecCost;
423  unsigned NumIVMuls;
424  unsigned NumBaseAdds;
425  unsigned ImmCost;
426  unsigned SetupCost;
427  unsigned ScaleCost;
428  };
429 
430  /// Parameters that control the generic loop unrolling transformation.
431  struct UnrollingPreferences {
432  /// The cost threshold for the unrolled loop. Should be relative to the
433  /// getUserCost values returned by this API, and the expectation is that
434  /// the unrolled loop's instructions when run through that interface should
435  /// not exceed this cost. However, this is only an estimate. Also, specific
436  /// loops may be unrolled even with a cost above this threshold if deemed
437  /// profitable. Set this to UINT_MAX to disable the loop body cost
438  /// restriction.
439  unsigned Threshold;
440  /// If complete unrolling will reduce the cost of the loop, we will boost
441  /// the Threshold by a certain percent to allow more aggressive complete
442  /// unrolling. This value provides the maximum boost percentage that we
443  /// can apply to Threshold (The value should be no less than 100).
444  /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
445  /// MaxPercentThresholdBoost / 100)
446  /// E.g. if complete unrolling reduces the loop execution time by 50%
447  /// then we boost the threshold by the factor of 2x. If unrolling is not
448  /// expected to reduce the running time, then we do not increase the
449  /// threshold.
451  /// The cost threshold for the unrolled loop when optimizing for size (set
452  /// to UINT_MAX to disable).
454  /// The cost threshold for the unrolled loop, like Threshold, but used
455  /// for partial/runtime unrolling (set to UINT_MAX to disable).
457  /// The cost threshold for the unrolled loop when optimizing for size, like
458  /// OptSizeThreshold, but used for partial/runtime unrolling (set to
459  /// UINT_MAX to disable).
461  /// A forced unrolling factor (the number of concatenated bodies of the
462  /// original loop in the unrolled loop body). When set to 0, the unrolling
463  /// transformation will select an unrolling factor based on the current cost
464  /// threshold and other factors.
465  unsigned Count;
466  /// Default unroll count for loops with run-time trip count.
468  // Set the maximum unrolling factor. The unrolling factor may be selected
469  // using the appropriate cost threshold, but may not exceed this number
470  // (set to UINT_MAX to disable). This does not apply in cases where the
471  // loop is being fully unrolled.
472  unsigned MaxCount;
473  /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
474  /// applies even if full unrolling is selected. This allows a target to fall
475  /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
477  // Represents number of instructions optimized when "back edge"
478  // becomes "fall through" in unrolled loop.
479  // For now we count a conditional branch on a backedge and a comparison
480  // feeding it.
481  unsigned BEInsns;
482  /// Allow partial unrolling (unrolling of loops to expand the size of the
483  /// loop body, not only to eliminate small constant-trip-count loops).
484  bool Partial;
485  /// Allow runtime unrolling (unrolling of loops to expand the size of the
486  /// loop body even when the number of loop iterations is not known at
487  /// compile time).
488  bool Runtime;
489  /// Allow generation of a loop remainder (extra iterations after unroll).
491  /// Allow emitting expensive instructions (such as divisions) when computing
492  /// the trip count of a loop for runtime unrolling.
494  /// Apply loop unroll on any kind of loop
495  /// (mainly to loops that fail runtime unrolling).
496  bool Force;
497  /// Allow using trip count upper bound to unroll loops.
499  /// Allow unrolling of all the iterations of the runtime loop remainder.
501  /// Allow unroll and jam. Used to enable unroll and jam for the target.
503  /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
504  /// value above is used during unroll and jam for the outer loop size.
505  /// This value is used in the same manner to limit the size of the inner
506  /// loop.
508  /// Don't allow loop unrolling to simulate more than this number of
509  /// iterations when checking full unroll profitability
511  };
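  /// Example (sketch of how a target hook might adjust these defaults; the
  /// values are illustrative only): inside an implementation of
  /// getUnrollingPreferences, the \c UP passed in can be tuned as:
  ///   UP.Partial = true;   // allow partial unrolling
  ///   UP.Runtime = true;   // allow runtime unrolling
  ///   UP.MaxCount = 4;     // cap the unroll factor at 4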
512 
513  /// Get target-customized preferences for the generic loop unrolling
514  /// transformation. The caller will initialize UP with the current
515  /// target-independent defaults.
518  OptimizationRemarkEmitter *ORE) const;
519 
520  /// Query the target whether it would be profitable to convert the given loop
521  /// into a hardware loop.
523  AssumptionCache &AC, TargetLibraryInfo *LibInfo,
524  HardwareLoopInfo &HWLoopInfo) const;
525 
526  /// Query the target whether it would be preferred to create a predicated
527  /// vector loop, which can avoid the need to emit a scalar epilogue loop.
530  DominatorTree *DT,
531  const LoopAccessInfo *LAI) const;
532 
533  /// Query the target whether lowering of the llvm.get.active.lane.mask
534  /// intrinsic is supported.
535  bool emitGetActiveLaneMask() const;
536 
537  // Parameters that control the loop peeling transformation
538  struct PeelingPreferences {
539  /// A forced peeling factor (the number of bodies of the original loop
540  /// that should be peeled off before the loop body). When set to 0,
541  /// a peeling factor is chosen based on profile information and other factors.
542  unsigned PeelCount;
543  /// Allow peeling off loop iterations.
545  /// Allow peeling off loop iterations for loop nests.
547  /// Allow peeling based on profile. Used to enable peeling off all
548  /// iterations based on the provided profile.
549  /// If the value is true, the peeling cost model can decide to peel only
550  /// some iterations, and in this case it will set this to false.
552  };
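  /// Example (sketch; illustrative values, assuming the member names above): a
  /// getPeelingPreferences implementation could force a small peel count:
  ///   PP.PeelCount = 2;
  ///   PP.AllowPeeling = true;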
553 
554  /// Get target-customized preferences for the generic loop peeling
555  /// transformation. The caller will initialize \p PP with the current
556  /// target-independent defaults with information from \p L and \p SE.
558  PeelingPreferences &PP) const;
559 
560  /// Targets can implement their own combinations for target-specific
561  /// intrinsics. This function will be called from the InstCombine pass every
562  /// time a target-specific intrinsic is encountered.
563  ///
564  /// \returns None to not do anything target specific, or a value that will be
565  /// returned from the InstCombiner. It is also possible to stop further
566  /// processing of the intrinsic by returning nullptr.
568  IntrinsicInst &II) const;
569  /// Can be used to implement target-specific instruction combining.
570  /// \see instCombineIntrinsic
573  APInt DemandedMask, KnownBits &Known,
574  bool &KnownBitsComputed) const;
575  /// Can be used to implement target-specific instruction combining.
576  /// \see instCombineIntrinsic
578  InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
579  APInt &UndefElts2, APInt &UndefElts3,
580  std::function<void(Instruction *, unsigned, APInt, APInt &)>
581  SimplifyAndSetOp) const;
582  /// @}
583 
584  /// \name Scalar Target Information
585  /// @{
586 
587  /// Flags indicating the kind of support for population count.
588  ///
589  /// Compared to the SW implementation, HW support is supposed to
590  /// significantly boost the performance when the population is dense, and it
591  /// may or may not degrade performance if the population is sparse. A HW
592  /// support is considered as "Fast" if it can outperform, or is on a par
593  /// with, SW implementation when the population is sparse; otherwise, it is
594  /// considered as "Slow".
596 
597  /// Return true if the specified immediate is a legal add immediate, that
598  /// is, the target has add instructions which can add a register with the
599  /// immediate without having to materialize the immediate into a register.
600  bool isLegalAddImmediate(int64_t Imm) const;
601 
602  /// Return true if the specified immediate is a legal icmp immediate,
603  /// that is, the target has icmp instructions which can compare a register
604  /// against the immediate without having to materialize the immediate into a
605  /// register.
606  bool isLegalICmpImmediate(int64_t Imm) const;
607 
608  /// Return true if the addressing mode represented by AM is legal for
609  /// this target, for a load/store of the specified type.
610  /// The type may be VoidTy, in which case only return true if the addressing
611  /// mode is legal for a load/store of any legal type.
612  /// If target returns true in LSRWithInstrQueries(), I may be valid.
613  /// TODO: Handle pre/postinc as well.
614  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
615  bool HasBaseReg, int64_t Scale,
616  unsigned AddrSpace = 0,
617  Instruction *I = nullptr) const;
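  /// Example (sketch): assuming \c TTI and an LLVMContext \c Ctx, asking
  /// whether a [BaseReg + 4 * IndexReg] address is legal for an i32 access in
  /// address space 0:
  ///   bool Legal = TTI.isLegalAddressingMode(Type::getInt32Ty(Ctx),
  ///                                          /*BaseGV=*/nullptr,
  ///                                          /*BaseOffset=*/0,
  ///                                          /*HasBaseReg=*/true,
  ///                                          /*Scale=*/4);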
618 
619  /// Return true if LSR cost of C1 is lower than C1.
621  TargetTransformInfo::LSRCost &C2) const;
622 
623  /// Return true if LSR major cost is number of registers. Targets which
624  /// implement their own isLSRCostLess and unset number of registers as major
625  /// cost should return false, otherwise return true.
626  bool isNumRegsMajorCostOfLSR() const;
627 
628  /// \returns true if LSR should not optimize a chain that includes \p I.
630 
631  /// Return true if the target can fuse a compare and branch.
632  /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
633  /// calculation for the instructions in a loop.
634  bool canMacroFuseCmp() const;
635 
636  /// Return true if the target can save a compare for loop count, for example
637  /// a hardware loop saves a compare.
638  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
640  TargetLibraryInfo *LibInfo) const;
641 
646  };
647 
648  /// Return the preferred addressing mode LSR should make efforts to generate.
650  ScalarEvolution *SE) const;
651 
652  /// Return true if the target supports masked store.
653  bool isLegalMaskedStore(Type *DataType, Align Alignment) const;
654  /// Return true if the target supports masked load.
655  bool isLegalMaskedLoad(Type *DataType, Align Alignment) const;
656 
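  /// Example (sketch): assuming \c TTI and an LLVMContext \c Ctx, a
  /// vectorizer-style check for whether a masked load of <4 x float> at
  /// 16-byte alignment is legal:
  ///   Type *VecTy = FixedVectorType::get(Type::getFloatTy(Ctx), 4);
  ///   bool UseMaskedLoad = TTI.isLegalMaskedLoad(VecTy, Align(16));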
657  /// Return true if the target supports nontemporal store.
658  bool isLegalNTStore(Type *DataType, Align Alignment) const;
659  /// Return true if the target supports nontemporal load.
660  bool isLegalNTLoad(Type *DataType, Align Alignment) const;
661 
662  /// Return true if the target supports masked scatter.
663  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
664  /// Return true if the target supports masked gather.
665  bool isLegalMaskedGather(Type *DataType, Align Alignment) const;
666  /// Return true if the target forces scalarizing of llvm.masked.gather
667  /// intrinsics.
668  bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const;
669  /// Return true if the target forces scalarizing of llvm.masked.scatter
670  /// intrinsics.
671  bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const;
672 
673  /// Return true if the target supports masked compress store.
674  bool isLegalMaskedCompressStore(Type *DataType) const;
675  /// Return true if the target supports masked expand load.
676  bool isLegalMaskedExpandLoad(Type *DataType) const;
677 
678  /// Return true if we should be enabling ordered reductions for the target.
679  bool enableOrderedReductions() const;
680 
681  /// Return true if the target has a unified operation to calculate division
682  /// and remainder. If so, the additional implicit multiplication and
683  /// subtraction required to calculate a remainder from division are free. This
684  /// can enable more aggressive transformations for division and remainder than
685  /// would typically be allowed using throughput or size cost models.
686  bool hasDivRemOp(Type *DataType, bool IsSigned) const;
687 
688  /// Return true if the given instruction (assumed to be a memory access
689  /// instruction) has a volatile variant. If that's the case then we can avoid
690  /// addrspacecast to generic AS for volatile loads/stores. Default
691  /// implementation returns false, which prevents address space inference for
692  /// volatile loads/stores.
693  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
694 
695  /// Return true if target doesn't mind addresses in vectors.
696  bool prefersVectorizedAddressing() const;
697 
698  /// Return the cost of the scaling factor used in the addressing
699  /// mode represented by AM for this target, for a load/store
700  /// of the specified type.
701  /// If the AM is supported, the return value must be >= 0.
702  /// If the AM is not supported, it returns a negative value.
703  /// TODO: Handle pre/postinc as well.
705  int64_t BaseOffset, bool HasBaseReg,
706  int64_t Scale,
707  unsigned AddrSpace = 0) const;
708 
709  /// Return true if the loop strength reduce pass should make
710  /// Instruction* based TTI queries to isLegalAddressingMode(). This is
711  /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
712  /// immediate offset and no index register.
713  bool LSRWithInstrQueries() const;
714 
715  /// Return true if it's free to truncate a value of type Ty1 to type
716  /// Ty2. e.g. On x86 it's free to truncate an i32 value in register EAX to i16
717  /// by referencing its sub-register AX.
718  bool isTruncateFree(Type *Ty1, Type *Ty2) const;
719 
720  /// Return true if it is profitable to hoist instructions in the
721  /// then/else blocks to before the if.
722  bool isProfitableToHoist(Instruction *I) const;
723 
724  bool useAA() const;
725 
726  /// Return true if this type is legal.
727  bool isTypeLegal(Type *Ty) const;
728 
729  /// Returns the estimated number of registers required to represent \p Ty.
731 
732  /// Return true if switches should be turned into lookup tables for the
733  /// target.
734  bool shouldBuildLookupTables() const;
735 
736  /// Return true if switches should be turned into lookup tables
737  /// containing this constant value for the target.
739 
740  /// Return true if lookup tables should be turned into relative lookup tables.
741  bool shouldBuildRelLookupTables() const;
742 
743  /// Return true if the input function, which is cold at all call sites,
744  /// should use the coldcc calling convention.
745  bool useColdCCForColdCall(Function &F) const;
746 
747  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
748  /// are set if the demanded result elements need to be inserted and/or
749  /// extracted from vectors.
751  const APInt &DemandedElts,
752  bool Insert, bool Extract) const;
753 
754  /// Estimate the overhead of scalarizing an instruction's unique
755  /// non-constant operands. The (potentially vector) types to use for each
756  /// argument are passed via Tys.
758  ArrayRef<Type *> Tys) const;
759 
760  /// If target has efficient vector element load/store instructions, it can
761  /// return true here so that insertion/extraction costs are not added to
762  /// the scalarization cost of a load/store.
764 
765  /// Don't restrict interleaved unrolling to small loops.
766  bool enableAggressiveInterleaving(bool LoopHasReductions) const;
767 
768  /// Returns options for expansion of memcmp. IsZeroCmp is
769  // true if this is the expansion of memcmp(p1, p2, s) == 0.
770  struct MemCmpExpansionOptions {
771  // Return true if memcmp expansion is enabled.
772  operator bool() const { return MaxNumLoads > 0; }
773 
774  // Maximum number of load operations.
775  unsigned MaxNumLoads = 0;
776 
777  // The list of available load sizes (in bytes), sorted in decreasing order.
779 
780  // For memcmp expansion when the memcmp result is only compared equal or
781  // not-equal to 0, allow up to this number of load pairs per block. As an
782  // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
783  // a0 = load2bytes &a[0]
784  // b0 = load2bytes &b[0]
785  // a2 = load1byte &a[2]
786  // b2 = load1byte &b[2]
787  // r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
788  unsigned NumLoadsPerBlock = 1;
789 
790  // Set to true to allow overlapping loads. For example, 7-byte compares can
791  // be done with two 4-byte compares instead of 4+2+1-byte compares. This
792  // requires all loads in LoadSizes to be doable in an unaligned way.
793  bool AllowOverlappingLoads = false;
794  };
796  bool IsZeroCmp) const;
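  /// Example (sketch of a possible target override; the values and the
  /// LoadSizes member are illustrative, not taken from an in-tree backend):
  ///   TTI::MemCmpExpansionOptions Options;
  ///   Options.MaxNumLoads = 8;
  ///   Options.LoadSizes = {8, 4, 2, 1}; // widest loads first
  ///   Options.AllowOverlappingLoads = true;
  ///   return Options;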
797 
798  /// Enable matching of interleaved access groups.
800 
801  /// Enable matching of interleaved access groups that contain predicated
802  /// accesses or gaps and therefore vectorized using masked
803  /// vector loads/stores.
805 
806  /// Indicate that it is potentially unsafe to automatically vectorize
807  /// floating-point operations because the semantics of vector and scalar
808  /// floating-point operations may differ. For example, ARM NEON v7 SIMD math
809  /// does not support IEEE-754 denormal numbers, while depending on the
810  /// platform, scalar floating-point math does.
811  /// This applies to floating-point math operations and calls, not memory
812  /// operations, shuffles, or casts.
814 
815  /// Determine if the target supports unaligned memory accesses.
817  unsigned AddressSpace = 0,
818  Align Alignment = Align(1),
819  bool *Fast = nullptr) const;
820 
821  /// Return hardware support for population count.
822  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
823 
824  /// Return true if the hardware has a fast square-root instruction.
825  bool haveFastSqrt(Type *Ty) const;
826 
827  /// Return true if it is faster to check if a floating-point value is NaN
828  /// (or not-NaN) versus a comparison against a constant FP zero value.
829  /// Targets should override this if materializing a 0.0 for comparison is
830  /// generally as cheap as checking for ordered/unordered.
831  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
832 
833  /// Return the expected cost of supporting the floating point operation
834  /// of the specified type.
835  InstructionCost getFPOpCost(Type *Ty) const;
836 
837  /// Return the expected cost of materializing for the given integer
838  /// immediate of the specified type.
839  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
840  TargetCostKind CostKind) const;
841 
842  /// Return the expected cost of materialization for the given integer
843  /// immediate of the specified type for a given instruction. The cost can be
844  /// zero if the immediate can be folded into the specified instruction.
845  InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
846  const APInt &Imm, Type *Ty,
848  Instruction *Inst = nullptr) const;
850  const APInt &Imm, Type *Ty,
851  TargetCostKind CostKind) const;
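  /// Example (sketch): assuming \c TTI and an LLVMContext \c Ctx, asking
  /// whether the constant 0x12345678 folds freely into the second operand of
  /// an 'and':
  ///   APInt Imm(32, 0x12345678);
  ///   InstructionCost C = TTI.getIntImmCostInst(
  ///       Instruction::And, /*Idx=*/1, Imm, Type::getInt32Ty(Ctx),
  ///       TargetTransformInfo::TCK_SizeAndLatency);
  ///   bool Free = C == TargetTransformInfo::TCC_Free;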
852 
853  /// Return the expected cost for the given integer when optimising
854  /// for size. This is different than the other integer immediate cost
855  /// functions in that it is subtarget agnostic. This is useful when you e.g.
856  /// target one ISA such as Aarch32 but smaller encodings could be possible
857  /// with another such as Thumb. This return value is used as a penalty when
858  /// the total costs for a constant is calculated (the bigger the cost, the
859  /// more beneficial constant hoisting is).
860  InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
861  const APInt &Imm, Type *Ty) const;
862  /// @}
863 
864  /// \name Vector Target Information
865  /// @{
866 
867  /// The various kinds of shuffle patterns for vector queries.
868  enum ShuffleKind {
869  SK_Broadcast, ///< Broadcast element 0 to all other elements.
870  SK_Reverse, ///< Reverse the order of the vector.
871  SK_Select, ///< Selects elements from the corresponding lane of
872  ///< either source operand. This is equivalent to a
873  ///< vector select with a constant condition operand.
874  SK_Transpose, ///< Transpose two vectors.
875  SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
876  SK_ExtractSubvector, ///< ExtractSubvector Index indicates start offset.
877  SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
878  ///< with any shuffle mask.
879  SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any
880  ///< shuffle mask.
881  SK_Splice ///< Concatenates elements from the first input vector
882  ///< with elements of the second input vector, returning
883  ///< a vector of the same type as the input vectors.
884  };
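  /// Example (sketch): assuming \c TTI and an LLVMContext \c Ctx, the cost of
  /// broadcasting lane 0 of a <8 x i16> vector:
  ///   auto *VTy = FixedVectorType::get(Type::getInt16Ty(Ctx), 8);
  ///   InstructionCost C =
  ///       TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VTy);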
885 
886  /// Additional information about an operand's possible values.
888  OK_AnyValue, // Operand can have any value.
889  OK_UniformValue, // Operand is uniform (splat of a value).
890  OK_UniformConstantValue, // Operand is uniform constant.
891  OK_NonUniformConstantValue // Operand is a non uniform constant value.
892  };
893 
894  /// Additional properties of an operand's values.
896 
897  /// \return the number of registers in the target-provided register class.
898  unsigned getNumberOfRegisters(unsigned ClassID) const;
899 
900  /// \return the target-provided register class ID for the provided type,
901  /// accounting for type promotion and other type-legalization techniques that
902  /// the target might apply. However, it specifically does not account for the
903  /// scalarization or splitting of vector types. Should a vector type require
904  /// scalarization or splitting into multiple underlying vector registers, that
905  /// type should be mapped to a register class containing no registers.
906  /// Specifically, this is designed to provide a simple, high-level view of the
907  /// register allocation later performed by the backend. These register classes
908  /// don't necessarily map onto the register classes used by the backend.
909  /// FIXME: It's not currently possible to determine how many registers
910  /// are used by the provided type.
911  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
912 
913  /// \return the target-provided register class name
914  const char *getRegisterClassName(unsigned ClassID) const;
915 
917 
918  /// \return The width of the largest scalar or vector register type.
920 
921  /// \return The width of the smallest vector register type.
922  unsigned getMinVectorRegisterBitWidth() const;
923 
924  /// \return The maximum value of vscale if the target specifies an
925  /// architectural maximum vector length, and None otherwise.
927 
928  /// \return the value of vscale to tune the cost model for.
930 
931  /// \return True if the vectorization factor should be chosen to
932  /// make the vector of the smallest element type match the size of a
933  /// vector register. For wider element types, this could result in
934  /// creating vectors that span multiple vector registers.
935  /// If false, the vectorization factor will be chosen based on the
936  /// size of the widest element type.
937  bool shouldMaximizeVectorBandwidth() const;
938 
939  /// \return The minimum vectorization factor for types of given element
940  /// bit width, or 0 if there is no minimum VF. The returned value only
941  /// applies when shouldMaximizeVectorBandwidth returns true.
942  /// If IsScalable is true, the returned ElementCount must be a scalable VF.
943  ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;
944 
945  /// \return The maximum vectorization factor for types of given element
946  /// bit width and opcode, or 0 if there is no maximum VF.
947  /// Currently only used by the SLP vectorizer.
948  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
949 
950  /// \return True if it should be considered for address type promotion.
951  /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
952  /// profitable without finding other extensions fed by the same input.
954  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
955 
956  /// \return The size of a cache line in bytes.
957  unsigned getCacheLineSize() const;
958 
959  /// The possible cache levels
960  enum class CacheLevel {
961  L1D, // The L1 data cache
962  L2D, // The L2 data cache
963 
964  // We currently do not model L3 caches, as their sizes differ widely between
965  // microarchitectures. Also, we currently do not have a use for L3 cache
966  // size modeling yet.
967  };
968 
969  /// \return The size of the cache level in bytes, if available.
971 
972  /// \return The associativity of the cache level, if available.
974 
975  /// \return How much before a load we should place the prefetch
976  /// instruction. This is currently measured in number of
977  /// instructions.
978  unsigned getPrefetchDistance() const;
979 
980  /// Some HW prefetchers can handle accesses up to a certain constant stride.
981  /// Sometimes prefetching is beneficial even below the HW prefetcher limit,
982  /// and the arguments provided are meant to serve as a basis for deciding this
983  /// for a particular loop.
984  ///
985  /// \param NumMemAccesses Number of memory accesses in the loop.
986  /// \param NumStridedMemAccesses Number of the memory accesses that
987  /// ScalarEvolution could find a known stride
988  /// for.
989  /// \param NumPrefetches Number of software prefetches that will be
990  /// emitted as determined by the addresses
991  /// involved and the cache line size.
992  /// \param HasCall True if the loop contains a call.
993  ///
994  /// \return This is the minimum stride in bytes where it makes sense to start
995  /// adding SW prefetches. The default is 1, i.e. prefetch with any
996  /// stride.
997  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
998  unsigned NumStridedMemAccesses,
999  unsigned NumPrefetches, bool HasCall) const;
1000 
1001  /// \return The maximum number of iterations to prefetch ahead. If
1002  /// the required number of iterations is more than this number, no
1003  /// prefetching is performed.
1004  unsigned getMaxPrefetchIterationsAhead() const;
1005 
1006  /// \return True if prefetching should also be done for writes.
1007  bool enableWritePrefetching() const;
1008 
1009  /// \return The maximum interleave factor that any transform should try to
1010  /// perform for this target. This number depends on the level of parallelism
1011  /// and the number of execution units in the CPU.
1012  unsigned getMaxInterleaveFactor(unsigned VF) const;
1013 
1014  /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
1015  static OperandValueKind getOperandInfo(const Value *V,
1016  OperandValueProperties &OpProps);
1017 
1018  /// This is an approximation of reciprocal throughput of a math/logic op.
1019  /// A higher cost indicates less expected throughput.
1020  /// From Agner Fog's guides, reciprocal throughput is "the average number of
1021  /// clock cycles per instruction when the instructions are not part of a
1022  /// limiting dependency chain."
1023  /// Therefore, costs should be scaled to account for multiple execution units
1024  /// on the target that can process this type of instruction. For example, if
1025  /// there are 5 scalar integer units and 2 vector integer units that can
1026  /// calculate an 'add' in a single cycle, this model should indicate that the
1027  /// cost of the vector add instruction is 2.5 times the cost of the scalar
1028  /// add instruction.
1029  /// \p Args is an optional argument which holds the instruction operands
1030  /// values so the TTI can analyze those values searching for special
1031  /// cases or optimizations based on those values.
1032  /// \p CxtI is the optional original context instruction, if one exists, to
1033  /// provide even more information.
1035  unsigned Opcode, Type *Ty,
1037  OperandValueKind Opd1Info = OK_AnyValue,
1038  OperandValueKind Opd2Info = OK_AnyValue,
1039  OperandValueProperties Opd1PropInfo = OP_None,
1040  OperandValueProperties Opd2PropInfo = OP_None,
1042  const Instruction *CxtI = nullptr) const;
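  /// Example (sketch): assuming \c TTI and an LLVMContext \c Ctx, the default
  /// (reciprocal-throughput) cost of a <4 x float> fadd:
  ///   auto *VTy = FixedVectorType::get(Type::getFloatTy(Ctx), 4);
  ///   InstructionCost C = TTI.getArithmeticInstrCost(Instruction::FAdd, VTy);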
1043 
1044  /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
1045  /// The exact mask may be passed as Mask, or else the array will be empty.
1046  /// The index and subtype parameters are used by the subvector insertion and
1047  /// extraction shuffle kinds to show the insert/extract point and the type of
1048  /// the subvector being inserted/extracted.
1049  /// NOTE: For subvector extractions Tp represents the source type.
1051  ArrayRef<int> Mask = None, int Index = 0,
1052  VectorType *SubTp = nullptr) const;
1053 
1054  /// Represents a hint about the context in which a cast is used.
1055  ///
1056  /// For zext/sext, the context of the cast is the operand, which must be a
1057  /// load of some kind. For trunc, the context of the cast is the single
1058  /// user of the instruction, which must be a store of some kind.
1059  ///
1060  /// This enum allows the vectorizer to give getCastInstrCost an idea of the
1061  /// type of cast it's dealing with, as not every cast is equal. For instance,
1062  /// the zext of a load may be free, but the zext of an interleaving load can
1063  /// be (very) expensive!
1064  ///
1065  /// See \c getCastContextHint to compute a CastContextHint from a cast
1066  /// Instruction*. Callers can use it if they don't need to override the
1067  /// context and just want it to be calculated from the instruction.
1068  ///
1069  /// FIXME: This handles the types of load/store that the vectorizer can
1070  /// produce, which are the cases where the context instruction is most
1071  /// likely to be incorrect. There are other situations where that can happen
1072  /// too, which might be handled here but in the long run a more general
1073  /// solution of costing multiple instructions at the same times may be better.
1074  enum class CastContextHint : uint8_t {
1075  None, ///< The cast is not used with a load/store of any kind.
1076  Normal, ///< The cast is used with a normal load/store.
1077  Masked, ///< The cast is used with a masked load/store.
1078  GatherScatter, ///< The cast is used with a gather/scatter.
1079  Interleave, ///< The cast is used with an interleaved load/store.
1080  Reversed, ///< The cast is used with a reversed load/store.
1081  };
1082 
1083  /// Calculates a CastContextHint from \p I.
1084  /// This should be used by callers of getCastInstrCost if they wish to
1085  /// determine the context from some instruction.
1086  /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr,
1087  /// or if it's another type of cast.
1089 
1090  /// \return The expected cost of cast instructions, such as bitcast, trunc,
1091  /// zext, etc. If there is an existing instruction that holds Opcode, it
1092  /// may be passed in the 'I' parameter.
1094  getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1097  const Instruction *I = nullptr) const;
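  /// Example (sketch): assuming \c TTI and a zero-extend instruction \c ZExtI
  /// whose source is a vector load, the cost can be queried with a context
  /// hint derived from the instruction itself:
  ///   TargetTransformInfo::CastContextHint CCH =
  ///       TargetTransformInfo::getCastContextHint(&ZExtI);
  ///   InstructionCost C = TTI.getCastInstrCost(
  ///       Instruction::ZExt, ZExtI.getType(), ZExtI.getOperand(0)->getType(),
  ///       CCH, TargetTransformInfo::TCK_RecipThroughput, &ZExtI);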
1098 
1099  /// \return The expected cost of a sign- or zero-extended vector extract. Use
1100  /// -1 to indicate that there is no information about the index value.
1101  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1102  VectorType *VecTy,
1103  unsigned Index = -1) const;
1104 
1105  /// \return The expected cost of control-flow related instructions such as
1106  /// Phi, Ret, Br, Switch.
1108  getCFInstrCost(unsigned Opcode,
1110  const Instruction *I = nullptr) const;
1111 
1112  /// \returns The expected cost of compare and select instructions. If there
1113  /// is an existing instruction that holds Opcode, it may be passed in the
1114  /// 'I' parameter. The \p VecPred parameter can be used to indicate the select
1115  /// is using a compare with the specified predicate as condition. When vector
1116  /// types are passed, \p VecPred must be used for all lanes.
1118  getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
1119  CmpInst::Predicate VecPred,
1121  const Instruction *I = nullptr) const;
1122 
1123  /// \return The expected cost of vector Insert and Extract.
1124  /// Use -1 to indicate that there is no information on the index value.
1125  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
1126  unsigned Index = -1) const;
1127 
1128  /// \return The cost of replication shuffle of \p VF elements typed \p EltTy
1129  /// \p ReplicationFactor times.
1130  ///
1131  /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
1132  /// <0,0,0,1,1,1,2,2,2,3,3,3>
1133  InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
1134  int VF,
1135  const APInt &DemandedDstElts,
1137 
1138  /// \return The cost of Load and Store instructions.
1140  getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1141  unsigned AddressSpace,
1143  const Instruction *I = nullptr) const;
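  /// Example (sketch): assuming \c TTI and an LLVMContext \c Ctx, the cost of
  /// a 16-byte aligned <4 x i32> load from address space 0:
  ///   auto *VTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);
  ///   InstructionCost C = TTI.getMemoryOpCost(Instruction::Load, VTy,
  ///                                           Align(16), /*AddressSpace=*/0);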
1144 
1145  /// \return The cost of VP Load and Store instructions.
1147  getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1148  unsigned AddressSpace,
1150  const Instruction *I = nullptr) const;
1151 
1152  /// \return The cost of masked Load and Store instructions.
1154  unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
1156 
1157  /// \return The cost of Gather or Scatter operation
1158  /// \p Opcode - is a type of memory access Load or Store
1159  /// \p DataTy - a vector type of the data to be loaded or stored
1160  /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
1161  /// \p VariableMask - true when the memory access is predicated with a mask
1162  /// that is not a compile-time constant
1163  /// \p Alignment - alignment of single element
1164  /// \p I - the optional original context instruction, if one exists, e.g. the
1165  /// load/store to transform or the call to the gather/scatter intrinsic
1167  unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1169  const Instruction *I = nullptr) const;
1170 
1171  /// \return The cost of the interleaved memory operation.
1172  /// \p Opcode is the memory operation code
1173  /// \p VecTy is the vector type of the interleaved access.
1174  /// \p Factor is the interleave factor
1175  /// \p Indices is the indices for interleaved load members (as interleaved
1176  /// load allows gaps)
1177  /// \p Alignment is the alignment of the memory operation
1178  /// \p AddressSpace is address space of the pointer.
1179  /// \p UseMaskForCond indicates if the memory access is predicated.
1180  /// \p UseMaskForGaps indicates if gaps should be masked.
1182  unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1183  Align Alignment, unsigned AddressSpace,
1185  bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
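  /// Example (sketch): assuming \c TTI and an LLVMContext \c Ctx, the cost of
  /// a factor-2 interleaved load where both members of the group are used;
  /// the wide vector type covers Factor * 8 elements:
  ///   auto *WideTy = FixedVectorType::get(Type::getFloatTy(Ctx), 16);
  ///   unsigned Indices[] = {0, 1};
  ///   InstructionCost C = TTI.getInterleavedMemoryOpCost(
  ///       Instruction::Load, WideTy, /*Factor=*/2, Indices, Align(16),
  ///       /*AddressSpace=*/0);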
1186 
1187  /// A helper function to determine the type of reduction algorithm used
1188  /// for a given \p Opcode and set of FastMathFlags \p FMF.
1190  return FMF != None && !(*FMF).allowReassoc();
1191  }
1192 
1193  /// Calculate the cost of vector reduction intrinsics.
1194  ///
1195  /// This is the cost of reducing the vector value of type \p Ty to a scalar
1196  /// value using the operation denoted by \p Opcode. The FastMathFlags
1197  /// parameter \p FMF indicates what type of reduction we are performing:
1198  /// 1. Tree-wise. This is the typical 'fast' reduction performed that
1199  /// involves successively splitting a vector into half and doing the
1200  /// operation on the pair of halves until you have a scalar value. For
1201  /// example:
1202  /// (v0, v1, v2, v3)
1203  /// ((v0+v2), (v1+v3), undef, undef)
1204  /// ((v0+v2+v1+v3), undef, undef, undef)
1205  /// This is the default behaviour for integer operations, whereas for
1206  /// floating point we only do this if \p FMF indicates that
1207  /// reassociation is allowed.
1208  /// 2. Ordered. For a vector with N elements this involves performing N
1209  /// operations in lane order, starting with an initial scalar value, i.e.
1210  /// result = InitVal + v0
1211  /// result = result + v1
1212  /// result = result + v2
1213  /// result = result + v3
1214  /// This is only the case for FP operations and when reassociation is not
1215  /// allowed.
1216  ///
1218  unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
1220 
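  /// Example (sketch): assuming \c TTI and an LLVMContext \c Ctx, the cost of
  /// an integer add reduction over <8 x i32> (no fast-math flags needed for
  /// integer reductions):
  ///   auto *VTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 8);
  ///   InstructionCost C =
  ///       TTI.getArithmeticReductionCost(Instruction::Add, VTy, None);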
1222  VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
1224 
1225  /// Calculate the cost of an extended reduction pattern, similar to
1226  /// getArithmeticReductionCost of an Add reduction with an extension and
1227  /// optional multiply. This is the cost of:
1228  /// ResTy vecreduce.add(ext(Ty A)), or if the IsMLA flag is set then:
1229  /// ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B))). The reduction happens
1230  /// on a VectorType with ResTy elements and Ty lanes.
1232  bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1234 
1235  /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
1236  /// Three cases are handled: 1. scalar instruction 2. vector instruction
1237  /// 3. scalar instruction which is to be vectorized.
1240 
1241  /// \returns The cost of Call instructions.
1243  Function *F, Type *RetTy, ArrayRef<Type *> Tys,
1245 
1246  /// \returns The number of pieces into which the provided type must be
1247  /// split during legalization. Zero is returned when the answer is unknown.
1248  unsigned getNumberOfParts(Type *Tp) const;
1249 
1250  /// \returns The cost of the address computation. For most targets this can be
1251  /// merged into the instruction indexing mode. Some targets might want to
1252  /// distinguish between address computation for memory operations on vector
1253  /// types and scalar types. Such targets should override this function.
1254  /// The 'SE' parameter holds pointer for the scalar evolution object which
1255  /// is used in order to get the Ptr step value in case of constant stride.
1256  /// The 'Ptr' parameter holds SCEV of the access pointer.
1258  ScalarEvolution *SE = nullptr,
1259  const SCEV *Ptr = nullptr) const;
1260 
1261  /// \returns The cost, if any, of keeping values of the given types alive
1262  /// over a callsite.
1263  ///
1264  /// Some types may require the use of register classes that do not have
1265  /// any callee-saved registers, so would require a spill and fill.
1267 
1268  /// \returns True if the intrinsic is a supported memory intrinsic. Info
1269  /// will contain additional information - whether the intrinsic may write
1270  /// to or read from memory, its volatility, and the pointer. Info is undefined
1271  /// if false is returned.
1273 
1274  /// \returns The maximum element size, in bytes, for an element
1275  /// unordered-atomic memory intrinsic.
1276  unsigned getAtomicMemIntrinsicMaxElementSize() const;
1277 
1278  /// \returns A value which is the result of the given memory intrinsic. New
1279  /// instructions may be created to extract the result from the given intrinsic
1280  /// memory operation. Returns nullptr if the target cannot create a result
1281  /// from the given intrinsic.
1283  Type *ExpectedType) const;
1284 
1285  /// \returns The type to use in a loop expansion of a memcpy call.
1287  unsigned SrcAddrSpace, unsigned DestAddrSpace,
1288  unsigned SrcAlign, unsigned DestAlign) const;
1289 
1290  /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
1291  /// \param RemainingBytes The number of bytes to copy.
1292  ///
1293  /// Calculates the operand types to use when copying \p RemainingBytes of
1294  /// memory, where source and destination alignments are \p SrcAlign and
1295  /// \p DestAlign respectively.
1298  unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1299  unsigned SrcAlign, unsigned DestAlign) const;
1300 
1301  /// \returns True if the two functions have compatible attributes for inlining
1302  /// purposes.
1303  bool areInlineCompatible(const Function *Caller,
1304  const Function *Callee) const;
1305 
1306  /// \returns True if the caller and callee agree on how \p Types will be
1307  /// passed to or returned from the callee.
1309  /// \param Types List of types to check.
1310  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
1311  const ArrayRef<Type *> &Types) const;
1312 
1313  /// The type of load/store indexing.
1315  MIM_Unindexed, ///< No indexing.
1316  MIM_PreInc, ///< Pre-incrementing.
1317  MIM_PreDec, ///< Pre-decrementing.
1318  MIM_PostInc, ///< Post-incrementing.
1319  MIM_PostDec ///< Post-decrementing.
1320  };
1321 
1322  /// \returns True if the specified indexed load for the given type is legal.
1323  bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
1324 
1325  /// \returns True if the specified indexed store for the given type is legal.
1326  bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
1327 
1328  /// \returns The bitwidth of the largest vector type that should be used to
1329  /// load/store in the given address space.
1330  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
1331 
1332  /// \returns True if the load instruction is legal to vectorize.
1333  bool isLegalToVectorizeLoad(LoadInst *LI) const;
1334 
1335  /// \returns True if the store instruction is legal to vectorize.
1336  bool isLegalToVectorizeStore(StoreInst *SI) const;
1337 
1338  /// \returns True if it is legal to vectorize the given load chain.
1339  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
1340  unsigned AddrSpace) const;
1341 
1342  /// \returns True if it is legal to vectorize the given store chain.
1343  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
1344  unsigned AddrSpace) const;
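  // Illustrative sketch (not part of the upstream header): a load/store
  // vectorizer could gate merging a chain of contiguous accesses on these
  // hooks, assuming the chain size and alignment were computed by the caller.
  //
  //   bool chainIsVectorizable(const TargetTransformInfo &TTI, bool IsLoadChain,
  //                            unsigned ChainBytes, Align Alignment,
  //                            unsigned AddrSpace) {
  //     // Never exceed the widest legal vector register for this address space.
  //     if (ChainBytes * 8 > TTI.getLoadStoreVecRegBitWidth(AddrSpace))
  //       return false;
  //     return IsLoadChain ? TTI.isLegalToVectorizeLoadChain(ChainBytes,
  //                                                          Alignment, AddrSpace)
  //                        : TTI.isLegalToVectorizeStoreChain(ChainBytes,
  //                                                           Alignment, AddrSpace);
  //   }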
1345 
1346  /// \returns True if it is legal to vectorize the given reduction kind.
1347  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
1348  ElementCount VF) const;
1349 
1350  /// \returns True if the given type is supported for scalable vectors
1351  bool isElementTypeLegalForScalableVector(Type *Ty) const;
1352 
1353  /// \returns The new vector factor value if the target doesn't support \p
1354  /// SizeInBytes loads or has a better vector factor.
1355  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1356  unsigned ChainSizeInBytes,
1357  VectorType *VecTy) const;
1358 
1359  /// \returns The new vector factor value if the target doesn't support \p
1360  /// SizeInBytes stores or has a better vector factor.
1361  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1362  unsigned ChainSizeInBytes,
1363  VectorType *VecTy) const;
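  // Illustrative sketch (not part of the upstream header): clamping a proposed
  // vectorization factor with the hook above; the caller is assumed to have
  // computed VF, LoadSize and the chain size already.
  //
  //   unsigned clampLoadVF(const TargetTransformInfo &TTI, unsigned VF,
  //                        unsigned LoadSize, unsigned ChainSizeInBytes,
  //                        VectorType *VecTy) {
  //     // The target may return a smaller factor than VF if it cannot
  //     // support the chain as-is.
  //     return TTI.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
  //   }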
1364 
1365  /// Flags describing the kind of vector reduction.
1366  struct ReductionFlags {
1368  bool IsMaxOp; ///< If the op is a min/max kind, true if it's a max operation.
1369  bool IsSigned; ///< Whether the operation is a signed int reduction.
1370  bool NoNaN; ///< If the op is an fp min/max, whether NaNs may be present.
1371  };
1372 
1373  /// \returns True if the target prefers reductions in loop.
1374  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1375  ReductionFlags Flags) const;
1376 
1377  /// \returns True if the target prefers the select of a reduction to be
1378  /// kept inside the loop when tail folding, i.e.
1379  /// loop:
1380  /// p = phi (0, s)
1381  /// a = add (p, x)
1382  /// s = select (mask, a, p)
1383  /// vecreduce.add(s)
1384  ///
1385  /// As opposed to the normal scheme of p = phi (0, a) which allows the select
1386  /// to be pulled out of the loop. If the select(.., add, ..) can be predicated
1387  /// by the target, this can lead to cleaner code generation.
1388  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1389  ReductionFlags Flags) const;
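  // Illustrative sketch (not part of the upstream header): a vectorizer might
  // consult the reduction preferences like this; the helper name and the flag
  // values chosen here are placeholders.
  //
  //   bool keepIntAddReductionInLoop(const TargetTransformInfo &TTI, Type *Ty) {
  //     TargetTransformInfo::ReductionFlags Flags;
  //     Flags.IsMaxOp = false;  // plain integer add, not a min/max reduction
  //     Flags.IsSigned = false;
  //     Flags.NoNaN = true;     // irrelevant for integer reductions
  //     return TTI.preferInLoopReduction(Instruction::Add, Ty, Flags);
  //   }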
1390 
1391  /// \returns True if the target wants to expand the given reduction intrinsic
1392  /// into a shuffle sequence.
1393  bool shouldExpandReduction(const IntrinsicInst *II) const;
1394 
1395  /// \returns the size cost of rematerializing a GlobalValue address relative
1396  /// to a stack reload.
1397  unsigned getGISelRematGlobalCost() const;
1398 
1399  /// \returns True if the target supports scalable vectors.
1400  bool supportsScalableVectors() const;
1401 
1402  /// \return true when scalable vectorization is preferred.
1403  bool enableScalableVectorization() const;
1404 
1405  /// \name Vector Predication Information
1406  /// @{
1407  /// Whether the target supports the %evl parameter of VP intrinsics
1408  /// efficiently in hardware, for the given opcode and type/alignment (see
1409  /// the LLVM Language Reference, "Vector Predication Intrinsics").
1410  /// Use of %evl is discouraged when that is not the case.
1411  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
1412  Align Alignment) const;
1413 
1414  struct VPLegalization {
1415  enum VPTransform {
1416  // keep the predicating parameter
1417  Legal = 0,
1418  // where legal, discard the predicate parameter
1419  Discard = 1,
1420  // transform into something else that is also predicating
1421  Convert = 2
1422  };
1423 
1424  // How to transform the EVL parameter.
1425  // Legal: keep the EVL parameter as it is.
1426  // Discard: Ignore the EVL parameter where it is safe to do so.
1427  // Convert: Fold the EVL into the mask parameter.
1428  VPTransform EVLParamStrategy;
1429 
1430  // How to transform the operator.
1431  // Legal: The target supports this operator.
1432  // Convert: Convert this to a non-VP operation.
1433  // The 'Discard' strategy is invalid.
1434  VPTransform OpStrategy;
1435 
1436  bool shouldDoNothing() const {
1437  return (EVLParamStrategy == Legal) && (OpStrategy == Legal);
1438  }
1439  VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)
1440  : EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {}
1441  };
1442 
1443  /// \returns How the target needs this vector-predicated operation to be
1444  /// transformed.
1445  VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const;
1446  /// @}
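  // Illustrative sketch (not part of the upstream header): a pass that expands
  // VP intrinsics could use the returned strategy to decide whether any
  // rewriting is required. The helper name is hypothetical.
  //
  //   bool needsVPExpansion(const TargetTransformInfo &TTI,
  //                         const VPIntrinsic &VPI) {
  //     TargetTransformInfo::VPLegalization VPL =
  //         TTI.getVPLegalizationStrategy(VPI);
  //     // If both the %evl parameter and the operator are already legal,
  //     // the intrinsic can be left alone.
  //     return !VPL.shouldDoNothing();
  //   }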
1447 
1448  /// @}
1449 
1450 private:
1451  /// Estimate the latency of the specified instruction.
1452  /// Returns 1 as the default value.
1453  InstructionCost getInstructionLatency(const Instruction *I) const;
1454 
1455  /// Returns the expected throughput cost of the instruction.
1456  /// Returns -1 if the cost is unknown.
1457  InstructionCost getInstructionThroughput(const Instruction *I) const;
1458 
1459  /// The abstract base class used to type erase specific TTI
1460  /// implementations.
1461  class Concept;
1462 
1463  /// The template model for the base class which wraps a concrete
1464  /// implementation in a type erased interface.
1465  template <typename T> class Model;
1466 
1467  std::unique_ptr<Concept> TTIImpl;
1468 };
1469 
1470 class TargetTransformInfo::Concept {
1471 public:
1472  virtual ~Concept() = 0;
1473  virtual const DataLayout &getDataLayout() const = 0;
1474  virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
1477  virtual unsigned getInliningThresholdMultiplier() = 0;
1478  virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
1479  virtual int getInlinerVectorBonusPercent() = 0;
1480  virtual InstructionCost getMemcpyCost(const Instruction *I) = 0;
1481  virtual unsigned
1482  getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
1483  ProfileSummaryInfo *PSI,
1484  BlockFrequencyInfo *BFI) = 0;
1485  virtual InstructionCost getUserCost(const User *U,
1487  TargetCostKind CostKind) = 0;
1489  virtual bool hasBranchDivergence() = 0;
1490  virtual bool useGPUDivergenceAnalysis() = 0;
1491  virtual bool isSourceOfDivergence(const Value *V) = 0;
1492  virtual bool isAlwaysUniform(const Value *V) = 0;
1493  virtual unsigned getFlatAddressSpace() = 0;
1494  virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
1495  Intrinsic::ID IID) const = 0;
1496  virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1497  virtual bool
1498  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0;
1499  virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
1500  virtual std::pair<const Value *, unsigned>
1501  getPredicatedAddrSpace(const Value *V) const = 0;
1503  Value *OldV,
1504  Value *NewV) const = 0;
1505  virtual bool isLoweredToCall(const Function *F) = 0;
1506  virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
1508  OptimizationRemarkEmitter *ORE) = 0;
1509  virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
1510  PeelingPreferences &PP) = 0;
1511  virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
1512  AssumptionCache &AC,
1513  TargetLibraryInfo *LibInfo,
1514  HardwareLoopInfo &HWLoopInfo) = 0;
1515  virtual bool
1518  DominatorTree *DT, const LoopAccessInfo *LAI) = 0;
1519  virtual bool emitGetActiveLaneMask() = 0;
1521  IntrinsicInst &II) = 0;
1522  virtual Optional<Value *>
1524  APInt DemandedMask, KnownBits &Known,
1525  bool &KnownBitsComputed) = 0;
1527  InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
1528  APInt &UndefElts2, APInt &UndefElts3,
1529  std::function<void(Instruction *, unsigned, APInt, APInt &)>
1530  SimplifyAndSetOp) = 0;
1531  virtual bool isLegalAddImmediate(int64_t Imm) = 0;
1532  virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
1533  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
1534  int64_t BaseOffset, bool HasBaseReg,
1535  int64_t Scale, unsigned AddrSpace,
1536  Instruction *I) = 0;
1539  virtual bool isNumRegsMajorCostOfLSR() = 0;
1540  virtual bool isProfitableLSRChainElement(Instruction *I) = 0;
1541  virtual bool canMacroFuseCmp() = 0;
1542  virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
1543  LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
1544  TargetLibraryInfo *LibInfo) = 0;
1545  virtual AddressingModeKind
1546  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const = 0;
1547  virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
1548  virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
1549  virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
1550  virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
1551  virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
1552  virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
1553  virtual bool forceScalarizeMaskedGather(VectorType *DataType,
1554  Align Alignment) = 0;
1555  virtual bool forceScalarizeMaskedScatter(VectorType *DataType,
1556  Align Alignment) = 0;
1557  virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
1558  virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
1559  virtual bool enableOrderedReductions() = 0;
1560  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
1561  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
1562  virtual bool prefersVectorizedAddressing() = 0;
1564  int64_t BaseOffset,
1565  bool HasBaseReg, int64_t Scale,
1566  unsigned AddrSpace) = 0;
1567  virtual bool LSRWithInstrQueries() = 0;
1568  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
1569  virtual bool isProfitableToHoist(Instruction *I) = 0;
1570  virtual bool useAA() = 0;
1571  virtual bool isTypeLegal(Type *Ty) = 0;
1572  virtual InstructionCost getRegUsageForType(Type *Ty) = 0;
1573  virtual bool shouldBuildLookupTables() = 0;
1574  virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
1575  virtual bool shouldBuildRelLookupTables() = 0;
1576  virtual bool useColdCCForColdCall(Function &F) = 0;
1578  const APInt &DemandedElts,
1579  bool Insert,
1580  bool Extract) = 0;
1581  virtual InstructionCost
1583  ArrayRef<Type *> Tys) = 0;
1584  virtual bool supportsEfficientVectorElementLoadStore() = 0;
1585  virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
1586  virtual MemCmpExpansionOptions
1587  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
1588  virtual bool enableInterleavedAccessVectorization() = 0;
1589  virtual bool enableMaskedInterleavedAccessVectorization() = 0;
1590  virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
1592  unsigned BitWidth,
1593  unsigned AddressSpace,
1594  Align Alignment,
1595  bool *Fast) = 0;
1596  virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
1597  virtual bool haveFastSqrt(Type *Ty) = 0;
1598  virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
1599  virtual InstructionCost getFPOpCost(Type *Ty) = 0;
1600  virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
1601  const APInt &Imm, Type *Ty) = 0;
1602  virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
1603  TargetCostKind CostKind) = 0;
1604  virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
1605  const APInt &Imm, Type *Ty,
1607  Instruction *Inst = nullptr) = 0;
1608  virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
1609  const APInt &Imm, Type *Ty,
1610  TargetCostKind CostKind) = 0;
1611  virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
1612  virtual unsigned getRegisterClassForType(bool Vector,
1613  Type *Ty = nullptr) const = 0;
1614  virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
1615  virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0;
1616  virtual unsigned getMinVectorRegisterBitWidth() const = 0;
1617  virtual Optional<unsigned> getMaxVScale() const = 0;
1618  virtual Optional<unsigned> getVScaleForTuning() const = 0;
1619  virtual bool shouldMaximizeVectorBandwidth() const = 0;
1620  virtual ElementCount getMinimumVF(unsigned ElemWidth,
1621  bool IsScalable) const = 0;
1622  virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
1624  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1625  virtual unsigned getCacheLineSize() const = 0;
1626  virtual Optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
1627  virtual Optional<unsigned> getCacheAssociativity(CacheLevel Level) const = 0;
1628 
1629  /// \return How much before a load we should place the prefetch
1630  /// instruction. This is currently measured in number of
1631  /// instructions.
1632  virtual unsigned getPrefetchDistance() const = 0;
1633 
1634  /// \return Some HW prefetchers can handle accesses up to a certain
1635  /// constant stride. This is the minimum stride in bytes where it
1636  /// makes sense to start adding SW prefetches. The default is 1,
1637  /// i.e. prefetch with any stride. Sometimes prefetching is beneficial
1638  /// even below the HW prefetcher limit, and the arguments provided are
1639  /// meant to serve as a basis for deciding this for a particular loop.
1640  virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1641  unsigned NumStridedMemAccesses,
1642  unsigned NumPrefetches,
1643  bool HasCall) const = 0;
1644 
1645  /// \return The maximum number of iterations to prefetch ahead. If
1646  /// the required number of iterations is more than this number, no
1647  /// prefetching is performed.
1648  virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
1649 
1650  /// \return True if prefetching should also be done for writes.
1651  virtual bool enableWritePrefetching() const = 0;
1652 
1653  virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
1655  unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
1656  OperandValueKind Opd1Info, OperandValueKind Opd2Info,
1657  OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo,
1658  ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
1660  ArrayRef<int> Mask, int Index,
1661  VectorType *SubTp) = 0;
1662  virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
1663  Type *Src, CastContextHint CCH,
1665  const Instruction *I) = 0;
1666  virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1667  VectorType *VecTy,
1668  unsigned Index) = 0;
1669  virtual InstructionCost getCFInstrCost(unsigned Opcode,
1671  const Instruction *I = nullptr) = 0;
1672  virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
1673  Type *CondTy,
1674  CmpInst::Predicate VecPred,
1676  const Instruction *I) = 0;
1677  virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
1678  unsigned Index) = 0;
1679 
1680  virtual InstructionCost
1681  getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
1682  const APInt &DemandedDstElts,
1684 
1685  virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
1686  Align Alignment,
1687  unsigned AddressSpace,
1689  const Instruction *I) = 0;
1690  virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src,
1691  Align Alignment,
1692  unsigned AddressSpace,
1694  const Instruction *I) = 0;
1695  virtual InstructionCost
1696  getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1697  unsigned AddressSpace,
1699  virtual InstructionCost
1700  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
1701  bool VariableMask, Align Alignment,
1703  const Instruction *I = nullptr) = 0;
1704 
1706  unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1707  Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
1708  bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
1709  virtual InstructionCost
1710  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
1713  virtual InstructionCost
1714  getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
1717  bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1719  virtual InstructionCost
1722  virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy,
1723  ArrayRef<Type *> Tys,
1725  virtual unsigned getNumberOfParts(Type *Tp) = 0;
1726  virtual InstructionCost
1727  getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr) = 0;
1728  virtual InstructionCost
1730  virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1731  MemIntrinsicInfo &Info) = 0;
1732  virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
1734  Type *ExpectedType) = 0;
1736  unsigned SrcAddrSpace,
1737  unsigned DestAddrSpace,
1738  unsigned SrcAlign,
1739  unsigned DestAlign) const = 0;
1740  virtual void getMemcpyLoopResidualLoweringType(
1742  unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1743  unsigned SrcAlign, unsigned DestAlign) const = 0;
1744  virtual bool areInlineCompatible(const Function *Caller,
1745  const Function *Callee) const = 0;
1746  virtual bool areTypesABICompatible(const Function *Caller,
1747  const Function *Callee,
1748  const ArrayRef<Type *> &Types) const = 0;
1749  virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1750  virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1751  virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
1752  virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
1753  virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
1754  virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1755  Align Alignment,
1756  unsigned AddrSpace) const = 0;
1757  virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1758  Align Alignment,
1759  unsigned AddrSpace) const = 0;
1760  virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
1761  ElementCount VF) const = 0;
1762  virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0;
1763  virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1764  unsigned ChainSizeInBytes,
1765  VectorType *VecTy) const = 0;
1766  virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1767  unsigned ChainSizeInBytes,
1768  VectorType *VecTy) const = 0;
1769  virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1770  ReductionFlags) const = 0;
1771  virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1772  ReductionFlags) const = 0;
1773  virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
1774  virtual unsigned getGISelRematGlobalCost() const = 0;
1775  virtual bool enableScalableVectorization() const = 0;
1776  virtual bool supportsScalableVectors() const = 0;
1777  virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
1778  Align Alignment) const = 0;
1779  virtual InstructionCost getInstructionLatency(const Instruction *I) = 0;
1780  virtual VPLegalization
1781  getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
1782 };
1783 
1784 template <typename T>
1785 class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
1786  T Impl;
1787 
1788 public:
1789  Model(T Impl) : Impl(std::move(Impl)) {}
1790  ~Model() override {}
1791 
1792  const DataLayout &getDataLayout() const override {
1793  return Impl.getDataLayout();
1794  }
1795 
1796  InstructionCost
1797  getGEPCost(Type *PointeeType, const Value *Ptr,
1798  ArrayRef<const Value *> Operands,
1800  return Impl.getGEPCost(PointeeType, Ptr, Operands, CostKind);
1801  }
1802  unsigned getInliningThresholdMultiplier() override {
1803  return Impl.getInliningThresholdMultiplier();
1804  }
1805  unsigned adjustInliningThreshold(const CallBase *CB) override {
1806  return Impl.adjustInliningThreshold(CB);
1807  }
1808  int getInlinerVectorBonusPercent() override {
1809  return Impl.getInlinerVectorBonusPercent();
1810  }
1811  InstructionCost getMemcpyCost(const Instruction *I) override {
1812  return Impl.getMemcpyCost(I);
1813  }
1814  InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
1815  TargetCostKind CostKind) override {
1816  return Impl.getUserCost(U, Operands, CostKind);
1817  }
1818  BranchProbability getPredictableBranchThreshold() override {
1819  return Impl.getPredictableBranchThreshold();
1820  }
1821  bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
1822  bool useGPUDivergenceAnalysis() override {
1823  return Impl.useGPUDivergenceAnalysis();
1824  }
1825  bool isSourceOfDivergence(const Value *V) override {
1826  return Impl.isSourceOfDivergence(V);
1827  }
1828 
1829  bool isAlwaysUniform(const Value *V) override {
1830  return Impl.isAlwaysUniform(V);
1831  }
1832 
1833  unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
1834 
1835  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
1836  Intrinsic::ID IID) const override {
1837  return Impl.collectFlatAddressOperands(OpIndexes, IID);
1838  }
1839 
1840  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
1841  return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
1842  }
1843 
1844  bool
1845  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
1846  return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
1847  }
1848 
1849  unsigned getAssumedAddrSpace(const Value *V) const override {
1850  return Impl.getAssumedAddrSpace(V);
1851  }
1852 
1853  std::pair<const Value *, unsigned>
1854  getPredicatedAddrSpace(const Value *V) const override {
1855  return Impl.getPredicatedAddrSpace(V);
1856  }
1857 
1858  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
1859  Value *NewV) const override {
1860  return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
1861  }
1862 
1863  bool isLoweredToCall(const Function *F) override {
1864  return Impl.isLoweredToCall(F);
1865  }
1866  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
1867  UnrollingPreferences &UP,
1868  OptimizationRemarkEmitter *ORE) override {
1869  return Impl.getUnrollingPreferences(L, SE, UP, ORE);
1870  }
1871  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
1872  PeelingPreferences &PP) override {
1873  return Impl.getPeelingPreferences(L, SE, PP);
1874  }
1875  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
1876  AssumptionCache &AC, TargetLibraryInfo *LibInfo,
1877  HardwareLoopInfo &HWLoopInfo) override {
1878  return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
1879  }
1880  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
1881  AssumptionCache &AC, TargetLibraryInfo *TLI,
1882  DominatorTree *DT,
1883  const LoopAccessInfo *LAI) override {
1884  return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
1885  }
1886  bool emitGetActiveLaneMask() override {
1887  return Impl.emitGetActiveLaneMask();
1888  }
1889  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
1890  IntrinsicInst &II) override {
1891  return Impl.instCombineIntrinsic(IC, II);
1892  }
1893  Optional<Value *>
1894  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
1895  APInt DemandedMask, KnownBits &Known,
1896  bool &KnownBitsComputed) override {
1897  return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
1898  KnownBitsComputed);
1899  }
1900  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
1901  InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
1902  APInt &UndefElts2, APInt &UndefElts3,
1903  std::function<void(Instruction *, unsigned, APInt, APInt &)>
1904  SimplifyAndSetOp) override {
1905  return Impl.simplifyDemandedVectorEltsIntrinsic(
1906  IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
1907  SimplifyAndSetOp);
1908  }
1909  bool isLegalAddImmediate(int64_t Imm) override {
1910  return Impl.isLegalAddImmediate(Imm);
1911  }
1912  bool isLegalICmpImmediate(int64_t Imm) override {
1913  return Impl.isLegalICmpImmediate(Imm);
1914  }
1915  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
1916  bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
1917  Instruction *I) override {
1918  return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
1919  AddrSpace, I);
1920  }
1921  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
1922  TargetTransformInfo::LSRCost &C2) override {
1923  return Impl.isLSRCostLess(C1, C2);
1924  }
1925  bool isNumRegsMajorCostOfLSR() override {
1926  return Impl.isNumRegsMajorCostOfLSR();
1927  }
1928  bool isProfitableLSRChainElement(Instruction *I) override {
1929  return Impl.isProfitableLSRChainElement(I);
1930  }
1931  bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
1932  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
1933  DominatorTree *DT, AssumptionCache *AC,
1934  TargetLibraryInfo *LibInfo) override {
1935  return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
1936  }
1937  AddressingModeKind
1938  getPreferredAddressingMode(const Loop *L,
1939  ScalarEvolution *SE) const override {
1940  return Impl.getPreferredAddressingMode(L, SE);
1941  }
1942  bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
1943  return Impl.isLegalMaskedStore(DataType, Alignment);
1944  }
1945  bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
1946  return Impl.isLegalMaskedLoad(DataType, Alignment);
1947  }
1948  bool isLegalNTStore(Type *DataType, Align Alignment) override {
1949  return Impl.isLegalNTStore(DataType, Alignment);
1950  }
1951  bool isLegalNTLoad(Type *DataType, Align Alignment) override {
1952  return Impl.isLegalNTLoad(DataType, Alignment);
1953  }
1954  bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
1955  return Impl.isLegalMaskedScatter(DataType, Alignment);
1956  }
1957  bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
1958  return Impl.isLegalMaskedGather(DataType, Alignment);
1959  }
1960  bool forceScalarizeMaskedGather(VectorType *DataType,
1961  Align Alignment) override {
1962  return Impl.forceScalarizeMaskedGather(DataType, Alignment);
1963  }
1964  bool forceScalarizeMaskedScatter(VectorType *DataType,
1965  Align Alignment) override {
1966  return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
1967  }
1968  bool isLegalMaskedCompressStore(Type *DataType) override {
1969  return Impl.isLegalMaskedCompressStore(DataType);
1970  }
1971  bool isLegalMaskedExpandLoad(Type *DataType) override {
1972  return Impl.isLegalMaskedExpandLoad(DataType);
1973  }
1974  bool enableOrderedReductions() override {
1975  return Impl.enableOrderedReductions();
1976  }
1977  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
1978  return Impl.hasDivRemOp(DataType, IsSigned);
1979  }
1980  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
1981  return Impl.hasVolatileVariant(I, AddrSpace);
1982  }
1983  bool prefersVectorizedAddressing() override {
1984  return Impl.prefersVectorizedAddressing();
1985  }
1986  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
1987  int64_t BaseOffset, bool HasBaseReg,
1988  int64_t Scale,
1989  unsigned AddrSpace) override {
1990  return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
1991  AddrSpace);
1992  }
1993  bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
1994  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
1995  return Impl.isTruncateFree(Ty1, Ty2);
1996  }
1997  bool isProfitableToHoist(Instruction *I) override {
1998  return Impl.isProfitableToHoist(I);
1999  }
2000  bool useAA() override { return Impl.useAA(); }
2001  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
2002  InstructionCost getRegUsageForType(Type *Ty) override {
2003  return Impl.getRegUsageForType(Ty);
2004  }
2005  bool shouldBuildLookupTables() override {
2006  return Impl.shouldBuildLookupTables();
2007  }
2008  bool shouldBuildLookupTablesForConstant(Constant *C) override {
2009  return Impl.shouldBuildLookupTablesForConstant(C);
2010  }
2011  bool shouldBuildRelLookupTables() override {
2012  return Impl.shouldBuildRelLookupTables();
2013  }
2014  bool useColdCCForColdCall(Function &F) override {
2015  return Impl.useColdCCForColdCall(F);
2016  }
2017 
2018  InstructionCost getScalarizationOverhead(VectorType *Ty,
2019  const APInt &DemandedElts,
2020  bool Insert, bool Extract) override {
2021  return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
2022  }
2023  InstructionCost
2024  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
2025  ArrayRef<Type *> Tys) override {
2026  return Impl.getOperandsScalarizationOverhead(Args, Tys);
2027  }
2028 
2029  bool supportsEfficientVectorElementLoadStore() override {
2030  return Impl.supportsEfficientVectorElementLoadStore();
2031  }
2032 
2033  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
2034  return Impl.enableAggressiveInterleaving(LoopHasReductions);
2035  }
2036  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
2037  bool IsZeroCmp) const override {
2038  return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
2039  }
2040  bool enableInterleavedAccessVectorization() override {
2041  return Impl.enableInterleavedAccessVectorization();
2042  }
2043  bool enableMaskedInterleavedAccessVectorization() override {
2044  return Impl.enableMaskedInterleavedAccessVectorization();
2045  }
2046  bool isFPVectorizationPotentiallyUnsafe() override {
2047  return Impl.isFPVectorizationPotentiallyUnsafe();
2048  }
2049  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
2050  unsigned AddressSpace, Align Alignment,
2051  bool *Fast) override {
2052  return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
2053  Alignment, Fast);
2054  }
2055  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
2056  return Impl.getPopcntSupport(IntTyWidthInBit);
2057  }
2058  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
2059 
2060  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
2061  return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
2062  }
2063 
2064  InstructionCost getFPOpCost(Type *Ty) override {
2065  return Impl.getFPOpCost(Ty);
2066  }
2067 
2068  InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
2069  const APInt &Imm, Type *Ty) override {
2070  return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
2071  }
2072  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
2073  TargetCostKind CostKind) override {
2074  return Impl.getIntImmCost(Imm, Ty, CostKind);
2075  }
2076  InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
2077  const APInt &Imm, Type *Ty,
2079  Instruction *Inst = nullptr) override {
2080  return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
2081  }
2082  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
2083  const APInt &Imm, Type *Ty,
2084  TargetCostKind CostKind) override {
2085  return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
2086  }
2087  unsigned getNumberOfRegisters(unsigned ClassID) const override {
2088  return Impl.getNumberOfRegisters(ClassID);
2089  }
2090  unsigned getRegisterClassForType(bool Vector,
2091  Type *Ty = nullptr) const override {
2092  return Impl.getRegisterClassForType(Vector, Ty);
2093  }
2094  const char *getRegisterClassName(unsigned ClassID) const override {
2095  return Impl.getRegisterClassName(ClassID);
2096  }
2097  TypeSize getRegisterBitWidth(RegisterKind K) const override {
2098  return Impl.getRegisterBitWidth(K);
2099  }
2100  unsigned getMinVectorRegisterBitWidth() const override {
2101  return Impl.getMinVectorRegisterBitWidth();
2102  }
2103  Optional<unsigned> getMaxVScale() const override {
2104  return Impl.getMaxVScale();
2105  }
2106  Optional<unsigned> getVScaleForTuning() const override {
2107  return Impl.getVScaleForTuning();
2108  }
2109  bool shouldMaximizeVectorBandwidth() const override {
2110  return Impl.shouldMaximizeVectorBandwidth();
2111  }
2112  ElementCount getMinimumVF(unsigned ElemWidth,
2113  bool IsScalable) const override {
2114  return Impl.getMinimumVF(ElemWidth, IsScalable);
2115  }
2116  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
2117  return Impl.getMaximumVF(ElemWidth, Opcode);
2118  }
2119  bool shouldConsiderAddressTypePromotion(
2120  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
2121  return Impl.shouldConsiderAddressTypePromotion(
2122  I, AllowPromotionWithoutCommonHeader);
2123  }
2124  unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
2125  Optional<unsigned> getCacheSize(CacheLevel Level) const override {
2126  return Impl.getCacheSize(Level);
2127  }
2128  Optional<unsigned> getCacheAssociativity(CacheLevel Level) const override {
2129  return Impl.getCacheAssociativity(Level);
2130  }
2131 
2132  /// Return the preferred prefetch distance in terms of instructions.
2133  ///
2134  unsigned getPrefetchDistance() const override {
2135  return Impl.getPrefetchDistance();
2136  }
2137 
2138  /// Return the minimum stride necessary to trigger software
2139  /// prefetching.
2140  ///
2141  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
2142  unsigned NumStridedMemAccesses,
2143  unsigned NumPrefetches,
2144  bool HasCall) const override {
2145  return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
2146  NumPrefetches, HasCall);
2147  }
2148 
2149  /// Return the maximum prefetch distance in terms of loop
2150  /// iterations.
2151  ///
2152  unsigned getMaxPrefetchIterationsAhead() const override {
2153  return Impl.getMaxPrefetchIterationsAhead();
2154  }
2155 
2156  /// \return True if prefetching should also be done for writes.
2157  bool enableWritePrefetching() const override {
2158  return Impl.enableWritePrefetching();
2159  }
2160 
2161  unsigned getMaxInterleaveFactor(unsigned VF) override {
2162  return Impl.getMaxInterleaveFactor(VF);
2163  }
2164  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
2165  unsigned &JTSize,
2166  ProfileSummaryInfo *PSI,
2167  BlockFrequencyInfo *BFI) override {
2168  return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
2169  }
2170  InstructionCost getArithmeticInstrCost(
2171  unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
2172  OperandValueKind Opd1Info, OperandValueKind Opd2Info,
2173  OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo,
2174  ArrayRef<const Value *> Args,
2175  const Instruction *CxtI = nullptr) override {
2176  return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
2177  Opd1PropInfo, Opd2PropInfo, Args, CxtI);
2178  }
2179  InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
2180  ArrayRef<int> Mask, int Index,
2181  VectorType *SubTp) override {
2182  return Impl.getShuffleCost(Kind, Tp, Mask, Index, SubTp);
2183  }
2184  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
2185  CastContextHint CCH,
2187  const Instruction *I) override {
2188  return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
2189  }
2190  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
2191  VectorType *VecTy,
2192  unsigned Index) override {
2193  return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
2194  }
2195  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
2196  const Instruction *I = nullptr) override {
2197  return Impl.getCFInstrCost(Opcode, CostKind, I);
2198  }
2199  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
2200  CmpInst::Predicate VecPred,
2202  const Instruction *I) override {
2203  return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
2204  }
2205  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
2206  unsigned Index) override {
2207  return Impl.getVectorInstrCost(Opcode, Val, Index);
2208  }
2209  InstructionCost
2210  getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
2211  const APInt &DemandedDstElts,
2212  TTI::TargetCostKind CostKind) override {
2213  return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
2214  DemandedDstElts, CostKind);
2215  }
2216  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2217  unsigned AddressSpace,
2219  const Instruction *I) override {
2220  return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2221  CostKind, I);
2222  }
2223  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2224  unsigned AddressSpace,
2226  const Instruction *I) override {
2227  return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2228  CostKind, I);
2229  }
2230  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
2231  Align Alignment, unsigned AddressSpace,
2232  TTI::TargetCostKind CostKind) override {
2233  return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2234  CostKind);
2235  }
2236  InstructionCost
2237  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2238  bool VariableMask, Align Alignment,
2240  const Instruction *I = nullptr) override {
2241  return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
2242  Alignment, CostKind, I);
2243  }
2244  InstructionCost getInterleavedMemoryOpCost(
2245  unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
2246  Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
2247  bool UseMaskForCond, bool UseMaskForGaps) override {
2248  return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
2249  Alignment, AddressSpace, CostKind,
2250  UseMaskForCond, UseMaskForGaps);
2251  }
2252  InstructionCost
2253  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
2254  Optional<FastMathFlags> FMF,
2255  TTI::TargetCostKind CostKind) override {
2256  return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
2257  }
2258  InstructionCost
2259  getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
2260  TTI::TargetCostKind CostKind) override {
2261  return Impl.getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
2262  }
2263  InstructionCost getExtendedAddReductionCost(
2264  bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
2266  return Impl.getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, Ty,
2267  CostKind);
2268  }
2269  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
2270  TTI::TargetCostKind CostKind) override {
2271  return Impl.getIntrinsicInstrCost(ICA, CostKind);
2272  }
2273  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
2274  ArrayRef<Type *> Tys,
2275  TTI::TargetCostKind CostKind) override {
2276  return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
2277  }
2278  unsigned getNumberOfParts(Type *Tp) override {
2279  return Impl.getNumberOfParts(Tp);
2280  }
2281  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
2282  const SCEV *Ptr) override {
2283  return Impl.getAddressComputationCost(Ty, SE, Ptr);
2284  }
2285  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
2286  return Impl.getCostOfKeepingLiveOverCall(Tys);
2287  }
2288  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
2289  MemIntrinsicInfo &Info) override {
2290  return Impl.getTgtMemIntrinsic(Inst, Info);
2291  }
2292  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
2293  return Impl.getAtomicMemIntrinsicMaxElementSize();
2294  }
2295  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
2296  Type *ExpectedType) override {
2297  return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
2298  }
2299  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
2300  unsigned SrcAddrSpace, unsigned DestAddrSpace,
2301  unsigned SrcAlign,
2302  unsigned DestAlign) const override {
2303  return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
2304  DestAddrSpace, SrcAlign, DestAlign);
2305  }
2306  void getMemcpyLoopResidualLoweringType(
2307  SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
2308  unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
2309  unsigned SrcAlign, unsigned DestAlign) const override {
2310  Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
2311  SrcAddrSpace, DestAddrSpace,
2312  SrcAlign, DestAlign);
2313  }
2314  bool areInlineCompatible(const Function *Caller,
2315  const Function *Callee) const override {
2316  return Impl.areInlineCompatible(Caller, Callee);
2317  }
2318  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
2319  const ArrayRef<Type *> &Types) const override {
2320  return Impl.areTypesABICompatible(Caller, Callee, Types);
2321  }
2322  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
2323  return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
2324  }
2325  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
2326  return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
2327  }
2328  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
2329  return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
2330  }
2331  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
2332  return Impl.isLegalToVectorizeLoad(LI);
2333  }
2334  bool isLegalToVectorizeStore(StoreInst *SI) const override {
2335  return Impl.isLegalToVectorizeStore(SI);
2336  }
2337  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
2338  unsigned AddrSpace) const override {
2339  return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
2340  AddrSpace);
2341  }
2342  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
2343  unsigned AddrSpace) const override {
2344  return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
2345  AddrSpace);
2346  }
2347  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
2348  ElementCount VF) const override {
2349  return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
2350  }
2351  bool isElementTypeLegalForScalableVector(Type *Ty) const override {
2352  return Impl.isElementTypeLegalForScalableVector(Ty);
2353  }
2354  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
2355  unsigned ChainSizeInBytes,
2356  VectorType *VecTy) const override {
2357  return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
2358  }
2359  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
2360  unsigned ChainSizeInBytes,
2361  VectorType *VecTy) const override {
2362  return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
2363  }
2364  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
2365  ReductionFlags Flags) const override {
2366  return Impl.preferInLoopReduction(Opcode, Ty, Flags);
2367  }
2368  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
2369  ReductionFlags Flags) const override {
2370  return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
2371  }
2372  bool shouldExpandReduction(const IntrinsicInst *II) const override {
2373  return Impl.shouldExpandReduction(II);
2374  }
2375 
2376  unsigned getGISelRematGlobalCost() const override {
2377  return Impl.getGISelRematGlobalCost();
2378  }
2379 
2380  bool supportsScalableVectors() const override {
2381  return Impl.supportsScalableVectors();
2382  }
2383 
2384  bool enableScalableVectorization() const override {
2385  return Impl.enableScalableVectorization();
2386  }
2387 
2388  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
2389  Align Alignment) const override {
2390  return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
2391  }
2392 
2393  InstructionCost getInstructionLatency(const Instruction *I) override {
2394  return Impl.getInstructionLatency(I);
2395  }
2396 
2397  VPLegalization
2398  getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
2399  return Impl.getVPLegalizationStrategy(PI);
2400  }
2401 };
2402 
2403 template <typename T>
2404 TargetTransformInfo::TargetTransformInfo(T Impl)
2405  : TTIImpl(new Model<T>(Impl)) {}
2406 
2407 /// Analysis pass providing the \c TargetTransformInfo.
2408 ///
2409 /// The core idea of the TargetIRAnalysis is to expose an interface through
2410 /// which LLVM targets can analyze and provide information about the middle
2411 /// end's target-independent IR. This supports use cases such as target-aware
2412 /// cost modeling of IR constructs.
2413 ///
2414 /// This is a function analysis because much of the cost modeling for targets
2415 /// is done in a subtarget specific way and LLVM supports compiling different
2416 /// functions targeting different subtargets in order to support runtime
2417 /// dispatch according to the observed subtarget.
2418 class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
2419 public:
2420  typedef TargetTransformInfo Result;
2421 
2422  /// Default construct a target IR analysis.
2423  ///
2424  /// This will use the module's datalayout to construct a baseline
2425  /// conservative TTI result.
2426  TargetIRAnalysis();
2427 
2428  /// Construct an IR analysis pass around a target-provided callback.
2429  ///
2430  /// The callback will be called with a particular function for which the TTI
2431  /// is needed and must return a TTI object for that function.
2432  TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
2433 
2434  // Value semantics. We spell out the constructors for MSVC.
2435  TargetIRAnalysis(const TargetIRAnalysis &Arg)
2436  : TTICallback(Arg.TTICallback) {}
2437  TargetIRAnalysis(TargetIRAnalysis &&Arg)
2438  : TTICallback(std::move(Arg.TTICallback)) {}
2439  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
2440  TTICallback = RHS.TTICallback;
2441  return *this;
2442  }
2443  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
2444  TTICallback = std::move(RHS.TTICallback);
2445  return *this;
2446  }
2447 
2448  Result run(const Function &F, FunctionAnalysisManager &);
2449 
2450 private:
2451  friend AnalysisInfoMixin<TargetIRAnalysis>;
2452  static AnalysisKey Key;
2453 
2454  /// The callback used to produce a result.
2455  ///
2456  /// We use a completely opaque callback so that targets can provide whatever
2457  /// mechanism they desire for constructing the TTI for a given function.
2458  ///
2459  /// FIXME: Should we really use std::function? It's relatively inefficient.
2460  /// It might be possible to arrange for even stateful callbacks to outlive
2461  /// the analysis and thus use a function_ref which would be lighter weight.
2462  /// This may also be less error prone as the callback is likely to reference
2463  /// the external TargetMachine, and that reference needs to never dangle.
2464  std::function<Result(const Function &)> TTICallback;
2465 
2466  /// Helper function used as the callback in the default constructor.
2467  static Result getDefaultTTI(const Function &F);
2468 };
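// Illustrative sketch (not part of the upstream header): registering this
// analysis with a new-pass-manager FunctionAnalysisManager, assuming the
// caller owns a TargetMachine named TM.
//
//   FunctionAnalysisManager FAM;
//   FAM.registerPass([&] {
//     return TargetIRAnalysis(
//         [&](const Function &F) { return TM.getTargetTransformInfo(F); });
//   });
//   // Later, for a function F:
//   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);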
2469 
2470 /// Wrapper pass for TargetTransformInfo.
2471 ///
2472 /// This pass can be constructed from a TTI object which it stores internally
2473 /// and is queried by passes.
2474 class TargetTransformInfoWrapperPass : public ImmutablePass {
2475  TargetIRAnalysis TIRA;
2476  Optional<TargetTransformInfo> TTI;
2477 
2478  virtual void anchor();
2479 
2480 public:
2481  static char ID;
2482 
2483  /// We must provide a default constructor for the pass but it should
2484  /// never be used.
2485  ///
2486  /// Use the constructor below or call one of the creation routines.
2487  TargetTransformInfoWrapperPass();
2488 
2489  explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
2490 
2491  TargetTransformInfo &getTTI(const Function &F);
2492  };
2493 
2494 /// Create an analysis pass wrapper around a TTI object.
2495 ///
2496 /// This analysis pass just holds the TTI instance and makes it available to
2497 /// clients.
2498 ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
2499 
2500 } // namespace llvm
2501 
2502 #endif
llvm::TargetTransformInfo::ReductionFlags::IsMaxOp
bool IsMaxOp
If the op a min/max kind, true if it's a max operation.
Definition: TargetTransformInfo.h:1368
llvm::TargetTransformInfo::CastContextHint::GatherScatter
@ GatherScatter
The cast is used with a gather/scatter.
llvm::TargetTransformInfo::isHardwareLoopProfitable
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
Query the target whether it would be profitable to convert the given loop into a hardware loop.
Definition: TargetTransformInfo.cpp:285
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::TargetTransformInfo::Concept::getExtractWithExtendCost
virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)=0
llvm::TargetTransformInfo::CacheLevel::L1D
@ L1D
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::TargetTransformInfo::PSK_FastHardware
@ PSK_FastHardware
Definition: TargetTransformInfo.h:595
llvm::TargetTransformInfo::Concept::getPopcntSupport
virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit)=0
llvm::TargetTransformInfo::Concept::getGEPCost
virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::CastContextHint::Masked
@ Masked
The cast is used with a masked load/store.
llvm::TargetTransformInfo::getMemcpyLoopLoweringType
Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign) const
Definition: TargetTransformInfo.cpp:974
llvm::TargetTransformInfo::UnrollingPreferences::BEInsns
unsigned BEInsns
Definition: TargetTransformInfo.h:481
llvm::TargetTransformInfo::UnrollingPreferences::PartialOptSizeThreshold
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
Definition: TargetTransformInfo.h:460
llvm::TargetTransformInfo::SK_Select
@ SK_Select
Selects elements from the corresponding lane of either source operand.
Definition: TargetTransformInfo.h:871
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2418
llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
Definition: TargetTransformInfo.h:488
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:211
llvm::TargetTransformInfo::LSRCost::NumRegs
unsigned NumRegs
Definition: TargetTransformInfo.h:421
llvm::TargetTransformInfo::TCC_Expensive
@ TCC_Expensive
The cost of a 'div' instruction on x86.
Definition: TargetTransformInfo.h:264
llvm::TargetTransformInfo::UnrollingPreferences::PartialThreshold
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
Definition: TargetTransformInfo.h:456
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
llvm::TargetTransformInfo::ReductionFlags
Flags describing the kind of vector reduction.
Definition: TargetTransformInfo.h:1366
llvm::TargetTransformInfo::instCombineIntrinsic
Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Targets can implement their own combinations for target-specific intrinsics.
Definition: TargetTransformInfo.cpp:303
llvm::TargetTransformInfo::Concept::isHardwareLoopProfitable
virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo)=0
llvm::TargetTransformInfo::Concept::isSourceOfDivergence
virtual bool isSourceOfDivergence(const Value *V)=0
llvm::TargetTransformInfo::Concept::enableMaskedInterleavedAccessVectorization
virtual bool enableMaskedInterleavedAccessVectorization()=0
llvm::MemIntrinsicInfo::PtrVal
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.
Definition: TargetTransformInfo.h:75
llvm::TargetTransformInfo::preferPredicateOverEpilogue
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, const LoopAccessInfo *LAI) const
Query the target whether it would be prefered to create a predicated vector loop, which can avoid the...
Definition: TargetTransformInfo.cpp:291
llvm::TargetTransformInfo::Concept::rewriteIntrinsicWithAddressSpace
virtual Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const =0
llvm::TargetTransformInfo::getMinVectorRegisterBitWidth
unsigned getMinVectorRegisterBitWidth() const
Definition: TargetTransformInfo.cpp:614
llvm::TargetTransformInfo::Concept::enableOrderedReductions
virtual bool enableOrderedReductions()=0
llvm::HardwareLoopInfo::LoopDecrement
Value * LoopDecrement
Definition: TargetTransformInfo.h:103
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::TargetTransformInfo::getShuffleCost
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask=None, int Index=0, VectorType *SubTp=nullptr) const
Definition: TargetTransformInfo.cpp:745
llvm::TargetTransformInfo::Concept::areTypesABICompatible
virtual bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const =0
llvm::TargetTransformInfo::MemIndexedMode
MemIndexedMode
The type of load/store indexing.
Definition: TargetTransformInfo.h:1314
llvm::TargetTransformInfo::TCK_Latency
@ TCK_Latency
The latency of instruction.
Definition: TargetTransformInfo.h:213
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:721
llvm::TargetTransformInfo::getVScaleForTuning
Optional< unsigned > getVScaleForTuning() const
Definition: TargetTransformInfo.cpp:622
llvm::TargetTransformInfo::UnrollingPreferences::MaxCount
unsigned MaxCount
Definition: TargetTransformInfo.h:472
llvm::ImmutablePass
ImmutablePass class - This class is used to provide information that does not need to be run.
Definition: Pass.h:269
llvm::TargetTransformInfo::getRegisterClassName
const char * getRegisterClassName(unsigned ClassID) const
Definition: TargetTransformInfo.cpp:605
AtomicOrdering.h
llvm::ElementCount
Definition: TypeSize.h:385
llvm::TargetTransformInfo::OK_UniformValue
@ OK_UniformValue
Definition: TargetTransformInfo.h:889
llvm::TargetTransformInfo::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:827
llvm::TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass
TargetTransformInfoWrapperPass()
We must provide a default constructor for the pass but it should never be used.
Definition: TargetTransformInfo.cpp:1184
llvm::TargetTransformInfo::Concept::enableMemCmpExpansion
virtual MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const =0
llvm::TargetTransformInfo::canMacroFuseCmp
bool canMacroFuseCmp() const
Return true if the target can fuse a compare and branch.
Definition: TargetTransformInfo.cpp:365
llvm::Function
Definition: Function.h:62
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::TargetTransformInfo::Concept::isLegalMaskedScatter
virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment)=0
Pass.h
llvm::TargetTransformInfo::getRegisterBitWidth
TypeSize getRegisterBitWidth(RegisterKind K) const
Definition: TargetTransformInfo.cpp:609
llvm::TargetTransformInfo::PopcntSupportKind
PopcntSupportKind
Flags indicating the kind of support for population count.
Definition: TargetTransformInfo.h:595
llvm::TargetTransformInfo::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:855
llvm::TargetTransformInfo::Concept::getIntImmCost
virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind)=0
llvm::TargetTransformInfo::getVPLegalizationStrategy
VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const
Definition: TargetTransformInfo.cpp:1069
llvm::TargetTransformInfo::AMK_PostIndexed
@ AMK_PostIndexed
Definition: TargetTransformInfo.h:644
llvm::TargetTransformInfoWrapperPass::getTTI
TargetTransformInfo & getTTI(const Function &F)
Definition: TargetTransformInfo.cpp:1197
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::TargetTransformInfo::getAddressComputationCost
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE=nullptr, const SCEV *Ptr=nullptr) const
Definition: TargetTransformInfo.cpp:917
InstCombiner
Machine InstCombiner
Definition: MachineCombiner.cpp:136
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1177
llvm::TargetTransformInfo::PeelingPreferences::AllowPeeling
bool AllowPeeling
Allow peeling off loop iterations.
Definition: TargetTransformInfo.h:544
llvm::TargetTransformInfo::Concept::hasVolatileVariant
virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace)=0
llvm::TargetTransformInfo::Concept::isFPVectorizationPotentiallyUnsafe
virtual bool isFPVectorizationPotentiallyUnsafe()=0
llvm::TargetTransformInfo::Concept::isLegalMaskedExpandLoad
virtual bool isLegalMaskedExpandLoad(Type *DataType)=0
llvm::TargetTransformInfo::Concept::isAlwaysUniform
virtual bool isAlwaysUniform(const Value *V)=0
llvm::TargetTransformInfo::Concept::getMaxPrefetchIterationsAhead
virtual unsigned getMaxPrefetchIterationsAhead() const =0
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::TargetTransformInfo::MemCmpExpansionOptions::AllowOverlappingLoads
bool AllowOverlappingLoads
Definition: TargetTransformInfo.h:793
llvm::TargetTransformInfo::getScalingFactorCost
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
Definition: TargetTransformInfo.cpp:446
llvm::IntrinsicCostAttributes::getReturnType
Type * getReturnType() const
Definition: TargetTransformInfo.h:150
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:460
llvm::TargetTransformInfo::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
Definition: TargetTransformInfo.cpp:896
llvm::TargetTransformInfo::Concept::getRegisterClassForType
virtual unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const =0
llvm::TargetTransformInfo::RGK_Scalar
@ RGK_Scalar
Definition: TargetTransformInfo.h:916
llvm::TargetTransformInfo::Concept::enableInterleavedAccessVectorization
virtual bool enableInterleavedAccessVectorization()=0
llvm::TargetTransformInfo::getAssumedAddrSpace
unsigned getAssumedAddrSpace(const Value *V) const
Definition: TargetTransformInfo.cpp:267
llvm::IntrinsicCostAttributes::getInst
const IntrinsicInst * getInst() const
Definition: TargetTransformInfo.h:149
llvm::TargetTransformInfo::Concept::useGPUDivergenceAnalysis
virtual bool useGPUDivergenceAnalysis()=0
llvm::TargetTransformInfo::isLegalICmpImmediate
bool isLegalICmpImmediate(int64_t Imm) const
Return true if the specified immediate is a legal icmp immediate, that is, the target has icmp instructi...
Definition: TargetTransformInfo.cpp:340
llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJamInnerLoopThreshold
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
Definition: TargetTransformInfo.h:507
llvm::TargetTransformInfo::Concept::getMinMaxReductionCost
virtual InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, TTI::TargetCostKind CostKind)=0
llvm::TargetIRAnalysis::operator=
TargetIRAnalysis & operator=(const TargetIRAnalysis &RHS)
Definition: TargetTransformInfo.h:2439
llvm::TargetIRAnalysis::TargetIRAnalysis
TargetIRAnalysis(TargetIRAnalysis &&Arg)
Definition: TargetTransformInfo.h:2437
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::TargetTransformInfo::Concept::preferPredicateOverEpilogue
virtual bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, const LoopAccessInfo *LAI)=0
llvm::HardwareLoopInfo::ExitBranch
BranchInst * ExitBranch
Definition: TargetTransformInfo.h:100
llvm::TargetTransformInfo::UnrollingPreferences::UnrollRemainder
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
Definition: TargetTransformInfo.h:500
llvm::TargetTransformInfo::UnrollingPreferences::Count
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
Definition: TargetTransformInfo.h:465
llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition: TargetTransformInfo.h:214
llvm::TargetTransformInfo::VPLegalization
Definition: TargetTransformInfo.h:1414
llvm::TargetTransformInfo::shouldBuildLookupTables
bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
Definition: TargetTransformInfo.cpp:477
llvm::TargetTransformInfo::LSRCost::NumIVMuls
unsigned NumIVMuls
Definition: TargetTransformInfo.h:423
llvm::TargetTransformInfo::Concept::isLegalToVectorizeReduction
virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const =0
llvm::HardwareLoopInfo::isHardwareLoopCandidate
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
Definition: TargetTransformInfo.cpp:100
llvm::TargetTransformInfo::UnrollingPreferences::Partial
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Definition: TargetTransformInfo.h:484
llvm::TargetTransformInfo::getMaximumVF
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const
Definition: TargetTransformInfo.cpp:635
llvm::TargetTransformInfo::Concept::getEstimatedNumberOfCaseClusters
virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)=0
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::TargetTransformInfo::useColdCCForColdCall
bool useColdCCForColdCall(Function &F) const
Return true if the input function, which is cold at all call sites, should use the coldcc calling conventi...
Definition: TargetTransformInfo.cpp:490
llvm::TargetTransformInfo::VPLegalization::Convert
@ Convert
Definition: TargetTransformInfo.h:1421
llvm::TargetTransformInfo::Concept::getGatherScatterOpCost
virtual InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:538
llvm::TargetTransformInfo::operator=
TargetTransformInfo & operator=(TargetTransformInfo &&RHS)
Definition: TargetTransformInfo.cpp:187
llvm::TargetTransformInfo::Concept::getPeelingPreferences
virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP)=0
llvm::HardwareLoopInfo::L
Loop * L
Definition: TargetTransformInfo.h:98
llvm::TargetTransformInfo::Concept::isLSRCostLess
virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2)=0
llvm::TargetTransformInfo::isLegalNTLoad
bool isLegalNTLoad(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal load.
Definition: TargetTransformInfo.cpp:397
llvm::TargetTransformInfo::isLegalNTStore
bool isLegalNTStore(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal store.
Definition: TargetTransformInfo.cpp:392
llvm::TargetTransformInfo::UnrollingPreferences::FullUnrollMaxCount
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
Definition: TargetTransformInfo.h:476
llvm::Optional
Definition: APInt.h:33
ForceNestedLoop
static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))
llvm::TargetTransformInfo::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:818
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::TargetTransformInfo::OP_PowerOf2
@ OP_PowerOf2
Definition: TargetTransformInfo.h:895
llvm::TargetTransformInfo::getPredictableBranchThreshold
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
Definition: TargetTransformInfo.cpp:228
Operator.h
llvm::TargetTransformInfo::getIntImmCodeSizeCost
InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) const
Return the expected cost for the given integer when optimising for size.
Definition: TargetTransformInfo.cpp:560
llvm::TargetTransformInfo::getInlinerVectorBonusPercent
int getInlinerVectorBonusPercent() const
Definition: TargetTransformInfo.cpp:201
llvm::TargetTransformInfo::getIntImmCostIntrin
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Definition: TargetTransformInfo.cpp:587
RHS
Value * RHS
Definition: X86PartialReduction.cpp:74
VectorType
Definition: ItaniumDemangle.h:1037
llvm::TargetTransformInfo::Concept::isTruncateFree
virtual bool isTruncateFree(Type *Ty1, Type *Ty2)=0
llvm::TargetTransformInfo::getIntImmCostInst
InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr) const
Return the expected cost of materialization for the given integer immediate of the specified type for...
Definition: TargetTransformInfo.cpp:577
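A minimal sketch (helper name hypothetical) asking how expensive it is to materialize a 32-bit constant as the second operand of an add, using the size-and-latency cost kind:
#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Type.h"
using namespace llvm;

static InstructionCost addImmCost(const TargetTransformInfo &TTI,
                                  LLVMContext &Ctx) {
  Type *I32 = Type::getInt32Ty(Ctx);
  APInt Imm(/*numBits=*/32, /*val=*/0x12345678);
  return TTI.getIntImmCostInst(Instruction::Add, /*Idx=*/1, Imm, I32,
                               TargetTransformInfo::TCK_SizeAndLatency);
}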
llvm::FastMathFlags
Convenience struct for specifying and reasoning about fast-math flags.
Definition: Operator.h:165
llvm::TargetTransformInfo::Concept::getAtomicMemIntrinsicMaxElementSize
virtual unsigned getAtomicMemIntrinsicMaxElementSize() const =0
llvm::TargetTransformInfo::Concept::emitGetActiveLaneMask
virtual bool emitGetActiveLaneMask()=0
llvm::TargetTransformInfo::enableInterleavedAccessVectorization
bool enableInterleavedAccessVectorization() const
Enable matching of interleaved access groups.
Definition: TargetTransformInfo.cpp:520
llvm::TargetTransformInfo::Concept::prefersVectorizedAddressing
virtual bool prefersVectorizedAddressing()=0
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::TargetTransformInfo::SK_PermuteSingleSrc
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
Definition: TargetTransformInfo.h:879
llvm::TargetTransformInfo::Concept::getOrCreateResultFromMemIntrinsic
virtual Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType)=0
llvm::TargetTransformInfo::Concept::getCostOfKeepingLiveOverCall
virtual InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys)=0
llvm::TargetTransformInfo::getOperandsScalarizationOverhead
InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys) const
Estimate the overhead of scalarizing an instruction's unique non-constant operands.
Definition: TargetTransformInfo.cpp:501
llvm::TargetTransformInfo::Concept::getRegisterBitWidth
virtual TypeSize getRegisterBitWidth(RegisterKind K) const =0
llvm::TargetTransformInfo::UnrollingPreferences::AllowExpensiveTripCount
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
Definition: TargetTransformInfo.h:493
llvm::TargetTransformInfo::preferInLoopReduction
bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags Flags) const
Definition: TargetTransformInfo.cpp:1058
llvm::TargetTransformInfo::CacheLevel
CacheLevel
The possible cache levels.
Definition: TargetTransformInfo.h:960
llvm::TargetTransformInfo::Concept
Definition: TargetTransformInfo.h:1470
llvm::TargetTransformInfo::Concept::isLegalNTStore
virtual bool isLegalNTStore(Type *DataType, Align Alignment)=0
new
Definition: README.txt:125
llvm::TargetTransformInfo::LSRCost::Insns
unsigned Insns
TODO: Some of these could be merged.
Definition: TargetTransformInfo.h:420
llvm::TargetTransformInfo::Concept::shouldMaximizeVectorBandwidth
virtual bool shouldMaximizeVectorBandwidth() const =0
llvm::IntrinsicCostAttributes::getScalarizationCost
InstructionCost getScalarizationCost() const
Definition: TargetTransformInfo.h:152
llvm::TargetTransformInfo::SK_Broadcast
@ SK_Broadcast
Broadcast element 0 to all other elements.
Definition: TargetTransformInfo.h:869
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::TargetTransformInfo::Concept::getAddressComputationCost
virtual InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr)=0
llvm::TargetTransformInfo::Concept::getIntImmCodeSizeCost
virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty)=0
llvm::TargetTransformInfo::canHaveNonUndefGlobalInitializerInAddressSpace
bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const
Return true if globals in this address space can have initializers other than undef.
Definition: TargetTransformInfo.cpp:262
llvm::TargetTransformInfo::Concept::isLegalNTLoad
virtual bool isLegalNTLoad(Type *DataType, Align Alignment)=0
llvm::IntrinsicCostAttributes::skipScalarizationCost
bool skipScalarizationCost() const
Definition: TargetTransformInfo.h:160
llvm::TargetTransformInfo::requiresOrderedReduction
static bool requiresOrderedReduction(Optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of Fas...
Definition: TargetTransformInfo.h:1189
Context
ManagedStatic< detail::RecordContext > Context
Definition: Record.cpp:96
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::HardwareLoopInfo::IsNestingLegal
bool IsNestingLegal
Definition: TargetTransformInfo.h:105
llvm::TargetTransformInfo::LSRCost::AddRecCost
unsigned AddRecCost
Definition: TargetTransformInfo.h:422
llvm::IntrinsicCostAttributes::getFlags
FastMathFlags getFlags() const
Definition: TargetTransformInfo.h:151
llvm::Reloc::Model
Model
Definition: CodeGen.h:22
llvm::TargetTransformInfo::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const
Get target-customized preferences for the generic loop unrolling transformation.
Definition: TargetTransformInfo.cpp:325
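For illustration, a minimal sketch of how a caller hands a pre-populated UnrollingPreferences struct to the target; the loop, ScalarEvolution, and remark emitter are assumed to come from the enclosing pass, and real callers (such as the loop unroller) initialize every field before the call:
#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;

static void queryUnrollingPreferences(const TargetTransformInfo &TTI, Loop *L,
                                      ScalarEvolution &SE,
                                      OptimizationRemarkEmitter *ORE) {
  TargetTransformInfo::UnrollingPreferences UP;
  UP.Threshold = 150;   // caller-chosen generic defaults (illustrative values)
  UP.Partial = false;
  UP.Runtime = false;
  TTI.getUnrollingPreferences(L, SE, UP, ORE);
  // UP now carries any target-specific adjustments (e.g. MaxCount, Force).
}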
llvm::TargetTransformInfo::isLegalMaskedScatter
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const
Return true if the target supports masked scatter.
Definition: TargetTransformInfo.cpp:406
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:185
llvm::TargetTransformInfo::LSRCost::SetupCost
unsigned SetupCost
Definition: TargetTransformInfo.h:426
llvm::TargetTransformInfo::Concept::isLegalMaskedLoad
virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment)=0
llvm::TargetTransformInfo::Concept::canMacroFuseCmp
virtual bool canMacroFuseCmp()=0
llvm::TargetTransformInfo::Concept::isTypeLegal
virtual bool isTypeLegal(Type *Ty)=0
llvm::TargetTransformInfo::getGISelRematGlobalCost
unsigned getGISelRematGlobalCost() const
Definition: TargetTransformInfo.cpp:1077
llvm::IntrinsicCostAttributes::getArgTypes
const SmallVectorImpl< Type * > & getArgTypes() const
Definition: TargetTransformInfo.h:154
llvm::TargetTransformInfo::areInlineCompatible
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
Definition: TargetTransformInfo.cpp:990
llvm::TargetTransformInfo::Concept::getMinimumVF
virtual ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const =0
llvm::TargetTransformInfo::isTypeLegal
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
Definition: TargetTransformInfo.cpp:469
llvm::HardwareLoopInfo::ExitCount
const SCEV * ExitCount
Definition: TargetTransformInfo.h:101
llvm::TargetTransformInfo::SK_PermuteTwoSrc
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
Definition: TargetTransformInfo.h:877
llvm::TargetTransformInfo::Concept::getCacheSize
virtual Optional< unsigned > getCacheSize(CacheLevel Level) const =0
llvm::TargetTransformInfo::PeelingPreferences::PeelProfiledIterations
bool PeelProfiledIterations
Allow peeling basing on profile.
Definition: TargetTransformInfo.h:551
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::TargetTransformInfo::getMinimumVF
ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const
Definition: TargetTransformInfo.cpp:630
llvm::MemIntrinsicInfo::isUnordered
bool isUnordered() const
Definition: TargetTransformInfo.h:87
llvm::TargetTransformInfo::Concept::getPredictableBranchThreshold
virtual BranchProbability getPredictableBranchThreshold()=0
llvm::TargetTransformInfo::isProfitableLSRChainElement
bool isProfitableLSRChainElement(Instruction *I) const
Definition: TargetTransformInfo.cpp:361
llvm::TargetTransformInfo::Concept::useAA
virtual bool useAA()=0
llvm::TargetTransformInfo::getCastContextHint
static CastContextHint getCastContextHint(const Instruction *I)
Calculates a CastContextHint from I.
Definition: TargetTransformInfo.cpp:756
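A minimal sketch combining this static helper with getCastInstrCost for an existing cast instruction (helper name hypothetical):
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

static InstructionCost castCostInContext(const TargetTransformInfo &TTI,
                                         const CastInst *CI) {
  TargetTransformInfo::CastContextHint CCH =
      TargetTransformInfo::getCastContextHint(CI);
  return TTI.getCastInstrCost(CI->getOpcode(), CI->getDestTy(),
                              CI->getSrcTy(), CCH,
                              TargetTransformInfo::TCK_RecipThroughput, CI);
}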
llvm::TargetTransformInfo::getOrCreateResultFromMemIntrinsic
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) const
Definition: TargetTransformInfo.cpp:969
llvm::TargetTransformInfo::isLegalToVectorizeLoad
bool isLegalToVectorizeLoad(LoadInst *LI) const
Definition: TargetTransformInfo.cpp:1015
llvm::MemIntrinsicInfo::Ordering
AtomicOrdering Ordering
Definition: TargetTransformInfo.h:78
llvm::TargetTransformInfo::Concept::useColdCCForColdCall
virtual bool useColdCCForColdCall(Function &F)=0
llvm::TargetTransformInfoWrapperPass::ID
static char ID
Definition: TargetTransformInfo.h:2481
llvm::TargetTransformInfo::TargetCostConstants
TargetCostConstants
Underlying constants for 'cost' values in this interface.
Definition: TargetTransformInfo.h:261
llvm::TargetTransformInfo::getPopcntSupport
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
Return hardware support for population count.
Definition: TargetTransformInfo.cpp:542
llvm::TargetIRAnalysis::TargetIRAnalysis
TargetIRAnalysis(const TargetIRAnalysis &Arg)
Definition: TargetTransformInfo.h:2435
llvm::TargetTransformInfo::OP_None
@ OP_None
Definition: TargetTransformInfo.h:895
llvm::TargetTransformInfo::ShuffleKind
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Definition: TargetTransformInfo.h:868
llvm::TargetTransformInfo::getPreferredAddressingMode
AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
Return the preferred addressing mode LSR should make efforts to generate.
Definition: TargetTransformInfo.cpp:377
llvm::TargetTransformInfo::CastContextHint
CastContextHint
Represents a hint about the context in which a cast is used.
Definition: TargetTransformInfo.h:1074
llvm::TargetTransformInfo::Concept::getVPLegalizationStrategy
virtual VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const =0
llvm::User
Definition: User.h:44
llvm::TargetTransformInfo::useGPUDivergenceAnalysis
bool useGPUDivergenceAnalysis() const
Return true if the target prefers to use GPU divergence analysis to replace the legacy version.
Definition: TargetTransformInfo.cpp:236
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::TargetTransformInfo::UnrollingPreferences::Force
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
Definition: TargetTransformInfo.h:496
InstrTypes.h
llvm::TargetTransformInfo::Concept::getMaxVScale
virtual Optional< unsigned > getMaxVScale() const =0
llvm::TargetTransformInfo::Concept::getPrefetchDistance
virtual unsigned getPrefetchDistance() const =0
llvm::TargetTransformInfo::hasBranchDivergence
bool hasBranchDivergence() const
Return true if branch divergence exists.
Definition: TargetTransformInfo.cpp:232
llvm::TargetTransformInfo::isLegalToVectorizeReduction
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const
Definition: TargetTransformInfo.cpp:1035
llvm::TargetTransformInfo::getIntImmCost
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing the given integer immediate of the specified type.
Definition: TargetTransformInfo.cpp:570
llvm::TargetTransformInfo::Concept::supportsEfficientVectorElementLoadStore
virtual bool supportsEfficientVectorElementLoadStore()=0
llvm::TargetTransformInfo::Concept::canSaveCmp
virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo)=0
llvm::TargetTransformInfo::getNumberOfParts
unsigned getNumberOfParts(Type *Tp) const
Definition: TargetTransformInfo.cpp:912
llvm::TargetTransformInfo::Concept::isFCmpOrdCheaperThanFCmpZero
virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)=0
llvm::TargetTransformInfo::Concept::isNumRegsMajorCostOfLSR
virtual bool isNumRegsMajorCostOfLSR()=0
llvm::TargetTransformInfo::supportsScalableVectors
bool supportsScalableVectors() const
Definition: TargetTransformInfo.cpp:1081
llvm::TargetTransformInfo::Concept::getExtendedAddReductionCost
virtual InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput)=0
llvm::TargetTransformInfo::isIndexedLoadLegal
bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const
Definition: TargetTransformInfo.cpp:1001
llvm::TargetTransformInfo::CastContextHint::Interleave
@ Interleave
The cast is used with an interleaved load/store.
llvm::TargetTransformInfo::UnrollingPreferences::MaxIterationsCountToAnalyze
unsigned MaxIterationsCountToAnalyze
Don't allow loop unrolling to simulate more than this number of iterations when checking full unroll ...
Definition: TargetTransformInfo.h:510
false
Definition: StackSlotColoring.cpp:142
llvm::TargetTransformInfo::Concept::getNumberOfRegisters
virtual unsigned getNumberOfRegisters(unsigned ClassID) const =0
llvm::TargetTransformInfo::Concept::isLegalToVectorizeLoadChain
virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const =0
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJam
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
Definition: TargetTransformInfo.h:502
llvm::TargetTransformInfo::isLegalMaskedExpandLoad
bool isLegalMaskedExpandLoad(Type *DataType) const
Return true if the target supports masked expand load.
Definition: TargetTransformInfo.cpp:425
llvm::TargetTransformInfo::enableScalableVectorization
bool enableScalableVectorization() const
Definition: TargetTransformInfo.cpp:1085
llvm::TargetTransformInfo::Concept::simplifyDemandedVectorEltsIntrinsic
virtual Optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)=0
llvm::TargetTransformInfo::Concept::isLegalMaskedGather
virtual bool isLegalMaskedGather(Type *DataType, Align Alignment)=0
llvm::TargetTransformInfo::Concept::hasBranchDivergence
virtual bool hasBranchDivergence()=0
llvm::Instruction
Definition: Instruction.h:45
llvm::TargetTransformInfo::Concept::enableWritePrefetching
virtual bool enableWritePrefetching() const =0
llvm::TargetTransformInfo::isLSRCostLess
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2) const
Return true if the LSR cost of C1 is lower than the cost of C2.
Definition: TargetTransformInfo.cpp:353
llvm::TargetTransformInfo::MIM_PreDec
@ MIM_PreDec
Pre-decrementing.
Definition: TargetTransformInfo.h:1317
llvm::HardwareLoopInfo::PerformEntryTest
bool PerformEntryTest
Definition: TargetTransformInfo.h:109
llvm::TargetTransformInfo::Concept::getMaskedMemoryOpCost
virtual InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::isLegalMaskedLoad
bool isLegalMaskedLoad(Type *DataType, Align Alignment) const
Return true if the target supports masked load.
Definition: TargetTransformInfo.cpp:387
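A minimal vectorizer-style legality sketch: before emitting an llvm.masked.load for an <8 x i32> access with 4-byte alignment, ask the target whether that form is supported (helper name hypothetical):
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;

static bool canUseMaskedLoad(const TargetTransformInfo &TTI,
                             LLVMContext &Ctx) {
  Type *VecTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 8);
  return TTI.isLegalMaskedLoad(VecTy, Align(4));
}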
llvm::TargetTransformInfo::isSourceOfDivergence
bool isSourceOfDivergence(const Value *V) const
Returns whether V is a source of divergence.
Definition: TargetTransformInfo.cpp:240
llvm::TargetTransformInfo::Concept::getReplicationShuffleCost
virtual InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::simplifyDemandedVectorEltsIntrinsic
Optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Can be used to implement target-specific instruction combining.
Definition: TargetTransformInfo.cpp:315
llvm::TargetTransformInfo::CastContextHint::Reversed
@ Reversed
The cast is used with a reversed load/store.
llvm::TargetTransformInfo::getPrefetchDistance
unsigned getPrefetchDistance() const
Definition: TargetTransformInfo.cpp:660
llvm::HardwareLoopInfo::CounterInReg
bool CounterInReg
Definition: TargetTransformInfo.h:107
llvm::TargetTransformInfo::Concept::isIndexedStoreLegal
virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const =0
llvm::TargetTransformInfo::Concept::supportsScalableVectors
virtual bool supportsScalableVectors() const =0
llvm::TargetTransformInfo::Concept::isLegalToVectorizeLoad
virtual bool isLegalToVectorizeLoad(LoadInst *LI) const =0
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::TargetTransformInfo::Concept::isLegalToVectorizeStoreChain
virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const =0
llvm::TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const
Return true if it is faster to check if a floating-point value is NaN (or not-NaN) versus a compariso...
Definition: TargetTransformInfo.cpp:550
llvm::AnalysisManager::Invalidator
API to communicate dependencies between analyses during invalidation.
Definition: PassManager.h:670
llvm::TargetTransformInfo::Concept::getMemcpyLoopResidualLoweringType
virtual void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign) const =0
Align
uint64_t Align
Definition: ELFObjHandler.cpp:82
llvm::TargetTransformInfo::RGK_FixedWidthVector
@ RGK_FixedWidthVector
Definition: TargetTransformInfo.h:916
llvm::TargetTransformInfo::Concept::allowsMisalignedMemoryAccesses
virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, bool *Fast)=0
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::TargetTransformInfo::areTypesABICompatible
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const
Definition: TargetTransformInfo.cpp:995
llvm::None
const NoneType None
Definition: None.h:23
llvm::TargetTransformInfo::Concept::getMemcpyLoopLoweringType
virtual Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign) const =0
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
llvm::IntrinsicCostAttributes
Definition: TargetTransformInfo.h:118
BranchProbability.h
llvm::TargetTransformInfo::VPLegalization::VPLegalization
VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)
Definition: TargetTransformInfo.h:1439
llvm::TargetTransformInfo::Concept::getDataLayout
virtual const DataLayout & getDataLayout() const =0
llvm::TargetTransformInfo::hasVolatileVariant
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const
Return true if the given instruction (assumed to be a memory access instruction) has a volatile varia...
Definition: TargetTransformInfo.cpp:437
llvm::TargetTransformInfo::PSK_Software
@ PSK_Software
Definition: TargetTransformInfo.h:595
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::TargetTransformInfo::Concept::isElementTypeLegalForScalableVector
virtual bool isElementTypeLegalForScalableVector(Type *Ty) const =0
Operands
mir Rename Register Operands
Definition: MIRNamerPass.cpp:78
llvm::TargetTransformInfo::isLegalMaskedCompressStore
bool isLegalMaskedCompressStore(Type *DataType) const
Return true if the target supports masked compress store.
Definition: TargetTransformInfo.cpp:421
llvm::TargetTransformInfo::haveFastSqrt
bool haveFastSqrt(Type *Ty) const
Return true if the hardware has a fast square-root instruction.
Definition: TargetTransformInfo.cpp:546
llvm::createTargetTransformInfoWrapperPass
ImmutablePass * createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA)
Create an analysis pass wrapper around a TTI object.
Definition: TargetTransformInfo.cpp:1204
llvm::TargetTransformInfo::VPLegalization::EVLParamStrategy
VPTransform EVLParamStrategy
Definition: TargetTransformInfo.h:1428
llvm::TargetTransformInfo::ReductionFlags::ReductionFlags
ReductionFlags()
Definition: TargetTransformInfo.h:1367
llvm::TargetTransformInfo::SK_Reverse
@ SK_Reverse
Reverse the order of the vector.
Definition: TargetTransformInfo.h:870
llvm::TargetTransformInfo::Concept::collectFlatAddressOperands
virtual bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const =0
llvm::TargetTransformInfo::VPLegalization::VPTransform
VPTransform
Definition: TargetTransformInfo.h:1415
llvm::TargetTransformInfo::getFlatAddressSpace
unsigned getFlatAddressSpace() const
Returns the address space ID for a target's 'flat' address space.
Definition: TargetTransformInfo.cpp:248
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
llvm::AtomicOrdering
AtomicOrdering
Atomic ordering for LLVM's memory model.
Definition: AtomicOrdering.h:56
llvm::TargetTransformInfo::CastContextHint::Normal
@ Normal
The cast is used with a normal load/store.
llvm::TargetTransformInfo::Concept::~Concept
virtual ~Concept()=0
Definition: TargetTransformInfo.cpp:1158
llvm::TargetTransformInfo::Concept::getIntrinsicInstrCost
virtual InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)=0
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:77
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:309
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::TargetTransformInfo::Concept::hasActiveVectorLength
virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const =0
llvm::TargetTransformInfo::SK_InsertSubvector
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
Definition: TargetTransformInfo.h:875
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::TargetTransformInfo::Concept::getInstructionLatency
virtual InstructionCost getInstructionLatency(const Instruction *I)=0
llvm::TargetTransformInfo::Concept::isProfitableLSRChainElement
virtual bool isProfitableLSRChainElement(Instruction *I)=0
llvm::TargetTransformInfo::shouldBuildLookupTablesForConstant
bool shouldBuildLookupTablesForConstant(Constant *C) const
Return true if switches should be turned into lookup tables containing this constant value for the ta...
Definition: TargetTransformInfo.cpp:481
llvm::TargetTransformInfo::Concept::getRegUsageForType
virtual InstructionCost getRegUsageForType(Type *Ty)=0
llvm::TargetTransformInfo::MIM_PostInc
@ MIM_PostInc
Post-incrementing.
Definition: TargetTransformInfo.h:1318
llvm::TargetTransformInfo::shouldMaximizeVectorBandwidth
bool shouldMaximizeVectorBandwidth() const
Definition: TargetTransformInfo.cpp:626
llvm::HardwareLoopInfo::HardwareLoopInfo
HardwareLoopInfo()=delete
Index
uint32_t Index
Definition: ELFObjHandler.cpp:83
llvm::TargetTransformInfo::LSRCost
Definition: TargetTransformInfo.h:417
llvm::TargetTransformInfo::collectFlatAddressOperands
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
Return any intrinsic address operand indexes which may be rewritten if they use a flat address space ...
Definition: TargetTransformInfo.cpp:252
llvm::TargetTransformInfo::VPLegalization::OpStrategy
VPTransform OpStrategy
Definition: TargetTransformInfo.h:1434
llvm::TargetTransformInfo::isLegalMaskedGather
bool isLegalMaskedGather(Type *DataType, Align Alignment) const
Return true if the target supports masked gather.
Definition: TargetTransformInfo.cpp:401
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2474
llvm::TargetTransformInfo::Concept::getInterleavedMemoryOpCost
virtual InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)=0
llvm::TargetTransformInfo::emitGetActiveLaneMask
bool emitGetActiveLaneMask() const
Query the target whether lowering of the llvm.get.active.lane.mask intrinsic is supported.
Definition: TargetTransformInfo.cpp:298
llvm::TargetTransformInfo::preferPredicatedReductionSelect
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, ReductionFlags Flags) const
Definition: TargetTransformInfo.cpp:1063
llvm::TargetTransformInfo::Concept::hasDivRemOp
virtual bool hasDivRemOp(Type *DataType, bool IsSigned)=0
llvm::TargetTransformInfo::getInterleavedMemoryOpCost
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, bool UseMaskForCond=false, bool UseMaskForGaps=false) const
Definition: TargetTransformInfo.cpp:884
llvm::TargetTransformInfo::ReductionFlags::NoNaN
bool NoNaN
If op is an fp min/max, whether NaNs may be present.
Definition: TargetTransformInfo.h:1370
llvm::TargetTransformInfo::Concept::shouldBuildLookupTables
virtual bool shouldBuildLookupTables()=0
llvm::TargetTransformInfo::OK_UniformConstantValue
@ OK_UniformConstantValue
Definition: TargetTransformInfo.h:890
llvm::TargetTransformInfo::forceScalarizeMaskedGather
bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const
Return true if the target forces scalarizing of llvm.masked.gather intrinsics.
Definition: TargetTransformInfo.cpp:411
llvm::TargetIRAnalysis::Result
TargetTransformInfo Result
Definition: TargetTransformInfo.h:2420
llvm::TargetTransformInfo::getMaxInterleaveFactor
unsigned getMaxInterleaveFactor(unsigned VF) const
Definition: TargetTransformInfo.cpp:679
llvm::TargetTransformInfo::VPLegalization::shouldDoNothing
bool shouldDoNothing() const
Definition: TargetTransformInfo.h:1436
llvm::TargetTransformInfo::getRegisterClassForType
unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
Definition: TargetTransformInfo.cpp:600
move
Definition: README.txt:546
llvm::TargetTransformInfo::Concept::getMaximumVF
virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const =0
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::AnalysisKey
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: PassManager.h:72
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:431
llvm::TargetTransformInfo::getCostOfKeepingLiveOverCall
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const
Definition: TargetTransformInfo.cpp:956
llvm::TargetTransformInfo::Concept::getArithmeticInstrCost
virtual InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, OperandValueKind Opd1Info, OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo, ArrayRef< const Value * > Args, const Instruction *CxtI=nullptr)=0
llvm::AtomicOrdering::Unordered
@ Unordered
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::TargetTransformInfo::OperandValueProperties
OperandValueProperties
Additional properties of an operand's values.
Definition: TargetTransformInfo.h:895
llvm::TargetTransformInfo::Concept::isLegalMaskedStore
virtual bool isLegalMaskedStore(Type *DataType, Align Alignment)=0
llvm::TargetTransformInfo::shouldConsiderAddressTypePromotion
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
Definition: TargetTransformInfo.cpp:640
llvm::LoopAccessInfo
Drive the analysis of memory accesses in the loop.
Definition: LoopAccessAnalysis.h:514
llvm::TargetTransformInfo::Concept::getScalarizationOverhead
virtual InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract)=0
llvm::TargetTransformInfo::Concept::getVPMemoryOpCost
virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I)=0
llvm::TargetTransformInfo::getUserCost
InstructionCost getUserCost(const User *U, TargetCostKind CostKind) const
This is a helper function which calls the two-argument getUserCost with Operands which are the curren...
Definition: TargetTransformInfo.h:326
llvm::TargetTransformInfo::Concept::getTgtMemIntrinsic
virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)=0
llvm::TargetTransformInfo::getScalarizationOverhead
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract) const
Estimate the overhead of scalarizing an instruction.
Definition: TargetTransformInfo.cpp:495
llvm::TargetTransformInfo::getReplicationShuffleCost
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
Definition: TargetTransformInfo.cpp:846
llvm::TargetTransformInfo::PeelingPreferences::AllowLoopNestsPeeling
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
Definition: TargetTransformInfo.h:546
llvm::TargetTransformInfo::rewriteIntrinsicWithAddressSpace
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
Rewrite intrinsic call II such that OldV will be replaced with NewV, which has a different address sp...
Definition: TargetTransformInfo.cpp:276
llvm::TargetTransformInfo::Concept::getShuffleCost
virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, int Index, VectorType *SubTp)=0
llvm::TargetTransformInfo::Concept::shouldExpandReduction
virtual bool shouldExpandReduction(const IntrinsicInst *II) const =0
llvm::TargetTransformInfo::Concept::getLoadVectorFactor
virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const =0
llvm::TargetTransformInfo::getGatherScatterOpCost
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:875
llvm::TargetTransformInfo::VPLegalization::Discard
@ Discard
Definition: TargetTransformInfo.h:1419
llvm::TargetTransformInfo::Concept::getCastInstrCost
virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I)=0
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1707
llvm::TargetTransformInfo::Concept::isLoweredToCall
virtual bool isLoweredToCall(const Function *F)=0
llvm::TargetTransformInfo::LSRWithInstrQueries
bool LSRWithInstrQueries() const
Return true if the loop strength reduce pass should make Instruction* based TTI queries to isLegalAdd...
Definition: TargetTransformInfo.cpp:455
llvm::TargetTransformInfo::Concept::getScalingFactorCost
virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace)=0
llvm::TargetTransformInfo::getMinMaxReductionCost
InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Definition: TargetTransformInfo.cpp:939
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::TargetTransformInfo::isLegalToVectorizeLoadChain
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
Definition: TargetTransformInfo.cpp:1023
llvm::TargetTransformInfo::getGEPCost
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, TargetCostKind CostKind=TCK_SizeAndLatency) const
Estimate the cost of a GEP operation when lowered.
Definition: TargetTransformInfo.cpp:206
llvm::TargetTransformInfo::isLegalAddImmediate
bool isLegalAddImmediate(int64_t Imm) const
Return true if the specified immediate is a legal add immediate, that is, the target has add instruction...
Definition: TargetTransformInfo.cpp:336
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:163
llvm::TargetTransformInfo::Concept::getOperandsScalarizationOverhead
virtual InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys)=0
Mode
SI Whole Quad Mode
Definition: SIWholeQuadMode.cpp:262
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:83
llvm::TargetTransformInfo::LSRCost::ScaleCost
unsigned ScaleCost
Definition: TargetTransformInfo.h:427
llvm::TargetTransformInfo::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace=0, Align Alignment=Align(1), bool *Fast=nullptr) const
Determine if the target supports unaligned memory accesses.
Definition: TargetTransformInfo.cpp:532
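A minimal sketch checking both legality and speed of an unaligned 64-bit access in address space 0 (helper name hypothetical):
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;

static bool misalignedI64IsFast(const TargetTransformInfo &TTI,
                                LLVMContext &Ctx) {
  bool Fast = false;
  bool Allowed = TTI.allowsMisalignedMemoryAccesses(
      Ctx, /*BitWidth=*/64, /*AddressSpace=*/0, Align(1), &Fast);
  return Allowed && Fast;
}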
llvm::TargetTransformInfo::isLoweredToCall
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
Definition: TargetTransformInfo.cpp:281
llvm::TargetTransformInfo::SK_Splice
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
Definition: TargetTransformInfo.h:881
llvm::TargetTransformInfo::Concept::getVScaleForTuning
virtual Optional< unsigned > getVScaleForTuning() const =0
llvm::TargetTransformInfo::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:799
llvm::TargetTransformInfo::Concept::shouldBuildRelLookupTables
virtual bool shouldBuildRelLookupTables()=0
llvm::TargetTransformInfo::PSK_SlowHardware
@ PSK_SlowHardware
Definition: TargetTransformInfo.h:595
llvm::TargetTransformInfo::Concept::getRegisterClassName
virtual const char * getRegisterClassName(unsigned ClassID) const =0
llvm::AnalysisInfoMixin
A CRTP mix-in that provides informational APIs needed for analysis passes.
Definition: PassManager.h:397
llvm::TargetTransformInfo::ReductionFlags::IsSigned
bool IsSigned
Whether the operation is a signed int reduction.
Definition: TargetTransformInfo.h:1369
llvm::TargetTransformInfo::OperandValueKind
OperandValueKind
Additional information about an operand's possible values.
Definition: TargetTransformInfo.h:887
llvm::TargetTransformInfo::getMemcpyLoopResidualLoweringType
void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign) const
Definition: TargetTransformInfo.cpp:981
llvm::TargetTransformInfo::Concept::instCombineIntrinsic
virtual Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II)=0
llvm::MemIntrinsicInfo::ReadMem
bool ReadMem
Definition: TargetTransformInfo.h:83
llvm::TargetTransformInfo::Concept::getCmpSelInstrCost
virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I)=0
llvm::TargetTransformInfo::MemCmpExpansionOptions::MaxNumLoads
unsigned MaxNumLoads
Definition: TargetTransformInfo.h:775
InstructionCost.h
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::TargetTransformInfo::canSaveCmp
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const
Return true if the target can save a compare for loop count, for example hardware loop saves a compar...
Definition: TargetTransformInfo.cpp:369
llvm::TargetTransformInfo::isTruncateFree
bool isTruncateFree(Type *Ty1, Type *Ty2) const
Return true if it's free to truncate a value of type Ty1 to type Ty2.
Definition: TargetTransformInfo.cpp:459
llvm::TargetTransformInfo::prefersVectorizedAddressing
bool prefersVectorizedAddressing() const
Return true if target doesn't mind addresses in vectors.
Definition: TargetTransformInfo.cpp:442
llvm::TargetTransformInfo::MemCmpExpansionOptions
Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.
Definition: TargetTransformInfo.h:770
llvm::TargetTransformInfo::TCC_Free
@ TCC_Free
Expected to fold away in lowering.
Definition: TargetTransformInfo.h:262
llvm::TargetTransformInfo::PeelingPreferences::PeelCount
unsigned PeelCount
A forced peeling factor (the number of bodies of the original loop that should be peeled off before t...
Definition: TargetTransformInfo.h:542
llvm::TargetTransformInfo::supportsEfficientVectorElementLoadStore
bool supportsEfficientVectorElementLoadStore() const
If the target has efficient vector element load/store instructions, it can return true here so that inser...
Definition: TargetTransformInfo.cpp:506
llvm::TargetTransformInfo::enableMemCmpExpansion
MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
Definition: TargetTransformInfo.cpp:516
llvm::TargetTransformInfo::Concept::shouldConsiderAddressTypePromotion
virtual bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)=0
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1086
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:215
llvm::TargetTransformInfo::Concept::haveFastSqrt
virtual bool haveFastSqrt(Type *Ty)=0
llvm::TargetTransformInfo::Concept::isNoopAddrSpaceCast
virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const =0
llvm::TargetTransformInfo::isElementTypeLegalForScalableVector
bool isElementTypeLegalForScalableVector(Type *Ty) const
Definition: TargetTransformInfo.cpp:1040
llvm::TargetTransformInfo::isLegalMaskedStore
bool isLegalMaskedStore(Type *DataType, Align Alignment) const
Return true if the target supports masked store.
Definition: TargetTransformInfo.cpp:382
llvm::TargetTransformInfo::getPredicatedAddrSpace
std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const
Definition: TargetTransformInfo.cpp:272
llvm::TargetTransformInfo::getRegUsageForType
InstructionCost getRegUsageForType(Type *Ty) const
Returns the estimated number of registers required to represent Ty.
Definition: TargetTransformInfo.cpp:473
llvm::TargetTransformInfo::getUserCost
InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Definition: TargetTransformInfo.cpp:219
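For illustration, a minimal sketch that sums the size-and-latency cost of every instruction in a function, the way inlining-style heuristics typically consume this query (helper name hypothetical):
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
using namespace llvm;

static InstructionCost functionBodyCost(const TargetTransformInfo &TTI,
                                        Function &F) {
  InstructionCost Cost = 0;
  for (BasicBlock &BB : F)
    for (Instruction &I : BB)
      Cost += TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
  return Cost;
}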
llvm::TargetTransformInfo::Concept::isIndexedLoadLegal
virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const =0
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
llvm::BranchProbability
Definition: BranchProbability.h:30
llvm::TargetTransformInfo::Concept::getFlatAddressSpace
virtual unsigned getFlatAddressSpace()=0
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::TargetTransformInfo::UnrollingPreferences::DefaultUnrollRuntimeCount
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
Definition: TargetTransformInfo.h:467
llvm::TargetTransformInfo::hasDivRemOp
bool hasDivRemOp(Type *DataType, bool IsSigned) const
Return true if the target has a unified operation to calculate division and remainder.
Definition: TargetTransformInfo.cpp:433
llvm::TargetTransformInfo::Concept::LSRWithInstrQueries
virtual bool LSRWithInstrQueries()=0
llvm::TargetTransformInfo::simplifyDemandedUseBitsIntrinsic
Optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
Can be used to implement target-specific instruction combining.
Definition: TargetTransformInfo.cpp:308
llvm::TargetTransformInfo::getCacheSize
Optional< unsigned > getCacheSize(CacheLevel Level) const
Definition: TargetTransformInfo.cpp:651
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::TargetTransformInfo::getNumberOfRegisters
unsigned getNumberOfRegisters(unsigned ClassID) const
Definition: TargetTransformInfo.cpp:596
llvm::InstructionCost::isValid
bool isValid() const
Definition: InstructionCost.h:79
llvm::TargetTransformInfo::AddressingModeKind
AddressingModeKind
Definition: TargetTransformInfo.h:642
llvm::TargetTransformInfo::getMaxPrefetchIterationsAhead
unsigned getMaxPrefetchIterationsAhead() const
Definition: TargetTransformInfo.cpp:671
llvm::TargetTransformInfo::MIM_Unindexed
@ MIM_Unindexed
No indexing.
Definition: TargetTransformInfo.h:1315
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:431
llvm::TargetTransformInfo::OK_AnyValue
@ OK_AnyValue
Definition: TargetTransformInfo.h:888
llvm::TargetTransformInfo::getLoadVectorFactor
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
Definition: TargetTransformInfo.cpp:1044
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:180
llvm::TargetTransformInfo::SK_Transpose
@ SK_Transpose
Transpose two vectors.
Definition: TargetTransformInfo.h:874
llvm::TargetTransformInfo::CastContextHint::None
@ None
The cast is not used with a load/store of any kind.
llvm::TargetTransformInfo::isLegalToVectorizeStore
bool isLegalToVectorizeStore(StoreInst *SI) const
Definition: TargetTransformInfo.cpp:1019
llvm::TargetTransformInfo::CacheLevel::L2D
@ L2D
llvm::TargetTransformInfo::Concept::getAssumedAddrSpace
virtual unsigned getAssumedAddrSpace(const Value *V) const =0
llvm::TargetTransformInfo::MIM_PreInc
@ MIM_PreInc
Pre-incrementing.
Definition: TargetTransformInfo.h:1316
llvm::TargetTransformInfo::MemCmpExpansionOptions::LoadSizes
SmallVector< unsigned, 8 > LoadSizes
Definition: TargetTransformInfo.h:778
llvm::TargetIRAnalysis::TargetIRAnalysis
TargetIRAnalysis()
Default construct a target IR analysis.
Definition: TargetTransformInfo.cpp:1160
llvm::TargetTransformInfo::Concept::preferInLoopReduction
virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags) const =0
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:185
llvm::TargetTransformInfo::enableOrderedReductions
bool enableOrderedReductions() const
Return true if we should be enabling ordered reductions for the target.
Definition: TargetTransformInfo.cpp:429
llvm::TargetTransformInfo::getMaxVScale
Optional< unsigned > getMaxVScale() const
Definition: TargetTransformInfo.cpp:618
llvm::TargetTransformInfo::Concept::forceScalarizeMaskedGather
virtual bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment)=0
llvm::TargetTransformInfo::Concept::isLegalICmpImmediate
virtual bool isLegalICmpImmediate(int64_t Imm)=0
llvm::TargetTransformInfo::getCacheAssociativity
Optional< unsigned > getCacheAssociativity(CacheLevel Level) const
Definition: TargetTransformInfo.cpp:656
llvm::TargetTransformInfo::getCacheLineSize
unsigned getCacheLineSize() const
Definition: TargetTransformInfo.cpp:646
std
Definition: BitVector.h:838
llvm::TargetTransformInfo::enableMaskedInterleavedAccessVectorization
bool enableMaskedInterleavedAccessVectorization() const
Enable matching of interleaved access groups that contain predicated accesses or gaps and therefore v...
Definition: TargetTransformInfo.cpp:524
llvm::KnownBits
Definition: KnownBits.h:23
llvm::TargetTransformInfo::LSRCost::NumBaseAdds
unsigned NumBaseAdds
Definition: TargetTransformInfo.h:424
llvm::TargetIRAnalysis::operator=
TargetIRAnalysis & operator=(TargetIRAnalysis &&RHS)
Definition: TargetTransformInfo.h:2443
llvm::HardwareLoopInfo::ExitBlock
BasicBlock * ExitBlock
Definition: TargetTransformInfo.h:99
llvm::MemIntrinsicInfo::WriteMem
bool WriteMem
Definition: TargetTransformInfo.h:84
llvm::TargetTransformInfo::UnrollingPreferences::UpperBound
bool UpperBound
Allow using trip count upper bound to unroll loops.
Definition: TargetTransformInfo.h:498
llvm::VPIntrinsic
This is the common base class for vector predication intrinsics.
Definition: IntrinsicInst.h:390
llvm::TargetTransformInfo::isNoopAddrSpaceCast
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Definition: TargetTransformInfo.cpp:257
llvm::TypeSize
Definition: TypeSize.h:416
llvm::TargetTransformInfo::getLoadStoreVecRegBitWidth
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
Definition: TargetTransformInfo.cpp:1011
llvm::TargetTransformInfo::UnrollingPreferences::AllowRemainder
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
Definition: TargetTransformInfo.h:490
llvm::TargetTransformInfo::Concept::enableAggressiveInterleaving
virtual bool enableAggressiveInterleaving(bool LoopHasReductions)=0
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
llvm::TargetTransformInfo::isFPVectorizationPotentiallyUnsafe
bool isFPVectorizationPotentiallyUnsafe() const
Indicate that it is potentially unsafe to automatically vectorize floating-point operations because t...
Definition: TargetTransformInfo.cpp:528
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
PassManager.h
Arguments
AMDGPU Lower Kernel Arguments
Definition: AMDGPULowerKernelArguments.cpp:243
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:221
llvm::User::operand_values
iterator_range< value_op_iterator > operand_values()
Definition: User.h:266
llvm::TargetTransformInfo::LSRCost::ImmCost
unsigned ImmCost
Definition: TargetTransformInfo.h:425
llvm::TargetTransformInfo::hasActiveVectorLength
bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const
Definition: TargetTransformInfo.cpp:1089
llvm::TargetTransformInfo::forceScalarizeMaskedScatter
bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const
Return true if the target forces scalarizing of llvm.masked.scatter intrinsics.
Definition: TargetTransformInfo.cpp:416
llvm::TargetIRAnalysis::run
Result run(const Function &F, FunctionAnalysisManager &)
Definition: TargetTransformInfo.cpp:1166
llvm::TargetTransformInfo::getInstructionCost
InstructionCost getInstructionCost(const Instruction *I, enum TargetCostKind kind) const
Query the cost of a specified instruction.
Definition: TargetTransformInfo.h:225
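A small sketch of the typical accumulation pattern; InstructionCost propagates an invalid state if any instruction cannot be costed. The helper name is hypothetical.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;
// Hypothetical helper: estimate the code-size cost of a basic block.
static InstructionCost blockCodeSize(const BasicBlock &BB,
                                     const TargetTransformInfo &TTI) {
  InstructionCost Cost = 0;
  for (const Instruction &I : BB)
    Cost += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
  return Cost; // Check Cost.isValid() before acting on the number.
}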
llvm::HardwareLoopInfo::HardwareLoopInfo
HardwareLoopInfo(Loop *L)
Definition: TargetTransformInfo.h:97
llvm::TargetTransformInfo::Concept::getFPOpCost
virtual InstructionCost getFPOpCost(Type *Ty)=0
llvm::TargetTransformInfo::getInliningThresholdMultiplier
unsigned getInliningThresholdMultiplier() const
Definition: TargetTransformInfo.cpp:192
llvm::TargetTransformInfo::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index=-1) const
Definition: TargetTransformInfo.cpp:838
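A common use is estimating scalarization overhead by summing per-lane extract costs; a sketch with a hypothetical helper:
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;
// Hypothetical helper: cost of extracting every lane of a fixed-width vector.
static InstructionCost extractAllLanesCost(const TargetTransformInfo &TTI,
                                           FixedVectorType *VecTy) {
  InstructionCost Cost = 0;
  for (unsigned I = 0, E = VecTy->getNumElements(); I != E; ++I)
    Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, I);
  return Cost;
}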
llvm::InstCombiner
The core instruction combiner logic.
Definition: InstCombiner.h:45
llvm::TargetTransformInfo::adjustInliningThreshold
unsigned adjustInliningThreshold(const CallBase *CB) const
Definition: TargetTransformInfo.cpp:197
llvm::TargetTransformInfo::getMaskedMemoryOpCost
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Definition: TargetTransformInfo.cpp:866
llvm::TargetTransformInfo::RGK_ScalableVector
@ RGK_ScalableVector
Definition: TargetTransformInfo.h:916
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:45
llvm::HardwareLoopInfo
Attributes of a target dependent hardware loop.
Definition: TargetTransformInfo.h:95
llvm::InstructionCost::getInvalid
static InstructionCost getInvalid(CostType Val=0)
Definition: InstructionCost.h:73
Vector
Definition: README_P9.txt:497
llvm::TargetTransformInfo::getEstimatedNumberOfCaseClusters
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
Definition: TargetTransformInfo.cpp:212
llvm::TargetTransformInfo::TargetTransformInfo
TargetTransformInfo(T Impl)
Construct a TTI object using a type implementing the Concept API below.
Definition: TargetTransformInfo.h:2404
llvm::TargetTransformInfo::getExtendedAddReductionCost
InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of an extended reduction pattern, similar to getArithmeticReductionCost of an Add ...
Definition: TargetTransformInfo.cpp:948
llvm::RecurrenceDescriptor
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
Definition: IVDescriptors.h:73
llvm::TargetTransformInfo::Concept::getCallInstrCost
virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::Concept::getArithmeticReductionCost
virtual InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, Optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::MemCmpExpansionOptions::NumLoadsPerBlock
unsigned NumLoadsPerBlock
Definition: TargetTransformInfo.h:788
llvm::TargetTransformInfo::getArithmeticReductionCost
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, Optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of vector reduction intrinsics.
Definition: TargetTransformInfo.cpp:930
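A sketch querying the throughput cost of a fast-math fadd reduction over <4 x float>; the helper name and the chosen type are assumptions for illustration.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Operator.h" // FastMathFlags
using namespace llvm;
// Hypothetical helper, not part of LLVM.
static InstructionCost faddReductionCost(const TargetTransformInfo &TTI,
                                         LLVMContext &Ctx) {
  auto *VecTy = FixedVectorType::get(Type::getFloatTy(Ctx), 4);
  FastMathFlags FMF;
  FMF.setFast(); // Reassociation is allowed, so a tree reduction is valid.
  return TTI.getArithmeticReductionCost(Instruction::FAdd, VecTy, FMF,
                                        TargetTransformInfo::TCK_RecipThroughput);
}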
llvm::IntrinsicCostAttributes::getID
Intrinsic::ID getID() const
Definition: TargetTransformInfo.h:148
llvm::TargetTransformInfo::Concept::getGISelRematGlobalCost
virtual unsigned getGISelRematGlobalCost() const =0
llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition: TargetTransformInfo.h:916
llvm::TargetTransformInfo::Concept::getIntImmCostInst
virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr)=0
llvm::TargetTransformInfo::Concept::getCFInstrCost
virtual InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
llvm::TargetTransformInfo::invalidate
bool invalidate(Function &, const PreservedAnalyses &, FunctionAnalysisManager::Invalidator &)
Handle the invalidation of this information.
Definition: TargetTransformInfo.h:197
llvm::TargetTransformInfo::Concept::getInlinerVectorBonusPercent
virtual int getInlinerVectorBonusPercent()=0
llvm::TargetTransformInfo::Concept::isLegalAddImmediate
virtual bool isLegalAddImmediate(int64_t Imm)=0
llvm::TargetTransformInfo::UnrollingPreferences::Threshold
unsigned Threshold
The cost threshold for the unrolled loop.
Definition: TargetTransformInfo.h:439
llvm::TargetTransformInfo::Concept::enableScalableVectorization
virtual bool enableScalableVectorization() const =0
llvm::TargetTransformInfo::Concept::getNumberOfParts
virtual unsigned getNumberOfParts(Type *Tp)=0
llvm::TargetTransformInfo::Concept::getPredicatedAddrSpace
virtual std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const =0
llvm::TargetTransformInfo::VPLegalization::Legal
@ Legal
Definition: TargetTransformInfo.h:1417
llvm::TargetTransformInfo::Concept::shouldBuildLookupTablesForConstant
virtual bool shouldBuildLookupTablesForConstant(Constant *C)=0
llvm::TargetTransformInfo::Concept::isProfitableToHoist
virtual bool isProfitableToHoist(Instruction *I)=0
llvm::TargetTransformInfo::getMinPrefetchStride
unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Some HW prefetchers can handle accesses up to a certain constant stride.
Definition: TargetTransformInfo.cpp:664
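A sketch of the prefetch-insertion style check: emit a software prefetch only for strides the hardware prefetcher is not expected to cover. Helper and parameter names are hypothetical.
#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;
// Hypothetical helper, not part of LLVM.
static bool worthSoftwarePrefetch(const TargetTransformInfo &TTI,
                                  unsigned StrideBytes, unsigned NumMemAccesses,
                                  unsigned NumStridedMemAccesses,
                                  unsigned NumPrefetches, bool HasCall) {
  unsigned MinStride = TTI.getMinPrefetchStride(
      NumMemAccesses, NumStridedMemAccesses, NumPrefetches, HasCall);
  return StrideBytes >= MinStride; // Below this, HW prefetching should suffice.
}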
llvm::TargetTransformInfo::isIndexedStoreLegal
bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const
Definition: TargetTransformInfo.cpp:1006
llvm::TargetTransformInfo::isNumRegsMajorCostOfLSR
bool isNumRegsMajorCostOfLSR() const
Return true if the major cost of LSR is the number of registers.
Definition: TargetTransformInfo.cpp:357
llvm::TargetTransformInfo::shouldExpandReduction
bool shouldExpandReduction(const IntrinsicInst *II) const
Definition: TargetTransformInfo.cpp:1073
llvm::TargetTransformInfo::Concept::isLegalAddressingMode
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I)=0
llvm::TargetTransformInfo::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP) const
Get target-customized preferences for the generic loop peeling transformation.
Definition: TargetTransformInfo.cpp:331
llvm::TargetTransformInfo::getStoreVectorFactor
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
Definition: TargetTransformInfo.cpp:1051
llvm::TargetTransformInfo::Concept::getCacheAssociativity
virtual Optional< unsigned > getCacheAssociativity(CacheLevel Level) const =0
llvm::TargetTransformInfo::MIM_PostDec
@ MIM_PostDec
Post-decrementing.
Definition: TargetTransformInfo.h:1319
llvm::HardwareLoopInfo::canAnalyze
bool canAnalyze(LoopInfo &LI)
Definition: TargetTransformInfo.cpp:47
llvm::TargetTransformInfo::isLegalToVectorizeStoreChain
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
Definition: TargetTransformInfo.cpp:1029
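A sketch in the spirit of a load/store vectorizer (helper name assumed): check both that the chain may be vectorized at all and that it fits the vector register width for its address space.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;
// Hypothetical helper, not part of LLVM.
static bool storeChainFits(const TargetTransformInfo &TTI, unsigned NumStores,
                           unsigned EltBytes, Align Alignment,
                           unsigned AddrSpace) {
  unsigned ChainBytes = NumStores * EltBytes;
  return TTI.isLegalToVectorizeStoreChain(ChainBytes, Alignment, AddrSpace) &&
         ChainBytes * 8 <= TTI.getLoadStoreVecRegBitWidth(AddrSpace);
}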
llvm::SmallVectorImpl< const Value * >
ForceHardwareLoopPHI
static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))
llvm::TargetTransformInfo::Concept::preferPredicatedReductionSelect
virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, ReductionFlags) const =0
llvm::MemIntrinsicInfo
Information about a load/store intrinsic defined by the target.
Definition: TargetTransformInfo.h:70
llvm::TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize
unsigned getAtomicMemIntrinsicMaxElementSize() const
Definition: TargetTransformInfo.cpp:965
llvm::msgpack::Type
Type
MessagePack types as defined in the standard, with the exception of Integer being divided into a sign...
Definition: MsgPackReader.h:49
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to callin...
Definition: InstrTypes.h:1176
DataTypes.h
llvm::TargetTransformInfo::enableAggressiveInterleaving
bool enableAggressiveInterleaving(bool LoopHasReductions) const
Don't restrict interleaved unrolling to small loops.
Definition: TargetTransformInfo.cpp:510
llvm::TargetTransformInfo::Concept::areInlineCompatible
virtual bool areInlineCompatible(const Function *Caller, const Function *Callee) const =0
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::TargetTransformInfo::UnrollingPreferences::OptSizeThreshold
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
Definition: TargetTransformInfo.h:453
llvm::TargetTransformInfo::Concept::getMinVectorRegisterBitWidth
virtual unsigned getMinVectorRegisterBitWidth() const =0
llvm::TargetTransformInfo::getCallInstrCost
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency) const
Definition: TargetTransformInfo.cpp:904
llvm::TargetTransformInfo::getOperandInfo
static OperandValueKind getOperandInfo(const Value *V, OperandValueProperties &OpProps)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
Definition: TargetTransformInfo.cpp:684
llvm::TargetTransformInfo::getExtractWithExtendCost
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index=-1) const
Definition: TargetTransformInfo.cpp:810
llvm::MemIntrinsicInfo::MatchingId
unsigned short MatchingId
Definition: TargetTransformInfo.h:81
llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition: TargetTransformInfo.h:263
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3236
llvm::TargetTransformInfo::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueKind Opd1Info=OK_AnyValue, OperandValueKind Opd2Info=OK_AnyValue, OperandValueProperties Opd1PropInfo=OP_None, OperandValueProperties Opd2PropInfo=OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
Definition: TargetTransformInfo.cpp:733
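The operand-kind and operand-property arguments let the target refine the estimate, for example when a multiply by a power-of-two constant lowers to a shift. A sketch with a hypothetical helper that feeds getOperandInfo results back into the query:
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;
// Hypothetical helper, not part of LLVM.
static InstructionCost mulThroughputCost(const TargetTransformInfo &TTI,
                                         Type *Ty, const Value *Op0,
                                         const Value *Op1) {
  TargetTransformInfo::OperandValueProperties P0 = TargetTransformInfo::OP_None;
  TargetTransformInfo::OperandValueProperties P1 = TargetTransformInfo::OP_None;
  TargetTransformInfo::OperandValueKind K0 =
      TargetTransformInfo::getOperandInfo(Op0, P0);
  TargetTransformInfo::OperandValueKind K1 =
      TargetTransformInfo::getOperandInfo(Op1, P1);
  return TTI.getArithmeticInstrCost(Instruction::Mul, Ty,
                                    TargetTransformInfo::TCK_RecipThroughput,
                                    K0, K1, P0, P1);
}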
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::TargetTransformInfo::Concept::getUserCost
virtual InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind)=0
llvm::TargetTransformInfo::Concept::getPreferredAddressingMode
virtual AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const =0
llvm::IntrinsicCostAttributes::isTypeBasedOnly
bool isTypeBasedOnly() const
Definition: TargetTransformInfo.h:156
llvm::TargetTransformInfo::isProfitableToHoist
bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist an instruction from the then/else block to before the if.
Definition: TargetTransformInfo.cpp:463
llvm::TargetTransformInfo::shouldBuildRelLookupTables
bool shouldBuildRelLookupTables() const
Return true if lookup tables should be turned into relative lookup tables.
Definition: TargetTransformInfo.cpp:486
llvm::TargetTransformInfo::Concept::getMaxInterleaveFactor
virtual unsigned getMaxInterleaveFactor(unsigned VF)=0
llvm::TargetTransformInfo::Concept::getStoreVectorFactor
virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const =0
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3092
llvm::TargetTransformInfo::Concept::getLoadStoreVecRegBitWidth
virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const =0
llvm::TargetTransformInfo::~TargetTransformInfo
~TargetTransformInfo()
Definition: TargetTransformInfo.cpp:182
llvm::TargetTransformInfo::Concept::getCacheLineSize
virtual unsigned getCacheLineSize() const =0
llvm::AMDGPU::VGPRIndexMode::Id
Id
Definition: SIDefines.h:232
llvm::TargetTransformInfo::Concept::canHaveNonUndefGlobalInitializerInAddressSpace
virtual bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const =0
llvm::TargetTransformInfo::Concept::adjustInliningThreshold
virtual unsigned adjustInliningThreshold(const CallBase *CB)=0
llvm::TargetTransformInfo::Concept::getIntImmCostIntrin
virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind)=0
llvm::TargetTransformInfo::OK_NonUniformConstantValue
@ OK_NonUniformConstantValue
Definition: TargetTransformInfo.h:891
llvm::TargetTransformInfo::getTgtMemIntrinsic
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
Definition: TargetTransformInfo.cpp:960
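A sketch (hypothetical helper) of how an analysis pass might treat a target intrinsic as a plain load when the target fills in MemIntrinsicInfo accordingly:
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;
// Hypothetical helper, not part of LLVM.
static const Value *getSimpleLoadPointer(const TargetTransformInfo &TTI,
                                         IntrinsicInst *II) {
  MemIntrinsicInfo Info;
  if (TTI.getTgtMemIntrinsic(II, Info) && Info.ReadMem && !Info.WriteMem &&
      Info.isUnordered())
    return Info.PtrVal; // Behaves like a simple load from this pointer.
  return nullptr;
}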
llvm::TargetTransformInfo::Concept::isLegalMaskedCompressStore
virtual bool isLegalMaskedCompressStore(Type *DataType)=0
llvm::TargetTransformInfo::useAA
bool useAA() const
Definition: TargetTransformInfo.cpp:467
llvm::TargetTransformInfo::Concept::getInliningThresholdMultiplier
virtual unsigned getInliningThresholdMultiplier()=0
llvm::HardwareLoopInfo::CountType
IntegerType * CountType
Definition: TargetTransformInfo.h:102
llvm::TargetTransformInfo::enableWritePrefetching
bool enableWritePrefetching() const
Definition: TargetTransformInfo.cpp:675
llvm::TargetTransformInfo::getFPOpCost
InstructionCost getFPOpCost(Type *Ty) const
Return the expected cost of supporting the floating point operation of the specified type.
Definition: TargetTransformInfo.cpp:554
llvm::TargetTransformInfo::Concept::getMemoryOpCost
virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I)=0
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::TargetTransformInfo::TCK_RecipThroughput
@ TCK_RecipThroughput
Reciprocal throughput.
Definition: TargetTransformInfo.h:212
llvm::IntrinsicCostAttributes::getArgs
const SmallVectorImpl< const Value * > & getArgs() const
Definition: TargetTransformInfo.h:153
llvm::TargetTransformInfo::AMK_None
@ AMK_None
Definition: TargetTransformInfo.h:645
llvm::TargetTransformInfo::SK_ExtractSubvector
@ SK_ExtractSubvector
ExtractSubvector Index indicates start offset.
Definition: TargetTransformInfo.h:876
llvm::TargetTransformInfo::AMK_PreIndexed
@ AMK_PreIndexed
Definition: TargetTransformInfo.h:643
llvm::AtomicOrdering::NotAtomic
@ NotAtomic
llvm::TargetTransformInfo::Concept::isLegalToVectorizeStore
virtual bool isLegalToVectorizeStore(StoreInst *SI) const =0
llvm::TargetTransformInfo::getVPMemoryOpCost
InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
llvm::TargetTransformInfo::isAlwaysUniform
bool isAlwaysUniform(const Value *V) const
Definition: TargetTransformInfo.cpp:244
llvm::TargetTransformInfo::Concept::getMemcpyCost
virtual InstructionCost getMemcpyCost(const Instruction *I)=0
llvm::TargetTransformInfo::isLegalAddressingMode
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
Definition: TargetTransformInfo.cpp:344
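A sketch (hypothetical helper) asking whether a reg+reg form with a scale of 4 and no folded offset is legal for a given access type in address space 0:
#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;
// Hypothetical helper, not part of LLVM.
static bool canFoldScaledIndex(const TargetTransformInfo &TTI, Type *AccessTy) {
  return TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/nullptr,
                                   /*BaseOffset=*/0, /*HasBaseReg=*/true,
                                   /*Scale=*/4, /*AddrSpace=*/0);
}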
llvm::TargetTransformInfo::getMemcpyCost
InstructionCost getMemcpyCost(const Instruction *I) const
Definition: TargetTransformInfo.cpp:924
llvm::TargetTransformInfo::Concept::simplifyDemandedUseBitsIntrinsic
virtual Optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)=0
llvm::TargetTransformInfo::UnrollingPreferences::MaxPercentThresholdBoost
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain perc...
Definition: TargetTransformInfo.h:450
llvm::TargetTransformInfo::Concept::getVectorInstrCost
virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)=0
llvm::TargetTransformInfo::Concept::forceScalarizeMaskedScatter
virtual bool forceScalarizeMaskedScatter(VectorType *DataType, Align Alignment)=0
llvm::TargetTransformInfo::Concept::getUnrollingPreferences
virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)=0
llvm::MemIntrinsicInfo::IsVolatile
bool IsVolatile
Definition: TargetTransformInfo.h:85
llvm::TargetTransformInfo::Concept::getMinPrefetchStride
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const =0
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::IntrinsicCostAttributes::IntrinsicCostAttributes
IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, InstructionCost ScalarCost=InstructionCost::getInvalid())
Definition: TargetTransformInfo.cpp:57