//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This pass exposes codegen information to IR-level passes. Every
/// transformation that uses codegen information is broken into three parts:
/// 1. The IR-level analysis pass.
/// 2. The IR-level transformation interface which provides the needed
///    information.
/// 3. Codegen-level implementation which uses target-specific hooks.
///
/// This file defines #2, which is the interface that IR-level transformations
/// use for querying the codegen.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

#include "llvm/ADT/Optional.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/InstructionCost.h"
#include <functional>

namespace llvm {

namespace Intrinsic {
typedef unsigned ID;
}

class AssumptionCache;
class BlockFrequencyInfo;
class DominatorTree;
class BranchInst;
class CallBase;
class ExtractElementInst;
class Function;
class GlobalValue;
class InstCombiner;
class OptimizationRemarkEmitter;
class IntrinsicInst;
class LoadInst;
class LoopAccessInfo;
class Loop;
class LoopInfo;
class ProfileSummaryInfo;
class SCEV;
class ScalarEvolution;
class StoreInst;
class SwitchInst;
class TargetLibraryInfo;
class Type;
class User;
class Value;
class VPIntrinsic;
struct KnownBits;
template <typename T> class Optional;

/// Information about a load/store intrinsic defined by the target.
struct MemIntrinsicInfo {
  /// This is the pointer that the intrinsic is loading from or storing to.
  /// If this is non-null, then analysis/optimization passes can assume that
  /// this intrinsic is functionally equivalent to a load/store from this
  /// pointer.
  Value *PtrVal = nullptr;

  // Ordering for atomic operations.
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;

  // Same Id is set by the target for corresponding load/store intrinsics.
  unsigned short MatchingId = 0;

  bool ReadMem = false;
  bool WriteMem = false;
  bool IsVolatile = false;

  bool isUnordered() const {
    return (Ordering == AtomicOrdering::NotAtomic ||
            Ordering == AtomicOrdering::Unordered) &&
           !IsVolatile;
  }
};

/// Attributes of a target dependent hardware loop.
struct HardwareLoopInfo {
  HardwareLoopInfo() = delete;
  HardwareLoopInfo(Loop *L) : L(L) {}
  Loop *L = nullptr;
  BasicBlock *ExitBlock = nullptr;
  BranchInst *ExitBranch = nullptr;
  const SCEV *TripCount = nullptr;
  IntegerType *CountType = nullptr;
  Value *LoopDecrement = nullptr; // Decrement the loop counter by this
                                  // value in every iteration.
  bool IsNestingLegal = false;    // Can a hardware loop be a parent to
                                  // another hardware loop?
  bool CounterInReg = false;      // Should loop counter be updated in
                                  // the loop via a phi?
  bool PerformEntryTest = false;  // Generate the intrinsic which also performs
                                  // icmp ne zero on the loop counter value and
                                  // produces an i1 to guard the loop entry.
  bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,
                               DominatorTree &DT, bool ForceNestedLoop = false,
                               bool ForceHardwareLoopPHI = false);
  bool canAnalyze(LoopInfo &LI);
};

class IntrinsicCostAttributes {
  const IntrinsicInst *II = nullptr;
  Type *RetTy = nullptr;
  Intrinsic::ID IID;
  SmallVector<Type *, 4> ParamTys;
  SmallVector<const Value *, 4> Arguments;
  FastMathFlags FMF;
  // If ScalarizationCost is UINT_MAX, the cost of scalarizing the
  // arguments and the return value will be computed based on types.
  InstructionCost ScalarizationCost = InstructionCost::getInvalid();

public:
  IntrinsicCostAttributes(
      Intrinsic::ID Id, const CallBase &CI,
      InstructionCost ScalarCost = InstructionCost::getInvalid());

  IntrinsicCostAttributes(
      Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys,
      FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr,
      InstructionCost ScalarCost = InstructionCost::getInvalid());

  IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
                          ArrayRef<const Value *> Args);

  IntrinsicCostAttributes(
      Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args,
      ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(),
      const IntrinsicInst *I = nullptr,
      InstructionCost ScalarCost = InstructionCost::getInvalid());

  Intrinsic::ID getID() const { return IID; }
  const IntrinsicInst *getInst() const { return II; }
  Type *getReturnType() const { return RetTy; }
  FastMathFlags getFlags() const { return FMF; }
  InstructionCost getScalarizationCost() const { return ScalarizationCost; }
  const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }
  const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }

  bool isTypeBasedOnly() const {
    return Arguments.empty();
  }

  bool skipScalarizationCost() const { return ScalarizationCost.isValid(); }
};
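
// Illustrative usage sketch (not itself part of this header): given a call
// site `CB` known to call an intrinsic and a TargetTransformInfo `TTI`
// obtained from the corresponding analysis, a pass might build the
// attributes and query the cost as:
//
//   IntrinsicCostAttributes ICA(CB.getIntrinsicID(), CB);
//   InstructionCost Cost = TTI.getIntrinsicInstrCost(
//       ICA, TargetTransformInfo::TCK_RecipThroughput);
//
// The type-only constructors serve vectorizers that need to cost a
// not-yet-created vector intrinsic from its return and parameter types alone.
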
class TargetTransformInfo;
typedef TargetTransformInfo TTI;

/// This pass provides access to the codegen interfaces that are needed
/// for IR-level transformations.
class TargetTransformInfo {
public:
  /// Construct a TTI object using a type implementing the \c Concept
  /// API below.
  ///
  /// This is used by targets to construct a TTI wrapping their target-specific
  /// implementation that encodes appropriate costs for their target.
  template <typename T> TargetTransformInfo(T Impl);

  /// Construct a baseline TTI object using a minimal implementation of
  /// the \c Concept API below.
  ///
  /// The TTI implementation will reflect the information in the DataLayout
  /// provided if non-null.
  explicit TargetTransformInfo(const DataLayout &DL);

  // Provide move semantics.
  TargetTransformInfo(TargetTransformInfo &&Arg);
  TargetTransformInfo &operator=(TargetTransformInfo &&RHS);

  // We need to define the destructor out-of-line to define our sub-classes
  // out-of-line.
  ~TargetTransformInfo();

  /// Handle the invalidation of this information.
  ///
  /// When used as a result of \c TargetIRAnalysis this method will be called
  /// when the function this was computed for changes. When it returns false,
  /// the information is preserved across those changes.
  bool invalidate(Function &, const PreservedAnalyses &,
                  FunctionAnalysisManager::Invalidator &) {
    // FIXME: We should probably in some way ensure that the subtarget
    // information for a function hasn't changed.
    return false;
  }

  /// \name Generic Target Information
  /// @{

  /// The kind of cost model.
  ///
  /// There are several different cost models that can be customized by the
  /// target. The normalization of each cost model may be target specific.
  enum TargetCostKind {
    TCK_RecipThroughput, ///< Reciprocal throughput.
    TCK_Latency,         ///< The latency of the instruction.
    TCK_CodeSize,        ///< Instruction code size.
    TCK_SizeAndLatency   ///< The weighted sum of size and latency.
  };

  /// Query the cost of a specified instruction.
  ///
  /// Clients should use this interface to query the cost of an existing
  /// instruction. The instruction must have a valid parent (basic block).
  ///
  /// Note, this method does not cache the cost calculation and it
  /// can be expensive in some cases.
  InstructionCost getInstructionCost(const Instruction *I,
                                     enum TargetCostKind kind) const {
    InstructionCost Cost;
    switch (kind) {
    case TCK_RecipThroughput:
      Cost = getInstructionThroughput(I);
      break;
    case TCK_Latency:
      Cost = getInstructionLatency(I);
      break;
    case TCK_CodeSize:
    case TCK_SizeAndLatency:
      Cost = getUserCost(I, kind);
      break;
    }
    return Cost;
  }
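
  // Illustrative usage sketch (not part of this header): assuming `TTI` is a
  // computed TargetTransformInfo and `I` is an instruction with a parent
  // block, the different cost models can be queried side by side:
  //
  //   InstructionCost Rt   = TTI.getInstructionCost(&I, TCK_RecipThroughput);
  //   InstructionCost Size = TTI.getInstructionCost(&I, TCK_CodeSize);
  //   if (Size.isValid() && *Size.getValue() == TCC_Free)
  //     ; // expected to fold away entirely during lowering
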
  /// Underlying constants for 'cost' values in this interface.
  ///
  /// Many APIs in this interface return a cost. This enum defines the
  /// fundamental values that should be used to interpret (and produce) those
  /// costs. The costs are returned as an int rather than a member of this
  /// enumeration because it is expected that the cost of one IR instruction
  /// may have a multiplicative factor to it or otherwise won't fit directly
  /// into the enum. Moreover, it is common to sum or average costs, which
  /// works better as simple integral values. Thus this enum only provides
  /// constants. Also note that the returned costs are signed integers to make
  /// it natural to add, subtract, and test with zero (a common boundary
  /// condition). It is not expected that 2^32 is a realistic cost to be
  /// modeling at any point.
  ///
  /// Note that these costs should usually reflect the intersection of
  /// code-size cost and execution cost. A free instruction is typically one
  /// that folds into another instruction. For example, reg-to-reg moves can
  /// often be skipped by renaming the registers in the CPU, but they still are
  /// encoded and thus wouldn't be considered 'free' here.
  enum TargetCostConstants {
    TCC_Free = 0,     ///< Expected to fold away in lowering.
    TCC_Basic = 1,    ///< The cost of a typical 'add' instruction.
    TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
  };

  /// Estimate the cost of a GEP operation when lowered.
  InstructionCost
  getGEPCost(Type *PointeeType, const Value *Ptr,
             ArrayRef<const Value *> Operands,
             TargetCostKind CostKind = TCK_SizeAndLatency) const;

  /// \returns A value by which our inlining threshold should be multiplied.
  /// This is primarily used to bump up the inlining threshold wholesale on
  /// targets where calls are unusually expensive.
  ///
  /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
  /// individual classes of instructions would be better.
  unsigned getInliningThresholdMultiplier() const;

  /// \returns A value to be added to the inlining threshold.
  unsigned adjustInliningThreshold(const CallBase *CB) const;

  /// \returns Vector bonus in percent.
  ///
  /// Vector bonuses: We want to more aggressively inline vector-dense kernels
  /// and apply this bonus based on the percentage of vector instructions. A
  /// bonus is applied if the vector instructions exceed 50% and half that
  /// amount is applied if it exceeds 10%. Note that these bonuses are somewhat
  /// arbitrary and evolved over time by accident as much as because they are
  /// principled bonuses.
  /// FIXME: It would be nice to base the bonus values on something more
  /// scientific. A target may have no bonus on vector instructions.
  int getInlinerVectorBonusPercent() const;

  /// \return the expected cost of a memcpy, which could e.g. depend on the
  /// source/destination type and alignment and the number of bytes copied.
  InstructionCost getMemcpyCost(const Instruction *I) const;

  /// \return The estimated number of case clusters when lowering \p 'SI'.
  /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
  /// table.
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) const;

  /// Estimate the cost of a given IR user when lowered.
  ///
  /// This can estimate the cost of either a ConstantExpr or Instruction when
  /// lowered.
  ///
  /// \p Operands is a list of operands which can be a result of
  /// transformations of the current operands. The number of operands on the
  /// list must be equal to the number of current operands the IR user has, and
  /// their order on the list must match the order of the current operands.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
  InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
                              TargetCostKind CostKind) const;

  /// This is a helper function which calls the two-argument getUserCost
  /// with \p Operands which are the current operands U has.
  InstructionCost getUserCost(const User *U, TargetCostKind CostKind) const {
    SmallVector<const Value *, 4> Operands(U->operand_values());
    return getUserCost(U, Operands, CostKind);
  }
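
  // Illustrative sketch: because this overload forwards U's current operands,
  // estimating the lowered cost of any User (an Instruction or a
  // ConstantExpr) is a single call, e.g. for a hypothetical ConstantExpr
  // `GEPExpr` computing a global's address:
  //
  //   InstructionCost C = TTI.getUserCost(GEPExpr, TCK_SizeAndLatency);
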
  /// If a branch or a select condition is skewed in one direction by more than
  /// this factor, it is very likely to be predicted correctly.
  BranchProbability getPredictableBranchThreshold() const;

  /// Return true if branch divergence exists.
  ///
  /// Branch divergence has a significantly negative impact on GPU performance
  /// when threads in the same wavefront take different paths due to conditional
  /// branches.
  bool hasBranchDivergence() const;

  /// Return true if the target prefers to use GPU divergence analysis to
  /// replace the legacy version.
  bool useGPUDivergenceAnalysis() const;

  /// Returns whether V is a source of divergence.
  ///
  /// This function provides the target-dependent information for
  /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis
  /// first builds the dependency graph, and then runs the reachability
  /// algorithm starting with the sources of divergence.
  bool isSourceOfDivergence(const Value *V) const;

  // Returns true for the target-specific set of operations that produce a
  // uniform result even when taking non-uniform arguments.
  bool isAlwaysUniform(const Value *V) const;

  /// Returns the address space ID for a target's 'flat' address space. Note
  /// this is not necessarily the same as addrspace(0), which LLVM sometimes
  /// refers to as the generic address space. The flat address space is a
  /// generic address space that can be used to access multiple segments of
  /// memory with different address spaces. Access of a memory location through
  /// a pointer with this address space is expected to be legal but slower
  /// compared to the same memory location accessed through a pointer with a
  /// different address space.
  ///
  /// This is for targets with different pointer representations which can
  /// be converted with the addrspacecast instruction. If a pointer is converted
  /// to this address space, optimizations should attempt to replace the access
  /// with the source address space.
  ///
  /// \returns ~0u if the target does not have such a flat address space to
  /// optimize away.
  unsigned getFlatAddressSpace() const;

  /// Return any intrinsic address operand indexes which may be rewritten if
  /// they use a flat address space pointer.
  ///
  /// \returns true if the intrinsic was handled.
  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const;

  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;

  unsigned getAssumedAddrSpace(const Value *V) const;

  /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
  /// NewV, which has a different address space. This should happen for every
  /// operand index that collectFlatAddressOperands returned for the intrinsic.
  /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the
  /// new value (which may be the original \p II with modified operands).
  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const;

  /// Test whether calls to a function lower to actual program function
  /// calls.
  ///
  /// The idea is to test whether the program is likely to require a 'call'
  /// instruction or equivalent in order to call the given function.
  ///
  /// FIXME: It's not clear that this is a good or useful query API. Clients
  /// should probably move to simpler cost metrics using the above.
  /// Alternatively, we could split the cost interface into distinct code-size
  /// and execution-speed costs. This would allow modelling the core of this
  /// query more accurately as a call is a single small instruction, but
  /// incurs significant execution cost.
  bool isLoweredToCall(const Function *F) const;

  struct LSRCost {
    /// TODO: Some of these could be merged. Also, a lexical ordering
    /// isn't always optimal.
    unsigned Insns;
    unsigned NumRegs;
    unsigned AddRecCost;
    unsigned NumIVMuls;
    unsigned NumBaseAdds;
    unsigned ImmCost;
    unsigned SetupCost;
    unsigned ScaleCost;
  };

  /// Parameters that control the generic loop unrolling transformation.
  struct UnrollingPreferences {
    /// The cost threshold for the unrolled loop. Should be relative to the
    /// getUserCost values returned by this API, and the expectation is that
    /// the unrolled loop's instructions when run through that interface should
    /// not exceed this cost. However, this is only an estimate. Also, specific
    /// loops may be unrolled even with a cost above this threshold if deemed
    /// profitable. Set this to UINT_MAX to disable the loop body cost
    /// restriction.
    unsigned Threshold;
    /// If complete unrolling will reduce the cost of the loop, we will boost
    /// the Threshold by a certain percent to allow more aggressive complete
    /// unrolling. This value provides the maximum boost percentage that we
    /// can apply to Threshold (the value should be no less than 100).
    /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
    ///                                    MaxPercentThresholdBoost / 100)
    /// E.g. if complete unrolling reduces the loop execution time by 50%
    /// then we boost the threshold by the factor of 2x. If unrolling is not
    /// expected to reduce the running time, then we do not increase the
    /// threshold.
    unsigned MaxPercentThresholdBoost;
    /// The cost threshold for the unrolled loop when optimizing for size (set
    /// to UINT_MAX to disable).
    unsigned OptSizeThreshold;
    /// The cost threshold for the unrolled loop, like Threshold, but used
    /// for partial/runtime unrolling (set to UINT_MAX to disable).
    unsigned PartialThreshold;
    /// The cost threshold for the unrolled loop when optimizing for size, like
    /// OptSizeThreshold, but used for partial/runtime unrolling (set to
    /// UINT_MAX to disable).
    unsigned PartialOptSizeThreshold;
    /// A forced unrolling factor (the number of concatenated bodies of the
    /// original loop in the unrolled loop body). When set to 0, the unrolling
    /// transformation will select an unrolling factor based on the current
    /// cost threshold and other factors.
    unsigned Count;
    /// Default unroll count for loops with run-time trip count.
    unsigned DefaultUnrollRuntimeCount;
    // Set the maximum unrolling factor. The unrolling factor may be selected
    // using the appropriate cost threshold, but may not exceed this number
    // (set to UINT_MAX to disable). This does not apply in cases where the
    // loop is being fully unrolled.
    unsigned MaxCount;
    /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
    /// applies even if full unrolling is selected. This allows a target to
    /// fall back to Partial unrolling if full unrolling is above
    /// FullUnrollMaxCount.
    unsigned FullUnrollMaxCount;
    // Represents the number of instructions optimized away when the "back
    // edge" becomes a "fall through" in the unrolled loop.
    // For now we count a conditional branch on a backedge and a comparison
    // feeding it.
    unsigned BEInsns;
    /// Allow partial unrolling (unrolling of loops to expand the size of the
    /// loop body, not only to eliminate small constant-trip-count loops).
    bool Partial;
    /// Allow runtime unrolling (unrolling of loops to expand the size of the
    /// loop body even when the number of loop iterations is not known at
    /// compile time).
    bool Runtime;
    /// Allow generation of a loop remainder (extra iterations after unroll).
    bool AllowRemainder;
    /// Allow emitting expensive instructions (such as divisions) when
    /// computing the trip count of a loop for runtime unrolling.
    bool AllowExpensiveTripCount;
    /// Apply loop unroll on any kind of loop
    /// (mainly to loops that fail runtime unrolling).
    bool Force;
    /// Allow using the trip count upper bound to unroll loops.
    bool UpperBound;
    /// Allow unrolling of all the iterations of the runtime loop remainder.
    bool UnrollRemainder;
    /// Allow unroll and jam. Used to enable unroll and jam for the target.
    bool UnrollAndJam;
    /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
    /// value above is used during unroll and jam for the outer loop size.
    /// This value is used in the same manner to limit the size of the inner
    /// loop.
    unsigned UnrollAndJamInnerLoopThreshold;
    /// Don't allow loop unrolling to simulate more than this number of
    /// iterations when checking full unroll profitability.
    unsigned MaxIterationsCountToAnalyze;
  };

  /// Get target-customized preferences for the generic loop unrolling
  /// transformation. The caller will initialize UP with the current
  /// target-independent defaults.
  void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                               UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) const;
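
  // Illustrative sketch of a target-side override (the class name MyTTIImpl
  // is hypothetical; real targets derive from BasicTTIImplBase):
  //
  //   void MyTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
  //                                           TTI::UnrollingPreferences &UP,
  //                                           OptimizationRemarkEmitter *ORE) {
  //     UP.Partial = true; // allow partial unrolling on this target
  //     UP.Runtime = true; // allow runtime trip-count unrolling
  //     UP.MaxCount = 4;   // but cap the unroll factor at 4
  //   }
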
  /// Query the target whether it would be profitable to convert the given loop
  /// into a hardware loop.
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) const;

  /// Query the target whether it would be preferred to create a predicated
  /// vector loop, which can avoid the need to emit a scalar epilogue loop.
  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI) const;

  /// Query the target whether lowering of the llvm.get.active.lane.mask
  /// intrinsic is supported.
  bool emitGetActiveLaneMask() const;

  // Parameters that control the loop peeling transformation
  struct PeelingPreferences {
    /// A forced peeling factor (the number of bodies of the original loop
    /// that should be peeled off before the loop body). When set to 0, a
    /// peeling factor is selected based on profile information and other
    /// factors.
    unsigned PeelCount;
    /// Allow peeling off loop iterations.
    bool AllowPeeling;
    /// Allow peeling off loop iterations for loop nests.
    bool AllowLoopNestsPeeling;
    /// Allow peeling based on profile. Used to enable peeling off all
    /// iterations based on the provided profile.
    /// If the value is true the peeling cost model can decide to peel only
    /// some iterations and in this case it will set this to false.
    bool PeelProfiledIterations;
  };

  /// Get target-customized preferences for the generic loop peeling
  /// transformation. The caller will initialize \p PP with the current
  /// target-independent defaults with information from \p L and \p SE.
  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             PeelingPreferences &PP) const;

  /// Targets can implement their own combinations for target-specific
  /// intrinsics. This function will be called from the InstCombine pass every
  /// time a target-specific intrinsic is encountered.
  ///
  /// \returns None to not do anything target specific, or a value that will be
  /// returned from the InstCombiner. It is possible to stop further processing
  /// of the intrinsic by returning nullptr.
  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const;
  /// Can be used to implement target-specific instruction combining.
  /// \see instCombineIntrinsic
  Optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const;
  /// Can be used to implement target-specific instruction combining.
  /// \see instCombineIntrinsic
  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;
  /// @}

  /// \name Scalar Target Information
  /// @{

  /// Flags indicating the kind of support for population count.
  ///
  /// Compared to the SW implementation, HW support is supposed to
  /// significantly boost the performance when the population is dense, and it
  /// may or may not degrade performance if the population is sparse. HW
  /// support is considered "Fast" if it can outperform, or is on a par with,
  /// the SW implementation when the population is sparse; otherwise, it is
  /// considered "Slow".
  enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };

  /// Return true if the specified immediate is a legal add immediate, that
  /// is, the target has add instructions which can add a register with the
  /// immediate without having to materialize the immediate into a register.
  bool isLegalAddImmediate(int64_t Imm) const;

  /// Return true if the specified immediate is a legal icmp immediate,
  /// that is, the target has icmp instructions which can compare a register
  /// against the immediate without having to materialize the immediate into a
  /// register.
  bool isLegalICmpImmediate(int64_t Imm) const;

  /// Return true if the addressing mode represented by AM is legal for
  /// this target, for a load/store of the specified type.
  /// The type may be VoidTy, in which case only return true if the addressing
  /// mode is legal for a load/store of any legal type.
  /// If target returns true in LSRWithInstrQueries(), I may be valid.
  /// TODO: Handle pre/postinc as well.
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace = 0,
                             Instruction *I = nullptr) const;
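
  // Illustrative sketch: testing whether `base + 4*index + 16` is a legal
  // addressing mode for an i32 access (`Ctx` is an assumed LLVMContext; the
  // constants are hypothetical):
  //
  //   bool Legal = TTI.isLegalAddressingMode(Type::getInt32Ty(Ctx),
  //                                          /*BaseGV=*/nullptr,
  //                                          /*BaseOffset=*/16,
  //                                          /*HasBaseReg=*/true,
  //                                          /*Scale=*/4);
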
  /// Return true if the LSR cost of C1 is lower than the LSR cost of C2.
  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                     TargetTransformInfo::LSRCost &C2) const;

  /// Return true if LSR major cost is the number of registers. Targets which
  /// implement their own isLSRCostLess and unset number of registers as major
  /// cost should return false, otherwise return true.
  bool isNumRegsMajorCostOfLSR() const;

  /// \returns true if LSR should not optimize a chain that includes \p I.
  bool isProfitableLSRChainElement(Instruction *I) const;

  /// Return true if the target can fuse a compare and branch.
  /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
  /// calculation for the instructions in a loop.
  bool canMacroFuseCmp() const;

  /// Return true if the target can save a compare for loop count, for example
  /// hardware loop saves a compare.
  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) const;

  enum AddressingModeKind {
    AMK_PreIndexed,
    AMK_PostIndexed,
    AMK_None
  };

  /// Return the preferred addressing mode LSR should make efforts to generate.
  AddressingModeKind getPreferredAddressingMode(const Loop *L,
                                                ScalarEvolution *SE) const;

  /// Return true if the target supports masked store.
  bool isLegalMaskedStore(Type *DataType, Align Alignment) const;
  /// Return true if the target supports masked load.
  bool isLegalMaskedLoad(Type *DataType, Align Alignment) const;

  /// Return true if the target supports nontemporal store.
  bool isLegalNTStore(Type *DataType, Align Alignment) const;
  /// Return true if the target supports nontemporal load.
  bool isLegalNTLoad(Type *DataType, Align Alignment) const;

  /// Return true if the target supports masked scatter.
  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
  /// Return true if the target supports masked gather.
  bool isLegalMaskedGather(Type *DataType, Align Alignment) const;

  /// Return true if the target supports masked compress store.
  bool isLegalMaskedCompressStore(Type *DataType) const;
  /// Return true if the target supports masked expand load.
  bool isLegalMaskedExpandLoad(Type *DataType) const;

  /// Return true if the target has a unified operation to calculate division
  /// and remainder. If so, the additional implicit multiplication and
  /// subtraction required to calculate a remainder from division are free.
  /// This can enable more aggressive transformations for division and
  /// remainder than would typically be allowed using throughput or size cost
  /// models.
  bool hasDivRemOp(Type *DataType, bool IsSigned) const;

  /// Return true if the given instruction (assumed to be a memory access
  /// instruction) has a volatile variant. If that's the case then we can avoid
  /// addrspacecast to generic AS for volatile loads/stores. Default
  /// implementation returns false, which prevents address space inference for
  /// volatile loads/stores.
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;

  /// Return true if target doesn't mind addresses in vectors.
  bool prefersVectorizedAddressing() const;

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  /// TODO: Handle pre/postinc as well.
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace = 0) const;

  /// Return true if the loop strength reduce pass should make
  /// Instruction* based TTI queries to isLegalAddressingMode(). This is
  /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
  /// immediate offset and no index register.
  bool LSRWithInstrQueries() const;

  /// Return true if it's free to truncate a value of type Ty1 to type
  /// Ty2. e.g. On x86 it's free to truncate an i32 value in register EAX to
  /// i16 by referencing its sub-register AX.
  bool isTruncateFree(Type *Ty1, Type *Ty2) const;

  /// Return true if it is profitable to hoist instructions in the
  /// then/else blocks to before the if.
  bool isProfitableToHoist(Instruction *I) const;

  bool useAA() const;

  /// Return true if this type is legal.
  bool isTypeLegal(Type *Ty) const;

  /// Returns the estimated number of registers required to represent \p Ty.
  unsigned getRegUsageForType(Type *Ty) const;

  /// Return true if switches should be turned into lookup tables for the
  /// target.
  bool shouldBuildLookupTables() const;

  /// Return true if switches should be turned into lookup tables
  /// containing this constant value for the target.
  bool shouldBuildLookupTablesForConstant(Constant *C) const;

  /// Return true if lookup tables should be turned into relative lookup
  /// tables.
  bool shouldBuildRelLookupTables() const;

  /// Return true if the input function which is cold at all call sites,
  /// should use coldcc calling convention.
  bool useColdCCForColdCall(Function &F) const;

  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
  /// are set if the demanded result elements need to be inserted and/or
  /// extracted from vectors.
  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract) const;

  /// Estimate the overhead of scalarizing an instruction's unique
  /// non-constant operands. The (potentially vector) types to use for each
  /// argument are passed via Tys.
  InstructionCost
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   ArrayRef<Type *> Tys) const;

  /// If target has efficient vector element load/store instructions, it can
  /// return true here so that insertion/extraction costs are not added to
  /// the scalarization cost of a load/store.
  bool supportsEfficientVectorElementLoadStore() const;

  /// Don't restrict interleaved unrolling to small loops.
  bool enableAggressiveInterleaving(bool LoopHasReductions) const;

  /// Returns options for expansion of memcmp. IsZeroCmp is
  // true if this is the expansion of memcmp(p1, p2, s) == 0.
  struct MemCmpExpansionOptions {
    // Return true if memcmp expansion is enabled.
    operator bool() const { return MaxNumLoads > 0; }

    // Maximum number of load operations.
    unsigned MaxNumLoads = 0;

    // The list of available load sizes (in bytes), sorted in decreasing order.
    SmallVector<unsigned, 8> LoadSizes;

    // For memcmp expansion when the memcmp result is only compared equal or
    // not-equal to 0, allow up to this number of load pairs per block. As an
    // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
    //   a0 = load2bytes &a[0]
    //   b0 = load2bytes &b[0]
    //   a2 = load1byte  &a[2]
    //   b2 = load1byte  &b[2]
    //   r  = cmp eq (a0 ^ b0 | a2 ^ b2), 0
    unsigned NumLoadsPerBlock = 1;

    // Set to true to allow overlapping loads. For example, 7-byte compares can
    // be done with two 4-byte compares instead of 4+2+1-byte compares. This
    // requires all loads in LoadSizes to be doable in an unaligned way.
    bool AllowOverlappingLoads = false;
  };
  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                               bool IsZeroCmp) const;
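
  // Illustrative sketch of a target opting into memcmp expansion (the class
  // name MyTTIImpl is hypothetical):
  //
  //   TTI::MemCmpExpansionOptions
  //   MyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
  //     TTI::MemCmpExpansionOptions Options;
  //     Options.MaxNumLoads = OptSize ? 2 : 8;  // stay small under -Os
  //     Options.LoadSizes.append({8, 4, 2, 1}); // 64-bit target load widths
  //     return Options;
  //   }
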
  /// Enable matching of interleaved access groups.
  bool enableInterleavedAccessVectorization() const;

  /// Enable matching of interleaved access groups that contain predicated
  /// accesses or gaps and are therefore vectorized using masked
  /// vector loads/stores.
  bool enableMaskedInterleavedAccessVectorization() const;

  /// Indicate that it is potentially unsafe to automatically vectorize
  /// floating-point operations because the semantics of vector and scalar
  /// floating-point math may differ. For example, ARM NEON v7 SIMD math
  /// does not support IEEE-754 denormal numbers, while depending on the
  /// platform, scalar floating-point math does.
  /// This applies to floating-point math operations and calls, not memory
  /// operations, shuffles, or casts.
  bool isFPVectorizationPotentiallyUnsafe() const;

  /// Determine if the target supports unaligned memory accesses.
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace = 0,
                                      Align Alignment = Align(1),
                                      bool *Fast = nullptr) const;

  /// Return hardware support for population count.
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;

  /// Return true if the hardware has a fast square-root instruction.
  bool haveFastSqrt(Type *Ty) const;

  /// Return true if it is faster to check if a floating-point value is NaN
  /// (or not-NaN) versus a comparison against a constant FP zero value.
  /// Targets should override this if materializing a 0.0 for comparison is
  /// generally as cheap as checking for ordered/unordered.
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;

  /// Return the expected cost of supporting the floating point operation
  /// of the specified type.
  InstructionCost getFPOpCost(Type *Ty) const;

  /// Return the expected cost of materializing the given integer
  /// immediate of the specified type.
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TargetCostKind CostKind) const;

  /// Return the expected cost of materialization for the given integer
  /// immediate of the specified type for a given instruction. The cost can be
  /// zero if the immediate can be folded into the specified instruction.
  InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) const;
  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TargetCostKind CostKind) const;
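
  // Illustrative sketch (constant-hoisting style queries; `Ctx` is an assumed
  // LLVMContext): compare the cost of materializing an immediate against
  // folding it into the second operand of an add:
  //
  //   APInt Imm(64, 0x12345678ULL);
  //   Type *I64Ty = Type::getInt64Ty(Ctx);
  //   InstructionCost Mat = TTI.getIntImmCost(Imm, I64Ty, TCK_SizeAndLatency);
  //   InstructionCost Folded = TTI.getIntImmCostInst(
  //       Instruction::Add, /*Idx=*/1, Imm, I64Ty, TCK_SizeAndLatency);
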
  /// Return the expected cost for the given integer when optimising
  /// for size. This is different than the other integer immediate cost
  /// functions in that it is subtarget agnostic. This is useful when you e.g.
  /// target one ISA such as Aarch32 but smaller encodings could be possible
  /// with another such as Thumb. This return value is used as a penalty when
  /// the total cost for a constant is calculated (the bigger the cost, the
  /// more beneficial constant hoisting is).
  InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
                                        const APInt &Imm, Type *Ty) const;
  /// @}

  /// \name Vector Target Information
  /// @{

  /// The various kinds of shuffle patterns for vector queries.
  enum ShuffleKind {
    SK_Broadcast,        ///< Broadcast element 0 to all other elements.
    SK_Reverse,          ///< Reverse the order of the vector.
    SK_Select,           ///< Selects elements from the corresponding lane of
                         ///< either source operand. This is equivalent to a
                         ///< vector select with a constant condition operand.
    SK_Transpose,        ///< Transpose two vectors.
    SK_InsertSubvector,  ///< InsertSubvector. Index indicates start offset.
    SK_ExtractSubvector, ///< ExtractSubvector. Index indicates start offset.
    SK_PermuteTwoSrc,    ///< Merge elements from two source vectors into one
                         ///< with any shuffle mask.
    SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any
                         ///< shuffle mask.
    SK_Splice            ///< Concatenates elements from the first input vector
                         ///< with elements of the second input vector,
                         ///< returning a vector of the same type as the input
                         ///< vectors.
  };

  /// Additional information about an operand's possible values.
  enum OperandValueKind {
    OK_AnyValue,               // Operand can have any value.
    OK_UniformValue,           // Operand is uniform (splat of a value).
    OK_UniformConstantValue,   // Operand is uniform constant.
    OK_NonUniformConstantValue // Operand is a non uniform constant value.
  };

  /// Additional properties of an operand's values.
  enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };

  /// \return the number of registers in the target-provided register class.
  unsigned getNumberOfRegisters(unsigned ClassID) const;

  /// \return the target-provided register class ID for the provided type,
  /// accounting for type promotion and other type-legalization techniques that
  /// the target might apply. However, it specifically does not account for the
  /// scalarization or splitting of vector types. Should a vector type require
  /// scalarization or splitting into multiple underlying vector registers,
  /// that type should be mapped to a register class containing no registers.
  /// Specifically, this is designed to provide a simple, high-level view of
  /// the register allocation later performed by the backend. These register
  /// classes don't necessarily map onto the register classes used by the
  /// backend.
  /// FIXME: It's not currently possible to determine how many registers
  /// are used by the provided type.
  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;

  /// \return the target-provided register class name
  const char *getRegisterClassName(unsigned ClassID) const;

  enum RegisterKind { RGK_Scalar, RGK_FixedWidthVector, RGK_ScalableVector };

  /// \return The width of the largest scalar or vector register type.
  TypeSize getRegisterBitWidth(RegisterKind K) const;

  /// \return The width of the smallest vector register type.
  unsigned getMinVectorRegisterBitWidth() const;

  /// \return The maximum value of vscale if the target specifies an
  /// architectural maximum vector length, and None otherwise.
  Optional<unsigned> getMaxVScale() const;

  /// \return True if the vectorization factor should be chosen to
  /// make the vector of the smallest element type match the size of a
  /// vector register. For wider element types, this could result in
  /// creating vectors that span multiple vector registers.
  /// If false, the vectorization factor will be chosen based on the
  /// size of the widest element type.
  bool shouldMaximizeVectorBandwidth() const;

  /// \return The minimum vectorization factor for types of given element
  /// bit width, or 0 if there is no minimum VF. The returned value only
  /// applies when shouldMaximizeVectorBandwidth returns true.
  /// If IsScalable is true, the returned ElementCount must be a scalable VF.
  ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;

  /// \return The maximum vectorization factor for types of given element
  /// bit width and opcode, or 0 if there is no maximum VF.
  /// Currently only used by the SLP vectorizer.
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;

  /// \return True if it should be considered for address type promotion.
  /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
  /// profitable without finding other extensions fed by the same input.
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;

  /// \return The size of a cache line in bytes.
  unsigned getCacheLineSize() const;

  /// The possible cache levels
  enum class CacheLevel {
    L1D, // The L1 data cache
    L2D, // The L2 data cache

    // We currently do not model L3 caches, as their sizes differ widely
    // between microarchitectures. Also, we currently do not have a use for
    // L3 cache size modeling yet.
  };

  /// \return The size of the cache level in bytes, if available.
  Optional<unsigned> getCacheSize(CacheLevel Level) const;

  /// \return The associativity of the cache level, if available.
  Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;

  /// \return How far before a load we should place the prefetch
  /// instruction. This is currently measured in number of
  /// instructions.
  unsigned getPrefetchDistance() const;

  /// Some HW prefetchers can handle accesses up to a certain constant stride.
  /// Sometimes prefetching is beneficial even below the HW prefetcher limit,
  /// and the arguments provided are meant to serve as a basis for deciding
  /// this for a particular loop.
  ///
  /// \param NumMemAccesses        Number of memory accesses in the loop.
  /// \param NumStridedMemAccesses Number of the memory accesses that
  ///                              ScalarEvolution could find a known stride
  ///                              for.
  /// \param NumPrefetches         Number of software prefetches that will be
  ///                              emitted as determined by the addresses
  ///                              involved and the cache line size.
  /// \param HasCall               True if the loop contains a call.
  ///
  /// \return This is the minimum stride in bytes where it makes sense to start
  ///         adding SW prefetches. The default is 1, i.e. prefetch with any
  ///         stride.
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches, bool HasCall) const;

  /// \return The maximum number of iterations to prefetch ahead. If
  /// the required number of iterations is more than this number, no
  /// prefetching is performed.
  unsigned getMaxPrefetchIterationsAhead() const;

  /// \return True if prefetching should also be done for writes.
  bool enableWritePrefetching() const;

  /// \return The maximum interleave factor that any transform should try to
  /// perform for this target. This number depends on the level of parallelism
  /// and the number of execution units in the CPU.
  unsigned getMaxInterleaveFactor(unsigned VF) const;

  /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
  static OperandValueKind getOperandInfo(const Value *V,
                                         OperandValueProperties &OpProps);

  /// This is an approximation of reciprocal throughput of a math/logic op.
  /// A higher cost indicates less expected throughput.
  /// From Agner Fog's guides, reciprocal throughput is "the average number of
  /// clock cycles per instruction when the instructions are not part of a
  /// limiting dependency chain."
  /// Therefore, costs should be scaled to account for multiple execution units
  /// on the target that can process this type of instruction. For example, if
  /// there are 5 scalar integer units and 2 vector integer units that can
  /// calculate an 'add' in a single cycle, this model should indicate that the
  /// cost of the vector add instruction is 2.5 times the cost of the scalar
  /// add instruction.
  /// \p Args is an optional argument which holds the instruction operands
  /// values so the TTI can analyze those values searching for special
  /// cases or optimizations based on those values.
  /// \p CxtI is the optional original context instruction, if one exists, to
  /// provide even more information.
  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      OperandValueKind Opd1Info = OK_AnyValue,
      OperandValueKind Opd2Info = OK_AnyValue,
      OperandValueProperties Opd1PropInfo = OP_None,
      OperandValueProperties Opd2PropInfo = OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr) const;
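
  // Illustrative sketch: the throughput cost of a <4 x i32> add whose second
  // operand is a uniform (splat) constant, as a vectorizer might query it
  // (`Ctx` is an assumed LLVMContext):
  //
  //   auto *VecTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);
  //   InstructionCost C = TTI.getArithmeticInstrCost(
  //       Instruction::Add, VecTy, TCK_RecipThroughput,
  //       OK_AnyValue, OK_UniformConstantValue);
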
  /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
  /// The exact mask may be passed as Mask, or else the array will be empty.
  /// The index and subtype parameters are used by the subvector insertion and
  /// extraction shuffle kinds to show the insert/extract point and the type of
  /// the subvector being inserted/extracted.
  /// NOTE: For subvector extractions Tp represents the source type.
  InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask = None, int Index = 0,
                                 VectorType *SubTp = nullptr) const;
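
  // Illustrative sketch: the cost of broadcasting lane 0 of a <8 x i16>
  // vector to all lanes, leaving mask, index and subtype at their defaults
  // (`Ctx` is an assumed LLVMContext):
  //
  //   auto *VecTy = FixedVectorType::get(Type::getInt16Ty(Ctx), 8);
  //   InstructionCost C = TTI.getShuffleCost(SK_Broadcast, VecTy);
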
  /// Represents a hint about the context in which a cast is used.
  ///
  /// For zext/sext, the context of the cast is the operand, which must be a
  /// load of some kind. For trunc, the context of the cast is the single
  /// user of the instruction, which must be a store of some kind.
  ///
  /// This enum allows the vectorizer to give getCastInstrCost an idea of the
  /// type of cast it's dealing with, as not every cast is equal. For instance,
  /// the zext of a load may be free, but the zext of an interleaving load can
  /// be (very) expensive!
  ///
  /// See \c getCastContextHint to compute a CastContextHint from a cast
  /// Instruction*. Callers can use it if they don't need to override the
  /// context and just want it to be calculated from the instruction.
  ///
  /// FIXME: This handles the types of load/store that the vectorizer can
  /// produce, which are the cases where the context instruction is most
  /// likely to be incorrect. There are other situations where that can happen
  /// too, which might be handled here but in the long run a more general
  /// solution of costing multiple instructions at the same time may be better.
  enum class CastContextHint : uint8_t {
    None,          ///< The cast is not used with a load/store of any kind.
    Normal,        ///< The cast is used with a normal load/store.
    Masked,        ///< The cast is used with a masked load/store.
    GatherScatter, ///< The cast is used with a gather/scatter.
    Interleave,    ///< The cast is used with an interleaved load/store.
    Reversed,      ///< The cast is used with a reversed load/store.
  };

  /// Calculates a CastContextHint from \p I.
  /// This should be used by callers of getCastInstrCost if they wish to
  /// determine the context from some instruction.
  /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr,
  /// or if it's another type of cast.
  static CastContextHint getCastContextHint(const Instruction *I);

  /// \return The expected cost of cast instructions, such as bitcast, trunc,
  /// zext, etc. If there is an existing instruction that holds Opcode, it
  /// may be passed in the 'I' parameter.
  InstructionCost
  getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                   TTI::CastContextHint CCH,
                   TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
                   const Instruction *I = nullptr) const;
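
  // Illustrative sketch: costing a zext of a loaded <4 x i8> to <4 x i32>,
  // deriving the context hint from an existing cast instruction `Cast`
  // (`SrcVecTy` and `DstVecTy` are assumed vector types):
  //
  //   CastContextHint CCH = TargetTransformInfo::getCastContextHint(Cast);
  //   InstructionCost C = TTI.getCastInstrCost(Instruction::ZExt, DstVecTy,
  //                                            SrcVecTy, CCH,
  //                                            TCK_RecipThroughput, Cast);
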
  /// \return The expected cost of a sign- or zero-extended vector extract. Use
  /// -1 to indicate that there is no information about the index value.
  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy,
                                           unsigned Index = -1) const;

  /// \return The expected cost of control-flow related instructions such as
  /// Phi, Ret, Br, Switch.
  InstructionCost
  getCFInstrCost(unsigned Opcode,
                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
                 const Instruction *I = nullptr) const;

  /// \returns The expected cost of compare and select instructions. If there
  /// is an existing instruction that holds Opcode, it may be passed in the
  /// 'I' parameter. The \p VecPred parameter can be used to indicate the select
  /// is using a compare with the specified predicate as condition. When vector
  /// types are passed, \p VecPred must be used for all lanes.
  InstructionCost
  getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy = nullptr,
                     CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE,
                     TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
                     const Instruction *I = nullptr) const;

  /// \return The expected cost of vector Insert and Extract.
  /// Use -1 to indicate that there is no information on the index value.
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     unsigned Index = -1) const;

  /// \return The cost of Load and Store instructions.
  InstructionCost
  getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                  unsigned AddressSpace,
                  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
                  const Instruction *I = nullptr) const;
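
  // Illustrative sketch: comparing a scalar i32 load against a <4 x i32>
  // vector load in the default address space (`Ctx` is an assumed
  // LLVMContext):
  //
  //   InstructionCost Scalar = TTI.getMemoryOpCost(
  //       Instruction::Load, Type::getInt32Ty(Ctx), Align(4),
  //       /*AddressSpace=*/0);
  //   InstructionCost Vector = TTI.getMemoryOpCost(
  //       Instruction::Load, FixedVectorType::get(Type::getInt32Ty(Ctx), 4),
  //       Align(16), /*AddressSpace=*/0);
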
  /// \return The cost of masked Load and Store instructions.
  InstructionCost getMaskedMemoryOpCost(
      unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;

  /// \return The cost of Gather or Scatter operation
  /// \p Opcode - is a type of memory access Load or Store
  /// \p DataTy - a vector type of the data to be loaded or stored
  /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
  /// \p VariableMask - true when the memory access is predicated with a mask
  ///                   that is not a compile-time constant
  /// \p Alignment - alignment of single element
  /// \p I - the optional original context instruction, if one exists, e.g. the
  ///        load/store to transform or the call to the gather/scatter intrinsic
  InstructionCost getGatherScatterOpCost(
      unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
      Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      const Instruction *I = nullptr) const;

  /// \return The cost of the interleaved memory operation.
  /// \p Opcode is the memory operation code
  /// \p VecTy is the vector type of the interleaved access.
  /// \p Factor is the interleave factor
  /// \p Indices is the indices for interleaved load members (as interleaved
  ///    load allows gaps)
  /// \p Alignment is the alignment of the memory operation
  /// \p AddressSpace is address space of the pointer.
  /// \p UseMaskForCond indicates if the memory access is predicated.
  /// \p UseMaskForGaps indicates if gaps should be masked.
  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      bool UseMaskForCond = false, bool UseMaskForGaps = false) const;

  /// A helper function to determine the type of reduction algorithm used
  /// for a given \p Opcode and set of FastMathFlags \p FMF.
  static bool requiresOrderedReduction(Optional<FastMathFlags> FMF) {
    return FMF != None && !(*FMF).allowReassoc();
  }

  /// Calculate the cost of vector reduction intrinsics.
  ///
  /// This is the cost of reducing the vector value of type \p Ty to a scalar
  /// value using the operation denoted by \p Opcode. The FastMathFlags
  /// parameter \p FMF indicates what type of reduction we are performing:
  ///   1. Tree-wise. This is the typical 'fast' reduction performed that
  ///   involves successively splitting a vector into half and doing the
  ///   operation on the pair of halves until you have a scalar value. For
  ///   example:
  ///     (v0, v1, v2, v3)
  ///     ((v0+v2), (v1+v3), undef, undef)
  ///     ((v0+v2+v1+v3), undef, undef, undef)
  ///   This is the default behaviour for integer operations, whereas for
  ///   floating point we only do this if \p FMF indicates that
  ///   reassociation is allowed.
  ///   2. Ordered. For a vector with N elements this involves performing N
  ///   operations in lane order, starting with an initial scalar value, i.e.
  ///     result = InitVal + v0
  ///     result = result + v1
  ///     result = result + v2
  ///     result = result + v3
  ///   This is only the case for FP operations and when reassociation is not
  ///   allowed.
  ///
  InstructionCost getArithmeticReductionCost(
      unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
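
  // Illustrative sketch: the reassociable (tree-wise) versus strict (ordered)
  // cost of an fadd reduction over <4 x float> (`Ctx` is an assumed
  // LLVMContext):
  //
  //   auto *VecTy = FixedVectorType::get(Type::getFloatTy(Ctx), 4);
  //   FastMathFlags Reassoc;
  //   Reassoc.setAllowReassoc();
  //   InstructionCost Tree =
  //       TTI.getArithmeticReductionCost(Instruction::FAdd, VecTy, Reassoc);
  //   InstructionCost Ordered = TTI.getArithmeticReductionCost(
  //       Instruction::FAdd, VecTy, FastMathFlags());
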
1186  VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
1188 
1189  /// Calculate the cost of an extended reduction pattern, similar to
1190  /// getArithmeticReductionCost of an Add reduction with an extension and
1191  /// optional multiply. This is the cost of as:
1192  /// ResTy vecreduce.add(ext(Ty A)), or if IsMLA flag is set then:
1193  /// ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B)). The reduction happens
1194  /// on a VectorType with ResTy elements and Ty lanes.
1196  bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1198 
1199  /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
1200  /// Three cases are handled: 1. scalar instruction 2. vector instruction
1201  /// 3. scalar instruction which is to be vectorized.
1204 
1205  /// \returns The cost of Call instructions.
1207  Function *F, Type *RetTy, ArrayRef<Type *> Tys,
1209 
1210  /// \returns The number of pieces into which the provided type must be
1211  /// split during legalization. Zero is returned when the answer is unknown.
1212  unsigned getNumberOfParts(Type *Tp) const;
1213 
1214  /// \returns The cost of the address computation. For most targets this can be
1215  /// merged into the instruction indexing mode. Some targets might want to
1216  /// distinguish between address computation for memory operations on vector
1217  /// types and scalar types. Such targets should override this function.
1218  /// The 'SE' parameter holds pointer for the scalar evolution object which
1219  /// is used in order to get the Ptr step value in case of constant stride.
1220  /// The 'Ptr' parameter holds SCEV of the access pointer.
1222  ScalarEvolution *SE = nullptr,
1223  const SCEV *Ptr = nullptr) const;
1224 
1225  /// \returns The cost, if any, of keeping values of the given types alive
1226  /// over a callsite.
1227  ///
1228  /// Some types may require the use of register classes that do not have
1229  /// any callee-saved registers, so would require a spill and fill.
1231 
1232  /// \returns True if the intrinsic is a supported memory intrinsic. Info
1233  /// will contain additional information - whether the intrinsic may write
1234  /// or read to memory, volatility and the pointer. Info is undefined
1235  /// if false is returned.
1237 
1238  /// \returns The maximum element size, in bytes, for an element
1239  /// unordered-atomic memory intrinsic.
1240  unsigned getAtomicMemIntrinsicMaxElementSize() const;
1241 
1242  /// \returns A value which is the result of the given memory intrinsic. New
1243  /// instructions may be created to extract the result from the given intrinsic
1244  /// memory operation. Returns nullptr if the target cannot create a result
1245  /// from the given intrinsic.
1247  Type *ExpectedType) const;
1248 
1249  /// \returns The type to use in a loop expansion of a memcpy call.
1251  unsigned SrcAddrSpace, unsigned DestAddrSpace,
1252  unsigned SrcAlign, unsigned DestAlign) const;
1253 
1254  /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
1255  /// \param RemainingBytes The number of bytes to copy.
1256  ///
1257  /// Calculates the operand types to use when copying \p RemainingBytes of
1258  /// memory, where source and destination alignments are \p SrcAlign and
1259  /// \p DestAlign respectively.
1262  unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1263  unsigned SrcAlign, unsigned DestAlign) const;
1264 
1265  /// \returns True if the two functions have compatible attributes for inlining
1266  /// purposes.
1267  bool areInlineCompatible(const Function *Caller,
1268  const Function *Callee) const;
1269 
1270  /// \returns True if the caller and callee agree on how \p Args will be passed
1271  /// to the callee.
1272  /// \param[out] Args The list of compatible arguments. The implementation may
1273  /// filter out any incompatible args from this list.
1274  bool areFunctionArgsABICompatible(const Function *Caller,
1275  const Function *Callee,
1277 
1278  /// The type of load/store indexing.
1280  MIM_Unindexed, ///< No indexing.
1281  MIM_PreInc, ///< Pre-incrementing.
1282  MIM_PreDec, ///< Pre-decrementing.
1283  MIM_PostInc, ///< Post-incrementing.
1284  MIM_PostDec ///< Post-decrementing.
1285  };
1286 
1287  /// \returns True if the specified indexed load for the given type is legal.
1288  bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
1289 
1290  /// \returns True if the specified indexed store for the given type is legal.
1291  bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
1292 
1293  /// \returns The bitwidth of the largest vector type that should be used to
1294  /// load/store in the given address space.
1295  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
1296 
1297  /// \returns True if the load instruction is legal to vectorize.
1298  bool isLegalToVectorizeLoad(LoadInst *LI) const;
1299 
1300  /// \returns True if the store instruction is legal to vectorize.
1301  bool isLegalToVectorizeStore(StoreInst *SI) const;
1302 
1303  /// \returns True if it is legal to vectorize the given load chain.
1304  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
1305  unsigned AddrSpace) const;
1306 
1307  /// \returns True if it is legal to vectorize the given store chain.
1308  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
1309  unsigned AddrSpace) const;
1310 
1311  /// \returns True if it is legal to vectorize the given reduction kind.
1313  ElementCount VF) const;
1314 
1315  /// \returns True if the given type is supported for scalable vectors
1316  bool isElementTypeLegalForScalableVector(Type *Ty) const;
1317 
1318  /// \returns The new vector factor value if the target doesn't support \p
1319  /// SizeInBytes loads or has a better vector factor.
1320  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1321  unsigned ChainSizeInBytes,
1322  VectorType *VecTy) const;
1323 
1324  /// \returns The new vector factor value if the target doesn't support \p
1325  /// SizeInBytes stores or has a better vector factor.
1326  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1327  unsigned ChainSizeInBytes,
1328  VectorType *VecTy) const;
1329 
1330  /// Flags describing the kind of vector reduction.
1333  bool IsMaxOp; ///< If the op a min/max kind, true if it's a max operation.
1334  bool IsSigned; ///< Whether the operation is a signed int reduction.
1335  bool NoNaN; ///< If op is an fp min/max, whether NaNs may be present.
1336  };
1337 
1338  /// \returns True if the target prefers reductions to be performed in the loop.
1339  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1340  ReductionFlags Flags) const;
1341 
1342  /// \returns True if the target prefers the reduction select to be kept in
1343  /// the loop when tail folding, i.e.
1344  /// loop:
1345  /// p = phi (0, s)
1346  /// a = add (p, x)
1347  /// s = select (mask, a, p)
1348  /// vecreduce.add(s)
1349  ///
1350  /// This is in contrast to the normal scheme of p = phi (0, a), which allows
1351  /// the select to be pulled out of the loop. If the target can predicate the
1352  /// select(.., add, ..), this can lead to cleaner code generation.
1353  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1354  ReductionFlags Flags) const;
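 /// For contrast, an illustrative sketch (assumed value names) of the normal
 /// scheme mentioned above, where the select sinks out of the loop:
 /// \code
 ///   loop:
 ///     p = phi (0, a)
 ///     a = add (p, x)
 ///   exit:
 ///     s = select (mask, a, p)
 ///     vecreduce.add(s)
 /// \endcode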
1355 
1356  /// \returns True if the target wants to expand the given reduction intrinsic
1357  /// into a shuffle sequence.
1358  bool shouldExpandReduction(const IntrinsicInst *II) const;
1359 
1360  /// \returns the size cost of rematerializing a GlobalValue address relative
1361  /// to a stack reload.
1362  unsigned getGISelRematGlobalCost() const;
1363 
1364  /// \returns True if the target supports scalable vectors.
1365  bool supportsScalableVectors() const;
1366 
1367  /// \name Vector Predication Information
1368  /// @{
1369  /// Whether the target supports the %evl parameter of VP intrinsics
1370  /// efficiently in hardware (see LLVM Language Reference, "Vector
1371  /// Predication Intrinsics"). Use of %evl is discouraged when that is not the case.
1372  bool hasActiveVectorLength() const;
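 /// Illustrative IR (assumed operands): a VP intrinsic carrying an explicit
 /// vector length. When this hook returns false, passes would typically pin
 /// %evl to the full vector width instead of using a dynamic value.
 /// \code
 ///   %r = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %a, <4 x i32> %b,
 ///                                          <4 x i1> %mask, i32 %evl)
 /// \endcode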
1373 
1374  struct VPLegalization {
1375  enum VPTransform {
1376  // keep the predicating parameter
1377  Legal = 0,
1378  // where legal, discard the predicate parameter
1379  Discard = 1,
1380  // transform into something else that is also predicating
1381  Convert = 2
1382  };
1383 
1384  // How to transform the EVL parameter.
1385  // Legal: keep the EVL parameter as it is.
1386  // Discard: Ignore the EVL parameter where it is safe to do so.
1387  // Convert: Fold the EVL into the mask parameter.
1388  VPTransform EVLParamStrategy;
1389 
1390  // How to transform the operator.
1391  // Legal: The target supports this operator.
1392  // Convert: Convert this to a non-VP operation.
1393  // The 'Discard' strategy is invalid.
1394  VPTransform OpStrategy;
1395 
1396  bool shouldDoNothing() const {
1397  return (EVLParamStrategy == Legal) && (OpStrategy == Legal);
1398  }
1399  VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)
1400  : EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {}
1401  };
1402 
1403  /// \returns How the target needs this vector-predicated operation to be
1404  /// transformed.
1405  VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const;
1406  /// @}
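 /// Illustrative sketch (assumed pass-local names): acting on the strategy
 /// returned for a VPIntrinsic VPI.
 /// \code
 ///   TargetTransformInfo::VPLegalization VPLegal =
 ///       TTI.getVPLegalizationStrategy(VPI);
 ///   if (VPLegal.shouldDoNothing())
 ///     return;
 ///   if (VPLegal.EVLParamStrategy == TargetTransformInfo::VPLegalization::Discard) {
 ///     // Replace %evl with the full vector length.
 ///   }
 ///   if (VPLegal.OpStrategy == TargetTransformInfo::VPLegalization::Convert) {
 ///     // Expand the operation into a non-VP equivalent.
 ///   }
 /// \endcode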
1407 
1408  /// @}
1409 
1410 private:
1411  /// Estimate the latency of specified instruction.
1412  /// Returns 1 as the default value.
1413  InstructionCost getInstructionLatency(const Instruction *I) const;
1414 
1415  /// Returns the expected throughput cost of the instruction.
1416  /// Returns -1 if the cost is unknown.
1417  InstructionCost getInstructionThroughput(const Instruction *I) const;
1418 
1419  /// The abstract base class used to type erase specific TTI
1420  /// implementations.
1421  class Concept;
1422 
1423  /// The template model for the base class which wraps a concrete
1424  /// implementation in a type erased interface.
1425  template <typename T> class Model;
1426 
1427  std::unique_ptr<Concept> TTIImpl;
1428 };
1429 
1430 class TargetTransformInfo::Concept {
1431 public:
1432  virtual ~Concept() = 0;
1433  virtual const DataLayout &getDataLayout() const = 0;
1434  virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
1435  ArrayRef<const Value *> Operands,
1436  TTI::TargetCostKind CostKind) = 0;
1437  virtual unsigned getInliningThresholdMultiplier() = 0;
1438  virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
1439  virtual int getInlinerVectorBonusPercent() = 0;
1440  virtual InstructionCost getMemcpyCost(const Instruction *I) = 0;
1441  virtual unsigned
1442  getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
1443  ProfileSummaryInfo *PSI,
1444  BlockFrequencyInfo *BFI) = 0;
1445  virtual InstructionCost getUserCost(const User *U,
1446  ArrayRef<const Value *> Operands,
1447  TargetCostKind CostKind) = 0;
1448  virtual BranchProbability getPredictableBranchThreshold() = 0;
1449  virtual bool hasBranchDivergence() = 0;
1450  virtual bool useGPUDivergenceAnalysis() = 0;
1451  virtual bool isSourceOfDivergence(const Value *V) = 0;
1452  virtual bool isAlwaysUniform(const Value *V) = 0;
1453  virtual unsigned getFlatAddressSpace() = 0;
1454  virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
1455  Intrinsic::ID IID) const = 0;
1456  virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1457  virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
1458  virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
1459  Value *OldV,
1460  Value *NewV) const = 0;
1461  virtual bool isLoweredToCall(const Function *F) = 0;
1462  virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
1463  UnrollingPreferences &UP,
1464  OptimizationRemarkEmitter *ORE) = 0;
1465  virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
1466  PeelingPreferences &PP) = 0;
1467  virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
1468  AssumptionCache &AC,
1469  TargetLibraryInfo *LibInfo,
1470  HardwareLoopInfo &HWLoopInfo) = 0;
1471  virtual bool
1472  preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
1473  AssumptionCache &AC, TargetLibraryInfo *TLI,
1474  DominatorTree *DT, const LoopAccessInfo *LAI) = 0;
1475  virtual bool emitGetActiveLaneMask() = 0;
1476  virtual Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
1477  IntrinsicInst &II) = 0;
1478  virtual Optional<Value *>
1479  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
1480  APInt DemandedMask, KnownBits &Known,
1481  bool &KnownBitsComputed) = 0;
1482  virtual Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
1483  InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
1484  APInt &UndefElts2, APInt &UndefElts3,
1485  std::function<void(Instruction *, unsigned, APInt, APInt &)>
1486  SimplifyAndSetOp) = 0;
1487  virtual bool isLegalAddImmediate(int64_t Imm) = 0;
1488  virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
1489  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
1490  int64_t BaseOffset, bool HasBaseReg,
1491  int64_t Scale, unsigned AddrSpace,
1492  Instruction *I) = 0;
1493  virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
1494  TargetTransformInfo::LSRCost &C2) = 0;
1495  virtual bool isNumRegsMajorCostOfLSR() = 0;
1496  virtual bool isProfitableLSRChainElement(Instruction *I) = 0;
1497  virtual bool canMacroFuseCmp() = 0;
1498  virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
1499  LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
1500  TargetLibraryInfo *LibInfo) = 0;
1501  virtual AddressingModeKind
1502  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const = 0;
1503  virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
1504  virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
1505  virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
1506  virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
1507  virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
1508  virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
1509  virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
1510  virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
1511  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
1512  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
1513  virtual bool prefersVectorizedAddressing() = 0;
1514  virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
1515  int64_t BaseOffset,
1516  bool HasBaseReg, int64_t Scale,
1517  unsigned AddrSpace) = 0;
1518  virtual bool LSRWithInstrQueries() = 0;
1519  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
1520  virtual bool isProfitableToHoist(Instruction *I) = 0;
1521  virtual bool useAA() = 0;
1522  virtual bool isTypeLegal(Type *Ty) = 0;
1523  virtual InstructionCost getRegUsageForType(Type *Ty) = 0;
1524  virtual bool shouldBuildLookupTables() = 0;
1525  virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
1526  virtual bool shouldBuildRelLookupTables() = 0;
1527  virtual bool useColdCCForColdCall(Function &F) = 0;
1528  virtual InstructionCost getScalarizationOverhead(VectorType *Ty,
1529  const APInt &DemandedElts,
1530  bool Insert,
1531  bool Extract) = 0;
1532  virtual InstructionCost
1533  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1534  ArrayRef<Type *> Tys) = 0;
1535  virtual bool supportsEfficientVectorElementLoadStore() = 0;
1536  virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
1537  virtual MemCmpExpansionOptions
1538  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
1539  virtual bool enableInterleavedAccessVectorization() = 0;
1540  virtual bool enableMaskedInterleavedAccessVectorization() = 0;
1541  virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
1542  virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
1543  unsigned BitWidth,
1544  unsigned AddressSpace,
1545  Align Alignment,
1546  bool *Fast) = 0;
1547  virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
1548  virtual bool haveFastSqrt(Type *Ty) = 0;
1549  virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
1550  virtual InstructionCost getFPOpCost(Type *Ty) = 0;
1551  virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
1552  const APInt &Imm, Type *Ty) = 0;
1553  virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
1554  TargetCostKind CostKind) = 0;
1555  virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
1556  const APInt &Imm, Type *Ty,
1557  TargetCostKind CostKind,
1558  Instruction *Inst = nullptr) = 0;
1559  virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
1560  const APInt &Imm, Type *Ty,
1561  TargetCostKind CostKind) = 0;
1562  virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
1563  virtual unsigned getRegisterClassForType(bool Vector,
1564  Type *Ty = nullptr) const = 0;
1565  virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
1566  virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0;
1567  virtual unsigned getMinVectorRegisterBitWidth() const = 0;
1568  virtual Optional<unsigned> getMaxVScale() const = 0;
1569  virtual bool shouldMaximizeVectorBandwidth() const = 0;
1570  virtual ElementCount getMinimumVF(unsigned ElemWidth,
1571  bool IsScalable) const = 0;
1572  virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
1573  virtual bool shouldConsiderAddressTypePromotion(
1574  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1575  virtual unsigned getCacheLineSize() const = 0;
1576  virtual Optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
1577  virtual Optional<unsigned> getCacheAssociativity(CacheLevel Level) const = 0;
1578 
1579  /// \return How far ahead of a load we should place the prefetch
1580  /// instruction. This is currently measured in number of
1581  /// instructions.
1582  virtual unsigned getPrefetchDistance() const = 0;
1583 
1584  /// \return Some HW prefetchers can handle accesses up to a certain
1585  /// constant stride. This is the minimum stride in bytes where it
1586  /// makes sense to start adding SW prefetches. The default is 1,
1587  /// i.e. prefetch with any stride. Sometimes prefetching is beneficial
1588  /// even below the HW prefetcher limit, and the arguments provided are
1589  /// meant to serve as a basis for deciding this for a particular loop.
1590  virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1591  unsigned NumStridedMemAccesses,
1592  unsigned NumPrefetches,
1593  bool HasCall) const = 0;
1594 
1595  /// \return The maximum number of iterations to prefetch ahead. If
1596  /// the required number of iterations is more than this number, no
1597  /// prefetching is performed.
1598  virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
1599 
1600  /// \return True if prefetching should also be done for writes.
1601  virtual bool enableWritePrefetching() const = 0;
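 /// Illustrative sketch (assumed loop statistics such as StrideInBytes and
 /// LoopSizeInInsts): how a software-prefetch pass might combine these hooks.
 /// \code
 ///   unsigned Dist = TTI.getPrefetchDistance();
 ///   unsigned MinStride = TTI.getMinPrefetchStride(
 ///       NumMemAccesses, NumStridedMemAccesses, NumPrefetches, HasCall);
 ///   if (Dist == 0 || StrideInBytes < MinStride)
 ///     return; // Leave this access to the hardware prefetcher.
 ///   unsigned ItersAhead = std::max(1u, Dist / LoopSizeInInsts);
 ///   if (ItersAhead > TTI.getMaxPrefetchIterationsAhead())
 ///     return; // Too far ahead for prefetching to pay off.
 ///   // Otherwise emit a prefetch ItersAhead iterations in advance.
 /// \endcode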
1602 
1603  virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
1604  virtual InstructionCost getArithmeticInstrCost(
1605  unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
1606  OperandValueKind Opd1Info, OperandValueKind Opd2Info,
1607  OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo,
1608  ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
1609  virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
1610  ArrayRef<int> Mask, int Index,
1611  VectorType *SubTp) = 0;
1612  virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
1613  Type *Src, CastContextHint CCH,
1614  TTI::TargetCostKind CostKind,
1615  const Instruction *I) = 0;
1616  virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1617  VectorType *VecTy,
1618  unsigned Index) = 0;
1619  virtual InstructionCost getCFInstrCost(unsigned Opcode,
1620  TTI::TargetCostKind CostKind,
1621  const Instruction *I = nullptr) = 0;
1622  virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
1623  Type *CondTy,
1624  CmpInst::Predicate VecPred,
1625  TTI::TargetCostKind CostKind,
1626  const Instruction *I) = 0;
1627  virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
1628  unsigned Index) = 0;
1629  virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
1630  Align Alignment,
1631  unsigned AddressSpace,
1632  TTI::TargetCostKind CostKind,
1633  const Instruction *I) = 0;
1634  virtual InstructionCost
1635  getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1636  unsigned AddressSpace,
1637  TTI::TargetCostKind CostKind) = 0;
1638  virtual InstructionCost
1639  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
1640  bool VariableMask, Align Alignment,
1641  TTI::TargetCostKind CostKind,
1642  const Instruction *I = nullptr) = 0;
1643 
1644  virtual InstructionCost getInterleavedMemoryOpCost(
1645  unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1646  Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
1647  bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
1648  virtual InstructionCost
1649  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
1650  Optional<FastMathFlags> FMF,
1651  TTI::TargetCostKind CostKind) = 0;
1652  virtual InstructionCost
1653  getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
1654  TTI::TargetCostKind CostKind) = 0;
1655  virtual InstructionCost getExtendedAddReductionCost(
1656  bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1657  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) = 0;
1658  virtual InstructionCost
1659  getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
1660  TTI::TargetCostKind CostKind) = 0;
1661  virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy,
1662  ArrayRef<Type *> Tys,
1663  TTI::TargetCostKind CostKind) = 0;
1664  virtual unsigned getNumberOfParts(Type *Tp) = 0;
1665  virtual InstructionCost
1666  getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr) = 0;
1667  virtual InstructionCost
1668  getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
1669  virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1670  MemIntrinsicInfo &Info) = 0;
1671  virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
1672  virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1673  Type *ExpectedType) = 0;
1674  virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1675  unsigned SrcAddrSpace,
1676  unsigned DestAddrSpace,
1677  unsigned SrcAlign,
1678  unsigned DestAlign) const = 0;
1679  virtual void getMemcpyLoopResidualLoweringType(
1680  SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1681  unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1682  unsigned SrcAlign, unsigned DestAlign) const = 0;
1683  virtual bool areInlineCompatible(const Function *Caller,
1684  const Function *Callee) const = 0;
1685  virtual bool
1686  areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
1687  SmallPtrSetImpl<Argument *> &Args) const = 0;
1688  virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1689  virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1690  virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
1691  virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
1692  virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
1693  virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1694  Align Alignment,
1695  unsigned AddrSpace) const = 0;
1696  virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1697  Align Alignment,
1698  unsigned AddrSpace) const = 0;
1699  virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
1700  ElementCount VF) const = 0;
1701  virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0;
1702  virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1703  unsigned ChainSizeInBytes,
1704  VectorType *VecTy) const = 0;
1705  virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1706  unsigned ChainSizeInBytes,
1707  VectorType *VecTy) const = 0;
1708  virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1709  ReductionFlags) const = 0;
1710  virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1711  ReductionFlags) const = 0;
1712  virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
1713  virtual unsigned getGISelRematGlobalCost() const = 0;
1714  virtual bool supportsScalableVectors() const = 0;
1715  virtual bool hasActiveVectorLength() const = 0;
1716  virtual InstructionCost getInstructionLatency(const Instruction *I) = 0;
1717  virtual VPLegalization
1718  getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
1719 };
1720 
1721 template <typename T>
1722 class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
1723  T Impl;
1724 
1725 public:
1726  Model(T Impl) : Impl(std::move(Impl)) {}
1727  ~Model() override {}
1728 
1729  const DataLayout &getDataLayout() const override {
1730  return Impl.getDataLayout();
1731  }
1732 
1733  InstructionCost
1734  getGEPCost(Type *PointeeType, const Value *Ptr,
1735  ArrayRef<const Value *> Operands,
1736  TargetTransformInfo::TargetCostKind CostKind) override {
1737  return Impl.getGEPCost(PointeeType, Ptr, Operands);
1738  }
1739  unsigned getInliningThresholdMultiplier() override {
1740  return Impl.getInliningThresholdMultiplier();
1741  }
1742  unsigned adjustInliningThreshold(const CallBase *CB) override {
1743  return Impl.adjustInliningThreshold(CB);
1744  }
1745  int getInlinerVectorBonusPercent() override {
1746  return Impl.getInlinerVectorBonusPercent();
1747  }
1748  InstructionCost getMemcpyCost(const Instruction *I) override {
1749  return Impl.getMemcpyCost(I);
1750  }
1751  InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
1752  TargetCostKind CostKind) override {
1753  return Impl.getUserCost(U, Operands, CostKind);
1754  }
1755  BranchProbability getPredictableBranchThreshold() override {
1756  return Impl.getPredictableBranchThreshold();
1757  }
1758  bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
1759  bool useGPUDivergenceAnalysis() override {
1760  return Impl.useGPUDivergenceAnalysis();
1761  }
1762  bool isSourceOfDivergence(const Value *V) override {
1763  return Impl.isSourceOfDivergence(V);
1764  }
1765 
1766  bool isAlwaysUniform(const Value *V) override {
1767  return Impl.isAlwaysUniform(V);
1768  }
1769 
1770  unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
1771 
1772  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
1773  Intrinsic::ID IID) const override {
1774  return Impl.collectFlatAddressOperands(OpIndexes, IID);
1775  }
1776 
1777  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
1778  return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
1779  }
1780 
1781  unsigned getAssumedAddrSpace(const Value *V) const override {
1782  return Impl.getAssumedAddrSpace(V);
1783  }
1784 
1785  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
1786  Value *NewV) const override {
1787  return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
1788  }
1789 
1790  bool isLoweredToCall(const Function *F) override {
1791  return Impl.isLoweredToCall(F);
1792  }
1793  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
1794  UnrollingPreferences &UP,
1795  OptimizationRemarkEmitter *ORE) override {
1796  return Impl.getUnrollingPreferences(L, SE, UP, ORE);
1797  }
1798  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
1799  PeelingPreferences &PP) override {
1800  return Impl.getPeelingPreferences(L, SE, PP);
1801  }
1802  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
1803  AssumptionCache &AC, TargetLibraryInfo *LibInfo,
1804  HardwareLoopInfo &HWLoopInfo) override {
1805  return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
1806  }
1807  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
1808  AssumptionCache &AC, TargetLibraryInfo *TLI,
1809  DominatorTree *DT,
1810  const LoopAccessInfo *LAI) override {
1811  return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
1812  }
1813  bool emitGetActiveLaneMask() override {
1814  return Impl.emitGetActiveLaneMask();
1815  }
1816  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
1817  IntrinsicInst &II) override {
1818  return Impl.instCombineIntrinsic(IC, II);
1819  }
1820  Optional<Value *>
1821  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
1822  APInt DemandedMask, KnownBits &Known,
1823  bool &KnownBitsComputed) override {
1824  return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
1825  KnownBitsComputed);
1826  }
1827  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
1828  InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
1829  APInt &UndefElts2, APInt &UndefElts3,
1830  std::function<void(Instruction *, unsigned, APInt, APInt &)>
1831  SimplifyAndSetOp) override {
1832  return Impl.simplifyDemandedVectorEltsIntrinsic(
1833  IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
1834  SimplifyAndSetOp);
1835  }
1836  bool isLegalAddImmediate(int64_t Imm) override {
1837  return Impl.isLegalAddImmediate(Imm);
1838  }
1839  bool isLegalICmpImmediate(int64_t Imm) override {
1840  return Impl.isLegalICmpImmediate(Imm);
1841  }
1842  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
1843  bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
1844  Instruction *I) override {
1845  return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
1846  AddrSpace, I);
1847  }
1848  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
1849  TargetTransformInfo::LSRCost &C2) override {
1850  return Impl.isLSRCostLess(C1, C2);
1851  }
1852  bool isNumRegsMajorCostOfLSR() override {
1853  return Impl.isNumRegsMajorCostOfLSR();
1854  }
1855  bool isProfitableLSRChainElement(Instruction *I) override {
1856  return Impl.isProfitableLSRChainElement(I);
1857  }
1858  bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
1859  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
1860  DominatorTree *DT, AssumptionCache *AC,
1861  TargetLibraryInfo *LibInfo) override {
1862  return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
1863  }
1864  AddressingModeKind
1865  getPreferredAddressingMode(const Loop *L,
1866  ScalarEvolution *SE) const override {
1867  return Impl.getPreferredAddressingMode(L, SE);
1868  }
1869  bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
1870  return Impl.isLegalMaskedStore(DataType, Alignment);
1871  }
1872  bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
1873  return Impl.isLegalMaskedLoad(DataType, Alignment);
1874  }
1875  bool isLegalNTStore(Type *DataType, Align Alignment) override {
1876  return Impl.isLegalNTStore(DataType, Alignment);
1877  }
1878  bool isLegalNTLoad(Type *DataType, Align Alignment) override {
1879  return Impl.isLegalNTLoad(DataType, Alignment);
1880  }
1881  bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
1882  return Impl.isLegalMaskedScatter(DataType, Alignment);
1883  }
1884  bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
1885  return Impl.isLegalMaskedGather(DataType, Alignment);
1886  }
1887  bool isLegalMaskedCompressStore(Type *DataType) override {
1888  return Impl.isLegalMaskedCompressStore(DataType);
1889  }
1890  bool isLegalMaskedExpandLoad(Type *DataType) override {
1891  return Impl.isLegalMaskedExpandLoad(DataType);
1892  }
1893  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
1894  return Impl.hasDivRemOp(DataType, IsSigned);
1895  }
1896  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
1897  return Impl.hasVolatileVariant(I, AddrSpace);
1898  }
1899  bool prefersVectorizedAddressing() override {
1900  return Impl.prefersVectorizedAddressing();
1901  }
1902  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
1903  int64_t BaseOffset, bool HasBaseReg,
1904  int64_t Scale,
1905  unsigned AddrSpace) override {
1906  return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
1907  AddrSpace);
1908  }
1909  bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
1910  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
1911  return Impl.isTruncateFree(Ty1, Ty2);
1912  }
1913  bool isProfitableToHoist(Instruction *I) override {
1914  return Impl.isProfitableToHoist(I);
1915  }
1916  bool useAA() override { return Impl.useAA(); }
1917  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
1918  InstructionCost getRegUsageForType(Type *Ty) override {
1919  return Impl.getRegUsageForType(Ty);
1920  }
1921  bool shouldBuildLookupTables() override {
1922  return Impl.shouldBuildLookupTables();
1923  }
1924  bool shouldBuildLookupTablesForConstant(Constant *C) override {
1925  return Impl.shouldBuildLookupTablesForConstant(C);
1926  }
1927  bool shouldBuildRelLookupTables() override {
1928  return Impl.shouldBuildRelLookupTables();
1929  }
1930  bool useColdCCForColdCall(Function &F) override {
1931  return Impl.useColdCCForColdCall(F);
1932  }
1933 
1934  InstructionCost getScalarizationOverhead(VectorType *Ty,
1935  const APInt &DemandedElts,
1936  bool Insert, bool Extract) override {
1937  return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
1938  }
1939  InstructionCost
1940  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1941  ArrayRef<Type *> Tys) override {
1942  return Impl.getOperandsScalarizationOverhead(Args, Tys);
1943  }
1944 
1945  bool supportsEfficientVectorElementLoadStore() override {
1946  return Impl.supportsEfficientVectorElementLoadStore();
1947  }
1948 
1949  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
1950  return Impl.enableAggressiveInterleaving(LoopHasReductions);
1951  }
1952  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
1953  bool IsZeroCmp) const override {
1954  return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
1955  }
1956  bool enableInterleavedAccessVectorization() override {
1957  return Impl.enableInterleavedAccessVectorization();
1958  }
1959  bool enableMaskedInterleavedAccessVectorization() override {
1960  return Impl.enableMaskedInterleavedAccessVectorization();
1961  }
1962  bool isFPVectorizationPotentiallyUnsafe() override {
1963  return Impl.isFPVectorizationPotentiallyUnsafe();
1964  }
1965  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
1966  unsigned AddressSpace, Align Alignment,
1967  bool *Fast) override {
1968  return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
1969  Alignment, Fast);
1970  }
1971  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
1972  return Impl.getPopcntSupport(IntTyWidthInBit);
1973  }
1974  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
1975 
1976  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
1977  return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
1978  }
1979 
1980  InstructionCost getFPOpCost(Type *Ty) override {
1981  return Impl.getFPOpCost(Ty);
1982  }
1983 
1984  InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
1985  const APInt &Imm, Type *Ty) override {
1986  return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
1987  }
1988  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
1989  TargetCostKind CostKind) override {
1990  return Impl.getIntImmCost(Imm, Ty, CostKind);
1991  }
1992  InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
1993  const APInt &Imm, Type *Ty,
1994  TargetCostKind CostKind,
1995  Instruction *Inst = nullptr) override {
1996  return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
1997  }
1998  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
1999  const APInt &Imm, Type *Ty,
2000  TargetCostKind CostKind) override {
2001  return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
2002  }
2003  unsigned getNumberOfRegisters(unsigned ClassID) const override {
2004  return Impl.getNumberOfRegisters(ClassID);
2005  }
2006  unsigned getRegisterClassForType(bool Vector,
2007  Type *Ty = nullptr) const override {
2008  return Impl.getRegisterClassForType(Vector, Ty);
2009  }
2010  const char *getRegisterClassName(unsigned ClassID) const override {
2011  return Impl.getRegisterClassName(ClassID);
2012  }
2013  TypeSize getRegisterBitWidth(RegisterKind K) const override {
2014  return Impl.getRegisterBitWidth(K);
2015  }
2016  unsigned getMinVectorRegisterBitWidth() const override {
2017  return Impl.getMinVectorRegisterBitWidth();
2018  }
2019  Optional<unsigned> getMaxVScale() const override {
2020  return Impl.getMaxVScale();
2021  }
2022  bool shouldMaximizeVectorBandwidth() const override {
2023  return Impl.shouldMaximizeVectorBandwidth();
2024  }
2025  ElementCount getMinimumVF(unsigned ElemWidth,
2026  bool IsScalable) const override {
2027  return Impl.getMinimumVF(ElemWidth, IsScalable);
2028  }
2029  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
2030  return Impl.getMaximumVF(ElemWidth, Opcode);
2031  }
2032  bool shouldConsiderAddressTypePromotion(
2033  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
2034  return Impl.shouldConsiderAddressTypePromotion(
2035  I, AllowPromotionWithoutCommonHeader);
2036  }
2037  unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
2038  Optional<unsigned> getCacheSize(CacheLevel Level) const override {
2039  return Impl.getCacheSize(Level);
2040  }
2041  Optional<unsigned> getCacheAssociativity(CacheLevel Level) const override {
2042  return Impl.getCacheAssociativity(Level);
2043  }
2044 
2045  /// Return the preferred prefetch distance in terms of instructions.
2046  ///
2047  unsigned getPrefetchDistance() const override {
2048  return Impl.getPrefetchDistance();
2049  }
2050 
2051  /// Return the minimum stride necessary to trigger software
2052  /// prefetching.
2053  ///
2054  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
2055  unsigned NumStridedMemAccesses,
2056  unsigned NumPrefetches,
2057  bool HasCall) const override {
2058  return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
2059  NumPrefetches, HasCall);
2060  }
2061 
2062  /// Return the maximum prefetch distance in terms of loop
2063  /// iterations.
2064  ///
2065  unsigned getMaxPrefetchIterationsAhead() const override {
2066  return Impl.getMaxPrefetchIterationsAhead();
2067  }
2068 
2069  /// \return True if prefetching should also be done for writes.
2070  bool enableWritePrefetching() const override {
2071  return Impl.enableWritePrefetching();
2072  }
2073 
2074  unsigned getMaxInterleaveFactor(unsigned VF) override {
2075  return Impl.getMaxInterleaveFactor(VF);
2076  }
2077  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
2078  unsigned &JTSize,
2079  ProfileSummaryInfo *PSI,
2080  BlockFrequencyInfo *BFI) override {
2081  return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
2082  }
2083  InstructionCost getArithmeticInstrCost(
2084  unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
2085  OperandValueKind Opd1Info, OperandValueKind Opd2Info,
2086  OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo,
2087  ArrayRef<const Value *> Args,
2088  const Instruction *CxtI = nullptr) override {
2089  return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
2090  Opd1PropInfo, Opd2PropInfo, Args, CxtI);
2091  }
2092  InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
2093  ArrayRef<int> Mask, int Index,
2094  VectorType *SubTp) override {
2095  return Impl.getShuffleCost(Kind, Tp, Mask, Index, SubTp);
2096  }
2097  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
2098  CastContextHint CCH,
2099  TTI::TargetCostKind CostKind,
2100  const Instruction *I) override {
2101  return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
2102  }
2103  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
2104  VectorType *VecTy,
2105  unsigned Index) override {
2106  return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
2107  }
2108  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
2109  const Instruction *I = nullptr) override {
2110  return Impl.getCFInstrCost(Opcode, CostKind, I);
2111  }
2112  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
2113  CmpInst::Predicate VecPred,
2114  TTI::TargetCostKind CostKind,
2115  const Instruction *I) override {
2116  return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
2117  }
2118  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
2119  unsigned Index) override {
2120  return Impl.getVectorInstrCost(Opcode, Val, Index);
2121  }
2122  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2123  unsigned AddressSpace,
2124  TTI::TargetCostKind CostKind,
2125  const Instruction *I) override {
2126  return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2127  CostKind, I);
2128  }
2129  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
2130  Align Alignment, unsigned AddressSpace,
2131  TTI::TargetCostKind CostKind) override {
2132  return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2133  CostKind);
2134  }
2135  InstructionCost
2136  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2137  bool VariableMask, Align Alignment,
2138  TTI::TargetCostKind CostKind,
2139  const Instruction *I = nullptr) override {
2140  return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
2141  Alignment, CostKind, I);
2142  }
2143  InstructionCost getInterleavedMemoryOpCost(
2144  unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
2145  Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
2146  bool UseMaskForCond, bool UseMaskForGaps) override {
2147  return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
2148  Alignment, AddressSpace, CostKind,
2149  UseMaskForCond, UseMaskForGaps);
2150  }
2151  InstructionCost
2152  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
2153  Optional<FastMathFlags> FMF,
2154  TTI::TargetCostKind CostKind) override {
2155  return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
2156  }
2157  InstructionCost
2158  getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
2159  TTI::TargetCostKind CostKind) override {
2160  return Impl.getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
2161  }
2162  InstructionCost getExtendedAddReductionCost(
2163  bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
2164  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) override {
2165  return Impl.getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, Ty,
2166  CostKind);
2167  }
2168  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
2169  TTI::TargetCostKind CostKind) override {
2170  return Impl.getIntrinsicInstrCost(ICA, CostKind);
2171  }
2172  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
2173  ArrayRef<Type *> Tys,
2174  TTI::TargetCostKind CostKind) override {
2175  return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
2176  }
2177  unsigned getNumberOfParts(Type *Tp) override {
2178  return Impl.getNumberOfParts(Tp);
2179  }
2180  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
2181  const SCEV *Ptr) override {
2182  return Impl.getAddressComputationCost(Ty, SE, Ptr);
2183  }
2184  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
2185  return Impl.getCostOfKeepingLiveOverCall(Tys);
2186  }
2187  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
2188  MemIntrinsicInfo &Info) override {
2189  return Impl.getTgtMemIntrinsic(Inst, Info);
2190  }
2191  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
2192  return Impl.getAtomicMemIntrinsicMaxElementSize();
2193  }
2194  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
2195  Type *ExpectedType) override {
2196  return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
2197  }
2198  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
2199  unsigned SrcAddrSpace, unsigned DestAddrSpace,
2200  unsigned SrcAlign,
2201  unsigned DestAlign) const override {
2202  return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
2203  DestAddrSpace, SrcAlign, DestAlign);
2204  }
2205  void getMemcpyLoopResidualLoweringType(
2206  SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
2207  unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
2208  unsigned SrcAlign, unsigned DestAlign) const override {
2209  Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
2210  SrcAddrSpace, DestAddrSpace,
2211  SrcAlign, DestAlign);
2212  }
2213  bool areInlineCompatible(const Function *Caller,
2214  const Function *Callee) const override {
2215  return Impl.areInlineCompatible(Caller, Callee);
2216  }
2217  bool areFunctionArgsABICompatible(
2218  const Function *Caller, const Function *Callee,
2219  SmallPtrSetImpl<Argument *> &Args) const override {
2220  return Impl.areFunctionArgsABICompatible(Caller, Callee, Args);
2221  }
2222  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
2223  return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
2224  }
2225  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
2226  return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
2227  }
2228  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
2229  return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
2230  }
2231  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
2232  return Impl.isLegalToVectorizeLoad(LI);
2233  }
2234  bool isLegalToVectorizeStore(StoreInst *SI) const override {
2235  return Impl.isLegalToVectorizeStore(SI);
2236  }
2237  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
2238  unsigned AddrSpace) const override {
2239  return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
2240  AddrSpace);
2241  }
2242  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
2243  unsigned AddrSpace) const override {
2244  return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
2245  AddrSpace);
2246  }
2247  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
2248  ElementCount VF) const override {
2249  return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
2250  }
2251  bool isElementTypeLegalForScalableVector(Type *Ty) const override {
2252  return Impl.isElementTypeLegalForScalableVector(Ty);
2253  }
2254  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
2255  unsigned ChainSizeInBytes,
2256  VectorType *VecTy) const override {
2257  return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
2258  }
2259  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
2260  unsigned ChainSizeInBytes,
2261  VectorType *VecTy) const override {
2262  return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
2263  }
2264  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
2265  ReductionFlags Flags) const override {
2266  return Impl.preferInLoopReduction(Opcode, Ty, Flags);
2267  }
2268  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
2269  ReductionFlags Flags) const override {
2270  return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
2271  }
2272  bool shouldExpandReduction(const IntrinsicInst *II) const override {
2273  return Impl.shouldExpandReduction(II);
2274  }
2275 
2276  unsigned getGISelRematGlobalCost() const override {
2277  return Impl.getGISelRematGlobalCost();
2278  }
2279 
2280  bool supportsScalableVectors() const override {
2281  return Impl.supportsScalableVectors();
2282  }
2283 
2284  bool hasActiveVectorLength() const override {
2285  return Impl.hasActiveVectorLength();
2286  }
2287 
2288  InstructionCost getInstructionLatency(const Instruction *I) override {
2289  return Impl.getInstructionLatency(I);
2290  }
2291 
2292  VPLegalization
2293  getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
2294  return Impl.getVPLegalizationStrategy(PI);
2295  }
2296 };
2297 
2298 template <typename T>
2299 TargetTransformInfo::TargetTransformInfo(T Impl)
2300  : TTIImpl(new Model<T>(Impl)) {}
2301 
2302 /// Analysis pass providing the \c TargetTransformInfo.
2303 ///
2304 /// The core idea of the TargetIRAnalysis is to expose an interface through
2305 /// which LLVM targets can analyze and provide information about the middle
2306 /// end's target-independent IR. This supports use cases such as target-aware
2307 /// cost modeling of IR constructs.
2308 ///
2309 /// This is a function analysis because much of the cost modeling for targets
2310 /// is done in a subtarget specific way and LLVM supports compiling different
2311 /// functions targeting different subtargets in order to support runtime
2312 /// dispatch according to the observed subtarget.
2313 class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
2314 public:
2315  typedef TargetTransformInfo Result;
2316 
2317  /// Default construct a target IR analysis.
2318  ///
2319  /// This will use the module's datalayout to construct a baseline
2320  /// conservative TTI result.
2321  TargetIRAnalysis();
2322 
2323  /// Construct an IR analysis pass around a target-provided callback.
2324  ///
2325  /// The callback will be called with a particular function for which the TTI
2326  /// is needed and must return a TTI object for that function.
2327  TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
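 ///
 /// Illustrative sketch (TM is an assumed TargetMachine pointer): targets
 /// typically wire the callback to their TargetMachine, e.g.
 /// \code
 ///   TargetIRAnalysis TIRA([&](const Function &F) {
 ///     return TM->getTargetTransformInfo(F);
 ///   });
 /// \endcode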
2328 
2329  // Value semantics. We spell out the constructors for MSVC.
2330  TargetIRAnalysis(const TargetIRAnalysis &Arg)
2331  : TTICallback(Arg.TTICallback) {}
2332  TargetIRAnalysis(TargetIRAnalysis &&Arg)
2333  : TTICallback(std::move(Arg.TTICallback)) {}
2334  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
2335  TTICallback = RHS.TTICallback;
2336  return *this;
2337  }
2338  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
2339  TTICallback = std::move(RHS.TTICallback);
2340  return *this;
2341  }
2342 
2343  Result run(const Function &F, FunctionAnalysisManager &);
2344 
2345 private:
2346  friend AnalysisInfoMixin<TargetIRAnalysis>;
2347  static AnalysisKey Key;
2348 
2349  /// The callback used to produce a result.
2350  ///
2351  /// We use a completely opaque callback so that targets can provide whatever
2352  /// mechanism they desire for constructing the TTI for a given function.
2353  ///
2354  /// FIXME: Should we really use std::function? It's relatively inefficient.
2355  /// It might be possible to arrange for even stateful callbacks to outlive
2356  /// the analysis and thus use a function_ref which would be lighter weight.
2357  /// This may also be less error prone as the callback is likely to reference
2358  /// the external TargetMachine, and that reference needs to never dangle.
2359  std::function<Result(const Function &)> TTICallback;
2360 
2361  /// Helper function used as the callback in the default constructor.
2362  static Result getDefaultTTI(const Function &F);
2363 };
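/// Illustrative usage (new pass manager; MyPass is an assumed pass): the
/// result is normally obtained through the FunctionAnalysisManager rather
/// than by constructing TargetIRAnalysis directly.
/// \code
///   PreservedAnalyses MyPass::run(Function &F, FunctionAnalysisManager &FAM) {
///     TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
///     if (TTI.shouldBuildLookupTables()) {
///       // ... transform using target cost information ...
///     }
///     return PreservedAnalyses::all();
///   }
/// \endcode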
2364 
2365 /// Wrapper pass for TargetTransformInfo.
2366 ///
2367 /// This pass can be constructed from a TTI object which it stores internally
2368 /// and is queried by passes.
2369 class TargetTransformInfoWrapperPass : public ImmutablePass {
2370  TargetIRAnalysis TIRA;
2371  Optional<TargetTransformInfo> TTI;
2372 
2373  virtual void anchor();
2374 
2375 public:
2376  static char ID;
2377 
2378  /// We must provide a default constructor for the pass but it should
2379  /// never be used.
2380  ///
2381  /// Use the constructor below or call one of the creation routines.
2382  TargetTransformInfoWrapperPass();
2383 
2384  explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
2385 
2386  TargetTransformInfo &getTTI(const Function &F);
2387 };
2388 
2389 /// Create an analysis pass wrapper around a TTI object.
2390 ///
2391 /// This analysis pass just holds the TTI instance and makes it available to
2392 /// clients.
2393 ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
2394 
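/// Illustrative usage (legacy pass manager; MyLegacyPass is an assumed pass):
/// \code
///   void MyLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
///     AU.addRequired<TargetTransformInfoWrapperPass>();
///   }
///   bool MyLegacyPass::runOnFunction(Function &F) {
///     TargetTransformInfo &TTI =
///         getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
///     (void)TTI;
///     return false;
///   }
/// \endcode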
2395 } // namespace llvm
2396 
2397 #endif
llvm::TargetTransformInfo::ReductionFlags::IsMaxOp
bool IsMaxOp
If the op a min/max kind, true if it's a max operation.
Definition: TargetTransformInfo.h:1333
llvm::TargetTransformInfo::CastContextHint::GatherScatter
@ GatherScatter
The cast is used with a gather/scatter.
llvm::TargetTransformInfo::isHardwareLoopProfitable
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
Query the target whether it would be profitable to convert the given loop into a hardware loop.
Definition: TargetTransformInfo.cpp:279
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::TargetTransformInfo::Concept::getExtractWithExtendCost
virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)=0
llvm::TargetTransformInfo::CacheLevel::L1D
@ L1D
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::TargetTransformInfo::PSK_FastHardware
@ PSK_FastHardware
Definition: TargetTransformInfo.h:588
llvm::TargetTransformInfo::Concept::getPopcntSupport
virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit)=0
llvm::TargetTransformInfo::Concept::getGEPCost
virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::CastContextHint::Masked
@ Masked
The cast is used with a masked load/store.
llvm::TargetTransformInfo::getMemcpyLoopLoweringType
Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign) const
Definition: TargetTransformInfo.cpp:941
llvm::TargetTransformInfo::UnrollingPreferences::BEInsns
unsigned BEInsns
Definition: TargetTransformInfo.h:474
llvm::TargetTransformInfo::UnrollingPreferences::PartialOptSizeThreshold
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
Definition: TargetTransformInfo.h:453
llvm::TargetTransformInfo::SK_Select
@ SK_Select
Selects elements from the corresponding lane of either source operand.
Definition: TargetTransformInfo.h:855
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2313
llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
Definition: TargetTransformInfo.h:481
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:211
llvm::TargetTransformInfo::LSRCost::NumRegs
unsigned NumRegs
Definition: TargetTransformInfo.h:414
llvm::TargetTransformInfo::TCC_Expensive
@ TCC_Expensive
The cost of a 'div' instruction on x86.
Definition: TargetTransformInfo.h:264
llvm::TargetTransformInfo::UnrollingPreferences::PartialThreshold
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
Definition: TargetTransformInfo.h:449
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::TargetTransformInfo::ReductionFlags
Flags describing the kind of vector reduction.
Definition: TargetTransformInfo.h:1331
llvm::TargetTransformInfo::instCombineIntrinsic
Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Targets can implement their own combinations for target-specific intrinsics.
Definition: TargetTransformInfo.cpp:297
llvm::TargetTransformInfo::Concept::isHardwareLoopProfitable
virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo)=0
llvm::TargetTransformInfo::Concept::isSourceOfDivergence
virtual bool isSourceOfDivergence(const Value *V)=0
llvm::TargetTransformInfo::Concept::enableMaskedInterleavedAccessVectorization
virtual bool enableMaskedInterleavedAccessVectorization()=0
llvm::MemIntrinsicInfo::PtrVal
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.
Definition: TargetTransformInfo.h:75
llvm::TargetTransformInfo::preferPredicateOverEpilogue
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, const LoopAccessInfo *LAI) const
Query the target whether it would be prefered to create a predicated vector loop, which can avoid the...
Definition: TargetTransformInfo.cpp:285
llvm::TargetTransformInfo::Concept::rewriteIntrinsicWithAddressSpace
virtual Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const =0
llvm::TargetTransformInfo::getMinVectorRegisterBitWidth
unsigned getMinVectorRegisterBitWidth() const
Definition: TargetTransformInfo.cpp:594
llvm::HardwareLoopInfo::LoopDecrement
Value * LoopDecrement
Definition: TargetTransformInfo.h:103
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::TargetTransformInfo::getShuffleCost
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask=None, int Index=0, VectorType *SubTp=nullptr) const
Definition: TargetTransformInfo.cpp:721
llvm::TargetTransformInfo::MemIndexedMode
MemIndexedMode
The type of load/store indexing.
Definition: TargetTransformInfo.h:1279
llvm::TargetTransformInfo::TCK_Latency
@ TCK_Latency
The latency of instruction.
Definition: TargetTransformInfo.h:213
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:720
llvm::TargetTransformInfo::UnrollingPreferences::MaxCount
unsigned MaxCount
Definition: TargetTransformInfo.h:465
llvm::ImmutablePass
ImmutablePass class - This class is used to provide information that does not need to be run.
Definition: Pass.h:269
llvm::TargetTransformInfo::getRegisterClassName
const char * getRegisterClassName(unsigned ClassID) const
Definition: TargetTransformInfo.cpp:585
AtomicOrdering.h
llvm::ElementCount
Definition: TypeSize.h:386
llvm::TargetTransformInfo::OK_UniformValue
@ OK_UniformValue
Definition: TargetTransformInfo.h:873
llvm::TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass
TargetTransformInfoWrapperPass()
We must provide a default constructor for the pass but it should never be used.
Definition: TargetTransformInfo.cpp:1146
llvm::TargetTransformInfo::Concept::enableMemCmpExpansion
virtual MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const =0
llvm::ExtractElementInst
This instruction extracts a single (scalar) element from a VectorType value.
Definition: Instructions.h:1875
llvm::TargetTransformInfo::canMacroFuseCmp
bool canMacroFuseCmp() const
Return true if the target can fuse a compare and branch.
Definition: TargetTransformInfo.cpp:359
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::TargetTransformInfo::Concept::isLegalMaskedScatter
virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment)=0
Pass.h
llvm::TargetTransformInfo::getRegisterBitWidth
TypeSize getRegisterBitWidth(RegisterKind K) const
Definition: TargetTransformInfo.cpp:589
llvm::TargetTransformInfo::PopcntSupportKind
PopcntSupportKind
Flags indicating the kind of support for population count.
Definition: TargetTransformInfo.h:588
llvm::TargetTransformInfo::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:822
llvm::TargetTransformInfo::Concept::getIntImmCost
virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind)=0
llvm::TargetTransformInfo::getVPLegalizationStrategy
VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const
Definition: TargetTransformInfo.cpp:1036
llvm::TargetTransformInfo::AMK_PostIndexed
@ AMK_PostIndexed
Definition: TargetTransformInfo.h:637
llvm::TargetTransformInfoWrapperPass::getTTI
TargetTransformInfo & getTTI(const Function &F)
Definition: TargetTransformInfo.cpp:1159
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::TargetTransformInfo::getAddressComputationCost
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE=nullptr, const SCEV *Ptr=nullptr) const
Definition: TargetTransformInfo.cpp:884
InstCombiner
Machine InstCombiner
Definition: MachineCombiner.cpp:136
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::TargetTransformInfo::PeelingPreferences::AllowPeeling
bool AllowPeeling
Allow peeling off loop iterations.
Definition: TargetTransformInfo.h:537
llvm::TargetTransformInfo::Concept::hasVolatileVariant
virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace)=0
llvm::TargetTransformInfo::Concept::isFPVectorizationPotentiallyUnsafe
virtual bool isFPVectorizationPotentiallyUnsafe()=0
llvm::TargetTransformInfo::Concept::isLegalMaskedExpandLoad
virtual bool isLegalMaskedExpandLoad(Type *DataType)=0
llvm::TargetTransformInfo::Concept::isAlwaysUniform
virtual bool isAlwaysUniform(const Value *V)=0
llvm::TargetTransformInfo::Concept::getMaxPrefetchIterationsAhead
virtual unsigned getMaxPrefetchIterationsAhead() const =0
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::TargetTransformInfo::MemCmpExpansionOptions::AllowOverlappingLoads
bool AllowOverlappingLoads
Definition: TargetTransformInfo.h:777
llvm::TargetTransformInfo::getScalingFactorCost
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
Definition: TargetTransformInfo.cpp:426
llvm::IntrinsicCostAttributes::getReturnType
Type * getReturnType() const
Definition: TargetTransformInfo.h:150
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:443
llvm::TargetTransformInfo::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
Definition: TargetTransformInfo.cpp:863
llvm::TargetTransformInfo::Concept::getRegisterClassForType
virtual unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const =0
llvm::TargetTransformInfo::RGK_Scalar
@ RGK_Scalar
Definition: TargetTransformInfo.h:900
llvm::TargetTransformInfo::Concept::enableInterleavedAccessVectorization
virtual bool enableInterleavedAccessVectorization()=0
llvm::TargetTransformInfo::getAssumedAddrSpace
unsigned getAssumedAddrSpace(const Value *V) const
Definition: TargetTransformInfo.cpp:266
llvm::IntrinsicCostAttributes::getInst
const IntrinsicInst * getInst() const
Definition: TargetTransformInfo.h:149
llvm::TargetTransformInfo::Concept::useGPUDivergenceAnalysis
virtual bool useGPUDivergenceAnalysis()=0
llvm::TargetTransformInfo::isLegalICmpImmediate
bool isLegalICmpImmediate(int64_t Imm) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
Definition: TargetTransformInfo.cpp:334
llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJamInnerLoopThreshold
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
Definition: TargetTransformInfo.h:500
llvm::TargetTransformInfo::Concept::getMinMaxReductionCost
virtual InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, TTI::TargetCostKind CostKind)=0
llvm::TargetIRAnalysis::operator=
TargetIRAnalysis & operator=(const TargetIRAnalysis &RHS)
Definition: TargetTransformInfo.h:2334
llvm::TargetIRAnalysis::TargetIRAnalysis
TargetIRAnalysis(TargetIRAnalysis &&Arg)
Definition: TargetTransformInfo.h:2332
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::TargetTransformInfo::Concept::preferPredicateOverEpilogue
virtual bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, const LoopAccessInfo *LAI)=0
llvm::TargetTransformInfo::areFunctionArgsABICompatible
bool areFunctionArgsABICompatible(const Function *Caller, const Function *Callee, SmallPtrSetImpl< Argument * > &Args) const
Definition: TargetTransformInfo.cpp:962
llvm::HardwareLoopInfo::ExitBranch
BranchInst * ExitBranch
Definition: TargetTransformInfo.h:100
llvm::TargetTransformInfo::UnrollingPreferences::UnrollRemainder
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
Definition: TargetTransformInfo.h:493
llvm::TargetTransformInfo::UnrollingPreferences::Count
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
Definition: TargetTransformInfo.h:458
llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition: TargetTransformInfo.h:214
llvm::TargetTransformInfo::VPLegalization
Definition: TargetTransformInfo.h:1374
llvm::TargetTransformInfo::shouldBuildLookupTables
bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
Definition: TargetTransformInfo.cpp:457
llvm::TargetTransformInfo::LSRCost::NumIVMuls
unsigned NumIVMuls
Definition: TargetTransformInfo.h:416
llvm::TargetTransformInfo::Concept::isLegalToVectorizeReduction
virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const =0
llvm::HardwareLoopInfo::isHardwareLoopCandidate
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
Definition: TargetTransformInfo.cpp:100
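A hedged sketch of the candidacy check, mirroring how the HardwareLoops pass drives it (the helper name is illustrative):

  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/Analysis/ScalarEvolution.h"
  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/IR/Dominators.h"
  using namespace llvm;

  // Hypothetical helper: can loop L be converted to a hardware loop at all?
  bool isCandidate(Loop *L, ScalarEvolution &SE, LoopInfo &LI,
                   DominatorTree &DT) {
    HardwareLoopInfo HWInfo(L);
    return HWInfo.canAnalyze(LI) && HWInfo.isHardwareLoopCandidate(SE, LI, DT);
  }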
llvm::TargetTransformInfo::UnrollingPreferences::Partial
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminate compare and branch overhead).
Definition: TargetTransformInfo.h:477
llvm::TargetTransformInfo::getMaximumVF
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const
Definition: TargetTransformInfo.cpp:611
llvm::TargetTransformInfo::Concept::getEstimatedNumberOfCaseClusters
virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)=0
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::TargetTransformInfo::useColdCCForColdCall
bool useColdCCForColdCall(Function &F) const
Return true if the input function, which is cold at all call sites, should use the coldcc calling convention.
Definition: TargetTransformInfo.cpp:470
llvm::TargetTransformInfo::VPLegalization::Convert
@ Convert
Definition: TargetTransformInfo.h:1381
llvm::TargetTransformInfo::Concept::getGatherScatterOpCost
virtual InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:531
llvm::TargetTransformInfo::operator=
TargetTransformInfo & operator=(TargetTransformInfo &&RHS)
Definition: TargetTransformInfo.cpp:191
llvm::TargetTransformInfo::Concept::getPeelingPreferences
virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP)=0
llvm::HardwareLoopInfo::L
Loop * L
Definition: TargetTransformInfo.h:98
llvm::TargetTransformInfo::Concept::isLSRCostLess
virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2)=0
llvm::TargetTransformInfo::isLegalNTLoad
bool isLegalNTLoad(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal load.
Definition: TargetTransformInfo.cpp:391
llvm::TargetTransformInfo::isLegalNTStore
bool isLegalNTStore(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal store.
Definition: TargetTransformInfo.cpp:386
llvm::TargetTransformInfo::UnrollingPreferences::FullUnrollMaxCount
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
Definition: TargetTransformInfo.h:469
llvm::Optional
Definition: APInt.h:33
ForceNestedLoop
static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))
llvm::TargetTransformInfo::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:794
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::TargetTransformInfo::OP_PowerOf2
@ OP_PowerOf2
Definition: TargetTransformInfo.h:879
llvm::TargetTransformInfo::getPredictableBranchThreshold
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor, it is very likely to be predicted correctly.
Definition: TargetTransformInfo.cpp:232
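A small sketch of the intended use, assuming Prob is the probability of the more likely successor (the helper name is illustrative):

  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/Support/BranchProbability.h"
  using namespace llvm;

  // Hypothetical helper: treat a branch as predictable when the taken
  // probability exceeds the target's threshold.
  bool isPredictable(const TargetTransformInfo &TTI, BranchProbability Prob) {
    return Prob > TTI.getPredictableBranchThreshold();
  }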
Operator.h
llvm::TargetTransformInfo::getIntImmCodeSizeCost
InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) const
Return the expected cost for the given integer when optimising for size.
Definition: TargetTransformInfo.cpp:540
llvm::TargetTransformInfo::getInlinerVectorBonusPercent
int getInlinerVectorBonusPercent() const
Definition: TargetTransformInfo.cpp:205
llvm::TargetTransformInfo::getIntImmCostIntrin
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Definition: TargetTransformInfo.cpp:567
VectorType
Definition: ItaniumDemangle.h:901
llvm::TargetTransformInfo::Concept::isTruncateFree
virtual bool isTruncateFree(Type *Ty1, Type *Ty2)=0
llvm::TargetTransformInfo::getIntImmCostInst
InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr) const
Return the expected cost of materialization for the given integer immediate of the specified type for a given instruction. The cost can be zero if the immediate can be folded into the specified instruction.
Definition: TargetTransformInfo.cpp:557
llvm::FastMathFlags
Convenience struct for specifying and reasoning about fast-math flags.
Definition: Operator.h:161
llvm::TargetTransformInfo::Concept::getAtomicMemIntrinsicMaxElementSize
virtual unsigned getAtomicMemIntrinsicMaxElementSize() const =0
llvm::TargetTransformInfo::Concept::emitGetActiveLaneMask
virtual bool emitGetActiveLaneMask()=0
llvm::TargetTransformInfo::enableInterleavedAccessVectorization
bool enableInterleavedAccessVectorization() const
Enable matching of interleaved access groups.
Definition: TargetTransformInfo.cpp:500
llvm::TargetTransformInfo::Concept::prefersVectorizedAddressing
virtual bool prefersVectorizedAddressing()=0
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::TargetTransformInfo::SK_PermuteSingleSrc
@ SK_PermuteSingleSrc
Shuffle elements of a single source vector with any shuffle mask.
Definition: TargetTransformInfo.h:863
llvm::TargetTransformInfo::Concept::getOrCreateResultFromMemIntrinsic
virtual Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType)=0
llvm::TargetTransformInfo::Concept::getCostOfKeepingLiveOverCall
virtual InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys)=0
llvm::TargetTransformInfo::getOperandsScalarizationOverhead
InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys) const
Estimate the overhead of scalarizing an instruction's unique non-constant operands.
Definition: TargetTransformInfo.cpp:481
llvm::TargetTransformInfo::Concept::getRegisterBitWidth
virtual TypeSize getRegisterBitWidth(RegisterKind K) const =0
llvm::TargetTransformInfo::UnrollingPreferences::AllowExpensiveTripCount
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for runtime unrolling.
Definition: TargetTransformInfo.h:486
llvm::TargetTransformInfo::preferInLoopReduction
bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags Flags) const
Definition: TargetTransformInfo.cpp:1025
llvm::TargetTransformInfo::CacheLevel
CacheLevel
The possible cache levels.
Definition: TargetTransformInfo.h:941
llvm::TargetTransformInfo::Concept
Definition: TargetTransformInfo.h:1430
llvm::TargetTransformInfo::Concept::isLegalNTStore
virtual bool isLegalNTStore(Type *DataType, Align Alignment)=0
new
Definition: README.txt:125
llvm::TargetTransformInfo::LSRCost::Insns
unsigned Insns
TODO: Some of these could be merged.
Definition: TargetTransformInfo.h:413
llvm::TargetTransformInfo::Concept::shouldMaximizeVectorBandwidth
virtual bool shouldMaximizeVectorBandwidth() const =0
llvm::IntrinsicCostAttributes::getScalarizationCost
InstructionCost getScalarizationCost() const
Definition: TargetTransformInfo.h:152
llvm::TargetTransformInfo::SK_Broadcast
@ SK_Broadcast
Broadcast element 0 to all other elements.
Definition: TargetTransformInfo.h:853
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::TargetTransformInfo::Concept::getAddressComputationCost
virtual InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr)=0
llvm::TargetTransformInfo::Concept::getIntImmCodeSizeCost
virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty)=0
llvm::TargetTransformInfo::Concept::isLegalNTLoad
virtual bool isLegalNTLoad(Type *DataType, Align Alignment)=0
llvm::IntrinsicCostAttributes::skipScalarizationCost
bool skipScalarizationCost() const
Definition: TargetTransformInfo.h:160
llvm::TargetTransformInfo::requiresOrderedReduction
static bool requiresOrderedReduction(Optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of FastMathFlags FMF.
Definition: TargetTransformInfo.h:1153
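For example, with no fast-math flags set there is no 'reassoc', so a strict in-order FP reduction is required; a minimal sketch:

  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/IR/Operator.h"
  using namespace llvm;

  // Without the 'reassoc' fast-math flag, an FP add reduction must be
  // evaluated in order, so this returns true.
  bool needsOrderedFAddReduction() {
    FastMathFlags FMF; // default-constructed: no fast-math flags set
    return TargetTransformInfo::requiresOrderedReduction(FMF);
  }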
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::HardwareLoopInfo::IsNestingLegal
bool IsNestingLegal
Definition: TargetTransformInfo.h:105
llvm::TargetTransformInfo::LSRCost::AddRecCost
unsigned AddRecCost
Definition: TargetTransformInfo.h:415
llvm::IntrinsicCostAttributes::getFlags
FastMathFlags getFlags() const
Definition: TargetTransformInfo.h:151
llvm::Reloc::Model
Model
Definition: CodeGen.h:22
llvm::TargetTransformInfo::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const
Get target-customized preferences for the generic loop unrolling transformation.
Definition: TargetTransformInfo.cpp:319
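A hedged sketch of the calling convention: the caller seeds UP with baseline values and the target then overrides the fields it cares about. The values below are illustrative; real callers initialize every field before this call.

  #include "llvm/Analysis/TargetTransformInfo.h"
  using namespace llvm;

  // Hypothetical snippet: let the target adjust baseline unroll settings.
  void tuneUnrolling(Loop *L, ScalarEvolution &SE,
                     const TargetTransformInfo &TTI,
                     OptimizationRemarkEmitter *ORE) {
    TargetTransformInfo::UnrollingPreferences UP;
    UP.Threshold = 150;              // illustrative baseline only
    UP.Partial = UP.Runtime = false; // real callers set all fields
    TTI.getUnrollingPreferences(L, SE, UP, ORE);
    // UP now reflects target-specific overrides.
  }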
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::TargetTransformInfo::isLegalMaskedScatter
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const
Return true if the target supports masked scatter.
Definition: TargetTransformInfo.cpp:400
Arg
Definition: AMDGPULibCalls.cpp:206
llvm::TargetTransformInfo::LSRCost::SetupCost
unsigned SetupCost
Definition: TargetTransformInfo.h:419
llvm::TargetTransformInfo::Concept::isLegalMaskedLoad
virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment)=0
llvm::TargetTransformInfo::Concept::canMacroFuseCmp
virtual bool canMacroFuseCmp()=0
llvm::TargetTransformInfo::Concept::isTypeLegal
virtual bool isTypeLegal(Type *Ty)=0
llvm::TargetTransformInfo::getGISelRematGlobalCost
unsigned getGISelRematGlobalCost() const
Definition: TargetTransformInfo.cpp:1044
llvm::IntrinsicCostAttributes::getArgTypes
const SmallVectorImpl< Type * > & getArgTypes() const
Definition: TargetTransformInfo.h:154
llvm::TargetTransformInfo::areInlineCompatible
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
Definition: TargetTransformInfo.cpp:957
llvm::TargetTransformInfo::Concept::getMinimumVF
virtual ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const =0
llvm::TargetTransformInfo::isTypeLegal
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
Definition: TargetTransformInfo.cpp:449
llvm::TargetTransformInfo::SK_PermuteTwoSrc
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
Definition: TargetTransformInfo.h:861
llvm::TargetTransformInfo::Concept::getCacheSize
virtual Optional< unsigned > getCacheSize(CacheLevel Level) const =0
llvm::TargetTransformInfo::PeelingPreferences::PeelProfiledIterations
bool PeelProfiledIterations
Allow peeling based on profile.
Definition: TargetTransformInfo.h:544
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequencies.
Definition: BlockFrequencyInfo.h:37
llvm::TargetTransformInfo::getMinimumVF
ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const
Definition: TargetTransformInfo.cpp:606
llvm::MemIntrinsicInfo::isUnordered
bool isUnordered() const
Definition: TargetTransformInfo.h:87
llvm::TargetTransformInfo::Concept::getPredictableBranchThreshold
virtual BranchProbability getPredictableBranchThreshold()=0
llvm::TargetTransformInfo::isProfitableLSRChainElement
bool isProfitableLSRChainElement(Instruction *I) const
Definition: TargetTransformInfo.cpp:355
llvm::TargetTransformInfo::Concept::useAA
virtual bool useAA()=0
llvm::TargetTransformInfo::getCastContextHint
static CastContextHint getCastContextHint(const Instruction *I)
Calculates a CastContextHint from I.
Definition: TargetTransformInfo.cpp:732
llvm::TargetTransformInfo::getOrCreateResultFromMemIntrinsic
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) const
Definition: TargetTransformInfo.cpp:936
llvm::TargetTransformInfo::isLegalToVectorizeLoad
bool isLegalToVectorizeLoad(LoadInst *LI) const
Definition: TargetTransformInfo.cpp:982
llvm::MemIntrinsicInfo::Ordering
AtomicOrdering Ordering
Definition: TargetTransformInfo.h:78
llvm::TargetTransformInfo::Concept::useColdCCForColdCall
virtual bool useColdCCForColdCall(Function &F)=0
llvm::TargetTransformInfoWrapperPass::ID
static char ID
Definition: TargetTransformInfo.h:2376
llvm::TargetTransformInfo::TargetCostConstants
TargetCostConstants
Underlying constants for 'cost' values in this interface.
Definition: TargetTransformInfo.h:261
llvm::TargetTransformInfo::getPopcntSupport
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
Return hardware support for population count.
Definition: TargetTransformInfo.cpp:522
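A minimal sketch of a typical query; the helper name and the 64-bit width are illustrative:

  #include "llvm/Analysis/TargetTransformInfo.h"
  using namespace llvm;

  // Hypothetical check: only use a popcount idiom when the target lowers
  // it to a fast native instruction.
  bool popcntIsFast(const TargetTransformInfo &TTI, unsigned Bits = 64) {
    return TTI.getPopcntSupport(Bits) ==
           TargetTransformInfo::PSK_FastHardware;
  }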
llvm::TargetIRAnalysis::TargetIRAnalysis
TargetIRAnalysis(const TargetIRAnalysis &Arg)
Definition: TargetTransformInfo.h:2330
llvm::TargetTransformInfo::OP_None
@ OP_None
Definition: TargetTransformInfo.h:879
llvm::TargetTransformInfo::ShuffleKind
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Definition: TargetTransformInfo.h:852
llvm::TargetTransformInfo::getPreferredAddressingMode
AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
Return the preferred addressing mode LSR should make efforts to generate.
Definition: TargetTransformInfo.cpp:371
llvm::TargetTransformInfo::CastContextHint
CastContextHint
Represents a hint about the context in which a cast is used.
Definition: TargetTransformInfo.h:1055
llvm::TargetTransformInfo::Concept::getVPLegalizationStrategy
virtual VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const =0
llvm::User
Definition: User.h:44
llvm::TargetTransformInfo::useGPUDivergenceAnalysis
bool useGPUDivergenceAnalysis() const
Return true if the target prefers to use GPU divergence analysis to replace the legacy version.
Definition: TargetTransformInfo.cpp:240
C
Definition: README_ALTIVEC.txt:86
llvm::TargetTransformInfo::UnrollingPreferences::Force
bool Force
Apply loop unrolling to any kind of loop (mainly to loops that fail runtime unrolling).
Definition: TargetTransformInfo.h:489
InstrTypes.h
llvm::TargetTransformInfo::Concept::getMaxVScale
virtual Optional< unsigned > getMaxVScale() const =0
llvm::TargetTransformInfo::Concept::getPrefetchDistance
virtual unsigned getPrefetchDistance() const =0
llvm::TargetTransformInfo::hasBranchDivergence
bool hasBranchDivergence() const
Return true if branch divergence exists.
Definition: TargetTransformInfo.cpp:236
llvm::TargetTransformInfo::isLegalToVectorizeReduction
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const
Definition: TargetTransformInfo.cpp:1002
llvm::TargetTransformInfo::getIntImmCost
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing the given integer immediate of the specified type.
Definition: TargetTransformInfo.cpp:550
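A sketch in the spirit of ConstantHoisting-style callers, comparing the returned cost against TCC_Free (the helper name is hypothetical):

  #include "llvm/ADT/APInt.h"
  #include "llvm/Analysis/TargetTransformInfo.h"
  using namespace llvm;

  // Hypothetical check: is this immediate free to materialize when
  // optimizing for size?
  bool immIsFree(const TargetTransformInfo &TTI, const APInt &Imm, Type *Ty) {
    return TTI.getIntImmCost(Imm, Ty, TargetTransformInfo::TCK_CodeSize) ==
           TargetTransformInfo::TCC_Free;
  }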
llvm::TargetTransformInfo::Concept::supportsEfficientVectorElementLoadStore
virtual bool supportsEfficientVectorElementLoadStore()=0
llvm::TargetTransformInfo::Concept::canSaveCmp
virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo)=0
llvm::TargetTransformInfo::getNumberOfParts
unsigned getNumberOfParts(Type *Tp) const
Definition: TargetTransformInfo.cpp:879
llvm::TargetTransformInfo::Concept::isFCmpOrdCheaperThanFCmpZero
virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)=0
llvm::TargetTransformInfo::Concept::isNumRegsMajorCostOfLSR
virtual bool isNumRegsMajorCostOfLSR()=0
llvm::TargetTransformInfo::supportsScalableVectors
bool supportsScalableVectors() const
Definition: TargetTransformInfo.cpp:1048
llvm::TargetTransformInfo::Concept::getExtendedAddReductionCost
virtual InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput)=0
llvm::TargetTransformInfo::isIndexedLoadLegal
bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const
Definition: TargetTransformInfo.cpp:968
llvm::TargetTransformInfo::CastContextHint::Interleave
@ Interleave
The cast is used with an interleaved load/store.
llvm::TargetTransformInfo::UnrollingPreferences::MaxIterationsCountToAnalyze
unsigned MaxIterationsCountToAnalyze
Don't allow loop unrolling to simulate more than this number of iterations when checking full unroll profitability.
Definition: TargetTransformInfo.h:503
false
Definition: StackSlotColoring.cpp:142
llvm::TargetTransformInfo::Concept::getNumberOfRegisters
virtual unsigned getNumberOfRegisters(unsigned ClassID) const =0
llvm::TargetTransformInfo::Concept::isLegalToVectorizeLoadChain
virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const =0
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJam
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
Definition: TargetTransformInfo.h:495
llvm::TargetTransformInfo::isLegalMaskedExpandLoad
bool isLegalMaskedExpandLoad(Type *DataType) const
Return true if the target supports masked expand load.
Definition: TargetTransformInfo.cpp:409
llvm::TargetTransformInfo::Concept::simplifyDemandedVectorEltsIntrinsic
virtual Optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)=0
llvm::TargetTransformInfo::Concept::isLegalMaskedGather
virtual bool isLegalMaskedGather(Type *DataType, Align Alignment)=0
llvm::TargetTransformInfo::Concept::hasBranchDivergence
virtual bool hasBranchDivergence()=0
llvm::Instruction
Definition: Instruction.h:45
llvm::TargetTransformInfo::Concept::enableWritePrefetching
virtual bool enableWritePrefetching() const =0
llvm::TargetTransformInfo::isLSRCostLess
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2) const
Return true if the LSR cost of C1 is lower than the cost of C2.
Definition: TargetTransformInfo.cpp:347
llvm::TargetTransformInfo::MIM_PreDec
@ MIM_PreDec
Pre-decrementing.
Definition: TargetTransformInfo.h:1282
llvm::HardwareLoopInfo::PerformEntryTest
bool PerformEntryTest
Definition: TargetTransformInfo.h:109
llvm::TargetTransformInfo::Concept::getMaskedMemoryOpCost
virtual InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::isLegalMaskedLoad
bool isLegalMaskedLoad(Type *DataType, Align Alignment) const
Return true if the target supports masked load.
Definition: TargetTransformInfo.cpp:381
llvm::TargetTransformInfo::isSourceOfDivergence
bool isSourceOfDivergence(const Value *V) const
Returns whether V is a source of divergence.
Definition: TargetTransformInfo.cpp:244
llvm::TargetTransformInfo::simplifyDemandedVectorEltsIntrinsic
Optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Can be used to implement target-specific instruction combining.
Definition: TargetTransformInfo.cpp:309
llvm::TargetTransformInfo::CastContextHint::Reversed
@ Reversed
The cast is used with a reversed load/store.
llvm::TargetTransformInfo::getPrefetchDistance
unsigned getPrefetchDistance() const
Definition: TargetTransformInfo.cpp:636
llvm::HardwareLoopInfo::CounterInReg
bool CounterInReg
Definition: TargetTransformInfo.h:107
llvm::TargetTransformInfo::Concept::isIndexedStoreLegal
virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const =0
llvm::TargetTransformInfo::Concept::supportsScalableVectors
virtual bool supportsScalableVectors() const =0
llvm::TargetTransformInfo::Concept::isLegalToVectorizeLoad
virtual bool isLegalToVectorizeLoad(LoadInst *LI) const =0
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::TargetTransformInfo::Concept::isLegalToVectorizeStoreChain
virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const =0
llvm::TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const
Return true if it is faster to check if a floating-point value is NaN (or not-NaN) versus a comparison against a constant FP zero value.
Definition: TargetTransformInfo.cpp:530
llvm::AnalysisManager::Invalidator
API to communicate dependencies between analyses during invalidation.
Definition: PassManager.h:656
llvm::TargetTransformInfo::Concept::getMemcpyLoopResidualLoweringType
virtual void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign) const =0
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::TargetTransformInfo::RGK_FixedWidthVector
@ RGK_FixedWidthVector
Definition: TargetTransformInfo.h:900
llvm::TargetTransformInfo::Concept::allowsMisalignedMemoryAccesses
virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, bool *Fast)=0
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
IVDescriptors.h
llvm::TargetTransformInfo::Concept::areFunctionArgsABICompatible
virtual bool areFunctionArgsABICompatible(const Function *Caller, const Function *Callee, SmallPtrSetImpl< Argument * > &Args) const =0
llvm::None
const NoneType None
Definition: None.h:23
llvm::TargetTransformInfo::Concept::getMemcpyLoopLoweringType
virtual Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign) const =0
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
llvm::IntrinsicCostAttributes
Definition: TargetTransformInfo.h:118
BranchProbability.h
llvm::TargetTransformInfo::VPLegalization::VPLegalization
VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)
Definition: TargetTransformInfo.h:1399
llvm::TargetTransformInfo::Concept::getDataLayout
virtual const DataLayout & getDataLayout() const =0
llvm::TargetTransformInfo::hasVolatileVariant
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const
Return true if the given instruction (assumed to be a memory access instruction) has a volatile variant.
Definition: TargetTransformInfo.cpp:417
llvm::TargetTransformInfo::PSK_Software
@ PSK_Software
Definition: TargetTransformInfo.h:588
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::TargetTransformInfo::Concept::isElementTypeLegalForScalableVector
virtual bool isElementTypeLegalForScalableVector(Type *Ty) const =0
Operands
mir Rename Register Operands
Definition: MIRNamerPass.cpp:78
llvm::TargetTransformInfo::isLegalMaskedCompressStore
bool isLegalMaskedCompressStore(Type *DataType) const
Return true if the target supports masked compress store.
Definition: TargetTransformInfo.cpp:405
llvm::TargetTransformInfo::haveFastSqrt
bool haveFastSqrt(Type *Ty) const
Return true if the hardware has a fast square-root instruction.
Definition: TargetTransformInfo.cpp:526
llvm::createTargetTransformInfoWrapperPass
ImmutablePass * createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA)
Create an analysis pass wrapper around a TTI object.
Definition: TargetTransformInfo.cpp:1166
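A hedged sketch of legacy pass-manager setup, assuming TM is an already-configured TargetMachine:

  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/IR/LegacyPassManager.h"
  #include "llvm/Target/TargetMachine.h"
  using namespace llvm;

  // Make target-aware TTI available to subsequent legacy passes.
  void addTTI(legacy::PassManagerBase &PM, TargetMachine &TM) {
    PM.add(createTargetTransformInfoWrapperPass(TM.getTargetIRAnalysis()));
  }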
llvm::TargetTransformInfo::VPLegalization::EVLParamStrategy
VPTransform EVLParamStrategy
Definition: TargetTransformInfo.h:1388
llvm::TargetTransformInfo::ReductionFlags::ReductionFlags
ReductionFlags()
Definition: TargetTransformInfo.h:1332
llvm::TargetTransformInfo::SK_Reverse
@ SK_Reverse
Reverse the order of the vector.
Definition: TargetTransformInfo.h:854
llvm::TargetTransformInfo::Concept::collectFlatAddressOperands
virtual bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const =0
llvm::TargetTransformInfo::VPLegalization::VPTransform
VPTransform
Definition: TargetTransformInfo.h:1375
llvm::TargetTransformInfo::getFlatAddressSpace
unsigned getFlatAddressSpace() const
Returns the address space ID for a target's 'flat' address space.
Definition: TargetTransformInfo.cpp:252
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:388
llvm::AtomicOrdering
AtomicOrdering
Atomic ordering for LLVM's memory model.
Definition: AtomicOrdering.h:56
llvm::TargetTransformInfo::CastContextHint::Normal
@ Normal
The cast is used with a normal load/store.
llvm::TargetTransformInfo::Concept::~Concept
virtual ~Concept()=0
Definition: TargetTransformInfo.cpp:1120
llvm::TargetTransformInfo::Concept::getIntrinsicInstrCost
virtual InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)=0
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:78
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:304
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::TargetTransformInfo::SK_InsertSubvector
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
Definition: TargetTransformInfo.h:859
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::TargetTransformInfo::Concept::getInstructionLatency
virtual InstructionCost getInstructionLatency(const Instruction *I)=0
llvm::TargetTransformInfo::Concept::isProfitableLSRChainElement
virtual bool isProfitableLSRChainElement(Instruction *I)=0
llvm::TargetTransformInfo::shouldBuildLookupTablesForConstant
bool shouldBuildLookupTablesForConstant(Constant *C) const
Return true if switches should be turned into lookup tables containing this constant value for the target.
Definition: TargetTransformInfo.cpp:461
llvm::TargetTransformInfo::Concept::getRegUsageForType
virtual InstructionCost getRegUsageForType(Type *Ty)=0
llvm::TargetTransformInfo::MIM_PostInc
@ MIM_PostInc
Post-incrementing.
Definition: TargetTransformInfo.h:1283
llvm::TargetTransformInfo::shouldMaximizeVectorBandwidth
bool shouldMaximizeVectorBandwidth() const
Definition: TargetTransformInfo.cpp:602
llvm::HardwareLoopInfo::HardwareLoopInfo
HardwareLoopInfo()=delete
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::TargetTransformInfo::LSRCost
Definition: TargetTransformInfo.h:410
llvm::TargetTransformInfo::collectFlatAddressOperands
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
Return any intrinsic address operand indexes which may be rewritten if they use a flat address space pointer.
Definition: TargetTransformInfo.cpp:256
llvm::TargetTransformInfo::VPLegalization::OpStrategy
VPTransform OpStrategy
Definition: TargetTransformInfo.h:1394
llvm::TargetTransformInfo::isLegalMaskedGather
bool isLegalMaskedGather(Type *DataType, Align Alignment) const
Return true if the target supports masked gather.
Definition: TargetTransformInfo.cpp:395
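A sketch of the legality probe a vectorizer might make before emitting a masked gather; the element count of 4 is an arbitrary example:

  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/IR/DerivedTypes.h"
  using namespace llvm;

  // Hypothetical helper: is a masked gather of <4 x EltTy> legal here?
  bool canUseMaskedGather(const TargetTransformInfo &TTI, Type *EltTy,
                          Align Alignment) {
    Type *DataTy = FixedVectorType::get(EltTy, /*NumElts=*/4);
    return TTI.isLegalMaskedGather(DataTy, Alignment);
  }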
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2369
llvm::TargetTransformInfo::Concept::getInterleavedMemoryOpCost
virtual InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)=0
llvm::TargetTransformInfo::emitGetActiveLaneMask
bool emitGetActiveLaneMask() const
Query the target whether lowering of the llvm.get.active.lane.mask intrinsic is supported.
Definition: TargetTransformInfo.cpp:292
llvm::TargetTransformInfo::preferPredicatedReductionSelect
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, ReductionFlags Flags) const
Definition: TargetTransformInfo.cpp:1030
llvm::TargetTransformInfo::Concept::hasDivRemOp
virtual bool hasDivRemOp(Type *DataType, bool IsSigned)=0
llvm::TargetTransformInfo::getInterleavedMemoryOpCost
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, bool UseMaskForCond=false, bool UseMaskForGaps=false) const
Definition: TargetTransformInfo.cpp:851
llvm::TargetTransformInfo::ReductionFlags::NoNaN
bool NoNaN
If op is an fp min/max, whether NaNs may be present.
Definition: TargetTransformInfo.h:1335
llvm::TargetTransformInfo::Concept::shouldBuildLookupTables
virtual bool shouldBuildLookupTables()=0
llvm::TargetTransformInfo::OK_UniformConstantValue
@ OK_UniformConstantValue
Definition: TargetTransformInfo.h:874
llvm::TargetIRAnalysis::Result
TargetTransformInfo Result
Definition: TargetTransformInfo.h:2315
llvm::TargetTransformInfo::getMaxInterleaveFactor
unsigned getMaxInterleaveFactor(unsigned VF) const
Definition: TargetTransformInfo.cpp:655
llvm::TargetTransformInfo::VPLegalization::shouldDoNothing
bool shouldDoNothing() const
Definition: TargetTransformInfo.h:1396
llvm::TargetTransformInfo::getRegisterClassForType
unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
Definition: TargetTransformInfo.cpp:580
move
Definition: README.txt:546
llvm::TargetTransformInfo::Concept::getMaximumVF
virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const =0
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::AnalysisKey
A special type used by analysis passes to provide an address that identifies that particular analysis pass type.
Definition: PassManager.h:72
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:424
llvm::TargetTransformInfo::getCostOfKeepingLiveOverCall
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const
Definition: TargetTransformInfo.cpp:923
llvm::TargetTransformInfo::Concept::getArithmeticInstrCost
virtual InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, OperandValueKind Opd1Info, OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo, ArrayRef< const Value * > Args, const Instruction *CxtI=nullptr)=0
llvm::AtomicOrdering::Unordered
@ Unordered
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::TargetTransformInfo::OperandValueProperties
OperandValueProperties
Additional properties of an operand's values.
Definition: TargetTransformInfo.h:879
llvm::TargetTransformInfo::Concept::isLegalMaskedStore
virtual bool isLegalMaskedStore(Type *DataType, Align Alignment)=0
llvm::TargetTransformInfo::shouldConsiderAddressTypePromotion
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
Definition: TargetTransformInfo.cpp:616
llvm::LoopAccessInfo
Drive the analysis of memory accesses in the loop.
Definition: LoopAccessAnalysis.h:525
llvm::TargetTransformInfo::Concept::getScalarizationOverhead
virtual InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract)=0
llvm::TargetTransformInfo::getUserCost
InstructionCost getUserCost(const User *U, TargetCostKind CostKind) const
This is a helper function which calls the two-argument getUserCost with Operands which are the current operands U has.
Definition: TargetTransformInfo.h:326
llvm::TargetTransformInfo::Concept::getTgtMemIntrinsic
virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)=0
llvm::TargetTransformInfo::getScalarizationOverhead
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract) const
Estimate the overhead of scalarizing an instruction.
Definition: TargetTransformInfo.cpp:475
llvm::TargetTransformInfo::PeelingPreferences::AllowLoopNestsPeeling
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
Definition: TargetTransformInfo.h:539
llvm::TargetTransformInfo::rewriteIntrinsicWithAddressSpace
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
Rewrite intrinsic call II such that OldV will be replaced with NewV, which has a different address space.
Definition: TargetTransformInfo.cpp:270
llvm::TargetTransformInfo::Concept::getShuffleCost
virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, int Index, VectorType *SubTp)=0
llvm::TargetTransformInfo::Concept::shouldExpandReduction
virtual bool shouldExpandReduction(const IntrinsicInst *II) const =0
llvm::TargetTransformInfo::Concept::getLoadVectorFactor
virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const =0
llvm::TargetTransformInfo::getGatherScatterOpCost
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:842
llvm::TargetTransformInfo::VPLegalization::Discard
@ Discard
Definition: TargetTransformInfo.h:1379
llvm::TargetTransformInfo::Concept::getCastInstrCost
virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I)=0
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1605
llvm::TargetTransformInfo::Concept::isLoweredToCall
virtual bool isLoweredToCall(const Function *F)=0
llvm::TargetTransformInfo::LSRWithInstrQueries
bool LSRWithInstrQueries() const
Return true if the loop strength reduce pass should make Instruction* based TTI queries to isLegalAddressingMode().
Definition: TargetTransformInfo.cpp:435
llvm::TargetTransformInfo::Concept::getScalingFactorCost
virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace)=0
llvm::TargetTransformInfo::getMinMaxReductionCost
InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Definition: TargetTransformInfo.cpp:906
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::TargetTransformInfo::isLegalToVectorizeLoadChain
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
Definition: TargetTransformInfo.cpp:990
llvm::TargetTransformInfo::getGEPCost
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, TargetCostKind CostKind=TCK_SizeAndLatency) const
Estimate the cost of a GEP operation when lowered.
Definition: TargetTransformInfo.cpp:210
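A minimal sketch for an existing GEP instruction: the pointer operand is passed separately and the index operands form the Operands list (the helper name is hypothetical):

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  // Hypothetical helper: lowered cost of a GEP we already have in hand.
  InstructionCost gepCost(const TargetTransformInfo &TTI,
                          const GetElementPtrInst *GEP) {
    SmallVector<const Value *, 4> Indices(GEP->idx_begin(), GEP->idx_end());
    return TTI.getGEPCost(GEP->getSourceElementType(),
                          GEP->getPointerOperand(), Indices);
  }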
llvm::TargetTransformInfo::isLegalAddImmediate
bool isLegalAddImmediate(int64_t Imm) const
Return true if the specified immediate is a legal add immediate, that is, the target has add instructions which can add a register with the immediate without having to materialize the immediate into a register.
Definition: TargetTransformInfo.cpp:330
llvm::CmpInst::BAD_ICMP_PREDICATE
@ BAD_ICMP_PREDICATE
Definition: InstrTypes.h:753
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:163
llvm::TargetTransformInfo::Concept::getOperandsScalarizationOverhead
virtual InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys)=0
Mode
SI Whole Quad Mode
Definition: SIWholeQuadMode.cpp:262
function
Print MemDeps of function.
Definition: MemDepPrinter.cpp:83
llvm::TargetTransformInfo::LSRCost::ScaleCost
unsigned ScaleCost
Definition: TargetTransformInfo.h:420
llvm::TargetTransformInfo::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace=0, Align Alignment=Align(1), bool *Fast=nullptr) const
Determine if the target supports unaligned memory accesses.
Definition: TargetTransformInfo.cpp:512
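A sketch distinguishing "allowed" from "fast": the Fast out-parameter is only meaningful when the query returns true (the helper name and 128-bit width are illustrative):

  #include "llvm/Analysis/TargetTransformInfo.h"
  using namespace llvm;

  // Hypothetical probe: is an unaligned 128-bit access in address space 0
  // both legal and fast on this target?
  bool fastUnaligned128(const TargetTransformInfo &TTI, LLVMContext &Ctx) {
    bool Fast = false;
    return TTI.allowsMisalignedMemoryAccesses(Ctx, /*BitWidth=*/128,
                                              /*AddressSpace=*/0, Align(1),
                                              &Fast) &&
           Fast;
  }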
llvm::TargetTransformInfo::isLoweredToCall
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
Definition: TargetTransformInfo.cpp:275
llvm::TargetTransformInfo::SK_Splice
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
Definition: TargetTransformInfo.h:865
llvm::TargetTransformInfo::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:775
llvm::TargetTransformInfo::Concept::shouldBuildRelLookupTables
virtual bool shouldBuildRelLookupTables()=0
llvm::TargetTransformInfo::PSK_SlowHardware
@ PSK_SlowHardware
Definition: TargetTransformInfo.h:588
llvm::TargetTransformInfo::Concept::getRegisterClassName
virtual const char * getRegisterClassName(unsigned ClassID) const =0
llvm::AnalysisInfoMixin
A CRTP mix-in that provides informational APIs needed for analysis passes.
Definition: PassManager.h:391
llvm::TargetTransformInfo::ReductionFlags::IsSigned
bool IsSigned
Whether the operation is a signed int reduction.
Definition: TargetTransformInfo.h:1334
llvm::TargetTransformInfo::OperandValueKind
OperandValueKind
Additional information about an operand's possible values.
Definition: TargetTransformInfo.h:871
llvm::TargetTransformInfo::getMemcpyLoopResidualLoweringType
void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign) const
Definition: TargetTransformInfo.cpp:948
llvm::TargetTransformInfo::Concept::instCombineIntrinsic
virtual Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II)=0
llvm::MemIntrinsicInfo::ReadMem
bool ReadMem
Definition: TargetTransformInfo.h:83
llvm::TargetTransformInfo::Concept::getCmpSelInstrCost
virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I)=0
llvm::TargetTransformInfo::MemCmpExpansionOptions::MaxNumLoads
unsigned MaxNumLoads
Definition: TargetTransformInfo.h:759
InstructionCost.h
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:70
llvm::TargetTransformInfo::canSaveCmp
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const
Return true if the target can save a compare for loop count, for example hardware loop saves a compare.
Definition: TargetTransformInfo.cpp:363
llvm::TargetTransformInfo::isTruncateFree
bool isTruncateFree(Type *Ty1, Type *Ty2) const
Return true if it's free to truncate a value of type Ty1 to type Ty2.
Definition: TargetTransformInfo.cpp:439
llvm::TargetTransformInfo::prefersVectorizedAddressing
bool prefersVectorizedAddressing() const
Return true if target doesn't mind addresses in vectors.
Definition: TargetTransformInfo.cpp:422
llvm::TargetTransformInfo::MemCmpExpansionOptions
Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.
Definition: TargetTransformInfo.h:754
llvm::TargetTransformInfo::TCC_Free
@ TCC_Free
Expected to fold away in lowering.
Definition: TargetTransformInfo.h:262
llvm::TargetTransformInfo::PeelingPreferences::PeelCount
unsigned PeelCount
A forced peeling factor (the number of bodies of the original loop that should be peeled off before the loop body).
Definition: TargetTransformInfo.h:535
llvm::TargetTransformInfo::supportsEfficientVectorElementLoadStore
bool supportsEfficientVectorElementLoadStore() const
If target has efficient vector element load/store instructions, it can return true here so that inser...
Definition: TargetTransformInfo.cpp:486
llvm::TargetTransformInfo::enableMemCmpExpansion
MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
Definition: TargetTransformInfo.cpp:496
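A hedged sketch of a caller in the spirit of ExpandMemCmp; the returned options object converts to false when expansion is disabled for this target:

  #include "llvm/Analysis/TargetTransformInfo.h"
  using namespace llvm;

  // Hypothetical check: may memcmp(p1, p2, s) == 0 be expanded inline?
  bool canExpandMemCmpEq(const TargetTransformInfo &TTI, bool OptSize) {
    auto Options = TTI.enableMemCmpExpansion(OptSize, /*IsZeroCmp=*/true);
    return static_cast<bool>(Options); // Options.LoadSizes lists legal loads
  }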
llvm::TargetTransformInfo::Concept::shouldConsiderAddressTypePromotion
virtual bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)=0
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:215
llvm::TargetTransformInfo::Concept::haveFastSqrt
virtual bool haveFastSqrt(Type *Ty)=0
llvm::TargetTransformInfo::Concept::isNoopAddrSpaceCast
virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const =0
llvm::TargetTransformInfo::isElementTypeLegalForScalableVector
bool isElementTypeLegalForScalableVector(Type *Ty) const
Definition: TargetTransformInfo.cpp:1007
llvm::TargetTransformInfo::isLegalMaskedStore
bool isLegalMaskedStore(Type *DataType, Align Alignment) const
Return true if the target supports masked store.
Definition: TargetTransformInfo.cpp:376
llvm::TargetTransformInfo::getRegUsageForType
InstructionCost getRegUsageForType(Type *Ty) const
Returns the estimated number of registers required to represent Ty.
Definition: TargetTransformInfo.cpp:453
llvm::TargetTransformInfo::getUserCost
InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Definition: TargetTransformInfo.cpp:223
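A minimal sketch that sums per-instruction costs over a basic block, as size heuristics commonly do (the helper name is hypothetical):

  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/IR/BasicBlock.h"
  using namespace llvm;

  // Hypothetical helper: size-and-latency cost of every user in a block.
  InstructionCost blockCost(const TargetTransformInfo &TTI, BasicBlock &BB) {
    InstructionCost Cost = 0;
    for (Instruction &I : BB)
      Cost += TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
    return Cost;
  }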
llvm::TargetTransformInfo::Concept::isIndexedLoadLegal
virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const =0
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
llvm::BranchProbability
Definition: BranchProbability.h:30
llvm::TargetTransformInfo::Concept::getFlatAddressSpace
virtual unsigned getFlatAddressSpace()=0
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::TargetTransformInfo::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy=nullptr, CmpInst::Predicate VecPred=CmpInst::BAD_ICMP_PREDICATE, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:803
llvm::TargetTransformInfo::UnrollingPreferences::DefaultUnrollRuntimeCount
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
Definition: TargetTransformInfo.h:460
llvm::TargetTransformInfo::hasDivRemOp
bool hasDivRemOp(Type *DataType, bool IsSigned) const
Return true if the target has a unified operation to calculate division and remainder.
Definition: TargetTransformInfo.cpp:413
llvm::TargetTransformInfo::Concept::LSRWithInstrQueries
virtual bool LSRWithInstrQueries()=0
llvm::TargetTransformInfo::simplifyDemandedUseBitsIntrinsic
Optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
Can be used to implement target-specific instruction combining.
Definition: TargetTransformInfo.cpp:302
llvm::TargetTransformInfo::getCacheSize
Optional< unsigned > getCacheSize(CacheLevel Level) const
Definition: TargetTransformInfo.cpp:627
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::TargetTransformInfo::getNumberOfRegisters
unsigned getNumberOfRegisters(unsigned ClassID) const
Definition: TargetTransformInfo.cpp:576
llvm::InstructionCost::isValid
bool isValid() const
Definition: InstructionCost.h:79
llvm::TargetTransformInfo::AddressingModeKind
AddressingModeKind
Definition: TargetTransformInfo.h:635
llvm::TargetTransformInfo::getMaxPrefetchIterationsAhead
unsigned getMaxPrefetchIterationsAhead() const
Definition: TargetTransformInfo.cpp:647
llvm::TargetTransformInfo::MIM_Unindexed
@ MIM_Unindexed
No indexing.
Definition: TargetTransformInfo.h:1280
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:421
llvm::TargetTransformInfo::OK_AnyValue
@ OK_AnyValue
Definition: TargetTransformInfo.h:872
llvm::TargetTransformInfo::getLoadVectorFactor
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
Definition: TargetTransformInfo.cpp:1011
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:175
llvm::TargetTransformInfo::SK_Transpose
@ SK_Transpose
Transpose two vectors.
Definition: TargetTransformInfo.h:858
llvm::TargetTransformInfo::CastContextHint::None
@ None
The cast is not used with a load/store of any kind.
llvm::TargetTransformInfo::isLegalToVectorizeStore
bool isLegalToVectorizeStore(StoreInst *SI) const
Definition: TargetTransformInfo.cpp:986
llvm::TargetTransformInfo::CacheLevel::L2D
@ L2D
llvm::TargetTransformInfo::Concept::getAssumedAddrSpace
virtual unsigned getAssumedAddrSpace(const Value *V) const =0
llvm::TargetTransformInfo::MIM_PreInc
@ MIM_PreInc
Pre-incrementing.
Definition: TargetTransformInfo.h:1281
llvm::TargetTransformInfo::MemCmpExpansionOptions::LoadSizes
SmallVector< unsigned, 8 > LoadSizes
Definition: TargetTransformInfo.h:762
llvm::TargetIRAnalysis::TargetIRAnalysis
TargetIRAnalysis()
Default construct a target IR analysis.
Definition: TargetTransformInfo.cpp:1122
llvm::TargetTransformInfo::Concept::preferInLoopReduction
virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags) const =0
Callee
Definition: AMDGPULibCalls.cpp:206
llvm::TargetTransformInfo::getMaxVScale
Optional< unsigned > getMaxVScale() const
Definition: TargetTransformInfo.cpp:598
llvm::TargetTransformInfo::Concept::isLegalICmpImmediate
virtual bool isLegalICmpImmediate(int64_t Imm)=0
llvm::TargetTransformInfo::getCacheAssociativity
Optional< unsigned > getCacheAssociativity(CacheLevel Level) const
Definition: TargetTransformInfo.cpp:632
llvm::TargetTransformInfo::getCacheLineSize
unsigned getCacheLineSize() const
Definition: TargetTransformInfo.cpp:622
std
Definition: BitVector.h:838
llvm::TargetTransformInfo::enableMaskedInterleavedAccessVectorization
bool enableMaskedInterleavedAccessVectorization() const
Enable matching of interleaved access groups that contain predicated accesses or gaps and are therefore vectorized using masked vector loads/stores.
Definition: TargetTransformInfo.cpp:504
llvm::KnownBits
Definition: KnownBits.h:23
llvm::TargetTransformInfo::LSRCost::NumBaseAdds
unsigned NumBaseAdds
Definition: TargetTransformInfo.h:417
llvm::TargetIRAnalysis::operator=
TargetIRAnalysis & operator=(TargetIRAnalysis &&RHS)
Definition: TargetTransformInfo.h:2338
llvm::HardwareLoopInfo::ExitBlock
BasicBlock * ExitBlock
Definition: TargetTransformInfo.h:99
llvm::MemIntrinsicInfo::WriteMem
bool WriteMem
Definition: TargetTransformInfo.h:84
llvm::TargetTransformInfo::UnrollingPreferences::UpperBound
bool UpperBound
Allow using trip count upper bound to unroll loops.
Definition: TargetTransformInfo.h:491
llvm::VPIntrinsic
This is the common base class for vector predication intrinsics.
Definition: IntrinsicInst.h:390
llvm::TargetTransformInfo::isNoopAddrSpaceCast
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Definition: TargetTransformInfo.cpp:261
llvm::TypeSize
Definition: TypeSize.h:417
llvm::TargetTransformInfo::getLoadStoreVecRegBitWidth
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
Definition: TargetTransformInfo.cpp:978
llvm::TargetTransformInfo::UnrollingPreferences::AllowRemainder
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
Definition: TargetTransformInfo.h:483
llvm::TargetTransformInfo::Concept::enableAggressiveInterleaving
virtual bool enableAggressiveInterleaving(bool LoopHasReductions)=0
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
llvm::TargetTransformInfo::isFPVectorizationPotentiallyUnsafe
bool isFPVectorizationPotentiallyUnsafe() const
Indicate that it is potentially unsafe to automatically vectorize floating-point operations because the semantics of vector and scalar floating-point operations may differ.
Definition: TargetTransformInfo.cpp:508
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
PassManager.h
Arguments
AMDGPU Lower Kernel Arguments
Definition: AMDGPULowerKernelArguments.cpp:244
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:219
llvm::User::operand_values
iterator_range< value_op_iterator > operand_values()
Definition: User.h:266
llvm::TargetTransformInfo::LSRCost::ImmCost
unsigned ImmCost
Definition: TargetTransformInfo.h:418
llvm::HardwareLoopInfo::TripCount
const SCEV * TripCount
Definition: TargetTransformInfo.h:101
llvm::TargetIRAnalysis::run
Result run(const Function &F, FunctionAnalysisManager &)
Definition: TargetTransformInfo.cpp:1128
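A sketch of the usual way a new-pass-manager pass obtains the TTI result; the pass name is hypothetical:

  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/PassManager.h"
  using namespace llvm;

  // Hypothetical pass that consumes TTI via the analysis manager.
  struct MyTTIConsumerPass : PassInfoMixin<MyTTIConsumerPass> {
    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
      const TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
      (void)TTI; // ... issue TTI queries here ...
      return PreservedAnalyses::all();
    }
  };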
llvm::TargetTransformInfo::getInstructionCost
InstructionCost getInstructionCost(const Instruction *I, enum TargetCostKind kind) const
Query the cost of a specified instruction.
Definition: TargetTransformInfo.h:225
llvm::HardwareLoopInfo::HardwareLoopInfo
HardwareLoopInfo(Loop *L)
Definition: TargetTransformInfo.h:97
llvm::TargetTransformInfo::Concept::getFPOpCost
virtual InstructionCost getFPOpCost(Type *Ty)=0
llvm::TargetTransformInfo::getInliningThresholdMultiplier
unsigned getInliningThresholdMultiplier() const
Definition: TargetTransformInfo.cpp:196
llvm::TargetTransformInfo::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index=-1) const
Definition: TargetTransformInfo.cpp:814
llvm::InstCombiner
The core instruction combiner logic.
Definition: InstCombiner.h:45
llvm::TargetTransformInfo::adjustInliningThreshold
unsigned adjustInliningThreshold(const CallBase *CB) const
Definition: TargetTransformInfo.cpp:201
llvm::TargetTransformInfo::getMaskedMemoryOpCost
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Definition: TargetTransformInfo.cpp:833
llvm::TargetTransformInfo::RGK_ScalableVector
@ RGK_ScalableVector
Definition: TargetTransformInfo.h:900
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:45
llvm::HardwareLoopInfo
Attributes of a target dependent hardware loop.
Definition: TargetTransformInfo.h:95
llvm::InstructionCost::getInvalid
static InstructionCost getInvalid(CostType Val=0)
Definition: InstructionCost.h:73
Vector
Definition: README_P9.txt:497
llvm::TargetTransformInfo::getEstimatedNumberOfCaseClusters
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
Definition: TargetTransformInfo.cpp:216
llvm::TargetTransformInfo::TargetTransformInfo
TargetTransformInfo(T Impl)
Construct a TTI object using a type implementing the Concept API below.
Definition: TargetTransformInfo.h:2299
llvm::TargetTransformInfo::getExtendedAddReductionCost
InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of an extended reduction pattern, similar to getArithmeticReductionCost of an Add reduction with an extension and optional multiply.
Definition: TargetTransformInfo.cpp:915
llvm::RecurrenceDescriptor
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
Definition: IVDescriptors.h:67
llvm::TargetTransformInfo::Concept::getCallInstrCost
virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::Concept::getArithmeticReductionCost
virtual InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, Optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::MemCmpExpansionOptions::NumLoadsPerBlock
unsigned NumLoadsPerBlock
Definition: TargetTransformInfo.h:772
llvm::TargetTransformInfo::getArithmeticReductionCost
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, Optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of vector reduction intrinsics.
Definition: TargetTransformInfo.cpp:897
llvm::IntrinsicCostAttributes::getID
Intrinsic::ID getID() const
Definition: TargetTransformInfo.h:148
llvm::TargetTransformInfo::Concept::getGISelRematGlobalCost
virtual unsigned getGISelRematGlobalCost() const =0
llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition: TargetTransformInfo.h:900
llvm::TargetTransformInfo::Concept::getIntImmCostInst
virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr)=0
llvm::TargetTransformInfo::Concept::getCFInstrCost
virtual InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
llvm::TargetTransformInfo::invalidate
bool invalidate(Function &, const PreservedAnalyses &, FunctionAnalysisManager::Invalidator &)
Handle the invalidation of this information.
Definition: TargetTransformInfo.h:197
llvm::TargetTransformInfo::Concept::getInlinerVectorBonusPercent
virtual int getInlinerVectorBonusPercent()=0
llvm::TargetTransformInfo::Concept::isLegalAddImmediate
virtual bool isLegalAddImmediate(int64_t Imm)=0
llvm::TargetTransformInfo::UnrollingPreferences::Threshold
unsigned Threshold
The cost threshold for the unrolled loop.
Definition: TargetTransformInfo.h:432
llvm::TargetTransformInfo::Concept::getNumberOfParts
virtual unsigned getNumberOfParts(Type *Tp)=0
llvm::TargetTransformInfo::VPLegalization::Legal
@ Legal
Definition: TargetTransformInfo.h:1377
llvm::TargetTransformInfo::Concept::shouldBuildLookupTablesForConstant
virtual bool shouldBuildLookupTablesForConstant(Constant *C)=0
llvm::TargetTransformInfo::Concept::isProfitableToHoist
virtual bool isProfitableToHoist(Instruction *I)=0
llvm::TargetTransformInfo::getMinPrefetchStride
unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Some HW prefetchers can handle accesses up to a certain constant stride.
Definition: TargetTransformInfo.cpp:640
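A minimal sketch of how a software-prefetch pass might gate on this hook; the helper name and the loop statistics passed in are hypothetical inputs.
#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;
// Hypothetical helper: skip software prefetches for strides the hardware
// prefetcher is expected to cover on its own.
bool worthSoftwarePrefetch(const TargetTransformInfo &TTI, unsigned StrideBytes,
                           unsigned NumMemAccesses, unsigned NumStrided,
                           unsigned NumPrefetches, bool HasCall) {
  return StrideBytes >= TTI.getMinPrefetchStride(NumMemAccesses, NumStrided,
                                                 NumPrefetches, HasCall);
}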
llvm::TargetTransformInfo::isIndexedStoreLegal
bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const
Definition: TargetTransformInfo.cpp:973
llvm::TargetTransformInfo::isNumRegsMajorCostOfLSR
bool isNumRegsMajorCostOfLSR() const
Return true if the number of registers is the major cost of LSR.
Definition: TargetTransformInfo.cpp:351
llvm::TargetTransformInfo::shouldExpandReduction
bool shouldExpandReduction(const IntrinsicInst *II) const
Definition: TargetTransformInfo.cpp:1040
llvm::TargetTransformInfo::Concept::isLegalAddressingMode
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I)=0
llvm::TargetTransformInfo::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP) const
Get target-customized preferences for the generic loop peeling transformation.
Definition: TargetTransformInfo.cpp:325
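A minimal sketch of the call pattern, assuming L and SE come from the usual loop-pass analyses; the helper name is an assumption.
#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;
// Hypothetical helper: let the target override the generic peeling defaults.
unsigned targetPeelCount(const TargetTransformInfo &TTI, Loop *L,
                         ScalarEvolution &SE) {
  TargetTransformInfo::PeelingPreferences PP;
  TTI.getPeelingPreferences(L, SE, PP);
  return PP.PeelCount; // Remains zero unless the target requests peeling.
}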
llvm::TargetTransformInfo::getStoreVectorFactor
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
Definition: TargetTransformInfo.cpp:1018
llvm::TargetTransformInfo::Concept::getCacheAssociativity
virtual Optional< unsigned > getCacheAssociativity(CacheLevel Level) const =0
llvm::TargetTransformInfo::MIM_PostDec
@ MIM_PostDec
Post-decrementing.
Definition: TargetTransformInfo.h:1284
llvm::HardwareLoopInfo::canAnalyze
bool canAnalyze(LoopInfo &LI)
Definition: TargetTransformInfo.cpp:47
llvm::TargetTransformInfo::isLegalToVectorizeStoreChain
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
Definition: TargetTransformInfo.cpp:996
llvm::SmallVectorImpl< const Value * >
ForceHardwareLoopPHI
static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))
llvm::TargetTransformInfo::Concept::preferPredicatedReductionSelect
virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, ReductionFlags) const =0
llvm::MemIntrinsicInfo
Information about a load/store intrinsic defined by the target.
Definition: TargetTransformInfo.h:70
llvm::TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize
unsigned getAtomicMemIntrinsicMaxElementSize() const
Definition: TargetTransformInfo.cpp:932
llvm::msgpack::Type
Type
MessagePack types as defined in the standard, with the exception of Integer being divided into a sign...
Definition: MsgPackReader.h:49
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to callin...
Definition: InstrTypes.h:1161
DataTypes.h
llvm::TargetTransformInfo::enableAggressiveInterleaving
bool enableAggressiveInterleaving(bool LoopHasReductions) const
Don't restrict interleaved unrolling to small loops.
Definition: TargetTransformInfo.cpp:490
llvm::TargetTransformInfo::Concept::areInlineCompatible
virtual bool areInlineCompatible(const Function *Caller, const Function *Callee) const =0
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::TargetTransformInfo::hasActiveVectorLength
bool hasActiveVectorLength() const
Definition: TargetTransformInfo.cpp:1052
llvm::TargetTransformInfo::UnrollingPreferences::OptSizeThreshold
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
Definition: TargetTransformInfo.h:446
llvm::TargetTransformInfo::Concept::getMinVectorRegisterBitWidth
virtual unsigned getMinVectorRegisterBitWidth() const =0
llvm::TargetTransformInfo::getCallInstrCost
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency) const
Definition: TargetTransformInfo.cpp:871
llvm::TargetTransformInfo::getOperandInfo
static OperandValueKind getOperandInfo(const Value *V, OperandValueProperties &OpProps)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
Definition: TargetTransformInfo.cpp:660
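A minimal sketch, assuming Op is an operand about to be costed; the helper name is illustrative.
#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;
// Hypothetical helper: classify an operand before asking for an instruction
// cost, so targets can e.g. cheapen shifts by a power-of-two constant.
TargetTransformInfo::OperandValueKind
classifyOperand(const Value *Op,
                TargetTransformInfo::OperandValueProperties &Props) {
  Props = TargetTransformInfo::OP_None;
  return TargetTransformInfo::getOperandInfo(Op, Props);
}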
llvm::TargetTransformInfo::getExtractWithExtendCost
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index=-1) const
Definition: TargetTransformInfo.cpp:786
llvm::MemIntrinsicInfo::MatchingId
unsigned short MatchingId
Definition: TargetTransformInfo.h:81
llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition: TargetTransformInfo.h:263
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3206
llvm::TargetTransformInfo::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueKind Opd1Info=OK_AnyValue, OperandValueKind Opd2Info=OK_AnyValue, OperandValueProperties Opd1PropInfo=OP_None, OperandValueProperties Opd2PropInfo=OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr) const
This is an approximation of the reciprocal throughput of a math/logic op.
Definition: TargetTransformInfo.cpp:709
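A minimal sketch using all defaults, i.e. a reciprocal-throughput query for a generic operand mix; passing refined Opd*Info/Opd*PropInfo values lets targets return tighter numbers. The helper name is an assumption.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;
// Hypothetical helper: throughput cost of a <4 x i32> multiply with unknown
// operands (OK_AnyValue / OP_None defaults).
InstructionCost vectorMulCost(const TargetTransformInfo &TTI,
                              LLVMContext &Ctx) {
  auto *VecTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);
  return TTI.getArithmeticInstrCost(Instruction::Mul, VecTy);
}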
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::TargetTransformInfo::Concept::getUserCost
virtual InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind)=0
llvm::TargetTransformInfo::Concept::getPreferredAddressingMode
virtual AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const =0
llvm::IntrinsicCostAttributes::isTypeBasedOnly
bool isTypeBasedOnly() const
Definition: TargetTransformInfo.h:156
llvm::TargetTransformInfo::isProfitableToHoist
bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist an instruction from the then/else blocks to before the if.
Definition: TargetTransformInfo.cpp:443
llvm::TargetTransformInfo::shouldBuildRelLookupTables
bool shouldBuildRelLookupTables() const
Return true if lookup tables should be turned into relative lookup tables.
Definition: TargetTransformInfo.cpp:466
llvm::TargetTransformInfo::Concept::getMaxInterleaveFactor
virtual unsigned getMaxInterleaveFactor(unsigned VF)=0
llvm::TargetTransformInfo::Concept::getStoreVectorFactor
virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const =0
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3062
llvm::TargetTransformInfo::Concept::getLoadStoreVecRegBitWidth
virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const =0
llvm::TargetTransformInfo::~TargetTransformInfo
~TargetTransformInfo()
Definition: TargetTransformInfo.cpp:186
llvm::TargetTransformInfo::Concept::getCacheLineSize
virtual unsigned getCacheLineSize() const =0
llvm::AMDGPU::VGPRIndexMode::Id
Id
Definition: SIDefines.h:221
llvm::TargetTransformInfo::Concept::adjustInliningThreshold
virtual unsigned adjustInliningThreshold(const CallBase *CB)=0
llvm::TargetTransformInfo::Concept::getIntImmCostIntrin
virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind)=0
llvm::TargetTransformInfo::OK_NonUniformConstantValue
@ OK_NonUniformConstantValue
Definition: TargetTransformInfo.h:875
llvm::TargetTransformInfo::getTgtMemIntrinsic
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
Definition: TargetTransformInfo.cpp:927
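A minimal sketch of the intended use, tying into the MemIntrinsicInfo fields documented on this page; the helper name is an assumption.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;
// Hypothetical helper: if the target says this intrinsic behaves like a plain
// unordered load, expose the pointer it reads from.
const Value *pointerIfSimpleLoad(const TargetTransformInfo &TTI,
                                 IntrinsicInst *II) {
  MemIntrinsicInfo Info;
  if (TTI.getTgtMemIntrinsic(II, Info) && Info.ReadMem && !Info.WriteMem &&
      Info.isUnordered())
    return Info.PtrVal;
  return nullptr;
}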
llvm::TargetTransformInfo::Concept::isLegalMaskedCompressStore
virtual bool isLegalMaskedCompressStore(Type *DataType)=0
llvm::TargetTransformInfo::useAA
bool useAA() const
Definition: TargetTransformInfo.cpp:447
llvm::TargetTransformInfo::Concept::getInliningThresholdMultiplier
virtual unsigned getInliningThresholdMultiplier()=0
llvm::HardwareLoopInfo::CountType
IntegerType * CountType
Definition: TargetTransformInfo.h:102
llvm::TargetTransformInfo::enableWritePrefetching
bool enableWritePrefetching() const
Definition: TargetTransformInfo.cpp:651
llvm::TargetTransformInfo::getFPOpCost
InstructionCost getFPOpCost(Type *Ty) const
Return the expected cost of supporting the floating point operation of the specified type.
Definition: TargetTransformInfo.cpp:534
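A minimal sketch comparing the result against TCC_Basic (the cost of a typical 'add', see the entry above); the threshold choice and helper name are assumptions.
#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;
// Hypothetical helper: treat an FP operation as cheap enough to speculate if
// it costs no more than a basic integer op.
bool fpOpIsCheap(const TargetTransformInfo &TTI, Type *FPTy) {
  return TTI.getFPOpCost(FPTy) <= TargetTransformInfo::TCC_Basic;
}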
llvm::TargetTransformInfo::Concept::getMemoryOpCost
virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I)=0
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::TargetTransformInfo::TCK_RecipThroughput
@ TCK_RecipThroughput
Reciprocal throughput.
Definition: TargetTransformInfo.h:212
llvm::IntrinsicCostAttributes::getArgs
const SmallVectorImpl< const Value * > & getArgs() const
Definition: TargetTransformInfo.h:153
llvm::TargetTransformInfo::AMK_None
@ AMK_None
Definition: TargetTransformInfo.h:638
llvm::TargetTransformInfo::SK_ExtractSubvector
@ SK_ExtractSubvector
ExtractSubvector; Index indicates the start offset.
Definition: TargetTransformInfo.h:860
llvm::TargetTransformInfo::AMK_PreIndexed
@ AMK_PreIndexed
Definition: TargetTransformInfo.h:636
llvm::AtomicOrdering::NotAtomic
@ NotAtomic
llvm::TargetTransformInfo::Concept::isLegalToVectorizeStore
virtual bool isLegalToVectorizeStore(StoreInst *SI) const =0
llvm::TargetTransformInfo::isAlwaysUniform
bool isAlwaysUniform(const Value *V) const
Definition: TargetTransformInfo.cpp:248
llvm::TargetTransformInfo::Concept::getMemcpyCost
virtual InstructionCost getMemcpyCost(const Instruction *I)=0
llvm::TargetTransformInfo::isLegalAddressingMode
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
Definition: TargetTransformInfo.cpp:338
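A minimal sketch of the kind of query LSR-style transforms make when costing candidate formulas, e.g. whether base + 4*index + 16 is legal for a given access type; the constants and helper name are illustrative.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Type.h"
using namespace llvm;
// Hypothetical helper: is "reg + 4*reg + 16" a legal addressing mode for
// a load/store of AccessTy in the default address space?
bool scaledModeIsLegal(const TargetTransformInfo &TTI, Type *AccessTy) {
  return TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/nullptr,
                                   /*BaseOffset=*/16, /*HasBaseReg=*/true,
                                   /*Scale=*/4);
}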
llvm::TargetTransformInfo::getMemcpyCost
InstructionCost getMemcpyCost(const Instruction *I) const
Definition: TargetTransformInfo.cpp:891
llvm::TargetTransformInfo::Concept::simplifyDemandedUseBitsIntrinsic
virtual Optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)=0
llvm::TargetTransformInfo::UnrollingPreferences::MaxPercentThresholdBoost
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain perc...
Definition: TargetTransformInfo.h:443
llvm::TargetTransformInfo::Concept::getVectorInstrCost
virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)=0
llvm::TargetTransformInfo::Concept::hasActiveVectorLength
virtual bool hasActiveVectorLength() const =0
llvm::TargetTransformInfo::Concept::getUnrollingPreferences
virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)=0
llvm::MemIntrinsicInfo::IsVolatile
bool IsVolatile
Definition: TargetTransformInfo.h:85
llvm::TargetTransformInfo::Concept::getMinPrefetchStride
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const =0
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::IntrinsicCostAttributes::IntrinsicCostAttributes
IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, InstructionCost ScalarCost=InstructionCost::getInvalid())
Definition: TargetTransformInfo.cpp:57