1 //===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This pass exposes codegen information to IR-level passes. Every
10 /// transformation that uses codegen information is broken into three parts:
11 /// 1. The IR-level analysis pass.
12 /// 2. The IR-level transformation interface which provides the needed
13 /// information.
14 /// 3. Codegen-level implementation which uses target-specific hooks.
15 ///
16 /// This file defines #2, which is the interface that IR-level transformations
17 /// use for querying the codegen.
18 ///
19 //===----------------------------------------------------------------------===//
20 
21 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
22 #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
23 
24 #include "llvm/IR/InstrTypes.h"
25 #include "llvm/IR/Operator.h"
26 #include "llvm/IR/PassManager.h"
27 #include "llvm/Pass.h"
28 #include "llvm/Support/AtomicOrdering.h"
29 #include "llvm/Support/BranchProbability.h"
30 #include "llvm/Support/DataTypes.h"
31 #include "llvm/Support/InstructionCost.h"
32 #include <functional>
33 
34 namespace llvm {
35 
36 namespace Intrinsic {
37 typedef unsigned ID;
38 }
39 
40 class AssumptionCache;
41 class BlockFrequencyInfo;
42 class DominatorTree;
43 class BranchInst;
44 class CallBase;
45 class ExtractElementInst;
46 class Function;
47 class GlobalValue;
48 class InstCombiner;
49 class OptimizationRemarkEmitter;
50 class IntrinsicInst;
51 class LoadInst;
52 class LoopAccessInfo;
53 class Loop;
54 class LoopInfo;
55 class ProfileSummaryInfo;
56 class RecurrenceDescriptor;
57 class SCEV;
58 class ScalarEvolution;
59 class StoreInst;
60 class SwitchInst;
61 class TargetLibraryInfo;
62 class Type;
63 class User;
64 class Value;
65 class VPIntrinsic;
66 struct KnownBits;
67 template <typename T> class Optional;
68 
69 /// Information about a load/store intrinsic defined by the target.
70 struct MemIntrinsicInfo {
71  /// This is the pointer that the intrinsic is loading from or storing to.
72  /// If this is non-null, then analysis/optimization passes can assume that
73  /// this intrinsic is functionally equivalent to a load/store from this
74  /// pointer.
75  Value *PtrVal = nullptr;
76 
77  // Ordering for atomic operations.
78  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
79 
80  // Same Id is set by the target for corresponding load/store intrinsics.
81  unsigned short MatchingId = 0;
82 
83  bool ReadMem = false;
84  bool WriteMem = false;
85  bool IsVolatile = false;
86 
87  bool isUnordered() const {
88  return (Ordering == AtomicOrdering::NotAtomic ||
89  Ordering == AtomicOrdering::Unordered) &&
90  !IsVolatile;
91  }
92 };
93 
94 /// Attributes of a target dependent hardware loop.
95 struct HardwareLoopInfo {
96  HardwareLoopInfo() = delete;
97  explicit HardwareLoopInfo(Loop *L) : L(L) {}
98  Loop *L = nullptr;
99  BasicBlock *ExitBlock = nullptr;
100  BranchInst *ExitBranch = nullptr;
101  const SCEV *ExitCount = nullptr;
102  IntegerType *CountType = nullptr;
103  Value *LoopDecrement = nullptr; // Decrement the loop counter by this
104  // value in every iteration.
105  bool IsNestingLegal = false; // Can a hardware loop be a parent to
106  // another hardware loop?
107  bool CounterInReg = false; // Should loop counter be updated in
108  // the loop via a phi?
109  bool PerformEntryTest = false; // Generate the intrinsic which also performs
110  // icmp ne zero on the loop counter value and
111  // produces an i1 to guard the loop entry.
112  bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,
113  DominatorTree &DT, bool ForceNestedLoop = false,
114  bool ForceHardwareLoopPHI = false);
115  bool canAnalyze(LoopInfo &LI);
116 };
117 
118 class IntrinsicCostAttributes {
119  const IntrinsicInst *II = nullptr;
120  Type *RetTy = nullptr;
121  Intrinsic::ID IID;
122  SmallVector<Type *, 4> ParamTys;
123  SmallVector<const Value *, 4> Arguments;
124  FastMathFlags FMF;
125  // If ScalarizationCost is UINT_MAX, the cost of scalarizing the
126  // arguments and the return value will be computed based on types.
127  InstructionCost ScalarizationCost = InstructionCost::getInvalid();
128 
129 public:
131  Intrinsic::ID Id, const CallBase &CI,
133 
136  FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr,
138 
141 
145  const IntrinsicInst *I = nullptr,
147 
148  Intrinsic::ID getID() const { return IID; }
149  const IntrinsicInst *getInst() const { return II; }
150  Type *getReturnType() const { return RetTy; }
151  FastMathFlags getFlags() const { return FMF; }
152  InstructionCost getScalarizationCost() const { return ScalarizationCost; }
153  const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }
154  const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }
155 
156  bool isTypeBasedOnly() const {
157  return Arguments.empty();
158  }
159 
160  bool skipScalarizationCost() const { return ScalarizationCost.isValid(); }
161 };
162 
163 class TargetTransformInfo;
164 typedef TargetTransformInfo TTI;
165 
166 /// This pass provides access to the codegen interfaces that are needed
167 /// for IR-level transformations.
168 class TargetTransformInfo {
169 public:
170  /// Construct a TTI object using a type implementing the \c Concept
171  /// API below.
172  ///
173  /// This is used by targets to construct a TTI wrapping their target-specific
174  /// implementation that encodes appropriate costs for their target.
175  template <typename T> TargetTransformInfo(T Impl);
176 
177  /// Construct a baseline TTI object using a minimal implementation of
178  /// the \c Concept API below.
179  ///
180  /// The TTI implementation will reflect the information in the DataLayout
181  /// provided if non-null.
182  explicit TargetTransformInfo(const DataLayout &DL);
183 
184  // Provide move semantics.
185  TargetTransformInfo(TargetTransformInfo &&Arg);
186  TargetTransformInfo &operator=(TargetTransformInfo &&RHS);
187 
188  // We need to define the destructor out-of-line to define our sub-classes
189  // out-of-line.
190  ~TargetTransformInfo();
191 
192  /// Handle the invalidation of this information.
193  ///
194  /// When used as a result of \c TargetIRAnalysis this method will be called
195  /// when the function this was computed for changes. When it returns false,
196  /// the information is preserved across those changes.
197  bool invalidate(Function &, const PreservedAnalyses &,
198  FunctionAnalysisManager::Invalidator &) {
199  // FIXME: We should probably in some way ensure that the subtarget
200  // information for a function hasn't changed.
201  return false;
202  }
203 
204  /// \name Generic Target Information
205  /// @{
206 
207  /// The kind of cost model.
208  ///
209  /// There are several different cost models that can be customized by the
210  /// target. The normalization of each cost model may be target specific.
211  enum TargetCostKind {
212  TCK_RecipThroughput, ///< Reciprocal throughput.
213  TCK_Latency, ///< The latency of instruction.
214  TCK_CodeSize, ///< Instruction code size.
215  TCK_SizeAndLatency ///< The weighted sum of size and latency.
216  };
217 
218  /// Query the cost of a specified instruction.
219  ///
220  /// Clients should use this interface to query the cost of an existing
221  /// instruction. The instruction must have a valid parent (basic block).
222  ///
223  /// Note, this method does not cache the cost calculation and it
224  /// can be expensive in some cases.
225  InstructionCost getInstructionCost(const Instruction *I,
226  enum TargetCostKind kind) const {
227  InstructionCost Cost;
228  switch (kind) {
229  case TCK_RecipThroughput:
230  Cost = getInstructionThroughput(I);
231  break;
232  case TCK_Latency:
233  Cost = getInstructionLatency(I);
234  break;
235  case TCK_CodeSize:
236  case TCK_SizeAndLatency:
237  Cost = getUserCost(I, kind);
238  break;
239  }
240  return Cost;
241  }
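  // Example usage (an illustrative sketch): a transformation pass that has
  // obtained a TargetTransformInfo reference, e.g. via TargetIRAnalysis, can
  // sum the throughput cost of a basic block. The helper name and variables
  // are assumptions made for this sketch.
  //
  //   InstructionCost getBlockCost(const BasicBlock &BB,
  //                                const TargetTransformInfo &TTI) {
  //     InstructionCost Cost = 0;
  //     for (const Instruction &I : BB)
  //       Cost += TTI.getInstructionCost(
  //           &I, TargetTransformInfo::TCK_RecipThroughput);
  //     return Cost;
  //   }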
242 
243  /// Underlying constants for 'cost' values in this interface.
244  ///
245  /// Many APIs in this interface return a cost. This enum defines the
246  /// fundamental values that should be used to interpret (and produce) those
247  /// costs. The costs are returned as an int rather than a member of this
248  /// enumeration because it is expected that the cost of one IR instruction
249  /// may have a multiplicative factor to it or otherwise won't fit directly
250  /// into the enum. Moreover, it is common to sum or average costs which works
251  /// better as simple integral values. Thus this enum only provides constants.
252  /// Also note that the returned costs are signed integers to make it natural
253  /// to add, subtract, and test with zero (a common boundary condition). It is
254  /// not expected that 2^32 is a realistic cost to be modeling at any point.
255  ///
256  /// Note that these costs should usually reflect the intersection of code-size
257  /// cost and execution cost. A free instruction is typically one that folds
258  /// into another instruction. For example, reg-to-reg moves can often be
259  /// skipped by renaming the registers in the CPU, but they still are encoded
260  /// and thus wouldn't be considered 'free' here.
261  enum TargetCostConstants {
262  TCC_Free = 0, ///< Expected to fold away in lowering.
263  TCC_Basic = 1, ///< The cost of a typical 'add' instruction.
264  TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
265  };
266 
267  /// Estimate the cost of a GEP operation when lowered.
268  InstructionCost
269  getGEPCost(Type *PointeeType, const Value *Ptr,
270  ArrayRef<const Value *> Operands,
271  TargetCostKind CostKind = TCK_SizeAndLatency) const;
272 
273  /// \returns A value by which our inlining threshold should be multiplied.
274  /// This is primarily used to bump up the inlining threshold wholesale on
275  /// targets where calls are unusually expensive.
276  ///
277  /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
278  /// individual classes of instructions would be better.
279  unsigned getInliningThresholdMultiplier() const;
280 
281  /// \returns A value to be added to the inlining threshold.
282  unsigned adjustInliningThreshold(const CallBase *CB) const;
283 
284  /// \returns Vector bonus in percent.
285  ///
286  /// Vector bonuses: We want to more aggressively inline vector-dense kernels
287  /// and apply this bonus based on the percentage of vector instructions. A
288  /// bonus is applied if the vector instructions exceed 50% and half that
289  /// amount is applied if it exceeds 10%. Note that these bonuses are somewhat
290  /// arbitrary and evolved over time by accident as much as because they are
291  /// principled bonuses.
292  /// FIXME: It would be nice to base the bonus values on something more
293  /// scientific. A target may have no bonus on vector instructions.
294  int getInlinerVectorBonusPercent() const;
295 
296  /// \return the expected cost of a memcpy, which could e.g. depend on the
297  /// source/destination type and alignment and the number of bytes copied.
298  InstructionCost getMemcpyCost(const Instruction *I) const;
299 
300  /// \return The estimated number of case clusters when lowering \p 'SI'.
301  /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
302  /// table.
304  unsigned &JTSize,
305  ProfileSummaryInfo *PSI,
306  BlockFrequencyInfo *BFI) const;
307 
308  /// Estimate the cost of a given IR user when lowered.
309  ///
310  /// This can estimate the cost of either a ConstantExpr or Instruction when
311  /// lowered.
312  ///
313  /// \p Operands is a list of operands which can be a result of transformations
314  /// of the current operands. The number of operands on the list must be equal
315  /// to the number of current operands the IR user has. Their order on the
316  /// list must be the same as the order of the current operands the IR user
317  /// has.
318  ///
319  /// The returned cost is defined in terms of \c TargetCostConstants, see its
320  /// comments for a detailed explanation of the cost values.
321  InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
322  TargetCostKind CostKind) const;
323 
324  /// This is a helper function which calls the two-argument getUserCost
325  /// with \p Operands which are the current operands U has.
326  InstructionCost getUserCost(const User *U, TargetCostKind CostKind) const {
327  SmallVector<const Value *, 4> Operands(U->operand_values());
328  return getUserCost(U, Operands, CostKind);
329  }
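  // Example usage (an illustrative sketch, with a hypothetical helper name):
  // query the code-size cost of a user with its current operands.
  //
  //   InstructionCost getSizeCost(const User *U,
  //                               const TargetTransformInfo &TTI) {
  //     SmallVector<const Value *, 4> Ops(U->operand_values());
  //     return TTI.getUserCost(U, Ops, TargetTransformInfo::TCK_CodeSize);
  //   }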
330 
331  /// If a branch or a select condition is skewed in one direction by more than
332  /// this factor, it is very likely to be predicted correctly.
333  BranchProbability getPredictableBranchThreshold() const;
334 
335  /// Return true if branch divergence exists.
336  ///
337  /// Branch divergence has a significantly negative impact on GPU performance
338  /// when threads in the same wavefront take different paths due to conditional
339  /// branches.
340  bool hasBranchDivergence() const;
341 
342  /// Return true if the target prefers to use GPU divergence analysis to
343  /// replace the legacy version.
344  bool useGPUDivergenceAnalysis() const;
345 
346  /// Returns whether V is a source of divergence.
347  ///
348  /// This function provides the target-dependent information for
349  /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis
350  /// first builds the dependency graph, and then runs the reachability
351  /// algorithm starting with the sources of divergence.
352  bool isSourceOfDivergence(const Value *V) const;
353 
354  // Returns true for the target specific
355  // set of operations which produce uniform result
356  // even taking non-uniform arguments
357  bool isAlwaysUniform(const Value *V) const;
358 
359  /// Returns the address space ID for a target's 'flat' address space. Note
360  /// this is not necessarily the same as addrspace(0), which LLVM sometimes
361  /// refers to as the generic address space. The flat address space is a
362  /// generic address space that can be used to access multiple segments of memory
363  /// with different address spaces. Access of a memory location through a
364  /// pointer with this address space is expected to be legal but slower
365  /// compared to the same memory location accessed through a pointer with a
366  /// different address space.
367  //
368  /// This is for targets with different pointer representations which can
369  /// be converted with the addrspacecast instruction. If a pointer is converted
370  /// to this address space, optimizations should attempt to replace the access
371  /// with the source address space.
372  ///
373  /// \returns ~0u if the target does not have such a flat address space to
374  /// optimize away.
375  unsigned getFlatAddressSpace() const;
376 
377  /// Return any intrinsic address operand indexes which may be rewritten if
378  /// they use a flat address space pointer.
379  ///
380  /// \returns true if the intrinsic was handled.
382  Intrinsic::ID IID) const;
383 
384  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
385 
386  /// Return true if globals in this address space can have initializers other
387  /// than `undef`.
388  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const;
389 
390  unsigned getAssumedAddrSpace(const Value *V) const;
391 
392  /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
393  /// NewV, which has a different address space. This should happen for every
394  /// operand index that collectFlatAddressOperands returned for the intrinsic.
395  /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the
396  /// new value (which may be the original \p II with modified operands).
398  Value *NewV) const;
399 
400  /// Test whether calls to a function lower to actual program function
401  /// calls.
402  ///
403  /// The idea is to test whether the program is likely to require a 'call'
404  /// instruction or equivalent in order to call the given function.
405  ///
406  /// FIXME: It's not clear that this is a good or useful query API. Clients
407  /// should probably move to simpler cost metrics using the above.
408  /// Alternatively, we could split the cost interface into distinct code-size
409  /// and execution-speed costs. This would allow modelling the core of this
410  /// query more accurately as a call is a single small instruction, but
411  /// incurs significant execution cost.
412  bool isLoweredToCall(const Function *F) const;
413 
414  struct LSRCost {
415  /// TODO: Some of these could be merged. Also, a lexical ordering
416  /// isn't always optimal.
417  unsigned Insns;
418  unsigned NumRegs;
419  unsigned AddRecCost;
420  unsigned NumIVMuls;
421  unsigned NumBaseAdds;
422  unsigned ImmCost;
423  unsigned SetupCost;
424  unsigned ScaleCost;
425  };
426 
427  /// Parameters that control the generic loop unrolling transformation.
428  struct UnrollingPreferences {
429  /// The cost threshold for the unrolled loop. Should be relative to the
430  /// getUserCost values returned by this API, and the expectation is that
431  /// the unrolled loop's instructions when run through that interface should
432  /// not exceed this cost. However, this is only an estimate. Also, specific
433  /// loops may be unrolled even with a cost above this threshold if deemed
434  /// profitable. Set this to UINT_MAX to disable the loop body cost
435  /// restriction.
436  unsigned Threshold;
437  /// If complete unrolling will reduce the cost of the loop, we will boost
438  /// the Threshold by a certain percent to allow more aggressive complete
439  /// unrolling. This value provides the maximum boost percentage that we
440  /// can apply to Threshold (The value should be no less than 100).
441  /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
442  /// MaxPercentThresholdBoost / 100)
443  /// E.g. if complete unrolling reduces the loop execution time by 50%
444  /// then we boost the threshold by the factor of 2x. If unrolling is not
445  /// expected to reduce the running time, then we do not increase the
446  /// threshold.
448  /// The cost threshold for the unrolled loop when optimizing for size (set
449  /// to UINT_MAX to disable).
451  /// The cost threshold for the unrolled loop, like Threshold, but used
452  /// for partial/runtime unrolling (set to UINT_MAX to disable).
454  /// The cost threshold for the unrolled loop when optimizing for size, like
455  /// OptSizeThreshold, but used for partial/runtime unrolling (set to
456  /// UINT_MAX to disable).
458  /// A forced unrolling factor (the number of concatenated bodies of the
459  /// original loop in the unrolled loop body). When set to 0, the unrolling
460  /// transformation will select an unrolling factor based on the current cost
461  /// threshold and other factors.
462  unsigned Count;
463  /// Default unroll count for loops with run-time trip count.
465  // Set the maximum unrolling factor. The unrolling factor may be selected
466  // using the appropriate cost threshold, but may not exceed this number
467  // (set to UINT_MAX to disable). This does not apply in cases where the
468  // loop is being fully unrolled.
469  unsigned MaxCount;
470  /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
471  /// applies even if full unrolling is selected. This allows a target to fall
472  /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
474  // Represents number of instructions optimized when "back edge"
475  // becomes "fall through" in unrolled loop.
476  // For now we count a conditional branch on a backedge and a comparison
477  // feeding it.
478  unsigned BEInsns;
479  /// Allow partial unrolling (unrolling of loops to expand the size of the
480  /// loop body, not only to eliminate small constant-trip-count loops).
481  bool Partial;
482  /// Allow runtime unrolling (unrolling of loops to expand the size of the
483  /// loop body even when the number of loop iterations is not known at
484  /// compile time).
485  bool Runtime;
486  /// Allow generation of a loop remainder (extra iterations after unroll).
488  /// Allow emitting expensive instructions (such as divisions) when computing
489  /// the trip count of a loop for runtime unrolling.
491  /// Apply loop unroll on any kind of loop
492  /// (mainly to loops that fail runtime unrolling).
493  bool Force;
494  /// Allow using trip count upper bound to unroll loops.
496  /// Allow unrolling of all the iterations of the runtime loop remainder.
498  /// Allow unroll and jam. Used to enable unroll and jam for the target.
500  /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
501  /// value above is used during unroll and jam for the outer loop size.
502  /// This value is used in the same manner to limit the size of the inner
503  /// loop.
505  /// Don't allow loop unrolling to simulate more than this number of
506  /// iterations when checking full unroll profitability
508  };
509 
510  /// Get target-customized preferences for the generic loop unrolling
511  /// transformation. The caller will initialize UP with the current
512  /// target-independent defaults.
515  OptimizationRemarkEmitter *ORE) const;
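  // Example usage (an illustrative sketch): a loop transform lets the target
  // refine caller-chosen defaults. Loop *L, ScalarEvolution &SE, an
  // OptimizationRemarkEmitter ORE and a TargetTransformInfo TTI are assumed
  // to be in scope; the default values shown are arbitrary.
  //
  //   TargetTransformInfo::UnrollingPreferences UP;
  //   UP.Threshold = 150;
  //   UP.Partial = false;
  //   UP.Runtime = false;
  //   TTI.getUnrollingPreferences(L, SE, UP, &ORE);
  //   // UP now carries any target-specific adjustments.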
516 
517  /// Query the target whether it would be profitable to convert the given loop
518  /// into a hardware loop.
519  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
520  AssumptionCache &AC, TargetLibraryInfo *LibInfo,
521  HardwareLoopInfo &HWLoopInfo) const;
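  // Example usage (an illustrative sketch): L, SE, AC, LibInfo, LI and TTI
  // are assumed to be available in the calling pass.
  //
  //   HardwareLoopInfo HWLoopInfo(L);
  //   if (HWLoopInfo.canAnalyze(LI) &&
  //       TTI.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo)) {
  //     // Lower this loop to a hardware loop.
  //   }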
522 
523  /// Query the target whether it would be preferred to create a predicated
524  /// vector loop, which can avoid the need to emit a scalar epilogue loop.
527  DominatorTree *DT,
528  const LoopAccessInfo *LAI) const;
529 
530  /// Query the target whether lowering of the llvm.get.active.lane.mask
531  /// intrinsic is supported.
532  bool emitGetActiveLaneMask() const;
533 
534  // Parameters that control the loop peeling transformation
535  struct PeelingPreferences {
536  /// A forced peeling factor (the number of bodies of the original loop
537  /// that should be peeled off before the loop body). When set to 0, a
538  /// peeling factor is chosen based on profile information and other factors.
539  unsigned PeelCount;
540  /// Allow peeling off loop iterations.
541  bool AllowPeeling;
542  /// Allow peeling off loop iterations for loop nests.
543  bool AllowLoopNestsPeeling;
544  /// Allow peeling based on profile. Used to enable peeling off all
545  /// iterations based on the provided profile.
546  /// If the value is true, the peeling cost model can decide to peel only
547  /// some iterations, in which case it will set this to false.
548  bool PeelProfiledIterations;
549  };
550 
551  /// Get target-customized preferences for the generic loop peeling
552  /// transformation. The caller will initialize \p PP with the current
553  /// target-independent defaults with information from \p L and \p SE.
555  PeelingPreferences &PP) const;
556 
557  /// Targets can implement their own combinations for target-specific
558  /// intrinsics. This function will be called from the InstCombine pass every
559  /// time a target-specific intrinsic is encountered.
560  ///
561  /// \returns None to not do anything target specific or a value that will be
562  /// returned from the InstCombiner. It is also possible to stop further
563  /// processing of the intrinsic by returning nullptr.
565  IntrinsicInst &II) const;
566  /// Can be used to implement target-specific instruction combining.
567  /// \see instCombineIntrinsic
570  APInt DemandedMask, KnownBits &Known,
571  bool &KnownBitsComputed) const;
572  /// Can be used to implement target-specific instruction combining.
573  /// \see instCombineIntrinsic
575  InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
576  APInt &UndefElts2, APInt &UndefElts3,
577  std::function<void(Instruction *, unsigned, APInt, APInt &)>
578  SimplifyAndSetOp) const;
579  /// @}
580 
581  /// \name Scalar Target Information
582  /// @{
583 
584  /// Flags indicating the kind of support for population count.
585  ///
586  /// Compared to the SW implementation, HW support is supposed to
587  /// significantly boost the performance when the population is dense, and it
588  /// may or may not degrade performance if the population is sparse. HW
589  /// support is considered "Fast" if it can outperform, or is on par with,
590  /// the SW implementation when the population is sparse; otherwise, it is
591  /// considered "Slow".
592  enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };
593 
594  /// Return true if the specified immediate is a legal add immediate: that
595  /// is, the target has add instructions which can add a register with the
596  /// immediate without having to materialize the immediate into a register.
597  bool isLegalAddImmediate(int64_t Imm) const;
598 
599  /// Return true if the specified immediate is a legal icmp immediate:
600  /// that is, the target has icmp instructions which can compare a register
601  /// against the immediate without having to materialize the immediate into a
602  /// register.
603  bool isLegalICmpImmediate(int64_t Imm) const;
604 
605  /// Return true if the addressing mode represented by AM is legal for
606  /// this target, for a load/store of the specified type.
607  /// The type may be VoidTy, in which case only return true if the addressing
608  /// mode is legal for a load/store of any legal type.
609  /// If target returns true in LSRWithInstrQueries(), I may be valid.
610  /// TODO: Handle pre/postinc as well.
611  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
612  bool HasBaseReg, int64_t Scale,
613  unsigned AddrSpace = 0,
614  Instruction *I = nullptr) const;
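  // Example usage (an illustrative sketch, Ctx being an assumed LLVMContext):
  // ask whether "base register + 4 * scaled register" addressing is legal for
  // an i32 access.
  //
  //   bool Legal = TTI.isLegalAddressingMode(Type::getInt32Ty(Ctx),
  //                                          /*BaseGV=*/nullptr,
  //                                          /*BaseOffset=*/0,
  //                                          /*HasBaseReg=*/true,
  //                                          /*Scale=*/4);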
615 
616  /// Return true if LSR cost of C1 is lower than C2.
617  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
618  TargetTransformInfo::LSRCost &C2) const;
619 
620  /// Return true if LSR major cost is number of registers. Targets which
621  /// implement their own isLSRCostLess and unset number of registers as major
622  /// cost should return false, otherwise return true.
623  bool isNumRegsMajorCostOfLSR() const;
624 
625  /// \returns true if LSR should not optimize a chain that includes \p I.
626  bool isProfitableLSRChainElement(Instruction *I) const;
627 
628  /// Return true if the target can fuse a compare and branch.
629  /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
630  /// calculation for the instructions in a loop.
631  bool canMacroFuseCmp() const;
632 
633  /// Return true if the target can save a compare for loop count, for example
634  /// hardware loop saves a compare.
635  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
637  TargetLibraryInfo *LibInfo) const;
638 
639  enum AddressingModeKind {
640  AMK_PreIndexed,
641  AMK_PostIndexed,
642  AMK_None
643  };
644 
645  /// Return the preferred addressing mode LSR should make efforts to generate.
646  AddressingModeKind getPreferredAddressingMode(const Loop *L,
647  ScalarEvolution *SE) const;
648 
649  /// Return true if the target supports masked store.
650  bool isLegalMaskedStore(Type *DataType, Align Alignment) const;
651  /// Return true if the target supports masked load.
652  bool isLegalMaskedLoad(Type *DataType, Align Alignment) const;
653 
654  /// Return true if the target supports nontemporal store.
655  bool isLegalNTStore(Type *DataType, Align Alignment) const;
656  /// Return true if the target supports nontemporal load.
657  bool isLegalNTLoad(Type *DataType, Align Alignment) const;
658 
659  /// Return true if the target supports masked scatter.
660  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
661  /// Return true if the target supports masked gather.
662  bool isLegalMaskedGather(Type *DataType, Align Alignment) const;
663 
664  /// Return true if the target supports masked compress store.
665  bool isLegalMaskedCompressStore(Type *DataType) const;
666  /// Return true if the target supports masked expand load.
667  bool isLegalMaskedExpandLoad(Type *DataType) const;
668 
669  /// Return true if we should be enabling ordered reductions for the target.
670  bool enableOrderedReductions() const;
671 
672  /// Return true if the target has a unified operation to calculate division
673  /// and remainder. If so, the additional implicit multiplication and
674  /// subtraction required to calculate a remainder from division are free. This
675  /// can enable more aggressive transformations for division and remainder than
676  /// would typically be allowed using throughput or size cost models.
677  bool hasDivRemOp(Type *DataType, bool IsSigned) const;
678 
679  /// Return true if the given instruction (assumed to be a memory access
680  /// instruction) has a volatile variant. If that's the case then we can avoid
681  /// addrspacecast to generic AS for volatile loads/stores. Default
682  /// implementation returns false, which prevents address space inference for
683  /// volatile loads/stores.
684  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
685 
686  /// Return true if target doesn't mind addresses in vectors.
687  bool prefersVectorizedAddressing() const;
688 
689  /// Return the cost of the scaling factor used in the addressing
690  /// mode represented by AM for this target, for a load/store
691  /// of the specified type.
692  /// If the AM is supported, the return value must be >= 0.
693  /// If the AM is not supported, it returns a negative value.
694  /// TODO: Handle pre/postinc as well.
696  int64_t BaseOffset, bool HasBaseReg,
697  int64_t Scale,
698  unsigned AddrSpace = 0) const;
699 
700  /// Return true if the loop strength reduce pass should make
701  /// Instruction* based TTI queries to isLegalAddressingMode(). This is
702  /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
703  /// immediate offset and no index register.
704  bool LSRWithInstrQueries() const;
705 
706  /// Return true if it's free to truncate a value of type Ty1 to type
707  /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
708  /// by referencing its sub-register AX.
709  bool isTruncateFree(Type *Ty1, Type *Ty2) const;
710 
711  /// Return true if it is profitable to hoist instruction in the
712  /// then/else to before if.
713  bool isProfitableToHoist(Instruction *I) const;
714 
715  bool useAA() const;
716 
717  /// Return true if this type is legal.
718  bool isTypeLegal(Type *Ty) const;
719 
720  /// Returns the estimated number of registers required to represent \p Ty.
722 
723  /// Return true if switches should be turned into lookup tables for the
724  /// target.
725  bool shouldBuildLookupTables() const;
726 
727  /// Return true if switches should be turned into lookup tables
728  /// containing this constant value for the target.
730 
731  /// Return true if lookup tables should be turned into relative lookup tables.
732  bool shouldBuildRelLookupTables() const;
733 
734  /// Return true if the input function, which is cold at all call sites,
735  /// should use the coldcc calling convention.
736  bool useColdCCForColdCall(Function &F) const;
737 
738  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
739  /// are set if the demanded result elements need to be inserted and/or
740  /// extracted from vectors.
742  const APInt &DemandedElts,
743  bool Insert, bool Extract) const;
744 
745  /// Estimate the overhead of scalarizing an instruction's unique
746  /// non-constant operands. The (potentially vector) types to use for each
747  /// argument are passed via Tys.
749  ArrayRef<Type *> Tys) const;
750 
751  /// If target has efficient vector element load/store instructions, it can
752  /// return true here so that insertion/extraction costs are not added to
753  /// the scalarization cost of a load/store.
755 
756  /// Don't restrict interleaved unrolling to small loops.
757  bool enableAggressiveInterleaving(bool LoopHasReductions) const;
758 
759  /// Returns options for expansion of memcmp. IsZeroCmp is
760  // true if this is the expansion of memcmp(p1, p2, s) == 0.
761  struct MemCmpExpansionOptions {
762  // Return true if memcmp expansion is enabled.
763  operator bool() const { return MaxNumLoads > 0; }
764 
765  // Maximum number of load operations.
766  unsigned MaxNumLoads = 0;
767 
768  // The list of available load sizes (in bytes), sorted in decreasing order.
769  SmallVector<unsigned, 8> LoadSizes;
770 
771  // For memcmp expansion when the memcmp result is only compared equal or
772  // not-equal to 0, allow up to this number of load pairs per block. As an
773  // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
774  // a0 = load2bytes &a[0]
775  // b0 = load2bytes &b[0]
776  // a2 = load1byte &a[2]
777  // b2 = load1byte &b[2]
778  // r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
779  unsigned NumLoadsPerBlock = 1;
780 
781  // Set to true to allow overlapping loads. For example, 7-byte compares can
782  // be done with two 4-byte compares instead of 4+2+1-byte compares. This
783  // requires all loads in LoadSizes to be doable in an unaligned way.
784  bool AllowOverlappingLoads = false;
785  };
786  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
787  bool IsZeroCmp) const;
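  // Example usage (an illustrative sketch) for a memcmp-expansion client:
  //
  //   const auto Options =
  //       TTI.enableMemCmpExpansion(/*OptSize=*/false, /*IsZeroCmp=*/true);
  //   if (!Options)
  //     return false; // expansion disabled for this target
  //   // Otherwise emit at most Options.MaxNumLoads loads, using the sizes
  //   // listed in Options.LoadSizes.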
788 
789  /// Enable matching of interleaved access groups.
791 
792  /// Enable matching of interleaved access groups that contain predicated
793  /// accesses or gaps and therefore vectorized using masked
794  /// vector loads/stores.
796 
797  /// Indicate that it is potentially unsafe to automatically vectorize
798  /// floating-point operations because the semantics of vector and scalar
799  /// floating-point operations may differ. For example, ARM NEON v7 SIMD math
800  /// does not support IEEE-754 denormal numbers, while depending on the
801  /// platform, scalar floating-point math does.
802  /// This applies to floating-point math operations and calls, not memory
803  /// operations, shuffles, or casts.
804  bool isFPVectorizationPotentiallyUnsafe() const;
805 
806  /// Determine if the target supports unaligned memory accesses.
808  unsigned AddressSpace = 0,
809  Align Alignment = Align(1),
810  bool *Fast = nullptr) const;
811 
812  /// Return hardware support for population count.
813  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
814 
815  /// Return true if the hardware has a fast square-root instruction.
816  bool haveFastSqrt(Type *Ty) const;
817 
818  /// Return true if it is faster to check if a floating-point value is NaN
819  /// (or not-NaN) versus a comparison against a constant FP zero value.
820  /// Targets should override this if materializing a 0.0 for comparison is
821  /// generally as cheap as checking for ordered/unordered.
822  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
823 
824  /// Return the expected cost of supporting the floating point operation
825  /// of the specified type.
826  InstructionCost getFPOpCost(Type *Ty) const;
827 
828  /// Return the expected cost of materializing for the given integer
829  /// immediate of the specified type.
830  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
831  TargetCostKind CostKind) const;
832 
833  /// Return the expected cost of materialization for the given integer
834  /// immediate of the specified type for a given instruction. The cost can be
835  /// zero if the immediate can be folded into the specified instruction.
836  InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
837  const APInt &Imm, Type *Ty,
839  Instruction *Inst = nullptr) const;
841  const APInt &Imm, Type *Ty,
842  TargetCostKind CostKind) const;
843 
844  /// Return the expected cost for the given integer when optimising
845  /// for size. This is different than the other integer immediate cost
846  /// functions in that it is subtarget agnostic. This is useful when you e.g.
847  /// target one ISA such as Aarch32 but smaller encodings could be possible
848  /// with another such as Thumb. This return value is used as a penalty when
849  /// the total costs for a constant is calculated (the bigger the cost, the
850  /// more beneficial constant hoisting is).
851  InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
852  const APInt &Imm, Type *Ty) const;
853  /// @}
854 
855  /// \name Vector Target Information
856  /// @{
857 
858  /// The various kinds of shuffle patterns for vector queries.
859  enum ShuffleKind {
860  SK_Broadcast, ///< Broadcast element 0 to all other elements.
861  SK_Reverse, ///< Reverse the order of the vector.
862  SK_Select, ///< Selects elements from the corresponding lane of
863  ///< either source operand. This is equivalent to a
864  ///< vector select with a constant condition operand.
865  SK_Transpose, ///< Transpose two vectors.
866  SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
867  SK_ExtractSubvector, ///< ExtractSubvector. Index indicates start offset.
868  SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
869  ///< with any shuffle mask.
870  SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any
871  ///< shuffle mask.
872  SK_Splice ///< Concatenates elements from the first input vector
873  ///< with elements of the second input vector. Returning
874  ///< a vector of the same type as the input vectors.
875  };
876 
877  /// Additional information about an operand's possible values.
878  enum OperandValueKind {
879  OK_AnyValue, // Operand can have any value.
880  OK_UniformValue, // Operand is uniform (splat of a value).
881  OK_UniformConstantValue, // Operand is uniform constant.
882  OK_NonUniformConstantValue // Operand is a non uniform constant value.
883  };
884 
885  /// Additional properties of an operand's values.
886  enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
887 
888  /// \return the number of registers in the target-provided register class.
889  unsigned getNumberOfRegisters(unsigned ClassID) const;
890 
891  /// \return the target-provided register class ID for the provided type,
892  /// accounting for type promotion and other type-legalization techniques that
893  /// the target might apply. However, it specifically does not account for the
894  /// scalarization or splitting of vector types. Should a vector type require
895  /// scalarization or splitting into multiple underlying vector registers, that
896  /// type should be mapped to a register class containing no registers.
897  /// Specifically, this is designed to provide a simple, high-level view of the
898  /// register allocation later performed by the backend. These register classes
899  /// don't necessarily map onto the register classes used by the backend.
900  /// FIXME: It's not currently possible to determine how many registers
901  /// are used by the provided type.
902  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
903 
904  /// \return the target-provided register class name
905  const char *getRegisterClassName(unsigned ClassID) const;
906 
908 
909  /// \return The width of the largest scalar or vector register type.
911 
912  /// \return The width of the smallest vector register type.
913  unsigned getMinVectorRegisterBitWidth() const;
914 
915  /// \return The maximum value of vscale if the target specifies an
916  /// architectural maximum vector length, and None otherwise.
918 
919  /// \return True if the vectorization factor should be chosen to
920  /// make the vector of the smallest element type match the size of a
921  /// vector register. For wider element types, this could result in
922  /// creating vectors that span multiple vector registers.
923  /// If false, the vectorization factor will be chosen based on the
924  /// size of the widest element type.
925  bool shouldMaximizeVectorBandwidth() const;
926 
927  /// \return The minimum vectorization factor for types of given element
928  /// bit width, or 0 if there is no minimum VF. The returned value only
929  /// applies when shouldMaximizeVectorBandwidth returns true.
930  /// If IsScalable is true, the returned ElementCount must be a scalable VF.
931  ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;
932 
933  /// \return The maximum vectorization factor for types of given element
934  /// bit width and opcode, or 0 if there is no maximum VF.
935  /// Currently only used by the SLP vectorizer.
936  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
937 
938  /// \return True if it should be considered for address type promotion.
939  /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
940  /// profitable without finding other extensions fed by the same input.
942  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
943 
944  /// \return The size of a cache line in bytes.
945  unsigned getCacheLineSize() const;
946 
947  /// The possible cache levels
948  enum class CacheLevel {
949  L1D, // The L1 data cache
950  L2D, // The L2 data cache
951 
952  // We currently do not model L3 caches, as their sizes differ widely between
953  // microarchitectures. Also, we currently do not have a use for L3 cache
954  // size modeling yet.
955  };
956 
957  /// \return The size of the cache level in bytes, if available.
959 
960  /// \return The associativity of the cache level, if available.
962 
963  /// \return How much before a load we should place the prefetch
964  /// instruction. This is currently measured in number of
965  /// instructions.
966  unsigned getPrefetchDistance() const;
967 
968  /// Some HW prefetchers can handle accesses up to a certain constant stride.
969  /// Sometimes prefetching is beneficial even below the HW prefetcher limit,
970  /// and the arguments provided are meant to serve as a basis for deciding this
971  /// for a particular loop.
972  ///
973  /// \param NumMemAccesses Number of memory accesses in the loop.
974  /// \param NumStridedMemAccesses Number of the memory accesses that
975  /// ScalarEvolution could find a known stride
976  /// for.
977  /// \param NumPrefetches Number of software prefetches that will be
978  /// emitted as determined by the addresses
979  /// involved and the cache line size.
980  /// \param HasCall True if the loop contains a call.
981  ///
982  /// \return This is the minimum stride in bytes where it makes sense to start
983  /// adding SW prefetches. The default is 1, i.e. prefetch with any
984  /// stride.
985  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
986  unsigned NumStridedMemAccesses,
987  unsigned NumPrefetches, bool HasCall) const;
988 
989  /// \return The maximum number of iterations to prefetch ahead. If
990  /// the required number of iterations is more than this number, no
991  /// prefetching is performed.
992  unsigned getMaxPrefetchIterationsAhead() const;
993 
994  /// \return True if prefetching should also be done for writes.
995  bool enableWritePrefetching() const;
996 
997  /// \return The maximum interleave factor that any transform should try to
998  /// perform for this target. This number depends on the level of parallelism
999  /// and the number of execution units in the CPU.
1000  unsigned getMaxInterleaveFactor(unsigned VF) const;
1001 
1002  /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
1003  static OperandValueKind getOperandInfo(const Value *V,
1004  OperandValueProperties &OpProps);
1005 
1006  /// This is an approximation of reciprocal throughput of a math/logic op.
1007  /// A higher cost indicates less expected throughput.
1008  /// From Agner Fog's guides, reciprocal throughput is "the average number of
1009  /// clock cycles per instruction when the instructions are not part of a
1010  /// limiting dependency chain."
1011  /// Therefore, costs should be scaled to account for multiple execution units
1012  /// on the target that can process this type of instruction. For example, if
1013  /// there are 5 scalar integer units and 2 vector integer units that can
1014  /// calculate an 'add' in a single cycle, this model should indicate that the
1015  /// cost of the vector add instruction is 2.5 times the cost of the scalar
1016  /// add instruction.
1017  /// \p Args is an optional argument which holds the instruction operands
1018  /// values so the TTI can analyze those values searching for special
1019  /// cases or optimizations based on those values.
1020  /// \p CxtI is the optional original context instruction, if one exists, to
1021  /// provide even more information.
1023  unsigned Opcode, Type *Ty,
1025  OperandValueKind Opd1Info = OK_AnyValue,
1026  OperandValueKind Opd2Info = OK_AnyValue,
1027  OperandValueProperties Opd1PropInfo = OP_None,
1028  OperandValueProperties Opd2PropInfo = OP_None,
1030  const Instruction *CxtI = nullptr) const;
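  // Example usage (an illustrative sketch, Ctx being an assumed LLVMContext):
  // reciprocal-throughput cost of a <4 x i32> add.
  //
  //   auto *VecTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);
  //   InstructionCost C = TTI.getArithmeticInstrCost(
  //       Instruction::Add, VecTy, TargetTransformInfo::TCK_RecipThroughput);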
1031 
1032  /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
1033  /// The exact mask may be passed as Mask, or else the array will be empty.
1034  /// The index and subtype parameters are used by the subvector insertion and
1035  /// extraction shuffle kinds to show the insert/extract point and the type of
1036  /// the subvector being inserted/extracted.
1037  /// NOTE: For subvector extractions Tp represents the source type.
1039  ArrayRef<int> Mask = None, int Index = 0,
1040  VectorType *SubTp = nullptr) const;
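  // Example usage (an illustrative sketch, VecTy being an assumed vector
  // type): cost of broadcasting lane 0 across the vector.
  //
  //   InstructionCost C =
  //       TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy);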
1041 
1042  /// Represents a hint about the context in which a cast is used.
1043  ///
1044  /// For zext/sext, the context of the cast is the operand, which must be a
1045  /// load of some kind. For trunc, the context of the cast is the single
1046  /// user of the instruction, which must be a store of some kind.
1047  ///
1048  /// This enum allows the vectorizer to give getCastInstrCost an idea of the
1049  /// type of cast it's dealing with, as not every cast is equal. For instance,
1050  /// the zext of a load may be free, but the zext of an interleaving load can
1051  /// be (very) expensive!
1052  ///
1053  /// See \c getCastContextHint to compute a CastContextHint from a cast
1054  /// Instruction*. Callers can use it if they don't need to override the
1055  /// context and just want it to be calculated from the instruction.
1056  ///
1057  /// FIXME: This handles the types of load/store that the vectorizer can
1058  /// produce, which are the cases where the context instruction is most
1059  /// likely to be incorrect. There are other situations where that can happen
1060  /// too, which might be handled here but in the long run a more general
1061  /// solution of costing multiple instructions at the same time may be better.
1062  enum class CastContextHint : uint8_t {
1063  None, ///< The cast is not used with a load/store of any kind.
1064  Normal, ///< The cast is used with a normal load/store.
1065  Masked, ///< The cast is used with a masked load/store.
1066  GatherScatter, ///< The cast is used with a gather/scatter.
1067  Interleave, ///< The cast is used with an interleaved load/store.
1068  Reversed, ///< The cast is used with a reversed load/store.
1069  };
1070 
1071  /// Calculates a CastContextHint from \p I.
1072  /// This should be used by callers of getCastInstrCost if they wish to
1073  /// determine the context from some instruction.
1074  /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr,
1075  /// or if it's another type of cast.
1076  static CastContextHint getCastContextHint(const Instruction *I);
1077 
1078  /// \return The expected cost of cast instructions, such as bitcast, trunc,
1079  /// zext, etc. If there is an existing instruction that holds Opcode, it
1080  /// may be passed in the 'I' parameter.
1082  getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1085  const Instruction *I = nullptr) const;
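  // Example usage (an illustrative sketch, Ext being an assumed existing
  // zext instruction): derive the context hint from the instruction itself
  // and cost the extension.
  //
  //   TargetTransformInfo::CastContextHint CCH =
  //       TargetTransformInfo::getCastContextHint(Ext);
  //   InstructionCost C = TTI.getCastInstrCost(
  //       Instruction::ZExt, Ext->getType(), Ext->getOperand(0)->getType(),
  //       CCH, TargetTransformInfo::TCK_RecipThroughput, Ext);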
1086 
1087  /// \return The expected cost of a sign- or zero-extended vector extract. Use
1088  /// -1 to indicate that there is no information about the index value.
1089  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1090  VectorType *VecTy,
1091  unsigned Index = -1) const;
1092 
1093  /// \return The expected cost of control-flow related instructions such as
1094  /// Phi, Ret, Br, Switch.
1096  getCFInstrCost(unsigned Opcode,
1098  const Instruction *I = nullptr) const;
1099 
1100  /// \returns The expected cost of compare and select instructions. If there
1101  /// is an existing instruction that holds Opcode, it may be passed in the
1102  /// 'I' parameter. The \p VecPred parameter can be used to indicate the select
1103  /// is using a compare with the specified predicate as condition. When vector
1104  /// types are passed, \p VecPred must be used for all lanes.
1106  getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
1107  CmpInst::Predicate VecPred,
1109  const Instruction *I = nullptr) const;
1110 
1111  /// \return The expected cost of vector Insert and Extract.
1112  /// Use -1 to indicate that there is no information on the index value.
1113  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
1114  unsigned Index = -1) const;
1115 
1116  /// \return The cost of Load and Store instructions.
1118  getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1119  unsigned AddressSpace,
1121  const Instruction *I = nullptr) const;
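  // Example usage (an illustrative sketch, VecTy being an assumed vector
  // type): cost of a 16-byte-aligned load from address space 0.
  //
  //   InstructionCost C = TTI.getMemoryOpCost(Instruction::Load, VecTy,
  //                                           Align(16), /*AddressSpace=*/0);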
1122 
1123  /// \return The cost of masked Load and Store instructions.
1125  unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
1127 
1128  /// \return The cost of Gather or Scatter operation
1129  /// \p Opcode - is a type of memory access Load or Store
1130  /// \p DataTy - a vector type of the data to be loaded or stored
1131  /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
1132  /// \p VariableMask - true when the memory access is predicated with a mask
1133  /// that is not a compile-time constant
1134  /// \p Alignment - alignment of single element
1135  /// \p I - the optional original context instruction, if one exists, e.g. the
1136  /// load/store to transform or the call to the gather/scatter intrinsic
1138  unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1140  const Instruction *I = nullptr) const;
1141 
1142  /// \return The cost of the interleaved memory operation.
1143  /// \p Opcode is the memory operation code
1144  /// \p VecTy is the vector type of the interleaved access.
1145  /// \p Factor is the interleave factor
1146  /// \p Indices is the indices for interleaved load members (as interleaved
1147  /// load allows gaps)
1148  /// \p Alignment is the alignment of the memory operation
1149  /// \p AddressSpace is address space of the pointer.
1150  /// \p UseMaskForCond indicates if the memory access is predicated.
1151  /// \p UseMaskForGaps indicates if gaps should be masked.
1153  unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1154  Align Alignment, unsigned AddressSpace,
1156  bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
1157 
1158  /// A helper function to determine the type of reduction algorithm used
1159  /// for a given \p Opcode and set of FastMathFlags \p FMF.
1160  static bool requiresOrderedReduction(Optional<FastMathFlags> FMF) {
1161  return FMF != None && !(*FMF).allowReassoc();
1162  }
1163 
1164  /// Calculate the cost of vector reduction intrinsics.
1165  ///
1166  /// This is the cost of reducing the vector value of type \p Ty to a scalar
1167  /// value using the operation denoted by \p Opcode. The FastMathFlags
1168  /// parameter \p FMF indicates what type of reduction we are performing:
1169  /// 1. Tree-wise. This is the typical 'fast' reduction performed that
1170  /// involves successively splitting a vector into half and doing the
1171  /// operation on the pair of halves until you have a scalar value. For
1172  /// example:
1173  /// (v0, v1, v2, v3)
1174  /// ((v0+v2), (v1+v3), undef, undef)
1175  /// ((v0+v2+v1+v3), undef, undef, undef)
1176  /// This is the default behaviour for integer operations, whereas for
1177  /// floating point we only do this if \p FMF indicates that
1178  /// reassociation is allowed.
1179  /// 2. Ordered. For a vector with N elements this involves performing N
1180  /// operations in lane order, starting with an initial scalar value, i.e.
1181  /// result = InitVal + v0
1182  /// result = result + v1
1183  /// result = result + v2
1184  /// result = result + v3
1185  /// This is only the case for FP operations and when reassociation is not
1186  /// allowed.
1187  ///
1189  unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
1191 
1193  VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
1195 
1196  /// Calculate the cost of an extended reduction pattern, similar to
1197  /// getArithmeticReductionCost of an Add reduction with an extension and
1198  /// optional multiply. This is the cost of:
1199  /// ResTy vecreduce.add(ext(Ty A)), or if IsMLA flag is set then:
1200  /// ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B))). The reduction happens
1201  /// on a VectorType with ResTy elements and Ty lanes.
1203  bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1205 
1206  /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
1207  /// Three cases are handled: 1. scalar instruction 2. vector instruction
1208  /// 3. scalar instruction which is to be vectorized.
1209  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
1210  TTI::TargetCostKind CostKind) const;
1211 
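  // Example usage (an illustrative sketch, CB being an assumed CallBase that
  // calls an intrinsic such as llvm.ctlz):
  //
  //   IntrinsicCostAttributes ICA(CB.getIntrinsicID(), CB);
  //   InstructionCost C = TTI.getIntrinsicInstrCost(
  //       ICA, TargetTransformInfo::TCK_SizeAndLatency);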
1212  /// \returns The cost of Call instructions.
1214  Function *F, Type *RetTy, ArrayRef<Type *> Tys,
1216 
1217  /// \returns The number of pieces into which the provided type must be
1218  /// split during legalization. Zero is returned when the answer is unknown.
1219  unsigned getNumberOfParts(Type *Tp) const;
1220 
1221  /// \returns The cost of the address computation. For most targets this can be
1222  /// merged into the instruction indexing mode. Some targets might want to
1223  /// distinguish between address computation for memory operations on vector
1224  /// types and scalar types. Such targets should override this function.
1225  /// The 'SE' parameter holds pointer for the scalar evolution object which
1226  /// is used in order to get the Ptr step value in case of constant stride.
1227  /// The 'Ptr' parameter holds SCEV of the access pointer.
1229  ScalarEvolution *SE = nullptr,
1230  const SCEV *Ptr = nullptr) const;
1231 
1232  /// \returns The cost, if any, of keeping values of the given types alive
1233  /// over a callsite.
1234  ///
1235  /// Some types may require the use of register classes that do not have
1236  /// any callee-saved registers, so would require a spill and fill.
1238 
1239  /// \returns True if the intrinsic is a supported memory intrinsic. Info
1240  /// will contain additional information - whether the intrinsic may write
1241  /// or read to memory, volatility and the pointer. Info is undefined
1242  /// if false is returned.
1244 
1245  /// \returns The maximum element size, in bytes, for an element
1246  /// unordered-atomic memory intrinsic.
1247  unsigned getAtomicMemIntrinsicMaxElementSize() const;
1248 
1249  /// \returns A value which is the result of the given memory intrinsic. New
1250  /// instructions may be created to extract the result from the given intrinsic
1251  /// memory operation. Returns nullptr if the target cannot create a result
1252  /// from the given intrinsic.
1254  Type *ExpectedType) const;
1255 
1256  /// \returns The type to use in a loop expansion of a memcpy call.
1258  unsigned SrcAddrSpace, unsigned DestAddrSpace,
1259  unsigned SrcAlign, unsigned DestAlign) const;
1260 
1261  /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
1262  /// \param RemainingBytes The number of bytes to copy.
1263  ///
1264  /// Calculates the operand types to use when copying \p RemainingBytes of
1265  /// memory, where source and destination alignments are \p SrcAlign and
1266  /// \p DestAlign respectively.
1269  unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1270  unsigned SrcAlign, unsigned DestAlign) const;
1271 
1272  /// \returns True if the two functions have compatible attributes for inlining
1273  /// purposes.
1274  bool areInlineCompatible(const Function *Caller,
1275  const Function *Callee) const;
1276 
1277  /// \returns True if the caller and callee agree on how \p Args will be passed
1278  /// to the callee.
1279  /// \param[out] Args The list of compatible arguments. The implementation may
1280  /// filter out any incompatible args from this list.
1281  bool areFunctionArgsABICompatible(const Function *Caller,
1282  const Function *Callee,
1284 
1285  /// The type of load/store indexing.
1287  MIM_Unindexed, ///< No indexing.
1288  MIM_PreInc, ///< Pre-incrementing.
1289  MIM_PreDec, ///< Pre-decrementing.
1290  MIM_PostInc, ///< Post-incrementing.
1291  MIM_PostDec ///< Post-decrementing.
1292  };
1293 
1294  /// \returns True if the specified indexed load for the given type is legal.
1295  bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
1296 
1297  /// \returns True if the specified indexed store for the given type is legal.
1298  bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
1299 
1300  /// \returns The bitwidth of the largest vector type that should be used to
1301  /// load/store in the given address space.
1302  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
1303 
1304  /// \returns True if the load instruction is legal to vectorize.
1305  bool isLegalToVectorizeLoad(LoadInst *LI) const;
1306 
1307  /// \returns True if the store instruction is legal to vectorize.
1308  bool isLegalToVectorizeStore(StoreInst *SI) const;
1309 
1310  /// \returns True if it is legal to vectorize the given load chain.
1311  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
1312  unsigned AddrSpace) const;
1313 
1314  /// \returns True if it is legal to vectorize the given store chain.
1315  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
1316  unsigned AddrSpace) const;
1317 
1318  /// \returns True if it is legal to vectorize the given reduction kind.
1320  ElementCount VF) const;
1321 
1322  /// \returns True if the given type is supported for scalable vectors
1323  bool isElementTypeLegalForScalableVector(Type *Ty) const;
1324 
1325  /// \returns The new vector factor value if the target doesn't support \p
1326  /// SizeInBytes loads or has a better vector factor.
1327  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1328  unsigned ChainSizeInBytes,
1329  VectorType *VecTy) const;
1330 
1331  /// \returns The new vector factor value if the target doesn't support \p
1332  /// SizeInBytes stores or has a better vector factor.
1333  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1334  unsigned ChainSizeInBytes,
1335  VectorType *VecTy) const;
1336 
1337  /// Flags describing the kind of vector reduction.
1338  struct ReductionFlags {
1339  ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}
1340  bool IsMaxOp; ///< If the op is a min/max kind, true if it's a max operation.
1341  bool IsSigned; ///< Whether the operation is a signed int reduction.
1342  bool NoNaN; ///< If op is an fp min/max, whether NaNs may be present.
1343  };
1344 
1345  /// \returns True if the target prefers reductions to be performed in the loop.
1346  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1347  ReductionFlags Flags) const;
1348 
1349  /// \returns True if the target prefers the select of a reduction to be kept
1350  /// in the loop when tail folding, i.e.
1351  /// loop:
1352  /// p = phi (0, s)
1353  /// a = add (p, x)
1354  /// s = select (mask, a, p)
1355  /// vecreduce.add(s)
1356  ///
1357  /// As opposed to the normal scheme of p = phi (0, a) which allows the select
1358  /// to be pulled out of the loop. If the select(.., add, ..) can be predicated
1359  /// by the target, this can lead to cleaner code generation.
1360  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1361  ReductionFlags Flags) const;
1362 
1363  /// \returns True if the target wants to expand the given reduction intrinsic
1364  /// into a shuffle sequence.
1365  bool shouldExpandReduction(const IntrinsicInst *II) const;
1366 
1367  /// \returns the size cost of rematerializing a GlobalValue address relative
1368  /// to a stack reload.
1369  unsigned getGISelRematGlobalCost() const;
1370 
1371  /// \returns True if the target supports scalable vectors.
1372  bool supportsScalableVectors() const;
1373 
1374  /// \name Vector Predication Information
1375  /// @{
1376  /// Whether the target supports the %evl parameter of VP intrinsics
1377  /// efficiently in hardware (see LLVM Language Reference, "Vector Predication
1378  /// Intrinsics"). Use of %evl is discouraged when that is not the case.
1379  bool hasActiveVectorLength() const;
1380 
1381  struct VPLegalization {
1382  enum VPTransform {
1383  // keep the predicating parameter
1384  Legal = 0,
1385  // where legal, discard the predicate parameter
1386  Discard = 1,
1387  // transform into something else that is also predicating
1388  Convert = 2
1389  };
1390 
1391  // How to transform the EVL parameter.
1392  // Legal: keep the EVL parameter as it is.
1393  // Discard: Ignore the EVL parameter where it is safe to do so.
1394  // Convert: Fold the EVL into the mask parameter.
1395  VPTransform EVLParamStrategy;
1396 
1397  // How to transform the operator.
1398  // Legal: The target supports this operator.
1399  // Convert: Convert this to a non-VP operation.
1400  // The 'Discard' strategy is invalid.
1401  VPTransform OpStrategy;
1402 
1403  bool shouldDoNothing() const {
1404  return (EVLParamStrategy == Legal) && (OpStrategy == Legal);
1405  }
1406  VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)
1407  : EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {}
1408  };
1409 
1410  /// \returns How the target needs this vector-predicated operation to be
1411  /// transformed.
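  /// An illustrative sketch (not a prescribed usage) of how a legalization
  /// pass might consume the result; \c TTI and \c I are hypothetical names for
  /// a TargetTransformInfo instance and a vector-predicated call:
  ///   TargetTransformInfo::VPLegalization VPL =
  ///       TTI.getVPLegalizationStrategy(cast<VPIntrinsic>(I));
  ///   if (!VPL.shouldDoNothing()) {
  ///     // Fold or drop %evl per VPL.EVLParamStrategy, and lower the call to
  ///     // a non-VP operation if VPL.OpStrategy == Convert.
  ///   }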
1412  VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const;
1413  /// @}
1414 
1415  /// @}
1416 
1417 private:
1418  /// Estimate the latency of specified instruction.
1419  /// Returns 1 as the default value.
1420  InstructionCost getInstructionLatency(const Instruction *I) const;
1421 
1422  /// Returns the expected throughput cost of the instruction.
1423  /// Returns -1 if the cost is unknown.
1424  InstructionCost getInstructionThroughput(const Instruction *I) const;
1425 
1426  /// The abstract base class used to type erase specific TTI
1427  /// implementations.
1428  class Concept;
1429 
1430  /// The template model for the base class which wraps a concrete
1431  /// implementation in a type erased interface.
1432  template <typename T> class Model;
1433 
1434  std::unique_ptr<Concept> TTIImpl;
1435 };
1436 
1437 class TargetTransformInfo::Concept {
1438 public:
1439  virtual ~Concept() = 0;
1440  virtual const DataLayout &getDataLayout() const = 0;
1441  virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
1442  ArrayRef<const Value *> Operands,
1443  TTI::TargetCostKind CostKind) = 0;
1444  virtual unsigned getInliningThresholdMultiplier() = 0;
1445  virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
1446  virtual int getInlinerVectorBonusPercent() = 0;
1447  virtual InstructionCost getMemcpyCost(const Instruction *I) = 0;
1448  virtual unsigned
1449  getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
1450  ProfileSummaryInfo *PSI,
1451  BlockFrequencyInfo *BFI) = 0;
1452  virtual InstructionCost getUserCost(const User *U,
1453  ArrayRef<const Value *> Operands,
1454  TargetCostKind CostKind) = 0;
1455  virtual BranchProbability getPredictableBranchThreshold() = 0;
1456  virtual bool hasBranchDivergence() = 0;
1457  virtual bool useGPUDivergenceAnalysis() = 0;
1458  virtual bool isSourceOfDivergence(const Value *V) = 0;
1459  virtual bool isAlwaysUniform(const Value *V) = 0;
1460  virtual unsigned getFlatAddressSpace() = 0;
1461  virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
1462  Intrinsic::ID IID) const = 0;
1463  virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1464  virtual bool
1465  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0;
1466  virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
1467  virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
1468  Value *OldV,
1469  Value *NewV) const = 0;
1470  virtual bool isLoweredToCall(const Function *F) = 0;
1471  virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
1472  UnrollingPreferences &UP,
1473  OptimizationRemarkEmitter *ORE) = 0;
1474  virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
1475  PeelingPreferences &PP) = 0;
1476  virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
1477  AssumptionCache &AC,
1478  TargetLibraryInfo *LibInfo,
1479  HardwareLoopInfo &HWLoopInfo) = 0;
1480  virtual bool
1481  preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
1482  AssumptionCache &AC, TargetLibraryInfo *TLI,
1483  DominatorTree *DT, const LoopAccessInfo *LAI) = 0;
1484  virtual bool emitGetActiveLaneMask() = 0;
1485  virtual Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
1486  IntrinsicInst &II) = 0;
1487  virtual Optional<Value *>
1488  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
1489  APInt DemandedMask, KnownBits &Known,
1490  bool &KnownBitsComputed) = 0;
1491  virtual Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
1492  InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
1493  APInt &UndefElts2, APInt &UndefElts3,
1494  std::function<void(Instruction *, unsigned, APInt, APInt &)>
1495  SimplifyAndSetOp) = 0;
1496  virtual bool isLegalAddImmediate(int64_t Imm) = 0;
1497  virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
1498  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
1499  int64_t BaseOffset, bool HasBaseReg,
1500  int64_t Scale, unsigned AddrSpace,
1501  Instruction *I) = 0;
1502  virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
1503  TargetTransformInfo::LSRCost &C2) = 0;
1504  virtual bool isNumRegsMajorCostOfLSR() = 0;
1505  virtual bool isProfitableLSRChainElement(Instruction *I) = 0;
1506  virtual bool canMacroFuseCmp() = 0;
1507  virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
1508  LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
1509  TargetLibraryInfo *LibInfo) = 0;
1510  virtual AddressingModeKind
1511  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const = 0;
1512  virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
1513  virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
1514  virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
1515  virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
1516  virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
1517  virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
1518  virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
1519  virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
1520  virtual bool enableOrderedReductions() = 0;
1521  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
1522  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
1523  virtual bool prefersVectorizedAddressing() = 0;
1524  virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
1525  int64_t BaseOffset,
1526  bool HasBaseReg, int64_t Scale,
1527  unsigned AddrSpace) = 0;
1528  virtual bool LSRWithInstrQueries() = 0;
1529  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
1530  virtual bool isProfitableToHoist(Instruction *I) = 0;
1531  virtual bool useAA() = 0;
1532  virtual bool isTypeLegal(Type *Ty) = 0;
1533  virtual InstructionCost getRegUsageForType(Type *Ty) = 0;
1534  virtual bool shouldBuildLookupTables() = 0;
1535  virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
1536  virtual bool shouldBuildRelLookupTables() = 0;
1537  virtual bool useColdCCForColdCall(Function &F) = 0;
1538  virtual InstructionCost getScalarizationOverhead(VectorType *Ty,
1539  const APInt &DemandedElts,
1540  bool Insert,
1541  bool Extract) = 0;
1542  virtual InstructionCost
1543  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1544  ArrayRef<Type *> Tys) = 0;
1545  virtual bool supportsEfficientVectorElementLoadStore() = 0;
1546  virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
1547  virtual MemCmpExpansionOptions
1548  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
1549  virtual bool enableInterleavedAccessVectorization() = 0;
1550  virtual bool enableMaskedInterleavedAccessVectorization() = 0;
1551  virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
1552  virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
1553  unsigned BitWidth,
1554  unsigned AddressSpace,
1555  Align Alignment,
1556  bool *Fast) = 0;
1557  virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
1558  virtual bool haveFastSqrt(Type *Ty) = 0;
1559  virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
1560  virtual InstructionCost getFPOpCost(Type *Ty) = 0;
1561  virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
1562  const APInt &Imm, Type *Ty) = 0;
1563  virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
1564  TargetCostKind CostKind) = 0;
1565  virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
1566  const APInt &Imm, Type *Ty,
1567  TargetCostKind CostKind,
1568  Instruction *Inst = nullptr) = 0;
1569  virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
1570  const APInt &Imm, Type *Ty,
1571  TargetCostKind CostKind) = 0;
1572  virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
1573  virtual unsigned getRegisterClassForType(bool Vector,
1574  Type *Ty = nullptr) const = 0;
1575  virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
1576  virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0;
1577  virtual unsigned getMinVectorRegisterBitWidth() const = 0;
1578  virtual Optional<unsigned> getMaxVScale() const = 0;
1579  virtual bool shouldMaximizeVectorBandwidth() const = 0;
1580  virtual ElementCount getMinimumVF(unsigned ElemWidth,
1581  bool IsScalable) const = 0;
1582  virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
1583  virtual bool shouldConsiderAddressTypePromotion(
1584  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1585  virtual unsigned getCacheLineSize() const = 0;
1586  virtual Optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
1587  virtual Optional<unsigned> getCacheAssociativity(CacheLevel Level) const = 0;
1588 
1589  /// \return How far ahead of a load we should place the prefetch
1590  /// instruction. This is currently measured in number of
1591  /// instructions.
1592  virtual unsigned getPrefetchDistance() const = 0;
1593 
1594  /// \return Some HW prefetchers can handle accesses up to a certain
1595  /// constant stride. This is the minimum stride in bytes where it
1596  /// makes sense to start adding SW prefetches. The default is 1,
1597  /// i.e. prefetch with any stride. Sometimes prefetching is beneficial
1598  /// even below the HW prefetcher limit, and the arguments provided are
1599  /// meant to serve as a basis for deciding this for a particular loop.
1600  virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1601  unsigned NumStridedMemAccesses,
1602  unsigned NumPrefetches,
1603  bool HasCall) const = 0;
1604 
1605  /// \return The maximum number of iterations to prefetch ahead. If
1606  /// the required number of iterations is more than this number, no
1607  /// prefetching is performed.
1608  virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
1609 
1610  /// \return True if prefetching should also be done for writes.
1611  virtual bool enableWritePrefetching() const = 0;
1612 
1613  virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
1614  virtual InstructionCost getArithmeticInstrCost(
1615  unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
1616  OperandValueKind Opd1Info, OperandValueKind Opd2Info,
1617  OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo,
1618  ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
1619  virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
1620  ArrayRef<int> Mask, int Index,
1621  VectorType *SubTp) = 0;
1622  virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
1623  Type *Src, CastContextHint CCH,
1624  TTI::TargetCostKind CostKind,
1625  const Instruction *I) = 0;
1626  virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1627  VectorType *VecTy,
1628  unsigned Index) = 0;
1629  virtual InstructionCost getCFInstrCost(unsigned Opcode,
1630  TTI::TargetCostKind CostKind,
1631  const Instruction *I = nullptr) = 0;
1632  virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
1633  Type *CondTy,
1634  CmpInst::Predicate VecPred,
1635  TTI::TargetCostKind CostKind,
1636  const Instruction *I) = 0;
1637  virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
1638  unsigned Index) = 0;
1639  virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
1640  Align Alignment,
1641  unsigned AddressSpace,
1642  TTI::TargetCostKind CostKind,
1643  const Instruction *I) = 0;
1644  virtual InstructionCost
1645  getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1646  unsigned AddressSpace,
1647  TTI::TargetCostKind CostKind) = 0;
1648  virtual InstructionCost
1649  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
1650  bool VariableMask, Align Alignment,
1651  TTI::TargetCostKind CostKind,
1652  const Instruction *I = nullptr) = 0;
1653 
1654  virtual InstructionCost getInterleavedMemoryOpCost(
1655  unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1656  Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
1657  bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
1658  virtual InstructionCost
1659  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
1660  Optional<FastMathFlags> FMF,
1661  TTI::TargetCostKind CostKind) = 0;
1662  virtual InstructionCost
1663  getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
1664  TTI::TargetCostKind CostKind) = 0;
1665  virtual InstructionCost getExtendedAddReductionCost(
1666  bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1667  TTI::TargetCostKind CostKind) = 0;
1668  virtual InstructionCost
1669  getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
1670  TTI::TargetCostKind CostKind) = 0;
1671  virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy,
1672  ArrayRef<Type *> Tys,
1673  TTI::TargetCostKind CostKind) = 0;
1674  virtual unsigned getNumberOfParts(Type *Tp) = 0;
1675  virtual InstructionCost
1676  getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr) = 0;
1677  virtual InstructionCost
1678  getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
1679  virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1680  MemIntrinsicInfo &Info) = 0;
1681  virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
1682  virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1683  Type *ExpectedType) = 0;
1684  virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1685  unsigned SrcAddrSpace,
1686  unsigned DestAddrSpace,
1687  unsigned SrcAlign,
1688  unsigned DestAlign) const = 0;
1689  virtual void getMemcpyLoopResidualLoweringType(
1690  SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1691  unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1692  unsigned SrcAlign, unsigned DestAlign) const = 0;
1693  virtual bool areInlineCompatible(const Function *Caller,
1694  const Function *Callee) const = 0;
1695  virtual bool
1696  areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
1697  SmallPtrSetImpl<Argument *> &Args) const = 0;
1698  virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1699  virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1700  virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
1701  virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
1702  virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
1703  virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1704  Align Alignment,
1705  unsigned AddrSpace) const = 0;
1706  virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1707  Align Alignment,
1708  unsigned AddrSpace) const = 0;
1709  virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
1710  ElementCount VF) const = 0;
1711  virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0;
1712  virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1713  unsigned ChainSizeInBytes,
1714  VectorType *VecTy) const = 0;
1715  virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1716  unsigned ChainSizeInBytes,
1717  VectorType *VecTy) const = 0;
1718  virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1719  ReductionFlags) const = 0;
1720  virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1721  ReductionFlags) const = 0;
1722  virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
1723  virtual unsigned getGISelRematGlobalCost() const = 0;
1724  virtual bool supportsScalableVectors() const = 0;
1725  virtual bool hasActiveVectorLength() const = 0;
1726  virtual InstructionCost getInstructionLatency(const Instruction *I) = 0;
1727  virtual VPLegalization
1728  getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
1729 };
1730 
1731 template <typename T>
1732 class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
1733  T Impl;
1734 
1735 public:
1736  Model(T Impl) : Impl(std::move(Impl)) {}
1737  ~Model() override {}
1738 
1739  const DataLayout &getDataLayout() const override {
1740  return Impl.getDataLayout();
1741  }
1742 
1743  InstructionCost
1744  getGEPCost(Type *PointeeType, const Value *Ptr,
1745  ArrayRef<const Value *> Operands,
1746  TTI::TargetCostKind CostKind) override {
1747  return Impl.getGEPCost(PointeeType, Ptr, Operands, CostKind);
1748  }
1749  unsigned getInliningThresholdMultiplier() override {
1750  return Impl.getInliningThresholdMultiplier();
1751  }
1752  unsigned adjustInliningThreshold(const CallBase *CB) override {
1753  return Impl.adjustInliningThreshold(CB);
1754  }
1755  int getInlinerVectorBonusPercent() override {
1756  return Impl.getInlinerVectorBonusPercent();
1757  }
1758  InstructionCost getMemcpyCost(const Instruction *I) override {
1759  return Impl.getMemcpyCost(I);
1760  }
1761  InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
1762  TargetCostKind CostKind) override {
1763  return Impl.getUserCost(U, Operands, CostKind);
1764  }
1765  BranchProbability getPredictableBranchThreshold() override {
1766  return Impl.getPredictableBranchThreshold();
1767  }
1768  bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
1769  bool useGPUDivergenceAnalysis() override {
1770  return Impl.useGPUDivergenceAnalysis();
1771  }
1772  bool isSourceOfDivergence(const Value *V) override {
1773  return Impl.isSourceOfDivergence(V);
1774  }
1775 
1776  bool isAlwaysUniform(const Value *V) override {
1777  return Impl.isAlwaysUniform(V);
1778  }
1779 
1780  unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
1781 
1782  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
1783  Intrinsic::ID IID) const override {
1784  return Impl.collectFlatAddressOperands(OpIndexes, IID);
1785  }
1786 
1787  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
1788  return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
1789  }
1790 
1791  bool
1792  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
1793  return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
1794  }
1795 
1796  unsigned getAssumedAddrSpace(const Value *V) const override {
1797  return Impl.getAssumedAddrSpace(V);
1798  }
1799 
1800  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
1801  Value *NewV) const override {
1802  return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
1803  }
1804 
1805  bool isLoweredToCall(const Function *F) override {
1806  return Impl.isLoweredToCall(F);
1807  }
1808  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
1809  UnrollingPreferences &UP,
1810  OptimizationRemarkEmitter *ORE) override {
1811  return Impl.getUnrollingPreferences(L, SE, UP, ORE);
1812  }
1813  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
1814  PeelingPreferences &PP) override {
1815  return Impl.getPeelingPreferences(L, SE, PP);
1816  }
1817  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
1818  AssumptionCache &AC, TargetLibraryInfo *LibInfo,
1819  HardwareLoopInfo &HWLoopInfo) override {
1820  return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
1821  }
1822  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
1823  AssumptionCache &AC, TargetLibraryInfo *TLI,
1824  DominatorTree *DT,
1825  const LoopAccessInfo *LAI) override {
1826  return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
1827  }
1828  bool emitGetActiveLaneMask() override {
1829  return Impl.emitGetActiveLaneMask();
1830  }
1831  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
1832  IntrinsicInst &II) override {
1833  return Impl.instCombineIntrinsic(IC, II);
1834  }
1835  Optional<Value *>
1836  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
1837  APInt DemandedMask, KnownBits &Known,
1838  bool &KnownBitsComputed) override {
1839  return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
1840  KnownBitsComputed);
1841  }
1842  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
1843  InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
1844  APInt &UndefElts2, APInt &UndefElts3,
1845  std::function<void(Instruction *, unsigned, APInt, APInt &)>
1846  SimplifyAndSetOp) override {
1847  return Impl.simplifyDemandedVectorEltsIntrinsic(
1848  IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
1849  SimplifyAndSetOp);
1850  }
1851  bool isLegalAddImmediate(int64_t Imm) override {
1852  return Impl.isLegalAddImmediate(Imm);
1853  }
1854  bool isLegalICmpImmediate(int64_t Imm) override {
1855  return Impl.isLegalICmpImmediate(Imm);
1856  }
1857  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
1858  bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
1859  Instruction *I) override {
1860  return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
1861  AddrSpace, I);
1862  }
1863  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
1864  TargetTransformInfo::LSRCost &C2) override {
1865  return Impl.isLSRCostLess(C1, C2);
1866  }
1867  bool isNumRegsMajorCostOfLSR() override {
1868  return Impl.isNumRegsMajorCostOfLSR();
1869  }
1870  bool isProfitableLSRChainElement(Instruction *I) override {
1871  return Impl.isProfitableLSRChainElement(I);
1872  }
1873  bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
1874  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
1875  DominatorTree *DT, AssumptionCache *AC,
1876  TargetLibraryInfo *LibInfo) override {
1877  return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
1878  }
1879  AddressingModeKind
1880  getPreferredAddressingMode(const Loop *L,
1881  ScalarEvolution *SE) const override {
1882  return Impl.getPreferredAddressingMode(L, SE);
1883  }
1884  bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
1885  return Impl.isLegalMaskedStore(DataType, Alignment);
1886  }
1887  bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
1888  return Impl.isLegalMaskedLoad(DataType, Alignment);
1889  }
1890  bool isLegalNTStore(Type *DataType, Align Alignment) override {
1891  return Impl.isLegalNTStore(DataType, Alignment);
1892  }
1893  bool isLegalNTLoad(Type *DataType, Align Alignment) override {
1894  return Impl.isLegalNTLoad(DataType, Alignment);
1895  }
1896  bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
1897  return Impl.isLegalMaskedScatter(DataType, Alignment);
1898  }
1899  bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
1900  return Impl.isLegalMaskedGather(DataType, Alignment);
1901  }
1902  bool isLegalMaskedCompressStore(Type *DataType) override {
1903  return Impl.isLegalMaskedCompressStore(DataType);
1904  }
1905  bool isLegalMaskedExpandLoad(Type *DataType) override {
1906  return Impl.isLegalMaskedExpandLoad(DataType);
1907  }
1908  bool enableOrderedReductions() override {
1909  return Impl.enableOrderedReductions();
1910  }
1911  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
1912  return Impl.hasDivRemOp(DataType, IsSigned);
1913  }
1914  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
1915  return Impl.hasVolatileVariant(I, AddrSpace);
1916  }
1917  bool prefersVectorizedAddressing() override {
1918  return Impl.prefersVectorizedAddressing();
1919  }
1920  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
1921  int64_t BaseOffset, bool HasBaseReg,
1922  int64_t Scale,
1923  unsigned AddrSpace) override {
1924  return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
1925  AddrSpace);
1926  }
1927  bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
1928  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
1929  return Impl.isTruncateFree(Ty1, Ty2);
1930  }
1931  bool isProfitableToHoist(Instruction *I) override {
1932  return Impl.isProfitableToHoist(I);
1933  }
1934  bool useAA() override { return Impl.useAA(); }
1935  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
1936  InstructionCost getRegUsageForType(Type *Ty) override {
1937  return Impl.getRegUsageForType(Ty);
1938  }
1939  bool shouldBuildLookupTables() override {
1940  return Impl.shouldBuildLookupTables();
1941  }
1942  bool shouldBuildLookupTablesForConstant(Constant *C) override {
1943  return Impl.shouldBuildLookupTablesForConstant(C);
1944  }
1945  bool shouldBuildRelLookupTables() override {
1946  return Impl.shouldBuildRelLookupTables();
1947  }
1948  bool useColdCCForColdCall(Function &F) override {
1949  return Impl.useColdCCForColdCall(F);
1950  }
1951 
1952  InstructionCost getScalarizationOverhead(VectorType *Ty,
1953  const APInt &DemandedElts,
1954  bool Insert, bool Extract) override {
1955  return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
1956  }
1957  InstructionCost
1958  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1959  ArrayRef<Type *> Tys) override {
1960  return Impl.getOperandsScalarizationOverhead(Args, Tys);
1961  }
1962 
1963  bool supportsEfficientVectorElementLoadStore() override {
1964  return Impl.supportsEfficientVectorElementLoadStore();
1965  }
1966 
1967  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
1968  return Impl.enableAggressiveInterleaving(LoopHasReductions);
1969  }
1970  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
1971  bool IsZeroCmp) const override {
1972  return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
1973  }
1974  bool enableInterleavedAccessVectorization() override {
1975  return Impl.enableInterleavedAccessVectorization();
1976  }
1977  bool enableMaskedInterleavedAccessVectorization() override {
1978  return Impl.enableMaskedInterleavedAccessVectorization();
1979  }
1980  bool isFPVectorizationPotentiallyUnsafe() override {
1981  return Impl.isFPVectorizationPotentiallyUnsafe();
1982  }
1983  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
1984  unsigned AddressSpace, Align Alignment,
1985  bool *Fast) override {
1986  return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
1987  Alignment, Fast);
1988  }
1989  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
1990  return Impl.getPopcntSupport(IntTyWidthInBit);
1991  }
1992  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
1993 
1994  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
1995  return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
1996  }
1997 
1998  InstructionCost getFPOpCost(Type *Ty) override {
1999  return Impl.getFPOpCost(Ty);
2000  }
2001 
2002  InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
2003  const APInt &Imm, Type *Ty) override {
2004  return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
2005  }
2006  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
2007  TargetCostKind CostKind) override {
2008  return Impl.getIntImmCost(Imm, Ty, CostKind);
2009  }
2010  InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
2011  const APInt &Imm, Type *Ty,
2012  TargetCostKind CostKind,
2013  Instruction *Inst = nullptr) override {
2014  return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
2015  }
2016  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
2017  const APInt &Imm, Type *Ty,
2018  TargetCostKind CostKind) override {
2019  return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
2020  }
2021  unsigned getNumberOfRegisters(unsigned ClassID) const override {
2022  return Impl.getNumberOfRegisters(ClassID);
2023  }
2024  unsigned getRegisterClassForType(bool Vector,
2025  Type *Ty = nullptr) const override {
2026  return Impl.getRegisterClassForType(Vector, Ty);
2027  }
2028  const char *getRegisterClassName(unsigned ClassID) const override {
2029  return Impl.getRegisterClassName(ClassID);
2030  }
2031  TypeSize getRegisterBitWidth(RegisterKind K) const override {
2032  return Impl.getRegisterBitWidth(K);
2033  }
2034  unsigned getMinVectorRegisterBitWidth() const override {
2035  return Impl.getMinVectorRegisterBitWidth();
2036  }
2037  Optional<unsigned> getMaxVScale() const override {
2038  return Impl.getMaxVScale();
2039  }
2040  bool shouldMaximizeVectorBandwidth() const override {
2041  return Impl.shouldMaximizeVectorBandwidth();
2042  }
2043  ElementCount getMinimumVF(unsigned ElemWidth,
2044  bool IsScalable) const override {
2045  return Impl.getMinimumVF(ElemWidth, IsScalable);
2046  }
2047  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
2048  return Impl.getMaximumVF(ElemWidth, Opcode);
2049  }
2050  bool shouldConsiderAddressTypePromotion(
2051  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
2052  return Impl.shouldConsiderAddressTypePromotion(
2053  I, AllowPromotionWithoutCommonHeader);
2054  }
2055  unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
2056  Optional<unsigned> getCacheSize(CacheLevel Level) const override {
2057  return Impl.getCacheSize(Level);
2058  }
2059  Optional<unsigned> getCacheAssociativity(CacheLevel Level) const override {
2060  return Impl.getCacheAssociativity(Level);
2061  }
2062 
2063  /// Return the preferred prefetch distance in terms of instructions.
2064  ///
2065  unsigned getPrefetchDistance() const override {
2066  return Impl.getPrefetchDistance();
2067  }
2068 
2069  /// Return the minimum stride necessary to trigger software
2070  /// prefetching.
2071  ///
2072  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
2073  unsigned NumStridedMemAccesses,
2074  unsigned NumPrefetches,
2075  bool HasCall) const override {
2076  return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
2077  NumPrefetches, HasCall);
2078  }
2079 
2080  /// Return the maximum prefetch distance in terms of loop
2081  /// iterations.
2082  ///
2083  unsigned getMaxPrefetchIterationsAhead() const override {
2084  return Impl.getMaxPrefetchIterationsAhead();
2085  }
2086 
2087  /// \return True if prefetching should also be done for writes.
2088  bool enableWritePrefetching() const override {
2089  return Impl.enableWritePrefetching();
2090  }
2091 
2092  unsigned getMaxInterleaveFactor(unsigned VF) override {
2093  return Impl.getMaxInterleaveFactor(VF);
2094  }
2095  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
2096  unsigned &JTSize,
2097  ProfileSummaryInfo *PSI,
2098  BlockFrequencyInfo *BFI) override {
2099  return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
2100  }
2101  InstructionCost getArithmeticInstrCost(
2102  unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
2103  OperandValueKind Opd1Info, OperandValueKind Opd2Info,
2104  OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo,
2105  ArrayRef<const Value *> Args,
2106  const Instruction *CxtI = nullptr) override {
2107  return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
2108  Opd1PropInfo, Opd2PropInfo, Args, CxtI);
2109  }
2110  InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
2111  ArrayRef<int> Mask, int Index,
2112  VectorType *SubTp) override {
2113  return Impl.getShuffleCost(Kind, Tp, Mask, Index, SubTp);
2114  }
2115  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
2116  CastContextHint CCH,
2117  TTI::TargetCostKind CostKind,
2118  const Instruction *I) override {
2119  return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
2120  }
2121  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
2122  VectorType *VecTy,
2123  unsigned Index) override {
2124  return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
2125  }
2126  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
2127  const Instruction *I = nullptr) override {
2128  return Impl.getCFInstrCost(Opcode, CostKind, I);
2129  }
2130  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
2131  CmpInst::Predicate VecPred,
2132  TTI::TargetCostKind CostKind,
2133  const Instruction *I) override {
2134  return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
2135  }
2136  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
2137  unsigned Index) override {
2138  return Impl.getVectorInstrCost(Opcode, Val, Index);
2139  }
2140  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2141  unsigned AddressSpace,
2142  TTI::TargetCostKind CostKind,
2143  const Instruction *I) override {
2144  return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2145  CostKind, I);
2146  }
2147  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
2148  Align Alignment, unsigned AddressSpace,
2149  TTI::TargetCostKind CostKind) override {
2150  return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2151  CostKind);
2152  }
2153  InstructionCost
2154  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2155  bool VariableMask, Align Alignment,
2156  TTI::TargetCostKind CostKind,
2157  const Instruction *I = nullptr) override {
2158  return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
2159  Alignment, CostKind, I);
2160  }
2161  InstructionCost getInterleavedMemoryOpCost(
2162  unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
2163  Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
2164  bool UseMaskForCond, bool UseMaskForGaps) override {
2165  return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
2166  Alignment, AddressSpace, CostKind,
2167  UseMaskForCond, UseMaskForGaps);
2168  }
2169  InstructionCost
2170  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
2171  Optional<FastMathFlags> FMF,
2172  TTI::TargetCostKind CostKind) override {
2173  return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
2174  }
2175  InstructionCost
2176  getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
2177  TTI::TargetCostKind CostKind) override {
2178  return Impl.getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
2179  }
2180  InstructionCost getExtendedAddReductionCost(
2181  bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
2182  TTI::TargetCostKind CostKind) override {
2183  return Impl.getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, Ty,
2184  CostKind);
2185  }
2186  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
2187  TTI::TargetCostKind CostKind) override {
2188  return Impl.getIntrinsicInstrCost(ICA, CostKind);
2189  }
2190  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
2191  ArrayRef<Type *> Tys,
2192  TTI::TargetCostKind CostKind) override {
2193  return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
2194  }
2195  unsigned getNumberOfParts(Type *Tp) override {
2196  return Impl.getNumberOfParts(Tp);
2197  }
2198  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
2199  const SCEV *Ptr) override {
2200  return Impl.getAddressComputationCost(Ty, SE, Ptr);
2201  }
2202  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
2203  return Impl.getCostOfKeepingLiveOverCall(Tys);
2204  }
2205  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
2206  MemIntrinsicInfo &Info) override {
2207  return Impl.getTgtMemIntrinsic(Inst, Info);
2208  }
2209  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
2210  return Impl.getAtomicMemIntrinsicMaxElementSize();
2211  }
2212  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
2213  Type *ExpectedType) override {
2214  return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
2215  }
2216  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
2217  unsigned SrcAddrSpace, unsigned DestAddrSpace,
2218  unsigned SrcAlign,
2219  unsigned DestAlign) const override {
2220  return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
2221  DestAddrSpace, SrcAlign, DestAlign);
2222  }
2223  void getMemcpyLoopResidualLoweringType(
2224  SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
2225  unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
2226  unsigned SrcAlign, unsigned DestAlign) const override {
2227  Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
2228  SrcAddrSpace, DestAddrSpace,
2229  SrcAlign, DestAlign);
2230  }
2231  bool areInlineCompatible(const Function *Caller,
2232  const Function *Callee) const override {
2233  return Impl.areInlineCompatible(Caller, Callee);
2234  }
2235  bool areFunctionArgsABICompatible(
2236  const Function *Caller, const Function *Callee,
2237  SmallPtrSetImpl<Argument *> &Args) const override {
2238  return Impl.areFunctionArgsABICompatible(Caller, Callee, Args);
2239  }
2240  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
2241  return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
2242  }
2243  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
2244  return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
2245  }
2246  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
2247  return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
2248  }
2249  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
2250  return Impl.isLegalToVectorizeLoad(LI);
2251  }
2252  bool isLegalToVectorizeStore(StoreInst *SI) const override {
2253  return Impl.isLegalToVectorizeStore(SI);
2254  }
2255  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
2256  unsigned AddrSpace) const override {
2257  return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
2258  AddrSpace);
2259  }
2260  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
2261  unsigned AddrSpace) const override {
2262  return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
2263  AddrSpace);
2264  }
2265  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
2266  ElementCount VF) const override {
2267  return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
2268  }
2269  bool isElementTypeLegalForScalableVector(Type *Ty) const override {
2270  return Impl.isElementTypeLegalForScalableVector(Ty);
2271  }
2272  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
2273  unsigned ChainSizeInBytes,
2274  VectorType *VecTy) const override {
2275  return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
2276  }
2277  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
2278  unsigned ChainSizeInBytes,
2279  VectorType *VecTy) const override {
2280  return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
2281  }
2282  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
2283  ReductionFlags Flags) const override {
2284  return Impl.preferInLoopReduction(Opcode, Ty, Flags);
2285  }
2286  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
2287  ReductionFlags Flags) const override {
2288  return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
2289  }
2290  bool shouldExpandReduction(const IntrinsicInst *II) const override {
2291  return Impl.shouldExpandReduction(II);
2292  }
2293 
2294  unsigned getGISelRematGlobalCost() const override {
2295  return Impl.getGISelRematGlobalCost();
2296  }
2297 
2298  bool supportsScalableVectors() const override {
2299  return Impl.supportsScalableVectors();
2300  }
2301 
2302  bool hasActiveVectorLength() const override {
2303  return Impl.hasActiveVectorLength();
2304  }
2305 
2306  InstructionCost getInstructionLatency(const Instruction *I) override {
2307  return Impl.getInstructionLatency(I);
2308  }
2309 
2310  VPLegalization
2311  getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
2312  return Impl.getVPLegalizationStrategy(PI);
2313  }
2314 };
2315 
2316 template <typename T>
2317 TargetTransformInfo::TargetTransformInfo(T Impl)
2318  : TTIImpl(new Model<T>(Impl)) {}
2319 
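// An illustrative sketch of the type-erasure flow above: a target's
// TargetMachine::getTargetTransformInfo hook typically wraps its concrete TTI
// implementation (here a hypothetical MyTargetTTIImpl) in this constructor,
// for example:
//
//   TargetTransformInfo
//   MyTargetMachine::getTargetTransformInfo(const Function &F) {
//     return TargetTransformInfo(MyTargetTTIImpl(this, F));
//   }
//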
2320 /// Analysis pass providing the \c TargetTransformInfo.
2321 ///
2322 /// The core idea of the TargetIRAnalysis is to expose an interface through
2323 /// which LLVM targets can analyze and provide information about the middle
2324 /// end's target-independent IR. This supports use cases such as target-aware
2325 /// cost modeling of IR constructs.
2326 ///
2327 /// This is a function analysis because much of the cost modeling for targets
2328 /// is done in a subtarget specific way and LLVM supports compiling different
2329 /// functions targeting different subtargets in order to support runtime
2330 /// dispatch according to the observed subtarget.
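///
/// A minimal usage sketch (assuming a new-pass-manager pass with a
/// FunctionAnalysisManager \c FAM and an instruction \c I in scope):
///   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
///   InstructionCost Cost =
///       TTI.getUserCost(&I, TargetTransformInfo::TCK_RecipThroughput);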
2331 class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
2332 public:
2333  typedef TargetTransformInfo Result;
2334 
2335  /// Default construct a target IR analysis.
2336  ///
2337  /// This will use the module's datalayout to construct a baseline
2338  /// conservative TTI result.
2339  TargetIRAnalysis();
2340 
2341  /// Construct an IR analysis pass around a target-provided callback.
2342  ///
2343  /// The callback will be called with a particular function for which the TTI
2344  /// is needed and must return a TTI object for that function.
2345  TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
2346 
2347  // Value semantics. We spell out the constructors for MSVC.
2348  TargetIRAnalysis(const TargetIRAnalysis &Arg)
2349  : TTICallback(Arg.TTICallback) {}
2350  TargetIRAnalysis(TargetIRAnalysis &&Arg)
2351  : TTICallback(std::move(Arg.TTICallback)) {}
2352  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
2353  TTICallback = RHS.TTICallback;
2354  return *this;
2355  }
2356  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
2357  TTICallback = std::move(RHS.TTICallback);
2358  return *this;
2359  }
2360 
2361  Result run(const Function &F, FunctionAnalysisManager &);
2362 
2363 private:
2364  friend AnalysisInfoMixin<TargetIRAnalysis>;
2365  static AnalysisKey Key;
2366 
2367  /// The callback used to produce a result.
2368  ///
2369  /// We use a completely opaque callback so that targets can provide whatever
2370  /// mechanism they desire for constructing the TTI for a given function.
2371  ///
2372  /// FIXME: Should we really use std::function? It's relatively inefficient.
2373  /// It might be possible to arrange for even stateful callbacks to outlive
2374  /// the analysis and thus use a function_ref which would be lighter weight.
2375  /// This may also be less error prone as the callback is likely to reference
2376  /// the external TargetMachine, and that reference needs to never dangle.
2377  std::function<Result(const Function &)> TTICallback;
2378 
2379  /// Helper function used as the callback in the default constructor.
2380  static Result getDefaultTTI(const Function &F);
2381 };
2382 
2383 /// Wrapper pass for TargetTransformInfo.
2384 ///
2385 /// This pass can be constructed from a TTI object which it stores internally
2386 /// and is queried by passes.
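///
/// A legacy-pass-manager consumer would typically query it as follows
/// (illustrative only):
///   TargetTransformInfo &TTI =
///       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);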
2387 class TargetTransformInfoWrapperPass : public ImmutablePass {
2388  TargetIRAnalysis TIRA;
2389  Optional<TargetTransformInfo> TTI;
2390 
2391  virtual void anchor();
2392 
2393 public:
2394  static char ID;
2395 
2396  /// We must provide a default constructor for the pass but it should
2397  /// never be used.
2398  ///
2399  /// Use the constructor below or call one of the creation routines.
2400  TargetTransformInfoWrapperPass();
2401 
2402  explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
2403 
2404  TargetTransformInfo &getTTI(const Function &F);
2405 };
2406 
2407 /// Create an analysis pass wrapper around a TTI object.
2408 ///
2409 /// This analysis pass just holds the TTI instance and makes it available to
2410 /// clients.
2411 ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
2412 
2413 } // namespace llvm
2414 
2415 #endif
llvm::TargetTransformInfo::ReductionFlags::IsMaxOp
bool IsMaxOp
If the op a min/max kind, true if it's a max operation.
Definition: TargetTransformInfo.h:1340
llvm::TargetTransformInfo::CastContextHint::GatherScatter
@ GatherScatter
The cast is used with a gather/scatter.
llvm::TargetTransformInfo::isHardwareLoopProfitable
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
Query the target whether it would be profitable to convert the given loop into a hardware loop.
Definition: TargetTransformInfo.cpp:280
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::TargetTransformInfo::Concept::getExtractWithExtendCost
virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)=0
llvm::TargetTransformInfo::CacheLevel::L1D
@ L1D
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::TargetTransformInfo::PSK_FastHardware
@ PSK_FastHardware
Definition: TargetTransformInfo.h:592
llvm::TargetTransformInfo::Concept::getPopcntSupport
virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit)=0
llvm::TargetTransformInfo::Concept::getGEPCost
virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::CastContextHint::Masked
@ Masked
The cast is used with a masked load/store.
llvm::TargetTransformInfo::getMemcpyLoopLoweringType
Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign) const
Definition: TargetTransformInfo.cpp:946
llvm::TargetTransformInfo::UnrollingPreferences::BEInsns
unsigned BEInsns
Definition: TargetTransformInfo.h:478
llvm::TargetTransformInfo::UnrollingPreferences::PartialOptSizeThreshold
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
Definition: TargetTransformInfo.h:457
llvm::TargetTransformInfo::SK_Select
@ SK_Select
Selects elements from the corresponding lane of either source operand.
Definition: TargetTransformInfo.h:862
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2331
llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
Definition: TargetTransformInfo.h:485
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:211
llvm::TargetTransformInfo::LSRCost::NumRegs
unsigned NumRegs
Definition: TargetTransformInfo.h:418
llvm::TargetTransformInfo::TCC_Expensive
@ TCC_Expensive
The cost of a 'div' instruction on x86.
Definition: TargetTransformInfo.h:264
llvm::TargetTransformInfo::UnrollingPreferences::PartialThreshold
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
Definition: TargetTransformInfo.h:453
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
llvm::TargetTransformInfo::ReductionFlags
Flags describing the kind of vector reduction.
Definition: TargetTransformInfo.h:1338
llvm::TargetTransformInfo::instCombineIntrinsic
Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Targets can implement their own combinations for target-specific intrinsics.
Definition: TargetTransformInfo.cpp:298
llvm::TargetTransformInfo::Concept::isHardwareLoopProfitable
virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo)=0
llvm::TargetTransformInfo::Concept::isSourceOfDivergence
virtual bool isSourceOfDivergence(const Value *V)=0
llvm::TargetTransformInfo::Concept::enableMaskedInterleavedAccessVectorization
virtual bool enableMaskedInterleavedAccessVectorization()=0
llvm::MemIntrinsicInfo::PtrVal
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.
Definition: TargetTransformInfo.h:75
llvm::TargetTransformInfo::preferPredicateOverEpilogue
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, const LoopAccessInfo *LAI) const
Query the target whether it would be prefered to create a predicated vector loop, which can avoid the...
Definition: TargetTransformInfo.cpp:286
llvm::TargetTransformInfo::Concept::rewriteIntrinsicWithAddressSpace
virtual Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const =0
llvm::TargetTransformInfo::getMinVectorRegisterBitWidth
unsigned getMinVectorRegisterBitWidth() const
Definition: TargetTransformInfo.cpp:599
llvm::TargetTransformInfo::Concept::enableOrderedReductions
virtual bool enableOrderedReductions()=0
llvm::HardwareLoopInfo::LoopDecrement
Value * LoopDecrement
Definition: TargetTransformInfo.h:103
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::TargetTransformInfo::getShuffleCost
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask=None, int Index=0, VectorType *SubTp=nullptr) const
Definition: TargetTransformInfo.cpp:726
llvm::TargetTransformInfo::MemIndexedMode
MemIndexedMode
The type of load/store indexing.
Definition: TargetTransformInfo.h:1286
llvm::TargetTransformInfo::TCK_Latency
@ TCK_Latency
The latency of instruction.
Definition: TargetTransformInfo.h:213
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:720
llvm::TargetTransformInfo::UnrollingPreferences::MaxCount
unsigned MaxCount
Definition: TargetTransformInfo.h:469
llvm::ImmutablePass
ImmutablePass class - This class is used to provide information that does not need to be run.
Definition: Pass.h:269
llvm::TargetTransformInfo::getRegisterClassName
const char * getRegisterClassName(unsigned ClassID) const
Definition: TargetTransformInfo.cpp:590
AtomicOrdering.h
llvm::ElementCount
Definition: TypeSize.h:386
llvm::TargetTransformInfo::OK_UniformValue
@ OK_UniformValue
Definition: TargetTransformInfo.h:880
llvm::TargetTransformInfo::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:808
llvm::TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass
TargetTransformInfoWrapperPass()
We must provide a default constructor for the pass but it should never be used.
Definition: TargetTransformInfo.cpp:1151
llvm::TargetTransformInfo::Concept::enableMemCmpExpansion
virtual MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const =0
llvm::ExtractElementInst
This instruction extracts a single (scalar) element from a VectorType value.
Definition: Instructions.h:1873
llvm::TargetTransformInfo::canMacroFuseCmp
bool canMacroFuseCmp() const
Return true if the target can fuse a compare and branch.
Definition: TargetTransformInfo.cpp:360
llvm::Function
Definition: Function.h:62
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::TargetTransformInfo::Concept::isLegalMaskedScatter
virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment)=0
Pass.h
llvm::TargetTransformInfo::getRegisterBitWidth
TypeSize getRegisterBitWidth(RegisterKind K) const
Definition: TargetTransformInfo.cpp:594
llvm::TargetTransformInfo::PopcntSupportKind
PopcntSupportKind
Flags indicating the kind of support for population count.
Definition: TargetTransformInfo.h:592
llvm::TargetTransformInfo::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:827
llvm::TargetTransformInfo::Concept::getIntImmCost
virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind)=0
llvm::TargetTransformInfo::getVPLegalizationStrategy
VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const
Definition: TargetTransformInfo.cpp:1041
llvm::TargetTransformInfo::AMK_PostIndexed
@ AMK_PostIndexed
Definition: TargetTransformInfo.h:641
llvm::TargetTransformInfoWrapperPass::getTTI
TargetTransformInfo & getTTI(const Function &F)
Definition: TargetTransformInfo.cpp:1164
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::TargetTransformInfo::getAddressComputationCost
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE=nullptr, const SCEV *Ptr=nullptr) const
Definition: TargetTransformInfo.cpp:889
InstCombiner
Machine InstCombiner
Definition: MachineCombiner.cpp:136
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::TargetTransformInfo::PeelingPreferences::AllowPeeling
bool AllowPeeling
Allow peeling off loop iterations.
Definition: TargetTransformInfo.h:541
llvm::TargetTransformInfo::Concept::hasVolatileVariant
virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace)=0
llvm::TargetTransformInfo::Concept::isFPVectorizationPotentiallyUnsafe
virtual bool isFPVectorizationPotentiallyUnsafe()=0
llvm::TargetTransformInfo::Concept::isLegalMaskedExpandLoad
virtual bool isLegalMaskedExpandLoad(Type *DataType)=0
llvm::TargetTransformInfo::Concept::isAlwaysUniform
virtual bool isAlwaysUniform(const Value *V)=0
llvm::TargetTransformInfo::Concept::getMaxPrefetchIterationsAhead
virtual unsigned getMaxPrefetchIterationsAhead() const =0
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::TargetTransformInfo::MemCmpExpansionOptions::AllowOverlappingLoads
bool AllowOverlappingLoads
Definition: TargetTransformInfo.h:784
llvm::TargetTransformInfo::getScalingFactorCost
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
Definition: TargetTransformInfo.cpp:431
llvm::IntrinsicCostAttributes::getReturnType
Type * getReturnType() const
Definition: TargetTransformInfo.h:150
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:460
llvm::TargetTransformInfo::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
Definition: TargetTransformInfo.cpp:868
llvm::TargetTransformInfo::Concept::getRegisterClassForType
virtual unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const =0
llvm::TargetTransformInfo::RGK_Scalar
@ RGK_Scalar
Definition: TargetTransformInfo.h:907
llvm::TargetTransformInfo::Concept::enableInterleavedAccessVectorization
virtual bool enableInterleavedAccessVectorization()=0
llvm::TargetTransformInfo::getAssumedAddrSpace
unsigned getAssumedAddrSpace(const Value *V) const
Definition: TargetTransformInfo.cpp:267
llvm::IntrinsicCostAttributes::getInst
const IntrinsicInst * getInst() const
Definition: TargetTransformInfo.h:149
llvm::TargetTransformInfo::Concept::useGPUDivergenceAnalysis
virtual bool useGPUDivergenceAnalysis()=0
llvm::TargetTransformInfo::isLegalICmpImmediate
bool isLegalICmpImmediate(int64_t Imm) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
Definition: TargetTransformInfo.cpp:335
llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJamInnerLoopThreshold
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
Definition: TargetTransformInfo.h:504
llvm::TargetTransformInfo::Concept::getMinMaxReductionCost
virtual InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, TTI::TargetCostKind CostKind)=0
llvm::TargetIRAnalysis::operator=
TargetIRAnalysis & operator=(const TargetIRAnalysis &RHS)
Definition: TargetTransformInfo.h:2352
llvm::TargetIRAnalysis::TargetIRAnalysis
TargetIRAnalysis(TargetIRAnalysis &&Arg)
Definition: TargetTransformInfo.h:2350
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::TargetTransformInfo::Concept::preferPredicateOverEpilogue
virtual bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, const LoopAccessInfo *LAI)=0
llvm::TargetTransformInfo::areFunctionArgsABICompatible
bool areFunctionArgsABICompatible(const Function *Caller, const Function *Callee, SmallPtrSetImpl< Argument * > &Args) const
Definition: TargetTransformInfo.cpp:967
llvm::HardwareLoopInfo::ExitBranch
BranchInst * ExitBranch
Definition: TargetTransformInfo.h:100
llvm::TargetTransformInfo::UnrollingPreferences::UnrollRemainder
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
Definition: TargetTransformInfo.h:497
llvm::TargetTransformInfo::UnrollingPreferences::Count
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
Definition: TargetTransformInfo.h:462
llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition: TargetTransformInfo.h:214
llvm::TargetTransformInfo::VPLegalization
Definition: TargetTransformInfo.h:1381
llvm::TargetTransformInfo::shouldBuildLookupTables
bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
Definition: TargetTransformInfo.cpp:462
llvm::TargetTransformInfo::LSRCost::NumIVMuls
unsigned NumIVMuls
Definition: TargetTransformInfo.h:420
llvm::TargetTransformInfo::Concept::isLegalToVectorizeReduction
virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const =0
llvm::HardwareLoopInfo::isHardwareLoopCandidate
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
Definition: TargetTransformInfo.cpp:100
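A minimal sketch of how a loop pass might drive these hooks together with canAnalyze (the helper name is illustrative; the analyses are assumed to come from the pass manager, and a real pass would also ask the target whether the conversion is profitable before committing):
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;
// Decide whether L is structurally suitable for conversion into a target
// hardware loop.
static bool loopFitsHardwareLoop(Loop *L, ScalarEvolution &SE, LoopInfo &LI,
                                 DominatorTree &DT) {
  HardwareLoopInfo HWLoopInfo(L);
  if (!HWLoopInfo.canAnalyze(LI))
    return false;
  return HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT);
}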
llvm::TargetTransformInfo::UnrollingPreferences::Partial
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Definition: TargetTransformInfo.h:481
llvm::TargetTransformInfo::getMaximumVF
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const
Definition: TargetTransformInfo.cpp:616
llvm::TargetTransformInfo::Concept::getEstimatedNumberOfCaseClusters
virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)=0
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::TargetTransformInfo::useColdCCForColdCall
bool useColdCCForColdCall(Function &F) const
Return true if the input function, which is cold at all call sites, should use the coldcc calling conventi...
Definition: TargetTransformInfo.cpp:475
llvm::TargetTransformInfo::VPLegalization::Convert
@ Convert
Definition: TargetTransformInfo.h:1388
llvm::TargetTransformInfo::Concept::getGatherScatterOpCost
virtual InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:535
llvm::TargetTransformInfo::operator=
TargetTransformInfo & operator=(TargetTransformInfo &&RHS)
Definition: TargetTransformInfo.cpp:187
llvm::TargetTransformInfo::Concept::getPeelingPreferences
virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP)=0
llvm::HardwareLoopInfo::L
Loop * L
Definition: TargetTransformInfo.h:98
llvm::TargetTransformInfo::Concept::isLSRCostLess
virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2)=0
llvm::TargetTransformInfo::isLegalNTLoad
bool isLegalNTLoad(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal load.
Definition: TargetTransformInfo.cpp:392
llvm::TargetTransformInfo::isLegalNTStore
bool isLegalNTStore(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal store.
Definition: TargetTransformInfo.cpp:387
llvm::TargetTransformInfo::UnrollingPreferences::FullUnrollMaxCount
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
Definition: TargetTransformInfo.h:473
llvm::Optional
Definition: APInt.h:33
ForceNestedLoop
static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))
llvm::TargetTransformInfo::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:799
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::TargetTransformInfo::OP_PowerOf2
@ OP_PowerOf2
Definition: TargetTransformInfo.h:886
llvm::TargetTransformInfo::getPredictableBranchThreshold
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
Definition: TargetTransformInfo.cpp:228
Operator.h
llvm::TargetTransformInfo::getIntImmCodeSizeCost
InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) const
Return the expected cost for the given integer when optimising for size.
Definition: TargetTransformInfo.cpp:545
llvm::TargetTransformInfo::getInlinerVectorBonusPercent
int getInlinerVectorBonusPercent() const
Definition: TargetTransformInfo.cpp:201
llvm::TargetTransformInfo::getIntImmCostIntrin
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Definition: TargetTransformInfo.cpp:572
VectorType
Definition: ItaniumDemangle.h:1041
llvm::TargetTransformInfo::Concept::isTruncateFree
virtual bool isTruncateFree(Type *Ty1, Type *Ty2)=0
llvm::TargetTransformInfo::getIntImmCostInst
InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr) const
Return the expected cost of materialization for the given integer immediate of the specified type for...
Definition: TargetTransformInfo.cpp:562
llvm::FastMathFlags
Convenience struct for specifying and reasoning about fast-math flags.
Definition: Operator.h:161
llvm::TargetTransformInfo::Concept::getAtomicMemIntrinsicMaxElementSize
virtual unsigned getAtomicMemIntrinsicMaxElementSize() const =0
llvm::TargetTransformInfo::Concept::emitGetActiveLaneMask
virtual bool emitGetActiveLaneMask()=0
llvm::TargetTransformInfo::enableInterleavedAccessVectorization
bool enableInterleavedAccessVectorization() const
Enable matching of interleaved access groups.
Definition: TargetTransformInfo.cpp:505
llvm::TargetTransformInfo::Concept::prefersVectorizedAddressing
virtual bool prefersVectorizedAddressing()=0
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::TargetTransformInfo::SK_PermuteSingleSrc
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
Definition: TargetTransformInfo.h:870
llvm::TargetTransformInfo::Concept::getOrCreateResultFromMemIntrinsic
virtual Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType)=0
llvm::TargetTransformInfo::Concept::getCostOfKeepingLiveOverCall
virtual InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys)=0
llvm::TargetTransformInfo::getOperandsScalarizationOverhead
InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys) const
Estimate the overhead of scalarizing an instruction's unique non-constant operands.
Definition: TargetTransformInfo.cpp:486
llvm::TargetTransformInfo::Concept::getRegisterBitWidth
virtual TypeSize getRegisterBitWidth(RegisterKind K) const =0
llvm::TargetTransformInfo::UnrollingPreferences::AllowExpensiveTripCount
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
Definition: TargetTransformInfo.h:490
llvm::TargetTransformInfo::preferInLoopReduction
bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags Flags) const
Definition: TargetTransformInfo.cpp:1030
llvm::TargetTransformInfo::CacheLevel
CacheLevel
The possible cache levels.
Definition: TargetTransformInfo.h:948
llvm::TargetTransformInfo::Concept
Definition: TargetTransformInfo.h:1437
llvm::TargetTransformInfo::Concept::isLegalNTStore
virtual bool isLegalNTStore(Type *DataType, Align Alignment)=0
llvm::TargetTransformInfo::LSRCost::Insns
unsigned Insns
TODO: Some of these could be merged.
Definition: TargetTransformInfo.h:417
llvm::TargetTransformInfo::Concept::shouldMaximizeVectorBandwidth
virtual bool shouldMaximizeVectorBandwidth() const =0
llvm::IntrinsicCostAttributes::getScalarizationCost
InstructionCost getScalarizationCost() const
Definition: TargetTransformInfo.h:152
llvm::TargetTransformInfo::SK_Broadcast
@ SK_Broadcast
Broadcast element 0 to all other elements.
Definition: TargetTransformInfo.h:860
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::TargetTransformInfo::Concept::getAddressComputationCost
virtual InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr)=0
llvm::TargetTransformInfo::Concept::getIntImmCodeSizeCost
virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty)=0
llvm::TargetTransformInfo::canHaveNonUndefGlobalInitializerInAddressSpace
bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const
Return true if globals in this address space can have initializers other than undef.
Definition: TargetTransformInfo.cpp:262
llvm::TargetTransformInfo::Concept::isLegalNTLoad
virtual bool isLegalNTLoad(Type *DataType, Align Alignment)=0
llvm::IntrinsicCostAttributes::skipScalarizationCost
bool skipScalarizationCost() const
Definition: TargetTransformInfo.h:160
llvm::TargetTransformInfo::requiresOrderedReduction
static bool requiresOrderedReduction(Optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of Fas...
Definition: TargetTransformInfo.h:1160
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::HardwareLoopInfo::IsNestingLegal
bool IsNestingLegal
Definition: TargetTransformInfo.h:105
llvm::TargetTransformInfo::LSRCost::AddRecCost
unsigned AddRecCost
Definition: TargetTransformInfo.h:419
llvm::IntrinsicCostAttributes::getFlags
FastMathFlags getFlags() const
Definition: TargetTransformInfo.h:151
llvm::Reloc::Model
Model
Definition: CodeGen.h:22
llvm::TargetTransformInfo::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const
Get target-customized preferences for the generic loop unrolling transformation.
Definition: TargetTransformInfo.cpp:320
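For example, an unrolling pass can seed baseline preferences and then let the target adjust them (a sketch; the baseline values and helper name are illustrative, and real callers populate every field before use):
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;
// Returns the unroll cost threshold for L after the target has had a chance
// to override the generic defaults.
static unsigned unrollThresholdFor(Loop *L, ScalarEvolution &SE,
                                   const TargetTransformInfo &TTI,
                                   OptimizationRemarkEmitter *ORE) {
  TargetTransformInfo::UnrollingPreferences UP;
  UP.Threshold = 150;   // illustrative baseline
  UP.Partial = false;   // caller's default; the target may flip it
  TTI.getUnrollingPreferences(L, SE, UP, ORE);
  return UP.Threshold;
}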
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::TargetTransformInfo::isLegalMaskedScatter
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const
Return true if the target supports masked scatter.
Definition: TargetTransformInfo.cpp:401
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:206
llvm::TargetTransformInfo::LSRCost::SetupCost
unsigned SetupCost
Definition: TargetTransformInfo.h:423
llvm::TargetTransformInfo::Concept::isLegalMaskedLoad
virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment)=0
llvm::TargetTransformInfo::Concept::canMacroFuseCmp
virtual bool canMacroFuseCmp()=0
llvm::TargetTransformInfo::Concept::isTypeLegal
virtual bool isTypeLegal(Type *Ty)=0
llvm::TargetTransformInfo::getGISelRematGlobalCost
unsigned getGISelRematGlobalCost() const
Definition: TargetTransformInfo.cpp:1049
llvm::IntrinsicCostAttributes::getArgTypes
const SmallVectorImpl< Type * > & getArgTypes() const
Definition: TargetTransformInfo.h:154
llvm::TargetTransformInfo::areInlineCompatible
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
Definition: TargetTransformInfo.cpp:962
llvm::TargetTransformInfo::Concept::getMinimumVF
virtual ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const =0
llvm::TargetTransformInfo::isTypeLegal
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
Definition: TargetTransformInfo.cpp:454
llvm::HardwareLoopInfo::ExitCount
const SCEV * ExitCount
Definition: TargetTransformInfo.h:101
llvm::TargetTransformInfo::SK_PermuteTwoSrc
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
Definition: TargetTransformInfo.h:868
llvm::TargetTransformInfo::Concept::getCacheSize
virtual Optional< unsigned > getCacheSize(CacheLevel Level) const =0
llvm::TargetTransformInfo::PeelingPreferences::PeelProfiledIterations
bool PeelProfiledIterations
Allow peeling based on profile.
Definition: TargetTransformInfo.h:548
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::TargetTransformInfo::getMinimumVF
ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const
Definition: TargetTransformInfo.cpp:611
llvm::MemIntrinsicInfo::isUnordered
bool isUnordered() const
Definition: TargetTransformInfo.h:87
llvm::TargetTransformInfo::Concept::getPredictableBranchThreshold
virtual BranchProbability getPredictableBranchThreshold()=0
llvm::TargetTransformInfo::isProfitableLSRChainElement
bool isProfitableLSRChainElement(Instruction *I) const
Definition: TargetTransformInfo.cpp:356
llvm::TargetTransformInfo::Concept::useAA
virtual bool useAA()=0
llvm::TargetTransformInfo::getCastContextHint
static CastContextHint getCastContextHint(const Instruction *I)
Calculates a CastContextHint from I.
Definition: TargetTransformInfo.cpp:737
llvm::TargetTransformInfo::getOrCreateResultFromMemIntrinsic
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) const
Definition: TargetTransformInfo.cpp:941
llvm::TargetTransformInfo::isLegalToVectorizeLoad
bool isLegalToVectorizeLoad(LoadInst *LI) const
Definition: TargetTransformInfo.cpp:987
llvm::MemIntrinsicInfo::Ordering
AtomicOrdering Ordering
Definition: TargetTransformInfo.h:78
llvm::TargetTransformInfo::Concept::useColdCCForColdCall
virtual bool useColdCCForColdCall(Function &F)=0
llvm::TargetTransformInfoWrapperPass::ID
static char ID
Definition: TargetTransformInfo.h:2394
llvm::TargetTransformInfo::TargetCostConstants
TargetCostConstants
Underlying constants for 'cost' values in this interface.
Definition: TargetTransformInfo.h:261
llvm::TargetTransformInfo::getPopcntSupport
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
Return hardware support for population count.
Definition: TargetTransformInfo.cpp:527
llvm::TargetIRAnalysis::TargetIRAnalysis
TargetIRAnalysis(const TargetIRAnalysis &Arg)
Definition: TargetTransformInfo.h:2348
llvm::TargetTransformInfo::OP_None
@ OP_None
Definition: TargetTransformInfo.h:886
llvm::TargetTransformInfo::ShuffleKind
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Definition: TargetTransformInfo.h:859
llvm::TargetTransformInfo::getPreferredAddressingMode
AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
Return the preferred addressing mode LSR should make efforts to generate.
Definition: TargetTransformInfo.cpp:372
llvm::TargetTransformInfo::CastContextHint
CastContextHint
Represents a hint about the context in which a cast is used.
Definition: TargetTransformInfo.h:1062
llvm::TargetTransformInfo::Concept::getVPLegalizationStrategy
virtual VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const =0
llvm::User
Definition: User.h:44
llvm::TargetTransformInfo::useGPUDivergenceAnalysis
bool useGPUDivergenceAnalysis() const
Return true if the target prefers to use GPU divergence analysis to replace the legacy version.
Definition: TargetTransformInfo.cpp:236
llvm::TargetTransformInfo::UnrollingPreferences::Force
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
Definition: TargetTransformInfo.h:493
InstrTypes.h
llvm::TargetTransformInfo::Concept::getMaxVScale
virtual Optional< unsigned > getMaxVScale() const =0
llvm::TargetTransformInfo::Concept::getPrefetchDistance
virtual unsigned getPrefetchDistance() const =0
llvm::TargetTransformInfo::hasBranchDivergence
bool hasBranchDivergence() const
Return true if branch divergence exists.
Definition: TargetTransformInfo.cpp:232
llvm::TargetTransformInfo::isLegalToVectorizeReduction
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const
Definition: TargetTransformInfo.cpp:1007
llvm::TargetTransformInfo::getIntImmCost
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing the given integer immediate of the specified type.
Definition: TargetTransformInfo.cpp:555
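A sketch of the usual pattern built on this query: decide whether an immediate is free to materialize and therefore not worth hoisting (the helper name and the comparison against TCC_Free are illustrative):
#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;
// Returns true if Imm of type Ty is expected to fold away for free on this
// target, i.e. its materialization cost is TCC_Free.
static bool isFreeImmediate(const TargetTransformInfo &TTI, const APInt &Imm,
                            Type *Ty) {
  InstructionCost Cost =
      TTI.getIntImmCost(Imm, Ty, TargetTransformInfo::TCK_SizeAndLatency);
  return Cost.isValid() && Cost == TargetTransformInfo::TCC_Free;
}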
llvm::TargetTransformInfo::Concept::supportsEfficientVectorElementLoadStore
virtual bool supportsEfficientVectorElementLoadStore()=0
llvm::TargetTransformInfo::Concept::canSaveCmp
virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo)=0
llvm::TargetTransformInfo::getNumberOfParts
unsigned getNumberOfParts(Type *Tp) const
Definition: TargetTransformInfo.cpp:884
llvm::TargetTransformInfo::Concept::isFCmpOrdCheaperThanFCmpZero
virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)=0
llvm::TargetTransformInfo::Concept::isNumRegsMajorCostOfLSR
virtual bool isNumRegsMajorCostOfLSR()=0
llvm::TargetTransformInfo::supportsScalableVectors
bool supportsScalableVectors() const
Definition: TargetTransformInfo.cpp:1053
llvm::TargetTransformInfo::Concept::getExtendedAddReductionCost
virtual InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput)=0
llvm::TargetTransformInfo::isIndexedLoadLegal
bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const
Definition: TargetTransformInfo.cpp:973
llvm::TargetTransformInfo::CastContextHint::Interleave
@ Interleave
The cast is used with an interleaved load/store.
llvm::TargetTransformInfo::UnrollingPreferences::MaxIterationsCountToAnalyze
unsigned MaxIterationsCountToAnalyze
Don't allow loop unrolling to simulate more than this number of iterations when checking full unroll ...
Definition: TargetTransformInfo.h:507
false
Definition: StackSlotColoring.cpp:142
llvm::TargetTransformInfo::Concept::getNumberOfRegisters
virtual unsigned getNumberOfRegisters(unsigned ClassID) const =0
llvm::TargetTransformInfo::Concept::isLegalToVectorizeLoadChain
virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const =0
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJam
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
Definition: TargetTransformInfo.h:499
llvm::TargetTransformInfo::isLegalMaskedExpandLoad
bool isLegalMaskedExpandLoad(Type *DataType) const
Return true if the target supports masked expand load.
Definition: TargetTransformInfo.cpp:410
llvm::TargetTransformInfo::Concept::simplifyDemandedVectorEltsIntrinsic
virtual Optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)=0
llvm::TargetTransformInfo::Concept::isLegalMaskedGather
virtual bool isLegalMaskedGather(Type *DataType, Align Alignment)=0
llvm::TargetTransformInfo::Concept::hasBranchDivergence
virtual bool hasBranchDivergence()=0
llvm::Instruction
Definition: Instruction.h:45
llvm::TargetTransformInfo::Concept::enableWritePrefetching
virtual bool enableWritePrefetching() const =0
llvm::TargetTransformInfo::isLSRCostLess
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2) const
Return true if the LSR cost of C1 is lower than the cost of C2.
Definition: TargetTransformInfo.cpp:348
llvm::TargetTransformInfo::MIM_PreDec
@ MIM_PreDec
Pre-decrementing.
Definition: TargetTransformInfo.h:1289
llvm::HardwareLoopInfo::PerformEntryTest
bool PerformEntryTest
Definition: TargetTransformInfo.h:109
llvm::TargetTransformInfo::Concept::getMaskedMemoryOpCost
virtual InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::isLegalMaskedLoad
bool isLegalMaskedLoad(Type *DataType, Align Alignment) const
Return true if the target supports masked load.
Definition: TargetTransformInfo.cpp:382
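Vectorizers typically gate predicated memory accesses on these legality hooks; a minimal sketch (the helper name is illustrative):
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;
// Can a conditional load of VecTy at this alignment be emitted as a single
// masked load, or failing that as a masked gather?
static bool canUseMaskedLoad(const TargetTransformInfo &TTI, Type *VecTy,
                             Align Alignment) {
  if (TTI.isLegalMaskedLoad(VecTy, Alignment))
    return true;
  return TTI.isLegalMaskedGather(VecTy, Alignment);
}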
llvm::TargetTransformInfo::isSourceOfDivergence
bool isSourceOfDivergence(const Value *V) const
Returns whether V is a source of divergence.
Definition: TargetTransformInfo.cpp:240
llvm::TargetTransformInfo::simplifyDemandedVectorEltsIntrinsic
Optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Can be used to implement target-specific instruction combining.
Definition: TargetTransformInfo.cpp:310
llvm::TargetTransformInfo::CastContextHint::Reversed
@ Reversed
The cast is used with a reversed load/store.
llvm::TargetTransformInfo::getPrefetchDistance
unsigned getPrefetchDistance() const
Definition: TargetTransformInfo.cpp:641
llvm::HardwareLoopInfo::CounterInReg
bool CounterInReg
Definition: TargetTransformInfo.h:107
llvm::TargetTransformInfo::Concept::isIndexedStoreLegal
virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const =0
llvm::TargetTransformInfo::Concept::supportsScalableVectors
virtual bool supportsScalableVectors() const =0
llvm::TargetTransformInfo::Concept::isLegalToVectorizeLoad
virtual bool isLegalToVectorizeLoad(LoadInst *LI) const =0
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::TargetTransformInfo::Concept::isLegalToVectorizeStoreChain
virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const =0
llvm::TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const
Return true if it is faster to check if a floating-point value is NaN (or not-NaN) versus a compariso...
Definition: TargetTransformInfo.cpp:535
llvm::AnalysisManager::Invalidator
API to communicate dependencies between analyses during invalidation.
Definition: PassManager.h:670
llvm::TargetTransformInfo::Concept::getMemcpyLoopResidualLoweringType
virtual void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign) const =0
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::TargetTransformInfo::RGK_FixedWidthVector
@ RGK_FixedWidthVector
Definition: TargetTransformInfo.h:907
llvm::TargetTransformInfo::Concept::allowsMisalignedMemoryAccesses
virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, bool *Fast)=0
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::TargetTransformInfo::Concept::areFunctionArgsABICompatible
virtual bool areFunctionArgsABICompatible(const Function *Caller, const Function *Callee, SmallPtrSetImpl< Argument * > &Args) const =0
llvm::None
const NoneType None
Definition: None.h:23
llvm::TargetTransformInfo::Concept::getMemcpyLoopLoweringType
virtual Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign) const =0
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
llvm::IntrinsicCostAttributes
Definition: TargetTransformInfo.h:118
BranchProbability.h
llvm::TargetTransformInfo::VPLegalization::VPLegalization
VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)
Definition: TargetTransformInfo.h:1406
llvm::TargetTransformInfo::Concept::getDataLayout
virtual const DataLayout & getDataLayout() const =0
llvm::TargetTransformInfo::hasVolatileVariant
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const
Return true if the given instruction (assumed to be a memory access instruction) has a volatile varia...
Definition: TargetTransformInfo.cpp:422
llvm::TargetTransformInfo::PSK_Software
@ PSK_Software
Definition: TargetTransformInfo.h:592
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::TargetTransformInfo::Concept::isElementTypeLegalForScalableVector
virtual bool isElementTypeLegalForScalableVector(Type *Ty) const =0
Operands
mir Rename Register Operands
Definition: MIRNamerPass.cpp:78
llvm::TargetTransformInfo::isLegalMaskedCompressStore
bool isLegalMaskedCompressStore(Type *DataType) const
Return true if the target supports masked compress store.
Definition: TargetTransformInfo.cpp:406
llvm::TargetTransformInfo::haveFastSqrt
bool haveFastSqrt(Type *Ty) const
Return true if the hardware has a fast square-root instruction.
Definition: TargetTransformInfo.cpp:531
llvm::createTargetTransformInfoWrapperPass
ImmutablePass * createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA)
Create an analysis pass wrapper around a TTI object.
Definition: TargetTransformInfo.cpp:1171
llvm::TargetTransformInfo::VPLegalization::EVLParamStrategy
VPTransform EVLParamStrategy
Definition: TargetTransformInfo.h:1395
llvm::TargetTransformInfo::ReductionFlags::ReductionFlags
ReductionFlags()
Definition: TargetTransformInfo.h:1339
llvm::TargetTransformInfo::SK_Reverse
@ SK_Reverse
Reverse the order of the vector.
Definition: TargetTransformInfo.h:861
llvm::TargetTransformInfo::Concept::collectFlatAddressOperands
virtual bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const =0
llvm::TargetTransformInfo::VPLegalization::VPTransform
VPTransform
Definition: TargetTransformInfo.h:1382
llvm::TargetTransformInfo::getFlatAddressSpace
unsigned getFlatAddressSpace() const
Returns the address space ID for a target's 'flat' address space.
Definition: TargetTransformInfo.cpp:248
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
llvm::AtomicOrdering
AtomicOrdering
Atomic ordering for LLVM's memory model.
Definition: AtomicOrdering.h:56
llvm::TargetTransformInfo::CastContextHint::Normal
@ Normal
The cast is used with a normal load/store.
llvm::TargetTransformInfo::Concept::~Concept
virtual ~Concept()=0
Definition: TargetTransformInfo.cpp:1125
llvm::TargetTransformInfo::Concept::getIntrinsicInstrCost
virtual InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)=0
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:77
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:304
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::TargetTransformInfo::SK_InsertSubvector
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
Definition: TargetTransformInfo.h:866
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::TargetTransformInfo::Concept::getInstructionLatency
virtual InstructionCost getInstructionLatency(const Instruction *I)=0
llvm::TargetTransformInfo::Concept::isProfitableLSRChainElement
virtual bool isProfitableLSRChainElement(Instruction *I)=0
llvm::TargetTransformInfo::shouldBuildLookupTablesForConstant
bool shouldBuildLookupTablesForConstant(Constant *C) const
Return true if switches should be turned into lookup tables containing this constant value for the ta...
Definition: TargetTransformInfo.cpp:466
llvm::TargetTransformInfo::Concept::getRegUsageForType
virtual InstructionCost getRegUsageForType(Type *Ty)=0
llvm::TargetTransformInfo::MIM_PostInc
@ MIM_PostInc
Post-incrementing.
Definition: TargetTransformInfo.h:1290
llvm::TargetTransformInfo::shouldMaximizeVectorBandwidth
bool shouldMaximizeVectorBandwidth() const
Definition: TargetTransformInfo.cpp:607
llvm::HardwareLoopInfo::HardwareLoopInfo
HardwareLoopInfo()=delete
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::TargetTransformInfo::LSRCost
Definition: TargetTransformInfo.h:414
llvm::TargetTransformInfo::collectFlatAddressOperands
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
Return any intrinsic address operand indexes which may be rewritten if they use a flat address space ...
Definition: TargetTransformInfo.cpp:252
llvm::TargetTransformInfo::VPLegalization::OpStrategy
VPTransform OpStrategy
Definition: TargetTransformInfo.h:1401
llvm::TargetTransformInfo::isLegalMaskedGather
bool isLegalMaskedGather(Type *DataType, Align Alignment) const
Return true if the target supports masked gather.
Definition: TargetTransformInfo.cpp:396
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2387
llvm::TargetTransformInfo::Concept::getInterleavedMemoryOpCost
virtual InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)=0
llvm::TargetTransformInfo::emitGetActiveLaneMask
bool emitGetActiveLaneMask() const
Query the target whether lowering of the llvm.get.active.lane.mask intrinsic is supported.
Definition: TargetTransformInfo.cpp:293
llvm::TargetTransformInfo::preferPredicatedReductionSelect
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, ReductionFlags Flags) const
Definition: TargetTransformInfo.cpp:1035
llvm::TargetTransformInfo::Concept::hasDivRemOp
virtual bool hasDivRemOp(Type *DataType, bool IsSigned)=0
llvm::TargetTransformInfo::getInterleavedMemoryOpCost
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, bool UseMaskForCond=false, bool UseMaskForGaps=false) const
Definition: TargetTransformInfo.cpp:856
llvm::TargetTransformInfo::ReductionFlags::NoNaN
bool NoNaN
If op is an fp min/max, whether NaNs may be present.
Definition: TargetTransformInfo.h:1342
llvm::TargetTransformInfo::Concept::shouldBuildLookupTables
virtual bool shouldBuildLookupTables()=0
llvm::TargetTransformInfo::OK_UniformConstantValue
@ OK_UniformConstantValue
Definition: TargetTransformInfo.h:881
llvm::TargetIRAnalysis::Result
TargetTransformInfo Result
Definition: TargetTransformInfo.h:2333
llvm::TargetTransformInfo::getMaxInterleaveFactor
unsigned getMaxInterleaveFactor(unsigned VF) const
Definition: TargetTransformInfo.cpp:660
llvm::TargetTransformInfo::VPLegalization::shouldDoNothing
bool shouldDoNothing() const
Definition: TargetTransformInfo.h:1403
llvm::TargetTransformInfo::getRegisterClassForType
unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
Definition: TargetTransformInfo.cpp:585
llvm::TargetTransformInfo::Concept::getMaximumVF
virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const =0
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::AnalysisKey
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: PassManager.h:72
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:428
llvm::TargetTransformInfo::getCostOfKeepingLiveOverCall
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const
Definition: TargetTransformInfo.cpp:928
llvm::TargetTransformInfo::Concept::getArithmeticInstrCost
virtual InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, OperandValueKind Opd1Info, OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo, ArrayRef< const Value * > Args, const Instruction *CxtI=nullptr)=0
llvm::AtomicOrdering::Unordered
@ Unordered
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::TargetTransformInfo::OperandValueProperties
OperandValueProperties
Additional properties of an operand's values.
Definition: TargetTransformInfo.h:886
llvm::TargetTransformInfo::Concept::isLegalMaskedStore
virtual bool isLegalMaskedStore(Type *DataType, Align Alignment)=0
llvm::TargetTransformInfo::shouldConsiderAddressTypePromotion
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
Definition: TargetTransformInfo.cpp:621
llvm::LoopAccessInfo
Drive the analysis of memory accesses in the loop.
Definition: LoopAccessAnalysis.h:515
llvm::TargetTransformInfo::Concept::getScalarizationOverhead
virtual InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract)=0
llvm::TargetTransformInfo::getUserCost
InstructionCost getUserCost(const User *U, TargetCostKind CostKind) const
This is a helper function which calls the two-argument getUserCost with Operands which are the curren...
Definition: TargetTransformInfo.h:326
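As an illustration, size-estimation code in the spirit of CodeMetrics can sum this helper over a block (a sketch; the function name is made up):
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
using namespace llvm;
// Rough code-size estimate of a basic block as seen by the target.
static InstructionCost blockCodeSize(const BasicBlock &BB,
                                     const TargetTransformInfo &TTI) {
  InstructionCost Size = 0;
  for (const Instruction &I : BB)
    Size += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize);
  return Size;
}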
llvm::TargetTransformInfo::Concept::getTgtMemIntrinsic
virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)=0
llvm::TargetTransformInfo::getScalarizationOverhead
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract) const
Estimate the overhead of scalarizing an instruction.
Definition: TargetTransformInfo.cpp:480
llvm::TargetTransformInfo::PeelingPreferences::AllowLoopNestsPeeling
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
Definition: TargetTransformInfo.h:543
llvm::TargetTransformInfo::rewriteIntrinsicWithAddressSpace
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
Rewrite intrinsic call II such that OldV will be replaced with NewV, which has a different address sp...
Definition: TargetTransformInfo.cpp:271
llvm::TargetTransformInfo::Concept::getShuffleCost
virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, int Index, VectorType *SubTp)=0
llvm::TargetTransformInfo::Concept::shouldExpandReduction
virtual bool shouldExpandReduction(const IntrinsicInst *II) const =0
llvm::TargetTransformInfo::Concept::getLoadVectorFactor
virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const =0
llvm::TargetTransformInfo::getGatherScatterOpCost
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:847
llvm::TargetTransformInfo::VPLegalization::Discard
@ Discard
Definition: TargetTransformInfo.h:1386
llvm::TargetTransformInfo::Concept::getCastInstrCost
virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I)=0
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1609
llvm::TargetTransformInfo::Concept::isLoweredToCall
virtual bool isLoweredToCall(const Function *F)=0
llvm::TargetTransformInfo::LSRWithInstrQueries
bool LSRWithInstrQueries() const
Return true if the loop strength reduce pass should make Instruction* based TTI queries to isLegalAdd...
Definition: TargetTransformInfo.cpp:440
llvm::TargetTransformInfo::Concept::getScalingFactorCost
virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace)=0
llvm::TargetTransformInfo::getMinMaxReductionCost
InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Definition: TargetTransformInfo.cpp:911
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::TargetTransformInfo::isLegalToVectorizeLoadChain
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
Definition: TargetTransformInfo.cpp:995
llvm::TargetTransformInfo::getGEPCost
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, TargetCostKind CostKind=TCK_SizeAndLatency) const
Estimate the cost of a GEP operation when lowered.
Definition: TargetTransformInfo.cpp:206
llvm::TargetTransformInfo::isLegalAddImmediate
bool isLegalAddImmediate(int64_t Imm) const
Return true if the specified immediate is a legal add immediate, that is, the target has an add instruction...
Definition: TargetTransformInfo.cpp:331
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:163
llvm::TargetTransformInfo::Concept::getOperandsScalarizationOverhead
virtual InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys)=0
Mode
SI Whole Quad Mode
Definition: SIWholeQuadMode.cpp:262
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:83
llvm::TargetTransformInfo::LSRCost::ScaleCost
unsigned ScaleCost
Definition: TargetTransformInfo.h:424
llvm::TargetTransformInfo::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace=0, Align Alignment=Align(1), bool *Fast=nullptr) const
Determine if the target supports unaligned memory accesses.
Definition: TargetTransformInfo.cpp:517
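A sketch of the common "is an unaligned access actually fast?" check (the helper name is illustrative):
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;
// Returns true if a misaligned access of BitWidth bits in address space 0 is
// both legal and fast on this target.
static bool unalignedAccessIsFast(const TargetTransformInfo &TTI,
                                  LLVMContext &Ctx, unsigned BitWidth) {
  bool Fast = false;
  return TTI.allowsMisalignedMemoryAccesses(Ctx, BitWidth, /*AddressSpace=*/0,
                                            Align(1), &Fast) &&
         Fast;
}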
llvm::TargetTransformInfo::isLoweredToCall
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
Definition: TargetTransformInfo.cpp:276
llvm::TargetTransformInfo::SK_Splice
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
Definition: TargetTransformInfo.h:872
llvm::TargetTransformInfo::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:780
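getCastContextHint and getCastInstrCost usually go together: derive the hint from the instruction, then ask for the cost (a sketch; the helper name is illustrative):
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;
// Cost of an existing cast, taking its load/store context into account
// (e.g. an extend folded into a widening load may be free).
static InstructionCost castCost(const TargetTransformInfo &TTI,
                                const CastInst &CI) {
  TargetTransformInfo::CastContextHint CCH =
      TargetTransformInfo::getCastContextHint(&CI);
  return TTI.getCastInstrCost(CI.getOpcode(), CI.getDestTy(), CI.getSrcTy(),
                              CCH, TargetTransformInfo::TCK_RecipThroughput,
                              &CI);
}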
llvm::TargetTransformInfo::Concept::shouldBuildRelLookupTables
virtual bool shouldBuildRelLookupTables()=0
llvm::TargetTransformInfo::PSK_SlowHardware
@ PSK_SlowHardware
Definition: TargetTransformInfo.h:592
llvm::TargetTransformInfo::Concept::getRegisterClassName
virtual const char * getRegisterClassName(unsigned ClassID) const =0
llvm::AnalysisInfoMixin
A CRTP mix-in that provides informational APIs needed for analysis passes.
Definition: PassManager.h:397
llvm::TargetTransformInfo::ReductionFlags::IsSigned
bool IsSigned
Whether the operation is a signed int reduction.
Definition: TargetTransformInfo.h:1341
llvm::TargetTransformInfo::OperandValueKind
OperandValueKind
Additional information about an operand's possible values.
Definition: TargetTransformInfo.h:878
llvm::TargetTransformInfo::getMemcpyLoopResidualLoweringType
void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign) const
Definition: TargetTransformInfo.cpp:953
llvm::TargetTransformInfo::Concept::instCombineIntrinsic
virtual Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II)=0
llvm::MemIntrinsicInfo::ReadMem
bool ReadMem
Definition: TargetTransformInfo.h:83
llvm::TargetTransformInfo::Concept::getCmpSelInstrCost
virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I)=0
llvm::TargetTransformInfo::MemCmpExpansionOptions::MaxNumLoads
unsigned MaxNumLoads
Definition: TargetTransformInfo.h:766
InstructionCost.h
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::TargetTransformInfo::canSaveCmp
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const
Return true if the target can save a compare for loop count, for example hardware loop saves a compar...
Definition: TargetTransformInfo.cpp:364
llvm::TargetTransformInfo::isTruncateFree
bool isTruncateFree(Type *Ty1, Type *Ty2) const
Return true if it's free to truncate a value of type Ty1 to type Ty2.
Definition: TargetTransformInfo.cpp:444
llvm::TargetTransformInfo::prefersVectorizedAddressing
bool prefersVectorizedAddressing() const
Return true if the target doesn't mind addresses in vectors.
Definition: TargetTransformInfo.cpp:427
llvm::TargetTransformInfo::MemCmpExpansionOptions
Returns options for expansion of memcmp; IsZeroCmp is true when the expansion is for a comparison of the result against zero.
Definition: TargetTransformInfo.h:761
llvm::TargetTransformInfo::TCC_Free
@ TCC_Free
Expected to fold away in lowering.
Definition: TargetTransformInfo.h:262
llvm::TargetTransformInfo::PeelingPreferences::PeelCount
unsigned PeelCount
A forced peeling factor (the number of bodies of the original loop that should be peeled off before t...
Definition: TargetTransformInfo.h:539
llvm::TargetTransformInfo::supportsEfficientVectorElementLoadStore
bool supportsEfficientVectorElementLoadStore() const
If target has efficient vector element load/store instructions, it can return true here so that inser...
Definition: TargetTransformInfo.cpp:491
llvm::TargetTransformInfo::enableMemCmpExpansion
MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
Definition: TargetTransformInfo.cpp:501
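The memcmp expansion pass consumes these options roughly as sketched below (the size calculation and helper name are illustrative rather than the pass's exact heuristic, and LoadSizes is assumed to be listed largest-first, as targets conventionally do):
#include "llvm/Analysis/TargetTransformInfo.h"
#include <cstdint>
using namespace llvm;
// Decide whether a memcmp of Size bytes whose result is only compared against
// zero looks worth expanding inline for this target.
static bool shouldExpandMemCmp(const TargetTransformInfo &TTI, uint64_t Size,
                               bool OptSize) {
  TargetTransformInfo::MemCmpExpansionOptions Options =
      TTI.enableMemCmpExpansion(OptSize, /*IsZeroCmp=*/true);
  if (Options.MaxNumLoads == 0 || Options.LoadSizes.empty())
    return false; // target does not want the expansion
  unsigned MaxLoad = Options.LoadSizes.front(); // largest supported load
  return (Size + MaxLoad - 1) / MaxLoad <= Options.MaxNumLoads;
}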
llvm::TargetTransformInfo::Concept::shouldConsiderAddressTypePromotion
virtual bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)=0
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:215
llvm::TargetTransformInfo::Concept::haveFastSqrt
virtual bool haveFastSqrt(Type *Ty)=0
llvm::TargetTransformInfo::Concept::isNoopAddrSpaceCast
virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const =0
llvm::TargetTransformInfo::isElementTypeLegalForScalableVector
bool isElementTypeLegalForScalableVector(Type *Ty) const
Definition: TargetTransformInfo.cpp:1012
llvm::TargetTransformInfo::isLegalMaskedStore
bool isLegalMaskedStore(Type *DataType, Align Alignment) const
Return true if the target supports masked store.
Definition: TargetTransformInfo.cpp:377
llvm::TargetTransformInfo::getRegUsageForType
InstructionCost getRegUsageForType(Type *Ty) const
Returns the estimated number of registers required to represent Ty.
Definition: TargetTransformInfo.cpp:458
llvm::TargetTransformInfo::getUserCost
InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Definition: TargetTransformInfo.cpp:219
llvm::TargetTransformInfo::Concept::isIndexedLoadLegal
virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const =0
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
llvm::BranchProbability
Definition: BranchProbability.h:30
llvm::TargetTransformInfo::Concept::getFlatAddressSpace
virtual unsigned getFlatAddressSpace()=0
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::TargetTransformInfo::UnrollingPreferences::DefaultUnrollRuntimeCount
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
Definition: TargetTransformInfo.h:464
llvm::TargetTransformInfo::hasDivRemOp
bool hasDivRemOp(Type *DataType, bool IsSigned) const
Return true if the target has a unified operation to calculate division and remainder.
Definition: TargetTransformInfo.cpp:418
llvm::TargetTransformInfo::Concept::LSRWithInstrQueries
virtual bool LSRWithInstrQueries()=0
llvm::TargetTransformInfo::simplifyDemandedUseBitsIntrinsic
Optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
Can be used to implement target-specific instruction combining.
Definition: TargetTransformInfo.cpp:303
llvm::TargetTransformInfo::getCacheSize
Optional< unsigned > getCacheSize(CacheLevel Level) const
Definition: TargetTransformInfo.cpp:632
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::TargetTransformInfo::getNumberOfRegisters
unsigned getNumberOfRegisters(unsigned ClassID) const
Definition: TargetTransformInfo.cpp:581
llvm::InstructionCost::isValid
bool isValid() const
Definition: InstructionCost.h:79
llvm::TargetTransformInfo::AddressingModeKind
AddressingModeKind
Definition: TargetTransformInfo.h:639
llvm::TargetTransformInfo::getMaxPrefetchIterationsAhead
unsigned getMaxPrefetchIterationsAhead() const
Definition: TargetTransformInfo.cpp:652
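Prefetch insertion combines this limit with getPrefetchDistance; a sketch in the spirit of the loop data prefetch heuristic (the exact formula and helper name are illustrative):
#include "llvm/Analysis/TargetTransformInfo.h"
#include <algorithm>
using namespace llvm;
// Roughly how many loop iterations ahead a software prefetch should aim:
// the target's prefetch distance (in instructions) divided by the loop body
// size, clamped to the target's stated maximum.
static unsigned iterationsAhead(const TargetTransformInfo &TTI,
                                unsigned LoopSizeInInstrs) {
  unsigned Distance = TTI.getPrefetchDistance();
  if (Distance == 0 || LoopSizeInInstrs == 0)
    return 0; // target exposes no prefetcher information
  unsigned Ahead = std::max(Distance / LoopSizeInInstrs, 1u);
  return std::min(Ahead, TTI.getMaxPrefetchIterationsAhead());
}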
llvm::TargetTransformInfo::MIM_Unindexed
@ MIM_Unindexed
No indexing.
Definition: TargetTransformInfo.h:1287
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:421
llvm::TargetTransformInfo::OK_AnyValue
@ OK_AnyValue
Definition: TargetTransformInfo.h:879
llvm::TargetTransformInfo::getLoadVectorFactor
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
Definition: TargetTransformInfo.cpp:1016
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:175
llvm::TargetTransformInfo::SK_Transpose
@ SK_Transpose
Transpose two vectors.
Definition: TargetTransformInfo.h:865
llvm::TargetTransformInfo::CastContextHint::None
@ None
The cast is not used with a load/store of any kind.
llvm::TargetTransformInfo::isLegalToVectorizeStore
bool isLegalToVectorizeStore(StoreInst *SI) const
Definition: TargetTransformInfo.cpp:991
llvm::TargetTransformInfo::CacheLevel::L2D
@ L2D
llvm::TargetTransformInfo::Concept::getAssumedAddrSpace
virtual unsigned getAssumedAddrSpace(const Value *V) const =0
llvm::TargetTransformInfo::MIM_PreInc
@ MIM_PreInc
Pre-incrementing.
Definition: TargetTransformInfo.h:1288
llvm::TargetTransformInfo::MemCmpExpansionOptions::LoadSizes
SmallVector< unsigned, 8 > LoadSizes
Definition: TargetTransformInfo.h:769
llvm::TargetIRAnalysis::TargetIRAnalysis
TargetIRAnalysis()
Default construct a target IR analysis.
Definition: TargetTransformInfo.cpp:1127
llvm::TargetTransformInfo::Concept::preferInLoopReduction
virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags) const =0
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:206
llvm::TargetTransformInfo::enableOrderedReductions
bool enableOrderedReductions() const
Return true if we should be enabling ordered reductions for the target.
Definition: TargetTransformInfo.cpp:414
llvm::TargetTransformInfo::getMaxVScale
Optional< unsigned > getMaxVScale() const
Definition: TargetTransformInfo.cpp:603
llvm::TargetTransformInfo::Concept::isLegalICmpImmediate
virtual bool isLegalICmpImmediate(int64_t Imm)=0
llvm::TargetTransformInfo::getCacheAssociativity
Optional< unsigned > getCacheAssociativity(CacheLevel Level) const
Definition: TargetTransformInfo.cpp:637
llvm::TargetTransformInfo::getCacheLineSize
unsigned getCacheLineSize() const
Definition: TargetTransformInfo.cpp:627
std
Definition: BitVector.h:838
llvm::TargetTransformInfo::enableMaskedInterleavedAccessVectorization
bool enableMaskedInterleavedAccessVectorization() const
Enable matching of interleaved access groups that contain predicated accesses or gaps and therefore v...
Definition: TargetTransformInfo.cpp:509
llvm::KnownBits
Definition: KnownBits.h:23
llvm::TargetTransformInfo::LSRCost::NumBaseAdds
unsigned NumBaseAdds
Definition: TargetTransformInfo.h:421
llvm::TargetIRAnalysis::operator=
TargetIRAnalysis & operator=(TargetIRAnalysis &&RHS)
Definition: TargetTransformInfo.h:2356
llvm::HardwareLoopInfo::ExitBlock
BasicBlock * ExitBlock
Definition: TargetTransformInfo.h:99
llvm::MemIntrinsicInfo::WriteMem
bool WriteMem
Definition: TargetTransformInfo.h:84
llvm::TargetTransformInfo::UnrollingPreferences::UpperBound
bool UpperBound
Allow using trip count upper bound to unroll loops.
Definition: TargetTransformInfo.h:495
llvm::VPIntrinsic
This is the common base class for vector predication intrinsics.
Definition: IntrinsicInst.h:390
llvm::TargetTransformInfo::isNoopAddrSpaceCast
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Definition: TargetTransformInfo.cpp:257
llvm::TypeSize
Definition: TypeSize.h:417
llvm::TargetTransformInfo::getLoadStoreVecRegBitWidth
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
Definition: TargetTransformInfo.cpp:983
llvm::TargetTransformInfo::UnrollingPreferences::AllowRemainder
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
Definition: TargetTransformInfo.h:487
llvm::TargetTransformInfo::Concept::enableAggressiveInterleaving
virtual bool enableAggressiveInterleaving(bool LoopHasReductions)=0
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
llvm::TargetTransformInfo::isFPVectorizationPotentiallyUnsafe
bool isFPVectorizationPotentiallyUnsafe() const
Indicate that it is potentially unsafe to automatically vectorize floating-point operations because t...
Definition: TargetTransformInfo.cpp:513
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
PassManager.h
Arguments
AMDGPU Lower Kernel Arguments
Definition: AMDGPULowerKernelArguments.cpp:243
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:221
llvm::User::operand_values
iterator_range< value_op_iterator > operand_values()
Definition: User.h:266
llvm::TargetTransformInfo::LSRCost::ImmCost
unsigned ImmCost
Definition: TargetTransformInfo.h:422
llvm::TargetIRAnalysis::run
Result run(const Function &F, FunctionAnalysisManager &)
Definition: TargetTransformInfo.cpp:1133
llvm::TargetTransformInfo::getInstructionCost
InstructionCost getInstructionCost(const Instruction *I, enum TargetCostKind kind) const
Query the cost of a specified instruction.
Definition: TargetTransformInfo.h:225
llvm::HardwareLoopInfo::HardwareLoopInfo
HardwareLoopInfo(Loop *L)
Definition: TargetTransformInfo.h:97
llvm::TargetTransformInfo::Concept::getFPOpCost
virtual InstructionCost getFPOpCost(Type *Ty)=0
llvm::TargetTransformInfo::getInliningThresholdMultiplier
unsigned getInliningThresholdMultiplier() const
Definition: TargetTransformInfo.cpp:192
llvm::TargetTransformInfo::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index=-1) const
Definition: TargetTransformInfo.cpp:819
llvm::InstCombiner
The core instruction combiner logic.
Definition: InstCombiner.h:45
llvm::TargetTransformInfo::adjustInliningThreshold
unsigned adjustInliningThreshold(const CallBase *CB) const
Definition: TargetTransformInfo.cpp:197
llvm::TargetTransformInfo::getMaskedMemoryOpCost
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Definition: TargetTransformInfo.cpp:838
llvm::TargetTransformInfo::RGK_ScalableVector
@ RGK_ScalableVector
Definition: TargetTransformInfo.h:907
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:45
llvm::HardwareLoopInfo
Attributes of a target dependent hardware loop.
Definition: TargetTransformInfo.h:95
llvm::InstructionCost::getInvalid
static InstructionCost getInvalid(CostType Val=0)
Definition: InstructionCost.h:73
llvm::TargetTransformInfo::getEstimatedNumberOfCaseClusters
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
Definition: TargetTransformInfo.cpp:212
llvm::TargetTransformInfo::TargetTransformInfo
TargetTransformInfo(T Impl)
Construct a TTI object using a type implementing the Concept API below.
Definition: TargetTransformInfo.h:2317
llvm::TargetTransformInfo::getExtendedAddReductionCost
InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of an extended reduction pattern, similar to getArithmeticReductionCost of an Add ...
Definition: TargetTransformInfo.cpp:920
llvm::RecurrenceDescriptor
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
Definition: IVDescriptors.h:71
llvm::TargetTransformInfo::Concept::getCallInstrCost
virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::Concept::getArithmeticReductionCost
virtual InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, Optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::MemCmpExpansionOptions::NumLoadsPerBlock
unsigned NumLoadsPerBlock
Definition: TargetTransformInfo.h:779
llvm::TargetTransformInfo::getArithmeticReductionCost
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, Optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of vector reduction intrinsics.
Definition: TargetTransformInfo.cpp:902
llvm::IntrinsicCostAttributes::getID
Intrinsic::ID getID() const
Definition: TargetTransformInfo.h:148
llvm::TargetTransformInfo::Concept::getGISelRematGlobalCost
virtual unsigned getGISelRematGlobalCost() const =0
llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition: TargetTransformInfo.h:907
llvm::TargetTransformInfo::Concept::getIntImmCostInst
virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr)=0
llvm::TargetTransformInfo::Concept::getCFInstrCost
virtual InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
llvm::TargetTransformInfo::invalidate
bool invalidate(Function &, const PreservedAnalyses &, FunctionAnalysisManager::Invalidator &)
Handle the invalidation of this information.
Definition: TargetTransformInfo.h:197
llvm::TargetTransformInfo::Concept::getInlinerVectorBonusPercent
virtual int getInlinerVectorBonusPercent()=0
llvm::TargetTransformInfo::Concept::isLegalAddImmediate
virtual bool isLegalAddImmediate(int64_t Imm)=0
llvm::TargetTransformInfo::UnrollingPreferences::Threshold
unsigned Threshold
The cost threshold for the unrolled loop.
Definition: TargetTransformInfo.h:436
llvm::TargetTransformInfo::Concept::getNumberOfParts
virtual unsigned getNumberOfParts(Type *Tp)=0
llvm::TargetTransformInfo::VPLegalization::Legal
@ Legal
Definition: TargetTransformInfo.h:1384
llvm::TargetTransformInfo::Concept::shouldBuildLookupTablesForConstant
virtual bool shouldBuildLookupTablesForConstant(Constant *C)=0
llvm::TargetTransformInfo::Concept::isProfitableToHoist
virtual bool isProfitableToHoist(Instruction *I)=0
llvm::TargetTransformInfo::getMinPrefetchStride
unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Some HW prefetchers can handle accesses up to a certain constant stride.
Definition: TargetTransformInfo.cpp:645
llvm::TargetTransformInfo::isIndexedStoreLegal
bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const
Definition: TargetTransformInfo.cpp:978
llvm::TargetTransformInfo::isNumRegsMajorCostOfLSR
bool isNumRegsMajorCostOfLSR() const
Return true if the major cost of LSR is the number of registers.
Definition: TargetTransformInfo.cpp:352
llvm::TargetTransformInfo::shouldExpandReduction
bool shouldExpandReduction(const IntrinsicInst *II) const
Definition: TargetTransformInfo.cpp:1045
llvm::TargetTransformInfo::Concept::isLegalAddressingMode
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I)=0
llvm::TargetTransformInfo::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP) const
Get target-customized preferences for the generic loop peeling transformation.
Definition: TargetTransformInfo.cpp:326
llvm::TargetTransformInfo::getStoreVectorFactor
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
Definition: TargetTransformInfo.cpp:1023
llvm::TargetTransformInfo::Concept::getCacheAssociativity
virtual Optional< unsigned > getCacheAssociativity(CacheLevel Level) const =0
llvm::TargetTransformInfo::MIM_PostDec
@ MIM_PostDec
Post-decrementing.
Definition: TargetTransformInfo.h:1291
llvm::HardwareLoopInfo::canAnalyze
bool canAnalyze(LoopInfo &LI)
Definition: TargetTransformInfo.cpp:47
llvm::TargetTransformInfo::isLegalToVectorizeStoreChain
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
Definition: TargetTransformInfo.cpp:1001
llvm::SmallVectorImpl< const Value * >
ForceHardwareLoopPHI
static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))
llvm::TargetTransformInfo::Concept::preferPredicatedReductionSelect
virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, ReductionFlags) const =0
llvm::MemIntrinsicInfo
Information about a load/store intrinsic defined by the target.
Definition: TargetTransformInfo.h:70
llvm::TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize
unsigned getAtomicMemIntrinsicMaxElementSize() const
Definition: TargetTransformInfo.cpp:937
llvm::msgpack::Type
Type
MessagePack types as defined in the standard, with the exception of Integer being divided into a signed Integer and an unsigned UInt.
Definition: MsgPackReader.h:49
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst); holds everything related to calling a function.
Definition: InstrTypes.h:1161
DataTypes.h
llvm::TargetTransformInfo::enableAggressiveInterleaving
bool enableAggressiveInterleaving(bool LoopHasReductions) const
Don't restrict interleaved unrolling to small loops.
Definition: TargetTransformInfo.cpp:495
llvm::TargetTransformInfo::Concept::areInlineCompatible
virtual bool areInlineCompatible(const Function *Caller, const Function *Callee) const =0
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across all SmallPtrSet instances.
Definition: SmallPtrSet.h:343
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::TargetTransformInfo::hasActiveVectorLength
bool hasActiveVectorLength() const
Definition: TargetTransformInfo.cpp:1057
llvm::TargetTransformInfo::UnrollingPreferences::OptSizeThreshold
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
Definition: TargetTransformInfo.h:450
llvm::TargetTransformInfo::Concept::getMinVectorRegisterBitWidth
virtual unsigned getMinVectorRegisterBitWidth() const =0
llvm::TargetTransformInfo::getCallInstrCost
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency) const
Definition: TargetTransformInfo.cpp:876
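Note that call costs default to TCK_SizeAndLatency rather than reciprocal throughput. A hedged sketch of costing a call to a function F taking and returning i32 (F and TTI assumed to come from the caller):
  llvm::Type *I32 = llvm::Type::getInt32Ty(F->getContext());
  // Size-and-latency cost of calling F with one i32 argument.
  llvm::InstructionCost CallCost =
      TTI.getCallInstrCost(F, /*RetTy=*/I32, /*Tys=*/{I32});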
llvm::TargetTransformInfo::getOperandInfo
static OperandValueKind getOperandInfo(const Value *V, OperandValueProperties &OpProps)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
Definition: TargetTransformInfo.cpp:665
llvm::TargetTransformInfo::getExtractWithExtendCost
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index=-1) const
Definition: TargetTransformInfo.cpp:791
llvm::MemIntrinsicInfo::MatchingId
unsigned short MatchingId
Definition: TargetTransformInfo.h:81
llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition: TargetTransformInfo.h:263
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3212
llvm::TargetTransformInfo::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueKind Opd1Info=OK_AnyValue, OperandValueKind Opd2Info=OK_AnyValue, OperandValueProperties Opd1PropInfo=OP_None, OperandValueProperties Opd2PropInfo=OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
Definition: TargetTransformInfo.cpp:714
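As an illustration of the cost interface (a sketch only; reportAddCosts is a hypothetical helper, and TTI would normally come from TargetIRAnalysis), the following compares the default reciprocal-throughput cost of a scalar i32 add with the same operation on a <4 x i32> vector:
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/raw_ostream.h"

// Hypothetical helper comparing scalar and vector add costs for this target.
static void reportAddCosts(const llvm::TargetTransformInfo &TTI,
                           llvm::LLVMContext &Ctx) {
  llvm::Type *I32 = llvm::Type::getInt32Ty(Ctx);
  llvm::InstructionCost Scalar =
      TTI.getArithmeticInstrCost(llvm::Instruction::Add, I32);
  llvm::InstructionCost Vector = TTI.getArithmeticInstrCost(
      llvm::Instruction::Add, llvm::FixedVectorType::get(I32, 4));
  llvm::errs() << "scalar add: " << Scalar
               << ", <4 x i32> add: " << Vector << "\n";
}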
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::TargetTransformInfo::Concept::getUserCost
virtual InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind)=0
llvm::TargetTransformInfo::Concept::getPreferredAddressingMode
virtual AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const =0
llvm::IntrinsicCostAttributes::isTypeBasedOnly
bool isTypeBasedOnly() const
Definition: TargetTransformInfo.h:156
llvm::TargetTransformInfo::isProfitableToHoist
bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist the instruction in the then/else blocks to before the if.
Definition: TargetTransformInfo.cpp:448
llvm::TargetTransformInfo::shouldBuildRelLookupTables
bool shouldBuildRelLookupTables() const
Return true if lookup tables should be turned into relative lookup tables.
Definition: TargetTransformInfo.cpp:471
llvm::TargetTransformInfo::Concept::getMaxInterleaveFactor
virtual unsigned getMaxInterleaveFactor(unsigned VF)=0
llvm::TargetTransformInfo::Concept::getStoreVectorFactor
virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const =0
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3068
llvm::TargetTransformInfo::Concept::getLoadStoreVecRegBitWidth
virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const =0
llvm::TargetTransformInfo::~TargetTransformInfo
~TargetTransformInfo()
Definition: TargetTransformInfo.cpp:182
llvm::TargetTransformInfo::Concept::getCacheLineSize
virtual unsigned getCacheLineSize() const =0
llvm::AMDGPU::VGPRIndexMode::Id
Id
Definition: SIDefines.h:231
llvm::TargetTransformInfo::Concept::canHaveNonUndefGlobalInitializerInAddressSpace
virtual bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const =0
llvm::TargetTransformInfo::Concept::adjustInliningThreshold
virtual unsigned adjustInliningThreshold(const CallBase *CB)=0
llvm::TargetTransformInfo::Concept::getIntImmCostIntrin
virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind)=0
llvm::TargetTransformInfo::OK_NonUniformConstantValue
@ OK_NonUniformConstantValue
Definition: TargetTransformInfo.h:882
llvm::TargetTransformInfo::getTgtMemIntrinsic
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
Definition: TargetTransformInfo.cpp:932
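A sketch of the EarlyCSE-style pattern for treating a recognized target intrinsic like an ordinary memory access (II is assumed to be the llvm::IntrinsicInst* being visited):
  llvm::MemIntrinsicInfo Info;
  if (TTI.getTgtMemIntrinsic(II, Info) && !Info.IsVolatile) {
    // The target has described the intrinsic; Info.MatchingId pairs the
    // corresponding load and store intrinsics for redundancy elimination.
  }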
llvm::TargetTransformInfo::Concept::isLegalMaskedCompressStore
virtual bool isLegalMaskedCompressStore(Type *DataType)=0
llvm::TargetTransformInfo::useAA
bool useAA() const
Definition: TargetTransformInfo.cpp:452
llvm::TargetTransformInfo::Concept::getInliningThresholdMultiplier
virtual unsigned getInliningThresholdMultiplier()=0
llvm::HardwareLoopInfo::CountType
IntegerType * CountType
Definition: TargetTransformInfo.h:102
llvm::TargetTransformInfo::enableWritePrefetching
bool enableWritePrefetching() const
Definition: TargetTransformInfo.cpp:656
llvm::TargetTransformInfo::getFPOpCost
InstructionCost getFPOpCost(Type *Ty) const
Return the expected cost of supporting the floating point operation of the specified type.
Definition: TargetTransformInfo.cpp:539
llvm::TargetTransformInfo::Concept::getMemoryOpCost
virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I)=0
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::TargetTransformInfo::TCK_RecipThroughput
@ TCK_RecipThroughput
Reciprocal throughput.
Definition: TargetTransformInfo.h:212
llvm::IntrinsicCostAttributes::getArgs
const SmallVectorImpl< const Value * > & getArgs() const
Definition: TargetTransformInfo.h:153
llvm::TargetTransformInfo::AMK_None
@ AMK_None
Definition: TargetTransformInfo.h:642
llvm::TargetTransformInfo::SK_ExtractSubvector
@ SK_ExtractSubvector
ExtractSubvector Index indicates start offset.
Definition: TargetTransformInfo.h:867
llvm::TargetTransformInfo::AMK_PreIndexed
@ AMK_PreIndexed
Definition: TargetTransformInfo.h:640
llvm::AtomicOrdering::NotAtomic
@ NotAtomic
llvm::TargetTransformInfo::Concept::isLegalToVectorizeStore
virtual bool isLegalToVectorizeStore(StoreInst *SI) const =0
llvm::TargetTransformInfo::isAlwaysUniform
bool isAlwaysUniform(const Value *V) const
Definition: TargetTransformInfo.cpp:244
llvm::TargetTransformInfo::Concept::getMemcpyCost
virtual InstructionCost getMemcpyCost(const Instruction *I)=0
llvm::TargetTransformInfo::isLegalAddressingMode
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of the specified type.
Definition: TargetTransformInfo.cpp:339
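Loop Strength Reduction and similar passes ask this question for each candidate addressing formula. A minimal sketch, assuming TTI and an LLVMContext Ctx from the surrounding pass:
  // Is "base register + 4 * scaled register" legal for an i32 access in
  // address space 0 on this target?
  bool Ok = TTI.isLegalAddressingMode(llvm::Type::getInt32Ty(Ctx),
                                      /*BaseGV=*/nullptr, /*BaseOffset=*/0,
                                      /*HasBaseReg=*/true, /*Scale=*/4);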
llvm::TargetTransformInfo::getMemcpyCost
InstructionCost getMemcpyCost(const Instruction *I) const
Definition: TargetTransformInfo.cpp:896
llvm::TargetTransformInfo::Concept::simplifyDemandedUseBitsIntrinsic
virtual Optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)=0
llvm::TargetTransformInfo::UnrollingPreferences::MaxPercentThresholdBoost
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain perc...
Definition: TargetTransformInfo.h:447
llvm::TargetTransformInfo::Concept::getVectorInstrCost
virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)=0
llvm::TargetTransformInfo::Concept::hasActiveVectorLength
virtual bool hasActiveVectorLength() const =0
llvm::TargetTransformInfo::Concept::getUnrollingPreferences
virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)=0
llvm::MemIntrinsicInfo::IsVolatile
bool IsVolatile
Definition: TargetTransformInfo.h:85
llvm::TargetTransformInfo::Concept::getMinPrefetchStride
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const =0
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37
llvm::IntrinsicCostAttributes::IntrinsicCostAttributes
IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, InstructionCost ScalarCost=InstructionCost::getInvalid())
Definition: TargetTransformInfo.cpp:57