//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This pass exposes codegen information to IR-level passes. Every
/// transformation that uses codegen information is broken into three parts:
/// 1. The IR-level analysis pass.
/// 2. The IR-level transformation interface, which provides the needed
///    information.
/// 3. The codegen-level implementation, which uses target-specific hooks.
///
/// This file defines #2, which is the interface that IR-level transformations
/// use for querying the codegen.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

#include "llvm/ADT/Optional.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/InstructionCost.h"
#include <functional>

namespace llvm {

namespace Intrinsic {
typedef unsigned ID;
}

class AssumptionCache;
class BlockFrequencyInfo;
class DominatorTree;
class BranchInst;
class CallBase;
class ExtractElementInst;
class Function;
class GlobalValue;
class InstCombiner;
class IntrinsicInst;
class LoadInst;
class LoopAccessInfo;
class Loop;
class LoopInfo;
class ProfileSummaryInfo;
class SCEV;
class ScalarEvolution;
class StoreInst;
class SwitchInst;
class TargetLibraryInfo;
class Type;
class User;
class Value;
struct KnownBits;
template <typename T> class Optional;

/// Information about a load/store intrinsic defined by the target.
struct MemIntrinsicInfo {
  /// This is the pointer that the intrinsic is loading from or storing to.
  /// If this is non-null, then analysis/optimization passes can assume that
  /// this intrinsic is functionally equivalent to a load/store from this
  /// pointer.
  Value *PtrVal = nullptr;

  // Ordering for atomic operations.
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;

  // Same Id is set by the target for corresponding load/store intrinsics.
  unsigned short MatchingId = 0;

  bool ReadMem = false;
  bool WriteMem = false;
  bool IsVolatile = false;

  bool isUnordered() const {
    return (Ordering == AtomicOrdering::NotAtomic ||
            Ordering == AtomicOrdering::Unordered) &&
           !IsVolatile;
  }
};

/// Attributes of a target dependent hardware loop.
struct HardwareLoopInfo {
  HardwareLoopInfo() = delete;
  HardwareLoopInfo(Loop *L) : L(L) {}
  Loop *L = nullptr;
  BasicBlock *ExitBlock = nullptr;
  BranchInst *ExitBranch = nullptr;
  const SCEV *TripCount = nullptr;
  IntegerType *CountType = nullptr;
  Value *LoopDecrement = nullptr; // Decrement the loop counter by this
                                  // value in every iteration.
  bool IsNestingLegal = false;    // Can a hardware loop be a parent to
                                  // another hardware loop?
  bool CounterInReg = false;      // Should loop counter be updated in
                                  // the loop via a phi?
  bool PerformEntryTest = false;  // Generate the intrinsic which also performs
                                  // icmp ne zero on the loop counter value and
                                  // produces an i1 to guard the loop entry.
  bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,
                               DominatorTree &DT, bool ForceNestedLoop = false,
                               bool ForceHardwareLoopPHI = false);
  bool canAnalyze(LoopInfo &LI);
};

class IntrinsicCostAttributes {
  const IntrinsicInst *II = nullptr;
  Type *RetTy = nullptr;
  Intrinsic::ID IID;
  SmallVector<Type *, 4> ParamTys;
  SmallVector<const Value *, 4> Arguments;
  FastMathFlags FMF;
  // If ScalarizationCost is UINT_MAX, the cost of scalarizing the
  // arguments and the return value will be computed based on types.
  unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();

public:
  IntrinsicCostAttributes(
      Intrinsic::ID Id, const CallBase &CI,
      unsigned ScalarizationCost = std::numeric_limits<unsigned>::max());

  IntrinsicCostAttributes(
      Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys,
      FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr,
      unsigned ScalarCost = std::numeric_limits<unsigned>::max());

  IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
                          ArrayRef<const Value *> Args);

  IntrinsicCostAttributes(
      Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args,
      ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(),
      const IntrinsicInst *I = nullptr,
      unsigned ScalarCost = std::numeric_limits<unsigned>::max());

  Intrinsic::ID getID() const { return IID; }
  const IntrinsicInst *getInst() const { return II; }
  Type *getReturnType() const { return RetTy; }
  FastMathFlags getFlags() const { return FMF; }
  unsigned getScalarizationCost() const { return ScalarizationCost; }
  const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }
  const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }

  bool isTypeBasedOnly() const {
    return Arguments.empty();
  }

  bool skipScalarizationCost() const {
    return ScalarizationCost != std::numeric_limits<unsigned>::max();
  }
};
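
// For illustration, a type-based (argument-free) cost query for a fabs on
// <4 x float> might be assembled as follows; `TTI` (a TargetTransformInfo)
// and `Ctx` (an LLVMContext) are hypothetical objects:
//
//   Type *VecTy = FixedVectorType::get(Type::getFloatTy(Ctx), 4);
//   IntrinsicCostAttributes Attrs(Intrinsic::fabs, VecTy, {VecTy});
//   InstructionCost Cost = TTI.getIntrinsicInstrCost(
//       Attrs, TargetTransformInfo::TCK_RecipThroughput);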

class TargetTransformInfo;
typedef TargetTransformInfo TTI;

/// This pass provides access to the codegen interfaces that are needed
/// for IR-level transformations.
class TargetTransformInfo {
public:
  /// Construct a TTI object using a type implementing the \c Concept
  /// API below.
  ///
  /// This is used by targets to construct a TTI wrapping their target-specific
  /// implementation that encodes appropriate costs for their target.
  template <typename T> TargetTransformInfo(T Impl);

  /// Construct a baseline TTI object using a minimal implementation of
  /// the \c Concept API below.
  ///
  /// The TTI implementation will reflect the information in the DataLayout
  /// provided if non-null.
  explicit TargetTransformInfo(const DataLayout &DL);

  // Provide move semantics.
  TargetTransformInfo(TargetTransformInfo &&Arg);
  TargetTransformInfo &operator=(TargetTransformInfo &&RHS);

  // We need to define the destructor out-of-line to define our sub-classes
  // out-of-line.
  ~TargetTransformInfo();

  /// Handle the invalidation of this information.
  ///
  /// When used as a result of \c TargetIRAnalysis this method will be called
  /// when the function this was computed for changes. When it returns false,
  /// the information is preserved across those changes.
  bool invalidate(Function &, const PreservedAnalyses &,
                  FunctionAnalysisManager::Invalidator &) {
    // FIXME: We should probably in some way ensure that the subtarget
    // information for a function hasn't changed.
    return false;
  }

  /// \name Generic Target Information
  /// @{

  /// The kind of cost model.
  ///
  /// There are several different cost models that can be customized by the
  /// target. The normalization of each cost model may be target specific.
  enum TargetCostKind {
    TCK_RecipThroughput, ///< Reciprocal throughput.
    TCK_Latency,         ///< The latency of instruction.
    TCK_CodeSize,        ///< Instruction code size.
    TCK_SizeAndLatency   ///< The weighted sum of size and latency.
  };

  /// Query the cost of a specified instruction.
  ///
  /// Clients should use this interface to query the cost of an existing
  /// instruction. The instruction must have a valid parent (basic block).
  ///
  /// Note, this method does not cache the cost calculation and it
  /// can be expensive in some cases.
  InstructionCost getInstructionCost(const Instruction *I,
                                     enum TargetCostKind kind) const {
    InstructionCost Cost;
    switch (kind) {
    case TCK_RecipThroughput:
      Cost = getInstructionThroughput(I);
      break;
    case TCK_Latency:
      Cost = getInstructionLatency(I);
      break;
    case TCK_CodeSize:
    case TCK_SizeAndLatency:
      Cost = getUserCost(I, kind);
      break;
    }
    return Cost;
  }
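
  // For illustration, a client could sum the reciprocal-throughput cost of a
  // basic block; `TTI` (a TargetTransformInfo) and `BB` (a BasicBlock *) are
  // hypothetical:
  //
  //   InstructionCost BlockCost = 0;
  //   for (const Instruction &Inst : *BB)
  //     BlockCost += TTI.getInstructionCost(
  //         &Inst, TargetTransformInfo::TCK_RecipThroughput);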

  /// Underlying constants for 'cost' values in this interface.
  ///
  /// Many APIs in this interface return a cost. This enum defines the
  /// fundamental values that should be used to interpret (and produce) those
  /// costs. The costs are returned as an int rather than a member of this
  /// enumeration because it is expected that the cost of one IR instruction
  /// may have a multiplicative factor to it or otherwise won't fit directly
  /// into the enum. Moreover, it is common to sum or average costs, which
  /// works better with simple integral values. Thus this enum only provides
  /// constants. Also note that the returned costs are signed integers to make
  /// it natural to add, subtract, and test with zero (a common boundary
  /// condition). It is not expected that 2^32 is a realistic cost to be
  /// modeling at any point.
  ///
  /// Note that these costs should usually reflect the intersection of code-size
  /// cost and execution cost. A free instruction is typically one that folds
  /// into another instruction. For example, reg-to-reg moves can often be
  /// skipped by renaming the registers in the CPU, but they still are encoded
  /// and thus wouldn't be considered 'free' here.
  enum TargetCostConstants {
    TCC_Free = 0,     ///< Expected to fold away in lowering.
    TCC_Basic = 1,    ///< The cost of a typical 'add' instruction.
    TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
  };

  /// Estimate the cost of a GEP operation when lowered.
  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands,
                 TargetCostKind CostKind = TCK_SizeAndLatency) const;

  /// \returns A value by which our inlining threshold should be multiplied.
  /// This is primarily used to bump up the inlining threshold wholesale on
  /// targets where calls are unusually expensive.
  ///
  /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
  /// individual classes of instructions would be better.
  unsigned getInliningThresholdMultiplier() const;

  /// \returns A value to be added to the inlining threshold.
  unsigned adjustInliningThreshold(const CallBase *CB) const;

  /// \returns Vector bonus in percent.
  ///
  /// Vector bonuses: We want to more aggressively inline vector-dense kernels
  /// and apply this bonus based on the percentage of vector instructions. A
  /// bonus is applied if the vector instructions exceed 50% and half that
  /// amount is applied if it exceeds 10%. Note that these bonuses are somewhat
  /// arbitrary and evolved over time by accident as much as because they are
  /// principled bonuses.
  /// FIXME: It would be nice to base the bonus values on something more
  /// scientific. A target may have no bonus on vector instructions.
  int getInlinerVectorBonusPercent() const;

  /// \return the expected cost of a memcpy, which could e.g. depend on the
  /// source/destination type and alignment and the number of bytes copied.
  int getMemcpyCost(const Instruction *I) const;

  /// \return The estimated number of case clusters when lowering \p SI.
  /// \p JTSize is set to the jump table size only when \p SI is suitable for
  /// a jump table.
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) const;

  /// Estimate the cost of a given IR user when lowered.
  ///
  /// This can estimate the cost of either a ConstantExpr or Instruction when
  /// lowered.
  ///
  /// \p Operands is a list of operands which can be a result of transformations
  /// of the current operands. The number of operands on the list must equal
  /// the number of current operands the IR user has. Their order on the
  /// list must be the same as the order of the current operands the IR user
  /// has.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
  InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
                              TargetCostKind CostKind) const;

  /// This is a helper function which calls the two-argument getUserCost
  /// with \p Operands which are the current operands U has.
  InstructionCost getUserCost(const User *U, TargetCostKind CostKind) const {
    SmallVector<const Value *, 4> Operands(U->operand_values());
    return getUserCost(U, Operands, CostKind);
  }

  /// If a branch or a select condition is skewed in one direction by more than
  /// this factor, it is very likely to be predicted correctly.
  BranchProbability getPredictableBranchThreshold() const;

  /// Return true if branch divergence exists.
  ///
  /// Branch divergence has a significantly negative impact on GPU performance
  /// when threads in the same wavefront take different paths due to conditional
  /// branches.
  bool hasBranchDivergence() const;

  /// Return true if the target prefers to use GPU divergence analysis to
  /// replace the legacy version.
  bool useGPUDivergenceAnalysis() const;

  /// Returns whether V is a source of divergence.
  ///
  /// This function provides the target-dependent information for
  /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis
  /// first builds the dependency graph, and then runs the reachability
  /// algorithm starting with the sources of divergence.
  bool isSourceOfDivergence(const Value *V) const;

  // Returns true for the target specific
  // set of operations which produce uniform result
  // even taking non-uniform arguments
  bool isAlwaysUniform(const Value *V) const;

  /// Returns the address space ID for a target's 'flat' address space. Note
  /// this is not necessarily the same as addrspace(0), which LLVM sometimes
  /// refers to as the generic address space. The flat address space is a
  /// generic address space that can be used to access multiple segments of
  /// memory with different address spaces. Access of a memory location through
  /// a pointer with this address space is expected to be legal but slower
  /// compared to the same memory location accessed through a pointer with a
  /// different address space.
  ///
  /// This is for targets with different pointer representations which can
  /// be converted with the addrspacecast instruction. If a pointer is converted
  /// to this address space, optimizations should attempt to replace the access
  /// with the source address space.
  ///
  /// \returns ~0u if the target does not have such a flat address space to
  /// optimize away.
  unsigned getFlatAddressSpace() const;

  /// Return any intrinsic address operand indexes which may be rewritten if
  /// they use a flat address space pointer.
  ///
  /// \returns true if the intrinsic was handled.
  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const;

  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;

  unsigned getAssumedAddrSpace(const Value *V) const;

  /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
  /// NewV, which has a different address space. This should happen for every
  /// operand index that collectFlatAddressOperands returned for the intrinsic.
  /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the
  /// new value (which may be the original \p II with modified operands).
  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const;

  /// Test whether calls to a function lower to actual program function
  /// calls.
  ///
  /// The idea is to test whether the program is likely to require a 'call'
  /// instruction or equivalent in order to call the given function.
  ///
  /// FIXME: It's not clear that this is a good or useful query API. Clients
  /// should probably move to simpler cost metrics using the above.
  /// Alternatively, we could split the cost interface into distinct code-size
  /// and execution-speed costs. This would allow modelling the core of this
  /// query more accurately as a call is a single small instruction, but
  /// incurs significant execution cost.
  bool isLoweredToCall(const Function *F) const;

  struct LSRCost {
    /// TODO: Some of these could be merged. Also, a lexical ordering
    /// isn't always optimal.
    unsigned Insns;
    unsigned NumRegs;
    unsigned AddRecCost;
    unsigned NumIVMuls;
    unsigned NumBaseAdds;
    unsigned ImmCost;
    unsigned SetupCost;
    unsigned ScaleCost;
  };

  /// Parameters that control the generic loop unrolling transformation.
  struct UnrollingPreferences {
    /// The cost threshold for the unrolled loop. Should be relative to the
    /// getUserCost values returned by this API, and the expectation is that
    /// the unrolled loop's instructions when run through that interface should
    /// not exceed this cost. However, this is only an estimate. Also, specific
    /// loops may be unrolled even with a cost above this threshold if deemed
    /// profitable. Set this to UINT_MAX to disable the loop body cost
    /// restriction.
    unsigned Threshold;
    /// If complete unrolling will reduce the cost of the loop, we will boost
    /// the Threshold by a certain percent to allow more aggressive complete
    /// unrolling. This value provides the maximum boost percentage that we
    /// can apply to Threshold (the value should be no less than 100).
    /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
    ///                                    MaxPercentThresholdBoost / 100)
    /// E.g. if complete unrolling reduces the loop execution time by 50%
    /// then we boost the threshold by the factor of 2x. If unrolling is not
    /// expected to reduce the running time, then we do not increase the
    /// threshold.
    unsigned MaxPercentThresholdBoost;
    /// The cost threshold for the unrolled loop when optimizing for size (set
    /// to UINT_MAX to disable).
    unsigned OptSizeThreshold;
    /// The cost threshold for the unrolled loop, like Threshold, but used
    /// for partial/runtime unrolling (set to UINT_MAX to disable).
    unsigned PartialThreshold;
    /// The cost threshold for the unrolled loop when optimizing for size, like
    /// OptSizeThreshold, but used for partial/runtime unrolling (set to
    /// UINT_MAX to disable).
    unsigned PartialOptSizeThreshold;
    /// A forced unrolling factor (the number of concatenated bodies of the
    /// original loop in the unrolled loop body). When set to 0, the unrolling
    /// transformation will select an unrolling factor based on the current cost
    /// threshold and other factors.
    unsigned Count;
    /// Default unroll count for loops with run-time trip count.
    unsigned DefaultUnrollRuntimeCount;
    // Set the maximum unrolling factor. The unrolling factor may be selected
    // using the appropriate cost threshold, but may not exceed this number
    // (set to UINT_MAX to disable). This does not apply in cases where the
    // loop is being fully unrolled.
    unsigned MaxCount;
    /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
    /// applies even if full unrolling is selected. This allows a target to fall
    /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
    unsigned FullUnrollMaxCount;
    // Represents number of instructions optimized when the "back edge"
    // becomes a "fall through" in the unrolled loop.
    // For now we count a conditional branch on a backedge and a comparison
    // feeding it.
    unsigned BEInsns;
    /// Allow partial unrolling (unrolling of loops to expand the size of the
    /// loop body, not only to eliminate small constant-trip-count loops).
    bool Partial;
    /// Allow runtime unrolling (unrolling of loops to expand the size of the
    /// loop body even when the number of loop iterations is not known at
    /// compile time).
    bool Runtime;
    /// Allow generation of a loop remainder (extra iterations after unroll).
    bool AllowRemainder;
    /// Allow emitting expensive instructions (such as divisions) when computing
    /// the trip count of a loop for runtime unrolling.
    bool AllowExpensiveTripCount;
    /// Apply loop unroll on any kind of loop
    /// (mainly to loops that fail runtime unrolling).
    bool Force;
    /// Allow using the trip count upper bound to unroll loops.
    bool UpperBound;
    /// Allow unrolling of all the iterations of the runtime loop remainder.
    bool UnrollRemainder;
    /// Allow unroll and jam. Used to enable unroll and jam for the target.
    bool UnrollAndJam;
    /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
    /// value above is used during unroll and jam for the outer loop size.
    /// This value is used in the same manner to limit the size of the inner
    /// loop.
    unsigned UnrollAndJamInnerLoopThreshold;
    /// Don't allow loop unrolling to simulate more than this number of
    /// iterations when checking full unroll profitability.
    unsigned MaxIterationsCountToAnalyze;
  };

  /// Get target-customized preferences for the generic loop unrolling
  /// transformation. The caller will initialize UP with the current
  /// target-independent defaults.
  void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                               UnrollingPreferences &UP) const;
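
  // For illustration, a target implementation might constrain runtime
  // unrolling with a sketch like the following; `MyTTIImpl` and the chosen
  // values are hypothetical:
  //
  //   void MyTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
  //                                           TTI::UnrollingPreferences &UP) {
  //     UP.Partial = true; // Allow partial unrolling,
  //     UP.Runtime = true; // and runtime unrolling,
  //     UP.MaxCount = 4;   // but never by more than a factor of 4.
  //   }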

  /// Query the target whether it would be profitable to convert the given loop
  /// into a hardware loop.
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) const;

  /// Query the target whether it would be preferred to create a predicated
  /// vector loop, which can avoid the need to emit a scalar epilogue loop.
  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI) const;

  /// Query the target whether lowering of the llvm.get.active.lane.mask
  /// intrinsic is supported.
  bool emitGetActiveLaneMask() const;

  // Parameters that control the loop peeling transformation
  struct PeelingPreferences {
    /// A forced peeling factor (the number of bodies of the original loop
    /// that should be peeled off before the loop body). When set to 0, a
    /// peeling factor is selected based on profile information and other
    /// factors.
    unsigned PeelCount;
    /// Allow peeling off loop iterations.
    bool AllowPeeling;
    /// Allow peeling off loop iterations for loop nests.
    bool AllowLoopNestsPeeling;
    /// Allow peeling based on profile. Used to enable peeling off all
    /// iterations based on the provided profile.
    /// If the value is true the peeling cost model can decide to peel only
    /// some iterations and in this case it will set this to false.
    bool PeelProfiledIterations;
  };

  /// Get target-customized preferences for the generic loop peeling
  /// transformation. The caller will initialize \p PP with the current
  /// target-independent defaults with information from \p L and \p SE.
  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             PeelingPreferences &PP) const;

  /// Targets can implement their own combinations for target-specific
  /// intrinsics. This function will be called from the InstCombine pass every
  /// time a target-specific intrinsic is encountered.
  ///
  /// \returns None to not do anything target specific, or a value that will be
  /// returned from the InstCombiner. It is possible to stop further processing
  /// of the intrinsic by returning nullptr.
  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const;
  /// Can be used to implement target-specific instruction combining.
  /// \see instCombineIntrinsic
  Optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const;
  /// Can be used to implement target-specific instruction combining.
  /// \see instCombineIntrinsic
  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;
  /// @}

  /// \name Scalar Target Information
  /// @{

  /// Flags indicating the kind of support for population count.
  ///
  /// Compared to the SW implementation, HW support is supposed to
  /// significantly boost the performance when the population is dense, and it
  /// may or may not degrade performance if the population is sparse. A HW
  /// support is considered as "Fast" if it can outperform, or is on a par
  /// with, the SW implementation when the population is sparse; otherwise, it
  /// is considered as "Slow".
  enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };

  /// Return true if the specified immediate is a legal add immediate, that
  /// is, the target has add instructions which can add a register with the
  /// immediate without having to materialize the immediate into a register.
  bool isLegalAddImmediate(int64_t Imm) const;

  /// Return true if the specified immediate is a legal icmp immediate,
  /// that is, the target has icmp instructions which can compare a register
  /// against the immediate without having to materialize the immediate into a
  /// register.
  bool isLegalICmpImmediate(int64_t Imm) const;

  /// Return true if the addressing mode represented by AM is legal for
  /// this target, for a load/store of the specified type.
  /// The type may be VoidTy, in which case only return true if the addressing
  /// mode is legal for a load/store of any legal type.
  /// If target returns true in LSRWithInstrQueries(), I may be valid.
  /// TODO: Handle pre/postinc as well.
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace = 0,
                             Instruction *I = nullptr) const;
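
  // For illustration, checking whether a [BaseReg + 4*IndexReg + 16] mode is
  // legal for an i32 access could look like this; `TTI` and `Ctx` are
  // hypothetical:
  //
  //   bool Legal = TTI.isLegalAddressingMode(
  //       Type::getInt32Ty(Ctx), /*BaseGV=*/nullptr, /*BaseOffset=*/16,
  //       /*HasBaseReg=*/true, /*Scale=*/4);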

  /// Return true if the LSR cost of C1 is lower than C2.
  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                     TargetTransformInfo::LSRCost &C2) const;

  /// Return true if LSR major cost is number of registers. Targets which
  /// implement their own isLSRCostLess and unset number of registers as major
  /// cost should return false, otherwise return true.
  bool isNumRegsMajorCostOfLSR() const;

  /// \returns true if LSR should not optimize a chain that includes \p I.
  bool isProfitableLSRChainElement(Instruction *I) const;

  /// Return true if the target can fuse a compare and branch.
  /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
  /// calculation for the instructions in a loop.
  bool canMacroFuseCmp() const;

  /// Return true if the target can save a compare for loop count, for example
  /// hardware loop saves a compare.
  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) const;

  enum AddressingModeKind {
    AMK_PreIndexed,
    AMK_PostIndexed,
    AMK_None
  };

  /// Return the preferred addressing mode LSR should make efforts to generate.
  AddressingModeKind getPreferredAddressingMode(const Loop *L,
                                                ScalarEvolution *SE) const;

  /// Return true if the target supports masked store.
  bool isLegalMaskedStore(Type *DataType, Align Alignment) const;
  /// Return true if the target supports masked load.
  bool isLegalMaskedLoad(Type *DataType, Align Alignment) const;

  /// Return true if the target supports nontemporal store.
  bool isLegalNTStore(Type *DataType, Align Alignment) const;
  /// Return true if the target supports nontemporal load.
  bool isLegalNTLoad(Type *DataType, Align Alignment) const;

  /// Return true if the target supports masked scatter.
  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
  /// Return true if the target supports masked gather.
  bool isLegalMaskedGather(Type *DataType, Align Alignment) const;

  /// Return true if the target supports masked compress store.
  bool isLegalMaskedCompressStore(Type *DataType) const;
  /// Return true if the target supports masked expand load.
  bool isLegalMaskedExpandLoad(Type *DataType) const;
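
  // For illustration, a vectorizer choosing between a masked load and a
  // gather for a hypothetical <8 x float> access might ask:
  //
  //   auto *VecTy = FixedVectorType::get(Type::getFloatTy(Ctx), 8);
  //   bool UseMasked = TTI.isLegalMaskedLoad(VecTy, Align(4));
  //   bool UseGather = !UseMasked && TTI.isLegalMaskedGather(VecTy, Align(4));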

  /// Return true if the target has a unified operation to calculate division
  /// and remainder. If so, the additional implicit multiplication and
  /// subtraction required to calculate a remainder from division are free. This
  /// can enable more aggressive transformations for division and remainder than
  /// would typically be allowed using throughput or size cost models.
  bool hasDivRemOp(Type *DataType, bool IsSigned) const;

  /// Return true if the given instruction (assumed to be a memory access
  /// instruction) has a volatile variant. If that's the case then we can avoid
  /// addrspacecast to generic AS for volatile loads/stores. Default
  /// implementation returns false, which prevents address space inference for
  /// volatile loads/stores.
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;

  /// Return true if target doesn't mind addresses in vectors.
  bool prefersVectorizedAddressing() const;

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  /// TODO: Handle pre/postinc as well.
  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale,
                           unsigned AddrSpace = 0) const;

  /// Return true if the loop strength reduce pass should make
  /// Instruction* based TTI queries to isLegalAddressingMode(). This is
  /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
  /// immediate offset and no index register.
  bool LSRWithInstrQueries() const;

  /// Return true if it's free to truncate a value of type Ty1 to type
  /// Ty2. e.g. On x86 it's free to truncate an i32 value in register EAX to
  /// i16 by referencing its sub-register AX.
  bool isTruncateFree(Type *Ty1, Type *Ty2) const;

  /// Return true if it is profitable to hoist instruction in the
  /// then/else to before if.
  bool isProfitableToHoist(Instruction *I) const;

  bool useAA() const;

  /// Return true if this type is legal.
  bool isTypeLegal(Type *Ty) const;

  /// Returns the estimated number of registers required to represent \p Ty.
  unsigned getRegUsageForType(Type *Ty) const;

  /// Return true if switches should be turned into lookup tables for the
  /// target.
  bool shouldBuildLookupTables() const;

  /// Return true if switches should be turned into lookup tables
  /// containing this constant value for the target.
  bool shouldBuildLookupTablesForConstant(Constant *C) const;

  /// Return true if lookup tables should be turned into relative lookup tables.
  bool shouldBuildRelLookupTables() const;

  /// Return true if the input function, which is cold at all call sites,
  /// should use the coldcc calling convention.
  bool useColdCCForColdCall(Function &F) const;

  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
  /// are set if the demanded result elements need to be inserted and/or
  /// extracted from vectors.
  unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
                                    bool Insert, bool Extract) const;

  /// Estimate the overhead of scalarizing an instruction's unique
  /// non-constant operands. The (potentially vector) types to use for each
  /// argument are passed via Tys.
  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            ArrayRef<Type *> Tys) const;

  /// If target has efficient vector element load/store instructions, it can
  /// return true here so that insertion/extraction costs are not added to
  /// the scalarization cost of a load/store.
  bool supportsEfficientVectorElementLoadStore() const;

  /// Don't restrict interleaved unrolling to small loops.
  bool enableAggressiveInterleaving(bool LoopHasReductions) const;

  /// Returns options for expansion of memcmp. IsZeroCmp is
  /// true if this is the expansion of memcmp(p1, p2, s) == 0.
  struct MemCmpExpansionOptions {
    // Return true if memcmp expansion is enabled.
    operator bool() const { return MaxNumLoads > 0; }

    // Maximum number of load operations.
    unsigned MaxNumLoads = 0;

    // The list of available load sizes (in bytes), sorted in decreasing order.
    SmallVector<unsigned, 8> LoadSizes;

    // For memcmp expansion when the memcmp result is only compared equal or
    // not-equal to 0, allow up to this number of load pairs per block. As an
    // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
    //   a0 = load2bytes &a[0]
    //   b0 = load2bytes &b[0]
    //   a2 = load1byte  &a[2]
    //   b2 = load1byte  &b[2]
    //   r  = cmp eq (a0 ^ b0 | a2 ^ b2), 0
    unsigned NumLoadsPerBlock = 1;

    // Set to true to allow overlapping loads. For example, 7-byte compares can
    // be done with two 4-byte compares instead of 4+2+1-byte compares. This
    // requires all loads in LoadSizes to be doable in an unaligned way.
    bool AllowOverlappingLoads = false;
  };
  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                               bool IsZeroCmp) const;
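
  // For illustration, a target able to issue two overlapping 4-byte loads per
  // block might return options shaped like this from its implementation; the
  // values are hypothetical:
  //
  //   MemCmpExpansionOptions Options;
  //   Options.MaxNumLoads = 8;
  //   Options.LoadSizes = {4, 2, 1};
  //   Options.NumLoadsPerBlock = 2;
  //   Options.AllowOverlappingLoads = true;
  //   return Options;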

  /// Enable matching of interleaved access groups.
  bool enableInterleavedAccessVectorization() const;

  /// Enable matching of interleaved access groups that contain predicated
  /// accesses or gaps and are therefore vectorized using masked
  /// vector loads/stores.
  bool enableMaskedInterleavedAccessVectorization() const;

  /// Indicate that it is potentially unsafe to automatically vectorize
  /// floating-point operations because the semantics of vector and scalar
  /// floating-point operations may differ. For example, ARM NEON v7 SIMD math
  /// does not support IEEE-754 denormal numbers, while depending on the
  /// platform, scalar floating-point math does.
  /// This applies to floating-point math operations and calls, not memory
  /// operations, shuffles, or casts.
  bool isFPVectorizationPotentiallyUnsafe() const;

  /// Determine if the target supports unaligned memory accesses.
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace = 0,
                                      Align Alignment = Align(1),
                                      bool *Fast = nullptr) const;

  /// Return hardware support for population count.
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;

  /// Return true if the hardware has a fast square-root instruction.
  bool haveFastSqrt(Type *Ty) const;

  /// Return true if it is faster to check if a floating-point value is NaN
  /// (or not-NaN) versus a comparison against a constant FP zero value.
  /// Targets should override this if materializing a 0.0 for comparison is
  /// generally as cheap as checking for ordered/unordered.
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;

  /// Return the expected cost of supporting the floating point operation
  /// of the specified type.
  InstructionCost getFPOpCost(Type *Ty) const;

  /// Return the expected cost of materializing the given integer
  /// immediate of the specified type.
  int getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const;

  /// Return the expected cost of materialization for the given integer
  /// immediate of the specified type for a given instruction. The cost can be
  /// zero if the immediate can be folded into the specified instruction.
  int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty,
                        TargetCostKind CostKind,
                        Instruction *Inst = nullptr) const;
  int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                          Type *Ty, TargetCostKind CostKind) const;

  /// Return the expected cost for the given integer when optimising
  /// for size. This is different than the other integer immediate cost
  /// functions in that it is subtarget agnostic. This is useful when you e.g.
  /// target one ISA such as AArch32 but smaller encodings could be possible
  /// with another such as Thumb. This return value is used as a penalty when
  /// the total cost for a constant is calculated (the bigger the cost, the
  /// more beneficial constant hoisting is).
  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                            Type *Ty) const;
  /// @}

  /// \name Vector Target Information
  /// @{

  /// The various kinds of shuffle patterns for vector queries.
  enum ShuffleKind {
    SK_Broadcast,        ///< Broadcast element 0 to all other elements.
    SK_Reverse,          ///< Reverse the order of the vector.
    SK_Select,           ///< Selects elements from the corresponding lane of
                         ///< either source operand. This is equivalent to a
                         ///< vector select with a constant condition operand.
    SK_Transpose,        ///< Transpose two vectors.
    SK_InsertSubvector,  ///< InsertSubvector. Index indicates start offset.
    SK_ExtractSubvector, ///< ExtractSubvector. Index indicates start offset.
    SK_PermuteTwoSrc,    ///< Merge elements from two source vectors into one
                         ///< with any shuffle mask.
    SK_PermuteSingleSrc  ///< Shuffle elements of single source vector with any
                         ///< shuffle mask.
  };

  /// Kind of the reduction data.
  enum ReductionKind {
    RK_None,           /// Not a reduction.
    RK_Arithmetic,     /// Binary reduction data.
    RK_MinMax,         /// Min/max reduction data.
    RK_UnsignedMinMax, /// Unsigned min/max reduction data.
  };

  /// Contains opcode + LHS/RHS parts of the reduction operations.
  struct ReductionData {
    ReductionData() = delete;
    ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)
        : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {
      assert(Kind != RK_None && "expected binary or min/max reduction only.");
    }
    unsigned Opcode = 0;
    Value *LHS = nullptr;
    Value *RHS = nullptr;
    ReductionKind Kind = RK_None;
    bool hasSameData(ReductionData &RD) const {
      return Kind == RD.Kind && Opcode == RD.Opcode;
    }
  };

  static ReductionKind matchPairwiseReduction(
      const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty);

  static ReductionKind matchVectorSplittingReduction(
      const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty);

  static ReductionKind matchVectorReduction(const ExtractElementInst *ReduxRoot,
                                            unsigned &Opcode, VectorType *&Ty,
                                            bool &IsPairwise);

  /// Additional information about an operand's possible values.
  enum OperandValueKind {
    OK_AnyValue,               // Operand can have any value.
    OK_UniformValue,           // Operand is uniform (splat of a value).
    OK_UniformConstantValue,   // Operand is uniform constant.
    OK_NonUniformConstantValue // Operand is a non uniform constant value.
  };

  /// Additional properties of an operand's values.
  enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };

  /// \return the number of registers in the target-provided register class.
  unsigned getNumberOfRegisters(unsigned ClassID) const;

  /// \return the target-provided register class ID for the provided type,
  /// accounting for type promotion and other type-legalization techniques that
  /// the target might apply. However, it specifically does not account for the
  /// scalarization or splitting of vector types. Should a vector type require
  /// scalarization or splitting into multiple underlying vector registers, that
  /// type should be mapped to a register class containing no registers.
  /// Specifically, this is designed to provide a simple, high-level view of the
  /// register allocation later performed by the backend. These register classes
  /// don't necessarily map onto the register classes used by the backend.
  /// FIXME: It's not currently possible to determine how many registers
  /// are used by the provided type.
  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;

  /// \return the target-provided register class name
  const char *getRegisterClassName(unsigned ClassID) const;
  enum RegisterKind { RGK_Scalar, RGK_FixedWidthVector, RGK_ScalableVector };

  /// \return The width of the largest scalar or vector register type.
  TypeSize getRegisterBitWidth(RegisterKind K) const;

  /// \return The width of the smallest vector register type.
  unsigned getMinVectorRegisterBitWidth() const;

  /// \return The maximum value of vscale if the target specifies an
  /// architectural maximum vector length, and None otherwise.
  Optional<unsigned> getMaxVScale() const;

  /// \return True if the vectorization factor should be chosen to
  /// make the vector of the smallest element type match the size of a
  /// vector register. For wider element types, this could result in
  /// creating vectors that span multiple vector registers.
  /// If false, the vectorization factor will be chosen based on the
  /// size of the widest element type.
  bool shouldMaximizeVectorBandwidth(bool OptSize) const;

  /// \return The minimum vectorization factor for types of given element
  /// bit width, or 0 if there is no minimum VF. The returned value only
  /// applies when shouldMaximizeVectorBandwidth returns true.
  /// If IsScalable is true, the returned ElementCount must be a scalable VF.
  ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;

  /// \return The maximum vectorization factor for types of given element
  /// bit width and opcode, or 0 if there is no maximum VF.
  /// Currently only used by the SLP vectorizer.
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;

  /// \return True if it should be considered for address type promotion.
  /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
  /// profitable without finding other extensions fed by the same input.
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;

  /// \return The size of a cache line in bytes.
  unsigned getCacheLineSize() const;

  /// The possible cache levels
  enum class CacheLevel {
    L1D, // The L1 data cache
    L2D, // The L2 data cache

    // We currently do not model L3 caches, as their sizes differ widely between
    // microarchitectures. Also, we currently do not have a use for L3 cache
    // size modeling yet.
  };

  /// \return The size of the cache level in bytes, if available.
  Optional<unsigned> getCacheSize(CacheLevel Level) const;

  /// \return The associativity of the cache level, if available.
  Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;

  /// \return How much before a load we should place the prefetch
  /// instruction. This is currently measured in number of
  /// instructions.
  unsigned getPrefetchDistance() const;

  /// Some HW prefetchers can handle accesses up to a certain constant stride.
  /// Sometimes prefetching is beneficial even below the HW prefetcher limit,
  /// and the arguments provided are meant to serve as a basis for deciding this
  /// for a particular loop.
  ///
  /// \param NumMemAccesses        Number of memory accesses in the loop.
  /// \param NumStridedMemAccesses Number of the memory accesses that
  ///                              ScalarEvolution could find a known stride
  ///                              for.
  /// \param NumPrefetches         Number of software prefetches that will be
  ///                              emitted as determined by the addresses
  ///                              involved and the cache line size.
  /// \param HasCall               True if the loop contains a call.
  ///
  /// \return This is the minimum stride in bytes where it makes sense to start
  ///         adding SW prefetches. The default is 1, i.e. prefetch with any
  ///         stride.
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches, bool HasCall) const;

  /// \return The maximum number of iterations to prefetch ahead. If
  /// the required number of iterations is more than this number, no
  /// prefetching is performed.
  unsigned getMaxPrefetchIterationsAhead() const;

  /// \return True if prefetching should also be done for writes.
  bool enableWritePrefetching() const;

  /// \return The maximum interleave factor that any transform should try to
  /// perform for this target. This number depends on the level of parallelism
  /// and the number of execution units in the CPU.
  unsigned getMaxInterleaveFactor(unsigned VF) const;

  /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
  static OperandValueKind getOperandInfo(const Value *V,
                                         OperandValueProperties &OpProps);

  /// This is an approximation of reciprocal throughput of a math/logic op.
  /// A higher cost indicates less expected throughput.
  /// From Agner Fog's guides, reciprocal throughput is "the average number of
  /// clock cycles per instruction when the instructions are not part of a
  /// limiting dependency chain."
  /// Therefore, costs should be scaled to account for multiple execution units
  /// on the target that can process this type of instruction. For example, if
  /// there are 5 scalar integer units and 2 vector integer units that can
  /// calculate an 'add' in a single cycle, this model should indicate that the
  /// cost of the vector add instruction is 2.5 times the cost of the scalar
  /// add instruction.
  /// \p Args is an optional argument which holds the instruction operands
  /// values so the TTI can analyze those values searching for special
  /// cases or optimizations based on those values.
  /// \p CxtI is the optional original context instruction, if one exists, to
  /// provide even more information.
  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      OperandValueKind Opd1Info = OK_AnyValue,
      OperandValueKind Opd2Info = OK_AnyValue,
      OperandValueProperties Opd1PropInfo = OP_None,
      OperandValueProperties Opd2PropInfo = OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr) const;
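
  // For illustration, the throughput cost of a <4 x i32> add whose second
  // operand is a uniform constant could be queried as follows; `TTI` and
  // `Ctx` are hypothetical:
  //
  //   auto *VecTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);
  //   InstructionCost Cost = TTI.getArithmeticInstrCost(
  //       Instruction::Add, VecTy, TargetTransformInfo::TCK_RecipThroughput,
  //       TargetTransformInfo::OK_AnyValue,
  //       TargetTransformInfo::OK_UniformConstantValue);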

  /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
  /// The exact mask may be passed as Mask, or else the array will be empty.
  /// The index and subtype parameters are used by the subvector insertion and
  /// extraction shuffle kinds to show the insert/extract point and the type of
  /// the subvector being inserted/extracted.
  /// NOTE: For subvector extractions Tp represents the source type.
  InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask = None, int Index = 0,
                                 VectorType *SubTp = nullptr) const;

  /// Represents a hint about the context in which a cast is used.
  ///
  /// For zext/sext, the context of the cast is the operand, which must be a
  /// load of some kind. For trunc, the context of the cast is the single
  /// user of the instruction, which must be a store of some kind.
  ///
  /// This enum allows the vectorizer to give getCastInstrCost an idea of the
  /// type of cast it's dealing with, as not every cast is equal. For instance,
  /// the zext of a load may be free, but the zext of an interleaving load can
  /// be (very) expensive!
  ///
  /// See \c getCastContextHint to compute a CastContextHint from a cast
  /// Instruction*. Callers can use it if they don't need to override the
  /// context and just want it to be calculated from the instruction.
  ///
  /// FIXME: This handles the types of load/store that the vectorizer can
  /// produce, which are the cases where the context instruction is most
  /// likely to be incorrect. There are other situations where that can happen
  /// too, which might be handled here but in the long run a more general
  /// solution of costing multiple instructions at the same time may be better.
  enum class CastContextHint : uint8_t {
    None,          ///< The cast is not used with a load/store of any kind.
    Normal,        ///< The cast is used with a normal load/store.
    Masked,        ///< The cast is used with a masked load/store.
    GatherScatter, ///< The cast is used with a gather/scatter.
    Interleave,    ///< The cast is used with an interleaved load/store.
    Reversed,      ///< The cast is used with a reversed load/store.
  };

  /// Calculates a CastContextHint from \p I.
  /// This should be used by callers of getCastInstrCost if they wish to
  /// determine the context from some instruction.
  /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr,
  /// or if it's another type of cast.
  static CastContextHint getCastContextHint(const Instruction *I);

  /// \return The expected cost of cast instructions, such as bitcast, trunc,
  /// zext, etc. If there is an existing instruction that holds Opcode, it
  /// may be passed in the 'I' parameter.
  InstructionCost
  getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                   CastContextHint CCH,
                   TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
                   const Instruction *I = nullptr) const;
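
  // For illustration, costing a zext from <8 x i8> to <8 x i32> that is known
  // to be fed by a normal (unmasked, consecutive) load; names hypothetical:
  //
  //   auto *SrcTy = FixedVectorType::get(Type::getInt8Ty(Ctx), 8);
  //   auto *DstTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 8);
  //   InstructionCost Cost = TTI.getCastInstrCost(
  //       Instruction::ZExt, DstTy, SrcTy,
  //       TargetTransformInfo::CastContextHint::Normal,
  //       TargetTransformInfo::TCK_RecipThroughput);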

  /// \return The expected cost of a sign- or zero-extended vector extract. Use
  /// -1 to indicate that there is no information about the index value.
  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy,
                                           unsigned Index = -1) const;

  /// \return The expected cost of control-flow related instructions such as
  /// Phi, Ret, Br, Switch.
  InstructionCost
  getCFInstrCost(unsigned Opcode,
                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
                 const Instruction *I = nullptr) const;

  /// \returns The expected cost of compare and select instructions. If there
  /// is an existing instruction that holds Opcode, it may be passed in the
  /// 'I' parameter. The \p VecPred parameter can be used to indicate the select
  /// is using a compare with the specified predicate as condition. When vector
  /// types are passed, \p VecPred must be used for all lanes.
  InstructionCost
  getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy = nullptr,
                     CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE,
                     TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
                     const Instruction *I = nullptr) const;

  /// \return The expected cost of vector Insert and Extract.
  /// Use -1 to indicate that there is no information on the index value.
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     unsigned Index = -1) const;

  /// \return The cost of Load and Store instructions.
  InstructionCost
  getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                  unsigned AddressSpace,
                  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
                  const Instruction *I = nullptr) const;

  /// \return The cost of masked Load and Store instructions.
  InstructionCost getMaskedMemoryOpCost(
      unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;

  /// \return The cost of Gather or Scatter operation
  /// \p Opcode - is a type of memory access Load or Store
  /// \p DataTy - a vector type of the data to be loaded or stored
  /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
  /// \p VariableMask - true when the memory access is predicated with a mask
  ///                   that is not a compile-time constant
  /// \p Alignment - alignment of single element
  /// \p I - the optional original context instruction, if one exists, e.g. the
  ///        load/store to transform or the call to the gather/scatter intrinsic
  InstructionCost getGatherScatterOpCost(
      unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
      Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      const Instruction *I = nullptr) const;

  /// \return The cost of the interleaved memory operation.
  /// \p Opcode is the memory operation code
  /// \p VecTy is the vector type of the interleaved access.
  /// \p Factor is the interleave factor
  /// \p Indices is the indices for interleaved load members (as interleaved
  ///    load allows gaps)
  /// \p Alignment is the alignment of the memory operation
  /// \p AddressSpace is address space of the pointer.
  /// \p UseMaskForCond indicates if the memory access is predicated.
  /// \p UseMaskForGaps indicates if gaps should be masked.
  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
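
  // For illustration, the cost of a factor-2 interleaved load that uses both
  // members (no gaps) of a combined <8 x i32> access could be queried as
  // follows; names hypothetical:
  //
  //   auto *VecTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 8);
  //   InstructionCost Cost = TTI.getInterleavedMemoryOpCost(
  //       Instruction::Load, VecTy, /*Factor=*/2, /*Indices=*/{0, 1},
  //       Align(4), /*AddressSpace=*/0);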

  /// Calculate the cost of performing a vector reduction.
  ///
  /// This is the cost of reducing the vector value of type \p Ty to a scalar
  /// value using the operation denoted by \p Opcode. The form of the reduction
  /// can either be a pairwise reduction or a reduction that splits the vector
  /// at every reduction level.
  ///
  /// Pairwise:
  ///  (v0, v1, v2, v3)
  ///  ((v0+v1), (v2+v3), undef, undef)
  /// Split:
  ///  (v0, v1, v2, v3)
  ///  ((v0+v2), (v1+v3), undef, undef)
  InstructionCost getArithmeticReductionCost(
      unsigned Opcode, VectorType *Ty, bool IsPairwiseForm,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;

  InstructionCost getMinMaxReductionCost(
      VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;

  /// Calculate the cost of an extended reduction pattern, similar to
  /// getArithmeticReductionCost of an Add reduction with an extension and
  /// optional multiply. This is the cost of:
  ///   ResTy vecreduce.add(ext(Ty A)), or if the IsMLA flag is set,
  ///   ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B))).
  /// The reduction happens on a VectorType with ResTy elements and Ty lanes.
  InstructionCost getExtendedAddReductionCost(
      bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;

  /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
  /// Three cases are handled: 1. scalar instruction 2. vector instruction
  /// 3. scalar instruction which is to be vectorized.
  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind) const;

  /// \returns The cost of Call instructions.
  InstructionCost getCallInstrCost(
      Function *F, Type *RetTy, ArrayRef<Type *> Tys,
      TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
  /// \returns The number of pieces into which the provided type must be
  /// split during legalization. Zero is returned when the answer is unknown.
  unsigned getNumberOfParts(Type *Tp) const;

  /// \returns The cost of the address computation. For most targets this can be
  /// merged into the instruction indexing mode. Some targets might want to
  /// distinguish between address computation for memory operations on vector
  /// types and scalar types. Such targets should override this function.
  /// The 'SE' parameter holds a pointer to the scalar evolution object which
  /// is used in order to get the Ptr step value in case of constant stride.
  /// The 'Ptr' parameter holds the SCEV of the access pointer.
  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,
                                const SCEV *Ptr = nullptr) const;

  /// \returns The cost, if any, of keeping values of the given types alive
  /// over a callsite.
  ///
  /// Some types may require the use of register classes that do not have
  /// any callee-saved registers, so would require a spill and fill.
  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;

  /// \returns True if the intrinsic is a supported memory intrinsic. Info
  /// will contain additional information - whether the intrinsic may write
  /// or read to memory, volatility and the pointer. Info is undefined
  /// if false is returned.
  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  /// \returns The maximum element size, in bytes, for an element
  /// unordered-atomic memory intrinsic.
  unsigned getAtomicMemIntrinsicMaxElementSize() const;

  /// \returns A value which is the result of the given memory intrinsic. New
  /// instructions may be created to extract the result from the given intrinsic
  /// memory operation. Returns nullptr if the target cannot create a result
  /// from the given intrinsic.
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) const;

  /// \returns The type to use in a loop expansion of a memcpy call.
  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAddrSpace, unsigned DestAddrSpace,
                                  unsigned SrcAlign, unsigned DestAlign) const;

  /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
  /// \param RemainingBytes The number of bytes to copy.
  ///
  /// Calculates the operand types to use when copying \p RemainingBytes of
  /// memory, where source and destination alignments are \p SrcAlign and
  /// \p DestAlign respectively.
  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign) const;
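
  // For illustration, a memcpy expansion might first ask for the widest
  // per-iteration copy type and then for the residual operand types; `TTI`,
  // `Ctx`, and `Length` are hypothetical:
  //
  //   Type *OpTy = TTI.getMemcpyLoopLoweringType(
  //       Ctx, Length, /*SrcAddrSpace=*/0, /*DestAddrSpace=*/0,
  //       /*SrcAlign=*/4, /*DestAlign=*/4);
  //   SmallVector<Type *, 4> ResidualTys;
  //   TTI.getMemcpyLoopResidualLoweringType(
  //       ResidualTys, Ctx, /*RemainingBytes=*/5, /*SrcAddrSpace=*/0,
  //       /*DestAddrSpace=*/0, /*SrcAlign=*/4, /*DestAlign=*/4);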
1270 
1271  /// \returns True if the two functions have compatible attributes for inlining
1272  /// purposes.
1273  bool areInlineCompatible(const Function *Caller,
1274  const Function *Callee) const;
1275 
1276  /// \returns True if the caller and callee agree on how \p Args will be passed
1277  /// to the callee.
1278  /// \param[out] Args The list of compatible arguments. The implementation may
1279  /// filter out any incompatible args from this list.
1280  bool areFunctionArgsABICompatible(const Function *Caller,
1281  const Function *Callee,
1283 
1284  /// The type of load/store indexing.
1286  MIM_Unindexed, ///< No indexing.
1287  MIM_PreInc, ///< Pre-incrementing.
1288  MIM_PreDec, ///< Pre-decrementing.
1289  MIM_PostInc, ///< Post-incrementing.
1290  MIM_PostDec ///< Post-decrementing.
1291  };
1292 
1293  /// \returns True if the specified indexed load for the given type is legal.
1294  bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
1295 
1296  /// \returns True if the specified indexed store for the given type is legal.
1297  bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
1298 
1299  /// \returns The bitwidth of the largest vector type that should be used to
1300  /// load/store in the given address space.
1301  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
1302 
1303  /// \returns True if the load instruction is legal to vectorize.
1304  bool isLegalToVectorizeLoad(LoadInst *LI) const;
1305 
1306  /// \returns True if the store instruction is legal to vectorize.
1307  bool isLegalToVectorizeStore(StoreInst *SI) const;
1308 
1309  /// \returns True if it is legal to vectorize the given load chain.
1310  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
1311  unsigned AddrSpace) const;
1312 
1313  /// \returns True if it is legal to vectorize the given store chain.
1314  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
1315  unsigned AddrSpace) const;
1316 
1317  /// \returns True if it is legal to vectorize the given reduction kind.
1319  ElementCount VF) const;
1320 
1321  /// \returns The new vector factor value if the target doesn't support \p
1322  /// SizeInBytes loads or has a better vector factor.
1323  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1324  unsigned ChainSizeInBytes,
1325  VectorType *VecTy) const;
1326 
1327  /// \returns The new vector factor value if the target doesn't support \p
1328  /// SizeInBytes stores or has a better vector factor.
1329  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1330  unsigned ChainSizeInBytes,
1331  VectorType *VecTy) const;
1332 
1333  /// Flags describing the kind of vector reduction.
1334  struct ReductionFlags {
1335  ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}
1336  bool IsMaxOp; ///< If the op is a min/max kind, true if it's a max operation.
1337  bool IsSigned; ///< Whether the operation is a signed int reduction.
1338  bool NoNaN; ///< If op is an fp min/max, whether NaNs may be present.
1339  };
1340 
1341  /// \returns True if the target prefers reductions to be performed in the loop.
1342  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1343  ReductionFlags Flags) const;
1344 
1345  /// \returns True if the target prefers the reduction select to be kept in
1346  /// the loop when tail folding, i.e.
1347  /// loop:
1348  /// p = phi (0, s)
1349  /// a = add (p, x)
1350  /// s = select (mask, a, p)
1351  /// vecreduce.add(s)
1352  ///
1353  /// This is in contrast to the normal scheme of p = phi (0, a), which allows
1354  /// the select to be pulled out of the loop. If the select(.., add, ..) can be
1355  /// predicated by the target, this can lead to cleaner code generation.
1356  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1357  ReductionFlags Flags) const;
1358 
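// Editor's note: for contrast, the "normal" scheme mentioned above keeps the
// phi on the add result, so the select can be sunk out of the loop
// (illustrative pseudo-IR, mirroring the sketch in the comment):
//   loop:
//     p = phi (0, a)
//     a = add (p, x)
//     s = select (mask, a, p)
//   vecreduce.add(s)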
1359  /// \returns True if the target wants to expand the given reduction intrinsic
1360  /// into a shuffle sequence.
1361  bool shouldExpandReduction(const IntrinsicInst *II) const;
1362 
1363  /// \returns the size cost of rematerializing a GlobalValue address relative
1364  /// to a stack reload.
1365  unsigned getGISelRematGlobalCost() const;
1366 
1367  /// \returns True if the target supports scalable vectors.
1368  bool supportsScalableVectors() const;
1369 
1370  /// \name Vector Predication Information
1371  /// @{
1372  /// Whether the target supports the %evl parameter of VP intrinsics
1373  /// efficiently in hardware (see the LLVM Language Reference, "Vector
1374  /// Predication Intrinsics"). Use of %evl is discouraged otherwise.
1375  bool hasActiveVectorLength() const;
1376 
1377  /// @}
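// Editor's note: an example of a VP intrinsic carrying the %evl operand that
// this hook reports on (illustrative IR; see the Language Reference for the
// authoritative signatures):
//   %r = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %a, <4 x i32> %b,
//                                          <4 x i1> %mask, i32 %evl)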
1378 
1379  /// @}
1380 
1381 private:
1382  /// Estimate the latency of specified instruction.
1383  /// Returns 1 as the default value.
1384  InstructionCost getInstructionLatency(const Instruction *I) const;
1385 
1386  /// Returns the expected throughput cost of the instruction.
1387  /// Returns -1 if the cost is unknown.
1388  InstructionCost getInstructionThroughput(const Instruction *I) const;
1389 
1390  /// The abstract base class used to type erase specific TTI
1391  /// implementations.
1392  class Concept;
1393 
1394  /// The template model for the base class which wraps a concrete
1395  /// implementation in a type erased interface.
1396  template <typename T> class Model;
1397 
1398  std::unique_ptr<Concept> TTIImpl;
1399 };
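// Editor's note: Concept/Model is the classic type-erasure idiom. A minimal
// self-contained sketch of the same pattern (not the actual LLVM classes,
// which follow below with the full TTI interface):
//   class Concept {
//   public:
//     virtual ~Concept() = default;
//     virtual unsigned getCacheLineSize() const = 0;
//   };
//   template <typename T> class Model final : public Concept {
//     T Impl;
//   public:
//     Model(T Impl) : Impl(std::move(Impl)) {}
//     unsigned getCacheLineSize() const override {
//       return Impl.getCacheLineSize();
//     }
//   };
//   // Any T providing getCacheLineSize() can sit behind a
//   // std::unique_ptr<Concept> without a common base class.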
1400
1401 class TargetTransformInfo::Concept {
1402 public:
1403  virtual ~Concept() = 0;
1404  virtual const DataLayout &getDataLayout() const = 0;
1405  virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
1406  ArrayRef<const Value *> Operands,
1407  TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) = 0;
1408  virtual unsigned getInliningThresholdMultiplier() = 0;
1409  virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
1410  virtual int getInlinerVectorBonusPercent() = 0;
1411  virtual int getMemcpyCost(const Instruction *I) = 0;
1412  virtual unsigned
1413  getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
1414  ProfileSummaryInfo *PSI,
1415  BlockFrequencyInfo *BFI) = 0;
1416  virtual InstructionCost getUserCost(const User *U,
1417  ArrayRef<const Value *> Operands,
1418  TargetCostKind CostKind) = 0;
1419  virtual BranchProbability getPredictableBranchThreshold() = 0;
1420  virtual bool hasBranchDivergence() = 0;
1421  virtual bool useGPUDivergenceAnalysis() = 0;
1422  virtual bool isSourceOfDivergence(const Value *V) = 0;
1423  virtual bool isAlwaysUniform(const Value *V) = 0;
1424  virtual unsigned getFlatAddressSpace() = 0;
1425  virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
1426  Intrinsic::ID IID) const = 0;
1427  virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1428  virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
1429  virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
1430  Value *OldV,
1431  Value *NewV) const = 0;
1432  virtual bool isLoweredToCall(const Function *F) = 0;
1433  virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
1434  UnrollingPreferences &UP) = 0;
1435  virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
1436  PeelingPreferences &PP) = 0;
1437  virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
1438  AssumptionCache &AC,
1439  TargetLibraryInfo *LibInfo,
1440  HardwareLoopInfo &HWLoopInfo) = 0;
1441  virtual bool
1442  preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
1443  AssumptionCache &AC, TargetLibraryInfo *TLI,
1444  DominatorTree *DT, const LoopAccessInfo *LAI) = 0;
1445  virtual bool emitGetActiveLaneMask() = 0;
1446  virtual Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
1447  IntrinsicInst &II) = 0;
1448  virtual Optional<Value *>
1449  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
1450  APInt DemandedMask, KnownBits &Known,
1451  bool &KnownBitsComputed) = 0;
1452  virtual Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
1453  InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
1454  APInt &UndefElts2, APInt &UndefElts3,
1455  std::function<void(Instruction *, unsigned, APInt, APInt &)>
1456  SimplifyAndSetOp) = 0;
1457  virtual bool isLegalAddImmediate(int64_t Imm) = 0;
1458  virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
1459  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
1460  int64_t BaseOffset, bool HasBaseReg,
1461  int64_t Scale, unsigned AddrSpace,
1462  Instruction *I) = 0;
1463  virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
1464  TargetTransformInfo::LSRCost &C2) = 0;
1465  virtual bool isNumRegsMajorCostOfLSR() = 0;
1466  virtual bool isProfitableLSRChainElement(Instruction *I) = 0;
1467  virtual bool canMacroFuseCmp() = 0;
1468  virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
1469  LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
1470  TargetLibraryInfo *LibInfo) = 0;
1471  virtual AddressingModeKind
1472  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const = 0;
1473  virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
1474  virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
1475  virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
1476  virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
1477  virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
1478  virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
1479  virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
1480  virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
1481  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
1482  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
1483  virtual bool prefersVectorizedAddressing() = 0;
1484  virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
1485  int64_t BaseOffset, bool HasBaseReg,
1486  int64_t Scale, unsigned AddrSpace) = 0;
1487  virtual bool LSRWithInstrQueries() = 0;
1488  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
1489  virtual bool isProfitableToHoist(Instruction *I) = 0;
1490  virtual bool useAA() = 0;
1491  virtual bool isTypeLegal(Type *Ty) = 0;
1492  virtual unsigned getRegUsageForType(Type *Ty) = 0;
1493  virtual bool shouldBuildLookupTables() = 0;
1494  virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
1495  virtual bool shouldBuildRelLookupTables() = 0;
1496  virtual bool useColdCCForColdCall(Function &F) = 0;
1497  virtual unsigned getScalarizationOverhead(VectorType *Ty,
1498  const APInt &DemandedElts,
1499  bool Insert, bool Extract) = 0;
1500  virtual unsigned
1501  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1502  ArrayRef<Type *> Tys) = 0;
1503  virtual bool supportsEfficientVectorElementLoadStore() = 0;
1504  virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
1505  virtual MemCmpExpansionOptions
1506  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
1507  virtual bool enableInterleavedAccessVectorization() = 0;
1508  virtual bool enableMaskedInterleavedAccessVectorization() = 0;
1509  virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
1510  virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
1511  unsigned BitWidth,
1512  unsigned AddressSpace,
1513  Align Alignment,
1514  bool *Fast) = 0;
1515  virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
1516  virtual bool haveFastSqrt(Type *Ty) = 0;
1517  virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
1518  virtual InstructionCost getFPOpCost(Type *Ty) = 0;
1519  virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
1520  const APInt &Imm, Type *Ty) = 0;
1521  virtual int getIntImmCost(const APInt &Imm, Type *Ty,
1522  TargetCostKind CostKind) = 0;
1523  virtual int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
1524  Type *Ty, TargetCostKind CostKind,
1525  Instruction *Inst = nullptr) = 0;
1526  virtual int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
1527  const APInt &Imm, Type *Ty,
1528  TargetCostKind CostKind) = 0;
1529  virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
1530  virtual unsigned getRegisterClassForType(bool Vector,
1531  Type *Ty = nullptr) const = 0;
1532  virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
1533  virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0;
1534  virtual unsigned getMinVectorRegisterBitWidth() = 0;
1535  virtual Optional<unsigned> getMaxVScale() const = 0;
1536  virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
1537  virtual ElementCount getMinimumVF(unsigned ElemWidth,
1538  bool IsScalable) const = 0;
1539  virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
1540  virtual bool shouldConsiderAddressTypePromotion(
1541  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1542  virtual unsigned getCacheLineSize() const = 0;
1543  virtual Optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
1544  virtual Optional<unsigned> getCacheAssociativity(CacheLevel Level) const = 0;
1545
1546  /// \return How far ahead of a load we should place the prefetch
1547  /// instruction. This is currently measured in number of
1548  /// instructions.
1549  virtual unsigned getPrefetchDistance() const = 0;
1550 
1551  /// \return Some HW prefetchers can handle accesses up to a certain
1552  /// constant stride. This is the minimum stride in bytes where it
1553  /// makes sense to start adding SW prefetches. The default is 1,
1554  /// i.e. prefetch with any stride. Sometimes prefetching is beneficial
1555  /// even below the HW prefetcher limit, and the arguments provided are
1556  /// meant to serve as a basis for deciding this for a particular loop.
1557  virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1558  unsigned NumStridedMemAccesses,
1559  unsigned NumPrefetches,
1560  bool HasCall) const = 0;
1561 
1562  /// \return The maximum number of iterations to prefetch ahead. If
1563  /// the required number of iterations is more than this number, no
1564  /// prefetching is performed.
1565  virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
1566 
1567  /// \return True if prefetching should also be done for writes.
1568  virtual bool enableWritePrefetching() const = 0;
1569 
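// Editor's note: a sketch of how a software-prefetch pass might combine these
// hooks (hypothetical variables; insertPrefetch is not a real API):
//   if (StrideInBytes >= getMinPrefetchStride(NumMemAccesses,
//                                             NumStridedMemAccesses,
//                                             NumPrefetches, HasCall) &&
//       ItersAhead <= getMaxPrefetchIterationsAhead())
//     insertPrefetch(Addr + ItersAhead * StrideInBytes);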
1570  virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
1571  virtual InstructionCost getArithmeticInstrCost(
1572  unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
1573  OperandValueKind Opd1Info, OperandValueKind Opd2Info,
1574  OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo,
1575  ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
1576  virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
1577  ArrayRef<int> Mask, int Index,
1578  VectorType *SubTp) = 0;
1579  virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
1580  Type *Src, CastContextHint CCH,
1581  TTI::TargetCostKind CostKind,
1582  const Instruction *I) = 0;
1583  virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1584  VectorType *VecTy,
1585  unsigned Index) = 0;
1586  virtual InstructionCost getCFInstrCost(unsigned Opcode,
1587  TTI::TargetCostKind CostKind,
1588  const Instruction *I = nullptr) = 0;
1589  virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
1590  Type *CondTy,
1591  CmpInst::Predicate VecPred,
1592  TTI::TargetCostKind CostKind,
1593  const Instruction *I) = 0;
1594  virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
1595  unsigned Index) = 0;
1596  virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
1597  Align Alignment,
1598  unsigned AddressSpace,
1599  TTI::TargetCostKind CostKind,
1600  const Instruction *I) = 0;
1601  virtual InstructionCost
1602  getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1603  unsigned AddressSpace,
1604  TTI::TargetCostKind CostKind) = 0;
1605  virtual InstructionCost
1606  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
1607  bool VariableMask, Align Alignment,
1608  TTI::TargetCostKind CostKind,
1609  const Instruction *I = nullptr) = 0;
1610 
1611  virtual InstructionCost getInterleavedMemoryOpCost(
1612  unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1613  Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
1614  bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
1615  virtual InstructionCost
1616  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
1617  bool IsPairwiseForm,
1618  TTI::TargetCostKind CostKind) = 0;
1619  virtual InstructionCost
1620  getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
1621  bool IsPairwiseForm, bool IsUnsigned,
1622  TTI::TargetCostKind CostKind) = 0;
1623  virtual InstructionCost getExtendedAddReductionCost(
1624  bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1625  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) = 0;
1626  virtual InstructionCost
1627  getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
1628  TTI::TargetCostKind CostKind) = 0;
1629  virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy,
1630  ArrayRef<Type *> Tys,
1631  TTI::TargetCostKind CostKind) = 0;
1632  virtual unsigned getNumberOfParts(Type *Tp) = 0;
1633  virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
1634  const SCEV *Ptr) = 0;
1635  virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
1636  virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1637  MemIntrinsicInfo &Info) = 0;
1638  virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
1639  virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1640  Type *ExpectedType) = 0;
1641  virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1642  unsigned SrcAddrSpace,
1643  unsigned DestAddrSpace,
1644  unsigned SrcAlign,
1645  unsigned DestAlign) const = 0;
1646  virtual void getMemcpyLoopResidualLoweringType(
1647  SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1648  unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1649  unsigned SrcAlign, unsigned DestAlign) const = 0;
1650  virtual bool areInlineCompatible(const Function *Caller,
1651  const Function *Callee) const = 0;
1652  virtual bool
1653  areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
1654  SmallPtrSetImpl<Argument *> &Args) const = 0;
1655  virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1656  virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1657  virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
1658  virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
1659  virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
1660  virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1661  Align Alignment,
1662  unsigned AddrSpace) const = 0;
1663  virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1664  Align Alignment,
1665  unsigned AddrSpace) const = 0;
1666  virtual bool isLegalToVectorizeReduction(RecurrenceDescriptor RdxDesc,
1667  ElementCount VF) const = 0;
1668  virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1669  unsigned ChainSizeInBytes,
1670  VectorType *VecTy) const = 0;
1671  virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1672  unsigned ChainSizeInBytes,
1673  VectorType *VecTy) const = 0;
1674  virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1675  ReductionFlags) const = 0;
1676  virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1677  ReductionFlags) const = 0;
1678  virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
1679  virtual unsigned getGISelRematGlobalCost() const = 0;
1680  virtual bool supportsScalableVectors() const = 0;
1681  virtual bool hasActiveVectorLength() const = 0;
1682  virtual InstructionCost getInstructionLatency(const Instruction *I) = 0;
1683 };
1684 
1685 template <typename T>
1686 class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
1687  T Impl;
1688 
1689 public:
1690  Model(T Impl) : Impl(std::move(Impl)) {}
1691  ~Model() override {}
1692 
1693  const DataLayout &getDataLayout() const override {
1694  return Impl.getDataLayout();
1695  }
1696 
1697  int getGEPCost(Type *PointeeType, const Value *Ptr,
1698  ArrayRef<const Value *> Operands,
1699  TargetCostKind CostKind = TTI::TCK_SizeAndLatency) override {
1700  return Impl.getGEPCost(PointeeType, Ptr, Operands);
1701  }
1702  unsigned getInliningThresholdMultiplier() override {
1703  return Impl.getInliningThresholdMultiplier();
1704  }
1705  unsigned adjustInliningThreshold(const CallBase *CB) override {
1706  return Impl.adjustInliningThreshold(CB);
1707  }
1708  int getInlinerVectorBonusPercent() override {
1709  return Impl.getInlinerVectorBonusPercent();
1710  }
1711  int getMemcpyCost(const Instruction *I) override {
1712  return Impl.getMemcpyCost(I);
1713  }
1714  InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
1715  TargetCostKind CostKind) override {
1716  return Impl.getUserCost(U, Operands, CostKind);
1717  }
1718  BranchProbability getPredictableBranchThreshold() override {
1719  return Impl.getPredictableBranchThreshold();
1720  }
1721  bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
1722  bool useGPUDivergenceAnalysis() override {
1723  return Impl.useGPUDivergenceAnalysis();
1724  }
1725  bool isSourceOfDivergence(const Value *V) override {
1726  return Impl.isSourceOfDivergence(V);
1727  }
1728 
1729  bool isAlwaysUniform(const Value *V) override {
1730  return Impl.isAlwaysUniform(V);
1731  }
1732 
1733  unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
1734 
1735  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
1736  Intrinsic::ID IID) const override {
1737  return Impl.collectFlatAddressOperands(OpIndexes, IID);
1738  }
1739 
1740  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
1741  return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
1742  }
1743 
1744  unsigned getAssumedAddrSpace(const Value *V) const override {
1745  return Impl.getAssumedAddrSpace(V);
1746  }
1747 
1748  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
1749  Value *NewV) const override {
1750  return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
1751  }
1752 
1753  bool isLoweredToCall(const Function *F) override {
1754  return Impl.isLoweredToCall(F);
1755  }
1756  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
1757  UnrollingPreferences &UP) override {
1758  return Impl.getUnrollingPreferences(L, SE, UP);
1759  }
1760  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
1761  PeelingPreferences &PP) override {
1762  return Impl.getPeelingPreferences(L, SE, PP);
1763  }
1764  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
1765  AssumptionCache &AC, TargetLibraryInfo *LibInfo,
1766  HardwareLoopInfo &HWLoopInfo) override {
1767  return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
1768  }
1769  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
1770  AssumptionCache &AC, TargetLibraryInfo *TLI,
1771  DominatorTree *DT,
1772  const LoopAccessInfo *LAI) override {
1773  return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
1774  }
1775  bool emitGetActiveLaneMask() override {
1776  return Impl.emitGetActiveLaneMask();
1777  }
1778  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
1779  IntrinsicInst &II) override {
1780  return Impl.instCombineIntrinsic(IC, II);
1781  }
1782  Optional<Value *>
1783  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
1784  APInt DemandedMask, KnownBits &Known,
1785  bool &KnownBitsComputed) override {
1786  return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
1787  KnownBitsComputed);
1788  }
1789  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
1790  InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
1791  APInt &UndefElts2, APInt &UndefElts3,
1792  std::function<void(Instruction *, unsigned, APInt, APInt &)>
1793  SimplifyAndSetOp) override {
1794  return Impl.simplifyDemandedVectorEltsIntrinsic(
1795  IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
1796  SimplifyAndSetOp);
1797  }
1798  bool isLegalAddImmediate(int64_t Imm) override {
1799  return Impl.isLegalAddImmediate(Imm);
1800  }
1801  bool isLegalICmpImmediate(int64_t Imm) override {
1802  return Impl.isLegalICmpImmediate(Imm);
1803  }
1804  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
1805  bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
1806  Instruction *I) override {
1807  return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
1808  AddrSpace, I);
1809  }
1810  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
1811  TargetTransformInfo::LSRCost &C2) override {
1812  return Impl.isLSRCostLess(C1, C2);
1813  }
1814  bool isNumRegsMajorCostOfLSR() override {
1815  return Impl.isNumRegsMajorCostOfLSR();
1816  }
1817  bool isProfitableLSRChainElement(Instruction *I) override {
1818  return Impl.isProfitableLSRChainElement(I);
1819  }
1820  bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
1821  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
1822  DominatorTree *DT, AssumptionCache *AC,
1823  TargetLibraryInfo *LibInfo) override {
1824  return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
1825  }
1826  AddressingModeKind
1827  getPreferredAddressingMode(const Loop *L,
1828  ScalarEvolution *SE) const override {
1829  return Impl.getPreferredAddressingMode(L, SE);
1830  }
1831  bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
1832  return Impl.isLegalMaskedStore(DataType, Alignment);
1833  }
1834  bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
1835  return Impl.isLegalMaskedLoad(DataType, Alignment);
1836  }
1837  bool isLegalNTStore(Type *DataType, Align Alignment) override {
1838  return Impl.isLegalNTStore(DataType, Alignment);
1839  }
1840  bool isLegalNTLoad(Type *DataType, Align Alignment) override {
1841  return Impl.isLegalNTLoad(DataType, Alignment);
1842  }
1843  bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
1844  return Impl.isLegalMaskedScatter(DataType, Alignment);
1845  }
1846  bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
1847  return Impl.isLegalMaskedGather(DataType, Alignment);
1848  }
1849  bool isLegalMaskedCompressStore(Type *DataType) override {
1850  return Impl.isLegalMaskedCompressStore(DataType);
1851  }
1852  bool isLegalMaskedExpandLoad(Type *DataType) override {
1853  return Impl.isLegalMaskedExpandLoad(DataType);
1854  }
1855  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
1856  return Impl.hasDivRemOp(DataType, IsSigned);
1857  }
1858  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
1859  return Impl.hasVolatileVariant(I, AddrSpace);
1860  }
1861  bool prefersVectorizedAddressing() override {
1862  return Impl.prefersVectorizedAddressing();
1863  }
1864  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
1865  bool HasBaseReg, int64_t Scale,
1866  unsigned AddrSpace) override {
1867  return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
1868  AddrSpace);
1869  }
1870  bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
1871  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
1872  return Impl.isTruncateFree(Ty1, Ty2);
1873  }
1874  bool isProfitableToHoist(Instruction *I) override {
1875  return Impl.isProfitableToHoist(I);
1876  }
1877  bool useAA() override { return Impl.useAA(); }
1878  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
1879  unsigned getRegUsageForType(Type *Ty) override {
1880  return Impl.getRegUsageForType(Ty);
1881  }
1882  bool shouldBuildLookupTables() override {
1883  return Impl.shouldBuildLookupTables();
1884  }
1885  bool shouldBuildLookupTablesForConstant(Constant *C) override {
1886  return Impl.shouldBuildLookupTablesForConstant(C);
1887  }
1888  bool shouldBuildRelLookupTables() override {
1889  return Impl.shouldBuildRelLookupTables();
1890  }
1891  bool useColdCCForColdCall(Function &F) override {
1892  return Impl.useColdCCForColdCall(F);
1893  }
1894 
1895  unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
1896  bool Insert, bool Extract) override {
1897  return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
1898  }
1899  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1900  ArrayRef<Type *> Tys) override {
1901  return Impl.getOperandsScalarizationOverhead(Args, Tys);
1902  }
1903 
1904  bool supportsEfficientVectorElementLoadStore() override {
1905  return Impl.supportsEfficientVectorElementLoadStore();
1906  }
1907 
1908  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
1909  return Impl.enableAggressiveInterleaving(LoopHasReductions);
1910  }
1911  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
1912  bool IsZeroCmp) const override {
1913  return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
1914  }
1915  bool enableInterleavedAccessVectorization() override {
1916  return Impl.enableInterleavedAccessVectorization();
1917  }
1918  bool enableMaskedInterleavedAccessVectorization() override {
1919  return Impl.enableMaskedInterleavedAccessVectorization();
1920  }
1921  bool isFPVectorizationPotentiallyUnsafe() override {
1922  return Impl.isFPVectorizationPotentiallyUnsafe();
1923  }
1924  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
1925  unsigned AddressSpace, Align Alignment,
1926  bool *Fast) override {
1927  return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
1928  Alignment, Fast);
1929  }
1930  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
1931  return Impl.getPopcntSupport(IntTyWidthInBit);
1932  }
1933  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
1934 
1935  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
1936  return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
1937  }
1938 
1939  InstructionCost getFPOpCost(Type *Ty) override {
1940  return Impl.getFPOpCost(Ty);
1941  }
1942 
1943  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1944  Type *Ty) override {
1945  return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
1946  }
1947  int getIntImmCost(const APInt &Imm, Type *Ty,
1948  TargetCostKind CostKind) override {
1949  return Impl.getIntImmCost(Imm, Ty, CostKind);
1950  }
1951  int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty,
1952  TargetCostKind CostKind,
1953  Instruction *Inst = nullptr) override {
1954  return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
1955  }
1956  int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
1957  Type *Ty, TargetCostKind CostKind) override {
1958  return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
1959  }
1960  unsigned getNumberOfRegisters(unsigned ClassID) const override {
1961  return Impl.getNumberOfRegisters(ClassID);
1962  }
1963  unsigned getRegisterClassForType(bool Vector,
1964  Type *Ty = nullptr) const override {
1965  return Impl.getRegisterClassForType(Vector, Ty);
1966  }
1967  const char *getRegisterClassName(unsigned ClassID) const override {
1968  return Impl.getRegisterClassName(ClassID);
1969  }
1970  TypeSize getRegisterBitWidth(RegisterKind K) const override {
1971  return Impl.getRegisterBitWidth(K);
1972  }
1973  unsigned getMinVectorRegisterBitWidth() override {
1974  return Impl.getMinVectorRegisterBitWidth();
1975  }
1976  Optional<unsigned> getMaxVScale() const override {
1977  return Impl.getMaxVScale();
1978  }
1979  bool shouldMaximizeVectorBandwidth(bool OptSize) const override {
1980  return Impl.shouldMaximizeVectorBandwidth(OptSize);
1981  }
1982  ElementCount getMinimumVF(unsigned ElemWidth,
1983  bool IsScalable) const override {
1984  return Impl.getMinimumVF(ElemWidth, IsScalable);
1985  }
1986  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
1987  return Impl.getMaximumVF(ElemWidth, Opcode);
1988  }
1989  bool shouldConsiderAddressTypePromotion(
1990  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
1991  return Impl.shouldConsiderAddressTypePromotion(
1992  I, AllowPromotionWithoutCommonHeader);
1993  }
1994  unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
1995  Optional<unsigned> getCacheSize(CacheLevel Level) const override {
1996  return Impl.getCacheSize(Level);
1997  }
1998  Optional<unsigned> getCacheAssociativity(CacheLevel Level) const override {
1999  return Impl.getCacheAssociativity(Level);
2000  }
2001 
2002  /// Return the preferred prefetch distance in terms of instructions.
2003  ///
2004  unsigned getPrefetchDistance() const override {
2005  return Impl.getPrefetchDistance();
2006  }
2007 
2008  /// Return the minimum stride necessary to trigger software
2009  /// prefetching.
2010  ///
2011  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
2012  unsigned NumStridedMemAccesses,
2013  unsigned NumPrefetches,
2014  bool HasCall) const override {
2015  return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
2016  NumPrefetches, HasCall);
2017  }
2018 
2019  /// Return the maximum prefetch distance in terms of loop
2020  /// iterations.
2021  ///
2022  unsigned getMaxPrefetchIterationsAhead() const override {
2023  return Impl.getMaxPrefetchIterationsAhead();
2024  }
2025 
2026  /// \return True if prefetching should also be done for writes.
2027  bool enableWritePrefetching() const override {
2028  return Impl.enableWritePrefetching();
2029  }
2030 
2031  unsigned getMaxInterleaveFactor(unsigned VF) override {
2032  return Impl.getMaxInterleaveFactor(VF);
2033  }
2034  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
2035  unsigned &JTSize,
2036  ProfileSummaryInfo *PSI,
2037  BlockFrequencyInfo *BFI) override {
2038  return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
2039  }
2040  InstructionCost getArithmeticInstrCost(
2041  unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
2042  OperandValueKind Opd1Info, OperandValueKind Opd2Info,
2043  OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo,
2044  ArrayRef<const Value *> Args,
2045  const Instruction *CxtI = nullptr) override {
2046  return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
2047  Opd1PropInfo, Opd2PropInfo, Args, CxtI);
2048  }
2049  InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
2050  ArrayRef<int> Mask, int Index,
2051  VectorType *SubTp) override {
2052  return Impl.getShuffleCost(Kind, Tp, Mask, Index, SubTp);
2053  }
2054  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
2055  CastContextHint CCH,
2056  TTI::TargetCostKind CostKind,
2057  const Instruction *I) override {
2058  return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
2059  }
2060  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
2061  VectorType *VecTy,
2062  unsigned Index) override {
2063  return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
2064  }
2065  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
2066  const Instruction *I = nullptr) override {
2067  return Impl.getCFInstrCost(Opcode, CostKind, I);
2068  }
2069  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
2070  CmpInst::Predicate VecPred,
2071  TTI::TargetCostKind CostKind,
2072  const Instruction *I) override {
2073  return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
2074  }
2075  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
2076  unsigned Index) override {
2077  return Impl.getVectorInstrCost(Opcode, Val, Index);
2078  }
2079  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2080  unsigned AddressSpace,
2081  TTI::TargetCostKind CostKind,
2082  const Instruction *I) override {
2083  return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2084  CostKind, I);
2085  }
2086  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
2087  Align Alignment, unsigned AddressSpace,
2088  TTI::TargetCostKind CostKind) override {
2089  return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2090  CostKind);
2091  }
2092  InstructionCost
2093  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2094  bool VariableMask, Align Alignment,
2095  TTI::TargetCostKind CostKind,
2096  const Instruction *I = nullptr) override {
2097  return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
2098  Alignment, CostKind, I);
2099  }
2100  InstructionCost getInterleavedMemoryOpCost(
2101  unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
2102  Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
2103  bool UseMaskForCond, bool UseMaskForGaps) override {
2104  return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
2105  Alignment, AddressSpace, CostKind,
2106  UseMaskForCond, UseMaskForGaps);
2107  }
2108  InstructionCost
2109  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
2110  bool IsPairwiseForm,
2111  TTI::TargetCostKind CostKind) override {
2112  return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm,
2113  CostKind);
2114  }
2115  InstructionCost
2116  getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
2117  bool IsPairwiseForm, bool IsUnsigned,
2118  TTI::TargetCostKind CostKind) override {
2119  return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned,
2120  CostKind);
2121  }
2122  InstructionCost getExtendedAddReductionCost(
2123  bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
2124  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) override {
2125  return Impl.getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, Ty,
2126  CostKind);
2127  }
2128  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
2129  TTI::TargetCostKind CostKind) override {
2130  return Impl.getIntrinsicInstrCost(ICA, CostKind);
2131  }
2132  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
2133  ArrayRef<Type *> Tys,
2134  TTI::TargetCostKind CostKind) override {
2135  return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
2136  }
2137  unsigned getNumberOfParts(Type *Tp) override {
2138  return Impl.getNumberOfParts(Tp);
2139  }
2140  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
2141  const SCEV *Ptr) override {
2142  return Impl.getAddressComputationCost(Ty, SE, Ptr);
2143  }
2144  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
2145  return Impl.getCostOfKeepingLiveOverCall(Tys);
2146  }
2147  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
2148  MemIntrinsicInfo &Info) override {
2149  return Impl.getTgtMemIntrinsic(Inst, Info);
2150  }
2151  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
2152  return Impl.getAtomicMemIntrinsicMaxElementSize();
2153  }
2154  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
2155  Type *ExpectedType) override {
2156  return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
2157  }
2158  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
2159  unsigned SrcAddrSpace, unsigned DestAddrSpace,
2160  unsigned SrcAlign,
2161  unsigned DestAlign) const override {
2162  return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
2163  DestAddrSpace, SrcAlign, DestAlign);
2164  }
2165  void getMemcpyLoopResidualLoweringType(
2166  SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
2167  unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
2168  unsigned SrcAlign, unsigned DestAlign) const override {
2169  Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
2170  SrcAddrSpace, DestAddrSpace,
2171  SrcAlign, DestAlign);
2172  }
2173  bool areInlineCompatible(const Function *Caller,
2174  const Function *Callee) const override {
2175  return Impl.areInlineCompatible(Caller, Callee);
2176  }
2177  bool areFunctionArgsABICompatible(
2178  const Function *Caller, const Function *Callee,
2179  SmallPtrSetImpl<Argument *> &Args) const override {
2180  return Impl.areFunctionArgsABICompatible(Caller, Callee, Args);
2181  }
2182  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
2183  return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
2184  }
2185  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
2186  return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
2187  }
2188  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
2189  return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
2190  }
2191  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
2192  return Impl.isLegalToVectorizeLoad(LI);
2193  }
2194  bool isLegalToVectorizeStore(StoreInst *SI) const override {
2195  return Impl.isLegalToVectorizeStore(SI);
2196  }
2197  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
2198  unsigned AddrSpace) const override {
2199  return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
2200  AddrSpace);
2201  }
2202  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
2203  unsigned AddrSpace) const override {
2204  return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
2205  AddrSpace);
2206  }
2207  bool isLegalToVectorizeReduction(RecurrenceDescriptor RdxDesc,
2208  ElementCount VF) const override {
2209  return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
2210  }
2211  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
2212  unsigned ChainSizeInBytes,
2213  VectorType *VecTy) const override {
2214  return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
2215  }
2216  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
2217  unsigned ChainSizeInBytes,
2218  VectorType *VecTy) const override {
2219  return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
2220  }
2221  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
2222  ReductionFlags Flags) const override {
2223  return Impl.preferInLoopReduction(Opcode, Ty, Flags);
2224  }
2225  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
2226  ReductionFlags Flags) const override {
2227  return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
2228  }
2229  bool shouldExpandReduction(const IntrinsicInst *II) const override {
2230  return Impl.shouldExpandReduction(II);
2231  }
2232 
2233  unsigned getGISelRematGlobalCost() const override {
2234  return Impl.getGISelRematGlobalCost();
2235  }
2236 
2237  bool supportsScalableVectors() const override {
2238  return Impl.supportsScalableVectors();
2239  }
2240 
2241  bool hasActiveVectorLength() const override {
2242  return Impl.hasActiveVectorLength();
2243  }
2244 
2245  InstructionCost getInstructionLatency(const Instruction *I) override {
2246  return Impl.getInstructionLatency(I);
2247  }
2248 };
2249 
2250 template <typename T>
2251 TargetTransformInfo::TargetTransformInfo(T Impl)
2252  : TTIImpl(new Model<T>(Impl)) {}
2253 
2254 /// Analysis pass providing the \c TargetTransformInfo.
2255 ///
2256 /// The core idea of the TargetIRAnalysis is to expose an interface through
2257 /// which LLVM targets can analyze and provide information about the middle
2258 /// end's target-independent IR. This supports use cases such as target-aware
2259 /// cost modeling of IR constructs.
2260 ///
2261 /// This is a function analysis because much of the cost modeling for targets
2262 /// is done in a subtarget specific way and LLVM supports compiling different
2263 /// functions targeting different subtargets in order to support runtime
2264 /// dispatch according to the observed subtarget.
2265 class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
2266 public:
2267  typedef TargetTransformInfo Result;
2268
2269  /// Default construct a target IR analysis.
2270  ///
2271  /// This will use the module's datalayout to construct a baseline
2272  /// conservative TTI result.
2273  TargetIRAnalysis();
2274 
2275  /// Construct an IR analysis pass around a target-provided callback.
2276  ///
2277  /// The callback will be called with a particular function for which the TTI
2278  /// is needed and must return a TTI object for that function.
2279  TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
2280 
2281  // Value semantics. We spell out the constructors for MSVC.
2282  TargetIRAnalysis(const TargetIRAnalysis &Arg)
2283  : TTICallback(Arg.TTICallback) {}
2284  TargetIRAnalysis(TargetIRAnalysis &&Arg)
2285  : TTICallback(std::move(Arg.TTICallback)) {}
2286  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
2287  TTICallback = RHS.TTICallback;
2288  return *this;
2289  }
2290  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
2291  TTICallback = std::move(RHS.TTICallback);
2292  return *this;
2293  }
2294 
2295  Result run(const Function &F, FunctionAnalysisManager &);
2296
2297 private:
2298  friend AnalysisInfoMixin<TargetIRAnalysis>;
2299  static AnalysisKey Key;
2300 
2301  /// The callback used to produce a result.
2302  ///
2303  /// We use a completely opaque callback so that targets can provide whatever
2304  /// mechanism they desire for constructing the TTI for a given function.
2305  ///
2306  /// FIXME: Should we really use std::function? It's relatively inefficient.
2307  /// It might be possible to arrange for even stateful callbacks to outlive
2308  /// the analysis and thus use a function_ref which would be lighter weight.
2309  /// This may also be less error prone as the callback is likely to reference
2310  /// the external TargetMachine, and that reference needs to never dangle.
2311  std::function<Result(const Function &)> TTICallback;
2312 
2313  /// Helper function used as the callback in the default constructor.
2314  static Result getDefaultTTI(const Function &F);
2315 };
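// Editor's note: a minimal sketch of constructing the analysis around a
// TargetMachine, assuming valid `TM` and `FAM` objects (this mirrors what the
// pass builders do; the names here are placeholders):
//   FunctionAnalysisManager FAM;
//   FAM.registerPass([&] {
//     return TargetIRAnalysis(
//         [&](const Function &F) { return TM->getTargetTransformInfo(F); });
//   });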
2316 
2317 /// Wrapper pass for TargetTransformInfo.
2318 ///
2319 /// This pass can be constructed from a TTI object which it stores internally
2320 /// and is queried by passes.
2321 class TargetTransformInfoWrapperPass : public ImmutablePass {
2322  TargetIRAnalysis TIRA;
2323  Optional<TargetTransformInfo> TTI;
2324
2325  virtual void anchor();
2326 
2327 public:
2328  static char ID;
2329 
2330  /// We must provide a default constructor for the pass but it should
2331  /// never be used.
2332  ///
2333  /// Use the constructor below or call one of the creation routines.
2334  TargetTransformInfoWrapperPass();
2335
2336  explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
2337
2338  TargetTransformInfo &getTTI(const Function &F);
2339 };
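// Editor's note: typical legacy pass-manager usage (editor's sketch): a pass
// declares the dependency, then queries the wrapper per function:
//   void getAnalysisUsage(AnalysisUsage &AU) const override {
//     AU.addRequired<TargetTransformInfoWrapperPass>();
//   }
//   ...
//   TargetTransformInfo &TTI =
//       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);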
2340 
2341 /// Create an analysis pass wrapper around a TTI object.
2342 ///
2343 /// This analysis pass just holds the TTI instance and makes it available to
2344 /// clients.
2345 ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
2346
2347 } // namespace llvm
2348 
2349 #endif
llvm::TargetTransformInfo::ReductionFlags::IsMaxOp
bool IsMaxOp
If the op a min/max kind, true if it's a max operation.
Definition: TargetTransformInfo.h:1336
llvm::TargetTransformInfo::CastContextHint::GatherScatter
@ GatherScatter
The cast is used with a gather/scatter.
llvm::TargetTransformInfo::isHardwareLoopProfitable
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
Query the target whether it would be profitable to convert the given loop into a hardware loop.
Definition: TargetTransformInfo.cpp:279
llvm::InstructionCost
Definition: InstructionCost.h:26
llvm::TargetTransformInfo::Concept::getExtractWithExtendCost
virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)=0
llvm::TargetTransformInfo::CacheLevel::L1D
@ L1D
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::EngineKind::Kind
Kind
Definition: ExecutionEngine.h:524
llvm::TargetTransformInfo::PSK_FastHardware
@ PSK_FastHardware
Definition: TargetTransformInfo.h:586
llvm::TargetTransformInfo::Concept::getPopcntSupport
virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit)=0
llvm::TargetTransformInfo::CastContextHint::Masked
@ Masked
The cast is used with a masked load/store.
llvm::TargetTransformInfo::getMemcpyLoopLoweringType
Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign) const
Definition: TargetTransformInfo.cpp:938
llvm::TargetTransformInfo::UnrollingPreferences::BEInsns
unsigned BEInsns
Definition: TargetTransformInfo.h:473
llvm::TargetTransformInfo::UnrollingPreferences::PartialOptSizeThreshold
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
Definition: TargetTransformInfo.h:452
llvm::TargetTransformInfo::SK_Select
@ SK_Select
Selects elements from the corresponding lane of either source operand.
Definition: TargetTransformInfo.h:848
llvm::TargetTransformInfo::isLegalToVectorizeReduction
bool isLegalToVectorizeReduction(RecurrenceDescriptor RdxDesc, ElementCount VF) const
Definition: TargetTransformInfo.cpp:999
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2265
llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
Definition: TargetTransformInfo.h:480
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:211
llvm::TargetTransformInfo::LSRCost::NumRegs
unsigned NumRegs
Definition: TargetTransformInfo.h:413
llvm::TargetTransformInfo::TCC_Expensive
@ TCC_Expensive
The cost of a 'div' instruction on x86.
Definition: TargetTransformInfo.h:264
llvm::TargetTransformInfo::UnrollingPreferences::PartialThreshold
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
Definition: TargetTransformInfo.h:448
llvm
Definition: AllocatorList.h:23
llvm::TargetTransformInfo::ReductionFlags
Flags describing the kind of vector reduction.
Definition: TargetTransformInfo.h:1334
llvm::TargetTransformInfo::instCombineIntrinsic
Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Targets can implement their own combinations for target-specific intrinsics.
Definition: TargetTransformInfo.cpp:297
llvm::TargetTransformInfo::Concept::isHardwareLoopProfitable
virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo)=0
llvm::TargetTransformInfo::Concept::isSourceOfDivergence
virtual bool isSourceOfDivergence(const Value *V)=0
llvm::TargetTransformInfo::getMinMaxReductionCost
InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Definition: TargetTransformInfo.cpp:903
llvm::TargetTransformInfo::Concept::getIntImmCost
virtual int getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind)=0
llvm::TargetTransformInfo::Concept::enableMaskedInterleavedAccessVectorization
virtual bool enableMaskedInterleavedAccessVectorization()=0
llvm::MemIntrinsicInfo::PtrVal
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.
Definition: TargetTransformInfo.h:73
llvm::TargetTransformInfo::preferPredicateOverEpilogue
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, const LoopAccessInfo *LAI) const
Query the target whether it would be prefered to create a predicated vector loop, which can avoid the...
Definition: TargetTransformInfo.cpp:285
llvm::TargetTransformInfo::Concept::rewriteIntrinsicWithAddressSpace
virtual Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const =0
llvm::TargetTransformInfo::getMinVectorRegisterBitWidth
unsigned getMinVectorRegisterBitWidth() const
Definition: TargetTransformInfo.cpp:591
llvm::HardwareLoopInfo::LoopDecrement
Value * LoopDecrement
Definition: TargetTransformInfo.h:101
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::TargetTransformInfo::getShuffleCost
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask=None, int Index=0, VectorType *SubTp=nullptr) const
Definition: TargetTransformInfo.cpp:718
llvm::TargetTransformInfo::Concept::getCostOfKeepingLiveOverCall
virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys)=0
llvm::TargetTransformInfo::MemIndexedMode
MemIndexedMode
The type of load/store indexing.
Definition: TargetTransformInfo.h:1285
llvm::TargetTransformInfo::TCK_Latency
@ TCK_Latency
The latency of instruction.
Definition: TargetTransformInfo.h:213
llvm::TargetTransformInfo::Concept::getMinVectorRegisterBitWidth
virtual unsigned getMinVectorRegisterBitWidth()=0
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:722
llvm::TargetTransformInfo::UnrollingPreferences::MaxCount
unsigned MaxCount
Definition: TargetTransformInfo.h:464
llvm::ImmutablePass
ImmutablePass class - This class is used to provide information that does not need to be run.
Definition: Pass.h:269
llvm::TargetTransformInfo::getRegisterClassName
const char * getRegisterClassName(unsigned ClassID) const
Definition: TargetTransformInfo.cpp:582
AtomicOrdering.h
llvm::ElementCount
Definition: TypeSize.h:386
llvm::TargetTransformInfo::OK_UniformValue
@ OK_UniformValue
Definition: TargetTransformInfo.h:897
llvm::TargetTransformInfo::getScalarizationOverhead
unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract) const
Estimate the overhead of scalarizing an instruction.
Definition: TargetTransformInfo.cpp:475
llvm::TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass
TargetTransformInfoWrapperPass()
We must provide a default constructor for the pass but it should never be used.
Definition: TargetTransformInfo.cpp:1415
llvm::TargetTransformInfo::Concept::enableMemCmpExpansion
virtual MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const =0
llvm::ExtractElementInst
This instruction extracts a single (scalar) element from a VectorType value.
Definition: Instructions.h:1850
llvm::TargetTransformInfo::canMacroFuseCmp
bool canMacroFuseCmp() const
Return true if the target can fuse a compare and branch.
Definition: TargetTransformInfo.cpp:358
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::TargetTransformInfo::Concept::isLegalMaskedScatter
virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment)=0
Pass.h
llvm::TargetTransformInfo::getRegisterBitWidth
TypeSize getRegisterBitWidth(RegisterKind K) const
Definition: TargetTransformInfo.cpp:586
llvm::TargetTransformInfo::PopcntSupportKind
PopcntSupportKind
Flags indicating the kind of support for population count.
Definition: TargetTransformInfo.h:586
llvm::TargetTransformInfo::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:819
llvm::TargetTransformInfo::ReductionData::ReductionData
ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)
Definition: TargetTransformInfo.h:871
llvm::TargetTransformInfo::AMK_PostIndexed
@ AMK_PostIndexed
Definition: TargetTransformInfo.h:635
llvm::TargetTransformInfoWrapperPass::getTTI
TargetTransformInfo & getTTI(const Function &F)
Definition: TargetTransformInfo.cpp:1428
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::TargetTransformInfo::getAddressComputationCost
int getAddressComputationCost(Type *Ty, ScalarEvolution *SE=nullptr, const SCEV *Ptr=nullptr) const
Definition: TargetTransformInfo.cpp:880
InstCombiner
Machine InstCombiner
Definition: MachineCombiner.cpp:136
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::TargetTransformInfo::PeelingPreferences::AllowPeeling
bool AllowPeeling
Allow peeling off loop iterations.
Definition: TargetTransformInfo.h:535
llvm::TargetTransformInfo::Concept::hasVolatileVariant
virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace)=0
llvm::TargetTransformInfo::Concept::isFPVectorizationPotentiallyUnsafe
virtual bool isFPVectorizationPotentiallyUnsafe()=0
llvm::TargetTransformInfo::Concept::isLegalMaskedExpandLoad
virtual bool isLegalMaskedExpandLoad(Type *DataType)=0
llvm::TargetTransformInfo::Concept::isAlwaysUniform
virtual bool isAlwaysUniform(const Value *V)=0
llvm::TargetTransformInfo::Concept::getMaxPrefetchIterationsAhead
virtual unsigned getMaxPrefetchIterationsAhead() const =0
llvm::TargetTransformInfo::getIntImmCost
int getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing for the given integer immediate of the specified type.
Definition: TargetTransformInfo.cpp:548
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::TargetTransformInfo::MemCmpExpansionOptions::AllowOverlappingLoads
bool AllowOverlappingLoads
Definition: TargetTransformInfo.h:773
llvm::IntrinsicCostAttributes::getReturnType
Type * getReturnType() const
Definition: TargetTransformInfo.h:148
llvm::TargetTransformInfo::Concept::getScalarizationOverhead
virtual unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract)=0
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:443
llvm::TargetTransformInfo::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
Definition: TargetTransformInfo.cpp:860
llvm::TargetTransformInfo::Concept::getRegisterClassForType
virtual unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const =0
llvm::TargetTransformInfo::RGK_Scalar
@ RGK_Scalar
Definition: TargetTransformInfo.h:924
llvm::TargetTransformInfo::Concept::enableInterleavedAccessVectorization
virtual bool enableInterleavedAccessVectorization()=0
llvm::TargetTransformInfo::getAssumedAddrSpace
unsigned getAssumedAddrSpace(const Value *V) const
Definition: TargetTransformInfo.cpp:266
llvm::IntrinsicCostAttributes::getInst
const IntrinsicInst * getInst() const
Definition: TargetTransformInfo.h:147
llvm::TargetTransformInfo::Concept::useGPUDivergenceAnalysis
virtual bool useGPUDivergenceAnalysis()=0
llvm::TargetTransformInfo::isLegalICmpImmediate
bool isLegalICmpImmediate(int64_t Imm) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
Definition: TargetTransformInfo.cpp:333
llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJamInnerLoopThreshold
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
Definition: TargetTransformInfo.h:499
llvm::TargetIRAnalysis::operator=
TargetIRAnalysis & operator=(const TargetIRAnalysis &RHS)
Definition: TargetTransformInfo.h:2286
llvm::TargetIRAnalysis::TargetIRAnalysis
TargetIRAnalysis(TargetIRAnalysis &&Arg)
Definition: TargetTransformInfo.h:2284
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::TargetTransformInfo::Concept::preferPredicateOverEpilogue
virtual bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, const LoopAccessInfo *LAI)=0
llvm::TargetTransformInfo::areFunctionArgsABICompatible
bool areFunctionArgsABICompatible(const Function *Caller, const Function *Callee, SmallPtrSetImpl< Argument * > &Args) const
Definition: TargetTransformInfo.cpp:959
llvm::HardwareLoopInfo::ExitBranch
BranchInst * ExitBranch
Definition: TargetTransformInfo.h:98
llvm::TargetTransformInfo::UnrollingPreferences::UnrollRemainder
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
Definition: TargetTransformInfo.h:492
llvm::TargetTransformInfo::UnrollingPreferences::Count
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
Definition: TargetTransformInfo.h:457
llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition: TargetTransformInfo.h:214
llvm::TargetTransformInfo::shouldBuildLookupTables
bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
Definition: TargetTransformInfo.cpp:457
llvm::TargetTransformInfo::LSRCost::NumIVMuls
unsigned NumIVMuls
Definition: TargetTransformInfo.h:415
llvm::IntrinsicCostAttributes::getScalarizationCost
unsigned getScalarizationCost() const
Definition: TargetTransformInfo.h:150
llvm::HardwareLoopInfo::isHardwareLoopCandidate
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
Definition: TargetTransformInfo.cpp:101
llvm::TargetTransformInfo::UnrollingPreferences::Partial
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Definition: TargetTransformInfo.h:476
llvm::TargetTransformInfo::getMaximumVF
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const
Definition: TargetTransformInfo.cpp:608
llvm::TargetTransformInfo::Concept::getEstimatedNumberOfCaseClusters
virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)=0
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
llvm::TargetTransformInfo::useColdCCForColdCall
bool useColdCCForColdCall(Function &F) const
Return true if the input function, which is cold at all call sites, should use the coldcc calling conventi...
Definition: TargetTransformInfo.cpp:470
llvm::TargetTransformInfo::Concept::getGatherScatterOpCost
virtual InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:529
llvm::TargetTransformInfo::operator=
TargetTransformInfo & operator=(TargetTransformInfo &&RHS)
Definition: TargetTransformInfo.cpp:192
llvm::TargetTransformInfo::Concept::shouldMaximizeVectorBandwidth
virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const =0
llvm::TargetTransformInfo::Concept::getPeelingPreferences
virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP)=0
llvm::HardwareLoopInfo::L
Loop * L
Definition: TargetTransformInfo.h:96
llvm::TargetTransformInfo::Concept::isLSRCostLess
virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2)=0
llvm::TargetTransformInfo::isLegalNTLoad
bool isLegalNTLoad(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal load.
Definition: TargetTransformInfo.cpp:390
llvm::TargetTransformInfo::isLegalNTStore
bool isLegalNTStore(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal store.
Definition: TargetTransformInfo.cpp:385
llvm::TargetTransformInfo::UnrollingPreferences::FullUnrollMaxCount
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
Definition: TargetTransformInfo.h:468
llvm::Optional
Definition: APInt.h:34
ForceNestedLoop
static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))
llvm::TargetTransformInfo::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:791
llvm::TargetTransformInfo::OP_PowerOf2
@ OP_PowerOf2
Definition: TargetTransformInfo.h:903
llvm::TargetTransformInfo::getPredictableBranchThreshold
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
Definition: TargetTransformInfo.cpp:232
Operator.h
llvm::TargetTransformInfo::getInlinerVectorBonusPercent
int getInlinerVectorBonusPercent() const
Definition: TargetTransformInfo.cpp:206
llvm::TargetTransformInfo::Concept::isTruncateFree
virtual bool isTruncateFree(Type *Ty1, Type *Ty2)=0
llvm::FastMathFlags
Convenience struct for specifying and reasoning about fast-math flags.
Definition: Operator.h:160
llvm::TargetTransformInfo::Concept::getAtomicMemIntrinsicMaxElementSize
virtual unsigned getAtomicMemIntrinsicMaxElementSize() const =0
llvm::TargetTransformInfo::Concept::emitGetActiveLaneMask
virtual bool emitGetActiveLaneMask()=0
llvm::TargetTransformInfo::enableInterleavedAccessVectorization
bool enableInterleavedAccessVectorization() const
Enable matching of interleaved access groups.
Definition: TargetTransformInfo.cpp:500
llvm::TargetTransformInfo::Concept::prefersVectorizedAddressing
virtual bool prefersVectorizedAddressing()=0
llvm::TargetTransformInfo::Concept::getUnrollingPreferences
virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP)=0
llvm::TargetTransformInfo::SK_PermuteSingleSrc
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
Definition: TargetTransformInfo.h:856
llvm::TargetTransformInfo::Concept::getOrCreateResultFromMemIntrinsic
virtual Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType)=0
llvm::TargetTransformInfo::Concept::getRegisterBitWidth
virtual TypeSize getRegisterBitWidth(RegisterKind K) const =0
llvm::TargetTransformInfo::UnrollingPreferences::AllowExpensiveTripCount
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
Definition: TargetTransformInfo.h:485
llvm::TargetTransformInfo::preferInLoopReduction
bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags Flags) const
Definition: TargetTransformInfo.cpp:1018
llvm::TargetTransformInfo::CacheLevel
CacheLevel
The possible cache levels.
Definition: TargetTransformInfo.h:965
llvm::TargetTransformInfo::Concept
Definition: TargetTransformInfo.h:1401
llvm::TargetTransformInfo::Concept::isLegalNTStore
virtual bool isLegalNTStore(Type *DataType, Align Alignment)=0
llvm::TargetTransformInfo::LSRCost::Insns
unsigned Insns
TODO: Some of these could be merged.
Definition: TargetTransformInfo.h:412
llvm::TargetTransformInfo::getIntImmCodeSizeCost
int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) const
Return the expected cost for the given integer when optimising for size.
Definition: TargetTransformInfo.cpp:540
llvm::TargetTransformInfo::SK_Broadcast
@ SK_Broadcast
Broadcast element 0 to all other elements.
Definition: TargetTransformInfo.h:846
llvm::TargetTransformInfo::Concept::isLegalNTLoad
virtual bool isLegalNTLoad(Type *DataType, Align Alignment)=0
llvm::IntrinsicCostAttributes::skipScalarizationCost
bool skipScalarizationCost() const
Definition: TargetTransformInfo.h:158
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::HardwareLoopInfo::IsNestingLegal
bool IsNestingLegal
Definition: TargetTransformInfo.h:103
llvm::TargetTransformInfo::LSRCost::AddRecCost
unsigned AddRecCost
Definition: TargetTransformInfo.h:414
llvm::IntrinsicCostAttributes::getFlags
FastMathFlags getFlags() const
Definition: TargetTransformInfo.h:149
llvm::TargetTransformInfo::getArithmeticReductionCost
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, bool IsPairwiseForm, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of performing a vector reduction.
Definition: TargetTransformInfo.cpp:894
llvm::TargetTransformInfo::isLegalMaskedScatter
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const
Return true if the target supports masked scatter.
Definition: TargetTransformInfo.cpp:399
llvm::TargetTransformInfo::LSRCost::SetupCost
unsigned SetupCost
Definition: TargetTransformInfo.h:418
llvm::TargetTransformInfo::Concept::isLegalMaskedLoad
virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment)=0
llvm::TargetTransformInfo::Concept::canMacroFuseCmp
virtual bool canMacroFuseCmp()=0
llvm::TargetTransformInfo::Concept::isTypeLegal
virtual bool isTypeLegal(Type *Ty)=0
llvm::TargetTransformInfo::getGISelRematGlobalCost
unsigned getGISelRematGlobalCost() const
Definition: TargetTransformInfo.cpp:1032
llvm::IntrinsicCostAttributes::getArgTypes
const SmallVectorImpl< Type * > & getArgTypes() const
Definition: TargetTransformInfo.h:152
llvm::TargetTransformInfo::areInlineCompatible
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
Definition: TargetTransformInfo.cpp:954
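An inliner-style client would typically consult this hook together with areFunctionArgsABICompatible above. A sketch, assuming Caller, Callee and ArgsToPromote are supplied by the surrounding pass (the variable names are illustrative):
  // Sketch: refuse to inline across incompatible targets or argument ABIs.
  bool SafeToInline =
      TTI.areInlineCompatible(Caller, Callee) &&
      TTI.areFunctionArgsABICompatible(Caller, Callee, ArgsToPromote);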
llvm::TargetTransformInfo::Concept::getMinimumVF
virtual ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const =0
llvm::TargetTransformInfo::isTypeLegal
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
Definition: TargetTransformInfo.cpp:449
llvm::TargetTransformInfo::SK_PermuteTwoSrc
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
Definition: TargetTransformInfo.h:854
llvm::TargetTransformInfo::Concept::getCacheSize
virtual Optional< unsigned > getCacheSize(CacheLevel Level) const =0
llvm::TargetTransformInfo::PeelingPreferences::PeelProfiledIterations
bool PeelProfiledIterations
Allow peeling based on profile.
Definition: TargetTransformInfo.h:542
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::TargetTransformInfo::getMinimumVF
ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const
Definition: TargetTransformInfo.cpp:603
llvm::MemIntrinsicInfo::isUnordered
bool isUnordered() const
Definition: TargetTransformInfo.h:85
llvm::TargetTransformInfo::Concept::getPredictableBranchThreshold
virtual BranchProbability getPredictableBranchThreshold()=0
llvm::TargetTransformInfo::isProfitableLSRChainElement
bool isProfitableLSRChainElement(Instruction *I) const
Definition: TargetTransformInfo.cpp:354
llvm::TargetTransformInfo::Concept::useAA
virtual bool useAA()=0
llvm::TargetTransformInfo::getCastContextHint
static CastContextHint getCastContextHint(const Instruction *I)
Calculates a CastContextHint from I.
Definition: TargetTransformInfo.cpp:729
llvm::TargetTransformInfo::getOrCreateResultFromMemIntrinsic
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) const
Definition: TargetTransformInfo.cpp:933
llvm::TargetTransformInfo::isLegalToVectorizeLoad
bool isLegalToVectorizeLoad(LoadInst *LI) const
Definition: TargetTransformInfo.cpp:979
llvm::MemIntrinsicInfo::Ordering
AtomicOrdering Ordering
Definition: TargetTransformInfo.h:76
llvm::TargetTransformInfo::Concept::useColdCCForColdCall
virtual bool useColdCCForColdCall(Function &F)=0
llvm::TargetTransformInfoWrapperPass::ID
static char ID
Definition: TargetTransformInfo.h:2328
llvm::TargetTransformInfo::ReductionKind
ReductionKind
Kind of the reduction data.
Definition: TargetTransformInfo.h:861
llvm::TargetTransformInfo::TargetCostConstants
TargetCostConstants
Underlying constants for 'cost' values in this interface.
Definition: TargetTransformInfo.h:261
llvm::TargetTransformInfo::getPopcntSupport
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
Return hardware support for population count.
Definition: TargetTransformInfo.cpp:522
llvm::TargetIRAnalysis::TargetIRAnalysis
TargetIRAnalysis(const TargetIRAnalysis &Arg)
Definition: TargetTransformInfo.h:2282
llvm::TargetTransformInfo::OP_None
@ OP_None
Definition: TargetTransformInfo.h:903
llvm::TargetTransformInfo::ShuffleKind
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Definition: TargetTransformInfo.h:845
llvm::TargetTransformInfo::getGEPCost
int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, TargetCostKind CostKind=TCK_SizeAndLatency) const
Estimate the cost of a GEP operation when lowered.
Definition: TargetTransformInfo.cpp:210
llvm::TargetTransformInfo::getPreferredAddressingMode
AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
Return the preferred addressing mode LSR should make efforts to generate.
Definition: TargetTransformInfo.cpp:370
llvm::TargetTransformInfo::CastContextHint
CastContextHint
Represents a hint about the context in which a cast is used.
Definition: TargetTransformInfo.h:1079
llvm::User
Definition: User.h:44
llvm::TargetTransformInfo::useGPUDivergenceAnalysis
bool useGPUDivergenceAnalysis() const
Return true if the target prefers to use GPU divergence analysis to replace the legacy version.
Definition: TargetTransformInfo.cpp:240
llvm::TargetTransformInfo::UnrollingPreferences::Force
bool Force
Apply loop unrolling to any kind of loop (mainly to loops that fail runtime unrolling).
Definition: TargetTransformInfo.h:488
InstrTypes.h
llvm::TargetTransformInfo::Concept::getMaxVScale
virtual Optional< unsigned > getMaxVScale() const =0
llvm::TargetTransformInfo::Concept::getPrefetchDistance
virtual unsigned getPrefetchDistance() const =0
llvm::TargetTransformInfo::hasBranchDivergence
bool hasBranchDivergence() const
Return true if branch divergence exists.
Definition: TargetTransformInfo.cpp:236
llvm::TargetTransformInfo::Concept::supportsEfficientVectorElementLoadStore
virtual bool supportsEfficientVectorElementLoadStore()=0
llvm::TargetTransformInfo::Concept::canSaveCmp
virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo)=0
llvm::TargetTransformInfo::getNumberOfParts
unsigned getNumberOfParts(Type *Tp) const
Definition: TargetTransformInfo.cpp:876
llvm::TargetTransformInfo::Concept::isFCmpOrdCheaperThanFCmpZero
virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)=0
llvm::TargetTransformInfo::shouldMaximizeVectorBandwidth
bool shouldMaximizeVectorBandwidth(bool OptSize) const
Definition: TargetTransformInfo.cpp:599
llvm::TargetTransformInfo::Concept::isNumRegsMajorCostOfLSR
virtual bool isNumRegsMajorCostOfLSR()=0
llvm::TargetTransformInfo::supportsScalableVectors
bool supportsScalableVectors() const
Definition: TargetTransformInfo.cpp:1036
llvm::TargetTransformInfo::Concept::getExtendedAddReductionCost
virtual InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput)=0
llvm::TargetTransformInfo::Concept::getScalingFactorCost
virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace)=0
llvm::TargetTransformInfo::isIndexedLoadLegal
bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const
Definition: TargetTransformInfo.cpp:965
llvm::TargetTransformInfo::CastContextHint::Interleave
@ Interleave
The cast is used with an interleaved load/store.
llvm::TargetTransformInfo::UnrollingPreferences::MaxIterationsCountToAnalyze
unsigned MaxIterationsCountToAnalyze
Don't allow loop unrolling to simulate more than this number of iterations when checking full unroll ...
Definition: TargetTransformInfo.h:502
llvm::TargetTransformInfo::Concept::getNumberOfRegisters
virtual unsigned getNumberOfRegisters(unsigned ClassID) const =0
llvm::TargetTransformInfo::Concept::isLegalToVectorizeLoadChain
virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const =0
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJam
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
Definition: TargetTransformInfo.h:494
llvm::TargetTransformInfo::isLegalMaskedExpandLoad
bool isLegalMaskedExpandLoad(Type *DataType) const
Return true if the target supports masked expand load.
Definition: TargetTransformInfo.cpp:408
llvm::TargetTransformInfo::Concept::simplifyDemandedVectorEltsIntrinsic
virtual Optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)=0
llvm::TargetTransformInfo::Concept::isLegalMaskedGather
virtual bool isLegalMaskedGather(Type *DataType, Align Alignment)=0
llvm::TargetTransformInfo::Concept::hasBranchDivergence
virtual bool hasBranchDivergence()=0
llvm::Instruction
Definition: Instruction.h:45
llvm::TargetTransformInfo::RK_Arithmetic
@ RK_Arithmetic
Binary reduction data.
Definition: TargetTransformInfo.h:863
llvm::TargetTransformInfo::Concept::enableWritePrefetching
virtual bool enableWritePrefetching() const =0
llvm::TargetTransformInfo::isLSRCostLess
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2) const
Return true if the LSR cost of C1 is lower than that of C2.
Definition: TargetTransformInfo.cpp:346
llvm::TargetTransformInfo::MIM_PreDec
@ MIM_PreDec
Pre-decrementing.
Definition: TargetTransformInfo.h:1288
llvm::HardwareLoopInfo::PerformEntryTest
bool PerformEntryTest
Definition: TargetTransformInfo.h:107
llvm::TargetTransformInfo::Concept::getMaskedMemoryOpCost
virtual InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::isLegalMaskedLoad
bool isLegalMaskedLoad(Type *DataType, Align Alignment) const
Return true if the target supports masked load.
Definition: TargetTransformInfo.cpp:380
llvm::TargetTransformInfo::getIntImmCostInst
int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr) const
Return the expected cost of materialization for the given integer immediate of the specified type for...
Definition: TargetTransformInfo.cpp:555
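A sketch of a hoisting decision built on this query; TTI, Imm and Ty are assumed to be in scope, and the threshold choice is illustrative:
  // Sketch: treat an add immediate as hoistable only when it is not free.
  int ImmCost = TTI.getIntImmCostInst(
      llvm::Instruction::Add, /*Idx=*/1, Imm, Ty,
      llvm::TargetTransformInfo::TCK_SizeAndLatency);
  bool WorthHoisting = ImmCost > llvm::TargetTransformInfo::TCC_Basic;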
llvm::TargetTransformInfo::isSourceOfDivergence
bool isSourceOfDivergence(const Value *V) const
Returns whether V is a source of divergence.
Definition: TargetTransformInfo.cpp:244
llvm::TargetTransformInfo::simplifyDemandedVectorEltsIntrinsic
Optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Can be used to implement target-specific instruction combining.
Definition: TargetTransformInfo.cpp:309
llvm::TargetTransformInfo::CastContextHint::Reversed
@ Reversed
The cast is used with a reversed load/store.
llvm::TargetTransformInfo::getPrefetchDistance
unsigned getPrefetchDistance() const
Definition: TargetTransformInfo.cpp:633
llvm::HardwareLoopInfo::CounterInReg
bool CounterInReg
Definition: TargetTransformInfo.h:105
llvm::TargetTransformInfo::matchVectorReduction
static ReductionKind matchVectorReduction(const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty, bool &IsPairwise)
Definition: TargetTransformInfo.cpp:1319
llvm::TargetTransformInfo::Concept::isIndexedStoreLegal
virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const =0
llvm::TargetTransformInfo::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP) const
Get target-customized preferences for the generic loop unrolling transformation.
Definition: TargetTransformInfo.cpp:319
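Clients seed the struct with their own defaults and then let the target overwrite what it cares about. A minimal sketch, with illustrative initial values and TTI, L, SE assumed in scope:
  // Sketch: obtain target-adjusted unroll parameters for loop L.
  llvm::TargetTransformInfo::UnrollingPreferences UP;
  UP.Threshold = 150; // generic starting threshold; the target may adjust it
  UP.Partial = false;
  UP.Count = 0;       // 0 = let the unroller pick the factor
  TTI.getUnrollingPreferences(L, SE, UP);
  unsigned Factor = UP.Count ? UP.Count : UP.DefaultUnrollRuntimeCount;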
llvm::TargetTransformInfo::Concept::supportsScalableVectors
virtual bool supportsScalableVectors() const =0
llvm::TargetTransformInfo::Concept::isLegalToVectorizeLoad
virtual bool isLegalToVectorizeLoad(LoadInst *LI) const =0
llvm::TargetTransformInfo::Concept::isLegalToVectorizeStoreChain
virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const =0
llvm::TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const
Return true if it is faster to check if a floating-point value is NaN (or not-NaN) versus a compariso...
Definition: TargetTransformInfo.cpp:530
llvm::AnalysisManager::Invalidator
API to communicate dependencies between analyses during invalidation.
Definition: PassManager.h:670
llvm::TargetTransformInfo::Concept::getMemcpyLoopResidualLoweringType
virtual void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign) const =0
llvm::TargetTransformInfo::getScalingFactorCost
int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
Definition: TargetTransformInfo.cpp:425
llvm::TargetTransformInfo::RGK_FixedWidthVector
@ RGK_FixedWidthVector
Definition: TargetTransformInfo.h:924
llvm::TargetTransformInfo::Concept::allowsMisalignedMemoryAccesses
virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, bool *Fast)=0
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
IVDescriptors.h
llvm::TargetTransformInfo::Concept::areFunctionArgsABICompatible
virtual bool areFunctionArgsABICompatible(const Function *Caller, const Function *Callee, SmallPtrSetImpl< Argument * > &Args) const =0
llvm::TargetTransformInfo::Concept::getMemcpyLoopLoweringType
virtual Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign) const =0
llvm::IntrinsicCostAttributes
Definition: TargetTransformInfo.h:116
BranchProbability.h
llvm::TargetTransformInfo::RK_UnsignedMinMax
@ RK_UnsignedMinMax
Unsigned min/max reduction data.
Definition: TargetTransformInfo.h:865
llvm::TargetTransformInfo::Concept::getDataLayout
virtual const DataLayout & getDataLayout() const =0
llvm::TargetTransformInfo::hasVolatileVariant
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const
Return true if the given instruction (assumed to be a memory access instruction) has a volatile varia...
Definition: TargetTransformInfo.cpp:416
llvm::TargetTransformInfo::PSK_Software
@ PSK_Software
Definition: TargetTransformInfo.h:586
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::TargetTransformInfo::isLegalMaskedCompressStore
bool isLegalMaskedCompressStore(Type *DataType) const
Return true if the target supports masked compress store.
Definition: TargetTransformInfo.cpp:404
llvm::TargetTransformInfo::haveFastSqrt
bool haveFastSqrt(Type *Ty) const
Return true if the hardware has a fast square-root instruction.
Definition: TargetTransformInfo.cpp:526
llvm::createTargetTransformInfoWrapperPass
ImmutablePass * createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA)
Create an analysis pass wrapper around a TTI object.
Definition: TargetTransformInfo.cpp:1435
llvm::TargetTransformInfo::ReductionFlags::ReductionFlags
ReductionFlags()
Definition: TargetTransformInfo.h:1335
llvm::TargetTransformInfo::SK_Reverse
@ SK_Reverse
Reverse the order of the vector.
Definition: TargetTransformInfo.h:847
llvm::TargetTransformInfo::Concept::collectFlatAddressOperands
virtual bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const =0
llvm::TargetTransformInfo::getFlatAddressSpace
unsigned getFlatAddressSpace() const
Returns the address space ID for a target's 'flat' address space.
Definition: TargetTransformInfo.cpp:252
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:391
llvm::AtomicOrdering
AtomicOrdering
Atomic ordering for LLVM's memory model.
Definition: AtomicOrdering.h:56
llvm::TargetTransformInfo::CastContextHint::Normal
@ Normal
The cast is used with a normal load/store.
llvm::TargetTransformInfo::Concept::~Concept
virtual ~Concept()=0
Definition: TargetTransformInfo.cpp:1389
llvm::TargetTransformInfo::Concept::getOperandsScalarizationOverhead
virtual unsigned getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys)=0
llvm::TargetTransformInfo::Concept::getIntrinsicInstrCost
virtual InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)=0
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:78
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:303
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::TargetTransformInfo::SK_InsertSubvector
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
Definition: TargetTransformInfo.h:852
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::TargetTransformInfo::Concept::getInstructionLatency
virtual InstructionCost getInstructionLatency(const Instruction *I)=0
llvm::TargetTransformInfo::Concept::isProfitableLSRChainElement
virtual bool isProfitableLSRChainElement(Instruction *I)=0
llvm::TargetTransformInfo::shouldBuildLookupTablesForConstant
bool shouldBuildLookupTablesForConstant(Constant *C) const
Return true if switches should be turned into lookup tables containing this constant value for the ta...
Definition: TargetTransformInfo.cpp:461
llvm::TargetTransformInfo::MIM_PostInc
@ MIM_PostInc
Post-incrementing.
Definition: TargetTransformInfo.h:1289
llvm::HardwareLoopInfo::HardwareLoopInfo
HardwareLoopInfo()=delete
llvm::TargetTransformInfo::LSRCost
Definition: TargetTransformInfo.h:409
llvm::TargetTransformInfo::collectFlatAddressOperands
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
Return any intrinsic address operand indexes which may be rewritten if they use a flat address space ...
Definition: TargetTransformInfo.cpp:256
llvm::TargetTransformInfo::isLegalMaskedGather
bool isLegalMaskedGather(Type *DataType, Align Alignment) const
Return true if the target supports masked gather.
Definition: TargetTransformInfo.cpp:394
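A vectorizer-style legality check, sketched with VecTy and Alignment assumed to describe the memory access being widened:
  // Sketch: choose a lowering for a masked, non-consecutive memory access.
  bool CanGather = TTI.isLegalMaskedGather(VecTy, Alignment);
  bool CanScatter = TTI.isLegalMaskedScatter(VecTy, Alignment);
  // If the needed form is illegal, the access must be scalarized per lane.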
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2321
llvm::TargetTransformInfo::Concept::getInterleavedMemoryOpCost
virtual InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)=0
llvm::TargetTransformInfo::emitGetActiveLaneMask
bool emitGetActiveLaneMask() const
Query the target whether lowering of the llvm.get.active.lane.mask intrinsic is supported.
Definition: TargetTransformInfo.cpp:292
llvm::TargetTransformInfo::preferPredicatedReductionSelect
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, ReductionFlags Flags) const
Definition: TargetTransformInfo.cpp:1023
llvm::TargetTransformInfo::Concept::hasDivRemOp
virtual bool hasDivRemOp(Type *DataType, bool IsSigned)=0
llvm::TargetTransformInfo::getInterleavedMemoryOpCost
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, bool UseMaskForCond=false, bool UseMaskForGaps=false) const
Definition: TargetTransformInfo.cpp:848
llvm::TargetTransformInfo::ReductionFlags::NoNaN
bool NoNaN
If op is an fp min/max, whether NaNs may be present.
Definition: TargetTransformInfo.h:1338
llvm::TargetTransformInfo::Concept::shouldBuildLookupTables
virtual bool shouldBuildLookupTables()=0
llvm::TargetTransformInfo::OK_UniformConstantValue
@ OK_UniformConstantValue
Definition: TargetTransformInfo.h:898
llvm::TargetIRAnalysis::Result
TargetTransformInfo Result
Definition: TargetTransformInfo.h:2267
llvm::TargetTransformInfo::getMaxInterleaveFactor
unsigned getMaxInterleaveFactor(unsigned VF) const
Definition: TargetTransformInfo.cpp:652
llvm::TargetTransformInfo::getRegisterClassForType
unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
Definition: TargetTransformInfo.cpp:577
llvm::TargetTransformInfo::Concept::getMaximumVF
virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const =0
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::AnalysisKey
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: PassManager.h:72
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:423
llvm::TargetTransformInfo::Concept::getArithmeticInstrCost
virtual InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, OperandValueKind Opd1Info, OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo, ArrayRef< const Value * > Args, const Instruction *CxtI=nullptr)=0
llvm::AtomicOrdering::Unordered
@ Unordered
llvm::TargetTransformInfo::OperandValueProperties
OperandValueProperties
Additional properties of an operand's values.
Definition: TargetTransformInfo.h:903
llvm::TargetTransformInfo::ReductionData::hasSameData
bool hasSameData(ReductionData &RD) const
Definition: TargetTransformInfo.h:879
llvm::TargetTransformInfo::Concept::isLegalMaskedStore
virtual bool isLegalMaskedStore(Type *DataType, Align Alignment)=0
llvm::TargetTransformInfo::shouldConsiderAddressTypePromotion
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
Definition: TargetTransformInfo.cpp:613
llvm::LoopAccessInfo
Drive the analysis of memory accesses in the loop.
Definition: LoopAccessAnalysis.h:519
llvm::TargetTransformInfo::getUserCost
InstructionCost getUserCost(const User *U, TargetCostKind CostKind) const
This is a helper function which calls the two-argument getUserCost with Operands which are the curren...
Definition: TargetTransformInfo.h:325
llvm::TargetTransformInfo::Concept::getTgtMemIntrinsic
virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)=0
llvm::TargetTransformInfo::PeelingPreferences::AllowLoopNestsPeeling
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
Definition: TargetTransformInfo.h:537
llvm::TargetTransformInfo::rewriteIntrinsicWithAddressSpace
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
Rewrite intrinsic call II such that OldV will be replaced with NewV, which has a different address sp...
Definition: TargetTransformInfo.cpp:270
llvm::TargetTransformInfo::Concept::getShuffleCost
virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, int Index, VectorType *SubTp)=0
llvm::TargetTransformInfo::Concept::shouldExpandReduction
virtual bool shouldExpandReduction(const IntrinsicInst *II) const =0
llvm::TargetTransformInfo::Concept::getLoadVectorFactor
virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const =0
llvm::TargetTransformInfo::getGatherScatterOpCost
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:839
llvm::TargetTransformInfo::Concept::getCastInstrCost
virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I)=0
llvm::TargetTransformInfo::Concept::isLoweredToCall
virtual bool isLoweredToCall(const Function *F)=0
llvm::TargetTransformInfo::LSRWithInstrQueries
bool LSRWithInstrQueries() const
Return true if the loop strength reduce pass should make Instruction* based TTI queries to isLegalAdd...
Definition: TargetTransformInfo.cpp:435
llvm::TargetTransformInfo::isLegalToVectorizeLoadChain
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
Definition: TargetTransformInfo.cpp:987
llvm::TargetTransformInfo::Concept::getMinMaxReductionCost
virtual InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::Concept::getIntImmCodeSizeCost
virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty)=0
llvm::TargetTransformInfo::isLegalAddImmediate
bool isLegalAddImmediate(int64_t Imm) const
Return true if the specified immediate is a legal add immediate, that is, the target has an add instruction...
Definition: TargetTransformInfo.cpp:329
llvm::CmpInst::BAD_ICMP_PREDICATE
@ BAD_ICMP_PREDICATE
Definition: InstrTypes.h:755
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:163
llvm::TargetTransformInfo::LSRCost::ScaleCost
unsigned ScaleCost
Definition: TargetTransformInfo.h:419
llvm::TargetTransformInfo::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace=0, Align Alignment=Align(1), bool *Fast=nullptr) const
Determine if the target supports unaligned memory accesses.
Definition: TargetTransformInfo.cpp:512
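Both legality and speed matter here. A sketch, assuming Ctx is the LLVMContext of the module being compiled and the access width is illustrative:
  // Sketch: probe an unaligned 128-bit access in address space 0.
  bool Fast = false;
  bool Allowed = TTI.allowsMisalignedMemoryAccesses(
      Ctx, /*BitWidth=*/128, /*AddressSpace=*/0, llvm::Align(1), &Fast);
  bool EmitUnaligned = Allowed && Fast; // otherwise realign or split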
llvm::TargetTransformInfo::isLoweredToCall
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
Definition: TargetTransformInfo.cpp:275
llvm::TargetTransformInfo::Concept::getMemcpyCost
virtual int getMemcpyCost(const Instruction *I)=0
llvm::TargetTransformInfo::getIntImmCostIntrin
int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Definition: TargetTransformInfo.cpp:565
llvm::TargetTransformInfo::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:772
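The CastContextHint argument lets the target price the same cast differently depending on the load/store pattern around it. A sketch, assuming ZExt is the zero-extend instruction being costed and DstTy/SrcTy are its destination and source types:
  // Sketch: cost a zero-extend in the context it actually appears in.
  llvm::TargetTransformInfo::CastContextHint CCH =
      llvm::TargetTransformInfo::getCastContextHint(ZExt);
  llvm::InstructionCost Cost = TTI.getCastInstrCost(
      llvm::Instruction::ZExt, DstTy, SrcTy, CCH,
      llvm::TargetTransformInfo::TCK_RecipThroughput, ZExt);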
llvm::TargetTransformInfo::Concept::shouldBuildRelLookupTables
virtual bool shouldBuildRelLookupTables()=0
llvm::TargetTransformInfo::PSK_SlowHardware
@ PSK_SlowHardware
Definition: TargetTransformInfo.h:586
llvm::TargetTransformInfo::Concept::getRegisterClassName
virtual const char * getRegisterClassName(unsigned ClassID) const =0
llvm::TargetTransformInfo::getCostOfKeepingLiveOverCall
unsigned getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const
Definition: TargetTransformInfo.cpp:920
llvm::AnalysisInfoMixin
A CRTP mix-in that provides informational APIs needed for analysis passes.
Definition: PassManager.h:391
llvm::TargetTransformInfo::ReductionFlags::IsSigned
bool IsSigned
Whether the operation is a signed int reduction.
Definition: TargetTransformInfo.h:1337
llvm::TargetTransformInfo::ReductionData::ReductionData
ReductionData()=delete
llvm::TargetTransformInfo::OperandValueKind
OperandValueKind
Additional information about an operand's possible values.
Definition: TargetTransformInfo.h:895
llvm::TargetTransformInfo::getMemcpyLoopResidualLoweringType
void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign) const
Definition: TargetTransformInfo.cpp:945
llvm::TargetTransformInfo::Concept::instCombineIntrinsic
virtual Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II)=0
llvm::MemIntrinsicInfo::ReadMem
bool ReadMem
Definition: TargetTransformInfo.h:81
llvm::TargetTransformInfo::Concept::getCmpSelInstrCost
virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I)=0
llvm::TargetTransformInfo::MemCmpExpansionOptions::MaxNumLoads
unsigned MaxNumLoads
Definition: TargetTransformInfo.h:755
InstructionCost.h
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:71
llvm::TargetTransformInfo::canSaveCmp
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const
Return true if the target can save a compare for loop count, for example hardware loop saves a compar...
Definition: TargetTransformInfo.cpp:362
llvm::TargetTransformInfo::isTruncateFree
bool isTruncateFree(Type *Ty1, Type *Ty2) const
Return true if it's free to truncate a value of type Ty1 to type Ty2.
Definition: TargetTransformInfo.cpp:439
llvm::TargetTransformInfo::prefersVectorizedAddressing
bool prefersVectorizedAddressing() const
Return true if the target doesn't mind addresses in vectors.
Definition: TargetTransformInfo.cpp:421
llvm::TargetTransformInfo::MemCmpExpansionOptions
Returns options for expansion of memcmp. IsZeroCmp is true if the expansion is for memcmp(p1, p2, s) == 0.
Definition: TargetTransformInfo.h:750
llvm::TargetTransformInfo::TCC_Free
@ TCC_Free
Expected to fold away in lowering.
Definition: TargetTransformInfo.h:262
llvm::TargetTransformInfo::Concept::getGEPCost
virtual int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::PeelingPreferences::PeelCount
unsigned PeelCount
A forced peeling factor (the number of bodies of the original loop that should be peeled off before t...
Definition: TargetTransformInfo.h:533
llvm::TargetTransformInfo::supportsEfficientVectorElementLoadStore
bool supportsEfficientVectorElementLoadStore() const
If the target has efficient vector element load/store instructions, it can return true here so that inser...
Definition: TargetTransformInfo.cpp:486
llvm::TargetTransformInfo::enableMemCmpExpansion
MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
Definition: TargetTransformInfo.cpp:496
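A returned options object that converts to false disables expansion. A sketch for the memcmp(p, q, n) == 0 case, assuming TTI is in scope:
  // Sketch: ask the target how (and whether) to expand memcmp inline.
  auto Options = TTI.enableMemCmpExpansion(/*OptSize=*/false,
                                           /*IsZeroCmp=*/true);
  if (Options) {
    // Options.LoadSizes lists the allowed load widths;
    // Options.MaxNumLoads caps the total number of loads emitted.
  }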
llvm::TargetTransformInfo::Concept::shouldConsiderAddressTypePromotion
virtual bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)=0
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:33
llvm::LoopInfo
Definition: LoopInfo.h:1080
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:215
llvm::TargetTransformInfo::Concept::haveFastSqrt
virtual bool haveFastSqrt(Type *Ty)=0
llvm::TargetTransformInfo::Concept::isNoopAddrSpaceCast
virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const =0
llvm::TargetTransformInfo::isLegalMaskedStore
bool isLegalMaskedStore(Type *DataType, Align Alignment) const
Return true if the target supports masked store.
Definition: TargetTransformInfo.cpp:375
llvm::TargetTransformInfo::getUserCost
InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Definition: TargetTransformInfo.cpp:223
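The one-argument overload above simply gathers the user's current operands; made explicit, the call looks like this sketch (U assumed to be the instruction or constant expression being costed):
  llvm::SmallVector<const llvm::Value *, 4> Ops(U->operand_values());
  llvm::InstructionCost Cost =
      TTI.getUserCost(U, Ops, llvm::TargetTransformInfo::TCK_CodeSize);
  bool FoldsAway = Cost == llvm::TargetTransformInfo::TCC_Free;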
llvm::TargetTransformInfo::Concept::isIndexedLoadLegal
virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const =0
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
llvm::BranchProbability
Definition: BranchProbability.h:30
llvm::TargetTransformInfo::Concept::getFlatAddressSpace
virtual unsigned getFlatAddressSpace()=0
llvm::TargetTransformInfo::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy=nullptr, CmpInst::Predicate VecPred=CmpInst::BAD_ICMP_PREDICATE, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:800
llvm::TargetTransformInfo::UnrollingPreferences::DefaultUnrollRuntimeCount
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
Definition: TargetTransformInfo.h:459
llvm::TargetTransformInfo::hasDivRemOp
bool hasDivRemOp(Type *DataType, bool IsSigned) const
Return true if the target has a unified operation to calculate division and remainder.
Definition: TargetTransformInfo.cpp:412
llvm::TargetTransformInfo::Concept::LSRWithInstrQueries
virtual bool LSRWithInstrQueries()=0
llvm::TargetTransformInfo::simplifyDemandedUseBitsIntrinsic
Optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
Can be used to implement target-specific instruction combining.
Definition: TargetTransformInfo.cpp:302
llvm::TargetTransformInfo::getCacheSize
Optional< unsigned > getCacheSize(CacheLevel Level) const
Definition: TargetTransformInfo.cpp:624
llvm::TargetTransformInfo::getNumberOfRegisters
unsigned getNumberOfRegisters(unsigned ClassID) const
Definition: TargetTransformInfo.cpp:573
llvm::TargetTransformInfo::AddressingModeKind
AddressingModeKind
Definition: TargetTransformInfo.h:633
llvm::TargetTransformInfo::getMaxPrefetchIterationsAhead
unsigned getMaxPrefetchIterationsAhead() const
Definition: TargetTransformInfo.cpp:644
llvm::TargetTransformInfo::MIM_Unindexed
@ MIM_Unindexed
No indexing.
Definition: TargetTransformInfo.h:1286
llvm::TargetTransformInfo::OK_AnyValue
@ OK_AnyValue
Definition: TargetTransformInfo.h:896
llvm::TargetTransformInfo::getLoadVectorFactor
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
Definition: TargetTransformInfo.cpp:1004
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
llvm::TargetTransformInfo::SK_Transpose
@ SK_Transpose
Transpose two vectors.
Definition: TargetTransformInfo.h:851
llvm::TargetTransformInfo::CastContextHint::None
@ None
The cast is not used with a load/store of any kind.
llvm::TargetTransformInfo::isLegalToVectorizeStore
bool isLegalToVectorizeStore(StoreInst *SI) const
Definition: TargetTransformInfo.cpp:983
llvm::TargetTransformInfo::matchVectorSplittingReduction
static ReductionKind matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty)
Definition: TargetTransformInfo.cpp:1238
llvm::TargetTransformInfo::CacheLevel::L2D
@ L2D
llvm::TargetTransformInfo::Concept::getAssumedAddrSpace
virtual unsigned getAssumedAddrSpace(const Value *V) const =0
llvm::TargetTransformInfo::MIM_PreInc
@ MIM_PreInc
Pre-incrementing.
Definition: TargetTransformInfo.h:1287
llvm::TargetTransformInfo::MemCmpExpansionOptions::LoadSizes
SmallVector< unsigned, 8 > LoadSizes
Definition: TargetTransformInfo.h:758
llvm::TargetIRAnalysis::TargetIRAnalysis
TargetIRAnalysis()
Default construct a target IR analysis.
Definition: TargetTransformInfo.cpp:1391
llvm::TargetTransformInfo::Concept::preferInLoopReduction
virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags) const =0
llvm::TargetTransformInfo::Concept::getIntImmCostIntrin
virtual int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind)=0
llvm::TargetTransformInfo::getMaxVScale
Optional< unsigned > getMaxVScale() const
Definition: TargetTransformInfo.cpp:595
llvm::TargetTransformInfo::getOperandsScalarizationOverhead
unsigned getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys) const
Estimate the overhead of scalarizing an instruction's unique non-constant operands.
Definition: TargetTransformInfo.cpp:481
llvm::TargetTransformInfo::Concept::isLegalICmpImmediate
virtual bool isLegalICmpImmediate(int64_t Imm)=0
llvm::TargetTransformInfo::getCacheAssociativity
Optional< unsigned > getCacheAssociativity(CacheLevel Level) const
Definition: TargetTransformInfo.cpp:629
llvm::TargetTransformInfo::getCacheLineSize
unsigned getCacheLineSize() const
Definition: TargetTransformInfo.cpp:619
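Prefetch-insertion clients combine these cache queries. A short sketch, assuming TTI is in scope (a zero result conventionally means the target provided no value):
  unsigned LineSize = TTI.getCacheLineSize(); // bytes
  llvm::Optional<unsigned> L1Bytes =
      TTI.getCacheSize(llvm::TargetTransformInfo::CacheLevel::L1D);
  unsigned Distance = TTI.getPrefetchDistance(); // instructions ahead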
llvm::TargetTransformInfo::enableMaskedInterleavedAccessVectorization
bool enableMaskedInterleavedAccessVectorization() const
Enable matching of interleaved access groups that contain predicated accesses or gaps and therefore v...
Definition: TargetTransformInfo.cpp:504
llvm::KnownBits
Definition: KnownBits.h:23
llvm::TargetTransformInfo::ReductionData::Kind
ReductionKind Kind
Definition: TargetTransformInfo.h:878
llvm::TargetTransformInfo::LSRCost::NumBaseAdds
unsigned NumBaseAdds
Definition: TargetTransformInfo.h:416
llvm::TargetIRAnalysis::operator=
TargetIRAnalysis & operator=(TargetIRAnalysis &&RHS)
Definition: TargetTransformInfo.h:2290
llvm::HardwareLoopInfo::ExitBlock
BasicBlock * ExitBlock
Definition: TargetTransformInfo.h:97
llvm::TargetTransformInfo::ReductionData::Opcode
unsigned Opcode
Definition: TargetTransformInfo.h:875
llvm::MemIntrinsicInfo::WriteMem
bool WriteMem
Definition: TargetTransformInfo.h:82
llvm::TargetTransformInfo::UnrollingPreferences::UpperBound
bool UpperBound
Allow using trip count upper bound to unroll loops.
Definition: TargetTransformInfo.h:490
llvm::TargetTransformInfo::Concept::getIntImmCostInst
virtual int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr)=0
llvm::TargetTransformInfo::isNoopAddrSpaceCast
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Definition: TargetTransformInfo.cpp:261
llvm::TypeSize
Definition: TypeSize.h:417
llvm::TargetTransformInfo::getLoadStoreVecRegBitWidth
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
Definition: TargetTransformInfo.cpp:975
llvm::TargetTransformInfo::UnrollingPreferences::AllowRemainder
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
Definition: TargetTransformInfo.h:482
llvm::TargetTransformInfo::Concept::enableAggressiveInterleaving
virtual bool enableAggressiveInterleaving(bool LoopHasReductions)=0
llvm::TargetTransformInfo::isFPVectorizationPotentiallyUnsafe
bool isFPVectorizationPotentiallyUnsafe() const
Indicate that it is potentially unsafe to automatically vectorize floating-point operations because t...
Definition: TargetTransformInfo.cpp:508
llvm::TargetTransformInfo::Concept::isLegalToVectorizeReduction
virtual bool isLegalToVectorizeReduction(RecurrenceDescriptor RdxDesc, ElementCount VF) const =0
llvm::TargetTransformInfo::ReductionData::RHS
Value * RHS
Definition: TargetTransformInfo.h:877
PassManager.h
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:207
llvm::User::operand_values
iterator_range< value_op_iterator > operand_values()
Definition: User.h:266
llvm::TargetTransformInfo::LSRCost::ImmCost
unsigned ImmCost
Definition: TargetTransformInfo.h:417
llvm::TargetTransformInfo::ReductionData::LHS
Value * LHS
Definition: TargetTransformInfo.h:876
llvm::HardwareLoopInfo::TripCount
const SCEV * TripCount
Definition: TargetTransformInfo.h:99
llvm::TargetIRAnalysis::run
Result run(const Function &F, FunctionAnalysisManager &)
Definition: TargetTransformInfo.cpp:1397
llvm::TargetTransformInfo::getInstructionCost
InstructionCost getInstructionCost(const Instruction *I, enum TargetCostKind kind) const
Query the cost of a specified instruction.
Definition: TargetTransformInfo.h:225
llvm::HardwareLoopInfo::HardwareLoopInfo
HardwareLoopInfo(Loop *L)
Definition: TargetTransformInfo.h:95
llvm::TargetTransformInfo::Concept::getFPOpCost
virtual InstructionCost getFPOpCost(Type *Ty)=0
llvm::TargetTransformInfo::getInliningThresholdMultiplier
unsigned getInliningThresholdMultiplier() const
Definition: TargetTransformInfo.cpp:197
llvm::TargetTransformInfo::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index=-1) const
Definition: TargetTransformInfo.cpp:811
llvm::InstCombiner
The core instruction combiner logic.
Definition: InstCombiner.h:45
llvm::TargetTransformInfo::adjustInliningThreshold
unsigned adjustInliningThreshold(const CallBase *CB) const
Definition: TargetTransformInfo.cpp:202
llvm::TargetTransformInfo::getMaskedMemoryOpCost
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Definition: TargetTransformInfo.cpp:830
llvm::TargetTransformInfo::RGK_ScalableVector
@ RGK_ScalableVector
Definition: TargetTransformInfo.h:924
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:45
llvm::HardwareLoopInfo
Attributes of a target dependent hardware loop.
Definition: TargetTransformInfo.h:93
llvm::TargetTransformInfo::getEstimatedNumberOfCaseClusters
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
Definition: TargetTransformInfo.cpp:216
llvm::TargetTransformInfo::TargetTransformInfo
TargetTransformInfo(T Impl)
Construct a TTI object using a type implementing the Concept API below.
Definition: TargetTransformInfo.h:2251
llvm::TargetTransformInfo::getExtendedAddReductionCost
InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of an extended reduction pattern, similar to getArithmeticReductionCost of an Add ...
Definition: TargetTransformInfo.cpp:912
llvm::RecurrenceDescriptor
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:66
llvm::TargetTransformInfo::Concept::getCallInstrCost
virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::MemCmpExpansionOptions::NumLoadsPerBlock
unsigned NumLoadsPerBlock
Definition: TargetTransformInfo.h:768
llvm::IntrinsicCostAttributes::getID
Intrinsic::ID getID() const
Definition: TargetTransformInfo.h:146
llvm::TargetTransformInfo::Concept::getGISelRematGlobalCost
virtual unsigned getGISelRematGlobalCost() const =0
llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition: TargetTransformInfo.h:924
llvm::TargetTransformInfo::Concept::getCFInstrCost
virtual InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
llvm::TargetTransformInfo::invalidate
bool invalidate(Function &, const PreservedAnalyses &, FunctionAnalysisManager::Invalidator &)
Handle the invalidation of this information.
Definition: TargetTransformInfo.h:197
llvm::TargetTransformInfo::Concept::getInlinerVectorBonusPercent
virtual int getInlinerVectorBonusPercent()=0
llvm::TargetTransformInfo::Concept::isLegalAddImmediate
virtual bool isLegalAddImmediate(int64_t Imm)=0
llvm::TargetTransformInfo::UnrollingPreferences::Threshold
unsigned Threshold
The cost threshold for the unrolled loop.
Definition: TargetTransformInfo.h:431
llvm::IntrinsicCostAttributes::IntrinsicCostAttributes
IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, unsigned ScalarizationCost=std::numeric_limits< unsigned >::max())
Definition: TargetTransformInfo.cpp:57
llvm::TargetTransformInfo::Concept::getNumberOfParts
virtual unsigned getNumberOfParts(Type *Tp)=0
llvm::TargetTransformInfo::RK_MinMax
@ RK_MinMax
Min/max reduction data.
Definition: TargetTransformInfo.h:864
llvm::TargetTransformInfo::Concept::shouldBuildLookupTablesForConstant
virtual bool shouldBuildLookupTablesForConstant(Constant *C)=0
llvm::TargetTransformInfo::Concept::isProfitableToHoist
virtual bool isProfitableToHoist(Instruction *I)=0
llvm::TargetTransformInfo::getMinPrefetchStride
unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Some HW prefetchers can handle accesses up to a certain constant stride.
Definition: TargetTransformInfo.cpp:637
llvm::TargetTransformInfo::isIndexedStoreLegal
bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const
Definition: TargetTransformInfo.cpp:970
llvm::TargetTransformInfo::Concept::getAddressComputationCost
virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr)=0
llvm::TargetTransformInfo::isNumRegsMajorCostOfLSR
bool isNumRegsMajorCostOfLSR() const
Return true if the major cost of LSR is the number of registers.
Definition: TargetTransformInfo.cpp:350
llvm::TargetTransformInfo::shouldExpandReduction
bool shouldExpandReduction(const IntrinsicInst *II) const
Definition: TargetTransformInfo.cpp:1028
llvm::TargetTransformInfo::Concept::getArithmeticReductionCost
virtual InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, bool IsPairwiseForm, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::Concept::isLegalAddressingMode
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I)=0
llvm::TargetTransformInfo::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP) const
Get target-customized preferences for the generic loop peeling transformation.
Definition: TargetTransformInfo.cpp:324
llvm::TargetTransformInfo::getStoreVectorFactor
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
Definition: TargetTransformInfo.cpp:1011
llvm::TargetTransformInfo::Concept::getCacheAssociativity
virtual Optional< unsigned > getCacheAssociativity(CacheLevel Level) const =0
llvm::TargetTransformInfo::MIM_PostDec
@ MIM_PostDec
Post-decrementing.
Definition: TargetTransformInfo.h:1290
llvm::HardwareLoopInfo::canAnalyze
bool canAnalyze(LoopInfo &LI)
Definition: TargetTransformInfo.cpp:47
llvm::TargetTransformInfo::getMemcpyCost
int getMemcpyCost(const Instruction *I) const
Definition: TargetTransformInfo.cpp:888
llvm::TargetTransformInfo::isLegalToVectorizeStoreChain
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
Definition: TargetTransformInfo.cpp:993
llvm::SmallVectorImpl< const Value * >
ForceHardwareLoopPHI
static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))
llvm::TargetTransformInfo::Concept::preferPredicatedReductionSelect
virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, ReductionFlags) const =0
llvm::MemIntrinsicInfo
Information about a load/store intrinsic defined by the target.
Definition: TargetTransformInfo.h:68
llvm::TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize
unsigned getAtomicMemIntrinsicMaxElementSize() const
Definition: TargetTransformInfo.cpp:929
llvm::msgpack::Type
Type
MessagePack types as defined in the standard, with the exception of Integer being divided into a signed Int and unsigned UInt.
Definition: MsgPackReader.h:49
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst); holds everything related to calling a function.
Definition: InstrTypes.h:1164
DataTypes.h
llvm::TargetTransformInfo::enableAggressiveInterleaving
bool enableAggressiveInterleaving(bool LoopHasReductions) const
Don't restrict interleaved unrolling to small loops.
Definition: TargetTransformInfo.cpp:490
llvm::TargetTransformInfo::Concept::areInlineCompatible
virtual bool areInlineCompatible(const Function *Caller, const Function *Callee) const =0
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across all SmallPtrSet instances.
Definition: SmallPtrSet.h:343
llvm::TargetTransformInfo::RK_None
@ RK_None
Definition: TargetTransformInfo.h:862
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::TargetTransformInfo::UnrollingPreferences::OptSizeThreshold
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
Definition: TargetTransformInfo.h:445
llvm::TargetTransformInfo::getCallInstrCost
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency) const
Definition: TargetTransformInfo.cpp:868
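For example, a size estimate for a direct call might look as follows; Callee, RetTy, I32Ty, and PtrTy are assumed to be in scope:

  SmallVector<Type *, 2> ArgTys = {I32Ty, PtrTy};
  InstructionCost CallCost = TTI.getCallInstrCost(Callee, RetTy, ArgTys);
  // CostKind defaults to TCK_SizeAndLatency, the kind used for
  // size-oriented estimates.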
llvm::TargetTransformInfo::getOperandInfo
static OperandValueKind getOperandInfo(const Value *V, OperandValueProperties &OpProps)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
Definition: TargetTransformInfo.cpp:657
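A sketch of the usual call pattern; V is an assumed operand of the instruction being costed:

  TargetTransformInfo::OperandValueProperties Props =
      TargetTransformInfo::OP_None;
  TargetTransformInfo::OperandValueKind Kind =
      TargetTransformInfo::getOperandInfo(V, Props);
  // Kind distinguishes e.g. OK_UniformConstantValue from OK_AnyValue;
  // Props can report OP_PowerOf2 for constants such as 8.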
llvm::TargetTransformInfo::getExtractWithExtendCost
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index=-1) const
Definition: TargetTransformInfo.cpp:783
llvm::MemIntrinsicInfo::MatchingId
unsigned short MatchingId
Definition: TargetTransformInfo.h:79
llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition: TargetTransformInfo.h:263
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3149
llvm::TargetTransformInfo::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueKind Opd1Info=OK_AnyValue, OperandValueKind Opd2Info=OK_AnyValue, OperandValueProperties Opd1PropInfo=OP_None, OperandValueProperties Opd2PropInfo=OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
Definition: TargetTransformInfo.cpp:706
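A minimal sketch of a throughput query as a vectorizer might issue it; VecTy (e.g. <4 x i32>), ScalarCost, and VF are assumptions:

  InstructionCost VecCost = TTI.getArithmeticInstrCost(
      Instruction::Mul, VecTy, TargetTransformInfo::TCK_RecipThroughput);
  if (VecCost.isValid() && VecCost <= ScalarCost * VF) {
    // One vector multiply is no worse than VF scalar multiplies.
  }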
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::TargetTransformInfo::Concept::getUserCost
virtual InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind)=0
llvm::TargetTransformInfo::Concept::getPreferredAddressingMode
virtual AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const =0
llvm::IntrinsicCostAttributes::isTypeBasedOnly
bool isTypeBasedOnly() const
Definition: TargetTransformInfo.h:154
llvm::TargetTransformInfo::isProfitableToHoist
bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist an instruction from the then/else block to before the if.
Definition: TargetTransformInfo.cpp:443
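Illustrative use in a SimplifyCFG-style transform (a hedged sketch; I is the candidate instruction and InsertPt an assumed point before the branch):

  if (isSafeToSpeculativelyExecute(&I) && TTI.isProfitableToHoist(&I))
    I.moveBefore(InsertPt); // Hoist out of the then/else block.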
llvm::TargetTransformInfo::shouldBuildRelLookupTables
bool shouldBuildRelLookupTables() const
Return true if lookup tables should be turned into relative lookup tables.
Definition: TargetTransformInfo.cpp:466
llvm::TargetTransformInfo::Concept::getMaxInterleaveFactor
virtual unsigned getMaxInterleaveFactor(unsigned VF)=0
llvm::TargetTransformInfo::Concept::getStoreVectorFactor
virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const =0
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3005
llvm::TargetTransformInfo::Concept::getLoadStoreVecRegBitWidth
virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const =0
llvm::TargetTransformInfo::Concept::getRegUsageForType
virtual unsigned getRegUsageForType(Type *Ty)=0
llvm::TargetTransformInfo::~TargetTransformInfo
~TargetTransformInfo()
Definition: TargetTransformInfo.cpp:187
llvm::TargetTransformInfo::Concept::getCacheLineSize
virtual unsigned getCacheLineSize() const =0
llvm::AMDGPU::VGPRIndexMode::Id
Id
Definition: SIDefines.h:221
llvm::TargetTransformInfo::hasActiveVectorLength
bool hasActiveVectorLength() const
llvm::TargetTransformInfo::Concept::adjustInliningThreshold
virtual unsigned adjustInliningThreshold(const CallBase *CB)=0
llvm::TargetTransformInfo::OK_NonUniformConstantValue
@ OK_NonUniformConstantValue
Definition: TargetTransformInfo.h:899
llvm::TargetTransformInfo::getTgtMemIntrinsic
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
Definition: TargetTransformInfo.cpp:924
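A sketch of how a pass in the spirit of EarlyCSE can treat a recognized target intrinsic like a plain memory access; II is an assumed IntrinsicInst*:

  MemIntrinsicInfo Info;
  if (TTI.getTgtMemIntrinsic(II, Info) && Info.PtrVal && Info.isUnordered()) {
    // The intrinsic acts as an ordinary load/store on Info.PtrVal and can
    // join load/store optimizations keyed on Info.MatchingId.
  }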
llvm::TargetTransformInfo::getRegUsageForType
unsigned getRegUsageForType(Type *Ty) const
Returns the estimated number of registers required to represent Ty.
Definition: TargetTransformInfo.cpp:453
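For instance, a register-pressure heuristic might sum this over the values live across a region (hedged sketch; LiveValues is an assumed container):

  unsigned RegUsage = 0;
  for (Value *V : LiveValues)
    RegUsage += TTI.getRegUsageForType(V->getType());
  if (RegUsage > TTI.getNumberOfRegisters(/*ClassID=*/0)) {
    // Likely to spill; back off the transformation.
  }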
llvm::TargetTransformInfo::Concept::isLegalMaskedCompressStore
virtual bool isLegalMaskedCompressStore(Type *DataType)=0
llvm::TargetTransformInfo::useAA
bool useAA() const
Definition: TargetTransformInfo.cpp:447
llvm::TargetTransformInfo::Concept::getInliningThresholdMultiplier
virtual unsigned getInliningThresholdMultiplier()=0
llvm::HardwareLoopInfo::CountType
IntegerType * CountType
Definition: TargetTransformInfo.h:100
llvm::TargetTransformInfo::ReductionData
Contains opcode + LHS/RHS parts of the reduction operations.
Definition: TargetTransformInfo.h:869
llvm::TargetTransformInfo::enableWritePrefetching
bool enableWritePrefetching() const
Definition: TargetTransformInfo.cpp:648
llvm::TargetTransformInfo::getFPOpCost
InstructionCost getFPOpCost(Type *Ty) const
Return the expected cost of supporting the floating point operation of the specified type.
Definition: TargetTransformInfo.cpp:534
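A sketch of guarding a transform on the target's floating-point support; Ty is an assumed FP type such as double:

  InstructionCost FPCost = TTI.getFPOpCost(Ty);
  if (FPCost >= TargetTransformInfo::TCC_Expensive) {
    // FP ops likely lower to library calls here; avoid introducing more.
  }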
llvm::TargetTransformInfo::Concept::getMemoryOpCost
virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I)=0
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::TargetTransformInfo::TCK_RecipThroughput
@ TCK_RecipThroughput
Reciprocal throughput.
Definition: TargetTransformInfo.h:212
llvm::IntrinsicCostAttributes::getArgs
const SmallVectorImpl< const Value * > & getArgs() const
Definition: TargetTransformInfo.h:151
llvm::TargetTransformInfo::AMK_None
@ AMK_None
Definition: TargetTransformInfo.h:636
llvm::TargetTransformInfo::SK_ExtractSubvector
@ SK_ExtractSubvector
ExtractSubvector Index indicates start offset.
Definition: TargetTransformInfo.h:853
llvm::TargetTransformInfo::AMK_PreIndexed
@ AMK_PreIndexed
Definition: TargetTransformInfo.h:634
llvm::AtomicOrdering::NotAtomic
@ NotAtomic
llvm::TargetTransformInfo::Concept::isLegalToVectorizeStore
virtual bool isLegalToVectorizeStore(StoreInst *SI) const =0
llvm::TargetTransformInfo::isAlwaysUniform
bool isAlwaysUniform(const Value *V) const
Definition: TargetTransformInfo.cpp:248
llvm::TargetTransformInfo::isLegalAddressingMode
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of the specified type.
Definition: TargetTransformInfo.cpp:337
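An illustrative query, of the kind LSR makes, for the mode base + 4*index + 16 on an i32 access (all constants hypothetical):

  bool Legal = TTI.isLegalAddressingMode(Int32Ty, /*BaseGV=*/nullptr,
                                         /*BaseOffset=*/16,
                                         /*HasBaseReg=*/true, /*Scale=*/4);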
llvm::TargetTransformInfo::Concept::simplifyDemandedUseBitsIntrinsic
virtual Optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)=0
llvm::TargetTransformInfo::matchPairwiseReduction
static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty)
Definition: TargetTransformInfo.cpp:1174
llvm::TargetTransformInfo::UnrollingPreferences::MaxPercentThresholdBoost
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain percentage to allow more aggressive complete unrolling.
Definition: TargetTransformInfo.h:442
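As a hypothetical worked example: with Threshold = 150 and MaxPercentThresholdBoost = 400, a complete unroll that eliminates most of the loop's cost may be considered up to an effective threshold of 150 * 400 / 100 = 600.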
llvm::TargetTransformInfo::Concept::getVectorInstrCost
virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)=0
llvm::TargetTransformInfo::Concept::hasActiveVectorLength
virtual bool hasActiveVectorLength() const =0
llvm::MemIntrinsicInfo::IsVolatile
bool IsVolatile
Definition: TargetTransformInfo.h:83
llvm::TargetTransformInfo::Concept::getMinPrefetchStride
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const =0
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38