//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This pass exposes codegen information to IR-level passes. Every
/// transformation that uses codegen information is broken into three parts:
/// 1. The IR-level analysis pass.
/// 2. The IR-level transformation interface which provides the needed
///    information.
/// 3. Codegen-level implementation which uses target-specific hooks.
///
/// This file defines #2, which is the interface that IR-level transformations
/// use for querying the codegen.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/InstructionCost.h"
#include <functional>
#include <utility>

namespace llvm {

namespace Intrinsic {
typedef unsigned ID;
}

class AssumptionCache;
class BlockFrequencyInfo;
class DominatorTree;
class BranchInst;
class CallBase;
class Function;
class GlobalValue;
class InstCombiner;
class OptimizationRemarkEmitter;
class InterleavedAccessInfo;
class IntrinsicInst;
class LoadInst;
class Loop;
class LoopInfo;
class LoopVectorizationLegality;
class ProfileSummaryInfo;
class RecurrenceDescriptor;
class SCEV;
class ScalarEvolution;
class StoreInst;
class SwitchInst;
class TargetLibraryInfo;
class Type;
class User;
class Value;
class VPIntrinsic;
struct KnownBits;
template <typename T> class Optional;

/// Information about a load/store intrinsic defined by the target.
struct MemIntrinsicInfo {
  /// This is the pointer that the intrinsic is loading from or storing to.
  /// If this is non-null, then analysis/optimization passes can assume that
  /// this intrinsic is functionally equivalent to a load/store from this
  /// pointer.
  Value *PtrVal = nullptr;

  // Ordering for atomic operations.
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;

  // Same Id is set by the target for corresponding load/store intrinsics.
  unsigned short MatchingId = 0;

  bool ReadMem = false;
  bool WriteMem = false;
  bool IsVolatile = false;

  bool isUnordered() const {
    return (Ordering == AtomicOrdering::NotAtomic ||
            Ordering == AtomicOrdering::Unordered) &&
           !IsVolatile;
  }
};

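// Illustrative sketch (not part of the original header): a pass might use
// getTgtMemIntrinsic (declared later in this file) to treat a target
// intrinsic `II` like a plain memory access, assuming a
// TargetTransformInfo &TTI is available:
//
//   MemIntrinsicInfo Info;
//   if (TTI.getTgtMemIntrinsic(II, Info) && Info.PtrVal && Info.isUnordered()) {
//     // Safe to reason about II as an ordinary access to Info.PtrVal.
//   }
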
/// Attributes of a target dependent hardware loop.
struct HardwareLoopInfo {
  HardwareLoopInfo() = delete;
  HardwareLoopInfo(Loop *L) : L(L) {}
  Loop *L = nullptr;
  BasicBlock *ExitBlock = nullptr;
  BranchInst *ExitBranch = nullptr;
  const SCEV *ExitCount = nullptr;
  IntegerType *CountType = nullptr;
  Value *LoopDecrement = nullptr; // Decrement the loop counter by this
                                  // value in every iteration.
  bool IsNestingLegal = false;    // Can a hardware loop be a parent to
                                  // another hardware loop?
  bool CounterInReg = false;      // Should loop counter be updated in
                                  // the loop via a phi?
  bool PerformEntryTest = false;  // Generate the intrinsic which also performs
                                  // icmp ne zero on the loop counter value and
                                  // produces an i1 to guard the loop entry.
  bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,
                               DominatorTree &DT, bool ForceNestedLoop = false,
                               bool ForceHardwareLoopPHI = false);
  bool canAnalyze(LoopInfo &LI);
};

class IntrinsicCostAttributes {
  const IntrinsicInst *II = nullptr;
  Type *RetTy = nullptr;
  Intrinsic::ID IID;
  SmallVector<Type *, 4> ParamTys;
  SmallVector<const Value *, 4> Arguments;
  FastMathFlags FMF;
  // If ScalarizationCost is UINT_MAX, the cost of scalarizing the
  // arguments and the return value will be computed based on types.
  InstructionCost ScalarizationCost = InstructionCost::getInvalid();

public:
  IntrinsicCostAttributes(
      Intrinsic::ID Id, const CallBase &CI,
      InstructionCost ScalarCost = InstructionCost::getInvalid(),
      bool TypeBasedOnly = false);

  IntrinsicCostAttributes(
      Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys,
      FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr,
      InstructionCost ScalarCost = InstructionCost::getInvalid());

  IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
                          ArrayRef<const Value *> Args);

  IntrinsicCostAttributes(
      Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args,
      ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(),
      const IntrinsicInst *I = nullptr,
      InstructionCost ScalarCost = InstructionCost::getInvalid());

  Intrinsic::ID getID() const { return IID; }
  const IntrinsicInst *getInst() const { return II; }
  Type *getReturnType() const { return RetTy; }
  FastMathFlags getFlags() const { return FMF; }
  InstructionCost getScalarizationCost() const { return ScalarizationCost; }
  const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }
  const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }

  bool isTypeBasedOnly() const {
    return Arguments.empty();
  }

  bool skipScalarizationCost() const { return ScalarizationCost.isValid(); }
};

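// Illustrative sketch (not part of the original header): given an
// IntrinsicInst *II and a TargetTransformInfo &TTI (both assumed), the
// attributes can be built straight from the call site and fed to the
// getIntrinsicInstrCost query declared later in this file:
//
//   IntrinsicCostAttributes Attrs(II->getIntrinsicID(), *II);
//   InstructionCost Cost =
//       TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_RecipThroughput);
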
enum class PredicationStyle { None, Data, DataAndControlFlow };

class TargetTransformInfo;
typedef TargetTransformInfo TTI;

/// This pass provides access to the codegen interfaces that are needed
/// for IR-level transformations.
class TargetTransformInfo {
public:
  /// Construct a TTI object using a type implementing the \c Concept
  /// API below.
  ///
  /// This is used by targets to construct a TTI wrapping their target-specific
  /// implementation that encodes appropriate costs for their target.
  template <typename T> TargetTransformInfo(T Impl);

  /// Construct a baseline TTI object using a minimal implementation of
  /// the \c Concept API below.
  ///
  /// The TTI implementation will reflect the information in the DataLayout
  /// provided if non-null.
  explicit TargetTransformInfo(const DataLayout &DL);

  // Provide move semantics.
  TargetTransformInfo(TargetTransformInfo &&Arg);
  TargetTransformInfo &operator=(TargetTransformInfo &&RHS);

  // We need to define the destructor out-of-line to define our sub-classes
  // out-of-line.
  ~TargetTransformInfo();

  /// Handle the invalidation of this information.
  ///
  /// When used as a result of \c TargetIRAnalysis this method will be called
  /// when the function this was computed for changes. When it returns false,
  /// the information is preserved across those changes.
  bool invalidate(Function &, const PreservedAnalyses &,
                  FunctionAnalysisManager::Invalidator &) {
    // FIXME: We should probably in some way ensure that the subtarget
    // information for a function hasn't changed.
    return false;
  }

  /// \name Generic Target Information
  /// @{

  /// The kind of cost model.
  ///
  /// There are several different cost models that can be customized by the
  /// target. The normalization of each cost model may be target specific.
  /// e.g. TCK_SizeAndLatency should be comparable to target thresholds such as
  /// those derived from MCSchedModel::LoopMicroOpBufferSize etc.
  enum TargetCostKind {
    TCK_RecipThroughput, ///< Reciprocal throughput.
    TCK_Latency,         ///< The latency of instruction.
    TCK_CodeSize,        ///< Instruction code size.
    TCK_SizeAndLatency   ///< The weighted sum of size and latency.
  };

  /// Underlying constants for 'cost' values in this interface.
  ///
  /// Many APIs in this interface return a cost. This enum defines the
  /// fundamental values that should be used to interpret (and produce) those
  /// costs. The costs are returned as an int rather than a member of this
  /// enumeration because it is expected that the cost of one IR instruction
  /// may have a multiplicative factor to it or otherwise won't fit directly
  /// into the enum. Moreover, it is common to sum or average costs which works
  /// better as simple integral values. Thus this enum only provides constants.
  /// Also note that the returned costs are signed integers to make it natural
  /// to add, subtract, and test with zero (a common boundary condition). It is
  /// not expected that 2^32 is a realistic cost to be modeling at any point.
  ///
  /// Note that these costs should usually reflect the intersection of code-size
  /// cost and execution cost. A free instruction is typically one that folds
  /// into another instruction. For example, reg-to-reg moves can often be
  /// skipped by renaming the registers in the CPU, but they still are encoded
  /// and thus wouldn't be considered 'free' here.
  enum TargetCostConstants {
    TCC_Free = 0,     ///< Expected to fold away in lowering.
    TCC_Basic = 1,    ///< The cost of a typical 'add' instruction.
    TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
  };

  /// Estimate the cost of a GEP operation when lowered.
  InstructionCost
  getGEPCost(Type *PointeeType, const Value *Ptr,
             ArrayRef<const Value *> Operands,
             TargetCostKind CostKind = TCK_SizeAndLatency) const;

  /// \returns A value by which our inlining threshold should be multiplied.
  /// This is primarily used to bump up the inlining threshold wholesale on
  /// targets where calls are unusually expensive.
  ///
  /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
  /// individual classes of instructions would be better.
  unsigned getInliningThresholdMultiplier() const;

  /// \returns A value to be added to the inlining threshold.
  unsigned adjustInliningThreshold(const CallBase *CB) const;

  /// \returns Vector bonus in percent.
  ///
  /// Vector bonuses: We want to more aggressively inline vector-dense kernels
  /// and apply this bonus based on the percentage of vector instructions. A
  /// bonus is applied if the vector instructions exceed 50% and half that
  /// amount is applied if it exceeds 10%. Note that these bonuses are somewhat
  /// arbitrary and evolved over time by accident as much as because they are
  /// principled bonuses.
  /// FIXME: It would be nice to base the bonus values on something more
  /// scientific. A target may have no bonus on vector instructions.
  int getInlinerVectorBonusPercent() const;

  /// \return the expected cost of a memcpy, which could e.g. depend on the
  /// source/destination type and alignment and the number of bytes copied.
  InstructionCost getMemcpyCost(const Instruction *I) const;

  /// \return The estimated number of case clusters when lowering \p 'SI'.
  /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
  /// table.
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) const;

  /// Estimate the cost of a given IR user when lowered.
  ///
  /// This can estimate the cost of either a ConstantExpr or Instruction when
  /// lowered.
  ///
  /// \p Operands is a list of operands which can be a result of transformations
  /// of the current operands. The number of operands on the list must be equal
  /// to the number of current operands the IR user has, and their order must
  /// match the order of the current operands.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
  InstructionCost getInstructionCost(const User *U,
                                     ArrayRef<const Value *> Operands,
                                     TargetCostKind CostKind) const;

  /// This is a helper function which calls the three-argument
  /// getInstructionCost with \p Operands which are the current operands U has.
  InstructionCost getInstructionCost(const User *U,
                                     TargetCostKind CostKind) const {
    SmallVector<const Value *, 4> Operands(U->operand_values());
    return getInstructionCost(U, Operands, CostKind);
  }

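  // Illustrative sketch (not part of the original header): a transform with a
  // TargetTransformInfo &TTI (assumed) can query the cost of an
  // Instruction &I and compare it against the TargetCostConstants above:
  //
  //   InstructionCost Cost = TTI.getInstructionCost(&I, TCK_SizeAndLatency);
  //   if (Cost >= TCC_Expensive) {
  //     // Avoid speculating or duplicating this instruction.
  //   }
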
  /// If a branch or a select condition is skewed in one direction by more than
  /// this factor, it is very likely to be predicted correctly.
  BranchProbability getPredictableBranchThreshold() const;

  /// Return true if branch divergence exists.
  ///
  /// Branch divergence has a significantly negative impact on GPU performance
  /// when threads in the same wavefront take different paths due to conditional
  /// branches.
  bool hasBranchDivergence() const;

  /// Return true if the target prefers to use GPU divergence analysis to
  /// replace the legacy version.
  bool useGPUDivergenceAnalysis() const;

  /// Returns whether V is a source of divergence.
  ///
  /// This function provides the target-dependent information for
  /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis
  /// first builds the dependency graph, and then runs the reachability
  /// algorithm starting with the sources of divergence.
  bool isSourceOfDivergence(const Value *V) const;

  // Returns true for the target-specific set of operations which produce a
  // uniform result even when taking non-uniform arguments.
  bool isAlwaysUniform(const Value *V) const;

  /// Returns the address space ID for a target's 'flat' address space. Note
  /// this is not necessarily the same as addrspace(0), which LLVM sometimes
  /// refers to as the generic address space. The flat address space is a
  /// generic address space that can be used to access multiple segments of
  /// memory with different address spaces. Access of a memory location through
  /// a pointer with this address space is expected to be legal but slower
  /// compared to the same memory location accessed through a pointer with a
  /// different address space.
  ///
  /// This is for targets with different pointer representations which can
  /// be converted with the addrspacecast instruction. If a pointer is converted
  /// to this address space, optimizations should attempt to replace the access
  /// with the source address space.
  ///
  /// \returns ~0u if the target does not have such a flat address space to
  /// optimize away.
  unsigned getFlatAddressSpace() const;

  /// Return any intrinsic address operand indexes which may be rewritten if
  /// they use a flat address space pointer.
  ///
  /// \returns true if the intrinsic was handled.
  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const;

  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;

  /// Return true if globals in this address space can have initializers other
  /// than `undef`.
  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const;

  unsigned getAssumedAddrSpace(const Value *V) const;

  std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const;

  /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
  /// NewV, which has a different address space. This should happen for every
  /// operand index that collectFlatAddressOperands returned for the intrinsic.
  /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the
  /// new value (which may be the original \p II with modified operands).
  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const;

  /// Test whether calls to a function lower to actual program function
  /// calls.
  ///
  /// The idea is to test whether the program is likely to require a 'call'
  /// instruction or equivalent in order to call the given function.
  ///
  /// FIXME: It's not clear that this is a good or useful query API. Clients
  /// should probably move to simpler cost metrics using the above.
  /// Alternatively, we could split the cost interface into distinct code-size
  /// and execution-speed costs. This would allow modelling the core of this
  /// query more accurately, as a call is a single small instruction but
  /// incurs significant execution cost.
  bool isLoweredToCall(const Function *F) const;

  struct LSRCost {
    /// TODO: Some of these could be merged. Also, a lexical ordering
    /// isn't always optimal.
    unsigned Insns;
    unsigned NumRegs;
    unsigned AddRecCost;
    unsigned NumIVMuls;
    unsigned NumBaseAdds;
    unsigned ImmCost;
    unsigned SetupCost;
    unsigned ScaleCost;
  };

  /// Parameters that control the generic loop unrolling transformation.
  struct UnrollingPreferences {
    /// The cost threshold for the unrolled loop. Should be relative to the
    /// getInstructionCost values returned by this API, and the expectation is
    /// that the unrolled loop's instructions when run through that interface
    /// should not exceed this cost. However, this is only an estimate. Also,
    /// specific loops may be unrolled even with a cost above this threshold if
    /// deemed profitable. Set this to UINT_MAX to disable the loop body cost
    /// restriction.
    unsigned Threshold;
    /// If complete unrolling will reduce the cost of the loop, we will boost
    /// the Threshold by a certain percent to allow more aggressive complete
    /// unrolling. This value provides the maximum boost percentage that we
    /// can apply to Threshold (the value should be no less than 100).
    /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
    ///                                    MaxPercentThresholdBoost / 100)
    /// E.g. if complete unrolling reduces the loop execution time by 50%
    /// then we boost the threshold by the factor of 2x. If unrolling is not
    /// expected to reduce the running time, then we do not increase the
    /// threshold.
    unsigned MaxPercentThresholdBoost;
    /// The cost threshold for the unrolled loop when optimizing for size (set
    /// to UINT_MAX to disable).
    unsigned OptSizeThreshold;
    /// The cost threshold for the unrolled loop, like Threshold, but used
    /// for partial/runtime unrolling (set to UINT_MAX to disable).
    unsigned PartialThreshold;
    /// The cost threshold for the unrolled loop when optimizing for size, like
    /// OptSizeThreshold, but used for partial/runtime unrolling (set to
    /// UINT_MAX to disable).
    unsigned PartialOptSizeThreshold;
    /// A forced unrolling factor (the number of concatenated bodies of the
    /// original loop in the unrolled loop body). When set to 0, the unrolling
    /// transformation will select an unrolling factor based on the current cost
    /// threshold and other factors.
    unsigned Count;
    /// Default unroll count for loops with run-time trip count.
    unsigned DefaultUnrollRuntimeCount;
    // Set the maximum unrolling factor. The unrolling factor may be selected
    // using the appropriate cost threshold, but may not exceed this number
    // (set to UINT_MAX to disable). This does not apply in cases where the
    // loop is being fully unrolled.
    unsigned MaxCount;
    /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
    /// applies even if full unrolling is selected. This allows a target to fall
    /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
    unsigned FullUnrollMaxCount;
    // Represents the number of instructions optimized away when the "back
    // edge" becomes a "fall through" in the unrolled loop.
    // For now we count a conditional branch on a backedge and a comparison
    // feeding it.
    unsigned BEInsns;
    /// Allow partial unrolling (unrolling of loops to expand the size of the
    /// loop body, not only to eliminate small constant-trip-count loops).
    bool Partial;
    /// Allow runtime unrolling (unrolling of loops to expand the size of the
    /// loop body even when the number of loop iterations is not known at
    /// compile time).
    bool Runtime;
    /// Allow generation of a loop remainder (extra iterations after unroll).
    bool AllowRemainder;
    /// Allow emitting expensive instructions (such as divisions) when computing
    /// the trip count of a loop for runtime unrolling.
    bool AllowExpensiveTripCount;
    /// Apply loop unroll on any kind of loop
    /// (mainly to loops that fail runtime unrolling).
    bool Force;
    /// Allow using the trip count upper bound to unroll loops.
    bool UpperBound;
    /// Allow unrolling of all the iterations of the runtime loop remainder.
    bool UnrollRemainder;
    /// Allow unroll and jam. Used to enable unroll and jam for the target.
    bool UnrollAndJam;
    /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
    /// value above is used during unroll and jam for the outer loop size.
    /// This value is used in the same manner to limit the size of the inner
    /// loop.
    unsigned UnrollAndJamInnerLoopThreshold;
    /// Don't allow loop unrolling to simulate more than this number of
    /// iterations when checking full unroll profitability.
    unsigned MaxIterationsCountToAnalyze;
  };

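  // Illustrative sketch (not part of the original header): a hypothetical
  // target implementation ("MyTTIImpl") would tune these fields in its
  // getUnrollingPreferences hook; the caller pre-fills the
  // target-independent defaults:
  //
  //   void MyTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
  //                                           TTI::UnrollingPreferences &UP,
  //                                           OptimizationRemarkEmitter *ORE) {
  //     UP.Partial = true;  // Allow partial unrolling of this loop.
  //     UP.Runtime = true;  // Allow unrolling with a run-time trip count.
  //     UP.MaxCount = 4;    // But never concatenate more than 4 bodies.
  //   }
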
  /// Get target-customized preferences for the generic loop unrolling
  /// transformation. The caller will initialize UP with the current
  /// target-independent defaults.
  void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                               UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) const;

  /// Query the target whether it would be profitable to convert the given loop
  /// into a hardware loop.
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) const;

  /// Query the target whether it would be preferred to create a predicated
  /// vector loop, which can avoid the need to emit a scalar epilogue loop.
  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   LoopVectorizationLegality *LVL,
                                   InterleavedAccessInfo *IAI) const;

  /// Query the target whether lowering of the llvm.get.active.lane.mask
  /// intrinsic is supported and how the mask should be used. A return value
  /// of PredicationStyle::Data indicates the mask is used as data only,
  /// whereas PredicationStyle::DataAndControlFlow indicates we should also use
  /// the mask for control flow in the loop. If unsupported, the return value
  /// is PredicationStyle::None.
  PredicationStyle emitGetActiveLaneMask() const;

  // Parameters that control the loop peeling transformation.
  struct PeelingPreferences {
    /// A forced peeling factor (the number of bodies of the original loop
    /// that should be peeled off before the loop body). When set to 0, a
    /// peeling factor is selected based on profile information and other
    /// factors.
    unsigned PeelCount;
    /// Allow peeling off loop iterations.
    bool AllowPeeling;
    /// Allow peeling off loop iterations for loop nests.
    bool AllowLoopNestsPeeling;
    /// Allow peeling based on profile. Used to enable peeling off all
    /// iterations based on the provided profile.
    /// If the value is true, the peeling cost model can decide to peel only
    /// some iterations, and in this case it will set this to false.
    bool PeelProfiledIterations;
  };

  /// Get target-customized preferences for the generic loop peeling
  /// transformation. The caller will initialize \p PP with the current
  /// target-independent defaults with information from \p L and \p SE.
  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             PeelingPreferences &PP) const;

  /// Targets can implement their own combinations for target-specific
  /// intrinsics. This function will be called from the InstCombine pass every
  /// time a target-specific intrinsic is encountered.
  ///
  /// \returns None to not do anything target specific or a value that will be
  /// returned from the InstCombiner. It is possible to return null and stop
  /// further processing of the intrinsic by returning nullptr.
  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const;
  /// Can be used to implement target-specific instruction combining.
  /// \see instCombineIntrinsic
  Optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const;
  /// Can be used to implement target-specific instruction combining.
  /// \see instCombineIntrinsic
  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;
  /// @}

  /// \name Scalar Target Information
  /// @{

  /// Flags indicating the kind of support for population count.
  ///
  /// Compared to the SW implementation, HW support is supposed to
  /// significantly boost the performance when the population is dense, and it
  /// may or may not degrade performance if the population is sparse. HW
  /// support is considered "Fast" if it can outperform, or is on par with,
  /// the SW implementation when the population is sparse; otherwise, it is
  /// considered "Slow".
  enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };

  /// Return true if the specified immediate is a legal add immediate, that
  /// is, the target has add instructions which can add a register with the
  /// immediate without having to materialize the immediate into a register.
  bool isLegalAddImmediate(int64_t Imm) const;

  /// Return true if the specified immediate is a legal icmp immediate,
  /// that is, the target has icmp instructions which can compare a register
  /// against the immediate without having to materialize the immediate into a
  /// register.
  bool isLegalICmpImmediate(int64_t Imm) const;

  /// Return true if the addressing mode represented by AM is legal for
  /// this target, for a load/store of the specified type.
  /// The type may be VoidTy, in which case only return true if the addressing
  /// mode is legal for a load/store of any legal type.
  /// If target returns true in LSRWithInstrQueries(), I may be valid.
  /// TODO: Handle pre/postinc as well.
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace = 0,
                             Instruction *I = nullptr) const;

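  // Illustrative sketch (not part of the original header): LSR-style code with
  // a TargetTransformInfo &TTI (assumed) can probe a base-reg + scaled-index +
  // immediate mode for an i32 access (Int32Ty assumed) before committing:
  //
  //   bool Legal = TTI.isLegalAddressingMode(Int32Ty, /*BaseGV=*/nullptr,
  //                                          /*BaseOffset=*/16,
  //                                          /*HasBaseReg=*/true, /*Scale=*/1);
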
  /// Return true if LSR cost of C1 is lower than C2.
  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                     const TargetTransformInfo::LSRCost &C2) const;

  /// Return true if LSR major cost is number of registers. Targets which
  /// implement their own isLSRCostLess and unset number of registers as major
  /// cost should return false, otherwise return true.
  bool isNumRegsMajorCostOfLSR() const;

  /// \returns true if LSR should not optimize a chain that includes \p I.
  bool isProfitableLSRChainElement(Instruction *I) const;

  /// Return true if the target can fuse a compare and branch.
  /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
  /// calculation for the instructions in a loop.
  bool canMacroFuseCmp() const;

  /// Return true if the target can save a compare for loop count, for example
  /// hardware loop saves a compare.
  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) const;

  enum AddressingModeKind {
    AMK_PreIndexed,
    AMK_PostIndexed,
    AMK_None
  };

  /// Return the preferred addressing mode LSR should make efforts to generate.
  AddressingModeKind getPreferredAddressingMode(const Loop *L,
                                                ScalarEvolution *SE) const;

  /// Return true if the target supports masked store.
  bool isLegalMaskedStore(Type *DataType, Align Alignment) const;
  /// Return true if the target supports masked load.
  bool isLegalMaskedLoad(Type *DataType, Align Alignment) const;

  /// Return true if the target supports nontemporal store.
  bool isLegalNTStore(Type *DataType, Align Alignment) const;
  /// Return true if the target supports nontemporal load.
  bool isLegalNTLoad(Type *DataType, Align Alignment) const;

  /// \returns true if the target supports broadcasting a load to a vector of
  /// type <NumElements x ElementTy>.
  bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const;

  /// Return true if the target supports masked scatter.
  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
  /// Return true if the target supports masked gather.
  bool isLegalMaskedGather(Type *DataType, Align Alignment) const;
  /// Return true if the target forces scalarizing of llvm.masked.gather
  /// intrinsics.
  bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const;
  /// Return true if the target forces scalarizing of llvm.masked.scatter
  /// intrinsics.
  bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const;

  /// Return true if the target supports masked compress store.
  bool isLegalMaskedCompressStore(Type *DataType) const;
  /// Return true if the target supports masked expand load.
  bool isLegalMaskedExpandLoad(Type *DataType) const;

  /// Return true if this is an alternating opcode pattern that can be lowered
  /// to a single instruction on the target. On X86 this is for the addsub
  /// instruction which corresponds to a Shuffle + Fadd + FSub pattern in IR.
  /// This function expects two opcodes: \p Opcode0 and \p Opcode1, selected by
  /// \p OpcodeMask. The mask contains one bit per lane and is a `0` when \p
  /// Opcode0 is selected and `1` when \p Opcode1 is selected.
  /// \p VecTy is the vector type of the instruction to be generated.
  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                       const SmallBitVector &OpcodeMask) const;

  /// Return true if we should be enabling ordered reductions for the target.
  bool enableOrderedReductions() const;

  /// Return true if the target has a unified operation to calculate division
  /// and remainder. If so, the additional implicit multiplication and
  /// subtraction required to calculate a remainder from division are free. This
  /// can enable more aggressive transformations for division and remainder than
  /// would typically be allowed using throughput or size cost models.
  bool hasDivRemOp(Type *DataType, bool IsSigned) const;

  /// Return true if the given instruction (assumed to be a memory access
  /// instruction) has a volatile variant. If that's the case then we can avoid
  /// addrspacecast to generic AS for volatile loads/stores. Default
  /// implementation returns false, which prevents address space inference for
  /// volatile loads/stores.
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;

  /// Return true if target doesn't mind addresses in vectors.
  bool prefersVectorizedAddressing() const;

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  /// TODO: Handle pre/postinc as well.
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace = 0) const;

  /// Return true if the loop strength reduce pass should make
  /// Instruction* based TTI queries to isLegalAddressingMode(). This is
  /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
  /// immediate offset and no index register.
  bool LSRWithInstrQueries() const;

  /// Return true if it's free to truncate a value of type Ty1 to type
  /// Ty2. e.g. On x86 it's free to truncate an i32 value in register EAX to i16
  /// by referencing its sub-register AX.
  bool isTruncateFree(Type *Ty1, Type *Ty2) const;

  /// Return true if it is profitable to hoist instruction in the
  /// then/else to before if.
  bool isProfitableToHoist(Instruction *I) const;

  bool useAA() const;

  /// Return true if this type is legal.
  bool isTypeLegal(Type *Ty) const;

  /// Returns the estimated number of registers required to represent \p Ty.
  unsigned getRegUsageForType(Type *Ty) const;

  /// Return true if switches should be turned into lookup tables for the
  /// target.
  bool shouldBuildLookupTables() const;

  /// Return true if switches should be turned into lookup tables
  /// containing this constant value for the target.
  bool shouldBuildLookupTablesForConstant(Constant *C) const;

  /// Return true if lookup tables should be turned into relative lookup tables.
  bool shouldBuildRelLookupTables() const;

  /// Return true if the input function, which is cold at all call sites,
  /// should use the coldcc calling convention.
  bool useColdCCForColdCall(Function &F) const;

  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
  /// are set if the demanded result elements need to be inserted and/or
  /// extracted from vectors.
  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract) const;

  /// Estimate the overhead of scalarizing an instruction's unique
  /// non-constant operands. The (potentially vector) types to use for each of
  /// the arguments are passed via Tys.
  InstructionCost
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   ArrayRef<Type *> Tys) const;

  /// If target has efficient vector element load/store instructions, it can
  /// return true here so that insertion/extraction costs are not added to
  /// the scalarization cost of a load/store.
  bool supportsEfficientVectorElementLoadStore() const;

  /// If the target supports tail calls.
  bool supportsTailCalls() const;

  /// If target supports tail call on \p CB.
  bool supportsTailCallFor(const CallBase *CB) const;

  /// Don't restrict interleaved unrolling to small loops.
  bool enableAggressiveInterleaving(bool LoopHasReductions) const;

  /// Returns options for expansion of memcmp. IsZeroCmp is
  /// true if this is the expansion of memcmp(p1, p2, s) == 0.
  struct MemCmpExpansionOptions {
    // Return true if memcmp expansion is enabled.
    operator bool() const { return MaxNumLoads > 0; }

    // Maximum number of load operations.
    unsigned MaxNumLoads = 0;

    // The list of available load sizes (in bytes), sorted in decreasing order.
    SmallVector<unsigned, 8> LoadSizes;

    // For memcmp expansion when the memcmp result is only compared equal or
    // not-equal to 0, allow up to this number of load pairs per block. As an
    // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
    //   a0 = load2bytes &a[0]
    //   b0 = load2bytes &b[0]
    //   a2 = load1byte  &a[2]
    //   b2 = load1byte  &b[2]
    //   r  = cmp eq (a0 ^ b0 | a2 ^ b2), 0
    unsigned NumLoadsPerBlock = 1;

    // Set to true to allow overlapping loads. For example, 7-byte compares can
    // be done with two 4-byte compares instead of 4+2+1-byte compares. This
    // requires all loads in LoadSizes to be doable in an unaligned way.
    bool AllowOverlappingLoads = false;
  };
  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                               bool IsZeroCmp) const;

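  // Illustrative sketch (not part of the original header): a hypothetical
  // target override of enableMemCmpExpansion might describe a machine with
  // fast unaligned 8-byte loads:
  //
  //   TTI::MemCmpExpansionOptions Options;
  //   Options.MaxNumLoads = 4;              // Expand up to 4 loads total.
  //   Options.LoadSizes = {8, 4, 2, 1};     // Prefer the widest legal load.
  //   Options.AllowOverlappingLoads = true; // 7 bytes as two 4-byte loads.
  //   return Options;
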
  /// Enable matching of interleaved access groups.
  bool enableInterleavedAccessVectorization() const;

  /// Enable matching of interleaved access groups that contain predicated
  /// accesses or gaps and are therefore vectorized using masked
  /// vector loads/stores.
  bool enableMaskedInterleavedAccessVectorization() const;

  /// Indicate that it is potentially unsafe to automatically vectorize
  /// floating-point operations because the semantics of vector and scalar
  /// floating-point operations may differ. For example, ARM NEON v7 SIMD math
  /// does not support IEEE-754 denormal numbers, while depending on the
  /// platform, scalar floating-point math does.
  /// This applies to floating-point math operations and calls, not memory
  /// operations, shuffles, or casts.
  bool isFPVectorizationPotentiallyUnsafe() const;

  /// Determine if the target supports unaligned memory accesses.
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace = 0,
                                      Align Alignment = Align(1),
                                      bool *Fast = nullptr) const;

  /// Return hardware support for population count.
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;

  /// Return true if the hardware has a fast square-root instruction.
  bool haveFastSqrt(Type *Ty) const;

  /// Return true if the cost of the instruction is too high to speculatively
  /// execute and should be kept behind a branch.
  /// This normally just wraps around a getInstructionCost() call, but some
  /// targets might report a low TCK_SizeAndLatency value that is incompatible
  /// with the fixed TCC_Expensive value.
  /// NOTE: This assumes the instruction passes isSafeToSpeculativelyExecute().
  bool isExpensiveToSpeculativelyExecute(const Instruction *I) const;

  /// Return true if it is faster to check if a floating-point value is NaN
  /// (or not-NaN) versus a comparison against a constant FP zero value.
  /// Targets should override this if materializing a 0.0 for comparison is
  /// generally as cheap as checking for ordered/unordered.
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;

  /// Return the expected cost of supporting the floating point operation
  /// of the specified type.
  InstructionCost getFPOpCost(Type *Ty) const;

  /// Return the expected cost of materializing the given integer
  /// immediate of the specified type.
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TargetCostKind CostKind) const;

  /// Return the expected cost of materialization for the given integer
  /// immediate of the specified type for a given instruction. The cost can be
  /// zero if the immediate can be folded into the specified instruction.
  InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) const;
  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TargetCostKind CostKind) const;

  /// Return the expected cost for the given integer when optimising
  /// for size. This is different than the other integer immediate cost
  /// functions in that it is subtarget agnostic. This is useful when you e.g.
  /// target one ISA such as AArch32 but smaller encodings could be possible
  /// with another such as Thumb. This return value is used as a penalty when
  /// the total cost for a constant is calculated (the bigger the cost, the
  /// more beneficial constant hoisting is).
  InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
                                        const APInt &Imm, Type *Ty) const;
  /// @}

  /// \name Vector Target Information
  /// @{

  /// The various kinds of shuffle patterns for vector queries.
  enum ShuffleKind {
    SK_Broadcast,        ///< Broadcast element 0 to all other elements.
    SK_Reverse,          ///< Reverse the order of the vector.
    SK_Select,           ///< Selects elements from the corresponding lane of
                         ///< either source operand. This is equivalent to a
                         ///< vector select with a constant condition operand.
    SK_Transpose,        ///< Transpose two vectors.
    SK_InsertSubvector,  ///< InsertSubvector. Index indicates start offset.
    SK_ExtractSubvector, ///< ExtractSubvector. Index indicates start offset.
    SK_PermuteTwoSrc,    ///< Merge elements from two source vectors into one
                         ///< with any shuffle mask.
    SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any
                         ///< shuffle mask.
    SK_Splice            ///< Concatenates elements from the first input vector
                         ///< with elements of the second input vector, returning
                         ///< a vector of the same type as the input vectors.
                         ///< Index indicates start offset in first input vector.
  };

  /// Additional information about an operand's possible values.
  enum OperandValueKind {
    OK_AnyValue,               // Operand can have any value.
    OK_UniformValue,           // Operand is uniform (splat of a value).
    OK_UniformConstantValue,   // Operand is uniform constant.
    OK_NonUniformConstantValue // Operand is a non uniform constant value.
  };

  /// Additional properties of an operand's values.
  enum OperandValueProperties { OP_None = 0, OP_PowerOf2, OP_NegatedPowerOf2 };

  // Describe the values an operand can take. We're in the process
  // of migrating uses of OperandValueKind and OperandValueProperties
  // to use this class, and then will change the internal representation.
  struct OperandValueInfo {
    OperandValueKind Kind = OK_AnyValue;
    OperandValueProperties Properties = OP_None;

    bool isConstant() const {
      return Kind == OK_UniformConstantValue || Kind == OK_NonUniformConstantValue;
    }
    bool isUniform() const {
      return Kind == OK_UniformValue || Kind == OK_UniformConstantValue;
    }
    bool isPowerOf2() const {
      return Properties == OP_PowerOf2;
    }

    OperandValueInfo getNoProps() const {
      return {Kind, OP_None};
    }
  };

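  // Illustrative sketch (not part of the original header): getOperandInfo
  // (declared further down) classifies a Value; a pass might use it to spot
  // cheap special cases, e.g. for the divisor of a udiv instruction I:
  //
  //   TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(I->getOperand(1));
  //   if (OpInfo.isConstant() && OpInfo.isPowerOf2()) {
  //     // A uniform power-of-two divisor often lowers to a cheap shift.
  //   }
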
  /// \return the number of registers in the target-provided register class.
  unsigned getNumberOfRegisters(unsigned ClassID) const;

  /// \return the target-provided register class ID for the provided type,
  /// accounting for type promotion and other type-legalization techniques that
  /// the target might apply. However, it specifically does not account for the
  /// scalarization or splitting of vector types. Should a vector type require
  /// scalarization or splitting into multiple underlying vector registers, that
  /// type should be mapped to a register class containing no registers.
  /// Specifically, this is designed to provide a simple, high-level view of the
  /// register allocation later performed by the backend. These register classes
  /// don't necessarily map onto the register classes used by the backend.
  /// FIXME: It's not currently possible to determine how many registers
  /// are used by the provided type.
  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;

  /// \return the target-provided register class name.
  const char *getRegisterClassName(unsigned ClassID) const;

  enum RegisterKind { RGK_Scalar, RGK_FixedWidthVector, RGK_ScalableVector };

  /// \return The width of the largest scalar or vector register type.
  TypeSize getRegisterBitWidth(RegisterKind K) const;

  /// \return The width of the smallest vector register type.
  unsigned getMinVectorRegisterBitWidth() const;

  /// \return The maximum value of vscale if the target specifies an
  /// architectural maximum vector length, and None otherwise.
  Optional<unsigned> getMaxVScale() const;

  /// \return the value of vscale to tune the cost model for.
  Optional<unsigned> getVScaleForTuning() const;

  /// \return True if the vectorization factor should be chosen to
  /// make the vector of the smallest element type match the size of a
  /// vector register. For wider element types, this could result in
  /// creating vectors that span multiple vector registers.
  /// If false, the vectorization factor will be chosen based on the
  /// size of the widest element type.
  /// \p K Register Kind for vectorization.
  bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const;

  /// \return The minimum vectorization factor for types of given element
  /// bit width, or 0 if there is no minimum VF. The returned value only
  /// applies when shouldMaximizeVectorBandwidth returns true.
  /// If IsScalable is true, the returned ElementCount must be a scalable VF.
  ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;

  /// \return The maximum vectorization factor for types of given element
  /// bit width and opcode, or 0 if there is no maximum VF.
  /// Currently only used by the SLP vectorizer.
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;

  /// \return The minimum vectorization factor for the store instruction. Given
  /// the initial estimation of the minimum vector factor and store value type,
  /// it tries to find the lowest possible VF that is still profitable for
  /// the vectorization.
  /// \param VF Initial estimation of the minimum vector factor.
  /// \param ScalarMemTy Scalar memory type of the store operation.
  /// \param ScalarValTy Scalar type of the stored value.
  /// Currently only used by the SLP vectorizer.
  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                             Type *ScalarValTy) const;

  /// \return True if it should be considered for address type promotion.
  /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
  /// profitable without finding other extensions fed by the same input.
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;

  /// \return The size of a cache line in bytes.
  unsigned getCacheLineSize() const;

  /// The possible cache levels.
  enum class CacheLevel {
    L1D, // The L1 data cache
    L2D, // The L2 data cache

    // We currently do not model L3 caches, as their sizes differ widely between
    // microarchitectures. Also, we currently do not have a use for L3 cache
    // size modeling yet.
  };

  /// \return The size of the cache level in bytes, if available.
  Optional<unsigned> getCacheSize(CacheLevel Level) const;

  /// \return The associativity of the cache level, if available.
  Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;

  /// \return How much before a load we should place the prefetch
  /// instruction. This is currently measured in number of
  /// instructions.
  unsigned getPrefetchDistance() const;

  /// Some HW prefetchers can handle accesses up to a certain constant stride.
  /// Sometimes prefetching is beneficial even below the HW prefetcher limit,
  /// and the arguments provided are meant to serve as a basis for deciding this
  /// for a particular loop.
  ///
  /// \param NumMemAccesses        Number of memory accesses in the loop.
  /// \param NumStridedMemAccesses Number of the memory accesses that
  ///                              ScalarEvolution could find a known stride
  ///                              for.
  /// \param NumPrefetches         Number of software prefetches that will be
  ///                              emitted as determined by the addresses
  ///                              involved and the cache line size.
  /// \param HasCall               True if the loop contains a call.
  ///
  /// \return This is the minimum stride in bytes where it makes sense to start
  ///         adding SW prefetches. The default is 1, i.e. prefetch with any
  ///         stride.
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches, bool HasCall) const;

  /// \return The maximum number of iterations to prefetch ahead. If
  /// the required number of iterations is more than this number, no
  /// prefetching is performed.
  unsigned getMaxPrefetchIterationsAhead() const;

  /// \return True if prefetching should also be done for writes.
  bool enableWritePrefetching() const;

  /// \return True if the target wants to issue a prefetch in address space
  /// \p AS.
  bool shouldPrefetchAddressSpace(unsigned AS) const;

  /// \return The maximum interleave factor that any transform should try to
  /// perform for this target. This number depends on the level of parallelism
  /// and the number of execution units in the CPU.
  unsigned getMaxInterleaveFactor(unsigned VF) const;

  /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
  static OperandValueInfo getOperandInfo(const Value *V);

  /// This is an approximation of reciprocal throughput of a math/logic op.
  /// A higher cost indicates less expected throughput.
  /// From Agner Fog's guides, reciprocal throughput is "the average number of
  /// clock cycles per instruction when the instructions are not part of a
  /// limiting dependency chain."
  /// Therefore, costs should be scaled to account for multiple execution units
  /// on the target that can process this type of instruction. For example, if
  /// there are 5 scalar integer units and 2 vector integer units that can
  /// calculate an 'add' in a single cycle, this model should indicate that the
  /// cost of the vector add instruction is 2.5 times the cost of the scalar
  /// add instruction.
  /// \p Args is an optional argument which holds the instruction operands
  /// values so the TTI can analyze those values searching for special
  /// cases or optimizations based on those values.
  /// \p CxtI is the optional original context instruction, if one exists, to
  /// provide even more information.
  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueInfo Opd1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Opd2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr) const;

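  // Illustrative sketch (not part of the original header): assuming a
  // TargetTransformInfo &TTI, an i32 type Int32Ty, and a 4 x i32 vector type
  // VecTy, a vectorizer might compare scalar and vector throughput costs
  // before widening:
  //
  //   InstructionCost Scalar =
  //       TTI.getArithmeticInstrCost(Instruction::Add, Int32Ty);
  //   InstructionCost Vector =
  //       TTI.getArithmeticInstrCost(Instruction::Add, VecTy);
  //   bool ProfitableToWiden = Vector < Scalar * 4;
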
  /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
  /// The exact mask may be passed as Mask, or else the array will be empty.
  /// The index and subtype parameters are used by the subvector insertion and
  /// extraction shuffle kinds to show the insert/extract point and the type of
  /// the subvector being inserted/extracted. The operands of the shuffle can be
  /// passed through \p Args, which helps improve the cost estimation in some
  /// cases, like in broadcast loads.
  /// NOTE: For subvector extractions Tp represents the source type.
  InstructionCost
  getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask = None,
                 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
                 int Index = 0, VectorType *SubTp = nullptr,
                 ArrayRef<const Value *> Args = None) const;

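  // Illustrative sketch (not part of the original header): the cost of
  // splatting lane 0 of an assumed vector type VecTy across all lanes:
  //
  //   InstructionCost SplatCost =
  //       TTI.getShuffleCost(TTI::SK_Broadcast, VecTy);
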
  /// Represents a hint about the context in which a cast is used.
  ///
  /// For zext/sext, the context of the cast is the operand, which must be a
  /// load of some kind. For trunc, the context of the cast is the single
  /// user of the instruction, which must be a store of some kind.
  ///
  /// This enum allows the vectorizer to give getCastInstrCost an idea of the
  /// type of cast it's dealing with, as not every cast is equal. For instance,
  /// the zext of a load may be free, but the zext of an interleaving load can
  /// be (very) expensive!
  ///
  /// See \c getCastContextHint to compute a CastContextHint from a cast
  /// Instruction*. Callers can use it if they don't need to override the
  /// context and just want it to be calculated from the instruction.
  ///
  /// FIXME: This handles the types of load/store that the vectorizer can
  /// produce, which are the cases where the context instruction is most
  /// likely to be incorrect. There are other situations where that can happen
  /// too, which might be handled here but in the long run a more general
  /// solution of costing multiple instructions at the same time may be better.
  enum class CastContextHint : uint8_t {
    None,          ///< The cast is not used with a load/store of any kind.
    Normal,        ///< The cast is used with a normal load/store.
    Masked,        ///< The cast is used with a masked load/store.
    GatherScatter, ///< The cast is used with a gather/scatter.
    Interleave,    ///< The cast is used with an interleaved load/store.
    Reversed,      ///< The cast is used with a reversed load/store.
  };

  /// Calculates a CastContextHint from \p I.
  /// This should be used by callers of getCastInstrCost if they wish to
  /// determine the context from some instruction.
  /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr,
  /// or if it's another type of cast.
  static CastContextHint getCastContextHint(const Instruction *I);

  /// \return The expected cost of cast instructions, such as bitcast, trunc,
  /// zext, etc. If there is an existing instruction that holds Opcode, it
  /// may be passed in the 'I' parameter.
  InstructionCost
  getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                   TTI::CastContextHint CCH,
                   TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
                   const Instruction *I = nullptr) const;

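  // Illustrative sketch (not part of the original header): for a cast
  // instruction CastI with source/destination types SrcTy/DstTy (all
  // assumed), the hint can be derived from the instruction itself:
  //
  //   TTI::CastContextHint CCH = TTI::getCastContextHint(CastI);
  //   InstructionCost ExtCost = TTI.getCastInstrCost(
  //       Instruction::ZExt, DstTy, SrcTy, CCH, TTI::TCK_RecipThroughput);
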
  /// \return The expected cost of a sign- or zero-extended vector extract. Use
  /// -1 to indicate that there is no information about the index value.
  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy,
                                           unsigned Index = -1) const;

  /// \return The expected cost of control-flow related instructions such as
  /// Phi, Ret, Br, Switch.
  InstructionCost
  getCFInstrCost(unsigned Opcode,
                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
                 const Instruction *I = nullptr) const;

  /// \returns The expected cost of compare and select instructions. If there
  /// is an existing instruction that holds Opcode, it may be passed in the
  /// 'I' parameter. The \p VecPred parameter can be used to indicate the select
  /// is using a compare with the specified predicate as condition. When vector
  /// types are passed, \p VecPred must be used for all lanes.
  InstructionCost
  getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                     CmpInst::Predicate VecPred,
                     TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
                     const Instruction *I = nullptr) const;

  /// \return The expected cost of vector Insert and Extract.
  /// Use -1 to indicate that there is no information on the index value.
  /// This is used when the instruction is not available; a typical use
  /// case is to provision the cost of vectorization/scalarization in
  /// vectorizer passes.
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     unsigned Index = -1) const;

  /// \return The expected cost of vector Insert and Extract.
  /// This is used when the instruction is available, and the implementation
  /// asserts 'I' is not nullptr.
  ///
  /// A typical suitable use case is cost estimation when the vector instruction
  /// exists (e.g., from basic blocks during transformation).
  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     unsigned Index = -1) const;

  /// \return The cost of replication shuffle of \p VF elements typed \p EltTy
  /// \p ReplicationFactor times.
  ///
  /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
  ///   <0,0,0,1,1,1,2,2,2,3,3,3>
  InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
                                            int VF,
                                            const APInt &DemandedDstElts,
                                            TTI::TargetCostKind CostKind);

  /// \return The cost of Load and Store instructions.
  InstructionCost
  getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                  unsigned AddressSpace,
                  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
                  OperandValueInfo OpdInfo = {OK_AnyValue, OP_None},
                  const Instruction *I = nullptr) const;

  /// \return The cost of VP Load and Store instructions.
  InstructionCost
  getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                    unsigned AddressSpace,
                    TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
                    const Instruction *I = nullptr) const;

  /// \return The cost of masked Load and Store instructions.
  InstructionCost getMaskedMemoryOpCost(
      unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;

  /// \return The cost of Gather or Scatter operation
  /// \p Opcode - is a type of memory access Load or Store
  /// \p DataTy - a vector type of the data to be loaded or stored
  /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
  /// \p VariableMask - true when the memory access is predicated with a mask
  ///                   that is not a compile-time constant
  /// \p Alignment - alignment of single element
  /// \p I - the optional original context instruction, if one exists, e.g. the
  ///        load/store to transform or the call to the gather/scatter intrinsic
  InstructionCost getGatherScatterOpCost(
      unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
      Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      const Instruction *I = nullptr) const;

  /// \return The cost of the interleaved memory operation.
  /// \p Opcode is the memory operation code
  /// \p VecTy is the vector type of the interleaved access.
  /// \p Factor is the interleave factor
  /// \p Indices is the indices for interleaved load members (as interleaved
  ///    load allows gaps)
  /// \p Alignment is the alignment of the memory operation
  /// \p AddressSpace is address space of the pointer.
  /// \p UseMaskForCond indicates if the memory access is predicated.
  /// \p UseMaskForGaps indicates if gaps should be masked.
  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      bool UseMaskForCond = false, bool UseMaskForGaps = false) const;

  /// A helper function to determine the type of reduction algorithm used
  /// for a given \p Opcode and set of FastMathFlags \p FMF.
  static bool requiresOrderedReduction(Optional<FastMathFlags> FMF) {
    return FMF != None && !(*FMF).allowReassoc();
  }

  /// Calculate the cost of vector reduction intrinsics.
  ///
  /// This is the cost of reducing the vector value of type \p Ty to a scalar
  /// value using the operation denoted by \p Opcode. The FastMathFlags
  /// parameter \p FMF indicates what type of reduction we are performing:
  /// 1. Tree-wise. This is the typical 'fast' reduction performed that
  /// involves successively splitting a vector into half and doing the
  /// operation on the pair of halves until you have a scalar value. For
  /// example:
  ///   (v0, v1, v2, v3)
  ///   ((v0+v2), (v1+v3), undef, undef)
  ///   ((v0+v2+v1+v3), undef, undef, undef)
  /// This is the default behaviour for integer operations, whereas for
  /// floating point we only do this if \p FMF indicates that
  /// reassociation is allowed.
  /// 2. Ordered. For a vector with N elements this involves performing N
  /// operations in lane order, starting with an initial scalar value, i.e.
  ///   result = InitVal + v0
  ///   result = result + v1
  ///   result = result + v2
  ///   result = result + v3
  /// This is only the case for FP operations and when reassociation is not
  /// allowed.
  ///
  InstructionCost getArithmeticReductionCost(
      unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;

  InstructionCost getMinMaxReductionCost(
      VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;

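  // Illustrative sketch (not part of the original header): costing a
  // reassociable float add reduction of an assumed vector type VecTy; without
  // the allow-reassoc flag, the ordered (lane-by-lane) form would be costed
  // instead, per requiresOrderedReduction above:
  //
  //   FastMathFlags FMF;
  //   FMF.setAllowReassoc();
  //   InstructionCost RedCost =
  //       TTI.getArithmeticReductionCost(Instruction::FAdd, VecTy, FMF);
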
  /// Calculate the cost of an extended reduction pattern, similar to
  /// getArithmeticReductionCost of an Add reduction with multiply and optional
  /// extensions. This is the cost of:
  ///   ResTy vecreduce.add(mul (A, B)).
  ///   ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B)).
  InstructionCost getMulAccReductionCost(
      bool IsUnsigned, Type *ResTy, VectorType *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;

  /// Calculate the cost of an extended reduction pattern, similar to
  /// getArithmeticReductionCost of a reduction with an extension.
  /// This is the cost of:
  ///   ResTy vecreduce.opcode(ext(Ty A)).
  InstructionCost getExtendedReductionCost(
      unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
      Optional<FastMathFlags> FMF,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;

  /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
  /// Three cases are handled: 1. scalar instruction 2. vector instruction
  /// 3. scalar instruction which is to be vectorized.
  InstructionCost
  getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                        TTI::TargetCostKind CostKind) const;

  /// \returns The cost of Call instructions.
  InstructionCost getCallInstrCost(
      Function *F, Type *RetTy, ArrayRef<Type *> Tys,
      TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;

  /// \returns The number of pieces into which the provided type must be
  /// split during legalization. Zero is returned when the answer is unknown.
  unsigned getNumberOfParts(Type *Tp) const;

1326  /// \returns The cost of the address computation. For most targets this can be
1327  /// merged into the instruction indexing mode. Some targets might want to
1328  /// distinguish between address computation for memory operations on vector
1329  /// types and scalar types. Such targets should override this function.
1330  /// The 'SE' parameter holds a pointer to the scalar evolution object which
1331  /// is used to get the Ptr step value in case of constant stride.
1332  /// The 'Ptr' parameter holds the SCEV of the access pointer.
1333  InstructionCost getAddressComputationCost(Type *Ty,
1334  ScalarEvolution *SE = nullptr,
1335  const SCEV *Ptr = nullptr) const;
1336 
1337  /// \returns The cost, if any, of keeping values of the given types alive
1338  /// over a callsite.
1339  ///
1340  /// Some types may require the use of register classes that do not have
1341  /// any callee-saved registers, so would require a spill and fill.
1342  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
1343 
1344  /// \returns True if the intrinsic is a supported memory intrinsic. Info
1345  /// will contain additional information: whether the intrinsic may read
1346  /// or write memory, its volatility, and the pointer. Info is undefined
1347  /// if false is returned.
1348  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
1349 
1350  /// \returns The maximum element size, in bytes, for an element
1351  /// unordered-atomic memory intrinsic.
1352  unsigned getAtomicMemIntrinsicMaxElementSize() const;
1353 
1354  /// \returns A value which is the result of the given memory intrinsic. New
1355  /// instructions may be created to extract the result from the given intrinsic
1356  /// memory operation. Returns nullptr if the target cannot create a result
1357  /// from the given intrinsic.
1358  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1359  Type *ExpectedType) const;
1360 
1361  /// \returns The type to use in a loop expansion of a memcpy call.
1362  Type *
1363  getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1364  unsigned SrcAddrSpace, unsigned DestAddrSpace,
1365  unsigned SrcAlign, unsigned DestAlign,
1366  Optional<uint32_t> AtomicElementSize = None) const;
1367 
1368  /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
1369  /// \param RemainingBytes The number of bytes to copy.
1370  ///
1371  /// Calculates the operand types to use when copying \p RemainingBytes of
1372  /// memory, where source and destination alignments are \p SrcAlign and
1373  /// \p DestAlign respectively.
1374  void getMemcpyLoopResidualLoweringType(
1375  SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1376  unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1377  unsigned SrcAlign, unsigned DestAlign,
1378  Optional<uint32_t> AtomicCpySize = None) const;
1379 
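/// A minimal usage sketch, assuming `TTI`, `Ctx`, and a memcpy length value
/// `Length` are in scope: a pass expanding a memcpy into a loop asks the
/// target for the widest profitable copy type, then for the operand types of
/// the residual (non-multiple) tail.
/// \code
///   Type *LoopOpTy = TTI.getMemcpyLoopLoweringType(
///       Ctx, Length, /*SrcAddrSpace=*/0, /*DestAddrSpace=*/0,
///       /*SrcAlign=*/16, /*DestAlign=*/16);
///   SmallVector<Type *, 4> ResidualTys;
///   TTI.getMemcpyLoopResidualLoweringType(
///       ResidualTys, Ctx, /*RemainingBytes=*/7, /*SrcAddrSpace=*/0,
///       /*DestAddrSpace=*/0, /*SrcAlign=*/16, /*DestAlign=*/16);
/// \endcode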
1380  /// \returns True if the two functions have compatible attributes for inlining
1381  /// purposes.
1382  bool areInlineCompatible(const Function *Caller,
1383  const Function *Callee) const;
1384 
1385  /// \returns True if the caller and callee agree on how \p Types will be
1386  /// passed to or returned from the callee.
1388  /// \param Types List of types to check.
1389  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
1390  const ArrayRef<Type *> &Types) const;
1391 
1392  /// The type of load/store indexing.
1393  enum MemIndexedMode {
1394  MIM_Unindexed, ///< No indexing.
1395  MIM_PreInc, ///< Pre-incrementing.
1396  MIM_PreDec, ///< Pre-decrementing.
1397  MIM_PostInc, ///< Post-incrementing.
1398  MIM_PostDec ///< Post-decrementing.
1399  };
1400 
1401  /// \returns True if the specified indexed load for the given type is legal.
1402  bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
1403 
1404  /// \returns True if the specified indexed store for the given type is legal.
1405  bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
1406 
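/// A minimal usage sketch, assuming `TTI` and `Ctx` are in scope: check
/// whether a post-incrementing i32 load is legal before forming such an
/// access pattern.
/// \code
///   Type *Int32Ty = Type::getInt32Ty(Ctx);
///   if (TTI.isIndexedLoadLegal(TargetTransformInfo::MIM_PostInc, Int32Ty)) {
///     // safe to favor a pointer-increment-folding access sequence
///   }
/// \endcode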
1407  /// \returns The bitwidth of the largest vector type that should be used to
1408  /// load/store in the given address space.
1409  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
1410 
1411  /// \returns True if the load instruction is legal to vectorize.
1412  bool isLegalToVectorizeLoad(LoadInst *LI) const;
1413 
1414  /// \returns True if the store instruction is legal to vectorize.
1415  bool isLegalToVectorizeStore(StoreInst *SI) const;
1416 
1417  /// \returns True if it is legal to vectorize the given load chain.
1418  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
1419  unsigned AddrSpace) const;
1420 
1421  /// \returns True if it is legal to vectorize the given store chain.
1422  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
1423  unsigned AddrSpace) const;
1424 
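/// A minimal usage sketch, assuming `TTI` and a byte count `ChainSizeInBytes`
/// are in scope: a load/store vectorizer can gate a candidate chain on both
/// the vector register width and per-chain legality.
/// \code
///   unsigned WidthInBits = TTI.getLoadStoreVecRegBitWidth(/*AddrSpace=*/0);
///   bool Profitable =
///       ChainSizeInBytes * 8 <= WidthInBits &&
///       TTI.isLegalToVectorizeLoadChain(ChainSizeInBytes, Align(4),
///                                       /*AddrSpace=*/0);
/// \endcode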
1425  /// \returns True if it is legal to vectorize the given reduction kind.
1426  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
1427  ElementCount VF) const;
1428 
1429  /// \returns True if the given type is supported for scalable vectors.
1430  bool isElementTypeLegalForScalableVector(Type *Ty) const;
1431 
1432  /// \returns The new vector factor value if the target doesn't support
1433  /// loads of the given size or has a better vector factor.
1434  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1435  unsigned ChainSizeInBytes,
1436  VectorType *VecTy) const;
1437 
1438  /// \returns The new vector factor value if the target doesn't support
1439  /// stores of the given size or has a better vector factor.
1440  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1441  unsigned ChainSizeInBytes,
1442  VectorType *VecTy) const;
1443 
1444  /// Flags describing the kind of vector reduction.
1445  struct ReductionFlags {
1446  ReductionFlags() = default;
1447  bool IsMaxOp =
1448  false; ///< If the op is a min/max kind, true if it's a max operation.
1449  bool IsSigned = false; ///< Whether the operation is a signed int reduction.
1450  bool NoNaN =
1451  false; ///< If the op is an fp min/max, whether NaNs may be present.
1452  };
1453 
1454  /// \returns True if the target prefers reductions to be performed in the loop.
1455  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1456  ReductionFlags Flags) const;
1457 
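/// A minimal usage sketch, assuming `TTI` and `Ctx` are in scope: ask whether
/// an integer add reduction should be kept in the loop rather than computed
/// by a tail shuffle sequence.
/// \code
///   TargetTransformInfo::ReductionFlags Flags; // defaults: not a min/max op
///   bool InLoop = TTI.preferInLoopReduction(Instruction::Add,
///                                           Type::getInt32Ty(Ctx), Flags);
/// \endcode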
1458  /// \returns True if the target prefers the reduction select to be kept in
1459  /// the loop when tail folding, i.e.
1460  /// loop:
1461  /// p = phi (0, s)
1462  /// a = add (p, x)
1463  /// s = select (mask, a, p)
1464  /// vecreduce.add(s)
1465  ///
1466  /// As opposed to the normal scheme of p = phi (0, a) which allows the select
1467  /// to be pulled out of the loop. If the select(.., add, ..) can be predicated
1468  /// by the target, this can lead to cleaner code generation.
1469  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1470  ReductionFlags Flags) const;
1471 
1472  /// \returns True if the target wants to expand the given reduction intrinsic
1473  /// into a shuffle sequence.
1474  bool shouldExpandReduction(const IntrinsicInst *II) const;
1475 
1476  /// \returns the size cost of rematerializing a GlobalValue address relative
1477  /// to a stack reload.
1478  unsigned getGISelRematGlobalCost() const;
1479 
1480  /// \returns the lower bound of a trip count to decide on vectorization
1481  /// while tail-folding.
1482  unsigned getMinTripCountTailFoldingThreshold() const;
1483 
1484  /// \returns True if the target supports scalable vectors.
1485  bool supportsScalableVectors() const;
1486 
1487  /// \return true when scalable vectorization is preferred.
1488  bool enableScalableVectorization() const;
1489 
1490  /// \name Vector Predication Information
1491  /// @{
1492  /// Whether the target supports the %evl parameter of VP intrinsics
1493  /// efficiently in hardware, for the given opcode and type/alignment (see the
1494  /// LLVM Language Reference, "Vector Predication Intrinsics").
1495  /// Use of %evl is discouraged when that is not the case.
1496  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
1497  Align Alignment) const;
1498 
1499  struct VPLegalization {
1500  enum VPTransform {
1501  // keep the predicating parameter
1502  Legal = 0,
1503  // where legal, discard the predicate parameter
1504  Discard = 1,
1505  // transform into something else that is also predicating
1506  Convert = 2,
1507  };
1508 
1509  // How to transform the EVL parameter.
1510  // Legal: keep the EVL parameter as it is.
1511  // Discard: Ignore the EVL parameter where it is safe to do so.
1512  // Convert: Fold the EVL into the mask parameter.
1513  VPTransform EVLParamStrategy;
1514 
1515  // How to transform the operator.
1516  // Legal: The target supports this operator.
1517  // Convert: Convert this to a non-VP operation.
1518  // The 'Discard' strategy is invalid.
1519  VPTransform OpStrategy;
1520 
1521  bool shouldDoNothing() const {
1522  return (EVLParamStrategy == Legal) && (OpStrategy == Legal);
1523  }
1524  VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)
1525  : EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {}
1526  };
1527 
1528  /// \returns How the target needs this vector-predicated operation to be
1529  /// transformed.
1530  VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const;
1531  /// @}
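/// A minimal usage sketch, assuming `TTI` is in scope and `VPI` is a
/// VPIntrinsic visited by an expansion pass: decide whether the vp.* call can
/// be left as-is or must be rewritten.
/// \code
///   TargetTransformInfo::VPLegalization VPL =
///       TTI.getVPLegalizationStrategy(VPI);
///   if (VPL.shouldDoNothing())
///     return; // target handles %evl and this VP opcode natively
///   // otherwise fold %evl into the mask and/or lower to a non-VP operation
/// \endcode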
1532 
1533  /// @}
1534 
1535 private:
1536  /// The abstract base class used to type erase specific TTI
1537  /// implementations.
1538  class Concept;
1539 
1540  /// The template model for the base class which wraps a concrete
1541  /// implementation in a type erased interface.
1542  template <typename T> class Model;
1543 
1544  std::unique_ptr<Concept> TTIImpl;
1545 };
1546 
1547 class TargetTransformInfo::Concept {
1548 public:
1549  virtual ~Concept() = 0;
1550  virtual const DataLayout &getDataLayout() const = 0;
1551  virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
1552  ArrayRef<const Value *> Operands,
1553  TTI::TargetCostKind CostKind) = 0;
1554  virtual unsigned getInliningThresholdMultiplier() = 0;
1555  virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
1556  virtual int getInlinerVectorBonusPercent() = 0;
1557  virtual InstructionCost getMemcpyCost(const Instruction *I) = 0;
1558  virtual unsigned
1559  getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
1560  ProfileSummaryInfo *PSI,
1561  BlockFrequencyInfo *BFI) = 0;
1562  virtual InstructionCost getInstructionCost(const User *U,
1563  ArrayRef<const Value *> Operands,
1564  TargetCostKind CostKind) = 0;
1565  virtual BranchProbability getPredictableBranchThreshold() = 0;
1566  virtual bool hasBranchDivergence() = 0;
1567  virtual bool useGPUDivergenceAnalysis() = 0;
1568  virtual bool isSourceOfDivergence(const Value *V) = 0;
1569  virtual bool isAlwaysUniform(const Value *V) = 0;
1570  virtual unsigned getFlatAddressSpace() = 0;
1571  virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
1572  Intrinsic::ID IID) const = 0;
1573  virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1574  virtual bool
1575  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0;
1576  virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
1577  virtual std::pair<const Value *, unsigned>
1578  getPredicatedAddrSpace(const Value *V) const = 0;
1579  virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
1580  Value *OldV,
1581  Value *NewV) const = 0;
1582  virtual bool isLoweredToCall(const Function *F) = 0;
1583  virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
1584  UnrollingPreferences &UP,
1585  OptimizationRemarkEmitter *ORE) = 0;
1586  virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
1587  PeelingPreferences &PP) = 0;
1588  virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
1589  AssumptionCache &AC,
1590  TargetLibraryInfo *LibInfo,
1591  HardwareLoopInfo &HWLoopInfo) = 0;
1592  virtual bool
1593  preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
1594  AssumptionCache &AC, TargetLibraryInfo *TLI,
1595  DominatorTree *DT, LoopVectorizationLegality *LVL,
1596  InterleavedAccessInfo *IAI) = 0;
1597  virtual PredicationStyle emitGetActiveLaneMask() = 0;
1598  virtual Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
1599  IntrinsicInst &II) = 0;
1600  virtual Optional<Value *>
1601  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
1602  APInt DemandedMask, KnownBits &Known,
1603  bool &KnownBitsComputed) = 0;
1604  virtual Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
1605  InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
1606  APInt &UndefElts2, APInt &UndefElts3,
1607  std::function<void(Instruction *, unsigned, APInt, APInt &)>
1608  SimplifyAndSetOp) = 0;
1609  virtual bool isLegalAddImmediate(int64_t Imm) = 0;
1610  virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
1611  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
1612  int64_t BaseOffset, bool HasBaseReg,
1613  int64_t Scale, unsigned AddrSpace,
1614  Instruction *I) = 0;
1615  virtual bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
1616  const TargetTransformInfo::LSRCost &C2) = 0;
1617  virtual bool isNumRegsMajorCostOfLSR() = 0;
1618  virtual bool isProfitableLSRChainElement(Instruction *I) = 0;
1619  virtual bool canMacroFuseCmp() = 0;
1620  virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
1621  LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
1622  TargetLibraryInfo *LibInfo) = 0;
1623  virtual AddressingModeKind
1624  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const = 0;
1625  virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
1626  virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
1627  virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
1628  virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
1629  virtual bool isLegalBroadcastLoad(Type *ElementTy,
1630  ElementCount NumElements) const = 0;
1631  virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
1632  virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
1633  virtual bool forceScalarizeMaskedGather(VectorType *DataType,
1634  Align Alignment) = 0;
1635  virtual bool forceScalarizeMaskedScatter(VectorType *DataType,
1636  Align Alignment) = 0;
1637  virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
1638  virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
1639  virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
1640  unsigned Opcode1,
1641  const SmallBitVector &OpcodeMask) const = 0;
1642  virtual bool enableOrderedReductions() = 0;
1643  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
1644  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
1645  virtual bool prefersVectorizedAddressing() = 0;
1646  virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
1647  int64_t BaseOffset,
1648  bool HasBaseReg, int64_t Scale,
1649  unsigned AddrSpace) = 0;
1650  virtual bool LSRWithInstrQueries() = 0;
1651  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
1652  virtual bool isProfitableToHoist(Instruction *I) = 0;
1653  virtual bool useAA() = 0;
1654  virtual bool isTypeLegal(Type *Ty) = 0;
1655  virtual unsigned getRegUsageForType(Type *Ty) = 0;
1656  virtual bool shouldBuildLookupTables() = 0;
1657  virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
1658  virtual bool shouldBuildRelLookupTables() = 0;
1659  virtual bool useColdCCForColdCall(Function &F) = 0;
1660  virtual InstructionCost getScalarizationOverhead(VectorType *Ty,
1661  const APInt &DemandedElts,
1662  bool Insert,
1663  bool Extract) = 0;
1664  virtual InstructionCost
1665  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1666  ArrayRef<Type *> Tys) = 0;
1667  virtual bool supportsEfficientVectorElementLoadStore() = 0;
1668  virtual bool supportsTailCalls() = 0;
1669  virtual bool supportsTailCallFor(const CallBase *CB) = 0;
1670  virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
1671  virtual MemCmpExpansionOptions
1672  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
1673  virtual bool enableInterleavedAccessVectorization() = 0;
1674  virtual bool enableMaskedInterleavedAccessVectorization() = 0;
1675  virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
1676  virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
1677  unsigned BitWidth,
1678  unsigned AddressSpace,
1679  Align Alignment,
1680  bool *Fast) = 0;
1681  virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
1682  virtual bool haveFastSqrt(Type *Ty) = 0;
1683  virtual bool isExpensiveToSpeculativelyExecute(const Instruction *I) = 0;
1684  virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
1685  virtual InstructionCost getFPOpCost(Type *Ty) = 0;
1686  virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
1687  const APInt &Imm, Type *Ty) = 0;
1688  virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
1689  TargetCostKind CostKind) = 0;
1690  virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
1691  const APInt &Imm, Type *Ty,
1692  TargetCostKind CostKind,
1693  Instruction *Inst = nullptr) = 0;
1694  virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
1695  const APInt &Imm, Type *Ty,
1696  TargetCostKind CostKind) = 0;
1697  virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
1698  virtual unsigned getRegisterClassForType(bool Vector,
1699  Type *Ty = nullptr) const = 0;
1700  virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
1701  virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0;
1702  virtual unsigned getMinVectorRegisterBitWidth() const = 0;
1703  virtual Optional<unsigned> getMaxVScale() const = 0;
1704  virtual Optional<unsigned> getVScaleForTuning() const = 0;
1705  virtual bool
1706  shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const = 0;
1707  virtual ElementCount getMinimumVF(unsigned ElemWidth,
1708  bool IsScalable) const = 0;
1709  virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
1710  virtual unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
1711  Type *ScalarValTy) const = 0;
1712  virtual bool shouldConsiderAddressTypePromotion(
1713  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1714  virtual unsigned getCacheLineSize() const = 0;
1715  virtual Optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
1716  virtual Optional<unsigned> getCacheAssociativity(CacheLevel Level) const = 0;
1717 
1718  /// \return How far ahead of a load we should place the prefetch
1719  /// instruction. This is currently measured in number of
1720  /// instructions.
1721  virtual unsigned getPrefetchDistance() const = 0;
1722 
1723  /// \return Some HW prefetchers can handle accesses up to a certain
1724  /// constant stride. This is the minimum stride in bytes where it
1725  /// makes sense to start adding SW prefetches. The default is 1,
1726  /// i.e. prefetch with any stride. Sometimes prefetching is beneficial
1727  /// even below the HW prefetcher limit, and the arguments provided are
1728  /// meant to serve as a basis for deciding this for a particular loop.
1729  virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1730  unsigned NumStridedMemAccesses,
1731  unsigned NumPrefetches,
1732  bool HasCall) const = 0;
1733 
1734  /// \return The maximum number of iterations to prefetch ahead. If
1735  /// the required number of iterations is more than this number, no
1736  /// prefetching is performed.
1737  virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
1738 
1739  /// \return True if prefetching should also be done for writes.
1740  virtual bool enableWritePrefetching() const = 0;
1741 
1742  /// \return True if the target wants to issue a prefetch in address space \p AS.
1743  virtual bool shouldPrefetchAddressSpace(unsigned AS) const = 0;
1744 
1745  virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
1746  virtual InstructionCost getArithmeticInstrCost(
1747  unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
1748  OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
1749  ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
1750 
1751  virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
1752  ArrayRef<int> Mask,
1753  TTI::TargetCostKind CostKind,
1754  int Index, VectorType *SubTp,
1755  ArrayRef<const Value *> Args) = 0;
1756  virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
1757  Type *Src, CastContextHint CCH,
1758  TTI::TargetCostKind CostKind,
1759  const Instruction *I) = 0;
1760  virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1761  VectorType *VecTy,
1762  unsigned Index) = 0;
1763  virtual InstructionCost getCFInstrCost(unsigned Opcode,
1764  TTI::TargetCostKind CostKind,
1765  const Instruction *I = nullptr) = 0;
1766  virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
1767  Type *CondTy,
1768  CmpInst::Predicate VecPred,
1769  TTI::TargetCostKind CostKind,
1770  const Instruction *I) = 0;
1771  virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
1772  unsigned Index) = 0;
1773  virtual InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
1774  unsigned Index) = 0;
1775 
1776  virtual InstructionCost
1777  getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
1778  const APInt &DemandedDstElts,
1779  TTI::TargetCostKind CostKind) = 0;
1780 
1781  virtual InstructionCost
1782  getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1783  unsigned AddressSpace, TTI::TargetCostKind CostKind,
1784  OperandValueInfo OpInfo, const Instruction *I) = 0;
1785  virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src,
1786  Align Alignment,
1787  unsigned AddressSpace,
1788  TTI::TargetCostKind CostKind,
1789  const Instruction *I) = 0;
1790  virtual InstructionCost
1791  getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1792  unsigned AddressSpace,
1793  TTI::TargetCostKind CostKind) = 0;
1794  virtual InstructionCost
1795  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
1796  bool VariableMask, Align Alignment,
1797  TTI::TargetCostKind CostKind,
1798  const Instruction *I = nullptr) = 0;
1799 
1800  virtual InstructionCost getInterleavedMemoryOpCost(
1801  unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1802  Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
1803  bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
1804  virtual InstructionCost
1805  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
1806  Optional<FastMathFlags> FMF,
1807  TTI::TargetCostKind CostKind) = 0;
1808  virtual InstructionCost
1809  getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
1810  TTI::TargetCostKind CostKind) = 0;
1811  virtual InstructionCost getExtendedReductionCost(
1812  unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1813  Optional<FastMathFlags> FMF,
1814  TTI::TargetCostKind CostKind) = 0;
1815  virtual InstructionCost getMulAccReductionCost(
1816  bool IsUnsigned, Type *ResTy, VectorType *Ty,
1817  TTI::TargetCostKind CostKind) = 0;
1818  virtual InstructionCost
1819  getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
1820  TTI::TargetCostKind CostKind) = 0;
1821  virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy,
1822  ArrayRef<Type *> Tys,
1823  TTI::TargetCostKind CostKind) = 0;
1824  virtual unsigned getNumberOfParts(Type *Tp) = 0;
1825  virtual InstructionCost
1826  getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr) = 0;
1827  virtual InstructionCost
1828  getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
1829  virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1830  MemIntrinsicInfo &Info) = 0;
1831  virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
1832  virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1833  Type *ExpectedType) = 0;
1834  virtual Type *
1835  getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1836  unsigned SrcAddrSpace, unsigned DestAddrSpace,
1837  unsigned SrcAlign, unsigned DestAlign,
1838  Optional<uint32_t> AtomicElementSize) const = 0;
1839 
1840  virtual void getMemcpyLoopResidualLoweringType(
1841  SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1842  unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1843  unsigned SrcAlign, unsigned DestAlign,
1844  Optional<uint32_t> AtomicCpySize) const = 0;
1845  virtual bool areInlineCompatible(const Function *Caller,
1846  const Function *Callee) const = 0;
1847  virtual bool areTypesABICompatible(const Function *Caller,
1848  const Function *Callee,
1849  const ArrayRef<Type *> &Types) const = 0;
1850  virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1851  virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1852  virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
1853  virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
1854  virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
1855  virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1856  Align Alignment,
1857  unsigned AddrSpace) const = 0;
1858  virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1859  Align Alignment,
1860  unsigned AddrSpace) const = 0;
1861  virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
1862  ElementCount VF) const = 0;
1863  virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0;
1864  virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1865  unsigned ChainSizeInBytes,
1866  VectorType *VecTy) const = 0;
1867  virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1868  unsigned ChainSizeInBytes,
1869  VectorType *VecTy) const = 0;
1870  virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1871  ReductionFlags) const = 0;
1872  virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1873  ReductionFlags) const = 0;
1874  virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
1875  virtual unsigned getGISelRematGlobalCost() const = 0;
1876  virtual unsigned getMinTripCountTailFoldingThreshold() const = 0;
1877  virtual bool enableScalableVectorization() const = 0;
1878  virtual bool supportsScalableVectors() const = 0;
1879  virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
1880  Align Alignment) const = 0;
1881  virtual VPLegalization
1882  getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
1883 };
1884 
1885 template <typename T>
1886 class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
1887  T Impl;
1888 
1889 public:
1890  Model(T Impl) : Impl(std::move(Impl)) {}
1891  ~Model() override = default;
1892 
1893  const DataLayout &getDataLayout() const override {
1894  return Impl.getDataLayout();
1895  }
1896 
1897  InstructionCost
1898  getGEPCost(Type *PointeeType, const Value *Ptr,
1899  ArrayRef<const Value *> Operands,
1900  TTI::TargetCostKind CostKind) override {
1901  return Impl.getGEPCost(PointeeType, Ptr, Operands, CostKind);
1902  }
1903  unsigned getInliningThresholdMultiplier() override {
1904  return Impl.getInliningThresholdMultiplier();
1905  }
1906  unsigned adjustInliningThreshold(const CallBase *CB) override {
1907  return Impl.adjustInliningThreshold(CB);
1908  }
1909  int getInlinerVectorBonusPercent() override {
1910  return Impl.getInlinerVectorBonusPercent();
1911  }
1912  InstructionCost getMemcpyCost(const Instruction *I) override {
1913  return Impl.getMemcpyCost(I);
1914  }
1915  InstructionCost getInstructionCost(const User *U,
1916  ArrayRef<const Value *> Operands,
1917  TargetCostKind CostKind) override {
1918  return Impl.getInstructionCost(U, Operands, CostKind);
1919  }
1920  BranchProbability getPredictableBranchThreshold() override {
1921  return Impl.getPredictableBranchThreshold();
1922  }
1923  bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
1924  bool useGPUDivergenceAnalysis() override {
1925  return Impl.useGPUDivergenceAnalysis();
1926  }
1927  bool isSourceOfDivergence(const Value *V) override {
1928  return Impl.isSourceOfDivergence(V);
1929  }
1930 
1931  bool isAlwaysUniform(const Value *V) override {
1932  return Impl.isAlwaysUniform(V);
1933  }
1934 
1935  unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
1936 
1937  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
1938  Intrinsic::ID IID) const override {
1939  return Impl.collectFlatAddressOperands(OpIndexes, IID);
1940  }
1941 
1942  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
1943  return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
1944  }
1945 
1946  bool
1947  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
1948  return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
1949  }
1950 
1951  unsigned getAssumedAddrSpace(const Value *V) const override {
1952  return Impl.getAssumedAddrSpace(V);
1953  }
1954 
1955  std::pair<const Value *, unsigned>
1956  getPredicatedAddrSpace(const Value *V) const override {
1957  return Impl.getPredicatedAddrSpace(V);
1958  }
1959 
1960  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
1961  Value *NewV) const override {
1962  return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
1963  }
1964 
1965  bool isLoweredToCall(const Function *F) override {
1966  return Impl.isLoweredToCall(F);
1967  }
1968  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
1969  UnrollingPreferences &UP,
1970  OptimizationRemarkEmitter *ORE) override {
1971  return Impl.getUnrollingPreferences(L, SE, UP, ORE);
1972  }
1973  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
1974  PeelingPreferences &PP) override {
1975  return Impl.getPeelingPreferences(L, SE, PP);
1976  }
1977  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
1978  AssumptionCache &AC, TargetLibraryInfo *LibInfo,
1979  HardwareLoopInfo &HWLoopInfo) override {
1980  return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
1981  }
1982  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
1983  AssumptionCache &AC, TargetLibraryInfo *TLI,
1984  DominatorTree *DT,
1985  LoopVectorizationLegality *LVL,
1986  InterleavedAccessInfo *IAI) override {
1987  return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL, IAI);
1988  }
1989  PredicationStyle emitGetActiveLaneMask() override {
1990  return Impl.emitGetActiveLaneMask();
1991  }
1992  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
1993  IntrinsicInst &II) override {
1994  return Impl.instCombineIntrinsic(IC, II);
1995  }
1996  Optional<Value *>
1997  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
1998  APInt DemandedMask, KnownBits &Known,
1999  bool &KnownBitsComputed) override {
2000  return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
2001  KnownBitsComputed);
2002  }
2003  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
2004  InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
2005  APInt &UndefElts2, APInt &UndefElts3,
2006  std::function<void(Instruction *, unsigned, APInt, APInt &)>
2007  SimplifyAndSetOp) override {
2008  return Impl.simplifyDemandedVectorEltsIntrinsic(
2009  IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
2010  SimplifyAndSetOp);
2011  }
2012  bool isLegalAddImmediate(int64_t Imm) override {
2013  return Impl.isLegalAddImmediate(Imm);
2014  }
2015  bool isLegalICmpImmediate(int64_t Imm) override {
2016  return Impl.isLegalICmpImmediate(Imm);
2017  }
2018  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
2019  bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
2020  Instruction *I) override {
2021  return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2022  AddrSpace, I);
2023  }
2024  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
2025  const TargetTransformInfo::LSRCost &C2) override {
2026  return Impl.isLSRCostLess(C1, C2);
2027  }
2028  bool isNumRegsMajorCostOfLSR() override {
2029  return Impl.isNumRegsMajorCostOfLSR();
2030  }
2031  bool isProfitableLSRChainElement(Instruction *I) override {
2032  return Impl.isProfitableLSRChainElement(I);
2033  }
2034  bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
2035  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
2036  DominatorTree *DT, AssumptionCache *AC,
2037  TargetLibraryInfo *LibInfo) override {
2038  return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
2039  }
2040  AddressingModeKind
2041  getPreferredAddressingMode(const Loop *L,
2042  ScalarEvolution *SE) const override {
2043  return Impl.getPreferredAddressingMode(L, SE);
2044  }
2045  bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
2046  return Impl.isLegalMaskedStore(DataType, Alignment);
2047  }
2048  bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
2049  return Impl.isLegalMaskedLoad(DataType, Alignment);
2050  }
2051  bool isLegalNTStore(Type *DataType, Align Alignment) override {
2052  return Impl.isLegalNTStore(DataType, Alignment);
2053  }
2054  bool isLegalNTLoad(Type *DataType, Align Alignment) override {
2055  return Impl.isLegalNTLoad(DataType, Alignment);
2056  }
2057  bool isLegalBroadcastLoad(Type *ElementTy,
2058  ElementCount NumElements) const override {
2059  return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
2060  }
2061  bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
2062  return Impl.isLegalMaskedScatter(DataType, Alignment);
2063  }
2064  bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
2065  return Impl.isLegalMaskedGather(DataType, Alignment);
2066  }
2067  bool forceScalarizeMaskedGather(VectorType *DataType,
2068  Align Alignment) override {
2069  return Impl.forceScalarizeMaskedGather(DataType, Alignment);
2070  }
2071  bool forceScalarizeMaskedScatter(VectorType *DataType,
2072  Align Alignment) override {
2073  return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
2074  }
2075  bool isLegalMaskedCompressStore(Type *DataType) override {
2076  return Impl.isLegalMaskedCompressStore(DataType);
2077  }
2078  bool isLegalMaskedExpandLoad(Type *DataType) override {
2079  return Impl.isLegalMaskedExpandLoad(DataType);
2080  }
2081  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
2082  const SmallBitVector &OpcodeMask) const override {
2083  return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
2084  }
2085  bool enableOrderedReductions() override {
2086  return Impl.enableOrderedReductions();
2087  }
2088  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
2089  return Impl.hasDivRemOp(DataType, IsSigned);
2090  }
2091  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
2092  return Impl.hasVolatileVariant(I, AddrSpace);
2093  }
2094  bool prefersVectorizedAddressing() override {
2095  return Impl.prefersVectorizedAddressing();
2096  }
2097  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
2098  int64_t BaseOffset, bool HasBaseReg,
2099  int64_t Scale,
2100  unsigned AddrSpace) override {
2101  return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2102  AddrSpace);
2103  }
2104  bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
2105  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
2106  return Impl.isTruncateFree(Ty1, Ty2);
2107  }
2108  bool isProfitableToHoist(Instruction *I) override {
2109  return Impl.isProfitableToHoist(I);
2110  }
2111  bool useAA() override { return Impl.useAA(); }
2112  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
2113  unsigned getRegUsageForType(Type *Ty) override {
2114  return Impl.getRegUsageForType(Ty);
2115  }
2116  bool shouldBuildLookupTables() override {
2117  return Impl.shouldBuildLookupTables();
2118  }
2119  bool shouldBuildLookupTablesForConstant(Constant *C) override {
2120  return Impl.shouldBuildLookupTablesForConstant(C);
2121  }
2122  bool shouldBuildRelLookupTables() override {
2123  return Impl.shouldBuildRelLookupTables();
2124  }
2125  bool useColdCCForColdCall(Function &F) override {
2126  return Impl.useColdCCForColdCall(F);
2127  }
2128 
2129  InstructionCost getScalarizationOverhead(VectorType *Ty,
2130  const APInt &DemandedElts,
2131  bool Insert, bool Extract) override {
2132  return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
2133  }
2134  InstructionCost
2135  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
2136  ArrayRef<Type *> Tys) override {
2137  return Impl.getOperandsScalarizationOverhead(Args, Tys);
2138  }
2139 
2140  bool supportsEfficientVectorElementLoadStore() override {
2141  return Impl.supportsEfficientVectorElementLoadStore();
2142  }
2143 
2144  bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
2145  bool supportsTailCallFor(const CallBase *CB) override {
2146  return Impl.supportsTailCallFor(CB);
2147  }
2148 
2149  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
2150  return Impl.enableAggressiveInterleaving(LoopHasReductions);
2151  }
2152  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
2153  bool IsZeroCmp) const override {
2154  return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
2155  }
2156  bool enableInterleavedAccessVectorization() override {
2157  return Impl.enableInterleavedAccessVectorization();
2158  }
2159  bool enableMaskedInterleavedAccessVectorization() override {
2160  return Impl.enableMaskedInterleavedAccessVectorization();
2161  }
2162  bool isFPVectorizationPotentiallyUnsafe() override {
2163  return Impl.isFPVectorizationPotentiallyUnsafe();
2164  }
2165  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
2166  unsigned AddressSpace, Align Alignment,
2167  bool *Fast) override {
2168  return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
2169  Alignment, Fast);
2170  }
2171  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
2172  return Impl.getPopcntSupport(IntTyWidthInBit);
2173  }
2174  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
2175 
2176  bool isExpensiveToSpeculativelyExecute(const Instruction* I) override {
2177  return Impl.isExpensiveToSpeculativelyExecute(I);
2178  }
2179 
2180  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
2181  return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
2182  }
2183 
2184  InstructionCost getFPOpCost(Type *Ty) override {
2185  return Impl.getFPOpCost(Ty);
2186  }
2187 
2188  InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
2189  const APInt &Imm, Type *Ty) override {
2190  return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
2191  }
2192  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
2193  TargetCostKind CostKind) override {
2194  return Impl.getIntImmCost(Imm, Ty, CostKind);
2195  }
2196  InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
2197  const APInt &Imm, Type *Ty,
2198  TargetCostKind CostKind,
2199  Instruction *Inst = nullptr) override {
2200  return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
2201  }
2202  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
2203  const APInt &Imm, Type *Ty,
2204  TargetCostKind CostKind) override {
2205  return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
2206  }
2207  unsigned getNumberOfRegisters(unsigned ClassID) const override {
2208  return Impl.getNumberOfRegisters(ClassID);
2209  }
2210  unsigned getRegisterClassForType(bool Vector,
2211  Type *Ty = nullptr) const override {
2212  return Impl.getRegisterClassForType(Vector, Ty);
2213  }
2214  const char *getRegisterClassName(unsigned ClassID) const override {
2215  return Impl.getRegisterClassName(ClassID);
2216  }
2217  TypeSize getRegisterBitWidth(RegisterKind K) const override {
2218  return Impl.getRegisterBitWidth(K);
2219  }
2220  unsigned getMinVectorRegisterBitWidth() const override {
2221  return Impl.getMinVectorRegisterBitWidth();
2222  }
2223  Optional<unsigned> getMaxVScale() const override {
2224  return Impl.getMaxVScale();
2225  }
2226  Optional<unsigned> getVScaleForTuning() const override {
2227  return Impl.getVScaleForTuning();
2228  }
2229  bool shouldMaximizeVectorBandwidth(
2230  TargetTransformInfo::RegisterKind K) const override {
2231  return Impl.shouldMaximizeVectorBandwidth(K);
2232  }
2233  ElementCount getMinimumVF(unsigned ElemWidth,
2234  bool IsScalable) const override {
2235  return Impl.getMinimumVF(ElemWidth, IsScalable);
2236  }
2237  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
2238  return Impl.getMaximumVF(ElemWidth, Opcode);
2239  }
2240  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
2241  Type *ScalarValTy) const override {
2242  return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
2243  }
2244  bool shouldConsiderAddressTypePromotion(
2245  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
2246  return Impl.shouldConsiderAddressTypePromotion(
2247  I, AllowPromotionWithoutCommonHeader);
2248  }
2249  unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
2250  Optional<unsigned> getCacheSize(CacheLevel Level) const override {
2251  return Impl.getCacheSize(Level);
2252  }
2253  Optional<unsigned> getCacheAssociativity(CacheLevel Level) const override {
2254  return Impl.getCacheAssociativity(Level);
2255  }
2256 
2257  /// Return the preferred prefetch distance in terms of instructions.
2258  ///
2259  unsigned getPrefetchDistance() const override {
2260  return Impl.getPrefetchDistance();
2261  }
2262 
2263  /// Return the minimum stride necessary to trigger software
2264  /// prefetching.
2265  ///
2266  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
2267  unsigned NumStridedMemAccesses,
2268  unsigned NumPrefetches,
2269  bool HasCall) const override {
2270  return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
2271  NumPrefetches, HasCall);
2272  }
2273 
2274  /// Return the maximum prefetch distance in terms of loop
2275  /// iterations.
2276  ///
2277  unsigned getMaxPrefetchIterationsAhead() const override {
2278  return Impl.getMaxPrefetchIterationsAhead();
2279  }
2280 
2281  /// \return True if prefetching should also be done for writes.
2282  bool enableWritePrefetching() const override {
2283  return Impl.enableWritePrefetching();
2284  }
2285 
2286  /// \return True if the target wants to issue a prefetch in address space \p AS.
2287  bool shouldPrefetchAddressSpace(unsigned AS) const override {
2288  return Impl.shouldPrefetchAddressSpace(AS);
2289  }
2290 
2291  unsigned getMaxInterleaveFactor(unsigned VF) override {
2292  return Impl.getMaxInterleaveFactor(VF);
2293  }
2294  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
2295  unsigned &JTSize,
2296  ProfileSummaryInfo *PSI,
2297  BlockFrequencyInfo *BFI) override {
2298  return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
2299  }
2300  InstructionCost getArithmeticInstrCost(
2301  unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
2302  OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
2303  ArrayRef<const Value *> Args,
2304  const Instruction *CxtI = nullptr) override {
2305  return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
2306  Args, CxtI);
2307  }
2308 
2309  InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
2310  ArrayRef<int> Mask,
2311  TTI::TargetCostKind CostKind, int Index,
2312  VectorType *SubTp,
2313  ArrayRef<const Value *> Args) override {
2314  return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
2315  }
2316  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
2317  CastContextHint CCH,
2318  TTI::TargetCostKind CostKind,
2319  const Instruction *I) override {
2320  return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
2321  }
2322  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
2323  VectorType *VecTy,
2324  unsigned Index) override {
2325  return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
2326  }
2327  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
2328  const Instruction *I = nullptr) override {
2329  return Impl.getCFInstrCost(Opcode, CostKind, I);
2330  }
2331  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
2332  CmpInst::Predicate VecPred,
2333  TTI::TargetCostKind CostKind,
2334  const Instruction *I) override {
2335  return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
2336  }
2337  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
2338  unsigned Index) override {
2339  return Impl.getVectorInstrCost(Opcode, Val, Index);
2340  }
2341  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
2342  unsigned Index) override {
2343  return Impl.getVectorInstrCost(I, Val, Index);
2344  }
2345  InstructionCost
2346  getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
2347  const APInt &DemandedDstElts,
2348  TTI::TargetCostKind CostKind) override {
2349  return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
2350  DemandedDstElts, CostKind);
2351  }
2352  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2353  unsigned AddressSpace,
2354  TTI::TargetCostKind CostKind,
2355  OperandValueInfo OpInfo,
2356  const Instruction *I) override {
2357  return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind,
2358  OpInfo, I);
2359  }
2360  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2361  unsigned AddressSpace,
2362  TTI::TargetCostKind CostKind,
2363  const Instruction *I) override {
2364  return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2365  CostKind, I);
2366  }
2367  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
2368  Align Alignment, unsigned AddressSpace,
2369  TTI::TargetCostKind CostKind) override {
2370  return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2371  CostKind);
2372  }
2373  InstructionCost
2374  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2375  bool VariableMask, Align Alignment,
2376  TTI::TargetCostKind CostKind,
2377  const Instruction *I = nullptr) override {
2378  return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
2379  Alignment, CostKind, I);
2380  }
2381  InstructionCost getInterleavedMemoryOpCost(
2382  unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
2383  Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
2384  bool UseMaskForCond, bool UseMaskForGaps) override {
2385  return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
2386  Alignment, AddressSpace, CostKind,
2387  UseMaskForCond, UseMaskForGaps);
2388  }
2389  InstructionCost
2390  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
2391  Optional<FastMathFlags> FMF,
2392  TTI::TargetCostKind CostKind) override {
2393  return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
2394  }
2395  InstructionCost
2396  getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
2397  TTI::TargetCostKind CostKind) override {
2398  return Impl.getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
2399  }
2400  InstructionCost getExtendedReductionCost(
2401  unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
2402  Optional<FastMathFlags> FMF,
2403  TTI::TargetCostKind CostKind) override {
2404  return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
2405  CostKind);
2406  }
2407  InstructionCost getMulAccReductionCost(
2408  bool IsUnsigned, Type *ResTy, VectorType *Ty,
2409  TTI::TargetCostKind CostKind) override {
2410  return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind);
2411  }
2412  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
2413  TTI::TargetCostKind CostKind) override {
2414  return Impl.getIntrinsicInstrCost(ICA, CostKind);
2415  }
2416  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
2417  ArrayRef<Type *> Tys,
2418  TTI::TargetCostKind CostKind) override {
2419  return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
2420  }
2421  unsigned getNumberOfParts(Type *Tp) override {
2422  return Impl.getNumberOfParts(Tp);
2423  }
2424  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
2425  const SCEV *Ptr) override {
2426  return Impl.getAddressComputationCost(Ty, SE, Ptr);
2427  }
2428  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
2429  return Impl.getCostOfKeepingLiveOverCall(Tys);
2430  }
2431  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
2432  MemIntrinsicInfo &Info) override {
2433  return Impl.getTgtMemIntrinsic(Inst, Info);
2434  }
2435  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
2436  return Impl.getAtomicMemIntrinsicMaxElementSize();
2437  }
2438  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
2439  Type *ExpectedType) override {
2440  return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
2441  }
2442  Type *getMemcpyLoopLoweringType(
2443  LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
2444  unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
2445  Optional<uint32_t> AtomicElementSize) const override {
2446  return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
2447  DestAddrSpace, SrcAlign, DestAlign,
2448  AtomicElementSize);
2449  }
2450  void getMemcpyLoopResidualLoweringType(
2451  SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
2452  unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
2453  unsigned SrcAlign, unsigned DestAlign,
2454  Optional<uint32_t> AtomicCpySize) const override {
2455  Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
2456  SrcAddrSpace, DestAddrSpace,
2457  SrcAlign, DestAlign, AtomicCpySize);
2458  }
2459  bool areInlineCompatible(const Function *Caller,
2460  const Function *Callee) const override {
2461  return Impl.areInlineCompatible(Caller, Callee);
2462  }
2463  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
2464  const ArrayRef<Type *> &Types) const override {
2465  return Impl.areTypesABICompatible(Caller, Callee, Types);
2466  }
2467  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
2468  return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
2469  }
2470  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
2471  return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
2472  }
2473  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
2474  return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
2475  }
2476  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
2477  return Impl.isLegalToVectorizeLoad(LI);
2478  }
2479  bool isLegalToVectorizeStore(StoreInst *SI) const override {
2480  return Impl.isLegalToVectorizeStore(SI);
2481  }
2482  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
2483  unsigned AddrSpace) const override {
2484  return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
2485  AddrSpace);
2486  }
2487  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
2488  unsigned AddrSpace) const override {
2489  return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
2490  AddrSpace);
2491  }
2492  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
2493  ElementCount VF) const override {
2494  return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
2495  }
2496  bool isElementTypeLegalForScalableVector(Type *Ty) const override {
2497  return Impl.isElementTypeLegalForScalableVector(Ty);
2498  }
2499  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
2500  unsigned ChainSizeInBytes,
2501  VectorType *VecTy) const override {
2502  return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
2503  }
2504  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
2505  unsigned ChainSizeInBytes,
2506  VectorType *VecTy) const override {
2507  return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
2508  }
2509  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
2510  ReductionFlags Flags) const override {
2511  return Impl.preferInLoopReduction(Opcode, Ty, Flags);
2512  }
2513  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
2514  ReductionFlags Flags) const override {
2515  return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
2516  }
2517  bool shouldExpandReduction(const IntrinsicInst *II) const override {
2518  return Impl.shouldExpandReduction(II);
2519  }
2520 
2521  unsigned getGISelRematGlobalCost() const override {
2522  return Impl.getGISelRematGlobalCost();
2523  }
2524 
2525  unsigned getMinTripCountTailFoldingThreshold() const override {
2526  return Impl.getMinTripCountTailFoldingThreshold();
2527  }
2528 
2529  bool supportsScalableVectors() const override {
2530  return Impl.supportsScalableVectors();
2531  }
2532 
2533  bool enableScalableVectorization() const override {
2534  return Impl.enableScalableVectorization();
2535  }
2536 
2537  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
2538  Align Alignment) const override {
2539  return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
2540  }
2541 
2542  VPLegalization
2543  getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
2544  return Impl.getVPLegalizationStrategy(PI);
2545  }
2546 };
2547 
2548 template <typename T>
2549 TargetTransformInfo::TargetTransformInfo(T Impl)
2550  : TTIImpl(new Model<T>(Impl)) {}
2551 
2552 /// Analysis pass providing the \c TargetTransformInfo.
2553 ///
2554 /// The core idea of the TargetIRAnalysis is to expose an interface through
2555 /// which LLVM targets can analyze and provide information about the middle
2556 /// end's target-independent IR. This supports use cases such as target-aware
2557 /// cost modeling of IR constructs.
2558 ///
2559 /// This is a function analysis because much of the cost modeling for targets
2560 /// is done in a subtarget specific way and LLVM supports compiling different
2561 /// functions targeting different subtargets in order to support runtime
2562 /// dispatch according to the observed subtarget.
2563 class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
2564 public:
2565  typedef TargetTransformInfo Result;
2566 
2567  /// Default construct a target IR analysis.
2568  ///
2569  /// This will use the module's datalayout to construct a baseline
2570  /// conservative TTI result.
2571  TargetIRAnalysis();
2572 
2573  /// Construct an IR analysis pass around a target-provide callback.
2574  ///
2575  /// The callback will be called with a particular function for which the TTI
2576  /// is needed and must return a TTI object for that function.
2577  TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
2578 
2579  // Value semantics. We spell out the constructors for MSVC.
2580  TargetIRAnalysis(const TargetIRAnalysis &Arg)
2581  : TTICallback(Arg.TTICallback) {}
2582  TargetIRAnalysis(TargetIRAnalysis &&Arg)
2583  : TTICallback(std::move(Arg.TTICallback)) {}
2584  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
2585  TTICallback = RHS.TTICallback;
2586  return *this;
2587  }
2588  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
2589  TTICallback = std::move(RHS.TTICallback);
2590  return *this;
2591  }
2592 
2593  Result run(const Function &F, FunctionAnalysisManager &);
2594 
2595 private:
2596  friend AnalysisInfoMixin<TargetIRAnalysis>;
2597  static AnalysisKey Key;
2598 
2599  /// The callback used to produce a result.
2600  ///
2601  /// We use a completely opaque callback so that targets can provide whatever
2602  /// mechanism they desire for constructing the TTI for a given function.
2603  ///
2604  /// FIXME: Should we really use std::function? It's relatively inefficient.
2605  /// It might be possible to arrange for even stateful callbacks to outlive
2606  /// the analysis and thus use a function_ref which would be lighter weight.
2607  /// This may also be less error prone as the callback is likely to reference
2608  /// the external TargetMachine, and that reference needs to never dangle.
2609  std::function<Result(const Function &)> TTICallback;
2610 
2611  /// Helper function used as the callback in the default constructor.
2612  static Result getDefaultTTI(const Function &F);
2613 };
2614 
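/// A minimal usage sketch for the new pass manager, assuming a Function `F`
/// is in scope; in real pipelines PassBuilder performs this registration.
/// \code
///   PassBuilder PB;
///   FunctionAnalysisManager FAM;
///   PB.registerFunctionAnalyses(FAM); // registers TargetIRAnalysis too
///   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
///   if (TTI.supportsScalableVectors()) {
///     // make a target-aware decision here
///   }
/// \endcode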
2615 /// Wrapper pass for TargetTransformInfo.
2616 ///
2617 /// This pass can be constructed from a TTI object which it stores internally
2618 /// and is queried by passes.
2619 class TargetTransformInfoWrapperPass : public ImmutablePass {
2620  TargetIRAnalysis TIRA;
2621  Optional<TargetTransformInfo> TTI;
2622 
2623  virtual void anchor();
2624 
2625 public:
2626  static char ID;
2627 
2628  /// We must provide a default constructor for the pass but it should
2629  /// never be used.
2630  ///
2631  /// Use the constructor below or call one of the creation routines.
2632  TargetTransformInfoWrapperPass();
2633 
2634  explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
2635 
2636  TargetTransformInfo &getTTI(const Function &F);
2637 };
2638 
2639 /// Create an analysis pass wrapper around a TTI object.
2640 ///
2641 /// This analysis pass just holds the TTI instance and makes it available to
2642 /// clients.
2643 ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
2644 
2645 } // namespace llvm
2646 
2647 #endif
llvm::TargetTransformInfo::ReductionFlags::IsMaxOp
bool IsMaxOp
If the op a min/max kind, true if it's a max operation.
Definition: TargetTransformInfo.h:1447
llvm::TargetTransformInfo::CastContextHint::GatherScatter
@ GatherScatter
The cast is used with a gather/scatter.
llvm::TargetTransformInfo::isHardwareLoopProfitable
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
Query the target whether it would be profitable to convert the given loop into a hardware loop.
Definition: TargetTransformInfo.cpp:290
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::TargetTransformInfo::Concept::getExtractWithExtendCost
virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)=0
llvm::TargetTransformInfo::CacheLevel::L1D
@ L1D
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::TargetTransformInfo::PSK_FastHardware
@ PSK_FastHardware
Definition: TargetTransformInfo.h:583
llvm::TargetTransformInfo::Concept::getPopcntSupport
virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit)=0
llvm::TargetTransformInfo::Concept::getGEPCost
virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::CastContextHint::Masked
@ Masked
The cast is used with a masked load/store.
llvm::TargetTransformInfo::UnrollingPreferences::BEInsns
unsigned BEInsns
Definition: TargetTransformInfo.h:464
llvm::TargetTransformInfo::UnrollingPreferences::PartialOptSizeThreshold
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
Definition: TargetTransformInfo.h:443
llvm::TargetTransformInfo::SK_Select
@ SK_Select
Selects elements from the corresponding lane of either source operand.
Definition: TargetTransformInfo.h:887
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2563
llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
Definition: TargetTransformInfo.h:471
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:217
llvm::TargetTransformInfo::LSRCost::NumRegs
unsigned NumRegs
Definition: TargetTransformInfo.h:404
llvm::TargetTransformInfo::TCC_Expensive
@ TCC_Expensive
The cost of a 'div' instruction on x86.
Definition: TargetTransformInfo.h:245
llvm::TargetTransformInfo::getShuffleCost
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask=None, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, int Index=0, VectorType *SubTp=nullptr, ArrayRef< const Value * > Args=None) const
Definition: TargetTransformInfo.cpp:785
llvm::TargetTransformInfo::UnrollingPreferences::PartialThreshold
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
Definition: TargetTransformInfo.h:439
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::TargetTransformInfo::ReductionFlags
Flags describing the kind of vector reduction.
Definition: TargetTransformInfo.h:1445
FMF.h
llvm::TargetTransformInfo::instCombineIntrinsic
Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Targets can implement their own combinations for target-specific intrinsics.
Definition: TargetTransformInfo.cpp:308
llvm::TargetTransformInfo::Concept::isHardwareLoopProfitable
virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo)=0
llvm::TargetTransformInfo::Concept::isSourceOfDivergence
virtual bool isSourceOfDivergence(const Value *V)=0
llvm::TargetTransformInfo::getInstructionCost
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Definition: TargetTransformInfo.cpp:224
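A common pattern is to fold this over a block to approximate its lowered size; a sketch, assuming BB and TTI are in scope:
  // Sum the estimated code-size cost of every instruction in BB.
  InstructionCost Total = 0;
  for (const Instruction &I : BB) {
    SmallVector<const Value *, 4> Ops(I.operand_values());
    Total += TTI.getInstructionCost(&I, Ops,
                                    TargetTransformInfo::TCK_CodeSize);
  }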
llvm::TargetTransformInfo::Concept::enableMaskedInterleavedAccessVectorization
virtual bool enableMaskedInterleavedAccessVectorization()=0
llvm::MemIntrinsicInfo::PtrVal
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.
Definition: TargetTransformInfo.h:76
llvm::TargetTransformInfo::Concept::rewriteIntrinsicWithAddressSpace
virtual Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const =0
llvm::TargetTransformInfo::getMinVectorRegisterBitWidth
unsigned getMinVectorRegisterBitWidth() const
Definition: TargetTransformInfo.cpp:644
llvm::TargetTransformInfo::Concept::enableOrderedReductions
virtual bool enableOrderedReductions()=0
llvm::HardwareLoopInfo::LoopDecrement
Value * LoopDecrement
Definition: TargetTransformInfo.h:104
llvm::DataLayout
A parsed version of the target data layout string and methods for querying it.
Definition: DataLayout.h:113
llvm::TargetTransformInfo::OperandValueInfo::Properties
OperandValueProperties Properties
Definition: TargetTransformInfo.h:919
llvm::TargetTransformInfo::Concept::areTypesABICompatible
virtual bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const =0
llvm::TargetTransformInfo::MemIndexedMode
MemIndexedMode
The type of load/store indexing.
Definition: TargetTransformInfo.h:1393
llvm::TargetTransformInfo::TCK_Latency
@ TCK_Latency
The latency of an instruction.
Definition: TargetTransformInfo.h:219
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:719
llvm::TargetTransformInfo::getVScaleForTuning
Optional< unsigned > getVScaleForTuning() const
Definition: TargetTransformInfo.cpp:652
llvm::TargetTransformInfo::UnrollingPreferences::MaxCount
unsigned MaxCount
Definition: TargetTransformInfo.h:455
llvm::ImmutablePass
ImmutablePass class - This class is used to provide information that does not need to be run.
Definition: Pass.h:279
llvm::TargetTransformInfo::getRegisterClassName
const char * getRegisterClassName(unsigned ClassID) const
Definition: TargetTransformInfo.cpp:635
AtomicOrdering.h
llvm::ElementCount
Definition: TypeSize.h:404
llvm::TargetTransformInfo::OK_UniformValue
@ OK_UniformValue
Definition: TargetTransformInfo.h:906
llvm::TargetTransformInfo::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:867
llvm::TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass
TargetTransformInfoWrapperPass()
We must provide a default constructor for the pass but it should never be used.
Definition: TargetTransformInfo.cpp:1188
llvm::TargetTransformInfo::Concept::enableMemCmpExpansion
virtual MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const =0
llvm::TargetTransformInfo::canMacroFuseCmp
bool canMacroFuseCmp() const
Return true if the target can fuse a compare and branch.
Definition: TargetTransformInfo.cpp:371
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:546
llvm::TargetTransformInfo::Concept::isLegalMaskedScatter
virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment)=0
Pass.h
llvm::TargetTransformInfo::getRegisterBitWidth
TypeSize getRegisterBitWidth(RegisterKind K) const
Definition: TargetTransformInfo.cpp:639
llvm::TargetTransformInfo::PopcntSupportKind
PopcntSupportKind
Flags indicating the kind of support for population count.
Definition: TargetTransformInfo.h:583
llvm::TargetTransformInfo::Concept::getIntImmCost
virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind)=0
llvm::TargetTransformInfo::getVPLegalizationStrategy
VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const
Definition: TargetTransformInfo.cpp:1133
llvm::TargetTransformInfo::AMK_PostIndexed
@ AMK_PostIndexed
Definition: TargetTransformInfo.h:632
llvm::TargetTransformInfoWrapperPass::getTTI
TargetTransformInfo & getTTI(const Function &F)
Definition: TargetTransformInfo.cpp:1201
llvm::TargetTransformInfo::getAddressComputationCost
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE=nullptr, const SCEV *Ptr=nullptr) const
Definition: TargetTransformInfo.cpp:972
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1182
llvm::TargetTransformInfo::PeelingPreferences::AllowPeeling
bool AllowPeeling
Allow peeling off loop iterations.
Definition: TargetTransformInfo.h:532
llvm::TargetTransformInfo::Concept::hasVolatileVariant
virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace)=0
llvm::TargetTransformInfo::Concept::isFPVectorizationPotentiallyUnsafe
virtual bool isFPVectorizationPotentiallyUnsafe()=0
llvm::TargetTransformInfo::Concept::isLegalMaskedExpandLoad
virtual bool isLegalMaskedExpandLoad(Type *DataType)=0
llvm::TargetTransformInfo::Concept::isAlwaysUniform
virtual bool isAlwaysUniform(const Value *V)=0
llvm::TargetTransformInfo::Concept::getMaxPrefetchIterationsAhead
virtual unsigned getMaxPrefetchIterationsAhead() const =0
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:172
llvm::TargetTransformInfo::MemCmpExpansionOptions::AllowOverlappingLoads
bool AllowOverlappingLoads
Definition: TargetTransformInfo.h:801
llvm::TargetTransformInfo::getScalingFactorCost
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
Definition: TargetTransformInfo.cpp:463
llvm::IntrinsicCostAttributes::getReturnType
Type * getReturnType() const
Definition: TargetTransformInfo.h:152
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::TargetTransformInfo::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
Definition: TargetTransformInfo.cpp:951
llvm::TargetTransformInfo::Concept::getRegisterClassForType
virtual unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const =0
llvm::TargetTransformInfo::RGK_Scalar
@ RGK_Scalar
Definition: TargetTransformInfo.h:955
llvm::TargetTransformInfo::Concept::enableInterleavedAccessVectorization
virtual bool enableInterleavedAccessVectorization()=0
llvm::LoopVectorizationLegality
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
Definition: LoopVectorizationLegality.h:241
llvm::TargetTransformInfo::getAssumedAddrSpace
unsigned getAssumedAddrSpace(const Value *V) const
Definition: TargetTransformInfo.cpp:272
llvm::IntrinsicCostAttributes::getInst
const IntrinsicInst * getInst() const
Definition: TargetTransformInfo.h:151
llvm::TargetTransformInfo::Concept::useGPUDivergenceAnalysis
virtual bool useGPUDivergenceAnalysis()=0
llvm::TargetTransformInfo::isLegalICmpImmediate
bool isLegalICmpImmediate(int64_t Imm) const
Return true if the specified immediate is a legal icmp immediate, that is, the target has icmp instructi...
Definition: TargetTransformInfo.cpp:345
llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJamInnerLoopThreshold
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
Definition: TargetTransformInfo.h:490
llvm::TargetTransformInfo::Concept::getMinMaxReductionCost
virtual InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::isLegalBroadcastLoad
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
Returns true if the target supports broadcasting a load to a vector of type <NumElements x ElementTy...
Definition: TargetTransformInfo.cpp:407
llvm::TargetTransformInfo::getExtendedReductionCost
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, Optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of an extended reduction pattern, similar to getArithmeticReductionCost of a reduc...
Definition: TargetTransformInfo.cpp:1003
llvm::TargetIRAnalysis::operator=
TargetIRAnalysis & operator=(const TargetIRAnalysis &RHS)
Definition: TargetTransformInfo.h:2584
llvm::TargetIRAnalysis::TargetIRAnalysis
TargetIRAnalysis(TargetIRAnalysis &&Arg)
Definition: TargetTransformInfo.h:2582
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::HardwareLoopInfo::ExitBranch
BranchInst * ExitBranch
Definition: TargetTransformInfo.h:101
llvm::TargetTransformInfo::UnrollingPreferences::UnrollRemainder
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
Definition: TargetTransformInfo.h:483
llvm::TargetTransformInfo::Concept::isExpensiveToSpeculativelyExecute
virtual bool isExpensiveToSpeculativelyExecute(const Instruction *I)=0
llvm::TargetTransformInfo::UnrollingPreferences::Count
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
Definition: TargetTransformInfo.h:448
llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition: TargetTransformInfo.h:220
llvm::TargetTransformInfo::VPLegalization
Definition: TargetTransformInfo.h:1499
llvm::TargetTransformInfo::shouldBuildLookupTables
bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
Definition: TargetTransformInfo.cpp:494
llvm::TargetTransformInfo::LSRCost::NumIVMuls
unsigned NumIVMuls
Definition: TargetTransformInfo.h:406
llvm::TargetTransformInfo::Concept::isLegalToVectorizeReduction
virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const =0
llvm::HardwareLoopInfo::isHardwareLoopCandidate
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
Definition: TargetTransformInfo.cpp:105
llvm::TargetTransformInfo::UnrollingPreferences::Partial
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Definition: TargetTransformInfo.h:467
llvm::TargetTransformInfo::getMaximumVF
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const
Definition: TargetTransformInfo.cpp:666
llvm::TargetTransformInfo::Concept::getEstimatedNumberOfCaseClusters
virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)=0
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::TargetTransformInfo::useColdCCForColdCall
bool useColdCCForColdCall(Function &F) const
Return true if the input function, which is cold at all call sites, should use coldcc calling conventi...
Definition: TargetTransformInfo.cpp:507
llvm::TargetTransformInfo::VPLegalization::Convert
@ Convert
Definition: TargetTransformInfo.h:1506
llvm::TargetTransformInfo::Concept::getGatherScatterOpCost
virtual InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:526
llvm::TargetTransformInfo::operator=
TargetTransformInfo & operator=(TargetTransformInfo &&RHS)
Definition: TargetTransformInfo.cpp:192
llvm::TargetTransformInfo::Concept::getPeelingPreferences
virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP)=0
llvm::HardwareLoopInfo::L
Loop * L
Definition: TargetTransformInfo.h:99
llvm::TargetTransformInfo::isLegalNTLoad
bool isLegalNTLoad(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal load.
Definition: TargetTransformInfo.cpp:403
llvm::TargetTransformInfo::isLegalNTStore
bool isLegalNTStore(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal store.
Definition: TargetTransformInfo.cpp:398
llvm::TargetTransformInfo::UnrollingPreferences::FullUnrollMaxCount
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
Definition: TargetTransformInfo.h:459
llvm::TargetTransformInfo::Concept::getMemcpyLoopLoweringType
virtual Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, Optional< uint32_t > AtomicElementSize) const =0
llvm::Optional
Definition: APInt.h:33
ForceNestedLoop
static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))
llvm::TargetTransformInfo::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:858
llvm::IntrinsicCostAttributes::IntrinsicCostAttributes
IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, InstructionCost ScalarCost=InstructionCost::getInvalid(), bool TypeBasedOnly=false)
Definition: TargetTransformInfo.cpp:60
llvm::TargetTransformInfo::OP_PowerOf2
@ OP_PowerOf2
Definition: TargetTransformInfo.h:912
llvm::TargetTransformInfo::getPredictableBranchThreshold
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
Definition: TargetTransformInfo.cpp:233
llvm::TargetTransformInfo::getIntImmCodeSizeCost
InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) const
Return the expected cost for the given integer when optimising for size.
Definition: TargetTransformInfo.cpp:590
llvm::TargetTransformInfo::getInlinerVectorBonusPercent
int getInlinerVectorBonusPercent() const
Definition: TargetTransformInfo.cpp:206
llvm::TargetTransformInfo::getIntImmCostIntrin
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Definition: TargetTransformInfo.cpp:617
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::TargetTransformInfo::Concept::isTruncateFree
virtual bool isTruncateFree(Type *Ty1, Type *Ty2)=0
llvm::TargetTransformInfo::getIntImmCostInst
InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr) const
Return the expected cost of materialization for the given integer immediate of the specified type for...
Definition: TargetTransformInfo.cpp:607
llvm::FastMathFlags
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:21
llvm::TargetTransformInfo::Concept::getAtomicMemIntrinsicMaxElementSize
virtual unsigned getAtomicMemIntrinsicMaxElementSize() const =0
llvm::TargetTransformInfo::OperandValueInfo
Definition: TargetTransformInfo.h:917
llvm::TargetTransformInfo::enableInterleavedAccessVectorization
bool enableInterleavedAccessVectorization() const
Enable matching of interleaved access groups.
Definition: TargetTransformInfo.cpp:545
llvm::TargetTransformInfo::Concept::prefersVectorizedAddressing
virtual bool prefersVectorizedAddressing()=0
llvm::TargetTransformInfo::SK_PermuteSingleSrc
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
Definition: TargetTransformInfo.h:895
llvm::TargetTransformInfo::Concept::getOrCreateResultFromMemIntrinsic
virtual Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType)=0
llvm::TargetTransformInfo::Concept::getCostOfKeepingLiveOverCall
virtual InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys)=0
llvm::TargetTransformInfo::getOperandsScalarizationOverhead
InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys) const
Estimate the overhead of scalarizing an instructions unique non-constant operands.
Definition: TargetTransformInfo.cpp:518
llvm::TargetTransformInfo::Concept::getRegisterBitWidth
virtual TypeSize getRegisterBitWidth(RegisterKind K) const =0
llvm::TargetTransformInfo::UnrollingPreferences::AllowExpensiveTripCount
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
Definition: TargetTransformInfo.h:476
llvm::TargetTransformInfo::preferInLoopReduction
bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags Flags) const
Definition: TargetTransformInfo.cpp:1122
llvm::TargetTransformInfo::CacheLevel
CacheLevel
The possible cache levels.
Definition: TargetTransformInfo.h:1011
llvm::TargetTransformInfo::Concept
Definition: TargetTransformInfo.h:1547
llvm::TargetTransformInfo::Concept::isLegalNTStore
virtual bool isLegalNTStore(Type *DataType, Align Alignment)=0
llvm::TargetTransformInfo::LSRCost::Insns
unsigned Insns
TODO: Some of these could be merged.
Definition: TargetTransformInfo.h:403
llvm::IntrinsicCostAttributes::getScalarizationCost
InstructionCost getScalarizationCost() const
Definition: TargetTransformInfo.h:154
llvm::TargetTransformInfo::SK_Broadcast
@ SK_Broadcast
Broadcast element 0 to all other elements.
Definition: TargetTransformInfo.h:885
llvm::TargetTransformInfo::Concept::getAddressComputationCost
virtual InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr)=0
llvm::TargetTransformInfo::Concept::getIntImmCodeSizeCost
virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty)=0
llvm::TargetTransformInfo::canHaveNonUndefGlobalInitializerInAddressSpace
bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const
Return true if globals in this address space can have initializers other than undef.
Definition: TargetTransformInfo.cpp:267
llvm::TargetTransformInfo::Concept::isLegalNTLoad
virtual bool isLegalNTLoad(Type *DataType, Align Alignment)=0
llvm::IntrinsicCostAttributes::skipScalarizationCost
bool skipScalarizationCost() const
Definition: TargetTransformInfo.h:162
llvm::TargetTransformInfo::requiresOrderedReduction
static bool requiresOrderedReduction(Optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of Fas...
Definition: TargetTransformInfo.h:1257
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::HardwareLoopInfo::IsNestingLegal
bool IsNestingLegal
Definition: TargetTransformInfo.h:106
llvm::TargetTransformInfo::LSRCost::AddRecCost
unsigned AddRecCost
Definition: TargetTransformInfo.h:405
llvm::IntrinsicCostAttributes::getFlags
FastMathFlags getFlags() const
Definition: TargetTransformInfo.h:153
llvm::Reloc::Model
Model
Definition: CodeGen.h:22
llvm::TargetTransformInfo::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const
Get target-customized preferences for the generic loop unrolling transformation.
Definition: TargetTransformInfo.cpp:330
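A sketch of the calling convention: the client seeds UP with its own defaults, then lets the target override them (L, SE, and ORE assumed in scope; the numeric values are illustrative, not target-provided):
  TargetTransformInfo::UnrollingPreferences UP;
  UP.Threshold = 150; // illustrative client-side default
  UP.Partial = UP.Runtime = false;
  TTI.getUnrollingPreferences(L, SE, UP, &ORE);
  if (UP.Partial) {
    // The target opted in to partial unrolling; honor UP.PartialThreshold.
  }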
llvm::SmallBitVector
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
Definition: SmallBitVector.h:35
llvm::TargetTransformInfo::isLegalMaskedScatter
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const
Return true if the target supports masked scatter.
Definition: TargetTransformInfo.cpp:423
llvm::TargetTransformInfo::LSRCost::SetupCost
unsigned SetupCost
Definition: TargetTransformInfo.h:409
llvm::TargetTransformInfo::Concept::isLegalMaskedLoad
virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment)=0
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::TargetTransformInfo::Concept::canMacroFuseCmp
virtual bool canMacroFuseCmp()=0
llvm::TargetTransformInfo::Concept::isTypeLegal
virtual bool isTypeLegal(Type *Ty)=0
llvm::TargetTransformInfo::getGISelRematGlobalCost
unsigned getGISelRematGlobalCost() const
Definition: TargetTransformInfo.cpp:1141
llvm::IntrinsicCostAttributes::getArgTypes
const SmallVectorImpl< Type * > & getArgTypes() const
Definition: TargetTransformInfo.h:156
llvm::TargetTransformInfo::areInlineCompatible
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
Definition: TargetTransformInfo.cpp:1054
llvm::TargetTransformInfo::Concept::getMinimumVF
virtual ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const =0
llvm::TargetTransformInfo::isTypeLegal
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
Definition: TargetTransformInfo.cpp:486
llvm::HardwareLoopInfo::ExitCount
const SCEV * ExitCount
Definition: TargetTransformInfo.h:102
llvm::TargetTransformInfo::SK_PermuteTwoSrc
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
Definition: TargetTransformInfo.h:893
llvm::TargetTransformInfo::Concept::getCacheSize
virtual Optional< unsigned > getCacheSize(CacheLevel Level) const =0
llvm::TargetTransformInfo::PeelingPreferences::PeelProfiledIterations
bool PeelProfiledIterations
Allow peeling basing on profile.
Definition: TargetTransformInfo.h:539
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::TargetTransformInfo::getMinimumVF
ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const
Definition: TargetTransformInfo.cpp:661
llvm::MemIntrinsicInfo::isUnordered
bool isUnordered() const
Definition: TargetTransformInfo.h:88
llvm::TargetTransformInfo::Concept::getPredictableBranchThreshold
virtual BranchProbability getPredictableBranchThreshold()=0
llvm::TargetTransformInfo::isProfitableLSRChainElement
bool isProfitableLSRChainElement(Instruction *I) const
Definition: TargetTransformInfo.cpp:367
llvm::TargetTransformInfo::Concept::useAA
virtual bool useAA()=0
llvm::TargetTransformInfo::getCastContextHint
static CastContextHint getCastContextHint(const Instruction *I)
Calculates a CastContextHint from I.
Definition: TargetTransformInfo.cpp:796
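The hint is normally computed from the cast's actual use and fed straight back into getCastInstrCost; a sketch, assuming CI is a CastInst* in scope:
  TargetTransformInfo::CastContextHint CCH =
      TargetTransformInfo::getCastContextHint(CI);
  InstructionCost Cost = TTI.getCastInstrCost(
      CI->getOpcode(), CI->getDestTy(), CI->getSrcTy(), CCH,
      TargetTransformInfo::TCK_RecipThroughput, CI);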
llvm::TargetTransformInfo::getOrCreateResultFromMemIntrinsic
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) const
Definition: TargetTransformInfo.cpp:1030
llvm::TargetTransformInfo::isLegalToVectorizeLoad
bool isLegalToVectorizeLoad(LoadInst *LI) const
Definition: TargetTransformInfo.cpp:1079
llvm::TargetTransformInfo::supportsTailCallFor
bool supportsTailCallFor(const CallBase *CB) const
Return true if the target supports a tail call on CB.
Definition: TargetTransformInfo.cpp:531
llvm::MemIntrinsicInfo::Ordering
AtomicOrdering Ordering
Definition: TargetTransformInfo.h:79
llvm::TargetTransformInfo::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
Definition: TargetTransformInfo.cpp:773
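The operand-info arguments let a client describe what it knows about the operand values; a sketch pricing a multiply whose second operand is a uniform power-of-two constant (VecTy assumed):
  InstructionCost Cost = TTI.getArithmeticInstrCost(
      Instruction::Mul, VecTy, TargetTransformInfo::TCK_RecipThroughput,
      {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
      {TargetTransformInfo::OK_UniformConstantValue,
       TargetTransformInfo::OP_PowerOf2});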
llvm::TargetTransformInfo::Concept::useColdCCForColdCall
virtual bool useColdCCForColdCall(Function &F)=0
llvm::TargetTransformInfoWrapperPass::ID
static char ID
Definition: TargetTransformInfo.h:2626
llvm::TargetTransformInfo::TargetCostConstants
TargetCostConstants
Underlying constants for 'cost' values in this interface.
Definition: TargetTransformInfo.h:242
llvm::TargetTransformInfo::getPopcntSupport
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
Return hardware support for population count.
Definition: TargetTransformInfo.cpp:567
llvm::TargetIRAnalysis::TargetIRAnalysis
TargetIRAnalysis(const TargetIRAnalysis &Arg)
Definition: TargetTransformInfo.h:2580
llvm::TargetTransformInfo::OP_None
@ OP_None
Definition: TargetTransformInfo.h:912
llvm::TargetTransformInfo::ShuffleKind
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Definition: TargetTransformInfo.h:884
llvm::TargetTransformInfo::getPreferredAddressingMode
AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
Return the preferred addressing mode LSR should make efforts to generate.
Definition: TargetTransformInfo.cpp:383
llvm::TargetTransformInfo::CastContextHint
CastContextHint
Represents a hint about the context in which a cast is used.
Definition: TargetTransformInfo.h:1129
llvm::TargetTransformInfo::Concept::getVPLegalizationStrategy
virtual VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const =0
llvm::User
Definition: User.h:44
llvm::TargetTransformInfo::useGPUDivergenceAnalysis
bool useGPUDivergenceAnalysis() const
Return true if the target prefers to use GPU divergence analysis to replace the legacy version.
Definition: TargetTransformInfo.cpp:241
llvm::TargetTransformInfo::UnrollingPreferences::Force
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
Definition: TargetTransformInfo.h:479
InstrTypes.h
llvm::TargetTransformInfo::Concept::getMaxVScale
virtual Optional< unsigned > getMaxVScale() const =0
llvm::TargetTransformInfo::Concept::getPrefetchDistance
virtual unsigned getPrefetchDistance() const =0
llvm::TargetTransformInfo::hasBranchDivergence
bool hasBranchDivergence() const
Return true if branch divergence exists.
Definition: TargetTransformInfo.cpp:237
llvm::TargetTransformInfo::isLegalToVectorizeReduction
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const
Definition: TargetTransformInfo.cpp:1099
llvm::TargetTransformInfo::getIntImmCost
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing the given integer immediate of the specified type.
Definition: TargetTransformInfo.cpp:600
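For instance, a pass deciding whether a constant is worth rematerializing might compare the result against the cost constants; a sketch, with Int64Ty assumed and the immediate chosen for illustration:
  APInt Imm(64, 12345); // illustrative immediate
  InstructionCost C =
      TTI.getIntImmCost(Imm, Int64Ty, TargetTransformInfo::TCK_CodeSize);
  bool Cheap = C.isValid() && C <= TargetTransformInfo::TCC_Basic;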
llvm::TargetTransformInfo::Concept::supportsEfficientVectorElementLoadStore
virtual bool supportsEfficientVectorElementLoadStore()=0
llvm::TargetTransformInfo::Concept::canSaveCmp
virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo)=0
llvm::TargetTransformInfo::getNumberOfParts
unsigned getNumberOfParts(Type *Tp) const
Definition: TargetTransformInfo.cpp:967
llvm::TargetTransformInfo::Concept::isFCmpOrdCheaperThanFCmpZero
virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)=0
llvm::TargetTransformInfo::Concept::isNumRegsMajorCostOfLSR
virtual bool isNumRegsMajorCostOfLSR()=0
llvm::TargetTransformInfo::supportsScalableVectors
bool supportsScalableVectors() const
Definition: TargetTransformInfo.cpp:1149
llvm::TargetTransformInfo::isIndexedLoadLegal
bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const
Definition: TargetTransformInfo.cpp:1065
llvm::TargetTransformInfo::CastContextHint::Interleave
@ Interleave
The cast is used with an interleaved load/store.
llvm::TargetTransformInfo::UnrollingPreferences::MaxIterationsCountToAnalyze
unsigned MaxIterationsCountToAnalyze
Don't allow loop unrolling to simulate more than this number of iterations when checking full unroll ...
Definition: TargetTransformInfo.h:493
llvm::TargetTransformInfo::Concept::getNumberOfRegisters
virtual unsigned getNumberOfRegisters(unsigned ClassID) const =0
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::TargetTransformInfo::Concept::isLegalToVectorizeLoadChain
virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const =0
llvm::TargetTransformInfo::Concept::getInstructionCost
virtual InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind)=0
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJam
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
Definition: TargetTransformInfo.h:485
llvm::TargetTransformInfo::isLegalMaskedExpandLoad
bool isLegalMaskedExpandLoad(Type *DataType) const
Return true if the target supports masked expand load.
Definition: TargetTransformInfo.cpp:442
llvm::TargetTransformInfo::enableScalableVectorization
bool enableScalableVectorization() const
Definition: TargetTransformInfo.cpp:1153
llvm::TargetTransformInfo::Concept::supportsTailCalls
virtual bool supportsTailCalls()=0
llvm::TargetTransformInfo::Concept::simplifyDemandedVectorEltsIntrinsic
virtual Optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)=0
llvm::TargetTransformInfo::Concept::isLegalMaskedGather
virtual bool isLegalMaskedGather(Type *DataType, Align Alignment)=0
llvm::TargetTransformInfo::Concept::hasBranchDivergence
virtual bool hasBranchDivergence()=0
llvm::Instruction
Definition: Instruction.h:42
llvm::TargetTransformInfo::Concept::enableWritePrefetching
virtual bool enableWritePrefetching() const =0
llvm::TargetTransformInfo::MIM_PreDec
@ MIM_PreDec
Pre-decrementing.
Definition: TargetTransformInfo.h:1396
llvm::InterleavedAccessInfo
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:751
llvm::HardwareLoopInfo::PerformEntryTest
bool PerformEntryTest
Definition: TargetTransformInfo.h:110
llvm::TargetTransformInfo::Concept::getMaskedMemoryOpCost
virtual InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::isLegalMaskedLoad
bool isLegalMaskedLoad(Type *DataType, Align Alignment) const
Return true if the target supports masked load.
Definition: TargetTransformInfo.cpp:393
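Vectorizers gate masked-memory code generation on these legality hooks; a sketch, with DataTy and Alignment assumed in scope:
  // Only form an llvm.masked.load if the target can lower it directly;
  // otherwise the client would scalarize or bail out (hypothetical policy).
  if (!TTI.isLegalMaskedLoad(DataTy, Alignment))
    return false;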
llvm::TargetTransformInfo::isSourceOfDivergence
bool isSourceOfDivergence(const Value *V) const
Returns whether V is a source of divergence.
Definition: TargetTransformInfo.cpp:245
llvm::TargetTransformInfo::Concept::getReplicationShuffleCost
virtual InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::simplifyDemandedVectorEltsIntrinsic
Optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Can be used to implement target-specific instruction combining.
Definition: TargetTransformInfo.cpp:320
llvm::TargetTransformInfo::CastContextHint::Reversed
@ Reversed
The cast is used with a reversed load/store.
llvm::TargetTransformInfo::getPrefetchDistance
unsigned getPrefetchDistance() const
Definition: TargetTransformInfo.cpp:697
llvm::HardwareLoopInfo::CounterInReg
bool CounterInReg
Definition: TargetTransformInfo.h:108
llvm::TargetTransformInfo::Concept::isIndexedStoreLegal
virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const =0
llvm::TargetTransformInfo::Concept::supportsScalableVectors
virtual bool supportsScalableVectors() const =0
llvm::TargetTransformInfo::Concept::isLegalToVectorizeLoad
virtual bool isLegalToVectorizeLoad(LoadInst *LI) const =0
llvm::TargetTransformInfo::Concept::isLegalToVectorizeStoreChain
virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const =0
llvm::TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const
Return true if it is faster to check if a floating-point value is NaN (or not-NaN) versus a compariso...
Definition: TargetTransformInfo.cpp:580
llvm::AnalysisManager::Invalidator
API to communicate dependencies between analyses during invalidation.
Definition: PassManager.h:661
llvm::TargetTransformInfo::Concept::getMemoryOpCost
virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, OperandValueInfo OpInfo, const Instruction *I)=0
llvm::TargetTransformInfo::RGK_FixedWidthVector
@ RGK_FixedWidthVector
Definition: TargetTransformInfo.h:955
llvm::TargetTransformInfo::Concept::allowsMisalignedMemoryAccesses
virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, bool *Fast)=0
llvm::TargetTransformInfo::OperandValueInfo::Kind
OperandValueKind Kind
Definition: TargetTransformInfo.h:918
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::TargetTransformInfo::OperandValueInfo::isUniform
bool isUniform() const
Definition: TargetTransformInfo.h:924
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::TargetTransformInfo::areTypesABICompatible
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const
Definition: TargetTransformInfo.cpp:1059
llvm::None
const NoneType None
Definition: None.h:24
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
llvm::IntrinsicCostAttributes
Definition: TargetTransformInfo.h:119
BranchProbability.h
llvm::TargetTransformInfo::VPLegalization::VPLegalization
VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)
Definition: TargetTransformInfo.h:1524
llvm::TargetTransformInfo::Concept::getDataLayout
virtual const DataLayout & getDataLayout() const =0
llvm::TargetTransformInfo::hasVolatileVariant
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const
Return true if the given instruction (assumed to be a memory access instruction) has a volatile varia...
Definition: TargetTransformInfo.cpp:454
llvm::TargetTransformInfo::PSK_Software
@ PSK_Software
Definition: TargetTransformInfo.h:583
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::TargetTransformInfo::Concept::isElementTypeLegalForScalableVector
virtual bool isElementTypeLegalForScalableVector(Type *Ty) const =0
llvm::TargetTransformInfo::isLegalMaskedCompressStore
bool isLegalMaskedCompressStore(Type *DataType) const
Return true if the target supports masked compress store.
Definition: TargetTransformInfo.cpp:438
llvm::PredicationStyle::DataAndControlFlow
@ DataAndControlFlow
llvm::TargetTransformInfo::haveFastSqrt
bool haveFastSqrt(Type *Ty) const
Return true if the hardware has a fast square-root instruction.
Definition: TargetTransformInfo.cpp:571
llvm::createTargetTransformInfoWrapperPass
ImmutablePass * createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA)
Create an analysis pass wrapper around a TTI object.
Definition: TargetTransformInfo.cpp:1208
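A sketch of legacy pass-manager setup, assuming TM is a configured TargetMachine*:
  legacy::PassManager PM;
  PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
  // Later passes can query getAnalysis<TargetTransformInfoWrapperPass>().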
llvm::TargetTransformInfo::VPLegalization::EVLParamStrategy
VPTransform EVLParamStrategy
Definition: TargetTransformInfo.h:1513
llvm::TargetTransformInfo::SK_Reverse
@ SK_Reverse
Reverse the order of the vector.
Definition: TargetTransformInfo.h:886
llvm::TargetTransformInfo::preferPredicateOverEpilogue
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, LoopVectorizationLegality *LVL, InterleavedAccessInfo *IAI) const
Query the target whether it would be preferred to create a predicated vector loop, which can avoid the...
Definition: TargetTransformInfo.cpp:296
llvm::TargetTransformInfo::Concept::collectFlatAddressOperands
virtual bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const =0
llvm::TargetTransformInfo::VPLegalization::VPTransform
VPTransform
Definition: TargetTransformInfo.h:1500
llvm::TargetTransformInfo::getFlatAddressSpace
unsigned getFlatAddressSpace() const
Returns the address space ID for a target's 'flat' address space.
Definition: TargetTransformInfo.cpp:253
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
llvm::AtomicOrdering
AtomicOrdering
Atomic ordering for LLVM's memory model.
Definition: AtomicOrdering.h:56
llvm::TargetTransformInfo::CastContextHint::Normal
@ Normal
The cast is used with a normal load/store.
llvm::TargetTransformInfo::Concept::~Concept
virtual ~Concept()=0
llvm::TargetTransformInfo::Concept::getIntrinsicInstrCost
virtual InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)=0
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:298
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::TargetTransformInfo::Concept::preferPredicateOverEpilogue
virtual bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, LoopVectorizationLegality *LVL, InterleavedAccessInfo *IAI)=0
llvm::TargetTransformInfo::Concept::hasActiveVectorLength
virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const =0
llvm::TargetTransformInfo::SK_InsertSubvector
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
Definition: TargetTransformInfo.h:891
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::TargetTransformInfo::OperandValueInfo::isPowerOf2
bool isPowerOf2() const
Definition: TargetTransformInfo.h:927
llvm::TargetTransformInfo::Concept::isProfitableLSRChainElement
virtual bool isProfitableLSRChainElement(Instruction *I)=0
llvm::TargetTransformInfo::shouldBuildLookupTablesForConstant
bool shouldBuildLookupTablesForConstant(Constant *C) const
Return true if switches should be turned into lookup tables containing this constant value for the ta...
Definition: TargetTransformInfo.cpp:498
llvm::TargetTransformInfo::Concept::shouldMaximizeVectorBandwidth
virtual bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const =0
llvm::TargetTransformInfo::MIM_PostInc
@ MIM_PostInc
Post-incrementing.
Definition: TargetTransformInfo.h:1397
llvm::HardwareLoopInfo::HardwareLoopInfo
HardwareLoopInfo()=delete
llvm::TargetTransformInfo::Concept::getMemcpyLoopResidualLoweringType
virtual void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, Optional< uint32_t > AtomicCpySize) const =0
llvm::TargetTransformInfo::LSRCost
Definition: TargetTransformInfo.h:400
llvm::TargetTransformInfo::collectFlatAddressOperands
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
Return any intrinsic address operand indexes which may be rewritten if they use a flat address space ...
Definition: TargetTransformInfo.cpp:257
llvm::TargetTransformInfo::VPLegalization::OpStrategy
VPTransform OpStrategy
Definition: TargetTransformInfo.h:1519
llvm::TargetTransformInfo::isLegalMaskedGather
bool isLegalMaskedGather(Type *DataType, Align Alignment) const
Return true if the target supports masked gather.
Definition: TargetTransformInfo.cpp:412
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2619
llvm::TargetTransformInfo::Concept::getInterleavedMemoryOpCost
virtual InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)=0
llvm::TargetTransformInfo::preferPredicatedReductionSelect
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, ReductionFlags Flags) const
Definition: TargetTransformInfo.cpp:1127
llvm::TargetTransformInfo::Concept::hasDivRemOp
virtual bool hasDivRemOp(Type *DataType, bool IsSigned)=0
llvm::TargetTransformInfo::Concept::isLSRCostLess
virtual bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)=0
llvm::TargetTransformInfo::getInterleavedMemoryOpCost
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, bool UseMaskForCond=false, bool UseMaskForGaps=false) const
Definition: TargetTransformInfo.cpp:939
llvm::TargetTransformInfo::ReductionFlags::NoNaN
bool NoNaN
If op is an fp min/max, whether NaNs may be present.
Definition: TargetTransformInfo.h:1450
llvm::TargetTransformInfo::Concept::shouldBuildLookupTables
virtual bool shouldBuildLookupTables()=0
llvm::TargetTransformInfo::OK_UniformConstantValue
@ OK_UniformConstantValue
Definition: TargetTransformInfo.h:907
llvm::TargetTransformInfo::forceScalarizeMaskedGather
bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const
Return true if the target forces scalarizing of llvm.masked.gather intrinsics.
Definition: TargetTransformInfo.cpp:428
llvm::TargetIRAnalysis::Result
TargetTransformInfo Result
Definition: TargetTransformInfo.h:2565
llvm::TargetTransformInfo::getMaxInterleaveFactor
unsigned getMaxInterleaveFactor(unsigned VF) const
Definition: TargetTransformInfo.cpp:720
llvm::TargetTransformInfo::VPLegalization::shouldDoNothing
bool shouldDoNothing() const
Definition: TargetTransformInfo.h:1521
llvm::TargetTransformInfo::getRegisterClassForType
unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
Definition: TargetTransformInfo.cpp:630
llvm::TargetTransformInfo::Concept::getMaximumVF
virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const =0
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::TargetTransformInfo::isLegalAltInstr
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const
Return true if this is an alternating opcode pattern that can be lowered to a single instruction on t...
Definition: TargetTransformInfo.cpp:417
llvm::AnalysisKey
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: PassManager.h:69
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:414
llvm::TargetTransformInfo::getCostOfKeepingLiveOverCall
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const
Definition: TargetTransformInfo.cpp:1017
llvm::AtomicOrdering::Unordered
@ Unordered
llvm::TargetTransformInfo::OperandValueProperties
OperandValueProperties
Additional properties of an operand's values.
Definition: TargetTransformInfo.h:912
llvm::TargetTransformInfo::Concept::isLegalMaskedStore
virtual bool isLegalMaskedStore(Type *DataType, Align Alignment)=0
llvm::TargetTransformInfo::shouldConsiderAddressTypePromotion
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
Definition: TargetTransformInfo.cpp:676
llvm::TargetTransformInfo::Concept::getExtendedReductionCost
virtual InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, Optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput)=0
llvm::TargetTransformInfo::Concept::getScalarizationOverhead
virtual InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract)=0
llvm::TargetTransformInfo::Concept::getVPMemoryOpCost
virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I)=0
llvm::TargetTransformInfo::Concept::getTgtMemIntrinsic
virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)=0
llvm::TargetTransformInfo::getScalarizationOverhead
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract) const
Estimate the overhead of scalarizing an instruction.
Definition: TargetTransformInfo.cpp:512
llvm::TargetTransformInfo::getReplicationShuffleCost
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
Definition: TargetTransformInfo.cpp:900
llvm::TargetTransformInfo::isExpensiveToSpeculativelyExecute
bool isExpensiveToSpeculativelyExecute(const Instruction *I) const
Return true if the cost of the instruction is too high to speculatively execute and should be kept be...
Definition: TargetTransformInfo.cpp:575
llvm::TargetTransformInfo::PeelingPreferences::AllowLoopNestsPeeling
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
Definition: TargetTransformInfo.h:534
llvm::TargetTransformInfo::rewriteIntrinsicWithAddressSpace
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
Rewrite intrinsic call II such that OldV will be replaced with NewV, which has a different address sp...
Definition: TargetTransformInfo.cpp:281
llvm::TargetTransformInfo::isLSRCostLess
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const
Return true if LSR cost of C1 is lower than C2.
Definition: TargetTransformInfo.cpp:358
llvm::TargetTransformInfo::Concept::shouldExpandReduction
virtual bool shouldExpandReduction(const IntrinsicInst *II) const =0
llvm::TargetTransformInfo::Concept::getLoadVectorFactor
virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const =0
llvm::TargetTransformInfo::getGatherScatterOpCost
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:930
llvm::TargetTransformInfo::VPLegalization::Discard
@ Discard
Definition: TargetTransformInfo.h:1504
llvm::TargetTransformInfo::Concept::getCastInstrCost
virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I)=0
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1666
llvm::TargetTransformInfo::Concept::isLoweredToCall
virtual bool isLoweredToCall(const Function *F)=0
llvm::TargetTransformInfo::LSRWithInstrQueries
bool LSRWithInstrQueries() const
Return true if the loop strength reduce pass should make Instruction* based TTI queries to isLegalAdd...
Definition: TargetTransformInfo.cpp:472
llvm::TargetTransformInfo::Concept::getScalingFactorCost
virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace)=0
llvm::TargetTransformInfo::getMinMaxReductionCost
InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Definition: TargetTransformInfo.cpp:994
llvm::TargetTransformInfo::isLegalToVectorizeLoadChain
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
Definition: TargetTransformInfo.cpp:1087
llvm::TargetTransformInfo::getGEPCost
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, TargetCostKind CostKind=TCK_SizeAndLatency) const
Estimate the cost of a GEP operation when lowered.
Definition: TargetTransformInfo.cpp:211
llvm::TargetTransformInfo::isLegalAddImmediate
bool isLegalAddImmediate(int64_t Imm) const
Return true if the specified immediate is a legal add immediate, that is, the target has add instruction...
Definition: TargetTransformInfo.cpp:341
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:167
llvm::TargetTransformInfo::Concept::getOperandsScalarizationOverhead
virtual InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys)=0
llvm::TargetTransformInfo::LSRCost::ScaleCost
unsigned ScaleCost
Definition: TargetTransformInfo.h:410
llvm::TargetTransformInfo::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace=0, Align Alignment=Align(1), bool *Fast=nullptr) const
Determine if the target supports unaligned memory accesses.
Definition: TargetTransformInfo.cpp:557
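A sketch of the typical query, assuming Ctx is the LLVMContext; the 128-bit width is illustrative:
  bool Fast = false;
  if (TTI.allowsMisalignedMemoryAccesses(Ctx, /*BitWidth=*/128,
                                         /*AddressSpace=*/0,
                                         /*Alignment=*/Align(1), &Fast) &&
      Fast) {
    // Emit a single unaligned 128-bit access instead of splitting it.
  }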
llvm::TargetTransformInfo::isLoweredToCall
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
Definition: TargetTransformInfo.cpp:286
llvm::TargetTransformInfo::OperandValueInfo::getNoProps
OperandValueInfo getNoProps() const
Definition: TargetTransformInfo.h:931
llvm::TargetTransformInfo::SK_Splice
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
Definition: TargetTransformInfo.h:897
llvm::TargetTransformInfo::Concept::getVScaleForTuning
virtual Optional< unsigned > getVScaleForTuning() const =0
llvm::TargetTransformInfo::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:839
llvm::TargetTransformInfo::Concept::shouldBuildRelLookupTables
virtual bool shouldBuildRelLookupTables()=0
llvm::TargetTransformInfo::PSK_SlowHardware
@ PSK_SlowHardware
Definition: TargetTransformInfo.h:583
llvm::TargetTransformInfo::Concept::getRegisterClassName
virtual const char * getRegisterClassName(unsigned ClassID) const =0
llvm::AnalysisInfoMixin
A CRTP mix-in that provides informational APIs needed for analysis passes.
Definition: PassManager.h:394
llvm::TargetTransformInfo::ReductionFlags::IsSigned
bool IsSigned
Whether the operation is a signed int reduction.
Definition: TargetTransformInfo.h:1449
llvm::TargetTransformInfo::OperandValueKind
OperandValueKind
Additional information about an operand's possible values.
Definition: TargetTransformInfo.h:904
llvm::TargetTransformInfo::Concept::instCombineIntrinsic
virtual Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II)=0
llvm::MemIntrinsicInfo::ReadMem
bool ReadMem
Definition: TargetTransformInfo.h:84
llvm::TargetTransformInfo::Concept::getCmpSelInstrCost
virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I)=0
llvm::TargetTransformInfo::MemCmpExpansionOptions::MaxNumLoads
unsigned MaxNumLoads
Definition: TargetTransformInfo.h:783
InstructionCost.h
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::TargetTransformInfo::canSaveCmp
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const
Return true if the target can save a compare for loop count; for example, a hardware loop saves a compar...
Definition: TargetTransformInfo.cpp:375
llvm::TargetTransformInfo::isTruncateFree
bool isTruncateFree(Type *Ty1, Type *Ty2) const
Return true if it's free to truncate a value of type Ty1 to type Ty2.
Definition: TargetTransformInfo.cpp:476
llvm::TargetTransformInfo::prefersVectorizedAddressing
bool prefersVectorizedAddressing() const
Return true if the target doesn't mind addresses in vectors.
Definition: TargetTransformInfo.cpp:459
llvm::TargetTransformInfo::MemCmpExpansionOptions
Returns options for expansion of memcmp. IsZeroCmp is...
Definition: TargetTransformInfo.h:778
llvm::TargetTransformInfo::TCC_Free
@ TCC_Free
Expected to fold away in lowering.
Definition: TargetTransformInfo.h:243
llvm::TargetTransformInfo::PeelingPreferences::PeelCount
unsigned PeelCount
A forced peeling factor (the number of bodies of the original loop that should be peeled off before t...
Definition: TargetTransformInfo.h:530
llvm::TargetTransformInfo::Concept::getMulAccReductionCost
virtual InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput)=0
llvm::TargetTransformInfo::supportsEfficientVectorElementLoadStore
bool supportsEfficientVectorElementLoadStore() const
If the target has efficient vector element load/store instructions, it can return true here so that inser...
Definition: TargetTransformInfo.cpp:523
llvm::TargetTransformInfo::emitGetActiveLaneMask
PredicationStyle emitGetActiveLaneMask() const
Query the target whether lowering of the llvm.get.active.lane.mask intrinsic is supported and how the...
Definition: TargetTransformInfo.cpp:303
llvm::TargetTransformInfo::enableMemCmpExpansion
MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
Definition: TargetTransformInfo.cpp:541
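A sketch of reading the returned options, assuming using namespace llvm and llvm/Support/raw_ostream.h for errs(); the probe function itself is hypothetical.

// Hypothetical probe of the target's memcmp-expansion limits for the
// memcmp(p1, p2, s) == 0 case.
void describeMemCmpExpansion(const TargetTransformInfo &TTI) {
  TargetTransformInfo::MemCmpExpansionOptions Options =
      TTI.enableMemCmpExpansion(/*OptSize=*/false, /*IsZeroCmp=*/true);
  if (Options.LoadSizes.empty())
    return; // No expansion on this target.
  // LoadSizes lists the legal load widths, largest first; MaxNumLoads bounds
  // how many loads a single expansion may emit.
  for (unsigned LoadSize : Options.LoadSizes)
    errs() << "may use " << LoadSize << "-byte loads\n";
  errs() << "at most " << Options.MaxNumLoads << " loads\n";
}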
llvm::TargetTransformInfo::Concept::shouldConsiderAddressTypePromotion
virtual bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)=0
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1105
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:221
llvm::TargetTransformInfo::Concept::haveFastSqrt
virtual bool haveFastSqrt(Type *Ty)=0
llvm::TargetTransformInfo::Concept::isNoopAddrSpaceCast
virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const =0
llvm::TargetTransformInfo::isElementTypeLegalForScalableVector
bool isElementTypeLegalForScalableVector(Type *Ty) const
Definition: TargetTransformInfo.cpp:1104
llvm::TargetTransformInfo::isLegalMaskedStore
bool isLegalMaskedStore(Type *DataType, Align Alignment) const
Return true if the target supports masked store.
Definition: TargetTransformInfo.cpp:388
llvm::TargetTransformInfo::getPredicatedAddrSpace
std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const
Definition: TargetTransformInfo.cpp:277
llvm::TargetTransformInfo::Concept::isIndexedLoadLegal
virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const =0
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
llvm::BranchProbability
Definition: BranchProbability.h:30
llvm::TargetTransformInfo::Concept::getMinTripCountTailFoldingThreshold
virtual unsigned getMinTripCountTailFoldingThreshold() const =0
llvm::TargetTransformInfo::Concept::getFlatAddressSpace
virtual unsigned getFlatAddressSpace()=0
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::TargetTransformInfo::Concept::emitGetActiveLaneMask
virtual PredicationStyle emitGetActiveLaneMask()=0
llvm::TargetTransformInfo::UnrollingPreferences::DefaultUnrollRuntimeCount
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
Definition: TargetTransformInfo.h:450
llvm::TargetTransformInfo::hasDivRemOp
bool hasDivRemOp(Type *DataType, bool IsSigned) const
Return true if the target has a unified operation to calculate division and remainder.
Definition: TargetTransformInfo.cpp:450
llvm::TargetTransformInfo::Concept::LSRWithInstrQueries
virtual bool LSRWithInstrQueries()=0
llvm::TargetTransformInfo::simplifyDemandedUseBitsIntrinsic
Optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
Can be used to implement target-specific instruction combining.
Definition: TargetTransformInfo.cpp:313
llvm::TargetTransformInfo::getCacheSize
Optional< unsigned > getCacheSize(CacheLevel Level) const
Definition: TargetTransformInfo.cpp:688
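Since the result is an Optional, callers must handle targets that do not model their cache hierarchy; a minimal sketch (the helper is hypothetical):

// Hypothetical helper: L1 data-cache size in bytes, or 0 if unreported.
unsigned l1CacheSizeOrZero(const TargetTransformInfo &TTI) {
  Optional<unsigned> Size =
      TTI.getCacheSize(TargetTransformInfo::CacheLevel::L1D);
  return Size ? *Size : 0;
}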
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::TargetTransformInfo::getNumberOfRegisters
unsigned getNumberOfRegisters(unsigned ClassID) const
Definition: TargetTransformInfo.cpp:626
llvm::TargetTransformInfo::shouldPrefetchAddressSpace
bool shouldPrefetchAddressSpace(unsigned AS) const
Definition: TargetTransformInfo.cpp:716
llvm::InstructionCost::isValid
bool isValid() const
Definition: InstructionCost.h:79
llvm::TargetTransformInfo::AddressingModeKind
AddressingModeKind
Definition: TargetTransformInfo.h:630
llvm::TargetTransformInfo::getMaxPrefetchIterationsAhead
unsigned getMaxPrefetchIterationsAhead() const
Definition: TargetTransformInfo.cpp:708
llvm::TargetTransformInfo::MIM_Unindexed
@ MIM_Unindexed
No indexing.
Definition: TargetTransformInfo.h:1394
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:429
llvm::TargetTransformInfo::OK_AnyValue
@ OK_AnyValue
Definition: TargetTransformInfo.h:905
llvm::TargetTransformInfo::getLoadVectorFactor
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
Definition: TargetTransformInfo.cpp:1108
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
llvm::TargetTransformInfo::SK_Transpose
@ SK_Transpose
Transpose two vectors.
Definition: TargetTransformInfo.h:890
llvm::TargetTransformInfo::CastContextHint::None
@ None
The cast is not used with a load/store of any kind.
llvm::TargetTransformInfo::isLegalToVectorizeStore
bool isLegalToVectorizeStore(StoreInst *SI) const
Definition: TargetTransformInfo.cpp:1083
llvm::TargetTransformInfo::CacheLevel::L2D
@ L2D
llvm::TargetTransformInfo::Concept::getAssumedAddrSpace
virtual unsigned getAssumedAddrSpace(const Value *V) const =0
llvm::TargetTransformInfo::MIM_PreInc
@ MIM_PreInc
Pre-incrementing.
Definition: TargetTransformInfo.h:1395
llvm::TargetTransformInfo::MemCmpExpansionOptions::LoadSizes
SmallVector< unsigned, 8 > LoadSizes
Definition: TargetTransformInfo.h:786
llvm::TargetIRAnalysis::TargetIRAnalysis
TargetIRAnalysis()
Default construct a target IR analysis.
Definition: TargetTransformInfo.cpp:1164
llvm::TargetTransformInfo::Concept::preferInLoopReduction
virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags) const =0
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:186
llvm::TargetTransformInfo::enableOrderedReductions
bool enableOrderedReductions() const
Return true if we should be enabling ordered reductions for the target.
Definition: TargetTransformInfo.cpp:446
llvm::TargetTransformInfo::getMaxVScale
Optional< unsigned > getMaxVScale() const
Definition: TargetTransformInfo.cpp:648
llvm::TargetTransformInfo::Concept::forceScalarizeMaskedGather
virtual bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment)=0
llvm::TargetTransformInfo::Concept::getStoreMinimumVF
virtual unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const =0
llvm::TargetTransformInfo::getMemcpyLoopResidualLoweringType
void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, Optional< uint32_t > AtomicCpySize=None) const
Definition: TargetTransformInfo.cpp:1044
llvm::TargetTransformInfo::Concept::supportsTailCallFor
virtual bool supportsTailCallFor(const CallBase *CB)=0
llvm::TargetTransformInfo::getMemcpyLoopLoweringType
Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, Optional< uint32_t > AtomicElementSize=None) const
Definition: TargetTransformInfo.cpp:1035
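A sketch of how memcpy lowering might pick its loop element type, assuming address space 0 and 4-byte alignment purely for illustration; the wrapper is hypothetical.

// Hypothetical wrapper: element type for an expanded memcpy loop.
Type *pickMemcpyElementType(const TargetTransformInfo &TTI, LLVMContext &Ctx,
                            Value *Length) {
  return TTI.getMemcpyLoopLoweringType(Ctx, Length, /*SrcAddrSpace=*/0,
                                       /*DestAddrSpace=*/0, /*SrcAlign=*/4,
                                       /*DestAlign=*/4);
}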
llvm::TargetTransformInfo::Concept::isLegalICmpImmediate
virtual bool isLegalICmpImmediate(int64_t Imm)=0
llvm::TargetTransformInfo::getCacheAssociativity
Optional< unsigned > getCacheAssociativity(CacheLevel Level) const
Definition: TargetTransformInfo.cpp:693
llvm::TargetTransformInfo::getCacheLineSize
unsigned getCacheLineSize() const
Definition: TargetTransformInfo.cpp:682
llvm::TargetTransformInfo::supportsTailCalls
bool supportsTailCalls() const
If the target supports tail calls.
Definition: TargetTransformInfo.cpp:527
std
Definition: BitVector.h:851
llvm::TargetTransformInfo::enableMaskedInterleavedAccessVectorization
bool enableMaskedInterleavedAccessVectorization() const
Enable matching of interleaved access groups that contain predicated accesses or gaps and are therefore vectorized using masked vector loads/stores.
Definition: TargetTransformInfo.cpp:549
llvm::KnownBits
Definition: KnownBits.h:23
llvm::TargetTransformInfo::LSRCost::NumBaseAdds
unsigned NumBaseAdds
Definition: TargetTransformInfo.h:407
llvm::TargetIRAnalysis::operator=
TargetIRAnalysis & operator=(TargetIRAnalysis &&RHS)
Definition: TargetTransformInfo.h:2588
llvm::HardwareLoopInfo::ExitBlock
BasicBlock * ExitBlock
Definition: TargetTransformInfo.h:100
llvm::MemIntrinsicInfo::WriteMem
bool WriteMem
Definition: TargetTransformInfo.h:85
llvm::TargetTransformInfo::UnrollingPreferences::UpperBound
bool UpperBound
Allow using trip count upper bound to unroll loops.
Definition: TargetTransformInfo.h:481
llvm::TargetTransformInfo::getMulAccReductionCost
InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of an extended reduction pattern, similar to getArithmeticReductionCost of an Add reduction with multiply and optional extensions.
Definition: TargetTransformInfo.cpp:1010
llvm::VPIntrinsic
This is the common base class for vector predication intrinsics.
Definition: IntrinsicInst.h:395
llvm::TargetTransformInfo::isNoopAddrSpaceCast
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Definition: TargetTransformInfo.cpp:262
llvm::TargetTransformInfo::getOperandInfo
static OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
Definition: TargetTransformInfo.cpp:725
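A sketch of the intended pattern, roughly as the vectorizers use it: classify each operand, then feed the results into a cost query (assuming using namespace llvm; costOfBinOp is a hypothetical helper).

// Hypothetical helper: throughput cost of a binary operator, with operand
// properties (constants, powers of two, ...) taken into account.
InstructionCost costOfBinOp(const TargetTransformInfo &TTI,
                            const BinaryOperator *BO) {
  TargetTransformInfo::OperandValueInfo Op1Info =
      TargetTransformInfo::getOperandInfo(BO->getOperand(0));
  TargetTransformInfo::OperandValueInfo Op2Info =
      TargetTransformInfo::getOperandInfo(BO->getOperand(1));
  return TTI.getArithmeticInstrCost(BO->getOpcode(), BO->getType(),
                                    TargetTransformInfo::TCK_RecipThroughput,
                                    Op1Info, Op2Info);
}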
llvm::TypeSize
Definition: TypeSize.h:435
llvm::TargetTransformInfo::getLoadStoreVecRegBitWidth
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
Definition: TargetTransformInfo.cpp:1075
llvm::TargetTransformInfo::UnrollingPreferences::AllowRemainder
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
Definition: TargetTransformInfo.h:473
llvm::TargetTransformInfo::Concept::enableAggressiveInterleaving
virtual bool enableAggressiveInterleaving(bool LoopHasReductions)=0
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
llvm::TargetTransformInfo::shouldMaximizeVectorBandwidth
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const
Definition: TargetTransformInfo.cpp:656
llvm::TargetTransformInfo::isFPVectorizationPotentiallyUnsafe
bool isFPVectorizationPotentiallyUnsafe() const
Indicate that it is potentially unsafe to automatically vectorize floating-point operations because the semantics of vector and scalar floating-point operations may differ.
Definition: TargetTransformInfo.cpp:553
llvm::TargetTransformInfo::Concept::isLegalBroadcastLoad
virtual bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const =0
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
PassManager.h
Arguments
AMDGPU Lower Kernel Arguments
Definition: AMDGPULowerKernelArguments.cpp:242
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:222
llvm::User::operand_values
iterator_range< value_op_iterator > operand_values()
Definition: User.h:266
llvm::TargetTransformInfo::LSRCost::ImmCost
unsigned ImmCost
Definition: TargetTransformInfo.h:408
llvm::TargetTransformInfo::hasActiveVectorLength
bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const
Definition: TargetTransformInfo.cpp:1157
llvm::TargetTransformInfo::forceScalarizeMaskedScatter
bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const
Return true if the target forces scalarizing of llvm.masked.scatter intrinsics.
Definition: TargetTransformInfo.cpp:433
llvm::TargetTransformInfo::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo OpdInfo={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:909
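A sketch of costing an existing load with this hook; costOfLoad is a hypothetical wrapper, and the default operand info is passed explicitly only for clarity.

// Hypothetical wrapper: throughput cost of a scalar or vector load.
InstructionCost costOfLoad(const TargetTransformInfo &TTI,
                           const LoadInst *LI) {
  return TTI.getMemoryOpCost(Instruction::Load, LI->getType(), LI->getAlign(),
                             LI->getPointerAddressSpace(),
                             TargetTransformInfo::TCK_RecipThroughput,
                             {TargetTransformInfo::OK_AnyValue,
                              TargetTransformInfo::OP_None},
                             LI);
}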
llvm::TargetIRAnalysis::run
Result run(const Function &F, FunctionAnalysisManager &)
Definition: TargetTransformInfo.cpp:1170
llvm::TargetTransformInfo::getInstructionCost
InstructionCost getInstructionCost(const User *U, TargetCostKind CostKind) const
This is a helper function which calls the three-argument getInstructionCost with Operands which are the current operands of U.
Definition: TargetTransformInfo.h:308
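For example, a pass could sum this helper over a function to obtain a rough size-and-latency estimate; estimateFunctionCost is a hypothetical name, not an LLVM API.

// Hypothetical helper: accumulate a whole-function cost estimate.
InstructionCost estimateFunctionCost(const Function &F,
                                     const TargetTransformInfo &TTI) {
  InstructionCost Total = 0;
  for (const BasicBlock &BB : F)
    for (const Instruction &I : BB)
      Total += TTI.getInstructionCost(&I,
                                      TargetTransformInfo::TCK_SizeAndLatency);
  return Total; // Invalid if any instruction's cost is invalid.
}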
llvm::HardwareLoopInfo::HardwareLoopInfo
HardwareLoopInfo(Loop *L)
Definition: TargetTransformInfo.h:98
llvm::TargetTransformInfo::Concept::getFPOpCost
virtual InstructionCost getFPOpCost(Type *Ty)=0
llvm::TargetTransformInfo::getInliningThresholdMultiplier
unsigned getInliningThresholdMultiplier() const
Definition: TargetTransformInfo.cpp:197
llvm::TargetTransformInfo::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index=-1) const
Definition: TargetTransformInfo.cpp:878
llvm::PredicationStyle
PredicationStyle
Definition: TargetTransformInfo.h:165
llvm::InstCombiner
The core instruction combiner logic.
Definition: InstCombiner.h:45
llvm::TargetTransformInfo::adjustInliningThreshold
unsigned adjustInliningThreshold(const CallBase *CB) const
Definition: TargetTransformInfo.cpp:202
llvm::TargetTransformInfo::getMaskedMemoryOpCost
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Definition: TargetTransformInfo.cpp:921
llvm::TargetTransformInfo::RGK_ScalableVector
@ RGK_ScalableVector
Definition: TargetTransformInfo.h:955
llvm::TargetTransformInfo::getMinTripCountTailFoldingThreshold
unsigned getMinTripCountTailFoldingThreshold() const
Definition: TargetTransformInfo.cpp:1145
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:46
llvm::HardwareLoopInfo
Attributes of a target dependent hardware loop.
Definition: TargetTransformInfo.h:96
llvm::InstructionCost::getInvalid
static InstructionCost getInvalid(CostType Val=0)
Definition: InstructionCost.h:73
llvm::TargetTransformInfo::getEstimatedNumberOfCaseClusters
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
Definition: TargetTransformInfo.cpp:217
llvm::RISCVMatInt::Imm
@ Imm
Definition: RISCVMatInt.h:23
llvm::TargetTransformInfo::TargetTransformInfo
TargetTransformInfo(T Impl)
Construct a TTI object using a type implementing the Concept API below.
Definition: TargetTransformInfo.h:2549
llvm::RecurrenceDescriptor
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
Definition: IVDescriptors.h:69
llvm::TargetTransformInfo::Concept::isLegalAltInstr
virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const =0
llvm::TargetTransformInfo::Concept::getCallInstrCost
virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::Concept::getArithmeticReductionCost
virtual InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, Optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)=0
llvm::TargetTransformInfo::MemCmpExpansionOptions::NumLoadsPerBlock
unsigned NumLoadsPerBlock
Definition: TargetTransformInfo.h:796
llvm::TargetTransformInfo::getArithmeticReductionCost
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, Optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of vector reduction intrinsics.
Definition: TargetTransformInfo.cpp:985
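A sketch contrasting a strict (in-order) FP reduction with a reassociable one via the FMF argument, assuming llvm/IR/FMF.h is available; the wrapper is hypothetical.

// Hypothetical wrapper: cost of a vector fadd reduction. Without reassoc,
// targets must cost an ordered (sequential) reduction.
InstructionCost fpAddReductionCost(const TargetTransformInfo &TTI,
                                   VectorType *VecTy, bool AllowReassoc) {
  FastMathFlags FMF;
  if (AllowReassoc)
    FMF.setAllowReassoc(); // Permits a cheaper tree-shaped reduction.
  return TTI.getArithmeticReductionCost(
      Instruction::FAdd, VecTy, FMF, TargetTransformInfo::TCK_RecipThroughput);
}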
llvm::IntrinsicCostAttributes::getID
Intrinsic::ID getID() const
Definition: TargetTransformInfo.h:150
llvm::TargetTransformInfo::Concept::getGISelRematGlobalCost
virtual unsigned getGISelRematGlobalCost() const =0
llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition: TargetTransformInfo.h:955
llvm::TargetTransformInfo::Concept::getIntImmCostInst
virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr)=0
llvm::TargetTransformInfo::Concept::getCFInstrCost
virtual InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
llvm::TargetTransformInfo::invalidate
bool invalidate(Function &, const PreservedAnalyses &, FunctionAnalysisManager::Invalidator &)
Handle the invalidation of this information.
Definition: TargetTransformInfo.h:201
llvm::TargetTransformInfo::Concept::getInlinerVectorBonusPercent
virtual int getInlinerVectorBonusPercent()=0
llvm::TargetTransformInfo::Concept::isLegalAddImmediate
virtual bool isLegalAddImmediate(int64_t Imm)=0
SmallBitVector.h
llvm::TargetTransformInfo::UnrollingPreferences::Threshold
unsigned Threshold
The cost threshold for the unrolled loop.
Definition: TargetTransformInfo.h:422
llvm::TargetTransformInfo::ReductionFlags::ReductionFlags
ReductionFlags()=default
llvm::TargetTransformInfo::Concept::enableScalableVectorization
virtual bool enableScalableVectorization() const =0
llvm::TargetTransformInfo::Concept::getNumberOfParts
virtual unsigned getNumberOfParts(Type *Tp)=0
llvm::TargetTransformInfo::Concept::getPredicatedAddrSpace
virtual std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const =0
llvm::TargetTransformInfo::VPLegalization::Legal
@ Legal
Definition: TargetTransformInfo.h:1502
llvm::TargetTransformInfo::Concept::shouldBuildLookupTablesForConstant
virtual bool shouldBuildLookupTablesForConstant(Constant *C)=0
llvm::TargetTransformInfo::Concept::isProfitableToHoist
virtual bool isProfitableToHoist(Instruction *I)=0
llvm::TargetTransformInfo::getMinPrefetchStride
unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Some HW prefetchers can handle accesses up to a certain constant stride.
Definition: TargetTransformInfo.cpp:701
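A sketch of gathering the prefetch-tuning queries that a software-prefetch pass reads together; the PrefetchParams aggregate is hypothetical.

// Hypothetical aggregate of the target's prefetch-tuning answers.
struct PrefetchParams {
  unsigned CacheLineSize;
  unsigned MinStride;
  unsigned IterationsAhead;
};
PrefetchParams getPrefetchParams(const TargetTransformInfo &TTI,
                                 unsigned NumMemAccesses,
                                 unsigned NumStridedMemAccesses,
                                 unsigned NumPrefetches, bool HasCall) {
  return {TTI.getCacheLineSize(),
          TTI.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
                                   NumPrefetches, HasCall),
          TTI.getMaxPrefetchIterationsAhead()};
}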
llvm::TargetTransformInfo::isIndexedStoreLegal
bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const
Definition: TargetTransformInfo.cpp:1070
llvm::TargetTransformInfo::isNumRegsMajorCostOfLSR
bool isNumRegsMajorCostOfLSR() const
Return true if the major cost of LSR is the number of registers.
Definition: TargetTransformInfo.cpp:363
llvm::TargetTransformInfo::shouldExpandReduction
bool shouldExpandReduction(const IntrinsicInst *II) const
Definition: TargetTransformInfo.cpp:1137
llvm::TargetTransformInfo::Concept::isLegalAddressingMode
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I)=0
llvm::TargetTransformInfo::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP) const
Get target-customized preferences for the generic loop peeling transformation.
Definition: TargetTransformInfo.cpp:336
llvm::TargetTransformInfo::getStoreVectorFactor
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
Definition: TargetTransformInfo.cpp:1115
llvm::TargetTransformInfo::Concept::getCacheAssociativity
virtual Optional< unsigned > getCacheAssociativity(CacheLevel Level) const =0
llvm::TargetTransformInfo::MIM_PostDec
@ MIM_PostDec
Post-decrementing.
Definition: TargetTransformInfo.h:1398
llvm::HardwareLoopInfo::canAnalyze
bool canAnalyze(LoopInfo &LI)
Definition: TargetTransformInfo.cpp:50
llvm::TargetTransformInfo::isLegalToVectorizeStoreChain
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
Definition: TargetTransformInfo.cpp:1093
llvm::SmallVectorImpl< const Value * >
llvm::TargetTransformInfo::Concept::getArithmeticInstrCost
virtual InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, OperandValueInfo Opd1Info, OperandValueInfo Opd2Info, ArrayRef< const Value * > Args, const Instruction *CxtI=nullptr)=0
ForceHardwareLoopPHI
static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))
llvm::TargetTransformInfo::Concept::preferPredicatedReductionSelect
virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, ReductionFlags) const =0
llvm::MemIntrinsicInfo
Information about a load/store intrinsic defined by the target.
Definition: TargetTransformInfo.h:71
llvm::TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize
unsigned getAtomicMemIntrinsicMaxElementSize() const
Definition: TargetTransformInfo.cpp:1026
llvm::msgpack::Type
Type
MessagePack types as defined in the standard, with the exception of Integer being divided into a signed and an unsigned variant.
Definition: MsgPackReader.h:48
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst); holds everything related to calling a function.
Definition: InstrTypes.h:1174
llvm::TargetTransformInfo::enableAggressiveInterleaving
bool enableAggressiveInterleaving(bool LoopHasReductions) const
Don't restrict interleaved unrolling to small loops.
Definition: TargetTransformInfo.cpp:535
llvm::TargetTransformInfo::Concept::areInlineCompatible
virtual bool areInlineCompatible(const Function *Caller, const Function *Callee) const =0
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::TargetTransformInfo::Concept::getShuffleCost
virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args)=0
llvm::TargetTransformInfo::UnrollingPreferences::OptSizeThreshold
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
Definition: TargetTransformInfo.h:436
llvm::TargetTransformInfo::Concept::getMinVectorRegisterBitWidth
virtual unsigned getMinVectorRegisterBitWidth() const =0
llvm::TargetTransformInfo::getCallInstrCost
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency) const
Definition: TargetTransformInfo.cpp:959
llvm::TargetTransformInfo::getExtractWithExtendCost
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index=-1) const
Definition: TargetTransformInfo.cpp:850
llvm::MemIntrinsicInfo::MatchingId
unsigned short MatchingId
Definition: TargetTransformInfo.h:82
llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition: TargetTransformInfo.h:244
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3278
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
llvm::TargetTransformInfo::Concept::getPreferredAddressingMode
virtual AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const =0
llvm::IntrinsicCostAttributes::isTypeBasedOnly
bool isTypeBasedOnly() const
Definition: TargetTransformInfo.h:158
llvm::TargetTransformInfo::isProfitableToHoist
bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist an instruction in the then/else block to before the if.
Definition: TargetTransformInfo.cpp:480
llvm::TargetTransformInfo::shouldBuildRelLookupTables
bool shouldBuildRelLookupTables() const
Return true if lookup tables should be turned into relative lookup tables.
Definition: TargetTransformInfo.cpp:503
llvm::TargetTransformInfo::Concept::getMaxInterleaveFactor
virtual unsigned getMaxInterleaveFactor(unsigned VF)=0
llvm::TargetTransformInfo::Concept::getStoreVectorFactor
virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const =0
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3134
llvm::TargetTransformInfo::Concept::getLoadStoreVecRegBitWidth
virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const =0
llvm::TargetTransformInfo::Concept::getRegUsageForType
virtual unsigned getRegUsageForType(Type *Ty)=0
llvm::TargetTransformInfo::~TargetTransformInfo
~TargetTransformInfo()
llvm::TargetTransformInfo::Concept::getCacheLineSize
virtual unsigned getCacheLineSize() const =0
llvm::AMDGPU::VGPRIndexMode::Id
Id
Definition: SIDefines.h:241
llvm::TargetTransformInfo::OperandValueInfo::isConstant
bool isConstant() const
Definition: TargetTransformInfo.h:921
llvm::TargetTransformInfo::Concept::canHaveNonUndefGlobalInitializerInAddressSpace
virtual bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const =0
llvm::TargetTransformInfo::Concept::adjustInliningThreshold
virtual unsigned adjustInliningThreshold(const CallBase *CB)=0
llvm::TargetTransformInfo::Concept::getIntImmCostIntrin
virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind)=0
llvm::TargetTransformInfo::OK_NonUniformConstantValue
@ OK_NonUniformConstantValue
Definition: TargetTransformInfo.h:908
llvm::TargetTransformInfo::getTgtMemIntrinsic
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
Definition: TargetTransformInfo.cpp:1021
llvm::TargetTransformInfo::getStoreMinimumVF
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const
Definition: TargetTransformInfo.cpp:671
llvm::TargetTransformInfo::getRegUsageForType
unsigned getRegUsageForType(Type *Ty) const
Returns the estimated number of registers required to represent Ty.
Definition: TargetTransformInfo.cpp:490
llvm::TargetTransformInfo::Concept::isLegalMaskedCompressStore
virtual bool isLegalMaskedCompressStore(Type *DataType)=0
llvm::TargetTransformInfo::useAA
bool useAA() const
Definition: TargetTransformInfo.cpp:484
llvm::TargetTransformInfo::Concept::getInliningThresholdMultiplier
virtual unsigned getInliningThresholdMultiplier()=0
llvm::HardwareLoopInfo::CountType
IntegerType * CountType
Definition: TargetTransformInfo.h:103
llvm::TargetTransformInfo::enableWritePrefetching
bool enableWritePrefetching() const
Definition: TargetTransformInfo.cpp:712
llvm::TargetTransformInfo::getFPOpCost
InstructionCost getFPOpCost(Type *Ty) const
Return the expected cost of supporting the floating point operation of the specified type.
Definition: TargetTransformInfo.cpp:584
llvm::PredicationStyle::None
@ None
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::TargetTransformInfo::TCK_RecipThroughput
@ TCK_RecipThroughput
Reciprocal throughput.
Definition: TargetTransformInfo.h:218
llvm::IntrinsicCostAttributes::getArgs
const SmallVectorImpl< const Value * > & getArgs() const
Definition: TargetTransformInfo.h:155
llvm::TargetTransformInfo::Concept::shouldPrefetchAddressSpace
virtual bool shouldPrefetchAddressSpace(unsigned AS) const =0
llvm::TargetTransformInfo::AMK_None
@ AMK_None
Definition: TargetTransformInfo.h:633
llvm::TargetTransformInfo::SK_ExtractSubvector
@ SK_ExtractSubvector
ExtractSubvector Index indicates start offset.
Definition: TargetTransformInfo.h:892
llvm::TargetTransformInfo::AMK_PreIndexed
@ AMK_PreIndexed
Definition: TargetTransformInfo.h:631
llvm::AtomicOrdering::NotAtomic
@ NotAtomic
llvm::Data
@ Data
llvm::TargetTransformInfo::Concept::isLegalToVectorizeStore
virtual bool isLegalToVectorizeStore(StoreInst *SI) const =0
llvm::TargetTransformInfo::getVPMemoryOpCost
InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
llvm::TargetTransformInfo::isAlwaysUniform
bool isAlwaysUniform(const Value *V) const
Definition: TargetTransformInfo.cpp:249
llvm::TargetTransformInfo::Concept::getMemcpyCost
virtual InstructionCost getMemcpyCost(const Instruction *I)=0
llvm::TargetTransformInfo::isLegalAddressingMode
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of the specified type.
Definition: TargetTransformInfo.cpp:349
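A sketch of an LSR-style legality query for a base-register-plus-offset mode; isRegImmModeLegal is hypothetical, and address space 0 is assumed via the default arguments.

// Hypothetical helper: is reg + Offset (no global base, no scaled index)
// legal for a load/store of Ty?
bool isRegImmModeLegal(const TargetTransformInfo &TTI, Type *Ty,
                       int64_t Offset) {
  return TTI.isLegalAddressingMode(Ty, /*BaseGV=*/nullptr,
                                   /*BaseOffset=*/Offset, /*HasBaseReg=*/true,
                                   /*Scale=*/0);
}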
llvm::TargetTransformInfo::getMemcpyCost
InstructionCost getMemcpyCost(const Instruction *I) const
Definition: TargetTransformInfo.cpp:979
llvm::TargetTransformInfo::Concept::simplifyDemandedUseBitsIntrinsic
virtual Optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)=0
llvm::TargetTransformInfo::UnrollingPreferences::MaxPercentThresholdBoost
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain percentage to allow more aggressive complete unrolling.
Definition: TargetTransformInfo.h:433
llvm::TargetTransformInfo::Concept::getVectorInstrCost
virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)=0
llvm::TargetTransformInfo::Concept::forceScalarizeMaskedScatter
virtual bool forceScalarizeMaskedScatter(VectorType *DataType, Align Alignment)=0
llvm::TargetTransformInfo::Concept::getUnrollingPreferences
virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)=0
llvm::MemIntrinsicInfo::IsVolatile
bool IsVolatile
Definition: TargetTransformInfo.h:86
llvm::TargetTransformInfo::Concept::getMinPrefetchStride
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const =0
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38