TargetTransformInfo.h
1 //===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This pass exposes codegen information to IR-level passes. Every
10 /// transformation that uses codegen information is broken into three parts:
11 /// 1. The IR-level analysis pass.
12 /// 2. The IR-level transformation interface which provides the needed
13 /// information.
14 /// 3. Codegen-level implementation which uses target-specific hooks.
15 ///
16 /// This file defines #2, which is the interface that IR-level transformations
17 /// use for querying the codegen.
18 ///
19 //===----------------------------------------------------------------------===//
20 
21 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
22 #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
23 
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/IR/Operator.h"
26 #include "llvm/IR/PassManager.h"
27 #include "llvm/Pass.h"
28 #include "llvm/Support/AtomicOrdering.h"
29 #include "llvm/Support/DataTypes.h"
30 #include "llvm/Analysis/LoopInfo.h"
32 #include "llvm/IR/Dominators.h"
34 #include <functional>
35 
36 namespace llvm {
37 
38 namespace Intrinsic {
39 enum ID : unsigned;
40 }
41 
42 class AssumptionCache;
43 class BranchInst;
44 class Function;
45 class GlobalValue;
46 class IntrinsicInst;
47 class LoadInst;
48 class Loop;
49 class SCEV;
50 class ScalarEvolution;
51 class StoreInst;
52 class SwitchInst;
53 class TargetLibraryInfo;
54 class Type;
55 class User;
56 class Value;
57 
58 /// Information about a load/store intrinsic defined by the target.
59 struct MemIntrinsicInfo {
60  /// This is the pointer that the intrinsic is loading from or storing to.
61  /// If this is non-null, then analysis/optimization passes can assume that
62  /// this intrinsic is functionally equivalent to a load/store from this
63  /// pointer.
64  Value *PtrVal = nullptr;
65 
66  // Ordering for atomic operations.
67  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
68 
69  // Same Id is set by the target for corresponding load/store intrinsics.
70  unsigned short MatchingId = 0;
71 
72  bool ReadMem = false;
73  bool WriteMem = false;
74  bool IsVolatile = false;
75 
76  bool isUnordered() const {
77  return (Ordering == AtomicOrdering::NotAtomic ||
78  Ordering == AtomicOrdering::Unordered) && !IsVolatile;
79  }
80 };
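// Illustrative sketch, not part of the original header: given a
// TargetTransformInfo &TTI and an IntrinsicInst *II (both assumed to be
// available in the caller), a pass might use MemIntrinsicInfo via the
// getTgtMemIntrinsic() hook declared further below to treat a recognized
// target intrinsic like an ordinary, unordered load:
//
//   MemIntrinsicInfo Info;
//   if (TTI.getTgtMemIntrinsic(II, Info) && Info.ReadMem && !Info.WriteMem &&
//       Info.isUnordered() && Info.PtrVal)
//     ; // analyze II as a load from Info.PtrVal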
81 
82 /// Attributes of a target-dependent hardware loop.
83 struct HardwareLoopInfo {
84  HardwareLoopInfo() = delete;
85  HardwareLoopInfo(Loop *L) : L(L) {}
86  Loop *L = nullptr;
87  BasicBlock *ExitBlock = nullptr;
88  BranchInst *ExitBranch = nullptr;
89  const SCEV *ExitCount = nullptr;
90  IntegerType *CountType = nullptr;
91  Value *LoopDecrement = nullptr; // Decrement the loop counter by this
92  // value in every iteration.
93  bool IsNestingLegal = false; // Can a hardware loop be a parent to
94  // another hardware loop?
95  bool CounterInReg = false; // Should loop counter be updated in
96  // the loop via a phi?
97  bool PerformEntryTest = false; // Generate the intrinsic which also performs
98  // icmp ne zero on the loop counter value and
99  // produces an i1 to guard the loop entry.
100  bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,
101  DominatorTree &DT, bool ForceNestedLoop = false,
102  bool ForceHardwareLoopPHI = false);
103  bool canAnalyze(LoopInfo &LI);
104 };
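// Illustrative sketch, not part of the original header: a hardware-loop pass
// holding ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT,
// AssumptionCache &AC, TargetLibraryInfo *LibInfo and a TargetTransformInfo
// &TTI (all assumed) might drive these hooks roughly as follows:
//
//   HardwareLoopInfo HWLoopInfo(L);
//   if (HWLoopInfo.canAnalyze(LI) &&
//       TTI.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo) &&
//       HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT))
//     ; // rewrite L as a hardware loop using HWLoopInfo.CountType etc.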
105 
106 /// This pass provides access to the codegen interfaces that are needed
107 /// for IR-level transformations.
108 class TargetTransformInfo {
109 public:
110  /// Construct a TTI object using a type implementing the \c Concept
111  /// API below.
112  ///
113  /// This is used by targets to construct a TTI wrapping their target-specific
114  /// implementation that encodes appropriate costs for their target.
115  template <typename T> TargetTransformInfo(T Impl);
116 
117  /// Construct a baseline TTI object using a minimal implementation of
118  /// the \c Concept API below.
119  ///
120  /// The TTI implementation will reflect the information in the DataLayout
121  /// provided if non-null.
122  explicit TargetTransformInfo(const DataLayout &DL);
123 
124  // Provide move semantics.
125  TargetTransformInfo(TargetTransformInfo &&Arg);
126  TargetTransformInfo &operator=(TargetTransformInfo &&RHS);
127 
128  // We need to define the destructor out-of-line to define our sub-classes
129  // out-of-line.
130  ~TargetTransformInfo();
131 
132  /// Handle the invalidation of this information.
133  ///
134  /// When used as a result of \c TargetIRAnalysis this method will be called
135  /// when the function this was computed for changes. When it returns false,
136  /// the information is preserved across those changes.
137  bool invalidate(Function &, const PreservedAnalyses &,
138  FunctionAnalysisManager::Invalidator &) {
139  // FIXME: We should probably in some way ensure that the subtarget
140  // information for a function hasn't changed.
141  return false;
142  }
143 
144  /// \name Generic Target Information
145  /// @{
146 
147  /// The kind of cost model.
148  ///
149  /// There are several different cost models that can be customized by the
150  /// target. The normalization of each cost model may be target specific.
151  enum TargetCostKind {
152  TCK_RecipThroughput, ///< Reciprocal throughput.
153  TCK_Latency, ///< The latency of instruction.
154  TCK_CodeSize ///< Instruction code size.
155  };
156 
157  /// Query the cost of a specified instruction.
158  ///
159  /// Clients should use this interface to query the cost of an existing
160  /// instruction. The instruction must have a valid parent (basic block).
161  ///
162  /// Note, this method does not cache the cost calculation and it
163  /// can be expensive in some cases.
164  int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const {
165  switch (kind){
166  case TCK_RecipThroughput:
167  return getInstructionThroughput(I);
168 
169  case TCK_Latency:
170  return getInstructionLatency(I);
171 
172  case TCK_CodeSize:
173  return getUserCost(I);
174  }
175  llvm_unreachable("Unknown instruction cost kind");
176  }
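// Illustrative sketch, not part of the original header: summing a rough
// reciprocal-throughput cost over a Function &F, given a
// TargetTransformInfo &TTI (both assumed):
//
//   int Cost = 0;
//   for (const BasicBlock &BB : F)
//     for (const Instruction &I : BB)
//       Cost += TTI.getInstructionCost(&I,
//                                      TargetTransformInfo::TCK_RecipThroughput);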
177 
178  /// Underlying constants for 'cost' values in this interface.
179  ///
180  /// Many APIs in this interface return a cost. This enum defines the
181  /// fundamental values that should be used to interpret (and produce) those
182  /// costs. The costs are returned as an int rather than a member of this
183  /// enumeration because it is expected that the cost of one IR instruction
184  /// may have a multiplicative factor to it or otherwise won't fit directly
185  /// into the enum. Moreover, it is common to sum or average costs which works
186  /// better as simple integral values. Thus this enum only provides constants.
187  /// Also note that the returned costs are signed integers to make it natural
188  /// to add, subtract, and test with zero (a common boundary condition). It is
189  /// not expected that 2^32 is a realistic cost to be modeling at any point.
190  ///
191  /// Note that these costs should usually reflect the intersection of code-size
192  /// cost and execution cost. A free instruction is typically one that folds
193  /// into another instruction. For example, reg-to-reg moves can often be
194  /// skipped by renaming the registers in the CPU, but they still are encoded
195  /// and thus wouldn't be considered 'free' here.
196  enum TargetCostConstants {
197  TCC_Free = 0, ///< Expected to fold away in lowering.
198  TCC_Basic = 1, ///< The cost of a typical 'add' instruction.
199  TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
200  };
201 
202  /// Estimate the cost of a specific operation when lowered.
203  ///
204  /// Note that this is designed to work on an arbitrary synthetic opcode, and
205  /// thus work for hypothetical queries before an instruction has even been
206  /// formed. However, this does *not* work for GEPs, and must not be called
207  /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as
208  /// analyzing a GEP's cost requires more information.
209  ///
210  /// Typically only the result type is required, and the operand type can be
211  /// omitted. However, if the opcode is one of the cast instructions, the
212  /// operand type is required.
213  ///
214  /// The returned cost is defined in terms of \c TargetCostConstants, see its
215  /// comments for a detailed explanation of the cost values.
216  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const;
217 
218  /// Estimate the cost of a GEP operation when lowered.
219  ///
220  /// The contract for this function is the same as \c getOperationCost except
221  /// that it supports an interface that provides extra information specific to
222  /// the GEP operation.
223  int getGEPCost(Type *PointeeType, const Value *Ptr,
224  ArrayRef<const Value *> Operands) const;
225 
226  /// Estimate the cost of an EXT operation when lowered.
227  ///
228  /// The contract for this function is the same as \c getOperationCost except
229  /// that it supports an interface that provides extra information specific to
230  /// the EXT operation.
231  int getExtCost(const Instruction *I, const Value *Src) const;
232 
233  /// Estimate the cost of a function call when lowered.
234  ///
235  /// The contract for this is the same as \c getOperationCost except that it
236  /// supports an interface that provides extra information specific to call
237  /// instructions.
238  ///
239  /// This is the most basic query for estimating call cost: it only knows the
240  /// function type and (potentially) the number of arguments at the call site.
241  /// The latter is only interesting for varargs function types.
242  int getCallCost(FunctionType *FTy, int NumArgs = -1,
243  const User *U = nullptr) const;
244 
245  /// Estimate the cost of calling a specific function when lowered.
246  ///
247  /// This overload adds the ability to reason about the particular function
248  /// being called in the event it is a library call with special lowering.
249  int getCallCost(const Function *F, int NumArgs = -1,
250  const User *U = nullptr) const;
251 
252  /// Estimate the cost of calling a specific function when lowered.
253  ///
254  /// This overload allows specifying a set of candidate argument values.
255  int getCallCost(const Function *F, ArrayRef<const Value *> Arguments,
256  const User *U = nullptr) const;
257 
258  /// \returns A value by which our inlining threshold should be multiplied.
259  /// This is primarily used to bump up the inlining threshold wholesale on
260  /// targets where calls are unusually expensive.
261  ///
262  /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
263  /// individual classes of instructions would be better.
264  unsigned getInliningThresholdMultiplier() const;
265 
266  /// \returns Vector bonus in percent.
267  ///
268  /// Vector bonuses: We want to more aggressively inline vector-dense kernels
269  /// and apply this bonus based on the percentage of vector instructions. A
270  /// bonus is applied if the vector instructions exceed 50% and half that amount
271  /// is applied if it exceeds 10%. Note that these bonuses are somewhat
272  /// arbitrary and evolved over time by accident as much as because they are
273  /// principled bonuses.
274  /// FIXME: It would be nice to base the bonus values on something more
275  /// scientific. A target may have no bonus on vector instructions.
276  int getInlinerVectorBonusPercent() const;
277 
278  /// Estimate the cost of an intrinsic when lowered.
279  ///
280  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
281  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
282  ArrayRef<Type *> ParamTys,
283  const User *U = nullptr) const;
284 
285  /// Estimate the cost of an intrinsic when lowered.
286  ///
287  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
288  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
289  ArrayRef<const Value *> Arguments,
290  const User *U = nullptr) const;
291 
292  /// \return the expected cost of a memcpy, which could e.g. depend on the
293  /// source/destination type and alignment and the number of bytes copied.
294  int getMemcpyCost(const Instruction *I) const;
295 
296  /// \return The estimated number of case clusters when lowering \p 'SI'.
297  /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
298  /// table.
299  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
300  unsigned &JTSize) const;
301 
302  /// Estimate the cost of a given IR user when lowered.
303  ///
304  /// This can estimate the cost of either a ConstantExpr or Instruction when
305  /// lowered. It has two primary advantages over the \c getOperationCost and
306  /// \c getGEPCost above, and one significant disadvantage: it can only be
307  /// used when the IR construct has already been formed.
308  ///
309  /// The advantages are that it can inspect the SSA use graph to reason more
310  /// accurately about the cost. For example, all-constant-GEPs can often be
311  /// folded into a load or other instruction, but if they are used in some
312  /// other context they may not be folded. This routine can distinguish such
313  /// cases.
314  ///
315  /// \p Operands is a list of operands which can be a result of transformations
316  /// of the current operands. The number of operands on the list must equal
317  /// the number of current operands the IR user has. Their order on the
318  /// list must be the same as the order of the current operands the IR user
319  /// has.
320  ///
321  /// The returned cost is defined in terms of \c TargetCostConstants, see its
322  /// comments for a detailed explanation of the cost values.
323  int getUserCost(const User *U, ArrayRef<const Value *> Operands) const;
324 
325  /// This is a helper function which calls the two-argument getUserCost
326  /// with \p Operands which are the current operands U has.
327  int getUserCost(const User *U) const {
328  SmallVector<const Value *, 4> Operands(U->value_op_begin(),
329  U->value_op_end());
330  return getUserCost(U, Operands);
331  }
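// Illustrative sketch, not part of the original header: querying the cost of
// a user as if one of its operands had already been replaced (U, TTI and
// NewOp are assumed/hypothetical):
//
//   SmallVector<const Value *, 4> Ops(U->value_op_begin(), U->value_op_end());
//   Ops[1] = NewOp; // pretend the second operand was simplified
//   int Cost = TTI.getUserCost(U, Ops);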
332 
333  /// Return true if branch divergence exists.
334  ///
335  /// Branch divergence has a significantly negative impact on GPU performance
336  /// when threads in the same wavefront take different paths due to conditional
337  /// branches.
338  bool hasBranchDivergence() const;
339 
340  /// Returns whether V is a source of divergence.
341  ///
342  /// This function provides the target-dependent information for
343  /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis first
344  /// builds the dependency graph, and then runs the reachability algorithm
345  /// starting with the sources of divergence.
346  bool isSourceOfDivergence(const Value *V) const;
347 
348  // Returns true for the target-specific set of operations
349  // which produce a uniform result even when taking
350  // non-uniform arguments.
351  bool isAlwaysUniform(const Value *V) const;
352 
353  /// Returns the address space ID for a target's 'flat' address space. Note
354  /// this is not necessarily the same as addrspace(0), which LLVM sometimes
355  /// refers to as the generic address space. The flat address space is a
356  /// generic address space that can be used to access multiple segments of memory
357  /// with different address spaces. Access of a memory location through a
358  /// pointer with this address space is expected to be legal but slower
359  /// compared to the same memory location accessed through a pointer with a
360  /// different address space.
361  //
362  /// This is for targets with different pointer representations which can
363  /// be converted with the addrspacecast instruction. If a pointer is converted
364  /// to this address space, optimizations should attempt to replace the access
365  /// with the source address space.
366  ///
367  /// \returns ~0u if the target does not have such a flat address space to
368  /// optimize away.
369  unsigned getFlatAddressSpace() const;
370 
371  /// Return any intrinsic address operand indexes which may be rewritten if
372  /// they use a flat address space pointer.
373  ///
374  /// \returns true if the intrinsic was handled.
375  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
376  Intrinsic::ID IID) const;
377 
378  /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
379  /// NewV, which has a different address space. This should happen for every
380  /// operand index that collectFlatAddressOperands returned for the intrinsic.
381  /// \returns true if the intrinsic was handled.
382  bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
383  Value *OldV, Value *NewV) const;
384 
385  /// Test whether calls to a function lower to actual program function
386  /// calls.
387  ///
388  /// The idea is to test whether the program is likely to require a 'call'
389  /// instruction or equivalent in order to call the given function.
390  ///
391  /// FIXME: It's not clear that this is a good or useful query API. Clients
392  /// should probably move to simpler cost metrics using the above.
393  /// Alternatively, we could split the cost interface into distinct code-size
394  /// and execution-speed costs. This would allow modelling the core of this
395  /// query more accurately as a call is a single small instruction, but
396  /// incurs significant execution cost.
397  bool isLoweredToCall(const Function *F) const;
398 
399  struct LSRCost {
400  /// TODO: Some of these could be merged. Also, a lexical ordering
401  /// isn't always optimal.
402  unsigned Insns;
403  unsigned NumRegs;
404  unsigned AddRecCost;
405  unsigned NumIVMuls;
406  unsigned NumBaseAdds;
407  unsigned ImmCost;
408  unsigned SetupCost;
409  unsigned ScaleCost;
410  };
411 
412  /// Parameters that control the generic loop unrolling transformation.
413  struct UnrollingPreferences {
414  /// The cost threshold for the unrolled loop. Should be relative to the
415  /// getUserCost values returned by this API, and the expectation is that
416  /// the unrolled loop's instructions when run through that interface should
417  /// not exceed this cost. However, this is only an estimate. Also, specific
418  /// loops may be unrolled even with a cost above this threshold if deemed
419  /// profitable. Set this to UINT_MAX to disable the loop body cost
420  /// restriction.
421  unsigned Threshold;
422  /// If complete unrolling will reduce the cost of the loop, we will boost
423  /// the Threshold by a certain percent to allow more aggressive complete
424  /// unrolling. This value provides the maximum boost percentage that we
425  /// can apply to Threshold (The value should be no less than 100).
426  /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
427  /// MaxPercentThresholdBoost / 100)
428  /// E.g. if complete unrolling reduces the loop execution time by 50%
429  /// then we boost the threshold by the factor of 2x. If unrolling is not
430  /// expected to reduce the running time, then we do not increase the
431  /// threshold.
432  unsigned MaxPercentThresholdBoost;
433  /// The cost threshold for the unrolled loop when optimizing for size (set
434  /// to UINT_MAX to disable).
435  unsigned OptSizeThreshold;
436  /// The cost threshold for the unrolled loop, like Threshold, but used
437  /// for partial/runtime unrolling (set to UINT_MAX to disable).
438  unsigned PartialThreshold;
439  /// The cost threshold for the unrolled loop when optimizing for size, like
440  /// OptSizeThreshold, but used for partial/runtime unrolling (set to
441  /// UINT_MAX to disable).
442  unsigned PartialOptSizeThreshold;
443  /// A forced unrolling factor (the number of concatenated bodies of the
444  /// original loop in the unrolled loop body). When set to 0, the unrolling
445  /// transformation will select an unrolling factor based on the current cost
446  /// threshold and other factors.
447  unsigned Count;
448  /// A forced peeling factor (the number of bodies of the original loop
449  /// that should be peeled off before the loop body). When set to 0, the
450  /// unrolling transformation will select a peeling factor based on profile
451  /// information and other factors.
452  unsigned PeelCount;
453  /// Default unroll count for loops with run-time trip count.
454  unsigned DefaultUnrollRuntimeCount;
455  // Set the maximum unrolling factor. The unrolling factor may be selected
456  // using the appropriate cost threshold, but may not exceed this number
457  // (set to UINT_MAX to disable). This does not apply in cases where the
458  // loop is being fully unrolled.
459  unsigned MaxCount;
460  /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
461  /// applies even if full unrolling is selected. This allows a target to fall
462  /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
463  unsigned FullUnrollMaxCount;
464  // Represents number of instructions optimized when "back edge"
465  // becomes "fall through" in unrolled loop.
466  // For now we count a conditional branch on a backedge and a comparison
467  // feeding it.
468  unsigned BEInsns;
469  /// Allow partial unrolling (unrolling of loops to expand the size of the
470  /// loop body, not only to eliminate small constant-trip-count loops).
471  bool Partial;
472  /// Allow runtime unrolling (unrolling of loops to expand the size of the
473  /// loop body even when the number of loop iterations is not known at
474  /// compile time).
475  bool Runtime;
476  /// Allow generation of a loop remainder (extra iterations after unroll).
477  bool AllowRemainder;
478  /// Allow emitting expensive instructions (such as divisions) when computing
479  /// the trip count of a loop for runtime unrolling.
480  bool AllowExpensiveTripCount;
481  /// Apply loop unroll on any kind of loop
482  /// (mainly to loops that fail runtime unrolling).
483  bool Force;
484  /// Allow using trip count upper bound to unroll loops.
485  bool UpperBound;
486  /// Allow peeling off loop iterations.
487  bool AllowPeeling;
488  /// Allow unrolling of all the iterations of the runtime loop remainder.
489  bool UnrollRemainder;
490  /// Allow unroll and jam. Used to enable unroll and jam for the target.
491  bool UnrollAndJam;
492  /// Allow peeling based on profile. Used to enable peeling off all
493  /// iterations based on the provided profile.
494  /// If the value is true the peeling cost model can decide to peel only
495  /// some iterations and in this case it will set this to false.
496  bool PeelProfiledIterations;
497  /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
498  /// value above is used during unroll and jam for the outer loop size.
499  /// This value is used in the same manner to limit the size of the inner
500  /// loop.
501  unsigned UnrollAndJamInnerLoopThreshold;
502  };
503 
504  /// Get target-customized preferences for the generic loop unrolling
505  /// transformation. The caller will initialize UP with the current
506  /// target-independent defaults.
507  void getUnrollingPreferences(Loop *L, ScalarEvolution &,
508  UnrollingPreferences &UP) const;
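// Illustrative sketch, not part of the original header: a target's TTI
// implementation (XYZTTIImpl is a hypothetical name) might tune the
// caller-initialized defaults roughly like this:
//
//   void XYZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
//                                            TTI::UnrollingPreferences &UP) {
//     UP.Partial = true;  // allow partial unrolling
//     UP.Runtime = true;  // allow runtime unrolling
//     UP.MaxCount = 4;    // but cap the unroll factor
//   }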
509 
510  /// Query the target whether it would be profitable to convert the given loop
511  /// into a hardware loop.
512  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
513  AssumptionCache &AC,
514  TargetLibraryInfo *LibInfo,
515  HardwareLoopInfo &HWLoopInfo) const;
516 
517  /// @}
518 
519  /// \name Scalar Target Information
520  /// @{
521 
522  /// Flags indicating the kind of support for population count.
523  ///
524  /// Compared to the SW implementation, HW support is supposed to
525  /// significantly boost the performance when the population is dense, and it
526  /// may or may not degrade performance if the population is sparse. A HW
527  /// support is considered as "Fast" if it can outperform, or is on a par
528  /// with, SW implementation when the population is sparse; otherwise, it is
529  /// considered as "Slow".
530  enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };
531 
532  /// Return true if the specified immediate is a legal add immediate, that
533  /// is the target has add instructions which can add a register with the
534  /// immediate without having to materialize the immediate into a register.
535  bool isLegalAddImmediate(int64_t Imm) const;
536 
537  /// Return true if the specified immediate is a legal icmp immediate,
538  /// that is the target has icmp instructions which can compare a register
539  /// against the immediate without having to materialize the immediate into a
540  /// register.
541  bool isLegalICmpImmediate(int64_t Imm) const;
542 
543  /// Return true if the addressing mode represented by AM is legal for
544  /// this target, for a load/store of the specified type.
545  /// The type may be VoidTy, in which case only return true if the addressing
546  /// mode is legal for a load/store of any legal type.
547  /// If target returns true in LSRWithInstrQueries(), I may be valid.
548  /// TODO: Handle pre/postinc as well.
549  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
550  bool HasBaseReg, int64_t Scale,
551  unsigned AddrSpace = 0,
552  Instruction *I = nullptr) const;
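// Illustrative sketch, not part of the original header: asking whether
// "base + 4*index + 16" can be folded into a load of type Ty (Ty and TTI
// assumed):
//
//   bool Folds = TTI.isLegalAddressingMode(Ty, /*BaseGV=*/nullptr,
//                                          /*BaseOffset=*/16,
//                                          /*HasBaseReg=*/true, /*Scale=*/4);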
553 
554  /// Return true if the LSR cost of C1 is lower than that of C2.
555  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
556  TargetTransformInfo::LSRCost &C2) const;
557 
558  /// Return true if the target can fuse a compare and branch.
559  /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
560  /// calculation for the instructions in a loop.
561  bool canMacroFuseCmp() const;
562 
563  /// Return true if the target can save a compare for loop count, for example
564  /// hardware loop saves a compare.
565  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
566  DominatorTree *DT, AssumptionCache *AC,
567  TargetLibraryInfo *LibInfo) const;
568 
569  /// \return True if LSR should make efforts to create/preserve post-inc
570  /// addressing mode expressions.
571  bool shouldFavorPostInc() const;
572 
573  /// Return true if LSR should make efforts to generate indexed addressing
574  /// modes that operate across loop iterations.
575  bool shouldFavorBackedgeIndex(const Loop *L) const;
576 
577  /// Return true if the target supports masked store.
578  bool isLegalMaskedStore(Type *DataType) const;
579  /// Return true if the target supports masked load.
580  bool isLegalMaskedLoad(Type *DataType) const;
581 
582  /// Return true if the target supports nontemporal store.
583  bool isLegalNTStore(Type *DataType, unsigned Alignment) const;
584  /// Return true if the target supports nontemporal load.
585  bool isLegalNTLoad(Type *DataType, unsigned Alignment) const;
586 
587  /// Return true if the target supports masked scatter.
588  bool isLegalMaskedScatter(Type *DataType) const;
589  /// Return true if the target supports masked gather.
590  bool isLegalMaskedGather(Type *DataType) const;
591 
592  /// Return true if the target supports masked compress store.
593  bool isLegalMaskedCompressStore(Type *DataType) const;
594  /// Return true if the target supports masked expand load.
595  bool isLegalMaskedExpandLoad(Type *DataType) const;
596 
597  /// Return true if the target has a unified operation to calculate division
598  /// and remainder. If so, the additional implicit multiplication and
599  /// subtraction required to calculate a remainder from division are free. This
600  /// can enable more aggressive transformations for division and remainder than
601  /// would typically be allowed using throughput or size cost models.
602  bool hasDivRemOp(Type *DataType, bool IsSigned) const;
603 
604  /// Return true if the given instruction (assumed to be a memory access
605  /// instruction) has a volatile variant. If that's the case then we can avoid
606  /// addrspacecast to generic AS for volatile loads/stores. Default
607  /// implementation returns false, which prevents address space inference for
608  /// volatile loads/stores.
609  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
610 
611  /// Return true if target doesn't mind addresses in vectors.
612  bool prefersVectorizedAddressing() const;
613 
614  /// Return the cost of the scaling factor used in the addressing
615  /// mode represented by AM for this target, for a load/store
616  /// of the specified type.
617  /// If the AM is supported, the return value must be >= 0.
618  /// If the AM is not supported, it returns a negative value.
619  /// TODO: Handle pre/postinc as well.
620  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
621  bool HasBaseReg, int64_t Scale,
622  unsigned AddrSpace = 0) const;
623 
624  /// Return true if the loop strength reduce pass should make
625  /// Instruction* based TTI queries to isLegalAddressingMode(). This is
626  /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
627  /// immediate offset and no index register.
628  bool LSRWithInstrQueries() const;
629 
630  /// Return true if it's free to truncate a value of type Ty1 to type
631  /// Ty2. e.g. On x86 it's free to truncate an i32 value in register EAX to i16
632  /// by referencing its sub-register AX.
633  bool isTruncateFree(Type *Ty1, Type *Ty2) const;
634 
635  /// Return true if it is profitable to hoist instructions in the
636  /// then/else blocks to before the if.
637  bool isProfitableToHoist(Instruction *I) const;
638 
639  bool useAA() const;
640 
641  /// Return true if this type is legal.
642  bool isTypeLegal(Type *Ty) const;
643 
644  /// Returns the target's jmp_buf alignment in bytes.
645  unsigned getJumpBufAlignment() const;
646 
647  /// Returns the target's jmp_buf size in bytes.
648  unsigned getJumpBufSize() const;
649 
650  /// Return true if switches should be turned into lookup tables for the
651  /// target.
652  bool shouldBuildLookupTables() const;
653 
654  /// Return true if switches should be turned into lookup tables
655  /// containing this constant value for the target.
656  bool shouldBuildLookupTablesForConstant(Constant *C) const;
657 
658  /// Return true if the input function, which is cold at all call sites,
659  /// should use the coldcc calling convention.
660  bool useColdCCForColdCall(Function &F) const;
661 
662  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
663 
664  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
665  unsigned VF) const;
666 
667  /// If target has efficient vector element load/store instructions, it can
668  /// return true here so that insertion/extraction costs are not added to
669  /// the scalarization cost of a load/store.
670  bool supportsEfficientVectorElementLoadStore() const;
671 
672  /// Don't restrict interleaved unrolling to small loops.
673  bool enableAggressiveInterleaving(bool LoopHasReductions) const;
674 
675  /// Returns options for expansion of memcmp. IsZeroCmp is
676  /// true if this is the expansion of memcmp(p1, p2, s) == 0.
677  struct MemCmpExpansionOptions {
678  // Return true if memcmp expansion is enabled.
679  operator bool() const { return MaxNumLoads > 0; }
680 
681  // Maximum number of load operations.
682  unsigned MaxNumLoads = 0;
683 
684  // The list of available load sizes (in bytes), sorted in decreasing order.
685  SmallVector<unsigned, 8> LoadSizes;
686 
687  // For memcmp expansion when the memcmp result is only compared equal or
688  // not-equal to 0, allow up to this number of load pairs per block. As an
689  // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
690  // a0 = load2bytes &a[0]
691  // b0 = load2bytes &b[0]
692  // a2 = load1byte &a[2]
693  // b2 = load1byte &b[2]
694  // r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
695  unsigned NumLoadsPerBlock = 1;
696 
697  // Set to true to allow overlapping loads. For example, 7-byte compares can
698  // be done with two 4-byte compares instead of 4+2+1-byte compares. This
699  // requires all loads in LoadSizes to be doable in an unaligned way.
700  bool AllowOverlappingLoads = false;
701  };
702  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
703  bool IsZeroCmp) const;
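// Illustrative sketch, not part of the original header: a target that can do
// unaligned 8- and 4-byte loads might return options along these lines:
//
//   TTI::MemCmpExpansionOptions Options;
//   Options.MaxNumLoads = 4;                 // non-zero enables expansion
//   Options.LoadSizes.append({8, 4, 2, 1});  // decreasing sizes, in bytes
//   Options.NumLoadsPerBlock = 2;
//   Options.AllowOverlappingLoads = true;
//   return Options;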
704 
705  /// Enable matching of interleaved access groups.
706  bool enableInterleavedAccessVectorization() const;
707 
708  /// Enable matching of interleaved access groups that contain predicated
709  /// accesses or gaps and are therefore vectorized using masked
710  /// vector loads/stores.
711  bool enableMaskedInterleavedAccessVectorization() const;
712 
713  /// Indicate that it is potentially unsafe to automatically vectorize
714  /// floating-point operations because the semantics of vector and scalar
715  /// floating-point operations may differ. For example, ARM NEON v7 SIMD math
716  /// does not support IEEE-754 denormal numbers, while depending on the
717  /// platform, scalar floating-point math does.
718  /// This applies to floating-point math operations and calls, not memory
719  /// operations, shuffles, or casts.
720  bool isFPVectorizationPotentiallyUnsafe() const;
721 
722  /// Determine if the target supports unaligned memory accesses.
723  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
724  unsigned BitWidth, unsigned AddressSpace = 0,
725  unsigned Alignment = 1,
726  bool *Fast = nullptr) const;
727 
728  /// Return hardware support for population count.
729  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
730 
731  /// Return true if the hardware has a fast square-root instruction.
732  bool haveFastSqrt(Type *Ty) const;
733 
734  /// Return true if it is faster to check if a floating-point value is NaN
735  /// (or not-NaN) versus a comparison against a constant FP zero value.
736  /// Targets should override this if materializing a 0.0 for comparison is
737  /// generally as cheap as checking for ordered/unordered.
738  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
739 
740  /// Return the expected cost of supporting the floating point operation
741  /// of the specified type.
742  int getFPOpCost(Type *Ty) const;
743 
744  /// Return the expected cost of materializing the given integer
745  /// immediate of the specified type.
746  int getIntImmCost(const APInt &Imm, Type *Ty) const;
747 
748  /// Return the expected cost of materialization for the given integer
749  /// immediate of the specified type for a given instruction. The cost can be
750  /// zero if the immediate can be folded into the specified instruction.
751  int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
752  Type *Ty) const;
753  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
754  Type *Ty) const;
755 
756  /// Return the expected cost for the given integer when optimising
757  /// for size. This is different than the other integer immediate cost
758  /// functions in that it is subtarget agnostic. This is useful when you e.g.
759  /// target one ISA such as Aarch32 but smaller encodings could be possible
760  /// with another such as Thumb. This return value is used as a penalty when
761  /// the total cost for a constant is calculated (the bigger the cost, the
762  /// more beneficial constant hoisting is).
763  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
764  Type *Ty) const;
765  /// @}
766 
767  /// \name Vector Target Information
768  /// @{
769 
770  /// The various kinds of shuffle patterns for vector queries.
771  enum ShuffleKind {
772  SK_Broadcast, ///< Broadcast element 0 to all other elements.
773  SK_Reverse, ///< Reverse the order of the vector.
774  SK_Select, ///< Selects elements from the corresponding lane of
775  ///< either source operand. This is equivalent to a
776  ///< vector select with a constant condition operand.
777  SK_Transpose, ///< Transpose two vectors.
778  SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
779  SK_ExtractSubvector, ///< ExtractSubvector. Index indicates start offset.
780  SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
781  ///< with any shuffle mask.
782  SK_PermuteSingleSrc ///< Shuffle elements of single source vector with any
783  ///< shuffle mask.
784  };
785 
786  /// Additional information about an operand's possible values.
787  enum OperandValueKind {
788  OK_AnyValue, // Operand can have any value.
789  OK_UniformValue, // Operand is uniform (splat of a value).
790  OK_UniformConstantValue, // Operand is uniform constant.
791  OK_NonUniformConstantValue // Operand is a non uniform constant value.
792  };
793 
794  /// Additional properties of an operand's values.
795  enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
796 
797  /// \return The number of scalar or vector registers that the target has.
798  /// If 'Vectors' is true, it returns the number of vector registers. If it is
799  /// set to false, it returns the number of scalar registers.
800  unsigned getNumberOfRegisters(bool Vector) const;
801 
802  /// \return The width of the largest scalar or vector register type.
803  unsigned getRegisterBitWidth(bool Vector) const;
804 
805  /// \return The width of the smallest vector register type.
806  unsigned getMinVectorRegisterBitWidth() const;
807 
808  /// \return True if the vectorization factor should be chosen to
809  /// make the vector of the smallest element type match the size of a
810  /// vector register. For wider element types, this could result in
811  /// creating vectors that span multiple vector registers.
812  /// If false, the vectorization factor will be chosen based on the
813  /// size of the widest element type.
814  bool shouldMaximizeVectorBandwidth(bool OptSize) const;
815 
816  /// \return The minimum vectorization factor for types of given element
817  /// bit width, or 0 if there is no minimum VF. The returned value only
818  /// applies when shouldMaximizeVectorBandwidth returns true.
819  unsigned getMinimumVF(unsigned ElemWidth) const;
820 
821  /// \return True if it should be considered for address type promotion.
822  /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
823  /// profitable without finding other extensions fed by the same input.
824  bool shouldConsiderAddressTypePromotion(
825  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
826 
827  /// \return The size of a cache line in bytes.
828  unsigned getCacheLineSize() const;
829 
830  /// The possible cache levels
831  enum class CacheLevel {
832  L1D, // The L1 data cache
833  L2D, // The L2 data cache
834 
835  // We currently do not model L3 caches, as their sizes differ widely between
836  // microarchitectures. Also, we currently do not have a use for L3 cache
837  // size modeling yet.
838  };
839 
840  /// \return The size of the cache level in bytes, if available.
841  llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const;
842 
843  /// \return The associativity of the cache level, if available.
844  llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
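// Illustrative sketch, not part of the original header: both cache queries
// return llvm::Optional, so absent information is explicit:
//
//   if (llvm::Optional<unsigned> L1Size =
//           TTI.getCacheSize(TargetTransformInfo::CacheLevel::L1D))
//     ; // tile using *L1Size bytes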
845 
846  /// \return How much before a load we should place the prefetch instruction.
847  /// This is currently measured in number of instructions.
848  unsigned getPrefetchDistance() const;
849 
850  /// \return Some HW prefetchers can handle accesses up to a certain constant
851  /// stride. This is the minimum stride in bytes where it makes sense to start
852  /// adding SW prefetches. The default is 1, i.e. prefetch with any stride.
853  unsigned getMinPrefetchStride() const;
854 
855  /// \return The maximum number of iterations to prefetch ahead. If the
856  /// required number of iterations is more than this number, no prefetching is
857  /// performed.
858  unsigned getMaxPrefetchIterationsAhead() const;
859 
860  /// \return The maximum interleave factor that any transform should try to
861  /// perform for this target. This number depends on the level of parallelism
862  /// and the number of execution units in the CPU.
863  unsigned getMaxInterleaveFactor(unsigned VF) const;
864 
865  /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
866  static OperandValueKind getOperandInfo(Value *V,
867  OperandValueProperties &OpProps);
868 
869  /// This is an approximation of reciprocal throughput of a math/logic op.
870  /// A higher cost indicates less expected throughput.
871  /// From Agner Fog's guides, reciprocal throughput is "the average number of
872  /// clock cycles per instruction when the instructions are not part of a
873  /// limiting dependency chain."
874  /// Therefore, costs should be scaled to account for multiple execution units
875  /// on the target that can process this type of instruction. For example, if
876  /// there are 5 scalar integer units and 2 vector integer units that can
877  /// calculate an 'add' in a single cycle, this model should indicate that the
878  /// cost of the vector add instruction is 2.5 times the cost of the scalar
879  /// add instruction.
880  /// \p Args is an optional argument which holds the instruction operands
881  /// values so the TTI can analyze those values searching for special
882  /// cases or optimizations based on those values.
883  int getArithmeticInstrCost(
884  unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
885  OperandValueKind Opd2Info = OK_AnyValue,
886  OperandValueProperties Opd1PropInfo = OP_None,
887  OperandValueProperties Opd2PropInfo = OP_None,
888  ArrayRef<const Value *> Args = ArrayRef<const Value *>()) const;
889 
890  /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
891  /// The index and subtype parameters are used by the subvector insertion and
892  /// extraction shuffle kinds to show the insert/extract point and the type of
893  /// the subvector being inserted/extracted.
894  /// NOTE: For subvector extractions Tp represents the source type.
895  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
896  Type *SubTp = nullptr) const;
897 
898  /// \return The expected cost of cast instructions, such as bitcast, trunc,
899  /// zext, etc. If there is an existing instruction that holds Opcode, it
900  /// may be passed in the 'I' parameter.
901  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
902  const Instruction *I = nullptr) const;
903 
904  /// \return The expected cost of a sign- or zero-extended vector extract. Use
905  /// -1 to indicate that there is no information about the index value.
906  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
907  unsigned Index = -1) const;
908 
909  /// \return The expected cost of control-flow related instructions such as
910  /// Phi, Ret, Br.
911  int getCFInstrCost(unsigned Opcode) const;
912 
913  /// \returns The expected cost of compare and select instructions. If there
914  /// is an existing instruction that holds Opcode, it may be passed in the
915  /// 'I' parameter.
916  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
917  Type *CondTy = nullptr, const Instruction *I = nullptr) const;
918 
919  /// \return The expected cost of vector Insert and Extract.
920  /// Use -1 to indicate that there is no information on the index value.
921  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;
922 
923  /// \return The cost of Load and Store instructions.
924  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
925  unsigned AddressSpace, const Instruction *I = nullptr) const;
926 
927  /// \return The cost of masked Load and Store instructions.
928  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
929  unsigned AddressSpace) const;
930 
931  /// \return The cost of Gather or Scatter operation
932  /// \p Opcode - is a type of memory access Load or Store
933  /// \p DataTy - a vector type of the data to be loaded or stored
934  /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
935  /// \p VariableMask - true when the memory access is predicated with a mask
936  /// that is not a compile-time constant
937  /// \p Alignment - alignment of single element
938  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
939  bool VariableMask, unsigned Alignment) const;
940 
941  /// \return The cost of the interleaved memory operation.
942  /// \p Opcode is the memory operation code
943  /// \p VecTy is the vector type of the interleaved access.
944  /// \p Factor is the interleave factor
945  /// \p Indices is the indices for interleaved load members (as interleaved
946  /// load allows gaps)
947  /// \p Alignment is the alignment of the memory operation
948  /// \p AddressSpace is address space of the pointer.
949  /// \p UseMaskForCond indicates if the memory access is predicated.
950  /// \p UseMaskForGaps indicates if gaps should be masked.
951  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
952  ArrayRef<unsigned> Indices, unsigned Alignment,
953  unsigned AddressSpace,
954  bool UseMaskForCond = false,
955  bool UseMaskForGaps = false) const;
956 
957  /// Calculate the cost of performing a vector reduction.
958  ///
959  /// This is the cost of reducing the vector value of type \p Ty to a scalar
960  /// value using the operation denoted by \p Opcode. The form of the reduction
961  /// can either be a pairwise reduction or a reduction that splits the vector
962  /// at every reduction level.
963  ///
964  /// Pairwise:
965  /// (v0, v1, v2, v3)
966  /// ((v0+v1), (v2+v3), undef, undef)
967  /// Split:
968  /// (v0, v1, v2, v3)
969  /// ((v0+v2), (v1+v3), undef, undef)
970  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
971  bool IsPairwiseForm) const;
972  int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
973  bool IsUnsigned) const;
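// Illustrative sketch, not part of the original header: the cost of reducing
// a vector value of type VecTy with fadd using the split (non-pairwise) form
// (VecTy and TTI assumed):
//
//   int Cost = TTI.getArithmeticReductionCost(Instruction::FAdd, VecTy,
//                                             /*IsPairwiseForm=*/false);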
974 
975  /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
976  /// Three cases are handled: 1. scalar instruction 2. vector instruction
977  /// 3. scalar instruction which is to be vectorized with VF.
978  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
979  ArrayRef<Value *> Args, FastMathFlags FMF,
980  unsigned VF = 1) const;
981 
982  /// \returns The cost of Intrinsic instructions. Types analysis only.
983  /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
984  /// arguments and the return value will be computed based on types.
985  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
986  ArrayRef<Type *> Tys, FastMathFlags FMF,
987  unsigned ScalarizationCostPassed = UINT_MAX) const;
988 
989  /// \returns The cost of Call instructions.
990  int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
991 
992  /// \returns The number of pieces into which the provided type must be
993  /// split during legalization. Zero is returned when the answer is unknown.
994  unsigned getNumberOfParts(Type *Tp) const;
995 
996  /// \returns The cost of the address computation. For most targets this can be
997  /// merged into the instruction indexing mode. Some targets might want to
998  /// distinguish between address computation for memory operations on vector
999  /// types and scalar types. Such targets should override this function.
1000  /// The 'SE' parameter holds pointer for the scalar evolution object which
1001  /// is used in order to get the Ptr step value in case of constant stride.
1002  /// The 'Ptr' parameter holds SCEV of the access pointer.
1003  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,
1004  const SCEV *Ptr = nullptr) const;
1005 
1006  /// \returns The cost, if any, of keeping values of the given types alive
1007  /// over a callsite.
1008  ///
1009  /// Some types may require the use of register classes that do not have
1010  /// any callee-saved registers, so would require a spill and fill.
1011  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
1012 
1013  /// \returns True if the intrinsic is a supported memory intrinsic. Info
1014  /// will contain additional information - whether the intrinsic may write
1015  /// or read to memory, volatility and the pointer. Info is undefined
1016  /// if false is returned.
1017  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
1018 
1019  /// \returns The maximum element size, in bytes, for an element
1020  /// unordered-atomic memory intrinsic.
1021  unsigned getAtomicMemIntrinsicMaxElementSize() const;
1022 
1023  /// \returns A value which is the result of the given memory intrinsic. New
1024  /// instructions may be created to extract the result from the given intrinsic
1025  /// memory operation. Returns nullptr if the target cannot create a result
1026  /// from the given intrinsic.
1027  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1028  Type *ExpectedType) const;
1029 
1030  /// \returns The type to use in a loop expansion of a memcpy call.
1031  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1032  unsigned SrcAlign, unsigned DestAlign) const;
1033 
1034  /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
1035  /// \param RemainingBytes The number of bytes to copy.
1036  ///
1037  /// Calculates the operand types to use when copying \p RemainingBytes of
1038  /// memory, where source and destination alignments are \p SrcAlign and
1039  /// \p DestAlign respectively.
1040  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
1041  LLVMContext &Context,
1042  unsigned RemainingBytes,
1043  unsigned SrcAlign,
1044  unsigned DestAlign) const;
1045 
1046  /// \returns True if the two functions have compatible attributes for inlining
1047  /// purposes.
1048  bool areInlineCompatible(const Function *Caller,
1049  const Function *Callee) const;
1050 
1051  /// \returns True if the caller and callee agree on how \p Args will be passed
1052  /// to the callee.
1053  /// \param[out] Args The list of compatible arguments. The implementation may
1054  /// filter out any incompatible args from this list.
1055  bool areFunctionArgsABICompatible(const Function *Caller,
1056  const Function *Callee,
1057  SmallPtrSetImpl<Argument *> &Args) const;
1058 
1059  /// The type of load/store indexing.
1060  enum MemIndexedMode {
1061  MIM_Unindexed, ///< No indexing.
1062  MIM_PreInc, ///< Pre-incrementing.
1063  MIM_PreDec, ///< Pre-decrementing.
1064  MIM_PostInc, ///< Post-incrementing.
1065  MIM_PostDec ///< Post-decrementing.
1066  };
1067 
1068  /// \returns True if the specified indexed load for the given type is legal.
1069  bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
1070 
1071  /// \returns True if the specified indexed store for the given type is legal.
1072  bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
1073 
1074  /// \returns The bitwidth of the largest vector type that should be used to
1075  /// load/store in the given address space.
1076  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
1077 
1078  /// \returns True if the load instruction is legal to vectorize.
1079  bool isLegalToVectorizeLoad(LoadInst *LI) const;
1080 
1081  /// \returns True if the store instruction is legal to vectorize.
1082  bool isLegalToVectorizeStore(StoreInst *SI) const;
1083 
1084  /// \returns True if it is legal to vectorize the given load chain.
1085  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1086  unsigned Alignment,
1087  unsigned AddrSpace) const;
1088 
1089  /// \returns True if it is legal to vectorize the given store chain.
1090  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1091  unsigned Alignment,
1092  unsigned AddrSpace) const;
1093 
1094  /// \returns The new vector factor value if the target doesn't support \p
1095  /// SizeInBytes loads or has a better vector factor.
1096  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1097  unsigned ChainSizeInBytes,
1098  VectorType *VecTy) const;
1099 
1100  /// \returns The new vector factor value if the target doesn't support \p
1101  /// SizeInBytes stores or has a better vector factor.
1102  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1103  unsigned ChainSizeInBytes,
1104  VectorType *VecTy) const;
1105 
1106  /// Flags describing the kind of vector reduction.
1107  struct ReductionFlags {
1108  ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}
1109  bool IsMaxOp; ///< If the op is a min/max kind, true if it's a max operation.
1110  bool IsSigned; ///< Whether the operation is a signed int reduction.
1111  bool NoNaN; ///< If op is an fp min/max, whether NaNs may be present.
1112  };
1113 
1114  /// \returns True if the target wants to handle the given reduction idiom in
1115  /// the intrinsics form instead of the shuffle form.
1116  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
1117  ReductionFlags Flags) const;
1118 
1119  /// \returns True if the target wants to expand the given reduction intrinsic
1120  /// into a shuffle sequence.
1121  bool shouldExpandReduction(const IntrinsicInst *II) const;
1122 
1123  /// \returns the size cost of rematerializing a GlobalValue address relative
1124  /// to a stack reload.
1125  unsigned getGISelRematGlobalCost() const;
1126 
1127  /// @}
1128 
1129 private:
1130  /// Estimate the latency of specified instruction.
1131  /// Returns 1 as the default value.
1132  int getInstructionLatency(const Instruction *I) const;
1133 
1134  /// Returns the expected throughput cost of the instruction.
1135  /// Returns -1 if the cost is unknown.
1136  int getInstructionThroughput(const Instruction *I) const;
1137 
1138  /// The abstract base class used to type erase specific TTI
1139  /// implementations.
1140  class Concept;
1141 
1142  /// The template model for the base class which wraps a concrete
1143  /// implementation in a type erased interface.
1144  template <typename T> class Model;
1145 
1146  std::unique_ptr<Concept> TTIImpl;
1147 };
1148 
1149 class TargetTransformInfo::Concept {
1150 public:
1151  virtual ~Concept() = 0;
1152  virtual const DataLayout &getDataLayout() const = 0;
1153  virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
1154  virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
1155  ArrayRef<const Value *> Operands) = 0;
1156  virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
1157  virtual int getCallCost(FunctionType *FTy, int NumArgs, const User *U) = 0;
1158  virtual int getCallCost(const Function *F, int NumArgs, const User *U) = 0;
1159  virtual int getCallCost(const Function *F,
1160  ArrayRef<const Value *> Arguments, const User *U) = 0;
1161  virtual unsigned getInliningThresholdMultiplier() = 0;
1162  virtual int getInlinerVectorBonusPercent() = 0;
1163  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1164  ArrayRef<Type *> ParamTys, const User *U) = 0;
1165  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1166  ArrayRef<const Value *> Arguments,
1167  const User *U) = 0;
1168  virtual int getMemcpyCost(const Instruction *I) = 0;
1169  virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
1170  unsigned &JTSize) = 0;
1171  virtual int
1172  getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0;
1173  virtual bool hasBranchDivergence() = 0;
1174  virtual bool isSourceOfDivergence(const Value *V) = 0;
1175  virtual bool isAlwaysUniform(const Value *V) = 0;
1176  virtual unsigned getFlatAddressSpace() = 0;
1177  virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
1178  Intrinsic::ID IID) const = 0;
1179  virtual bool rewriteIntrinsicWithAddressSpace(
1180  IntrinsicInst *II, Value *OldV, Value *NewV) const = 0;
1181  virtual bool isLoweredToCall(const Function *F) = 0;
1182  virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
1183  UnrollingPreferences &UP) = 0;
1184  virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
1185  AssumptionCache &AC,
1186  TargetLibraryInfo *LibInfo,
1187  HardwareLoopInfo &HWLoopInfo) = 0;
1188  virtual bool isLegalAddImmediate(int64_t Imm) = 0;
1189  virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
1190  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
1191  int64_t BaseOffset, bool HasBaseReg,
1192  int64_t Scale,
1193  unsigned AddrSpace,
1194  Instruction *I) = 0;
1195  virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
1196  TargetTransformInfo::LSRCost &C2) = 0;
1197  virtual bool canMacroFuseCmp() = 0;
1198  virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
1199  LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
1200  TargetLibraryInfo *LibInfo) = 0;
1201  virtual bool shouldFavorPostInc() const = 0;
1202  virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0;
1203  virtual bool isLegalMaskedStore(Type *DataType) = 0;
1204  virtual bool isLegalMaskedLoad(Type *DataType) = 0;
1205  virtual bool isLegalNTStore(Type *DataType, unsigned Alignment) = 0;
1206  virtual bool isLegalNTLoad(Type *DataType, unsigned Alignment) = 0;
1207  virtual bool isLegalMaskedScatter(Type *DataType) = 0;
1208  virtual bool isLegalMaskedGather(Type *DataType) = 0;
1209  virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
1210  virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
1211  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
1212  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
1213  virtual bool prefersVectorizedAddressing() = 0;
1214  virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
1215  int64_t BaseOffset, bool HasBaseReg,
1216  int64_t Scale, unsigned AddrSpace) = 0;
1217  virtual bool LSRWithInstrQueries() = 0;
1218  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
1219  virtual bool isProfitableToHoist(Instruction *I) = 0;
1220  virtual bool useAA() = 0;
1221  virtual bool isTypeLegal(Type *Ty) = 0;
1222  virtual unsigned getJumpBufAlignment() = 0;
1223  virtual unsigned getJumpBufSize() = 0;
1224  virtual bool shouldBuildLookupTables() = 0;
1225  virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
1226  virtual bool useColdCCForColdCall(Function &F) = 0;
1227  virtual unsigned
1228  getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
1229  virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1230  unsigned VF) = 0;
1231  virtual bool supportsEfficientVectorElementLoadStore() = 0;
1232  virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
1233  virtual MemCmpExpansionOptions
1234  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
1235  virtual bool enableInterleavedAccessVectorization() = 0;
1236  virtual bool enableMaskedInterleavedAccessVectorization() = 0;
1237  virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
1238  virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
1239  unsigned BitWidth,
1240  unsigned AddressSpace,
1241  unsigned Alignment,
1242  bool *Fast) = 0;
1243  virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
1244  virtual bool haveFastSqrt(Type *Ty) = 0;
1245  virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
1246  virtual int getFPOpCost(Type *Ty) = 0;
1247  virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1248  Type *Ty) = 0;
1249  virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
1250  virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1251  Type *Ty) = 0;
1252  virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
1253  Type *Ty) = 0;
1254  virtual unsigned getNumberOfRegisters(bool Vector) = 0;
1255  virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
1256  virtual unsigned getMinVectorRegisterBitWidth() = 0;
1257  virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
1258  virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;
1259  virtual bool shouldConsiderAddressTypePromotion(
1260  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1261  virtual unsigned getCacheLineSize() = 0;
1262  virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) = 0;
1263  virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) = 0;
1264  virtual unsigned getPrefetchDistance() = 0;
1265  virtual unsigned getMinPrefetchStride() = 0;
1266  virtual unsigned getMaxPrefetchIterationsAhead() = 0;
1267  virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
1268  virtual unsigned
1269  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
1270  OperandValueKind Opd2Info,
1271  OperandValueProperties Opd1PropInfo,
1272  OperandValueProperties Opd2PropInfo,
1273  ArrayRef<const Value *> Args) = 0;
1274  virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
1275  Type *SubTp) = 0;
1276  virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1277  const Instruction *I) = 0;
1278  virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1279  VectorType *VecTy, unsigned Index) = 0;
1280  virtual int getCFInstrCost(unsigned Opcode) = 0;
1281  virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
1282  Type *CondTy, const Instruction *I) = 0;
1283  virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
1284  unsigned Index) = 0;
1285  virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1286  unsigned AddressSpace, const Instruction *I) = 0;
1287  virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
1288  unsigned Alignment,
1289  unsigned AddressSpace) = 0;
1290  virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
1291  Value *Ptr, bool VariableMask,
1292  unsigned Alignment) = 0;
1293  virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
1294  unsigned Factor,
1295  ArrayRef<unsigned> Indices,
1296  unsigned Alignment,
1297  unsigned AddressSpace,
1298  bool UseMaskForCond = false,
1299  bool UseMaskForGaps = false) = 0;
1300  virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1301  bool IsPairwiseForm) = 0;
1302  virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
1303  bool IsPairwiseForm, bool IsUnsigned) = 0;
1304  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1305  ArrayRef<Type *> Tys, FastMathFlags FMF,
1306  unsigned ScalarizationCostPassed) = 0;
1307  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1308  ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) = 0;
1309  virtual int getCallInstrCost(Function *F, Type *RetTy,
1310  ArrayRef<Type *> Tys) = 0;
1311  virtual unsigned getNumberOfParts(Type *Tp) = 0;
1312  virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
1313  const SCEV *Ptr) = 0;
1314  virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
1315  virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1316  MemIntrinsicInfo &Info) = 0;
1317  virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
1318  virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1319  Type *ExpectedType) = 0;
1320  virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1321  unsigned SrcAlign,
1322  unsigned DestAlign) const = 0;
1323  virtual void getMemcpyLoopResidualLoweringType(
1324  SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1325  unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0;
1326  virtual bool areInlineCompatible(const Function *Caller,
1327  const Function *Callee) const = 0;
1328  virtual bool
1329  areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
1330  SmallPtrSetImpl<Argument *> &Args) const = 0;
1331  virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1332  virtual bool isIndexedStoreLegal(MemIndexedMode Mode,Type *Ty) const = 0;
1333  virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
1334  virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
1335  virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
1336  virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1337  unsigned Alignment,
1338  unsigned AddrSpace) const = 0;
1339  virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1340  unsigned Alignment,
1341  unsigned AddrSpace) const = 0;
1342  virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1343  unsigned ChainSizeInBytes,
1344  VectorType *VecTy) const = 0;
1345  virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1346  unsigned ChainSizeInBytes,
1347  VectorType *VecTy) const = 0;
1348  virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
1349  ReductionFlags) const = 0;
1350  virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
1351  virtual unsigned getGISelRematGlobalCost() const = 0;
1352  virtual int getInstructionLatency(const Instruction *I) = 0;
1353 };
1354 
1355 template <typename T>
1356 class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
1357  T Impl;
1358 
1359 public:
1360  Model(T Impl) : Impl(std::move(Impl)) {}
1361  ~Model() override {}
1362 
1363  const DataLayout &getDataLayout() const override {
1364  return Impl.getDataLayout();
1365  }
1366 
1367  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
1368  return Impl.getOperationCost(Opcode, Ty, OpTy);
1369  }
1370  int getGEPCost(Type *PointeeType, const Value *Ptr,
1371  ArrayRef<const Value *> Operands) override {
1372  return Impl.getGEPCost(PointeeType, Ptr, Operands);
1373  }
1374  int getExtCost(const Instruction *I, const Value *Src) override {
1375  return Impl.getExtCost(I, Src);
1376  }
1377  int getCallCost(FunctionType *FTy, int NumArgs, const User *U) override {
1378  return Impl.getCallCost(FTy, NumArgs, U);
1379  }
1380  int getCallCost(const Function *F, int NumArgs, const User *U) override {
1381  return Impl.getCallCost(F, NumArgs, U);
1382  }
1383  int getCallCost(const Function *F,
1384  ArrayRef<const Value *> Arguments, const User *U) override {
1385  return Impl.getCallCost(F, Arguments, U);
1386  }
1387  unsigned getInliningThresholdMultiplier() override {
1388  return Impl.getInliningThresholdMultiplier();
1389  }
1390  int getInlinerVectorBonusPercent() override {
1391  return Impl.getInlinerVectorBonusPercent();
1392  }
1393  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1394  ArrayRef<Type *> ParamTys, const User *U = nullptr) override {
1395  return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U);
1396  }
1397  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1398  ArrayRef<const Value *> Arguments,
1399  const User *U = nullptr) override {
1400  return Impl.getIntrinsicCost(IID, RetTy, Arguments, U);
1401  }
1402  int getMemcpyCost(const Instruction *I) override {
1403  return Impl.getMemcpyCost(I);
1404  }
1405  int getUserCost(const User *U, ArrayRef<const Value *> Operands) override {
1406  return Impl.getUserCost(U, Operands);
1407  }
1408  bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
1409  bool isSourceOfDivergence(const Value *V) override {
1410  return Impl.isSourceOfDivergence(V);
1411  }
1412 
1413  bool isAlwaysUniform(const Value *V) override {
1414  return Impl.isAlwaysUniform(V);
1415  }
1416 
1417  unsigned getFlatAddressSpace() override {
1418  return Impl.getFlatAddressSpace();
1419  }
1420 
1421  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
1422  Intrinsic::ID IID) const override {
1423  return Impl.collectFlatAddressOperands(OpIndexes, IID);
1424  }
1425 
1426  bool rewriteIntrinsicWithAddressSpace(
1427  IntrinsicInst *II, Value *OldV, Value *NewV) const override {
1428  return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
1429  }
1430 
1431  bool isLoweredToCall(const Function *F) override {
1432  return Impl.isLoweredToCall(F);
1433  }
1434  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
1435  UnrollingPreferences &UP) override {
1436  return Impl.getUnrollingPreferences(L, SE, UP);
1437  }
1438  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
1439  AssumptionCache &AC,
1440  TargetLibraryInfo *LibInfo,
1441  HardwareLoopInfo &HWLoopInfo) override {
1442  return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
1443  }
1444  bool isLegalAddImmediate(int64_t Imm) override {
1445  return Impl.isLegalAddImmediate(Imm);
1446  }
1447  bool isLegalICmpImmediate(int64_t Imm) override {
1448  return Impl.isLegalICmpImmediate(Imm);
1449  }
1450  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
1451  bool HasBaseReg, int64_t Scale,
1452  unsigned AddrSpace,
1453  Instruction *I) override {
1454  return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
1455  Scale, AddrSpace, I);
1456  }
1457  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
1458  TargetTransformInfo::LSRCost &C2) override {
1459  return Impl.isLSRCostLess(C1, C2);
1460  }
1461  bool canMacroFuseCmp() override {
1462  return Impl.canMacroFuseCmp();
1463  }
1464  bool canSaveCmp(Loop *L, BranchInst **BI,
1465  ScalarEvolution *SE,
1466  LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
1467  TargetLibraryInfo *LibInfo) override {
1468  return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
1469  }
1470  bool shouldFavorPostInc() const override {
1471  return Impl.shouldFavorPostInc();
1472  }
1473  bool shouldFavorBackedgeIndex(const Loop *L) const override {
1474  return Impl.shouldFavorBackedgeIndex(L);
1475  }
1476  bool isLegalMaskedStore(Type *DataType) override {
1477  return Impl.isLegalMaskedStore(DataType);
1478  }
1479  bool isLegalMaskedLoad(Type *DataType) override {
1480  return Impl.isLegalMaskedLoad(DataType);
1481  }
1482  bool isLegalNTStore(Type *DataType, unsigned Alignment) override {
1483  return Impl.isLegalNTStore(DataType, Alignment);
1484  }
1485  bool isLegalNTLoad(Type *DataType, unsigned Alignment) override {
1486  return Impl.isLegalNTLoad(DataType, Alignment);
1487  }
1488  bool isLegalMaskedScatter(Type *DataType) override {
1489  return Impl.isLegalMaskedScatter(DataType);
1490  }
1491  bool isLegalMaskedGather(Type *DataType) override {
1492  return Impl.isLegalMaskedGather(DataType);
1493  }
1494  bool isLegalMaskedCompressStore(Type *DataType) override {
1495  return Impl.isLegalMaskedCompressStore(DataType);
1496  }
1497  bool isLegalMaskedExpandLoad(Type *DataType) override {
1498  return Impl.isLegalMaskedExpandLoad(DataType);
1499  }
1500  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
1501  return Impl.hasDivRemOp(DataType, IsSigned);
1502  }
1503  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
1504  return Impl.hasVolatileVariant(I, AddrSpace);
1505  }
1506  bool prefersVectorizedAddressing() override {
1507  return Impl.prefersVectorizedAddressing();
1508  }
1509  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
1510  bool HasBaseReg, int64_t Scale,
1511  unsigned AddrSpace) override {
1512  return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
1513  Scale, AddrSpace);
1514  }
1515  bool LSRWithInstrQueries() override {
1516  return Impl.LSRWithInstrQueries();
1517  }
1518  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
1519  return Impl.isTruncateFree(Ty1, Ty2);
1520  }
1521  bool isProfitableToHoist(Instruction *I) override {
1522  return Impl.isProfitableToHoist(I);
1523  }
1524  bool useAA() override { return Impl.useAA(); }
1525  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
1526  unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); }
1527  unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); }
1528  bool shouldBuildLookupTables() override {
1529  return Impl.shouldBuildLookupTables();
1530  }
1531  bool shouldBuildLookupTablesForConstant(Constant *C) override {
1532  return Impl.shouldBuildLookupTablesForConstant(C);
1533  }
1534  bool useColdCCForColdCall(Function &F) override {
1535  return Impl.useColdCCForColdCall(F);
1536  }
1537 
1538  unsigned getScalarizationOverhead(Type *Ty, bool Insert,
1539  bool Extract) override {
1540  return Impl.getScalarizationOverhead(Ty, Insert, Extract);
1541  }
1542  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1543  unsigned VF) override {
1544  return Impl.getOperandsScalarizationOverhead(Args, VF);
1545  }
1546 
1547  bool supportsEfficientVectorElementLoadStore() override {
1548  return Impl.supportsEfficientVectorElementLoadStore();
1549  }
1550 
1551  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
1552  return Impl.enableAggressiveInterleaving(LoopHasReductions);
1553  }
1554  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
1555  bool IsZeroCmp) const override {
1556  return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
1557  }
1558  bool enableInterleavedAccessVectorization() override {
1559  return Impl.enableInterleavedAccessVectorization();
1560  }
1561  bool enableMaskedInterleavedAccessVectorization() override {
1562  return Impl.enableMaskedInterleavedAccessVectorization();
1563  }
1564  bool isFPVectorizationPotentiallyUnsafe() override {
1565  return Impl.isFPVectorizationPotentiallyUnsafe();
1566  }
1567  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
1568  unsigned BitWidth, unsigned AddressSpace,
1569  unsigned Alignment, bool *Fast) override {
1570  return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
1571  Alignment, Fast);
1572  }
1573  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
1574  return Impl.getPopcntSupport(IntTyWidthInBit);
1575  }
1576  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
1577 
1578  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
1579  return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
1580  }
1581 
1582  int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
1583 
1584  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1585  Type *Ty) override {
1586  return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
1587  }
1588  int getIntImmCost(const APInt &Imm, Type *Ty) override {
1589  return Impl.getIntImmCost(Imm, Ty);
1590  }
1591  int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1592  Type *Ty) override {
1593  return Impl.getIntImmCost(Opc, Idx, Imm, Ty);
1594  }
1595  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
1596  Type *Ty) override {
1597  return Impl.getIntImmCost(IID, Idx, Imm, Ty);
1598  }
1599  unsigned getNumberOfRegisters(bool Vector) override {
1600  return Impl.getNumberOfRegisters(Vector);
1601  }
1602  unsigned getRegisterBitWidth(bool Vector) const override {
1603  return Impl.getRegisterBitWidth(Vector);
1604  }
1605  unsigned getMinVectorRegisterBitWidth() override {
1606  return Impl.getMinVectorRegisterBitWidth();
1607  }
1608  bool shouldMaximizeVectorBandwidth(bool OptSize) const override {
1609  return Impl.shouldMaximizeVectorBandwidth(OptSize);
1610  }
1611  unsigned getMinimumVF(unsigned ElemWidth) const override {
1612  return Impl.getMinimumVF(ElemWidth);
1613  }
1614  bool shouldConsiderAddressTypePromotion(
1615  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
1616  return Impl.shouldConsiderAddressTypePromotion(
1617  I, AllowPromotionWithoutCommonHeader);
1618  }
1619  unsigned getCacheLineSize() override {
1620  return Impl.getCacheLineSize();
1621  }
1622  llvm::Optional<unsigned> getCacheSize(CacheLevel Level) override {
1623  return Impl.getCacheSize(Level);
1624  }
1625  llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) override {
1626  return Impl.getCacheAssociativity(Level);
1627  }
1628  unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
1629  unsigned getMinPrefetchStride() override {
1630  return Impl.getMinPrefetchStride();
1631  }
1632  unsigned getMaxPrefetchIterationsAhead() override {
1633  return Impl.getMaxPrefetchIterationsAhead();
1634  }
1635  unsigned getMaxInterleaveFactor(unsigned VF) override {
1636  return Impl.getMaxInterleaveFactor(VF);
1637  }
1638  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
1639  unsigned &JTSize) override {
1640  return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize);
1641  }
1642  unsigned
1643  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
1644  OperandValueKind Opd2Info,
1645  OperandValueProperties Opd1PropInfo,
1646  OperandValueProperties Opd2PropInfo,
1647  ArrayRef<const Value *> Args) override {
1648  return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
1649  Opd1PropInfo, Opd2PropInfo, Args);
1650  }
1651  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
1652  Type *SubTp) override {
1653  return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
1654  }
1655  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1656  const Instruction *I) override {
1657  return Impl.getCastInstrCost(Opcode, Dst, Src, I);
1658  }
1659  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
1660  unsigned Index) override {
1661  return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
1662  }
1663  int getCFInstrCost(unsigned Opcode) override {
1664  return Impl.getCFInstrCost(Opcode);
1665  }
1666  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
1667  const Instruction *I) override {
1668  return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
1669  }
1670  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
1671  return Impl.getVectorInstrCost(Opcode, Val, Index);
1672  }
1673  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1674  unsigned AddressSpace, const Instruction *I) override {
1675  return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
1676  }
1677  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1678  unsigned AddressSpace) override {
1679  return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
1680  }
1681  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
1682  Value *Ptr, bool VariableMask,
1683  unsigned Alignment) override {
1684  return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
1685  Alignment);
1686  }
1687  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
1688  ArrayRef<unsigned> Indices, unsigned Alignment,
1689  unsigned AddressSpace, bool UseMaskForCond,
1690  bool UseMaskForGaps) override {
1691  return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
1692  Alignment, AddressSpace,
1693  UseMaskForCond, UseMaskForGaps);
1694  }
1695  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1696  bool IsPairwiseForm) override {
1697  return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
1698  }
1699  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
1700  bool IsPairwiseForm, bool IsUnsigned) override {
1701  return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
1702  }
1703  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
1704  FastMathFlags FMF, unsigned ScalarizationCostPassed) override {
1705  return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
1706  ScalarizationCostPassed);
1707  }
1708  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1709  ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) override {
1710  return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
1711  }
1712  int getCallInstrCost(Function *F, Type *RetTy,
1713  ArrayRef<Type *> Tys) override {
1714  return Impl.getCallInstrCost(F, RetTy, Tys);
1715  }
1716  unsigned getNumberOfParts(Type *Tp) override {
1717  return Impl.getNumberOfParts(Tp);
1718  }
1719  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
1720  const SCEV *Ptr) override {
1721  return Impl.getAddressComputationCost(Ty, SE, Ptr);
1722  }
1723  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
1724  return Impl.getCostOfKeepingLiveOverCall(Tys);
1725  }
1726  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1727  MemIntrinsicInfo &Info) override {
1728  return Impl.getTgtMemIntrinsic(Inst, Info);
1729  }
1730  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
1731  return Impl.getAtomicMemIntrinsicMaxElementSize();
1732  }
1733  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1734  Type *ExpectedType) override {
1735  return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
1736  }
1737  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1738  unsigned SrcAlign,
1739  unsigned DestAlign) const override {
1740  return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAlign, DestAlign);
1741  }
1742  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
1743  LLVMContext &Context,
1744  unsigned RemainingBytes,
1745  unsigned SrcAlign,
1746  unsigned DestAlign) const override {
1747  Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
1748  SrcAlign, DestAlign);
1749  }
1750  bool areInlineCompatible(const Function *Caller,
1751  const Function *Callee) const override {
1752  return Impl.areInlineCompatible(Caller, Callee);
1753  }
1754  bool areFunctionArgsABICompatible(
1755  const Function *Caller, const Function *Callee,
1756  SmallPtrSetImpl<Argument *> &Args) const override {
1757  return Impl.areFunctionArgsABICompatible(Caller, Callee, Args);
1758  }
1759  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
1760  return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
1761  }
1762  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
1763  return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
1764  }
1765  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
1766  return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
1767  }
1768  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
1769  return Impl.isLegalToVectorizeLoad(LI);
1770  }
1771  bool isLegalToVectorizeStore(StoreInst *SI) const override {
1772  return Impl.isLegalToVectorizeStore(SI);
1773  }
1774  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1775  unsigned Alignment,
1776  unsigned AddrSpace) const override {
1777  return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
1778  AddrSpace);
1779  }
1780  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1781  unsigned Alignment,
1782  unsigned AddrSpace) const override {
1783  return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
1784  AddrSpace);
1785  }
1786  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1787  unsigned ChainSizeInBytes,
1788  VectorType *VecTy) const override {
1789  return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
1790  }
1791  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1792  unsigned ChainSizeInBytes,
1793  VectorType *VecTy) const override {
1794  return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
1795  }
1796  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
1797  ReductionFlags Flags) const override {
1798  return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
1799  }
1800  bool shouldExpandReduction(const IntrinsicInst *II) const override {
1801  return Impl.shouldExpandReduction(II);
1802  }
1803 
1804  unsigned getGISelRematGlobalCost() const override {
1805  return Impl.getGISelRematGlobalCost();
1806  }
1807 
1808  int getInstructionLatency(const Instruction *I) override {
1809  return Impl.getInstructionLatency(I);
1810  }
1811 };
1812 
1813 template <typename T>
1814 TargetTransformInfo::TargetTransformInfo(T Impl)
1815  : TTIImpl(new Model<T>(Impl)) {}
1816 
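// Illustrative sketch (not part of the header): the Concept/Model pair above is the
// classic type-erasure idiom, and the templated constructor just shown is its only
// entry point. Any object providing the Concept API can be wrapped; MyTargetTTIImpl,
// TM and F below are hypothetical placeholders for a target's implementation type
// and its inputs.
//
// \code
//   MyTargetTTIImpl Impl(TM, F);            // hypothetical target implementation
//   TargetTransformInfo TTI(Impl);          // erased behind Model<MyTargetTTIImpl>
//   unsigned VecRegs = TTI.getNumberOfRegisters(/*Vector=*/true);
// \endcode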
1817 /// Analysis pass providing the \c TargetTransformInfo.
1818 ///
1819 /// The core idea of the TargetIRAnalysis is to expose an interface through
1820 /// which LLVM targets can analyze and provide information about the middle
1821 /// end's target-independent IR. This supports use cases such as target-aware
1822 /// cost modeling of IR constructs.
1823 ///
1824 /// This is a function analysis because much of the cost modeling for targets
1825 /// is done in a subtarget specific way and LLVM supports compiling different
1826 /// functions targeting different subtargets in order to support runtime
1827 /// dispatch according to the observed subtarget.
1828 class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
1829 public:
1830  typedef TargetTransformInfo Result;
1831 
1832  /// Default construct a target IR analysis.
1833  ///
1834  /// This will use the module's datalayout to construct a baseline
1835  /// conservative TTI result.
1836  TargetIRAnalysis();
1837 
1838  /// Construct an IR analysis pass around a target-provide callback.
1839  ///
1840  /// The callback will be called with a particular function for which the TTI
1841  /// is needed and must return a TTI object for that function.
1842  TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
1843 
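// Illustrative sketch (not part of the header): targets normally construct this
// analysis with a callback that builds their subtarget-specific TTI per function.
// MyTargetTTIImpl and TM below are hypothetical stand-ins for such an implementation
// and its owning TargetMachine.
//
// \code
//   TargetIRAnalysis TIRA([&](const Function &F) {
//     return TargetTransformInfo(MyTargetTTIImpl(TM, F));
//   });
// \endcode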
1844  // Value semantics. We spell out the constructors for MSVC.
1845  TargetIRAnalysis(const TargetIRAnalysis &Arg)
1846  : TTICallback(Arg.TTICallback) {}
1847  TargetIRAnalysis(TargetIRAnalysis &&Arg)
1848  : TTICallback(std::move(Arg.TTICallback)) {}
1849  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
1850  TTICallback = RHS.TTICallback;
1851  return *this;
1852  }
1853  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
1854  TTICallback = std::move(RHS.TTICallback);
1855  return *this;
1856  }
1857 
1858  Result run(const Function &F, FunctionAnalysisManager &);
1859 
1860 private:
1861  friend AnalysisInfoMixin<TargetIRAnalysis>;
1862  static AnalysisKey Key;
1863 
1864  /// The callback used to produce a result.
1865  ///
1866  /// We use a completely opaque callback so that targets can provide whatever
1867  /// mechanism they desire for constructing the TTI for a given function.
1868  ///
1869  /// FIXME: Should we really use std::function? It's relatively inefficient.
1870  /// It might be possible to arrange for even stateful callbacks to outlive
1871  /// the analysis and thus use a function_ref which would be lighter weight.
1872  /// This may also be less error prone as the callback is likely to reference
1873  /// the external TargetMachine, and that reference needs to never dangle.
1874  std::function<Result(const Function &)> TTICallback;
1875 
1876  /// Helper function used as the callback in the default constructor.
1877  static Result getDefaultTTI(const Function &F);
1878 };
1879 
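// Illustrative sketch (not part of the header): under the new pass manager a
// function pass reaches the result through its FunctionAnalysisManager. MyPass is
// a hypothetical pass name.
//
// \code
//   PreservedAnalyses MyPass::run(Function &F, FunctionAnalysisManager &FAM) {
//     const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
//     // ... query TTI for cost information ...
//     return PreservedAnalyses::all();
//   }
// \endcode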
1880 /// Wrapper pass for TargetTransformInfo.
1881 ///
1882 /// This pass can be constructed from a TTI object which it stores internally
1883 /// and is queried by passes.
1884 class TargetTransformInfoWrapperPass : public ImmutablePass {
1885  TargetIRAnalysis TIRA;
1886  Optional<TargetTransformInfo> TTI;
1887 
1888  virtual void anchor();
1889 
1890 public:
1891  static char ID;
1892 
1893  /// We must provide a default constructor for the pass but it should
1894  /// never be used.
1895  ///
1896  /// Use the constructor below or call one of the creation routines.
1897  TargetTransformInfoWrapperPass();
1898 
1899  explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
1900 
1901  TargetTransformInfo &getTTI(const Function &F);
1902 };
1903 
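// Illustrative sketch (not part of the header): under the legacy pass manager the
// same information is reached through this wrapper pass. MyLegacyPass is a
// hypothetical pass that declares the dependency and queries the TTI per function.
//
// \code
//   void MyLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
//     AU.addRequired<TargetTransformInfoWrapperPass>();
//   }
//   bool MyLegacyPass::runOnFunction(Function &F) {
//     auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
//     // ... use TTI ...
//     return false;
//   }
// \endcode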
1904 /// Create an analysis pass wrapper around a TTI object.
1905 ///
1906 /// This analysis pass just holds the TTI instance and makes it available to
1907 /// clients.
1908 ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
1909 
1910 } // End llvm namespace
1911 
1912 #endif