TargetTransformInfo.h
1 //===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This pass exposes codegen information to IR-level passes. Every
10 /// transformation that uses codegen information is broken into three parts:
11 /// 1. The IR-level analysis pass.
12 /// 2. The IR-level transformation interface which provides the needed
13 /// information.
14 /// 3. Codegen-level implementation which uses target-specific hooks.
15 ///
16 /// This file defines #2, which is the interface that IR-level transformations
17 /// use for querying the codegen.
18 ///
19 //===----------------------------------------------------------------------===//
20 
21 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
22 #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
23 
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/IR/Operator.h"
26 #include "llvm/IR/PassManager.h"
27 #include "llvm/Pass.h"
28 #include "llvm/Support/AtomicOrdering.h"
29 #include "llvm/Support/DataTypes.h"
30 #include "llvm/Analysis/LoopInfo.h"
32 #include "llvm/IR/Dominators.h"
33 #include <functional>
34 
35 namespace llvm {
36 
37 namespace Intrinsic {
38 enum ID : unsigned;
39 }
40 
41 class AssumptionCache;
42 class BranchInst;
43 class Function;
44 class GlobalValue;
45 class IntrinsicInst;
46 class LoadInst;
47 class Loop;
48 class SCEV;
49 class ScalarEvolution;
50 class StoreInst;
51 class SwitchInst;
52 class TargetLibraryInfo;
53 class Type;
54 class User;
55 class Value;
56 
57 /// Information about a load/store intrinsic defined by the target.
58 struct MemIntrinsicInfo {
59  /// This is the pointer that the intrinsic is loading from or storing to.
60  /// If this is non-null, then analysis/optimization passes can assume that
61  /// this intrinsic is functionally equivalent to a load/store from this
62  /// pointer.
63  Value *PtrVal = nullptr;
64 
65  // Ordering for atomic operations.
66  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
67 
68  // Same Id is set by the target for corresponding load/store intrinsics.
69  unsigned short MatchingId = 0;
70 
71  bool ReadMem = false;
72  bool WriteMem = false;
73  bool IsVolatile = false;
74 
75  bool isUnordered() const {
76  return (Ordering == AtomicOrdering::NotAtomic ||
77  Ordering == AtomicOrdering::Unordered) && !IsVolatile;
78  }
79 };
80 
81 /// Attributes of a target dependent hardware loop.
82 struct HardwareLoopInfo {
83  HardwareLoopInfo() = delete;
84  HardwareLoopInfo(Loop *L) : L(L) {}
85  Loop *L = nullptr;
86  BasicBlock *ExitBlock = nullptr;
87  BranchInst *ExitBranch = nullptr;
88  const SCEV *ExitCount = nullptr;
89  IntegerType *CountType = nullptr;
90  Value *LoopDecrement = nullptr; // Decrement the loop counter by this
91  // value in every iteration.
92  bool IsNestingLegal = false; // Can a hardware loop be a parent to
93  // another hardware loop?
94  bool CounterInReg = false; // Should loop counter be updated in
95  // the loop via a phi?
96  bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,
97  DominatorTree &DT, bool ForceNestedLoop = false,
98  bool ForceHardwareLoopPHI = false);
99 };
100 
101 /// This pass provides access to the codegen interfaces that are needed
102 /// for IR-level transformations.
103 class TargetTransformInfo {
104 public:
105  /// Construct a TTI object using a type implementing the \c Concept
106  /// API below.
107  ///
108  /// This is used by targets to construct a TTI wrapping their target-specific
109  /// implementation that encodes appropriate costs for their target.
110  template <typename T> TargetTransformInfo(T Impl);
111 
112  /// Construct a baseline TTI object using a minimal implementation of
113  /// the \c Concept API below.
114  ///
115  /// The TTI implementation will reflect the information in the DataLayout
116  /// provided if non-null.
117  explicit TargetTransformInfo(const DataLayout &DL);
118 
119  // Provide move semantics.
120  TargetTransformInfo(TargetTransformInfo &&Arg);
121  TargetTransformInfo &operator=(TargetTransformInfo &&RHS);
122 
123  // We need to define the destructor out-of-line to define our sub-classes
124  // out-of-line.
125  ~TargetTransformInfo();
126 
127  /// Handle the invalidation of this information.
128  ///
129  /// When used as a result of \c TargetIRAnalysis this method will be called
130  /// when the function this was computed for changes. When it returns false,
131  /// the information is preserved across those changes.
132  bool invalidate(Function &, const PreservedAnalyses &,
133  FunctionAnalysisManager::Invalidator &) {
134  // FIXME: We should probably in some way ensure that the subtarget
135  // information for a function hasn't changed.
136  return false;
137  }
138 
139  /// \name Generic Target Information
140  /// @{
141 
142  /// The kind of cost model.
143  ///
144  /// There are several different cost models that can be customized by the
145  /// target. The normalization of each cost model may be target specific.
146  enum TargetCostKind {
147  TCK_RecipThroughput, ///< Reciprocal throughput.
148  TCK_Latency, ///< The latency of the instruction.
149  TCK_CodeSize ///< Instruction code size.
150  };
151 
152  /// Query the cost of a specified instruction.
153  ///
154  /// Clients should use this interface to query the cost of an existing
155  /// instruction. The instruction must have a valid parent (basic block).
156  ///
157  /// Note, this method does not cache the cost calculation and it
158  /// can be expensive in some cases.
159  int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const {
160  switch (kind) {
161  case TCK_RecipThroughput:
162  return getInstructionThroughput(I);
163 
164  case TCK_Latency:
165  return getInstructionLatency(I);
166 
167  case TCK_CodeSize:
168  return getUserCost(I);
169  }
170  llvm_unreachable("Unknown instruction cost kind");
171  }
172 
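 // Example (illustrative sketch, not part of the upstream interface): querying
 // the cost of an existing instruction. `TTI` is assumed to be a
 // `const TargetTransformInfo &` (e.g. from TargetIRAnalysis) and `I` an
 // `Instruction *` with a valid parent block.
 //
 //   int Cost =
 //       TTI.getInstructionCost(I, TargetTransformInfo::TCK_RecipThroughput);
 //   if (Cost < 0)
 //     ; // the throughput cost of this instruction is unknown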
173  /// Underlying constants for 'cost' values in this interface.
174  ///
175  /// Many APIs in this interface return a cost. This enum defines the
176  /// fundamental values that should be used to interpret (and produce) those
177  /// costs. The costs are returned as an int rather than a member of this
178  /// enumeration because it is expected that the cost of one IR instruction
179  /// may have a multiplicative factor to it or otherwise won't fit directly
180  /// into the enum. Moreover, it is common to sum or average costs which works
181  /// better as simple integral values. Thus this enum only provides constants.
182  /// Also note that the returned costs are signed integers to make it natural
183  /// to add, subtract, and test with zero (a common boundary condition). It is
184  /// not expected that 2^32 is a realistic cost to be modeling at any point.
185  ///
186  /// Note that these costs should usually reflect the intersection of code-size
187  /// cost and execution cost. A free instruction is typically one that folds
188  /// into another instruction. For example, reg-to-reg moves can often be
189  /// skipped by renaming the registers in the CPU, but they still are encoded
190  /// and thus wouldn't be considered 'free' here.
191  enum TargetCostConstants {
192  TCC_Free = 0, ///< Expected to fold away in lowering.
193  TCC_Basic = 1, ///< The cost of a typical 'add' instruction.
194  TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
195  };
196 
197  /// Estimate the cost of a specific operation when lowered.
198  ///
199  /// Note that this is designed to work on an arbitrary synthetic opcode, and
200  /// thus work for hypothetical queries before an instruction has even been
201  /// formed. However, this does *not* work for GEPs, and must not be called
202  /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as
203  /// analyzing a GEP's cost requires more information.
204  ///
205  /// Typically only the result type is required, and the operand type can be
206  /// omitted. However, if the opcode is one of the cast instructions, the
207  /// operand type is required.
208  ///
209  /// The returned cost is defined in terms of \c TargetCostConstants, see its
210  /// comments for a detailed explanation of the cost values.
211  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const;
212 
213  /// Estimate the cost of a GEP operation when lowered.
214  ///
215  /// The contract for this function is the same as \c getOperationCost except
216  /// that it supports an interface that provides extra information specific to
217  /// the GEP operation.
218  int getGEPCost(Type *PointeeType, const Value *Ptr,
219  ArrayRef<const Value *> Operands) const;
220 
221  /// Estimate the cost of an EXT operation when lowered.
222  ///
223  /// The contract for this function is the same as \c getOperationCost except
224  /// that it supports an interface that provides extra information specific to
225  /// the EXT operation.
226  int getExtCost(const Instruction *I, const Value *Src) const;
227 
228  /// Estimate the cost of a function call when lowered.
229  ///
230  /// The contract for this is the same as \c getOperationCost except that it
231  /// supports an interface that provides extra information specific to call
232  /// instructions.
233  ///
234  /// This is the most basic query for estimating call cost: it only knows the
235  /// function type and (potentially) the number of arguments at the call site.
236  /// The latter is only interesting for varargs function types.
237  int getCallCost(FunctionType *FTy, int NumArgs = -1,
238  const User *U = nullptr) const;
239 
240  /// Estimate the cost of calling a specific function when lowered.
241  ///
242  /// This overload adds the ability to reason about the particular function
243  /// being called in the event it is a library call with special lowering.
244  int getCallCost(const Function *F, int NumArgs = -1,
245  const User *U = nullptr) const;
246 
247  /// Estimate the cost of calling a specific function when lowered.
248  ///
249  /// This overload allows specifying a set of candidate argument values.
250  int getCallCost(const Function *F, ArrayRef<const Value *> Arguments,
251  const User *U = nullptr) const;
252 
253  /// \returns A value by which our inlining threshold should be multiplied.
254  /// This is primarily used to bump up the inlining threshold wholesale on
255  /// targets where calls are unusually expensive.
256  ///
257  /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
258  /// individual classes of instructions would be better.
259  unsigned getInliningThresholdMultiplier() const;
260 
261  /// Estimate the cost of an intrinsic when lowered.
262  ///
263  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
264  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
265  ArrayRef<Type *> ParamTys,
266  const User *U = nullptr) const;
267 
268  /// Estimate the cost of an intrinsic when lowered.
269  ///
270  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
271  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
272  ArrayRef<const Value *> Arguments,
273  const User *U = nullptr) const;
274 
275  /// \return the expected cost of a memcpy, which could e.g. depend on the
276  /// source/destination type and alignment and the number of bytes copied.
277  int getMemcpyCost(const Instruction *I) const;
278 
279  /// \return The estimated number of case clusters when lowering \p 'SI'.
280  /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
281  /// table.
282  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
283  unsigned &JTSize) const;
284 
285  /// Estimate the cost of a given IR user when lowered.
286  ///
287  /// This can estimate the cost of either a ConstantExpr or Instruction when
288  /// lowered. It has two primary advantages over the \c getOperationCost and
289  /// \c getGEPCost above, and one significant disadvantage: it can only be
290  /// used when the IR construct has already been formed.
291  ///
292  /// The advantages are that it can inspect the SSA use graph to reason more
293  /// accurately about the cost. For example, all-constant-GEPs can often be
294  /// folded into a load or other instruction, but if they are used in some
295  /// other context they may not be folded. This routine can distinguish such
296  /// cases.
297  ///
298  /// \p Operands is a list of operands which can be a result of transformations
299  /// of the current operands. The number of operands on the list must equal
300  /// the number of the current operands the IR user has. Their order on the
301  /// list must be the same as the order of the current operands the IR user
302  /// has.
303  ///
304  /// The returned cost is defined in terms of \c TargetCostConstants, see its
305  /// comments for a detailed explanation of the cost values.
306  int getUserCost(const User *U, ArrayRef<const Value *> Operands) const;
307 
308  /// This is a helper function which calls the two-argument getUserCost
309  /// with \p Operands which are the current operands U has.
310  int getUserCost(const User *U) const {
311  SmallVector<const Value *, 4> Operands(U->value_op_begin(),
312  U->value_op_end());
313  return getUserCost(U, Operands);
314  }
315 
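 // Example (illustrative sketch): estimating whether an all-constant GEP is
 // expected to fold away. `TTI` and `GEPOp` (a `const User *`) are assumed to
 // exist in the surrounding code.
 //
 //   if (TTI.getUserCost(GEPOp) == TargetTransformInfo::TCC_Free)
 //     ; // expected to fold into its user, e.g. a load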
316  /// Return true if branch divergence exists.
317  ///
318  /// Branch divergence has a significantly negative impact on GPU performance
319  /// when threads in the same wavefront take different paths due to conditional
320  /// branches.
321  bool hasBranchDivergence() const;
322 
323  /// Returns whether V is a source of divergence.
324  ///
325  /// This function provides the target-dependent information for
326  /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis first
327  /// builds the dependency graph, and then runs the reachability algorithm
328  /// starting with the sources of divergence.
329  bool isSourceOfDivergence(const Value *V) const;
330 
331  // Returns true for the target-specific
332  // set of operations which produce a uniform result
333  // even when taking non-uniform arguments.
334  bool isAlwaysUniform(const Value *V) const;
335 
336  /// Returns the address space ID for a target's 'flat' address space. Note
337  /// this is not necessarily the same as addrspace(0), which LLVM sometimes
338  /// refers to as the generic address space. The flat address space is a
339  /// generic address space that can be used to access multiple segments of memory
340  /// with different address spaces. Access of a memory location through a
341  /// pointer with this address space is expected to be legal but slower
342  /// compared to the same memory location accessed through a pointer with a
343  /// different address space.
344  //
345  /// This is for targets with different pointer representations which can
346  /// be converted with the addrspacecast instruction. If a pointer is converted
347  /// to this address space, optimizations should attempt to replace the access
348  /// with the source address space.
349  ///
350  /// \returns ~0u if the target does not have such a flat address space to
351  /// optimize away.
352  unsigned getFlatAddressSpace() const;
353 
354  /// Test whether calls to a function lower to actual program function
355  /// calls.
356  ///
357  /// The idea is to test whether the program is likely to require a 'call'
358  /// instruction or equivalent in order to call the given function.
359  ///
360  /// FIXME: It's not clear that this is a good or useful query API. Clients
361  /// should probably move to simpler cost metrics using the above.
362  /// Alternatively, we could split the cost interface into distinct code-size
363  /// and execution-speed costs. This would allow modelling the core of this
364  /// query more accurately as a call is a single small instruction, but
365  /// incurs significant execution cost.
366  bool isLoweredToCall(const Function *F) const;
367 
368  struct LSRCost {
369  /// TODO: Some of these could be merged. Also, a lexical ordering
370  /// isn't always optimal.
371  unsigned Insns;
372  unsigned NumRegs;
373  unsigned AddRecCost;
374  unsigned NumIVMuls;
375  unsigned NumBaseAdds;
376  unsigned ImmCost;
377  unsigned SetupCost;
378  unsigned ScaleCost;
379  };
380 
381  /// Parameters that control the generic loop unrolling transformation.
382  struct UnrollingPreferences {
383  /// The cost threshold for the unrolled loop. Should be relative to the
384  /// getUserCost values returned by this API, and the expectation is that
385  /// the unrolled loop's instructions when run through that interface should
386  /// not exceed this cost. However, this is only an estimate. Also, specific
387  /// loops may be unrolled even with a cost above this threshold if deemed
388  /// profitable. Set this to UINT_MAX to disable the loop body cost
389  /// restriction.
390  unsigned Threshold;
391  /// If complete unrolling will reduce the cost of the loop, we will boost
392  /// the Threshold by a certain percent to allow more aggressive complete
393  /// unrolling. This value provides the maximum boost percentage that we
394  /// can apply to Threshold (The value should be no less than 100).
395  /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
396  /// MaxPercentThresholdBoost / 100)
397  /// E.g. if complete unrolling reduces the loop execution time by 50%
398  /// then we boost the threshold by the factor of 2x. If unrolling is not
399  /// expected to reduce the running time, then we do not increase the
400  /// threshold.
401  unsigned MaxPercentThresholdBoost;
402  /// The cost threshold for the unrolled loop when optimizing for size (set
403  /// to UINT_MAX to disable).
404  unsigned OptSizeThreshold;
405  /// The cost threshold for the unrolled loop, like Threshold, but used
406  /// for partial/runtime unrolling (set to UINT_MAX to disable).
407  unsigned PartialThreshold;
408  /// The cost threshold for the unrolled loop when optimizing for size, like
409  /// OptSizeThreshold, but used for partial/runtime unrolling (set to
410  /// UINT_MAX to disable).
411  unsigned PartialOptSizeThreshold;
412  /// A forced unrolling factor (the number of concatenated bodies of the
413  /// original loop in the unrolled loop body). When set to 0, the unrolling
414  /// transformation will select an unrolling factor based on the current cost
415  /// threshold and other factors.
416  unsigned Count;
417  /// A forced peeling factor (the number of bodies of the original loop
418  /// that should be peeled off before the loop body). When set to 0, the
419  /// unrolling transformation will select a peeling factor based on profile
420  /// information and other factors.
421  unsigned PeelCount;
422  /// Default unroll count for loops with run-time trip count.
423  unsigned DefaultUnrollRuntimeCount;
424  // Set the maximum unrolling factor. The unrolling factor may be selected
425  // using the appropriate cost threshold, but may not exceed this number
426  // (set to UINT_MAX to disable). This does not apply in cases where the
427  // loop is being fully unrolled.
428  unsigned MaxCount;
429  /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
430  /// applies even if full unrolling is selected. This allows a target to fall
431  /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
432  unsigned FullUnrollMaxCount;
433  // Represents number of instructions optimized when "back edge"
434  // becomes "fall through" in unrolled loop.
435  // For now we count a conditional branch on a backedge and a comparison
436  // feeding it.
437  unsigned BEInsns;
438  /// Allow partial unrolling (unrolling of loops to expand the size of the
439  /// loop body, not only to eliminate small constant-trip-count loops).
440  bool Partial;
441  /// Allow runtime unrolling (unrolling of loops to expand the size of the
442  /// loop body even when the number of loop iterations is not known at
443  /// compile time).
444  bool Runtime;
445  /// Allow generation of a loop remainder (extra iterations after unroll).
446  bool AllowRemainder;
447  /// Allow emitting expensive instructions (such as divisions) when computing
448  /// the trip count of a loop for runtime unrolling.
449  bool AllowExpensiveTripCount;
450  /// Apply loop unroll on any kind of loop
451  /// (mainly to loops that fail runtime unrolling).
452  bool Force;
453  /// Allow using trip count upper bound to unroll loops.
454  bool UpperBound;
455  /// Allow peeling off loop iterations for loops with low dynamic tripcount.
456  bool AllowPeeling;
457  /// Allow unrolling of all the iterations of the runtime loop remainder.
458  bool UnrollRemainder;
459  /// Allow unroll and jam. Used to enable unroll and jam for the target.
460  bool UnrollAndJam;
461  /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
462  /// value above is used during unroll and jam for the outer loop size.
463  /// This value is used in the same manner to limit the size of the inner
464  /// loop.
465  unsigned UnrollAndJamInnerLoopThreshold;
466  };
467 
468  /// Get target-customized preferences for the generic loop unrolling
469  /// transformation. The caller will initialize UP with the current
470  /// target-independent defaults.
471  void getUnrollingPreferences(Loop *L, ScalarEvolution &,
472  UnrollingPreferences &UP) const;
473 
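 // Example (illustrative sketch): how a loop pass might combine the
 // target-independent defaults with this hook. `L` (a `Loop *`) and `SE` are
 // assumed to come from the usual analyses.
 //
 //   TargetTransformInfo::UnrollingPreferences UP;
 //   // ... initialize UP with the target-independent defaults ...
 //   TTI.getUnrollingPreferences(L, SE, UP);
 //   if (UP.Partial && UP.Count == 0)
 //     ; // choose an unroll factor subject to UP.Threshold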
474  /// Query the target whether it would be profitable to convert the given loop
475  /// into a hardware loop.
476  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
477  AssumptionCache &AC,
478  TargetLibraryInfo *LibInfo,
479  HardwareLoopInfo &HWLoopInfo) const;
480 
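 // Example (illustrative sketch): asking the target whether a loop should be
 // converted into a hardware loop. `L`, `SE`, `AC`, `LibInfo`, `LI` and `DT`
 // are assumed to come from the usual analyses.
 //
 //   HardwareLoopInfo HWLoopInfo(L);
 //   if (TTI.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo) &&
 //       HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT))
 //     ; // emit the hardware loop using HWLoopInfo.CountType etc.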
481  /// @}
482 
483  /// \name Scalar Target Information
484  /// @{
485 
486  /// Flags indicating the kind of support for population count.
487  ///
488  /// Compared to the SW implementation, HW support is supposed to
489  /// significantly boost the performance when the population is dense, and it
490  /// may or may not degrade performance if the population is sparse. A HW
491  /// support is considered as "Fast" if it can outperform, or is on a par
492  /// with, SW implementation when the population is sparse; otherwise, it is
493  /// considered as "Slow".
494  enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };
495 
496  /// Return true if the specified immediate is a legal add immediate, that
497  /// is the target has add instructions which can add a register with the
498  /// immediate without having to materialize the immediate into a register.
499  bool isLegalAddImmediate(int64_t Imm) const;
500 
501  /// Return true if the specified immediate is a legal icmp immediate,
502  /// that is the target has icmp instructions which can compare a register
503  /// against the immediate without having to materialize the immediate into a
504  /// register.
505  bool isLegalICmpImmediate(int64_t Imm) const;
506 
507  /// Return true if the addressing mode represented by AM is legal for
508  /// this target, for a load/store of the specified type.
509  /// The type may be VoidTy, in which case only return true if the addressing
510  /// mode is legal for a load/store of any legal type.
511  /// If target returns true in LSRWithInstrQueries(), I may be valid.
512  /// TODO: Handle pre/postinc as well.
513  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
514  bool HasBaseReg, int64_t Scale,
515  unsigned AddrSpace = 0,
516  Instruction *I = nullptr) const;
517 
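 // Example (illustrative sketch): checking whether "base register + 4*index +
 // 16" is a legal addressing mode for an i32 access. `TTI` and `Ctx` (an
 // LLVMContext) are assumptions.
 //
 //   bool Legal = TTI.isLegalAddressingMode(Type::getInt32Ty(Ctx),
 //                                          /*BaseGV=*/nullptr,
 //                                          /*BaseOffset=*/16,
 //                                          /*HasBaseReg=*/true,
 //                                          /*Scale=*/4);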
518  /// Return true if LSR cost of C1 is lower than C2.
519  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
520  TargetTransformInfo::LSRCost &C2) const;
521 
522  /// Return true if the target can fuse a compare and branch.
523  /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
524  /// calculation for the instructions in a loop.
525  bool canMacroFuseCmp() const;
526 
527  /// \return True if LSR should make efforts to create/preserve post-inc
528  /// addressing mode expressions.
529  bool shouldFavorPostInc() const;
530 
531  /// Return true if LSR should make efforts to generate indexed addressing
532  /// modes that operate across loop iterations.
533  bool shouldFavorBackedgeIndex(const Loop *L) const;
534 
535  /// Return true if the target supports masked store.
536  bool isLegalMaskedStore(Type *DataType) const;
537  /// Return true if the target supports masked load.
538  bool isLegalMaskedLoad(Type *DataType) const;
539 
540  /// Return true if the target supports nontemporal store.
541  bool isLegalNTStore(Type *DataType, unsigned Alignment) const;
542  /// Return true if the target supports nontemporal load.
543  bool isLegalNTLoad(Type *DataType, unsigned Alignment) const;
544 
545  /// Return true if the target supports masked scatter.
546  bool isLegalMaskedScatter(Type *DataType) const;
547  /// Return true if the target supports masked gather.
548  bool isLegalMaskedGather(Type *DataType) const;
549 
550  /// Return true if the target supports masked compress store.
551  bool isLegalMaskedCompressStore(Type *DataType) const;
552  /// Return true if the target supports masked expand load.
553  bool isLegalMaskedExpandLoad(Type *DataType) const;
554 
555  /// Return true if the target has a unified operation to calculate division
556  /// and remainder. If so, the additional implicit multiplication and
557  /// subtraction required to calculate a remainder from division are free. This
558  /// can enable more aggressive transformations for division and remainder than
559  /// would typically be allowed using throughput or size cost models.
560  bool hasDivRemOp(Type *DataType, bool IsSigned) const;
561 
562  /// Return true if the given instruction (assumed to be a memory access
563  /// instruction) has a volatile variant. If that's the case then we can avoid
564  /// addrspacecast to generic AS for volatile loads/stores. Default
565  /// implementation returns false, which prevents address space inference for
566  /// volatile loads/stores.
567  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
568 
569  /// Return true if target doesn't mind addresses in vectors.
570  bool prefersVectorizedAddressing() const;
571 
572  /// Return the cost of the scaling factor used in the addressing
573  /// mode represented by AM for this target, for a load/store
574  /// of the specified type.
575  /// If the AM is supported, the return value must be >= 0.
576  /// If the AM is not supported, it returns a negative value.
577  /// TODO: Handle pre/postinc as well.
578  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
579  bool HasBaseReg, int64_t Scale,
580  unsigned AddrSpace = 0) const;
581 
582  /// Return true if the loop strength reduce pass should make
583  /// Instruction* based TTI queries to isLegalAddressingMode(). This is
584  /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
585  /// immediate offset and no index register.
586  bool LSRWithInstrQueries() const;
587 
588  /// Return true if it's free to truncate a value of type Ty1 to type
589  /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
590  /// by referencing its sub-register AX.
591  bool isTruncateFree(Type *Ty1, Type *Ty2) const;
592 
593  /// Return true if it is profitable to hoist instruction in the
594  /// then/else to before if.
595  bool isProfitableToHoist(Instruction *I) const;
596 
597  bool useAA() const;
598 
599  /// Return true if this type is legal.
600  bool isTypeLegal(Type *Ty) const;
601 
602  /// Returns the target's jmp_buf alignment in bytes.
603  unsigned getJumpBufAlignment() const;
604 
605  /// Returns the target's jmp_buf size in bytes.
606  unsigned getJumpBufSize() const;
607 
608  /// Return true if switches should be turned into lookup tables for the
609  /// target.
610  bool shouldBuildLookupTables() const;
611 
612  /// Return true if switches should be turned into lookup tables
613  /// containing this constant value for the target.
614  bool shouldBuildLookupTablesForConstant(Constant *C) const;
615 
616  /// Return true if the input function, which is cold at all call sites,
617  /// should use the coldcc calling convention.
618  bool useColdCCForColdCall(Function &F) const;
619 
620  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
621 
622  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
623  unsigned VF) const;
624 
625  /// If target has efficient vector element load/store instructions, it can
626  /// return true here so that insertion/extraction costs are not added to
627  /// the scalarization cost of a load/store.
628  bool supportsEfficientVectorElementLoadStore() const;
629 
630  /// Don't restrict interleaved unrolling to small loops.
631  bool enableAggressiveInterleaving(bool LoopHasReductions) const;
632 
633  /// If not nullptr, enable inline expansion of memcmp. IsZeroCmp is
634  /// true if this is the expansion of memcmp(p1, p2, s) == 0.
635  struct MemCmpExpansionOptions {
636  // The list of available load sizes (in bytes), sorted in decreasing order.
637  SmallVector<unsigned, 8> LoadSizes;
638  // Set to true to allow overlapping loads. For example, 7-byte compares can
639  // be done with two 4-byte compares instead of 4+2+1-byte compares. This
640  // requires all loads in LoadSizes to be doable in an unaligned way.
641  bool AllowOverlappingLoads = false;
642  };
643  const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const;
644 
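 // Example (illustrative sketch): deciding whether memcmp(p1, p2, N) == 0 can
 // be expanded inline using the target-provided load sizes.
 //
 //   if (const auto *Options = TTI.enableMemCmpExpansion(/*IsZeroCmp=*/true))
 //     if (!Options->LoadSizes.empty())
 //       ; // start with loads of Options->LoadSizes.front() bytes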
645  /// Enable matching of interleaved access groups.
646  bool enableInterleavedAccessVectorization() const;
647 
648  /// Enable matching of interleaved access groups that contain predicated
649  /// accesses or gaps and therefore vectorized using masked
650  /// vector loads/stores.
651  bool enableMaskedInterleavedAccessVectorization() const;
652 
653  /// Indicate that it is potentially unsafe to automatically vectorize
654  /// floating-point operations because the semantics of vector and scalar
655  /// floating-point semantics may differ. For example, ARM NEON v7 SIMD math
656  /// does not support IEEE-754 denormal numbers, while depending on the
657  /// platform, scalar floating-point math does.
658  /// This applies to floating-point math operations and calls, not memory
659  /// operations, shuffles, or casts.
660  bool isFPVectorizationPotentiallyUnsafe() const;
661 
662  /// Determine if the target supports unaligned memory accesses.
663  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
664  unsigned BitWidth, unsigned AddressSpace = 0,
665  unsigned Alignment = 1,
666  bool *Fast = nullptr) const;
667 
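 // Example (illustrative sketch): testing whether a 64-bit access at byte
 // alignment is allowed, and whether it is also fast. `Ctx` is an assumed
 // LLVMContext.
 //
 //   bool Fast = false;
 //   if (TTI.allowsMisalignedMemoryAccesses(Ctx, /*BitWidth=*/64,
 //                                          /*AddressSpace=*/0,
 //                                          /*Alignment=*/1, &Fast) &&
 //       Fast)
 //     ; // the misaligned access is both legal and cheap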
668  /// Return hardware support for population count.
669  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
670 
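 // Example (illustrative sketch): only turn a bit-counting loop into a ctpop
 // intrinsic when the target has a fast instruction for that width.
 //
 //   if (TTI.getPopcntSupport(32) == TargetTransformInfo::PSK_FastHardware)
 //     ; // emit llvm.ctpop.i32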
671  /// Return true if the hardware has a fast square-root instruction.
672  bool haveFastSqrt(Type *Ty) const;
673 
674  /// Return true if it is faster to check if a floating-point value is NaN
675  /// (or not-NaN) versus a comparison against a constant FP zero value.
676  /// Targets should override this if materializing a 0.0 for comparison is
677  /// generally as cheap as checking for ordered/unordered.
678  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
679 
680  /// Return the expected cost of supporting the floating point operation
681  /// of the specified type.
682  int getFPOpCost(Type *Ty) const;
683 
684  /// Return the expected cost of materializing for the given integer
685  /// immediate of the specified type.
686  int getIntImmCost(const APInt &Imm, Type *Ty) const;
687 
688  /// Return the expected cost of materialization for the given integer
689  /// immediate of the specified type for a given instruction. The cost can be
690  /// zero if the immediate can be folded into the specified instruction.
691  int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
692  Type *Ty) const;
693  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
694  Type *Ty) const;
695 
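 // Example (illustrative sketch): asking whether a 64-bit constant is
 // expensive enough to be worth hoisting. `Ctx` is an assumed LLVMContext.
 //
 //   APInt Imm(64, 0x00FF00FF00FF00FFULL);
 //   if (TTI.getIntImmCost(Imm, Type::getInt64Ty(Ctx)) >
 //       TargetTransformInfo::TCC_Basic)
 //     ; // candidate for constant hoisting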
696  /// Return the expected cost for the given integer when optimising
697  /// for size. This is different than the other integer immediate cost
698  /// functions in that it is subtarget agnostic. This is useful when you e.g.
699  /// target one ISA such as Aarch32 but smaller encodings could be possible
700  /// with another such as Thumb. This return value is used as a penalty when
701  /// the total cost for a constant is calculated (the bigger the cost, the
702  /// more beneficial constant hoisting is).
703  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
704  Type *Ty) const;
705  /// @}
706 
707  /// \name Vector Target Information
708  /// @{
709 
710  /// The various kinds of shuffle patterns for vector queries.
711  enum ShuffleKind {
712  SK_Broadcast, ///< Broadcast element 0 to all other elements.
713  SK_Reverse, ///< Reverse the order of the vector.
714  SK_Select, ///< Selects elements from the corresponding lane of
715  ///< either source operand. This is equivalent to a
716  ///< vector select with a constant condition operand.
717  SK_Transpose, ///< Transpose two vectors.
718  SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
719  SK_ExtractSubvector, ///< ExtractSubvector. Index indicates start offset.
720  SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
721  ///< with any shuffle mask.
722  SK_PermuteSingleSrc ///< Shuffle elements of single source vector with any
723  ///< shuffle mask.
724  };
725 
726  /// Additional information about an operand's possible values.
727  enum OperandValueKind {
728  OK_AnyValue, // Operand can have any value.
729  OK_UniformValue, // Operand is uniform (splat of a value).
730  OK_UniformConstantValue, // Operand is uniform constant.
731  OK_NonUniformConstantValue // Operand is a non uniform constant value.
732  };
733 
734  /// Additional properties of an operand's values.
735  enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
736 
737  /// \return The number of scalar or vector registers that the target has.
738  /// If 'Vectors' is true, it returns the number of vector registers. If it is
739  /// set to false, it returns the number of scalar registers.
740  unsigned getNumberOfRegisters(bool Vector) const;
741 
742  /// \return The width of the largest scalar or vector register type.
743  unsigned getRegisterBitWidth(bool Vector) const;
744 
745  /// \return The width of the smallest vector register type.
746  unsigned getMinVectorRegisterBitWidth() const;
747 
748  /// \return True if the vectorization factor should be chosen to
749  /// make the vector of the smallest element type match the size of a
750  /// vector register. For wider element types, this could result in
751  /// creating vectors that span multiple vector registers.
752  /// If false, the vectorization factor will be chosen based on the
753  /// size of the widest element type.
754  bool shouldMaximizeVectorBandwidth(bool OptSize) const;
755 
756  /// \return The minimum vectorization factor for types of given element
757  /// bit width, or 0 if there is no minimum VF. The returned value only
758  /// applies when shouldMaximizeVectorBandwidth returns true.
759  unsigned getMinimumVF(unsigned ElemWidth) const;
760 
761  /// \return True if it should be considered for address type promotion.
762  /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
763  /// profitable without finding other extensions fed by the same input.
764  bool shouldConsiderAddressTypePromotion(
765  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
766 
767  /// \return The size of a cache line in bytes.
768  unsigned getCacheLineSize() const;
769 
770  /// The possible cache levels
771  enum class CacheLevel {
772  L1D, // The L1 data cache
773  L2D, // The L2 data cache
774 
775  // We currently do not model L3 caches, as their sizes differ widely between
776  // microarchitectures. Also, we currently do not have a use for L3 cache
777  // size modeling yet.
778  };
779 
780  /// \return The size of the cache level in bytes, if available.
781  llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const;
782 
783  /// \return The associativity of the cache level, if available.
784  llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
785 
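 // Example (illustrative sketch): sizing a blocking factor from the L1 data
 // cache, falling back to a guess when the target reports nothing.
 //
 //   unsigned L1Bytes = 32768; // assumed fallback
 //   if (Optional<unsigned> Size =
 //           TTI.getCacheSize(TargetTransformInfo::CacheLevel::L1D))
 //     L1Bytes = *Size;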
786  /// \return How much before a load we should place the prefetch instruction.
787  /// This is currently measured in number of instructions.
788  unsigned getPrefetchDistance() const;
789 
790  /// \return Some HW prefetchers can handle accesses up to a certain constant
791  /// stride. This is the minimum stride in bytes where it makes sense to start
792  /// adding SW prefetches. The default is 1, i.e. prefetch with any stride.
793  unsigned getMinPrefetchStride() const;
794 
795  /// \return The maximum number of iterations to prefetch ahead. If the
796  /// required number of iterations is more than this number, no prefetching is
797  /// performed.
798  unsigned getMaxPrefetchIterationsAhead() const;
799 
800  /// \return The maximum interleave factor that any transform should try to
801  /// perform for this target. This number depends on the level of parallelism
802  /// and the number of execution units in the CPU.
803  unsigned getMaxInterleaveFactor(unsigned VF) const;
804 
805  /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
806  static OperandValueKind getOperandInfo(Value *V,
807  OperandValueProperties &OpProps);
808 
809  /// This is an approximation of reciprocal throughput of a math/logic op.
810  /// A higher cost indicates less expected throughput.
811  /// From Agner Fog's guides, reciprocal throughput is "the average number of
812  /// clock cycles per instruction when the instructions are not part of a
813  /// limiting dependency chain."
814  /// Therefore, costs should be scaled to account for multiple execution units
815  /// on the target that can process this type of instruction. For example, if
816  /// there are 5 scalar integer units and 2 vector integer units that can
817  /// calculate an 'add' in a single cycle, this model should indicate that the
818  /// cost of the vector add instruction is 2.5 times the cost of the scalar
819  /// add instruction.
820  /// \p Args is an optional argument which holds the instruction operands
821  /// values so the TTI can analyze those values searching for special
822  /// cases or optimizations based on those values.
823  int getArithmeticInstrCost(
824  unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
825  OperandValueKind Opd2Info = OK_AnyValue,
826  OperandValueProperties Opd1PropInfo = OP_None,
827  OperandValueProperties Opd2PropInfo = OP_None,
828  ArrayRef<const Value *> Args = ArrayRef<const Value *>()) const;
829 
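 // Example (illustrative sketch): cost of a <4 x i32> multiply whose second
 // operand is a uniform constant. `Ctx` is an assumed LLVMContext.
 //
 //   Type *VecTy = VectorType::get(Type::getInt32Ty(Ctx), 4);
 //   int Cost = TTI.getArithmeticInstrCost(
 //       Instruction::Mul, VecTy, TargetTransformInfo::OK_AnyValue,
 //       TargetTransformInfo::OK_UniformConstantValue);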
830  /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
831  /// The index and subtype parameters are used by the subvector insertion and
832  /// extraction shuffle kinds to show the insert/extract point and the type of
833  /// the subvector being inserted/extracted.
834  /// NOTE: For subvector extractions Tp represents the source type.
835  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
836  Type *SubTp = nullptr) const;
837 
838  /// \return The expected cost of cast instructions, such as bitcast, trunc,
839  /// zext, etc. If there is an existing instruction that holds Opcode, it
840  /// may be passed in the 'I' parameter.
841  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
842  const Instruction *I = nullptr) const;
843 
844  /// \return The expected cost of a sign- or zero-extended vector extract. Use
845  /// -1 to indicate that there is no information about the index value.
846  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
847  unsigned Index = -1) const;
848 
849  /// \return The expected cost of control-flow related instructions such as
850  /// Phi, Ret, Br.
851  int getCFInstrCost(unsigned Opcode) const;
852 
853  /// \returns The expected cost of compare and select instructions. If there
854  /// is an existing instruction that holds Opcode, it may be passed in the
855  /// 'I' parameter.
856  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
857  Type *CondTy = nullptr, const Instruction *I = nullptr) const;
858 
859  /// \return The expected cost of vector Insert and Extract.
860  /// Use -1 to indicate that there is no information on the index value.
861  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;
862 
863  /// \return The cost of Load and Store instructions.
864  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
865  unsigned AddressSpace, const Instruction *I = nullptr) const;
866 
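 // Example (illustrative sketch): comparing a plain vector load against its
 // masked form when deciding how to vectorize. `VecTy` is an assumed vector
 // type.
 //
 //   int Plain = TTI.getMemoryOpCost(Instruction::Load, VecTy,
 //                                   /*Alignment=*/4, /*AddressSpace=*/0);
 //   int Masked = TTI.getMaskedMemoryOpCost(Instruction::Load, VecTy,
 //                                          /*Alignment=*/4,
 //                                          /*AddressSpace=*/0);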
867  /// \return The cost of masked Load and Store instructions.
868  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
869  unsigned AddressSpace) const;
870 
871  /// \return The cost of Gather or Scatter operation
872  /// \p Opcode - is a type of memory access Load or Store
873  /// \p DataTy - a vector type of the data to be loaded or stored
874  /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
875  /// \p VariableMask - true when the memory access is predicated with a mask
876  /// that is not a compile-time constant
877  /// \p Alignment - alignment of single element
878  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
879  bool VariableMask, unsigned Alignment) const;
880 
881  /// \return The cost of the interleaved memory operation.
882  /// \p Opcode is the memory operation code
883  /// \p VecTy is the vector type of the interleaved access.
884  /// \p Factor is the interleave factor
885  /// \p Indices is the indices for interleaved load members (as interleaved
886  /// load allows gaps)
887  /// \p Alignment is the alignment of the memory operation
888  /// \p AddressSpace is address space of the pointer.
889  /// \p UseMaskForCond indicates if the memory access is predicated.
890  /// \p UseMaskForGaps indicates if gaps should be masked.
891  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
892  ArrayRef<unsigned> Indices, unsigned Alignment,
893  unsigned AddressSpace,
894  bool UseMaskForCond = false,
895  bool UseMaskForGaps = false) const;
896 
897  /// Calculate the cost of performing a vector reduction.
898  ///
899  /// This is the cost of reducing the vector value of type \p Ty to a scalar
900  /// value using the operation denoted by \p Opcode. The form of the reduction
901  /// can either be a pairwise reduction or a reduction that splits the vector
902  /// at every reduction level.
903  ///
904  /// Pairwise:
905  /// (v0, v1, v2, v3)
906  /// ((v0+v1), (v2+v3), undef, undef)
907  /// Split:
908  /// (v0, v1, v2, v3)
909  /// ((v0+v2), (v1+v3), undef, undef)
910  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
911  bool IsPairwiseForm) const;
912  int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
913  bool IsUnsigned) const;
914 
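 // Example (illustrative sketch): cost of reducing a <8 x float> value with
 // fadd using the splitting (non-pairwise) form. `Ctx` is an assumed
 // LLVMContext.
 //
 //   Type *VecTy = VectorType::get(Type::getFloatTy(Ctx), 8);
 //   int Cost = TTI.getArithmeticReductionCost(Instruction::FAdd, VecTy,
 //                                             /*IsPairwiseForm=*/false);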
915  /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
916  /// Three cases are handled: 1. scalar instruction 2. vector instruction
917  /// 3. scalar instruction which is to be vectorized with VF.
918  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
919  ArrayRef<Value *> Args, FastMathFlags FMF,
920  unsigned VF = 1) const;
921 
922  /// \returns The cost of Intrinsic instructions. Types analysis only.
923  /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
924  /// arguments and the return value will be computed based on types.
925  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
926  ArrayRef<Type *> Tys, FastMathFlags FMF,
927  unsigned ScalarizationCostPassed = UINT_MAX) const;
928 
929  /// \returns The cost of Call instructions.
930  int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
931 
932  /// \returns The number of pieces into which the provided type must be
933  /// split during legalization. Zero is returned when the answer is unknown.
934  unsigned getNumberOfParts(Type *Tp) const;
935 
936  /// \returns The cost of the address computation. For most targets this can be
937  /// merged into the instruction indexing mode. Some targets might want to
938  /// distinguish between address computation for memory operations on vector
939  /// types and scalar types. Such targets should override this function.
940  /// The 'SE' parameter holds a pointer to the scalar evolution object which
941  /// is used to get the Ptr step value in the case of a constant stride.
942  /// The 'Ptr' parameter holds SCEV of the access pointer.
943  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,
944  const SCEV *Ptr = nullptr) const;
945 
946  /// \returns The cost, if any, of keeping values of the given types alive
947  /// over a callsite.
948  ///
949  /// Some types may require the use of register classes that do not have
950  /// any callee-saved registers, so would require a spill and fill.
951  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
952 
953  /// \returns True if the intrinsic is a supported memory intrinsic. Info
954  /// will contain additional information - whether the intrinsic may write
955  /// or read to memory, volatility and the pointer. Info is undefined
956  /// if false is returned.
957  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
958 
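 // Example (illustrative sketch): treating a target load intrinsic like an
 // ordinary load when the target describes it. `II` is an assumed
 // `IntrinsicInst *`.
 //
 //   MemIntrinsicInfo Info;
 //   if (TTI.getTgtMemIntrinsic(II, Info) && Info.ReadMem && !Info.WriteMem &&
 //       Info.isUnordered())
 //     ; // safe to reason about it as a load from Info.PtrVal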
959  /// \returns The maximum element size, in bytes, for an element
960  /// unordered-atomic memory intrinsic.
961  unsigned getAtomicMemIntrinsicMaxElementSize() const;
962 
963  /// \returns A value which is the result of the given memory intrinsic. New
964  /// instructions may be created to extract the result from the given intrinsic
965  /// memory operation. Returns nullptr if the target cannot create a result
966  /// from the given intrinsic.
967  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
968  Type *ExpectedType) const;
969 
970  /// \returns The type to use in a loop expansion of a memcpy call.
971  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
972  unsigned SrcAlign, unsigned DestAlign) const;
973 
974  /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
975  /// \param RemainingBytes The number of bytes to copy.
976  ///
977  /// Calculates the operand types to use when copying \p RemainingBytes of
978  /// memory, where source and destination alignments are \p SrcAlign and
979  /// \p DestAlign respectively.
980  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
981  LLVMContext &Context,
982  unsigned RemainingBytes,
983  unsigned SrcAlign,
984  unsigned DestAlign) const;
985 
986  /// \returns True if the two functions have compatible attributes for inlining
987  /// purposes.
988  bool areInlineCompatible(const Function *Caller,
989  const Function *Callee) const;
990 
991  /// \returns True if the caller and callee agree on how \p Args will be passed
992  /// to the callee.
993  /// \param[out] Args The list of compatible arguments. The implementation may
994  /// filter out any incompatible args from this list.
995  bool areFunctionArgsABICompatible(const Function *Caller,
996  const Function *Callee,
997  SmallPtrSetImpl<Argument *> &Args) const;
998 
999  /// The type of load/store indexing.
1000  enum MemIndexedMode {
1001  MIM_Unindexed, ///< No indexing.
1002  MIM_PreInc, ///< Pre-incrementing.
1003  MIM_PreDec, ///< Pre-decrementing.
1004  MIM_PostInc, ///< Post-incrementing.
1005  MIM_PostDec ///< Post-decrementing.
1006  };
1007 
1008  /// \returns True if the specified indexed load for the given type is legal.
1009  bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
1010 
1011  /// \returns True if the specified indexed store for the given type is legal.
1012  bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
1013 
1014  /// \returns The bitwidth of the largest vector type that should be used to
1015  /// load/store in the given address space.
1016  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
1017 
1018  /// \returns True if the load instruction is legal to vectorize.
1019  bool isLegalToVectorizeLoad(LoadInst *LI) const;
1020 
1021  /// \returns True if the store instruction is legal to vectorize.
1022  bool isLegalToVectorizeStore(StoreInst *SI) const;
1023 
1024  /// \returns True if it is legal to vectorize the given load chain.
1025  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1026  unsigned Alignment,
1027  unsigned AddrSpace) const;
1028 
1029  /// \returns True if it is legal to vectorize the given store chain.
1030  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1031  unsigned Alignment,
1032  unsigned AddrSpace) const;
1033 
1034  /// \returns The new vector factor value if the target doesn't support \p
1035  /// SizeInBytes loads or has a better vector factor.
1036  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1037  unsigned ChainSizeInBytes,
1038  VectorType *VecTy) const;
1039 
1040  /// \returns The new vector factor value if the target doesn't support \p
1041  /// SizeInBytes stores or has a better vector factor.
1042  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1043  unsigned ChainSizeInBytes,
1044  VectorType *VecTy) const;
1045 
1046  /// Flags describing the kind of vector reduction.
1047  struct ReductionFlags {
1048  ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}
1049  bool IsMaxOp; ///< If the op is a min/max kind, true if it's a max operation.
1050  bool IsSigned; ///< Whether the operation is a signed int reduction.
1051  bool NoNaN; ///< If op is an fp min/max, whether NaNs may be present.
1052  };
1053 
1054  /// \returns True if the target wants to handle the given reduction idiom in
1055  /// the intrinsics form instead of the shuffle form.
1056  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
1057  ReductionFlags Flags) const;
1058 
1059  /// \returns True if the target wants to expand the given reduction intrinsic
1060  /// into a shuffle sequence.
1061  bool shouldExpandReduction(const IntrinsicInst *II) const;
1062 
1063  /// \returns the size cost of rematerializing a GlobalValue address relative
1064  /// to a stack reload.
1065  unsigned getGISelRematGlobalCost() const;
1066 
1067  /// @}
1068 
1069 private:
1070  /// Estimate the latency of the specified instruction.
1071  /// Returns 1 as the default value.
1072  int getInstructionLatency(const Instruction *I) const;
1073 
1074  /// Returns the expected throughput cost of the instruction.
1075  /// Returns -1 if the cost is unknown.
1076  int getInstructionThroughput(const Instruction *I) const;
1077 
1078  /// The abstract base class used to type erase specific TTI
1079  /// implementations.
1080  class Concept;
1081 
1082  /// The template model for the base class which wraps a concrete
1083  /// implementation in a type erased interface.
1084  template <typename T> class Model;
1085 
1086  std::unique_ptr<Concept> TTIImpl;
1087 };
1088 
1089 class TargetTransformInfo::Concept {
1090 public:
1091  virtual ~Concept() = 0;
1092  virtual const DataLayout &getDataLayout() const = 0;
1093  virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
1094  virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
1095  ArrayRef<const Value *> Operands) = 0;
1096  virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
1097  virtual int getCallCost(FunctionType *FTy, int NumArgs, const User *U) = 0;
1098  virtual int getCallCost(const Function *F, int NumArgs, const User *U) = 0;
1099  virtual int getCallCost(const Function *F,
1100  ArrayRef<const Value *> Arguments, const User *U) = 0;
1101  virtual unsigned getInliningThresholdMultiplier() = 0;
1102  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1103  ArrayRef<Type *> ParamTys, const User *U) = 0;
1104  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1105  ArrayRef<const Value *> Arguments,
1106  const User *U) = 0;
1107  virtual int getMemcpyCost(const Instruction *I) = 0;
1108  virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
1109  unsigned &JTSize) = 0;
1110  virtual int
1111  getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0;
1112  virtual bool hasBranchDivergence() = 0;
1113  virtual bool isSourceOfDivergence(const Value *V) = 0;
1114  virtual bool isAlwaysUniform(const Value *V) = 0;
1115  virtual unsigned getFlatAddressSpace() = 0;
1116  virtual bool isLoweredToCall(const Function *F) = 0;
1117  virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
1118  UnrollingPreferences &UP) = 0;
1119  virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
1120  AssumptionCache &AC,
1121  TargetLibraryInfo *LibInfo,
1122  HardwareLoopInfo &HWLoopInfo) = 0;
1123  virtual bool isLegalAddImmediate(int64_t Imm) = 0;
1124  virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
1125  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
1126  int64_t BaseOffset, bool HasBaseReg,
1127  int64_t Scale,
1128  unsigned AddrSpace,
1129  Instruction *I) = 0;
1130  virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
1131  TargetTransformInfo::LSRCost &C2) = 0;
1132  virtual bool canMacroFuseCmp() = 0;
1133  virtual bool shouldFavorPostInc() const = 0;
1134  virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0;
1135  virtual bool isLegalMaskedStore(Type *DataType) = 0;
1136  virtual bool isLegalMaskedLoad(Type *DataType) = 0;
1137  virtual bool isLegalNTStore(Type *DataType, unsigned Alignment) = 0;
1138  virtual bool isLegalNTLoad(Type *DataType, unsigned Alignment) = 0;
1139  virtual bool isLegalMaskedScatter(Type *DataType) = 0;
1140  virtual bool isLegalMaskedGather(Type *DataType) = 0;
1141  virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
1142  virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
1143  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
1144  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
1145  virtual bool prefersVectorizedAddressing() = 0;
1146  virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
1147  int64_t BaseOffset, bool HasBaseReg,
1148  int64_t Scale, unsigned AddrSpace) = 0;
1149  virtual bool LSRWithInstrQueries() = 0;
1150  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
1151  virtual bool isProfitableToHoist(Instruction *I) = 0;
1152  virtual bool useAA() = 0;
1153  virtual bool isTypeLegal(Type *Ty) = 0;
1154  virtual unsigned getJumpBufAlignment() = 0;
1155  virtual unsigned getJumpBufSize() = 0;
1156  virtual bool shouldBuildLookupTables() = 0;
1157  virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
1158  virtual bool useColdCCForColdCall(Function &F) = 0;
1159  virtual unsigned
1160  getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
1161  virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1162  unsigned VF) = 0;
1163  virtual bool supportsEfficientVectorElementLoadStore() = 0;
1164  virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
1165  virtual const MemCmpExpansionOptions *enableMemCmpExpansion(
1166  bool IsZeroCmp) const = 0;
1167  virtual bool enableInterleavedAccessVectorization() = 0;
1168  virtual bool enableMaskedInterleavedAccessVectorization() = 0;
1169  virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
1170  virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
1171  unsigned BitWidth,
1172  unsigned AddressSpace,
1173  unsigned Alignment,
1174  bool *Fast) = 0;
1175  virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
1176  virtual bool haveFastSqrt(Type *Ty) = 0;
1177  virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
1178  virtual int getFPOpCost(Type *Ty) = 0;
1179  virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1180  Type *Ty) = 0;
1181  virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
1182  virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1183  Type *Ty) = 0;
1184  virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
1185  Type *Ty) = 0;
1186  virtual unsigned getNumberOfRegisters(bool Vector) = 0;
1187  virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
1188  virtual unsigned getMinVectorRegisterBitWidth() = 0;
1189  virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
1190  virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;
1191  virtual bool shouldConsiderAddressTypePromotion(
1192  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1193  virtual unsigned getCacheLineSize() = 0;
1194  virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) = 0;
1195  virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) = 0;
1196  virtual unsigned getPrefetchDistance() = 0;
1197  virtual unsigned getMinPrefetchStride() = 0;
1198  virtual unsigned getMaxPrefetchIterationsAhead() = 0;
1199  virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
1200  virtual unsigned
1201  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
1202  OperandValueKind Opd2Info,
1203  OperandValueProperties Opd1PropInfo,
1204  OperandValueProperties Opd2PropInfo,
1205  ArrayRef<const Value *> Args) = 0;
1206  virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
1207  Type *SubTp) = 0;
1208  virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1209  const Instruction *I) = 0;
1210  virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1211  VectorType *VecTy, unsigned Index) = 0;
1212  virtual int getCFInstrCost(unsigned Opcode) = 0;
1213  virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
1214  Type *CondTy, const Instruction *I) = 0;
1215  virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
1216  unsigned Index) = 0;
1217  virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1218  unsigned AddressSpace, const Instruction *I) = 0;
1219  virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
1220  unsigned Alignment,
1221  unsigned AddressSpace) = 0;
1222  virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
1223  Value *Ptr, bool VariableMask,
1224  unsigned Alignment) = 0;
1225  virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
1226  unsigned Factor,
1227  ArrayRef<unsigned> Indices,
1228  unsigned Alignment,
1229  unsigned AddressSpace,
1230  bool UseMaskForCond = false,
1231  bool UseMaskForGaps = false) = 0;
1232  virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1233  bool IsPairwiseForm) = 0;
1234  virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
1235  bool IsPairwiseForm, bool IsUnsigned) = 0;
1236  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1237  ArrayRef<Type *> Tys, FastMathFlags FMF,
1238  unsigned ScalarizationCostPassed) = 0;
1239  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1240  ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) = 0;
1241  virtual int getCallInstrCost(Function *F, Type *RetTy,
1242  ArrayRef<Type *> Tys) = 0;
1243  virtual unsigned getNumberOfParts(Type *Tp) = 0;
1244  virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
1245  const SCEV *Ptr) = 0;
1246  virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
1247  virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1248  MemIntrinsicInfo &Info) = 0;
1249  virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
1250  virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1251  Type *ExpectedType) = 0;
1252  virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1253  unsigned SrcAlign,
1254  unsigned DestAlign) const = 0;
1255  virtual void getMemcpyLoopResidualLoweringType(
1256  SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1257  unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0;
1258  virtual bool areInlineCompatible(const Function *Caller,
1259  const Function *Callee) const = 0;
1260  virtual bool
1261  areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
1262  SmallPtrSetImpl<Argument *> &Args) const = 0;
1263  virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1264  virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1265  virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
1266  virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
1267  virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
1268  virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1269  unsigned Alignment,
1270  unsigned AddrSpace) const = 0;
1271  virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1272  unsigned Alignment,
1273  unsigned AddrSpace) const = 0;
1274  virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1275  unsigned ChainSizeInBytes,
1276  VectorType *VecTy) const = 0;
1277  virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1278  unsigned ChainSizeInBytes,
1279  VectorType *VecTy) const = 0;
1280  virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
1281  ReductionFlags) const = 0;
1282  virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
1283  virtual unsigned getGISelRematGlobalCost() const = 0;
1284  virtual int getInstructionLatency(const Instruction *I) = 0;
1285 };
1286 
1287 template <typename T>
1288 class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
1289  T Impl;
1290 
1291 public:
1292  Model(T Impl) : Impl(std::move(Impl)) {}
1293  ~Model() override {}
1294 
1295  const DataLayout &getDataLayout() const override {
1296  return Impl.getDataLayout();
1297  }
1298 
1299  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
1300  return Impl.getOperationCost(Opcode, Ty, OpTy);
1301  }
1302  int getGEPCost(Type *PointeeType, const Value *Ptr,
1303  ArrayRef<const Value *> Operands) override {
1304  return Impl.getGEPCost(PointeeType, Ptr, Operands);
1305  }
1306  int getExtCost(const Instruction *I, const Value *Src) override {
1307  return Impl.getExtCost(I, Src);
1308  }
1309  int getCallCost(FunctionType *FTy, int NumArgs, const User *U) override {
1310  return Impl.getCallCost(FTy, NumArgs, U);
1311  }
1312  int getCallCost(const Function *F, int NumArgs, const User *U) override {
1313  return Impl.getCallCost(F, NumArgs, U);
1314  }
1315  int getCallCost(const Function *F,
1316  ArrayRef<const Value *> Arguments, const User *U) override {
1317  return Impl.getCallCost(F, Arguments, U);
1318  }
1319  unsigned getInliningThresholdMultiplier() override {
1320  return Impl.getInliningThresholdMultiplier();
1321  }
1322  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1323  ArrayRef<Type *> ParamTys, const User *U = nullptr) override {
1324  return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U);
1325  }
1326  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1327  ArrayRef<const Value *> Arguments,
1328  const User *U = nullptr) override {
1329  return Impl.getIntrinsicCost(IID, RetTy, Arguments, U);
1330  }
1331  int getMemcpyCost(const Instruction *I) override {
1332  return Impl.getMemcpyCost(I);
1333  }
1334  int getUserCost(const User *U, ArrayRef<const Value *> Operands) override {
1335  return Impl.getUserCost(U, Operands);
1336  }
1337  bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
1338  bool isSourceOfDivergence(const Value *V) override {
1339  return Impl.isSourceOfDivergence(V);
1340  }
1341 
1342  bool isAlwaysUniform(const Value *V) override {
1343  return Impl.isAlwaysUniform(V);
1344  }
1345 
1346  unsigned getFlatAddressSpace() override {
1347  return Impl.getFlatAddressSpace();
1348  }
1349 
1350  bool isLoweredToCall(const Function *F) override {
1351  return Impl.isLoweredToCall(F);
1352  }
1353  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
1354  UnrollingPreferences &UP) override {
1355  return Impl.getUnrollingPreferences(L, SE, UP);
1356  }
1357  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
1358  AssumptionCache &AC,
1359  TargetLibraryInfo *LibInfo,
1360  HardwareLoopInfo &HWLoopInfo) override {
1361  return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
1362  }
1363  bool isLegalAddImmediate(int64_t Imm) override {
1364  return Impl.isLegalAddImmediate(Imm);
1365  }
1366  bool isLegalICmpImmediate(int64_t Imm) override {
1367  return Impl.isLegalICmpImmediate(Imm);
1368  }
1369  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
1370  bool HasBaseReg, int64_t Scale,
1371  unsigned AddrSpace,
1372  Instruction *I) override {
1373  return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
1374  Scale, AddrSpace, I);
1375  }
1376  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
1377  TargetTransformInfo::LSRCost &C2) override {
1378  return Impl.isLSRCostLess(C1, C2);
1379  }
1380  bool canMacroFuseCmp() override {
1381  return Impl.canMacroFuseCmp();
1382  }
1383  bool shouldFavorPostInc() const override {
1384  return Impl.shouldFavorPostInc();
1385  }
1386  bool shouldFavorBackedgeIndex(const Loop *L) const override {
1387  return Impl.shouldFavorBackedgeIndex(L);
1388  }
1389  bool isLegalMaskedStore(Type *DataType) override {
1390  return Impl.isLegalMaskedStore(DataType);
1391  }
1392  bool isLegalMaskedLoad(Type *DataType) override {
1393  return Impl.isLegalMaskedLoad(DataType);
1394  }
1395  bool isLegalNTStore(Type *DataType, unsigned Alignment) override {
1396  return Impl.isLegalNTStore(DataType, Alignment);
1397  }
1398  bool isLegalNTLoad(Type *DataType, unsigned Alignment) override {
1399  return Impl.isLegalNTLoad(DataType, Alignment);
1400  }
1401  bool isLegalMaskedScatter(Type *DataType) override {
1402  return Impl.isLegalMaskedScatter(DataType);
1403  }
1404  bool isLegalMaskedGather(Type *DataType) override {
1405  return Impl.isLegalMaskedGather(DataType);
1406  }
1407  bool isLegalMaskedCompressStore(Type *DataType) override {
1408  return Impl.isLegalMaskedCompressStore(DataType);
1409  }
1410  bool isLegalMaskedExpandLoad(Type *DataType) override {
1411  return Impl.isLegalMaskedExpandLoad(DataType);
1412  }
1413  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
1414  return Impl.hasDivRemOp(DataType, IsSigned);
1415  }
1416  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
1417  return Impl.hasVolatileVariant(I, AddrSpace);
1418  }
1419  bool prefersVectorizedAddressing() override {
1420  return Impl.prefersVectorizedAddressing();
1421  }
1422  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
1423  bool HasBaseReg, int64_t Scale,
1424  unsigned AddrSpace) override {
1425  return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
1426  Scale, AddrSpace);
1427  }
1428  bool LSRWithInstrQueries() override {
1429  return Impl.LSRWithInstrQueries();
1430  }
1431  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
1432  return Impl.isTruncateFree(Ty1, Ty2);
1433  }
1434  bool isProfitableToHoist(Instruction *I) override {
1435  return Impl.isProfitableToHoist(I);
1436  }
1437  bool useAA() override { return Impl.useAA(); }
1438  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
1439  unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); }
1440  unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); }
1441  bool shouldBuildLookupTables() override {
1442  return Impl.shouldBuildLookupTables();
1443  }
1444  bool shouldBuildLookupTablesForConstant(Constant *C) override {
1445  return Impl.shouldBuildLookupTablesForConstant(C);
1446  }
1447  bool useColdCCForColdCall(Function &F) override {
1448  return Impl.useColdCCForColdCall(F);
1449  }
1450 
1451  unsigned getScalarizationOverhead(Type *Ty, bool Insert,
1452  bool Extract) override {
1453  return Impl.getScalarizationOverhead(Ty, Insert, Extract);
1454  }
1455  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1456  unsigned VF) override {
1457  return Impl.getOperandsScalarizationOverhead(Args, VF);
1458  }
1459 
1460  bool supportsEfficientVectorElementLoadStore() override {
1461  return Impl.supportsEfficientVectorElementLoadStore();
1462  }
1463 
1464  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
1465  return Impl.enableAggressiveInterleaving(LoopHasReductions);
1466  }
1467  const MemCmpExpansionOptions *enableMemCmpExpansion(
1468  bool IsZeroCmp) const override {
1469  return Impl.enableMemCmpExpansion(IsZeroCmp);
1470  }
1471  bool enableInterleavedAccessVectorization() override {
1472  return Impl.enableInterleavedAccessVectorization();
1473  }
1474  bool enableMaskedInterleavedAccessVectorization() override {
1475  return Impl.enableMaskedInterleavedAccessVectorization();
1476  }
1477  bool isFPVectorizationPotentiallyUnsafe() override {
1478  return Impl.isFPVectorizationPotentiallyUnsafe();
1479  }
1480  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
1481  unsigned BitWidth, unsigned AddressSpace,
1482  unsigned Alignment, bool *Fast) override {
1483  return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
1484  Alignment, Fast);
1485  }
1486  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
1487  return Impl.getPopcntSupport(IntTyWidthInBit);
1488  }
1489  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
1490 
1491  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
1492  return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
1493  }
1494 
1495  int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
1496 
1497  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1498  Type *Ty) override {
1499  return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
1500  }
1501  int getIntImmCost(const APInt &Imm, Type *Ty) override {
1502  return Impl.getIntImmCost(Imm, Ty);
1503  }
1504  int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1505  Type *Ty) override {
1506  return Impl.getIntImmCost(Opc, Idx, Imm, Ty);
1507  }
1508  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
1509  Type *Ty) override {
1510  return Impl.getIntImmCost(IID, Idx, Imm, Ty);
1511  }
1512  unsigned getNumberOfRegisters(bool Vector) override {
1513  return Impl.getNumberOfRegisters(Vector);
1514  }
1515  unsigned getRegisterBitWidth(bool Vector) const override {
1516  return Impl.getRegisterBitWidth(Vector);
1517  }
1518  unsigned getMinVectorRegisterBitWidth() override {
1519  return Impl.getMinVectorRegisterBitWidth();
1520  }
1521  bool shouldMaximizeVectorBandwidth(bool OptSize) const override {
1522  return Impl.shouldMaximizeVectorBandwidth(OptSize);
1523  }
1524  unsigned getMinimumVF(unsigned ElemWidth) const override {
1525  return Impl.getMinimumVF(ElemWidth);
1526  }
1527  bool shouldConsiderAddressTypePromotion(
1528  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
1529  return Impl.shouldConsiderAddressTypePromotion(
1530  I, AllowPromotionWithoutCommonHeader);
1531  }
1532  unsigned getCacheLineSize() override {
1533  return Impl.getCacheLineSize();
1534  }
1535  llvm::Optional<unsigned> getCacheSize(CacheLevel Level) override {
1536  return Impl.getCacheSize(Level);
1537  }
1538  llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) override {
1539  return Impl.getCacheAssociativity(Level);
1540  }
1541  unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
1542  unsigned getMinPrefetchStride() override {
1543  return Impl.getMinPrefetchStride();
1544  }
1545  unsigned getMaxPrefetchIterationsAhead() override {
1546  return Impl.getMaxPrefetchIterationsAhead();
1547  }
1548  unsigned getMaxInterleaveFactor(unsigned VF) override {
1549  return Impl.getMaxInterleaveFactor(VF);
1550  }
1551  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
1552  unsigned &JTSize) override {
1553  return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize);
1554  }
1555  unsigned
1556  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
1557  OperandValueKind Opd2Info,
1558  OperandValueProperties Opd1PropInfo,
1559  OperandValueProperties Opd2PropInfo,
1560  ArrayRef<const Value *> Args) override {
1561  return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
1562  Opd1PropInfo, Opd2PropInfo, Args);
1563  }
1564  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
1565  Type *SubTp) override {
1566  return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
1567  }
1568  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1569  const Instruction *I) override {
1570  return Impl.getCastInstrCost(Opcode, Dst, Src, I);
1571  }
1572  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
1573  unsigned Index) override {
1574  return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
1575  }
1576  int getCFInstrCost(unsigned Opcode) override {
1577  return Impl.getCFInstrCost(Opcode);
1578  }
1579  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
1580  const Instruction *I) override {
1581  return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
1582  }
1583  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
1584  return Impl.getVectorInstrCost(Opcode, Val, Index);
1585  }
1586  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1587  unsigned AddressSpace, const Instruction *I) override {
1588  return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
1589  }
1590  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1591  unsigned AddressSpace) override {
1592  return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
1593  }
1594  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
1595  Value *Ptr, bool VariableMask,
1596  unsigned Alignment) override {
1597  return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
1598  Alignment);
1599  }
1600  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
1601  ArrayRef<unsigned> Indices, unsigned Alignment,
1602  unsigned AddressSpace, bool UseMaskForCond,
1603  bool UseMaskForGaps) override {
1604  return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
1605  Alignment, AddressSpace,
1606  UseMaskForCond, UseMaskForGaps);
1607  }
1608  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1609  bool IsPairwiseForm) override {
1610  return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
1611  }
1612  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
1613  bool IsPairwiseForm, bool IsUnsigned) override {
1614  return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
1615  }
1616  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
1617  FastMathFlags FMF, unsigned ScalarizationCostPassed) override {
1618  return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
1619  ScalarizationCostPassed);
1620  }
1621  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1622  ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) override {
1623  return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
1624  }
1625  int getCallInstrCost(Function *F, Type *RetTy,
1626  ArrayRef<Type *> Tys) override {
1627  return Impl.getCallInstrCost(F, RetTy, Tys);
1628  }
1629  unsigned getNumberOfParts(Type *Tp) override {
1630  return Impl.getNumberOfParts(Tp);
1631  }
1632  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
1633  const SCEV *Ptr) override {
1634  return Impl.getAddressComputationCost(Ty, SE, Ptr);
1635  }
1636  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
1637  return Impl.getCostOfKeepingLiveOverCall(Tys);
1638  }
1639  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1640  MemIntrinsicInfo &Info) override {
1641  return Impl.getTgtMemIntrinsic(Inst, Info);
1642  }
1643  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
1644  return Impl.getAtomicMemIntrinsicMaxElementSize();
1645  }
1646  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1647  Type *ExpectedType) override {
1648  return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
1649  }
1650  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1651  unsigned SrcAlign,
1652  unsigned DestAlign) const override {
1653  return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAlign, DestAlign);
1654  }
1655  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
1656  LLVMContext &Context,
1657  unsigned RemainingBytes,
1658  unsigned SrcAlign,
1659  unsigned DestAlign) const override {
1660  Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
1661  SrcAlign, DestAlign);
1662  }
1663  bool areInlineCompatible(const Function *Caller,
1664  const Function *Callee) const override {
1665  return Impl.areInlineCompatible(Caller, Callee);
1666  }
1667  bool areFunctionArgsABICompatible(
1668  const Function *Caller, const Function *Callee,
1669  SmallPtrSetImpl<Argument *> &Args) const override {
1670  return Impl.areFunctionArgsABICompatible(Caller, Callee, Args);
1671  }
1672  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
1673  return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
1674  }
1675  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
1676  return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
1677  }
1678  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
1679  return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
1680  }
1681  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
1682  return Impl.isLegalToVectorizeLoad(LI);
1683  }
1684  bool isLegalToVectorizeStore(StoreInst *SI) const override {
1685  return Impl.isLegalToVectorizeStore(SI);
1686  }
1687  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1688  unsigned Alignment,
1689  unsigned AddrSpace) const override {
1690  return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
1691  AddrSpace);
1692  }
1693  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1694  unsigned Alignment,
1695  unsigned AddrSpace) const override {
1696  return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
1697  AddrSpace);
1698  }
1699  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1700  unsigned ChainSizeInBytes,
1701  VectorType *VecTy) const override {
1702  return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
1703  }
1704  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1705  unsigned ChainSizeInBytes,
1706  VectorType *VecTy) const override {
1707  return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
1708  }
1709  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
1710  ReductionFlags Flags) const override {
1711  return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
1712  }
1713  bool shouldExpandReduction(const IntrinsicInst *II) const override {
1714  return Impl.shouldExpandReduction(II);
1715  }
1716 
1717  unsigned getGISelRematGlobalCost() const override {
1718  return Impl.getGISelRematGlobalCost();
1719  }
1720 
1721  int getInstructionLatency(const Instruction *I) override {
1722  return Impl.getInstructionLatency(I);
1723  }
1724 };
1725 
1726 template <typename T>
1727 TargetTransformInfo::TargetTransformInfo(T Impl)
1728  : TTIImpl(new Model<T>(Impl)) {}
1729 
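// A minimal usage sketch (not part of this header): the constructor above
// type-erases any concrete TTI implementation behind Model<T>. The concrete
// type and constructor arguments below are illustrative assumptions; real
// targets build their TTIImpl from a TargetMachine and a Function.
//
//   SomeTargetTTIImpl Impl(TM, F);   // hypothetical target implementation
//   TargetTransformInfo TTI(Impl);   // wrapped in Model<SomeTargetTTIImpl>
//   bool HasFastPopcnt =
//       TTI.getPopcntSupport(32) == TargetTransformInfo::PSK_FastHardware;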
1730 /// Analysis pass providing the \c TargetTransformInfo.
1731 ///
1732 /// The core idea of the TargetIRAnalysis is to expose an interface through
1733 /// which LLVM targets can analyze and provide information about the middle
1734 /// end's target-independent IR. This supports use cases such as target-aware
1735 /// cost modeling of IR constructs.
1736 ///
1737 /// This is a function analysis because much of the cost modeling for targets
1738 /// is done in a subtarget specific way and LLVM supports compiling different
1739 /// functions targeting different subtargets in order to support runtime
1740 /// dispatch according to the observed subtarget.
1741 class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
1742 public:
1743  typedef TargetTransformInfo Result;
1744 
1745  /// Default construct a target IR analysis.
1746  ///
1747  /// This will use the module's datalayout to construct a baseline
1748  /// conservative TTI result.
1749  TargetIRAnalysis();
1750 
1751  /// Construct an IR analysis pass around a target-provide callback.
1752  ///
1753  /// The callback will be called with a particular function for which the TTI
1754  /// is needed and must return a TTI object for that function.
1755  TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
1756 
1757  // Value semantics. We spell out the constructors for MSVC.
1758  TargetIRAnalysis(const TargetIRAnalysis &Arg)
1759  : TTICallback(Arg.TTICallback) {}
1760  TargetIRAnalysis(TargetIRAnalysis &&Arg)
1761  : TTICallback(std::move(Arg.TTICallback)) {}
1762  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
1763  TTICallback = RHS.TTICallback;
1764  return *this;
1765  }
1766  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
1767  TTICallback = std::move(RHS.TTICallback);
1768  return *this;
1769  }
1770 
1771  Result run(const Function &F, FunctionAnalysisManager &);
1772 
1773 private:
1774  friend AnalysisInfoMixin<TargetIRAnalysis>;
1775  static AnalysisKey Key;
1776 
1777  /// The callback used to produce a result.
1778  ///
1779  /// We use a completely opaque callback so that targets can provide whatever
1780  /// mechanism they desire for constructing the TTI for a given function.
1781  ///
1782  /// FIXME: Should we really use std::function? It's relatively inefficient.
1783  /// It might be possible to arrange for even stateful callbacks to outlive
1784  /// the analysis and thus use a function_ref which would be lighter weight.
1785  /// This may also be less error prone as the callback is likely to reference
1786  /// the external TargetMachine, and that reference needs to never dangle.
1787  std::function<Result(const Function &)> TTICallback;
1788 
1789  /// Helper function used as the callback in the default constructor.
1790  static Result getDefaultTTI(const Function &F);
1791 };
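// A hedged sketch of how this analysis is typically wired into the new pass
// manager: register a TargetIRAnalysis whose callback asks the TargetMachine
// for a per-function TTI, then query the result from a pass. `TM` is an
// assumed TargetMachine pointer; error handling and the rest of the pipeline
// setup are omitted.
//
//   FunctionAnalysisManager FAM;
//   FAM.registerPass([&] {
//     return TargetIRAnalysis(
//         [&](const Function &F) { return TM->getTargetTransformInfo(F); });
//   });
//   // ... later, inside a function pass:
//   //   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);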
1792 
1793 /// Wrapper pass for TargetTransformInfo.
1794 ///
1795 /// This pass can be constructed from a TTI object which it stores internally
1796 /// and is queried by passes.
1797 class TargetTransformInfoWrapperPass : public ImmutablePass {
1798  TargetIRAnalysis TIRA;
1799  Optional<TargetTransformInfo> TTI;
1800 
1801  virtual void anchor();
1802 
1803 public:
1804  static char ID;
1805 
1806  /// We must provide a default constructor for the pass but it should
1807  /// never be used.
1808  ///
1809  /// Use the constructor below or call one of the creation routines.
1810  TargetTransformInfoWrapperPass();
1811 
1812  explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
1813 
1814  TargetTransformInfo &getTTI(const Function &F);
1815 };
1816 
1817 /// Create an analysis pass wrapper around a TTI object.
1818 ///
1819 /// This analysis pass just holds the TTI instance and makes it available to
1820 /// clients.
1821 ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
1822 
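// For the legacy pass manager, a pass declares the wrapper above as a
// required analysis and asks it for the TTI of the function being processed.
// This is a sketch of the conventional pattern; `MyPass` is a hypothetical
// legacy FunctionPass.
//
//   void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
//     AU.addRequired<TargetTransformInfoWrapperPass>();
//   }
//   bool MyPass::runOnFunction(Function &F) {
//     TargetTransformInfo &TTI =
//         getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
//     return false;
//   }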
1823 } // End llvm namespace
1824 
1825 #endif