TargetTransformInfo.h
1//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This pass exposes codegen information to IR-level passes. Every
10/// transformation that uses codegen information is broken into three parts:
11/// 1. The IR-level analysis pass.
12/// 2. The IR-level transformation interface which provides the needed
13/// information.
14/// 3. Codegen-level implementation which uses target-specific hooks.
15///
16/// This file defines #2, which is the interface that IR-level transformations
17/// use for querying the codegen.
18///
19//===----------------------------------------------------------------------===//
20
21#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
22#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
23
24#include "llvm/ADT/APInt.h"
25#include "llvm/IR/FMF.h"
26#include "llvm/IR/InstrTypes.h"
27#include "llvm/IR/PassManager.h"
28#include "llvm/Pass.h"
29#include "llvm/Support/AtomicOrdering.h"
30#include "llvm/Support/BranchProbability.h"
31#include "llvm/Support/InstructionCost.h"
32#include <functional>
33#include <optional>
34#include <utility>
35
36namespace llvm {
37
38namespace Intrinsic {
39typedef unsigned ID;
40}
41
42class AllocaInst;
43class AssumptionCache;
44class BlockFrequencyInfo;
45class DominatorTree;
46class BranchInst;
47class CallBase;
48class Function;
49class GlobalValue;
50class InstCombiner;
51class OptimizationRemarkEmitter;
52class InterleavedAccessInfo;
53class IntrinsicInst;
54class LoadInst;
55class Loop;
56class LoopInfo;
57class LoopVectorizationLegality;
58class ProfileSummaryInfo;
59class RecurrenceDescriptor;
60class SCEV;
61class ScalarEvolution;
62class StoreInst;
63class SwitchInst;
64class TargetLibraryInfo;
65class Type;
66class User;
67class Value;
68class VPIntrinsic;
69struct KnownBits;
70
71/// Information about a load/store intrinsic defined by the target.
72struct MemIntrinsicInfo {
73 /// This is the pointer that the intrinsic is loading from or storing to.
74 /// If this is non-null, then analysis/optimization passes can assume that
75 /// this intrinsic is functionally equivalent to a load/store from this
76 /// pointer.
77 Value *PtrVal = nullptr;
78
79 // Ordering for atomic operations.
80 AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
81
82 // Same Id is set by the target for corresponding load/store intrinsics.
83 unsigned short MatchingId = 0;
84
85 bool ReadMem = false;
86 bool WriteMem = false;
87 bool IsVolatile = false;
88
89 bool isUnordered() const {
90 return (Ordering == AtomicOrdering::NotAtomic ||
91 Ordering == AtomicOrdering::Unordered) &&
92 !IsVolatile;
93 }
94};
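// [Editor's example - not part of the original header.] A minimal sketch of how
// an IR-level pass might consume MemIntrinsicInfo, assuming `TTI` is a
// TargetTransformInfo reference and `II` an IntrinsicInst*; getTgtMemIntrinsic()
// is the TTI hook (declared further down in this header) that fills the struct:
//
//   MemIntrinsicInfo Info;
//   if (TTI.getTgtMemIntrinsic(II, Info) && Info.PtrVal && Info.ReadMem &&
//       !Info.WriteMem && Info.isUnordered())
//     ; // II can be treated like a simple load from Info.PtrVal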
95
96/// Attributes of a target dependent hardware loop.
97struct HardwareLoopInfo {
98 HardwareLoopInfo() = delete;
99 HardwareLoopInfo(Loop *L);
100 Loop *L = nullptr;
101 BasicBlock *ExitBlock = nullptr;
102 BranchInst *ExitBranch = nullptr;
103 const SCEV *ExitCount = nullptr;
104 IntegerType *CountType = nullptr;
105 Value *LoopDecrement = nullptr; // Decrement the loop counter by this
106 // value in every iteration.
107 bool IsNestingLegal = false; // Can a hardware loop be a parent to
108 // another hardware loop?
109 bool CounterInReg = false; // Should loop counter be updated in
110 // the loop via a phi?
111 bool PerformEntryTest = false; // Generate the intrinsic which also performs
112 // icmp ne zero on the loop counter value and
113 // produces an i1 to guard the loop entry.
114 bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,
115 DominatorTree &DT, bool ForceNestedLoop = false,
116 bool ForceHardwareLoopPHI = false);
117 bool canAnalyze(LoopInfo &LI);
118};
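// [Editor's example - not part of the original header.] A hedged sketch of the
// usual flow: construct the info for a loop, ask the target whether a hardware
// loop is profitable, then validate the candidate. `TTI`, `SE`, `AC`, `LibInfo`,
// `LI` and `DT` are assumed to be the usual analyses held by the caller:
//
//   HardwareLoopInfo HWLoopInfo(L);
//   if (TTI.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo) &&
//       HWLoopInfo.isHardwareLoopCandidate(SE, *LI, *DT))
//     ; // emit the hardware-loop intrinsics using HWLoopInfo.ExitCount etc.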
119
120class IntrinsicCostAttributes {
121 const IntrinsicInst *II = nullptr;
122 Type *RetTy = nullptr;
123 Intrinsic::ID IID;
124 SmallVector<Type *, 4> ParamTys;
125 SmallVector<const Value *, 4> Arguments;
126 FastMathFlags FMF;
127 // If ScalarizationCost is UINT_MAX, the cost of scalarizing the
128 // arguments and the return value will be computed based on types.
129 InstructionCost ScalarizationCost = InstructionCost::getInvalid();
130
131public:
132 IntrinsicCostAttributes(
133 Intrinsic::ID Id, const CallBase &CI,
134 InstructionCost ScalarCost = InstructionCost::getInvalid(),
135 bool TypeBasedOnly = false);
136
137 IntrinsicCostAttributes(
138 Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys,
139 FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr,
140 InstructionCost ScalarCost = InstructionCost::getInvalid());
141
142 IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
143 ArrayRef<const Value *> Args);
144
145 IntrinsicCostAttributes(
146 Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args,
147 ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(),
148 const IntrinsicInst *I = nullptr,
149 InstructionCost ScalarCost = InstructionCost::getInvalid());
150
151 Intrinsic::ID getID() const { return IID; }
152 const IntrinsicInst *getInst() const { return II; }
153 Type *getReturnType() const { return RetTy; }
154 FastMathFlags getFlags() const { return FMF; }
155 InstructionCost getScalarizationCost() const { return ScalarizationCost; }
156 const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }
157 const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }
158
159 bool isTypeBasedOnly() const {
160 return Arguments.empty();
161 }
162
163 bool skipScalarizationCost() const { return ScalarizationCost.isValid(); }
164};
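// [Editor's example - not part of the original header.] The attributes bundle
// what the cost model needs to price an intrinsic without materializing IR. A
// sketch, assuming `CB` is a CallBase for an llvm.fmuladd call and `TTI` is in
// scope (getIntrinsicInstrCost is declared later in this header):
//
//   IntrinsicCostAttributes ICA(Intrinsic::fmuladd, CB);
//   InstructionCost Cost =
//       TTI.getIntrinsicInstrCost(ICA, TTI::TCK_RecipThroughput);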
165
166enum class TailFoldingStyle {
167 /// Don't use tail folding
168 None,
169 /// Use predicate only to mask operations on data in the loop.
170 /// When the VL is not known to be a power-of-2, this method requires a
171 /// runtime overflow check for the i + VL in the loop because it compares the
172 /// scalar induction variable against the tripcount rounded up by VL which may
173 /// overflow. When the VL is a power-of-2, both the increment and uprounded
174 /// tripcount will overflow to 0, which does not require a runtime check
175 /// since the loop is exited when the loop induction variable equals the
176 /// uprounded trip-count, which are both 0.
177 Data,
178 /// Same as Data, but avoids using the get.active.lane.mask intrinsic to
179 /// calculate the mask and instead implements this with a
180 /// splat/stepvector/cmp.
181 /// FIXME: Can this kind be removed now that SelectionDAGBuilder expands the
182 /// active.lane.mask intrinsic when it is not natively supported?
183 DataWithoutLaneMask,
184 /// Use predicate to control both data and control flow.
185 /// This method always requires a runtime overflow check for the i + VL
186 /// increment inside the loop, because it uses the result directly in the
187 /// active.lane.mask to calculate the mask for the next iteration. If the
188 /// increment overflows, the mask is no longer correct.
189 DataAndControlFlow,
190 /// Use predicate to control both data and control flow, but modify
191 /// the trip count so that a runtime overflow check can be avoided
192 /// and such that the scalar epilogue loop can always be removed.
193 DataAndControlFlowWithoutRuntimeCheck,
194};
195
196struct TailFoldingInfo {
197 TargetLibraryInfo *TLI;
198 LoopVectorizationLegality *LVL;
199 InterleavedAccessInfo *IAI;
200 TailFoldingInfo(TargetLibraryInfo *TLI, LoopVectorizationLegality *LVL,
201 InterleavedAccessInfo *IAI)
202 : TLI(TLI), LVL(LVL), IAI(IAI) {}
203};
204
205class TargetTransformInfo;
206typedef TargetTransformInfo TTI;
207
208/// This pass provides access to the codegen interfaces that are needed
209/// for IR-level transformations.
210class TargetTransformInfo {
211public:
212 /// Construct a TTI object using a type implementing the \c Concept
213 /// API below.
214 ///
215 /// This is used by targets to construct a TTI wrapping their target-specific
216 /// implementation that encodes appropriate costs for their target.
217 template <typename T> TargetTransformInfo(T Impl);
218
219 /// Construct a baseline TTI object using a minimal implementation of
220 /// the \c Concept API below.
221 ///
222 /// The TTI implementation will reflect the information in the DataLayout
223 /// provided if non-null.
224 explicit TargetTransformInfo(const DataLayout &DL);
225
226 // Provide move semantics.
227 TargetTransformInfo(TargetTransformInfo &&Arg);
228 TargetTransformInfo &operator=(TargetTransformInfo &&RHS);
229
230 // We need to define the destructor out-of-line to define our sub-classes
231 // out-of-line.
232 ~TargetTransformInfo();
233
234 /// Handle the invalidation of this information.
235 ///
236 /// When used as a result of \c TargetIRAnalysis this method will be called
237 /// when the function this was computed for changes. When it returns false,
238 /// the information is preserved across those changes.
239 bool invalidate(Function &, const PreservedAnalyses &,
240 FunctionAnalysisManager::Invalidator &) {
241 // FIXME: We should probably in some way ensure that the subtarget
242 // information for a function hasn't changed.
243 return false;
244 }
245
246 /// \name Generic Target Information
247 /// @{
248
249 /// The kind of cost model.
250 ///
251 /// There are several different cost models that can be customized by the
252 /// target. The normalization of each cost model may be target specific.
253 /// e.g. TCK_SizeAndLatency should be comparable to target thresholds such as
254 /// those derived from MCSchedModel::LoopMicroOpBufferSize etc.
255 enum TargetCostKind {
256 TCK_RecipThroughput, ///< Reciprocal throughput.
257 TCK_Latency, ///< The latency of instruction.
258 TCK_CodeSize, ///< Instruction code size.
259 TCK_SizeAndLatency ///< The weighted sum of size and latency.
260 };
261
262 /// Underlying constants for 'cost' values in this interface.
263 ///
264 /// Many APIs in this interface return a cost. This enum defines the
265 /// fundamental values that should be used to interpret (and produce) those
266 /// costs. The costs are returned as an int rather than a member of this
267 /// enumeration because it is expected that the cost of one IR instruction
268 /// may have a multiplicative factor to it or otherwise won't fit directly
269 /// into the enum. Moreover, it is common to sum or average costs which works
270 /// better as simple integral values. Thus this enum only provides constants.
271 /// Also note that the returned costs are signed integers to make it natural
272 /// to add, subtract, and test with zero (a common boundary condition). It is
273 /// not expected that 2^32 is a realistic cost to be modeling at any point.
274 ///
275 /// Note that these costs should usually reflect the intersection of code-size
276 /// cost and execution cost. A free instruction is typically one that folds
277 /// into another instruction. For example, reg-to-reg moves can often be
278 /// skipped by renaming the registers in the CPU, but they still are encoded
279 /// and thus wouldn't be considered 'free' here.
280 enum TargetCostConstants {
281 TCC_Free = 0, ///< Expected to fold away in lowering.
282 TCC_Basic = 1, ///< The cost of a typical 'add' instruction.
283 TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
284 };
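// [Editor's example - not part of the original header.] Because costs are plain
// signed integers, they can be summed and compared against the TCC_* anchors. A
// sketch of a "cheap block" test, with `BB` and `TTI` assumed in scope and the
// 4x budget chosen arbitrarily:
//
//   InstructionCost Total = 0;
//   for (Instruction &I : BB)
//     Total += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
//   bool Cheap = Total.isValid() && Total <= 4 * TargetTransformInfo::TCC_Basic;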
285
286 /// Estimate the cost of a GEP operation when lowered.
287 ///
288 /// \p PointeeType is the source element type of the GEP.
289 /// \p Ptr is the base pointer operand.
290 /// \p Operands is the list of indices following the base pointer.
291 ///
292 /// \p AccessType is a hint as to what type of memory might be accessed by
293 /// users of the GEP. getGEPCost will use it to determine if the GEP can be
294 /// folded into the addressing mode of a load/store. If AccessType is null,
295 /// then the resulting target type based off of PointeeType will be used as an
296 /// approximation.
297 InstructionCost
298 getGEPCost(Type *PointeeType, const Value *Ptr,
299 ArrayRef<const Value *> Operands, Type *AccessType = nullptr,
300 TargetCostKind CostKind = TCK_SizeAndLatency) const;
301
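// [Editor's example - not part of the original header.] For a GEP that feeds a
// load, passing the loaded type as AccessType lets the target recognize when
// the GEP folds into the load's addressing mode. `GEP` (a GetElementPtrInst*)
// and `LoadTy` are placeholders:
//
//   SmallVector<const Value *, 4> Indices(GEP->indices());
//   InstructionCost C =
//       TTI.getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(),
//                      Indices, LoadTy, TargetTransformInfo::TCK_SizeAndLatency);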
302 /// Describe known properties for a set of pointers.
303 struct PointersChainInfo {
304 /// All the GEPs in a set have the same base address.
305 unsigned IsSameBaseAddress : 1;
306 /// These properties are only valid if SameBaseAddress is set.
307 /// True if all pointers are separated by a unit stride.
308 unsigned IsUnitStride : 1;
309 /// True if the distance between any two neighbouring pointers is a known value.
310 unsigned IsKnownStride : 1;
311 unsigned Reserved : 29;
312
313 bool isSameBase() const { return IsSameBaseAddress; }
314 bool isUnitStride() const { return IsSameBaseAddress && IsUnitStride; }
315 bool isKnownStride() const { return IsSameBaseAddress && IsKnownStride; }
316
317 static PointersChainInfo getUnitStride() {
318 return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/1,
319 /*IsKnownStride=*/1, 0};
320 }
321 static PointersChainInfo getKnownStride() {
322 return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/0,
323 /*IsKnownStride=*/1, 0};
324 }
325 static PointersChainInfo getUnknownStride() {
326 return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/0,
327 /*IsKnownStride=*/0, 0};
328 }
329 };
330 static_assert(sizeof(PointersChainInfo) == 4, "Was size increase justified?");
331
332 /// Estimate the cost of a chain of pointers (typically pointer operands of a
333 /// chain of loads or stores within same block) operations set when lowered.
334 /// \p AccessTy is the type of the loads/stores that will ultimately use the
335 /// \p Ptrs.
336 InstructionCost
337 getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
338 const PointersChainInfo &Info, Type *AccessTy,
339 TargetCostKind CostKind = TTI::TCK_RecipThroughput
340
341 ) const;
342
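// [Editor's example - not part of the original header.] SLP-style clients price
// a whole chain of pointer operands at once. Assuming `Ptrs` holds the pointer
// operands of consecutive loads that share `Base`, and the stride is known to
// be one element:
//
//   InstructionCost C = TTI.getPointersChainCost(
//       Ptrs, Base, TargetTransformInfo::PointersChainInfo::getUnitStride(),
//       AccessTy, TargetTransformInfo::TCK_RecipThroughput);
//
// `Ptrs`, `Base` and `AccessTy` are placeholders for values the caller owns.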
343 /// \returns A value by which our inlining threshold should be multiplied.
344 /// This is primarily used to bump up the inlining threshold wholesale on
345 /// targets where calls are unusually expensive.
346 ///
347 /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
348 /// individual classes of instructions would be better.
349 unsigned getInliningThresholdMultiplier() const;
350
353
354 /// \returns A value to be added to the inlining threshold.
355 unsigned adjustInliningThreshold(const CallBase *CB) const;
356
357 /// \returns The cost of having an Alloca in the caller if not inlined, to be
358 /// added to the threshold
359 unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const;
360
361 /// \returns Vector bonus in percent.
362 ///
363 /// Vector bonuses: We want to more aggressively inline vector-dense kernels
364 /// and apply this bonus based on the percentage of vector instructions. A
365 /// bonus is applied if the vector instructions exceed 50% and half that
366 /// amount is applied if it exceeds 10%. Note that these bonuses are somewhat
367 /// arbitrary and evolved over time by accident as much as because they are
368 /// principled bonuses.
369 /// FIXME: It would be nice to base the bonus values on something more
370 /// scientific. A target may have no bonus on vector instructions.
371 int getInlinerVectorBonusPercent() const;
372
373 /// \return the expected cost of a memcpy, which could e.g. depend on the
374 /// source/destination type and alignment and the number of bytes copied.
375 InstructionCost getMemcpyCost(const Instruction *I) const;
376
377 /// Returns the maximum memset / memcpy size in bytes that still makes it
378 /// profitable to inline the call.
379 uint64_t getMaxMemIntrinsicInlineSizeThreshold() const;
380
381 /// \return The estimated number of case clusters when lowering \p 'SI'.
382 /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
383 /// table.
384 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
385 unsigned &JTSize,
386 ProfileSummaryInfo *PSI,
387 BlockFrequencyInfo *BFI) const;
388
389 /// Estimate the cost of a given IR user when lowered.
390 ///
391 /// This can estimate the cost of either a ConstantExpr or Instruction when
392 /// lowered.
393 ///
394 /// \p Operands is a list of operands which can be a result of transformations
395 /// of the current operands. The number of the operands on the list must equal
396 /// the number of the current operands the IR user has. Their order on the
397 /// list must be the same as the order of the current operands the IR user
398 /// has.
399 ///
400 /// The returned cost is defined in terms of \c TargetCostConstants, see its
401 /// comments for a detailed explanation of the cost values.
402 InstructionCost getInstructionCost(const User *U,
403 ArrayRef<const Value *> Operands,
404 TargetCostKind CostKind) const;
405
406 /// This is a helper function which calls the three-argument
407 /// getInstructionCost with \p Operands which are the current operands U has.
408 InstructionCost getInstructionCost(const User *U,
409 TargetCostKind CostKind) const {
410 SmallVector<const Value *, 4> Operands(U->operand_values());
411 return getInstructionCost(U, Operands, CostKind);
412 }
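// [Editor's example - not part of the original header.] The two-argument
// overload above is the common entry point when the IR user's current operands
// are good enough. With `I` an existing Instruction and `TTI` in scope:
//
//   InstructionCost C =
//       TTI.getInstructionCost(&I, TargetTransformInfo::TCK_RecipThroughput);
//   if (C.isValid() && C == TargetTransformInfo::TCC_Free)
//     ; // expected to fold away during lowering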
413
414 /// If a branch or a select condition is skewed in one direction by more than
415 /// this factor, it is very likely to be predicted correctly.
416 BranchProbability getPredictableBranchThreshold() const;
417
418 /// Return true if branch divergence exists.
419 ///
420 /// Branch divergence has a significantly negative impact on GPU performance
421 /// when threads in the same wavefront take different paths due to conditional
422 /// branches.
423 ///
424 /// If \p F is passed, provides a context function. If \p F is known to only
425 /// execute in a single threaded environment, the target may choose to skip
426 /// uniformity analysis and assume all values are uniform.
427 bool hasBranchDivergence(const Function *F = nullptr) const;
428
429 /// Returns whether V is a source of divergence.
430 ///
431 /// This function provides the target-dependent information for
432 /// the target-independent UniformityAnalysis.
433 bool isSourceOfDivergence(const Value *V) const;
434
435 // Returns true for the target-specific set of operations
436 // that produce a uniform result even when taking
437 // non-uniform arguments.
438 bool isAlwaysUniform(const Value *V) const;
439
440 /// Query the target whether the specified address space cast from FromAS to
441 /// ToAS is valid.
442 bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
443
444 /// Return false if a \p AS0 address cannot possibly alias a \p AS1 address.
445 bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const;
446
447 /// Returns the address space ID for a target's 'flat' address space. Note
448 /// this is not necessarily the same as addrspace(0), which LLVM sometimes
449 /// refers to as the generic address space. The flat address space is a
450 /// generic address space that can be used to access multiple segments of memory
451 /// with different address spaces. Access of a memory location through a
452 /// pointer with this address space is expected to be legal but slower
453 /// compared to the same memory location accessed through a pointer with a
454 /// different address space.
455 //
456 /// This is for targets with different pointer representations which can
457 /// be converted with the addrspacecast instruction. If a pointer is converted
458 /// to this address space, optimizations should attempt to replace the access
459 /// with the source address space.
460 ///
461 /// \returns ~0u if the target does not have such a flat address space to
462 /// optimize away.
463 unsigned getFlatAddressSpace() const;
464
465 /// Return any intrinsic address operand indexes which may be rewritten if
466 /// they use a flat address space pointer.
467 ///
468 /// \returns true if the intrinsic was handled.
469 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
470 Intrinsic::ID IID) const;
471
472 bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
473
474 /// Return true if globals in this address space can have initializers other
475 /// than `undef`.
476 bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const;
477
478 unsigned getAssumedAddrSpace(const Value *V) const;
479
480 bool isSingleThreaded() const;
481
482 std::pair<const Value *, unsigned>
483 getPredicatedAddrSpace(const Value *V) const;
484
485 /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
486 /// NewV, which has a different address space. This should happen for every
487 /// operand index that collectFlatAddressOperands returned for the intrinsic.
488 /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the
489 /// new value (which may be the original \p II with modified operands).
490 Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
491 Value *NewV) const;
492
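// [Editor's example - not part of the original header.] InferAddressSpaces-style
// rewriting combines the flat-address-space hooks above: first ask which operand
// indices of a target intrinsic may be rewritten, then rewrite once a more
// specific address space has been inferred. `II`, `OldV` and `NewV` are
// placeholders:
//
//   SmallVector<int, 2> OpIndexes;
//   if (TTI.collectFlatAddressOperands(OpIndexes, II->getIntrinsicID()))
//     if (Value *NewIntr = TTI.rewriteIntrinsicWithAddressSpace(II, OldV, NewV))
//       ; // use NewIntr in place of II (it may be II itself, updated in place)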
493 /// Test whether calls to a function lower to actual program function
494 /// calls.
495 ///
496 /// The idea is to test whether the program is likely to require a 'call'
497 /// instruction or equivalent in order to call the given function.
498 ///
499 /// FIXME: It's not clear that this is a good or useful query API. Clients
500 /// should probably move to simpler cost metrics using the above.
501 /// Alternatively, we could split the cost interface into distinct code-size
502 /// and execution-speed costs. This would allow modelling the core of this
503 /// query more accurately as a call is a single small instruction, but
504 /// incurs significant execution cost.
505 bool isLoweredToCall(const Function *F) const;
506
507 struct LSRCost {
508 /// TODO: Some of these could be merged. Also, a lexical ordering
509 /// isn't always optimal.
510 unsigned Insns;
511 unsigned NumRegs;
512 unsigned AddRecCost;
513 unsigned NumIVMuls;
514 unsigned NumBaseAdds;
515 unsigned ImmCost;
516 unsigned SetupCost;
517 unsigned ScaleCost;
518 };
519
520 /// Parameters that control the generic loop unrolling transformation.
521 struct UnrollingPreferences {
522 /// The cost threshold for the unrolled loop. Should be relative to the
523 /// getInstructionCost values returned by this API, and the expectation is
524 /// that the unrolled loop's instructions when run through that interface
525 /// should not exceed this cost. However, this is only an estimate. Also,
526 /// specific loops may be unrolled even with a cost above this threshold if
527 /// deemed profitable. Set this to UINT_MAX to disable the loop body cost
528 /// restriction.
529 unsigned Threshold;
530 /// If complete unrolling will reduce the cost of the loop, we will boost
531 /// the Threshold by a certain percent to allow more aggressive complete
532 /// unrolling. This value provides the maximum boost percentage that we
533 /// can apply to Threshold (The value should be no less than 100).
534 /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
535 /// MaxPercentThresholdBoost / 100)
536 /// E.g. if complete unrolling reduces the loop execution time by 50%
537 /// then we boost the threshold by the factor of 2x. If unrolling is not
538 /// expected to reduce the running time, then we do not increase the
539 /// threshold.
540 unsigned MaxPercentThresholdBoost;
541 /// The cost threshold for the unrolled loop when optimizing for size (set
542 /// to UINT_MAX to disable).
543 unsigned OptSizeThreshold;
544 /// The cost threshold for the unrolled loop, like Threshold, but used
545 /// for partial/runtime unrolling (set to UINT_MAX to disable).
546 unsigned PartialThreshold;
547 /// The cost threshold for the unrolled loop when optimizing for size, like
548 /// OptSizeThreshold, but used for partial/runtime unrolling (set to
549 /// UINT_MAX to disable).
550 unsigned PartialOptSizeThreshold;
551 /// A forced unrolling factor (the number of concatenated bodies of the
552 /// original loop in the unrolled loop body). When set to 0, the unrolling
553 /// transformation will select an unrolling factor based on the current cost
554 /// threshold and other factors.
555 unsigned Count;
556 /// Default unroll count for loops with run-time trip count.
557 unsigned DefaultUnrollRuntimeCount;
558 // Set the maximum unrolling factor. The unrolling factor may be selected
559 // using the appropriate cost threshold, but may not exceed this number
560 // (set to UINT_MAX to disable). This does not apply in cases where the
561 // loop is being fully unrolled.
562 unsigned MaxCount;
563 /// Set the maximum upper bound of trip count. Allowing the MaxUpperBound
564 /// to be overridden by a target gives more flexibility on certain cases.
565 /// By default, MaxUpperBound uses UnrollMaxUpperBound, whose value is 8.
566 unsigned MaxUpperBound;
567 /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
568 /// applies even if full unrolling is selected. This allows a target to fall
569 /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
570 unsigned FullUnrollMaxCount;
571 // Represents number of instructions optimized when "back edge"
572 // becomes "fall through" in unrolled loop.
573 // For now we count a conditional branch on a backedge and a comparison
574 // feeding it.
575 unsigned BEInsns;
576 /// Allow partial unrolling (unrolling of loops to expand the size of the
577 /// loop body, not only to eliminate small constant-trip-count loops).
578 bool Partial;
579 /// Allow runtime unrolling (unrolling of loops to expand the size of the
580 /// loop body even when the number of loop iterations is not known at
581 /// compile time).
582 bool Runtime;
583 /// Allow generation of a loop remainder (extra iterations after unroll).
584 bool AllowRemainder;
585 /// Allow emitting expensive instructions (such as divisions) when computing
586 /// the trip count of a loop for runtime unrolling.
587 bool AllowExpensiveTripCount;
588 /// Apply loop unroll on any kind of loop
589 /// (mainly to loops that fail runtime unrolling).
590 bool Force;
591 /// Allow using trip count upper bound to unroll loops.
592 bool UpperBound;
593 /// Allow unrolling of all the iterations of the runtime loop remainder.
594 bool UnrollRemainder;
595 /// Allow unroll and jam. Used to enable unroll and jam for the target.
596 bool UnrollAndJam;
597 /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
598 /// value above is used during unroll and jam for the outer loop size.
599 /// This value is used in the same manner to limit the size of the inner
600 /// loop.
601 unsigned UnrollAndJamInnerLoopThreshold;
602 /// Don't allow loop unrolling to simulate more than this number of
603 /// iterations when checking full unroll profitability
604 unsigned MaxIterationsCountToAnalyze;
605 /// Don't disable runtime unroll for the loops which were vectorized.
606 bool UnrollVectorizedLoop = false;
607 };
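// [Editor's example - not part of the original header.] Loop unrolling seeds the
// struct with target-independent defaults, lets the target adjust it, and then
// reads the fields back. `L`, `SE`, `ORE`, `TTI` and `DefaultThreshold` are
// assumed to be supplied by the pass:
//
//   TargetTransformInfo::UnrollingPreferences UP;
//   UP.Threshold = DefaultThreshold;
//   UP.Partial = UP.Runtime = false;
//   TTI.getUnrollingPreferences(L, SE, UP, &ORE);
//   if (UP.Count != 0)
//     ; // the target forced a specific unroll factor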
608
609 /// Get target-customized preferences for the generic loop unrolling
610 /// transformation. The caller will initialize UP with the current
611 /// target-independent defaults.
612 void getUnrollingPreferences(Loop *L, ScalarEvolution &,
613 UnrollingPreferences &UP,
614 OptimizationRemarkEmitter *ORE) const;
615
616 /// Query the target whether it would be profitable to convert the given loop
617 /// into a hardware loop.
618 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
619 AssumptionCache &AC, TargetLibraryInfo *LibInfo,
620 HardwareLoopInfo &HWLoopInfo) const;
621
622 /// Query the target whether it would be preferred to create a predicated
623 /// vector loop, which can avoid the need to emit a scalar epilogue loop.
624 bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const;
625
626 /// Query the target what the preferred style of tail folding is.
627 /// \param IVUpdateMayOverflow Tells whether it is known if the IV update
628 /// may (or will never) overflow for the suggested VF/UF in the given loop.
629 /// Targets can use this information to select a more optimal tail folding
630 /// style. The value conservatively defaults to true, such that no assumptions
631 /// are made on overflow.
632 TailFoldingStyle
633 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const;
634
635 // Parameters that control the loop peeling transformation
636 struct PeelingPreferences {
637 /// A forced peeling factor (the number of bodies of the original loop
638 /// that should be peeled off before the loop body). When set to 0, a
639 /// peeling factor is chosen based on profile information and other factors.
640 unsigned PeelCount;
641 /// Allow peeling off loop iterations.
642 bool AllowPeeling;
643 /// Allow peeling off loop iterations for loop nests.
644 bool AllowLoopNestsPeeling;
645 /// Allow peeling based on profile. Used to enable peeling off all
646 /// iterations based on the provided profile.
647 /// If the value is true the peeling cost model can decide to peel only
648 /// some iterations and in this case it will set this to false.
649 bool PeelProfiledIterations;
650 };
651
652 /// Get target-customized preferences for the generic loop peeling
653 /// transformation. The caller will initialize \p PP with the current
654 /// target-independent defaults with information from \p L and \p SE.
655 void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
656 PeelingPreferences &PP) const;
657
658 /// Targets can implement their own combinations for target-specific
659 /// intrinsics. This function will be called from the InstCombine pass every
660 /// time a target-specific intrinsic is encountered.
661 ///
662 /// \returns std::nullopt to not do anything target specific or a value that
663 /// will be returned from the InstCombiner. It is possible to return null and
664 /// stop further processing of the intrinsic by returning nullptr.
665 std::optional<Instruction *> instCombineIntrinsic(InstCombiner & IC,
666 IntrinsicInst & II) const;
667 /// Can be used to implement target-specific instruction combining.
668 /// \see instCombineIntrinsic
669 std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
670 InstCombiner & IC, IntrinsicInst & II, APInt DemandedMask,
671 KnownBits & Known, bool &KnownBitsComputed) const;
672 /// Can be used to implement target-specific instruction combining.
673 /// \see instCombineIntrinsic
674 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
675 InstCombiner & IC, IntrinsicInst & II, APInt DemandedElts,
676 APInt & UndefElts, APInt & UndefElts2, APInt & UndefElts3,
677 std::function<void(Instruction *, unsigned, APInt, APInt &)>
678 SimplifyAndSetOp) const;
679 /// @}
680
681 /// \name Scalar Target Information
682 /// @{
683
684 /// Flags indicating the kind of support for population count.
685 ///
686 /// Compared to the SW implementation, HW support is supposed to
687 /// significantly boost the performance when the population is dense, and it
688 /// may or may not degrade performance if the population is sparse. A HW
689 /// support is considered as "Fast" if it can outperform, or is on a par
690 /// with, SW implementation when the population is sparse; otherwise, it is
691 /// considered as "Slow".
692 enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };
693
694 /// Return true if the specified immediate is legal add immediate, that
695 /// is the target has add instructions which can add a register with the
696 /// immediate without having to materialize the immediate into a register.
697 bool isLegalAddImmediate(int64_t Imm) const;
698
699 /// Return true if the specified immediate is legal icmp immediate,
700 /// that is the target has icmp instructions which can compare a register
701 /// against the immediate without having to materialize the immediate into a
702 /// register.
703 bool isLegalICmpImmediate(int64_t Imm) const;
704
705 /// Return true if the addressing mode represented by AM is legal for
706 /// this target, for a load/store of the specified type.
707 /// The type may be VoidTy, in which case only return true if the addressing
708 /// mode is legal for a load/store of any legal type.
709 /// If target returns true in LSRWithInstrQueries(), I may be valid.
710 /// TODO: Handle pre/postinc as well.
711 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
712 bool HasBaseReg, int64_t Scale,
713 unsigned AddrSpace = 0,
714 Instruction *I = nullptr) const;
715
716 /// Return true if LSR cost of C1 is lower than C2.
717 bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
718 const TargetTransformInfo::LSRCost &C2) const;
719
720 /// Return true if LSR major cost is number of registers. Targets which
721 /// implement their own isLSRCostLess and unset number of registers as major
722 /// cost should return false, otherwise return true.
723 bool isNumRegsMajorCostOfLSR() const;
724
725 /// Return true if LSR should attempt to replace a use of an otherwise dead
726 /// primary IV in the latch condition with another IV available in the loop.
727 /// When successful, makes the primary IV dead.
728 bool shouldFoldTerminatingConditionAfterLSR() const;
729
731 /// \returns true if LSR should not optimize a chain that includes \p I.
732 bool isProfitableLSRChainElement(Instruction *I) const;
733 /// Return true if the target can fuse a compare and branch.
734 /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
735 /// calculation for the instructions in a loop.
736 bool canMacroFuseCmp() const;
737
738 /// Return true if the target can save a compare for loop count, for example
739 /// hardware loop saves a compare.
740 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
741 DominatorTree *DT, AssumptionCache *AC,
742 TargetLibraryInfo *LibInfo) const;
743
744 enum AddressingModeKind {
745 AMK_PreIndexed,
746 AMK_PostIndexed,
747 AMK_None
748 };
749
750 /// Return the preferred addressing mode LSR should make efforts to generate.
751 AddressingModeKind getPreferredAddressingMode(const Loop *L,
752 ScalarEvolution *SE) const;
753
754 /// Return true if the target supports masked store.
755 bool isLegalMaskedStore(Type *DataType, Align Alignment) const;
756 /// Return true if the target supports masked load.
757 bool isLegalMaskedLoad(Type *DataType, Align Alignment) const;
758
759 /// Return true if the target supports nontemporal store.
760 bool isLegalNTStore(Type *DataType, Align Alignment) const;
761 /// Return true if the target supports nontemporal load.
762 bool isLegalNTLoad(Type *DataType, Align Alignment) const;
763
764 /// \Returns true if the target supports broadcasting a load to a vector of
765 /// type <NumElements x ElementTy>.
766 bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const;
767
768 /// Return true if the target supports masked scatter.
769 bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
770 /// Return true if the target supports masked gather.
771 bool isLegalMaskedGather(Type *DataType, Align Alignment) const;
772 /// Return true if the target forces scalarizing of llvm.masked.gather
773 /// intrinsics.
774 bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const;
775 /// Return true if the target forces scalarizing of llvm.masked.scatter
776 /// intrinsics.
777 bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const;
778
779 /// Return true if the target supports masked compress store.
780 bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const;
781 /// Return true if the target supports masked expand load.
782 bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const;
783
784 /// Return true if the target supports strided load.
785 bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const;
786
787 /// Return true if this is an alternating opcode pattern that can be lowered
788 /// to a single instruction on the target. In X86 this is for the addsub
789 /// instruction which corresponds to a Shuffle + Fadd + FSub pattern in IR.
790 /// This function expects two opcodes, \p Opcode0 and \p Opcode1, to be
791 /// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0`
792 /// when \p Opcode0 is selected and `1` when Opcode1 is selected.
793 /// \p VecTy is the vector type of the instruction to be generated.
794 bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
795 const SmallBitVector &OpcodeMask) const;
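// [Editor's example - not part of the original header.] For a <4 x float>
// addsub-style pattern (fsub in even lanes, fadd in odd lanes) the mask holds
// one bit per lane, with a set bit selecting Opcode1. `VecTy` is assumed to be
// the VectorType* for <4 x float>:
//
//   SmallBitVector OpcodeMask(4, false);
//   OpcodeMask.set(1);
//   OpcodeMask.set(3); // lanes 1 and 3 use Opcode1 (FAdd)
//   bool Legal = TTI.isLegalAltInstr(VecTy, Instruction::FSub,
//                                    Instruction::FAdd, OpcodeMask);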
796
797 /// Return true if we should be enabling ordered reductions for the target.
798 bool enableOrderedReductions() const;
799
800 /// Return true if the target has a unified operation to calculate division
801 /// and remainder. If so, the additional implicit multiplication and
802 /// subtraction required to calculate a remainder from division are free. This
803 /// can enable more aggressive transformations for division and remainder than
804 /// would typically be allowed using throughput or size cost models.
805 bool hasDivRemOp(Type *DataType, bool IsSigned) const;
806
807 /// Return true if the given instruction (assumed to be a memory access
808 /// instruction) has a volatile variant. If that's the case then we can avoid
809 /// addrspacecast to generic AS for volatile loads/stores. Default
810 /// implementation returns false, which prevents address space inference for
811 /// volatile loads/stores.
812 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
813
814 /// Return true if target doesn't mind addresses in vectors.
815 bool prefersVectorizedAddressing() const;
816
817 /// Return the cost of the scaling factor used in the addressing
818 /// mode represented by AM for this target, for a load/store
819 /// of the specified type.
820 /// If the AM is supported, the return value must be >= 0.
821 /// If the AM is not supported, it returns a negative value.
822 /// TODO: Handle pre/postinc as well.
823 InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
824 int64_t BaseOffset, bool HasBaseReg,
825 int64_t Scale,
826 unsigned AddrSpace = 0) const;
827
828 /// Return true if the loop strength reduce pass should make
829 /// Instruction* based TTI queries to isLegalAddressingMode(). This is
830 /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
831 /// immediate offset and no index register.
832 bool LSRWithInstrQueries() const;
833
834 /// Return true if it's free to truncate a value of type Ty1 to type
835 /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
836 /// by referencing its sub-register AX.
837 bool isTruncateFree(Type *Ty1, Type *Ty2) const;
838
839 /// Return true if it is profitable to hoist instruction in the
840 /// then/else to before if.
841 bool isProfitableToHoist(Instruction *I) const;
842
843 bool useAA() const;
844
845 /// Return true if this type is legal.
846 bool isTypeLegal(Type *Ty) const;
847
848 /// Returns the estimated number of registers required to represent \p Ty.
849 unsigned getRegUsageForType(Type *Ty) const;
850
851 /// Return true if switches should be turned into lookup tables for the
852 /// target.
853 bool shouldBuildLookupTables() const;
854
855 /// Return true if switches should be turned into lookup tables
856 /// containing this constant value for the target.
857 bool shouldBuildLookupTablesForConstant(Constant *C) const;
858
859 /// Return true if lookup tables should be turned into relative lookup tables.
860 bool shouldBuildRelLookupTables() const;
861
862 /// Return true if the input function which is cold at all call sites,
863 /// should use coldcc calling convention.
864 bool useColdCCForColdCall(Function &F) const;
865
866 /// Estimate the overhead of scalarizing an instruction. Insert and Extract
867 /// are set if the demanded result elements need to be inserted and/or
868 /// extracted from vectors.
869 InstructionCost getScalarizationOverhead(VectorType *Ty,
870 const APInt &DemandedElts,
871 bool Insert, bool Extract,
872 TTI::TargetCostKind CostKind) const;
873
874 /// Estimate the overhead of scalarizing an instruction's unique
875 /// non-constant operands. The (potentially vector) types to use for each of
876 /// the arguments are passed via Tys.
877 InstructionCost
878 getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
879 ArrayRef<Type *> Tys,
880 TTI::TargetCostKind CostKind) const;
881
882 /// If target has efficient vector element load/store instructions, it can
883 /// return true here so that insertion/extraction costs are not added to
884 /// the scalarization cost of a load/store.
885 bool supportsEfficientVectorElementLoadStore() const;
886
887 /// If the target supports tail calls.
888 bool supportsTailCalls() const;
889
890 /// If target supports tail call on \p CB
891 bool supportsTailCallFor(const CallBase *CB) const;
892
893 /// Don't restrict interleaved unrolling to small loops.
894 bool enableAggressiveInterleaving(bool LoopHasReductions) const;
895
896 /// Returns options for expansion of memcmp. IsZeroCmp is
897 // true if this is the expansion of memcmp(p1, p2, s) == 0.
898 struct MemCmpExpansionOptions {
899 // Return true if memcmp expansion is enabled.
900 operator bool() const { return MaxNumLoads > 0; }
901
902 // Maximum number of load operations.
903 unsigned MaxNumLoads = 0;
904
905 // The list of available load sizes (in bytes), sorted in decreasing order.
906 SmallVector<unsigned, 8> LoadSizes;
907
908 // For memcmp expansion when the memcmp result is only compared equal or
909 // not-equal to 0, allow up to this number of load pairs per block. As an
910 // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
911 // a0 = load2bytes &a[0]
912 // b0 = load2bytes &b[0]
913 // a2 = load1byte &a[2]
914 // b2 = load1byte &b[2]
915 // r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
916 unsigned NumLoadsPerBlock = 1;
917
918 // Set to true to allow overlapping loads. For example, 7-byte compares can
919 // be done with two 4-byte compares instead of 4+2+1-byte compares. This
920 // requires all loads in LoadSizes to be doable in an unaligned way.
921 bool AllowOverlappingLoads = false;
922
923 // Sometimes, the amount of data that needs to be compared is smaller than
924 // the standard register size, but it cannot be loaded with just one load
925 // instruction. For example, if the size of the memory comparison is 6
926 // bytes, we can handle it more efficiently by loading all 6 bytes in a
927 // single block and generating an 8-byte number, instead of generating two
928 // separate blocks with conditional jumps for 4 and 2 byte loads. This
929 // approach simplifies the process and produces the comparison result as
930 // normal. This array lists the allowed sizes of memcmp tails that can be
931 // merged into one block
932 SmallVector<unsigned, 4> AllowedTailExpansions;
933 };
934 MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
935 bool IsZeroCmp) const;
936
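// [Editor's example - not part of the original header.] An ExpandMemCmp-style
// client first checks whether expansion is enabled at all (the options struct
// converts to bool) and then consults the allowed load sizes:
//
//   auto Options = TTI.enableMemCmpExpansion(/*OptSize=*/false,
//                                            /*IsZeroCmp=*/true);
//   if (Options && !Options.LoadSizes.empty()) {
//     unsigned Widest = Options.LoadSizes.front(); // sorted, largest first
//     (void)Widest; // drive the expansion from the widest legal load
//   }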
937 /// Should the Select Optimization pass be enabled and run.
938 bool enableSelectOptimize() const;
939
940 /// Should the Select Optimization pass treat the given instruction like a
941 /// select, potentially converting it to a conditional branch. This can
942 /// include select-like instructions like or(zext(c), x) that can be converted
943 /// to selects.
944 bool shouldTreatInstructionLikeSelect(const Instruction *I) const;
945
946 /// Enable matching of interleaved access groups.
947 bool enableInterleavedAccessVectorization() const;
948
949 /// Enable matching of interleaved access groups that contain predicated
950 /// accesses or gaps and therefore vectorized using masked
951 /// vector loads/stores.
952 bool enableMaskedInterleavedAccessVectorization() const;
953
954 /// Indicate that it is potentially unsafe to automatically vectorize
955 /// floating-point operations because the semantics of vector and scalar
956 /// floating-point semantics may differ. For example, ARM NEON v7 SIMD math
957 /// does not support IEEE-754 denormal numbers, while depending on the
958 /// platform, scalar floating-point math does.
959 /// This applies to floating-point math operations and calls, not memory
960 /// operations, shuffles, or casts.
961 bool isFPVectorizationPotentiallyUnsafe() const;
962
963 /// Determine if the target supports unaligned memory accesses.
964 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
965 unsigned AddressSpace = 0,
966 Align Alignment = Align(1),
967 unsigned *Fast = nullptr) const;
968
969 /// Return hardware support for population count.
970 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
971
972 /// Return true if the hardware has a fast square-root instruction.
973 bool haveFastSqrt(Type *Ty) const;
974
975 /// Return true if the cost of the instruction is too high to speculatively
976 /// execute and should be kept behind a branch.
977 /// This normally just wraps around a getInstructionCost() call, but some
978 /// targets might report a low TCK_SizeAndLatency value that is incompatible
979 /// with the fixed TCC_Expensive value.
980 /// NOTE: This assumes the instruction passes isSafeToSpeculativelyExecute().
981 bool isExpensiveToSpeculativelyExecute(const Instruction *I) const;
982
983 /// Return true if it is faster to check if a floating-point value is NaN
984 /// (or not-NaN) versus a comparison against a constant FP zero value.
985 /// Targets should override this if materializing a 0.0 for comparison is
986 /// generally as cheap as checking for ordered/unordered.
987 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
988
989 /// Return the expected cost of supporting the floating point operation
990 /// of the specified type.
991 InstructionCost getFPOpCost(Type *Ty) const;
992
993 /// Return the expected cost of materializing for the given integer
994 /// immediate of the specified type.
995 InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
996 TargetCostKind CostKind) const;
997
998 /// Return the expected cost of materialization for the given integer
999 /// immediate of the specified type for a given instruction. The cost can be
1000 /// zero if the immediate can be folded into the specified instruction.
1001 InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
1002 const APInt &Imm, Type *Ty,
1004 Instruction *Inst = nullptr) const;
1005 InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
1006 const APInt &Imm, Type *Ty,
1007 TargetCostKind CostKind) const;
1008
1009 /// Return the expected cost for the given integer when optimising
1010 /// for size. This is different than the other integer immediate cost
1011 /// functions in that it is subtarget agnostic. This is useful when you e.g.
1012 /// target one ISA such as Aarch32 but smaller encodings could be possible
1013 /// with another such as Thumb. This return value is used as a penalty when
1014 /// the total costs for a constant is calculated (the bigger the cost, the
1015 /// more beneficial constant hoisting is).
1016 InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
1017 const APInt &Imm, Type *Ty) const;
1018
1019 /// It can be advantageous to detach complex constants from their uses to make
1020 /// their generation cheaper. This hook allows targets to report when such
1021 /// transformations might negatively affect the code generation of the
1022 /// underlying operation. The motivating example is divides whereby hoisting
1023 /// constants prevents the code generator's ability to transform them into
1024 /// combinations of simpler operations.
1025 bool preferToKeepConstantsAttached(const Instruction &I,
1026 const Function &Fn) const;
1027
1028 /// @}
1029
1030 /// \name Vector Target Information
1031 /// @{
1032
1033 /// The various kinds of shuffle patterns for vector queries.
1034 enum ShuffleKind {
1035 SK_Broadcast, ///< Broadcast element 0 to all other elements.
1036 SK_Reverse, ///< Reverse the order of the vector.
1037 SK_Select, ///< Selects elements from the corresponding lane of
1038 ///< either source operand. This is equivalent to a
1039 ///< vector select with a constant condition operand.
1040 SK_Transpose, ///< Transpose two vectors.
1041 SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
1042 SK_ExtractSubvector, ///< ExtractSubvector Index indicates start offset.
1043 SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
1044 ///< with any shuffle mask.
1045 SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any
1046 ///< shuffle mask.
1047 SK_Splice ///< Concatenates elements from the first input vector
1048 ///< with elements of the second input vector. Returning
1049 ///< a vector of the same type as the input vectors.
1050 ///< Index indicates start offset in first input vector.
1051 };
1052
1053 /// Additional information about an operand's possible values.
1054 enum OperandValueKind {
1055 OK_AnyValue, // Operand can have any value.
1056 OK_UniformValue, // Operand is uniform (splat of a value).
1057 OK_UniformConstantValue, // Operand is uniform constant.
1058 OK_NonUniformConstantValue // Operand is a non uniform constant value.
1059 };
1060
1061 /// Additional properties of an operand's values.
1062 enum OperandValueProperties {
1063 OP_None = 0,
1064 OP_PowerOf2 = 1,
1065 OP_NegatedPowerOf2 = 2,
1066 };
1067
1068 // Describe the values an operand can take. We're in the process
1069 // of migrating uses of OperandValueKind and OperandValueProperties
1070 // to use this class, and then will change the internal representation.
1071 struct OperandValueInfo {
1072 OperandValueKind Kind = OK_AnyValue;
1073 OperandValueProperties Properties = OP_None;
1074
1075 bool isConstant() const {
1076 return Kind == OK_UniformConstantValue || Kind == OK_NonUniformConstantValue;
1077 }
1078 bool isUniform() const {
1079 return Kind == OK_UniformValue || Kind == OK_UniformConstantValue;
1080 }
1081 bool isPowerOf2() const {
1082 return Properties == OP_PowerOf2;
1083 }
1084 bool isNegatedPowerOf2() const {
1085 return Properties == OP_NegatedPowerOf2;
1086 }
1087
1088 OperandValueInfo getNoProps() const {
1089 return {Kind, OP_None};
1090 }
1091 };
1092
1093 /// \return the number of registers in the target-provided register class.
1094 unsigned getNumberOfRegisters(unsigned ClassID) const;
1095
1096 /// \return the target-provided register class ID for the provided type,
1097 /// accounting for type promotion and other type-legalization techniques that
1098 /// the target might apply. However, it specifically does not account for the
1099 /// scalarization or splitting of vector types. Should a vector type require
1100 /// scalarization or splitting into multiple underlying vector registers, that
1101 /// type should be mapped to a register class containing no registers.
1102 /// Specifically, this is designed to provide a simple, high-level view of the
1103 /// register allocation later performed by the backend. These register classes
1104 /// don't necessarily map onto the register classes used by the backend.
1105 /// FIXME: It's not currently possible to determine how many registers
1106 /// are used by the provided type.
1107 unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
1108
1109 /// \return the target-provided register class name
1110 const char *getRegisterClassName(unsigned ClassID) const;
1111
1112 enum RegisterKind { RGK_Scalar, RGK_FixedWidthVector, RGK_ScalableVector };
1113
1114 /// \return The width of the largest scalar or vector register type.
1115 TypeSize getRegisterBitWidth(RegisterKind K) const;
1116
1117 /// \return The width of the smallest vector register type.
1118 unsigned getMinVectorRegisterBitWidth() const;
1119
1120 /// \return The maximum value of vscale if the target specifies an
1121 /// architectural maximum vector length, and std::nullopt otherwise.
1122 std::optional<unsigned> getMaxVScale() const;
1123
1124 /// \return the value of vscale to tune the cost model for.
1125 std::optional<unsigned> getVScaleForTuning() const;
1126
1127 /// \return true if vscale is known to be a power of 2
1128 bool isVScaleKnownToBeAPowerOfTwo() const;
1129
1130 /// \return True if the vectorization factor should be chosen to
1131 /// make the vector of the smallest element type match the size of a
1132 /// vector register. For wider element types, this could result in
1133 /// creating vectors that span multiple vector registers.
1134 /// If false, the vectorization factor will be chosen based on the
1135 /// size of the widest element type.
1136 /// \p K Register Kind for vectorization.
1137 bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const;
1138
1139 /// \return The minimum vectorization factor for types of given element
1140 /// bit width, or 0 if there is no minimum VF. The returned value only
1141 /// applies when shouldMaximizeVectorBandwidth returns true.
1142 /// If IsScalable is true, the returned ElementCount must be a scalable VF.
1143 ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;
1144
1145 /// \return The maximum vectorization factor for types of given element
1146 /// bit width and opcode, or 0 if there is no maximum VF.
1147 /// Currently only used by the SLP vectorizer.
1148 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
1149
1150 /// \return The minimum vectorization factor for the store instruction. Given
1151 /// the initial estimation of the minimum vector factor and store value type,
1152 /// it tries to find possible lowest VF, which still might be profitable for
1153 /// the vectorization.
1154 /// \param VF Initial estimation of the minimum vector factor.
1155 /// \param ScalarMemTy Scalar memory type of the store operation.
1156 /// \param ScalarValTy Scalar type of the stored value.
1157 /// Currently only used by the SLP vectorizer.
1158 unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
1159 Type *ScalarValTy) const;
1160
1161 /// \return True if it should be considered for address type promotion.
1162 /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
1163 /// profitable without finding other extensions fed by the same input.
1164 bool shouldConsiderAddressTypePromotion(
1165 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
1166
1167 /// \return The size of a cache line in bytes.
1168 unsigned getCacheLineSize() const;
1169
1170 /// The possible cache levels
1171 enum class CacheLevel {
1172 L1D, // The L1 data cache
1173 L2D, // The L2 data cache
1174
1175 // We currently do not model L3 caches, as their sizes differ widely between
1176 // microarchitectures. Also, we currently do not have a use for L3 cache
1177 // size modeling yet.
1178 };
1179
1180 /// \return The size of the cache level in bytes, if available.
1181 std::optional<unsigned> getCacheSize(CacheLevel Level) const;
1182
1183 /// \return The associativity of the cache level, if available.
1184 std::optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
1185
1186 /// \return The minimum architectural page size for the target.
1187 std::optional<unsigned> getMinPageSize() const;
1188
1189 /// \return How much before a load we should place the prefetch
1190 /// instruction. This is currently measured in number of
1191 /// instructions.
1192 unsigned getPrefetchDistance() const;
1193
1194 /// Some HW prefetchers can handle accesses up to a certain constant stride.
1195 /// Sometimes prefetching is beneficial even below the HW prefetcher limit,
1196 /// and the arguments provided are meant to serve as a basis for deciding this
1197 /// for a particular loop.
1198 ///
1199 /// \param NumMemAccesses Number of memory accesses in the loop.
1200 /// \param NumStridedMemAccesses Number of the memory accesses that
1201 /// ScalarEvolution could find a known stride
1202 /// for.
1203 /// \param NumPrefetches Number of software prefetches that will be
1204 /// emitted as determined by the addresses
1205 /// involved and the cache line size.
1206 /// \param HasCall True if the loop contains a call.
1207 ///
1208 /// \return This is the minimum stride in bytes where it makes sense to start
1209 /// adding SW prefetches. The default is 1, i.e. prefetch with any
1210 /// stride.
1211 unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1212 unsigned NumStridedMemAccesses,
1213 unsigned NumPrefetches, bool HasCall) const;
1214
1215 /// \return The maximum number of iterations to prefetch ahead. If
1216 /// the required number of iterations is more than this number, no
1217 /// prefetching is performed.
1218 unsigned getMaxPrefetchIterationsAhead() const;
1219
1220 /// \return True if prefetching should also be done for writes.
1221 bool enableWritePrefetching() const;
1222
1223 /// \return True if the target wants to issue a prefetch in address space \p AS.
1224 bool shouldPrefetchAddressSpace(unsigned AS) const;
1225
1226 /// \return The maximum interleave factor that any transform should try to
1227 /// perform for this target. This number depends on the level of parallelism
1228 /// and the number of execution units in the CPU.
1229 unsigned getMaxInterleaveFactor(ElementCount VF) const;
1230
1231 /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
1232 static OperandValueInfo getOperandInfo(const Value *V);
1233
1234 /// This is an approximation of reciprocal throughput of a math/logic op.
1235 /// A higher cost indicates less expected throughput.
1236 /// From Agner Fog's guides, reciprocal throughput is "the average number of
1237 /// clock cycles per instruction when the instructions are not part of a
1238 /// limiting dependency chain."
1239 /// Therefore, costs should be scaled to account for multiple execution units
1240 /// on the target that can process this type of instruction. For example, if
1241 /// there are 5 scalar integer units and 2 vector integer units that can
1242 /// calculate an 'add' in a single cycle, this model should indicate that the
1243 /// cost of the vector add instruction is 2.5 times the cost of the scalar
1244 /// add instruction.
1245 /// \p Args is an optional argument which holds the instruction operands
1246 /// values so the TTI can analyze those values searching for special
1247 /// cases or optimizations based on those values.
1248 /// \p CxtI is the optional original context instruction, if one exists, to
1249 /// provide even more information.
1250 /// \p TLibInfo is used to search for platform specific vector library
1251 /// functions for instructions that might be converted to calls (e.g. frem).
1252 InstructionCost getArithmeticInstrCost(
1253 unsigned Opcode, Type *Ty,
1254 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1255 TTI::OperandValueInfo Opd1Info = {TTI::OK_AnyValue, TTI::OP_None},
1256 TTI::OperandValueInfo Opd2Info = {TTI::OK_AnyValue, TTI::OP_None},
1257 ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
1258 const Instruction *CxtI = nullptr,
1259 const TargetLibraryInfo *TLibInfo = nullptr) const;
1260
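// [Editor's example - not part of the original header.] A vectorizer comparing
// scalar and vector throughput for an add might query, with `Ctx` an
// LLVMContext and operand info left at its defaults:
//
//   Type *I32 = Type::getInt32Ty(Ctx);
//   auto *V4I32 = FixedVectorType::get(I32, 4);
//   InstructionCost ScalarCost =
//       TTI.getArithmeticInstrCost(Instruction::Add, I32, TTI::TCK_RecipThroughput);
//   InstructionCost VectorCost =
//       TTI.getArithmeticInstrCost(Instruction::Add, V4I32, TTI::TCK_RecipThroughput);
//   bool Profitable = VectorCost.isValid() && VectorCost < ScalarCost * 4;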
1261 /// Returns the cost estimation for alternating opcode pattern that can be
1262 /// lowered to a single instruction on the target. In X86 this is for the
1263 /// addsub instruction which corresponds to a Shuffle + Fadd + FSub pattern in
1264 /// IR. This function expects two opcodes, \p Opcode0 and \p Opcode1, to be
1265 /// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0`
1266 /// when \p Opcode0 is selected and `1` when Opcode1 is selected.
1267 /// \p VecTy is the vector type of the instruction to be generated.
1268 InstructionCost getAltInstrCost(
1269 VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
1270 const SmallBitVector &OpcodeMask,
1271 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1272
1273 /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
1274 /// The exact mask may be passed as Mask, or else the array will be empty.
1275 /// The index and subtype parameters are used by the subvector insertion and
1276 /// extraction shuffle kinds to show the insert/extract point and the type of
1277 /// the subvector being inserted/extracted. The operands of the shuffle can be
1278 /// passed through \p Args, which helps improve the cost estimation in some
1279 /// cases, like in broadcast loads.
1280 /// NOTE: For subvector extractions Tp represents the source type.
1281 InstructionCost
1282 getShuffleCost(ShuffleKind Kind, VectorType *Tp,
1283 ArrayRef<int> Mask = std::nullopt,
1284 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1285 int Index = 0, VectorType *SubTp = nullptr,
1286 ArrayRef<const Value *> Args = std::nullopt) const;
1287
1288 /// Represents a hint about the context in which a cast is used.
1289 ///
1290 /// For zext/sext, the context of the cast is the operand, which must be a
1291 /// load of some kind. For trunc, the context of the cast is the single
1292 /// user of the instruction, which must be a store of some kind.
1293 ///
1294 /// This enum allows the vectorizer to give getCastInstrCost an idea of the
1295 /// type of cast it's dealing with, as not every cast is equal. For instance,
1296 /// the zext of a load may be free, but the zext of an interleaving load can
1297 /// be (very) expensive!
1298 ///
1299 /// See \c getCastContextHint to compute a CastContextHint from a cast
1300 /// Instruction*. Callers can use it if they don't need to override the
1301 /// context and just want it to be calculated from the instruction.
1302 ///
1303 /// FIXME: This handles the types of load/store that the vectorizer can
1304 /// produce, which are the cases where the context instruction is most
1305 /// likely to be incorrect. There are other situations where that can happen
1306 /// too, which might be handled here but in the long run a more general
1307 /// solution of costing multiple instructions at the same times may be better.
1308 enum class CastContextHint : uint8_t {
1309 None, ///< The cast is not used with a load/store of any kind.
1310 Normal, ///< The cast is used with a normal load/store.
1311 Masked, ///< The cast is used with a masked load/store.
1312 GatherScatter, ///< The cast is used with a gather/scatter.
1313 Interleave, ///< The cast is used with an interleaved load/store.
1314 Reversed, ///< The cast is used with a reversed load/store.
1315 };
1316
1317 /// Calculates a CastContextHint from \p I.
1318 /// This should be used by callers of getCastInstrCost if they wish to
1319 /// determine the context from some instruction.
1320 /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr,
1321 /// or if it's another type of cast.
1322 static CastContextHint getCastContextHint(const Instruction *I);
1323
1324 /// \return The expected cost of cast instructions, such as bitcast, trunc,
1325 /// zext, etc. If there is an existing instruction that holds Opcode, it
1326 /// may be passed in the 'I' parameter.
1327 InstructionCost
1328 getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1329 CastContextHint CCH,
1330 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
1331 const Instruction *I = nullptr) const;
1332
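// [Editor's example - not part of the original header.] When costing an existing
// extension, derive the context from the IR rather than guessing, so that e.g.
// a zext feeding off a masked load is priced accordingly. `ZExt`, `DstTy` and
// `SrcTy` are placeholders:
//
//   auto CCH = TargetTransformInfo::getCastContextHint(ZExt);
//   InstructionCost C = TTI.getCastInstrCost(Instruction::ZExt, DstTy, SrcTy,
//                                            CCH, TTI::TCK_RecipThroughput,
//                                            ZExt);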
1333 /// \return The expected cost of a sign- or zero-extended vector extract. Use
1334 /// Index = -1 to indicate that there is no information about the index value.
1335 InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1336 VectorType *VecTy,
1337 unsigned Index) const;
1338
1339 /// \return The expected cost of control-flow related instructions such as
1340 /// Phi, Ret, Br, Switch.
1341 InstructionCost
1342 getCFInstrCost(unsigned Opcode,
1343 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
1344 const Instruction *I = nullptr) const;
1345
1346 /// \returns The expected cost of compare and select instructions. If there
1347 /// is an existing instruction that holds Opcode, it may be passed in the
1348 /// 'I' parameter. The \p VecPred parameter can be used to indicate the select
1349 /// is using a compare with the specified predicate as condition. When vector
1350 /// types are passed, \p VecPred must be used for all lanes.
1351 InstructionCost
1352 getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
1353 CmpInst::Predicate VecPred,
1355 const Instruction *I = nullptr) const;
1356
1357 /// \return The expected cost of vector Insert and Extract.
1358 /// Use -1 to indicate that there is no information on the index value.
1359 /// This is used when the instruction is not available; a typical use
1360 /// case is to provision the cost of vectorization/scalarization in
1361 /// vectorizer passes.
1362 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
1363 TTI::TargetCostKind CostKind,
1364 unsigned Index = -1, Value *Op0 = nullptr,
1365 Value *Op1 = nullptr) const;
1366
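// --- Illustrative usage sketch (editorial addition, not part of the LLVM
// header): a minimal way a vectorizer-style pass might provision the
// scalarization cost of a vector value, one extract per lane, using the
// index-only form of getVectorInstrCost. `provisionExtractCost` is a
// hypothetical helper; `TTI`, `VecTy` and `CostKind` come from the caller.
static InstructionCost
provisionExtractCost(const TargetTransformInfo &TTI, FixedVectorType *VecTy,
                     TargetTransformInfo::TargetCostKind CostKind) {
  InstructionCost Cost = 0;
  for (unsigned Lane = 0; Lane < VecTy->getNumElements(); ++Lane)
    Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy,
                                   CostKind, Lane);
  return Cost;
}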
1367 /// \return The expected cost of vector Insert and Extract.
1368 /// This is used when the instruction is available, and the implementation
1369 /// asserts that 'I' is not nullptr.
1370 ///
1371 /// A typical suitable use case is cost estimation when vector instruction
1372 /// exists (e.g., from basic blocks during transformation).
1373 InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
1374 TTI::TargetCostKind CostKind,
1375 unsigned Index = -1) const;
1376
1377 /// \return The cost of replication shuffle of \p VF elements typed \p EltTy
1378 /// \p ReplicationFactor times.
1379 ///
1380 /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
1381 /// <0,0,0,1,1,1,2,2,2,3,3,3>
1382 InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
1383 int VF,
1384 const APInt &DemandedDstElts,
1385 TTI::TargetCostKind CostKind) const;
1386
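// --- Illustrative usage sketch (editorial addition, not part of the LLVM
// header): building the replication mask described above
// (<0,0,0,1,1,1,2,2,2,3,3,3> for ReplicationFactor=3, VF=4) and querying its
// cost with every destination lane demanded. `costFullReplication` is a
// hypothetical helper; `TTI`, `EltTy` and `CostKind` are assumed inputs.
static InstructionCost
costFullReplication(const TargetTransformInfo &TTI, Type *EltTy,
                    int ReplicationFactor, int VF,
                    TargetTransformInfo::TargetCostKind CostKind) {
  SmallVector<int, 16> Mask;
  for (int Src = 0; Src < VF; ++Src)
    Mask.append(ReplicationFactor, Src); // each source lane repeated RF times
  // All destination lanes are demanded in this sketch.
  APInt DemandedDstElts = APInt::getAllOnes(Mask.size());
  return TTI.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
                                       DemandedDstElts, CostKind);
}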
1387 /// \return The cost of Load and Store instructions.
1388 InstructionCost
1389 getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1390 unsigned AddressSpace,
1391 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1392 OperandValueInfo OpdInfo = {OK_AnyValue, OP_None},
1393 const Instruction *I = nullptr) const;
1394
1395 /// \return The cost of VP Load and Store instructions.
1396 InstructionCost
1397 getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1398 unsigned AddressSpace,
1399 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1400 const Instruction *I = nullptr) const;
1401
1402 /// \return The cost of masked Load and Store instructions.
1403 InstructionCost getMaskedMemoryOpCost(
1404 unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
1405 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1406
1407 /// \return The cost of Gather or Scatter operation
1408 /// \p Opcode - the kind of memory access, Load or Store
1409 /// \p DataTy - a vector type of the data to be loaded or stored
1410 /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
1411 /// \p VariableMask - true when the memory access is predicated with a mask
1412 /// that is not a compile-time constant
1413 /// \p Alignment - alignment of single element
1414 /// \p I - the optional original context instruction, if one exists, e.g. the
1415 /// load/store to transform or the call to the gather/scatter intrinsic
1416 InstructionCost getGatherScatterOpCost(
1417 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1418 Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1419 const Instruction *I = nullptr) const;
1420
1421 /// \return The cost of strided memory operations.
1422 /// \p Opcode - the kind of memory access, Load or Store
1423 /// \p DataTy - a vector type of the data to be loaded or stored
1424 /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
1425 /// \p VariableMask - true when the memory access is predicated with a mask
1426 /// that is not a compile-time constant
1427 /// \p Alignment - alignment of single element
1428 /// \p I - the optional original context instruction, if one exists, e.g. the
1429 /// load/store to transform or the call to the gather/scatter intrinsic
1430 InstructionCost getStridedMemoryOpCost(
1431 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1432 Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1433 const Instruction *I = nullptr) const;
1434
1435 /// \return The cost of the interleaved memory operation.
1436 /// \p Opcode is the memory operation code
1437 /// \p VecTy is the vector type of the interleaved access.
1438 /// \p Factor is the interleave factor
1439 /// \p Indices is the indices for interleaved load members (as interleaved
1440 /// load allows gaps)
1441 /// \p Alignment is the alignment of the memory operation
1442 /// \p AddressSpace is address space of the pointer.
1443 /// \p UseMaskForCond indicates if the memory access is predicated.
1444 /// \p UseMaskForGaps indicates if gaps should be masked.
1445 InstructionCost getInterleavedMemoryOpCost(
1446 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1447 Align Alignment, unsigned AddressSpace,
1448 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1449 bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
1450
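// --- Illustrative usage sketch (editorial addition, not part of the LLVM
// header): costing an interleaved load group with Factor=2 where both members
// are used, e.g. a wide <8 x i32> load that is de-interleaved into two
// <4 x i32> sequences. `costDeinterleavingLoad` is a hypothetical helper;
// `TTI`, `Ctx` and `CostKind` are assumed to be available in the caller.
static InstructionCost
costDeinterleavingLoad(const TargetTransformInfo &TTI, LLVMContext &Ctx,
                       TargetTransformInfo::TargetCostKind CostKind) {
  auto *WideTy = FixedVectorType::get(Type::getInt32Ty(Ctx), /*NumElts=*/8);
  unsigned Indices[] = {0, 1}; // both interleaved members are live (no gaps)
  return TTI.getInterleavedMemoryOpCost(Instruction::Load, WideTy,
                                        /*Factor=*/2, Indices, Align(4),
                                        /*AddressSpace=*/0, CostKind);
}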
1451 /// A helper function to determine the type of reduction algorithm used
1452 /// for a given \p Opcode and set of FastMathFlags \p FMF.
1453 static bool requiresOrderedReduction(std::optional<FastMathFlags> FMF) {
1454 return FMF && !(*FMF).allowReassoc();
1455 }
1456
1457 /// Calculate the cost of vector reduction intrinsics.
1458 ///
1459 /// This is the cost of reducing the vector value of type \p Ty to a scalar
1460 /// value using the operation denoted by \p Opcode. The FastMathFlags
1461 /// parameter \p FMF indicates what type of reduction we are performing:
1462 /// 1. Tree-wise. This is the typical 'fast' reduction performed that
1463 /// involves successively splitting a vector into half and doing the
1464 /// operation on the pair of halves until you have a scalar value. For
1465 /// example:
1466 /// (v0, v1, v2, v3)
1467 /// ((v0+v2), (v1+v3), undef, undef)
1468 /// ((v0+v2+v1+v3), undef, undef, undef)
1469 /// This is the default behaviour for integer operations, whereas for
1470 /// floating point we only do this if \p FMF indicates that
1471 /// reassociation is allowed.
1472 /// 2. Ordered. For a vector with N elements this involves performing N
1473 /// operations in lane order, starting with an initial scalar value, i.e.
1474 /// result = InitVal + v0
1475 /// result = result + v1
1476 /// result = result + v2
1477 /// result = result + v3
1478 /// This is only the case for FP operations and when reassociation is not
1479 /// allowed.
1480 ///
1481 InstructionCost getArithmeticReductionCost(
1482 unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
1483 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1484
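// --- Illustrative usage sketch (editorial addition, not part of the LLVM
// header): the same fadd reduction of <4 x float> costed twice, once with
// reassociation allowed (the tree-wise form is permitted) and once without
// (requiresOrderedReduction() is true, so the ordered, lane-by-lane form is
// costed). `orderedFAddReductionIsPricier` is a hypothetical helper; `TTI`
// and `Ctx` come from the caller.
static bool orderedFAddReductionIsPricier(const TargetTransformInfo &TTI,
                                          LLVMContext &Ctx) {
  auto *VecTy = FixedVectorType::get(Type::getFloatTy(Ctx), 4);
  FastMathFlags Reassoc;
  Reassoc.setAllowReassoc(true); // permits the tree-wise reduction
  InstructionCost Tree = TTI.getArithmeticReductionCost(
      Instruction::FAdd, VecTy, Reassoc,
      TargetTransformInfo::TCK_RecipThroughput);
  FastMathFlags Strict; // no reassociation: ordered reduction is required
  InstructionCost Ordered = TTI.getArithmeticReductionCost(
      Instruction::FAdd, VecTy, Strict,
      TargetTransformInfo::TCK_RecipThroughput);
  return Ordered > Tree;
}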
1485 InstructionCost getMinMaxReductionCost(
1486 Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF = FastMathFlags(),
1487 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1488
1489 /// Calculate the cost of an extended reduction pattern, similar to
1490 /// getArithmeticReductionCost of an Add reduction with multiply and optional
1491 /// extensions. This is the cost of:
1492 /// ResTy vecreduce.add(mul (A, B)).
1493 /// ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B))).
1494 InstructionCost getMulAccReductionCost(
1495 bool IsUnsigned, Type *ResTy, VectorType *Ty,
1496 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1497
1498 /// Calculate the cost of an extended reduction pattern, similar to
1499 /// getArithmeticReductionCost of a reduction with an extension.
1500 /// This is the cost of:
1501 /// ResTy vecreduce.opcode(ext(Ty A)).
1502 InstructionCost getExtendedReductionCost(
1503 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1504 FastMathFlags FMF,
1505 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1506
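// --- Illustrative usage sketch (editorial addition, not part of the LLVM
// header): querying the extended multiply-accumulate reduction pattern
// described above, i.e. i32 vecreduce.add(mul(zext(<8 x i8> A),
// zext(<8 x i8> B))), which some targets may lower to a single dot-product
// style instruction. `costDotProductReduction` is a hypothetical helper;
// `TTI` and `Ctx` are assumed inputs.
static InstructionCost
costDotProductReduction(const TargetTransformInfo &TTI, LLVMContext &Ctx) {
  auto *SrcTy = FixedVectorType::get(Type::getInt8Ty(Ctx), 8); // type of A, B
  Type *ResTy = Type::getInt32Ty(Ctx);                         // reduced type
  return TTI.getMulAccReductionCost(/*IsUnsigned=*/true, ResTy, SrcTy,
                                    TargetTransformInfo::TCK_RecipThroughput);
}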
1507 /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
1508 /// Three cases are handled: 1. scalar instruction 2. vector instruction
1509 /// 3. scalar instruction which is to be vectorized.
1510 InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
1511 TTI::TargetCostKind CostKind) const;
1512
1513 /// \returns The cost of Call instructions.
1514 InstructionCost getCallInstrCost(
1515 Function *F, Type *RetTy, ArrayRef<Type *> Tys,
1516 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
1517
1518 /// \returns The number of pieces into which the provided type must be
1519 /// split during legalization. Zero is returned when the answer is unknown.
1520 unsigned getNumberOfParts(Type *Tp) const;
1521
1522 /// \returns The cost of the address computation. For most targets this can be
1523 /// merged into the instruction indexing mode. Some targets might want to
1524 /// distinguish between address computation for memory operations on vector
1525 /// types and scalar types. Such targets should override this function.
1526 /// The 'SE' parameter holds a pointer to the ScalarEvolution object, which
1527 /// is used to get the step value of 'Ptr' in the case of a constant stride.
1528 /// The 'Ptr' parameter holds the SCEV of the access pointer.
1529 InstructionCost getAddressComputationCost(Type *Ty,
1530 ScalarEvolution *SE = nullptr,
1531 const SCEV *Ptr = nullptr) const;
1532
1533 /// \returns The cost, if any, of keeping values of the given types alive
1534 /// over a callsite.
1535 ///
1536 /// Some types may require the use of register classes that do not have
1537 /// any callee-saved registers, so would require a spill and fill.
1538 InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
1539
1540 /// \returns True if the intrinsic is a supported memory intrinsic. Info
1541 /// will contain additional information: whether the intrinsic may read or
1542 /// write memory, its volatility, and the pointer it accesses. Info is
1543 /// undefined if false is returned.
1544 bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
1545
1546 /// \returns The maximum element size, in bytes, for an element
1547 /// unordered-atomic memory intrinsic.
1548 unsigned getAtomicMemIntrinsicMaxElementSize() const;
1549
1550 /// \returns A value which is the result of the given memory intrinsic. New
1551 /// instructions may be created to extract the result from the given intrinsic
1552 /// memory operation. Returns nullptr if the target cannot create a result
1553 /// from the given intrinsic.
1554 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1555 Type *ExpectedType) const;
1556
1557 /// \returns The type to use in a loop expansion of a memcpy call.
1558 Type *getMemcpyLoopLoweringType(
1559 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
1560 unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
1561 std::optional<uint32_t> AtomicElementSize = std::nullopt) const;
1562
1563 /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
1564 /// \param RemainingBytes The number of bytes to copy.
1565 ///
1566 /// Calculates the operand types to use when copying \p RemainingBytes of
1567 /// memory, where source and destination alignments are \p SrcAlign and
1568 /// \p DestAlign respectively.
1569 void getMemcpyLoopResidualLoweringType(
1570 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1571 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1572 unsigned SrcAlign, unsigned DestAlign,
1573 std::optional<uint32_t> AtomicCpySize = std::nullopt) const;
1574
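// --- Illustrative usage sketch (editorial addition, not part of the LLVM
// header): asking the target what type a memcpy expansion loop should copy
// per iteration, and which operand types to use for the left-over bytes.
// `planMemcpyExpansion` is a hypothetical helper; `TTI`, `Ctx` and `Length`
// (the memcpy length value) are assumed to exist in the caller, and the
// address spaces and alignments below are arbitrary example values.
static void planMemcpyExpansion(const TargetTransformInfo &TTI,
                                LLVMContext &Ctx, Value *Length) {
  Type *LoopOpTy = TTI.getMemcpyLoopLoweringType(
      Ctx, Length, /*SrcAddrSpace=*/0, /*DestAddrSpace=*/0,
      /*SrcAlign=*/16, /*DestAlign=*/16);
  SmallVector<Type *, 4> ResidualTys;
  TTI.getMemcpyLoopResidualLoweringType(ResidualTys, Ctx,
                                        /*RemainingBytes=*/7,
                                        /*SrcAddrSpace=*/0, /*DestAddrSpace=*/0,
                                        /*SrcAlign=*/16, /*DestAlign=*/16);
  (void)LoopOpTy;    // a real pass would emit the copy loop with this type
  (void)ResidualTys; // ...and copy the tail with these types
}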
1575 /// \returns True if the two functions have compatible attributes for inlining
1576 /// purposes.
1577 bool areInlineCompatible(const Function *Caller,
1578 const Function *Callee) const;
1579
1580 /// Returns a penalty for invoking call \p Call in \p F.
1581 /// For example, if a function F calls a function G, which in turn calls
1582 /// function H, then getInlineCallPenalty(F, H()) would return the
1583 /// penalty of calling H from F, e.g. after inlining G into F.
1584 /// \p DefaultCallPenalty is passed to give a default penalty that
1585 /// the target can amend or override.
1586 unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
1587 unsigned DefaultCallPenalty) const;
1588
1589 /// \returns True if the caller and callee agree on how \p Types will be
1590 /// passed to, or returned from, the callee.
1591 ///
1592 /// \param Types List of types to check.
1593 bool areTypesABICompatible(const Function *Caller, const Function *Callee,
1594 const ArrayRef<Type *> &Types) const;
1595
1596 /// The type of load/store indexing.
1597 enum MemIndexedMode {
1598 MIM_Unindexed, ///< No indexing.
1599 MIM_PreInc, ///< Pre-incrementing.
1600 MIM_PreDec, ///< Pre-decrementing.
1601 MIM_PostInc, ///< Post-incrementing.
1602 MIM_PostDec ///< Post-decrementing.
1603 };
1604
1605 /// \returns True if the specified indexed load for the given type is legal.
1606 bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
1607
1608 /// \returns True if the specified indexed store for the given type is legal.
1609 bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
1610
1611 /// \returns The bitwidth of the largest vector type that should be used to
1612 /// load/store in the given address space.
1613 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
1614
1615 /// \returns True if the load instruction is legal to vectorize.
1616 bool isLegalToVectorizeLoad(LoadInst *LI) const;
1617
1618 /// \returns True if the store instruction is legal to vectorize.
1619 bool isLegalToVectorizeStore(StoreInst *SI) const;
1620
1621 /// \returns True if it is legal to vectorize the given load chain.
1622 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
1623 unsigned AddrSpace) const;
1624
1625 /// \returns True if it is legal to vectorize the given store chain.
1626 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
1627 unsigned AddrSpace) const;
1628
1629 /// \returns True if it is legal to vectorize the given reduction kind.
1630 bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
1631 ElementCount VF) const;
1632
1633 /// \returns True if the given type is supported for scalable vectors.
1634 bool isElementTypeLegalForScalableVector(Type *Ty) const;
1635
1636 /// \returns The new vector factor value if the target doesn't support \p
1637 /// SizeInBytes loads or has a better vector factor.
1638 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1639 unsigned ChainSizeInBytes,
1640 VectorType *VecTy) const;
1641
1642 /// \returns The new vector factor value if the target doesn't support \p
1643 /// SizeInBytes stores or has a better vector factor.
1644 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1645 unsigned ChainSizeInBytes,
1646 VectorType *VecTy) const;
1647
1648 /// Flags describing the kind of vector reduction.
1649 struct ReductionFlags {
1650 ReductionFlags() = default;
1651 bool IsMaxOp =
1652 false; ///< If the op is a min/max kind, true if it's a max operation.
1653 bool IsSigned = false; ///< Whether the operation is a signed int reduction.
1654 bool NoNaN =
1655 false; ///< If op is an fp min/max, whether NaNs may be present.
1656 };
1657
1658 /// \returns True if the target prefers reductions in loop.
1659 bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1660 ReductionFlags Flags) const;
1661
1662 /// \returns True if the target prefers the reduction select to be kept in
1663 /// the loop when tail folding, i.e.
1664 /// loop:
1665 /// p = phi (0, s)
1666 /// a = add (p, x)
1667 /// s = select (mask, a, p)
1668 /// vecreduce.add(s)
1669 ///
1670 /// As opposed to the normal scheme of p = phi (0, a) which allows the select
1671 /// to be pulled out of the loop. If the select(.., add, ..) can be predicated
1672 /// by the target, this can lead to cleaner code generation.
1673 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1674 ReductionFlags Flags) const;
1675
1676 /// Return true if the loop vectorizer should consider vectorizing an
1677 /// otherwise scalar epilogue loop.
1678 bool preferEpilogueVectorization() const;
1679
1680 /// \returns True if the target wants to expand the given reduction intrinsic
1681 /// into a shuffle sequence.
1682 bool shouldExpandReduction(const IntrinsicInst *II) const;
1683
1684 /// \returns the size cost of rematerializing a GlobalValue address relative
1685 /// to a stack reload.
1686 unsigned getGISelRematGlobalCost() const;
1687
1688 /// \returns the lower bound of a trip count to decide on vectorization
1689 /// while tail-folding.
1690 unsigned getMinTripCountTailFoldingThreshold() const;
1691
1692 /// \returns True if the target supports scalable vectors.
1693 bool supportsScalableVectors() const;
1694
1695 /// \return true when scalable vectorization is preferred.
1696 bool enableScalableVectorization() const;
1697
1698 /// \name Vector Predication Information
1699 /// @{
1700 /// Whether the target supports the %evl parameter of VP intrinsic efficiently
1701 /// in hardware, for the given opcode and type/alignment. (see LLVM Language
1702 /// Reference - "Vector Predication Intrinsics").
1703 /// Use of %evl is discouraged when that is not the case.
1704 bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
1705 Align Alignment) const;
1706
1707 struct VPLegalization {
1708 enum VPTransform {
1709 // keep the predicating parameter
1710 Legal = 0,
1711 // where legal, discard the predicate parameter
1712 Discard = 1,
1713 // transform into something else that is also predicating
1714 Convert = 2
1715 };
1716
1717 // How to transform the EVL parameter.
1718 // Legal: keep the EVL parameter as it is.
1719 // Discard: Ignore the EVL parameter where it is safe to do so.
1720 // Convert: Fold the EVL into the mask parameter.
1721 VPTransform EVLParamStrategy;
1722
1723 // How to transform the operator.
1724 // Legal: The target supports this operator.
1725 // Convert: Convert this to a non-VP operation.
1726 // The 'Discard' strategy is invalid.
1727 VPTransform OpStrategy;
1728
1729 bool shouldDoNothing() const {
1730 return (EVLParamStrategy == Legal) && (OpStrategy == Legal);
1731 }
1732 VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)
1733 : EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {}
1734 };
1735
1736 /// \returns How the target needs this vector-predicated operation to be
1737 /// transformed.
1738 VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const;
1739 /// @}
1740
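// --- Illustrative usage sketch (editorial addition, not part of the LLVM
// header): how a legalization pass might act on the strategy returned for a
// vector-predicated intrinsic: nothing to do, fold %evl into the mask, or
// rewrite the operation to a non-VP form. `sketchLegalizeVPIntrinsic` is a
// hypothetical helper; `TTI` and `VPI` come from the caller.
static void sketchLegalizeVPIntrinsic(const TargetTransformInfo &TTI,
                                      VPIntrinsic &VPI) {
  TargetTransformInfo::VPLegalization Strategy =
      TTI.getVPLegalizationStrategy(VPI);
  if (Strategy.shouldDoNothing())
    return; // both the %evl parameter and the operator are fine as-is
  bool FoldEVLIntoMask =
      Strategy.EVLParamStrategy == TargetTransformInfo::VPLegalization::Convert;
  bool RewriteToNonVPOp =
      Strategy.OpStrategy == TargetTransformInfo::VPLegalization::Convert;
  (void)FoldEVLIntoMask; // a real pass would rewrite the intrinsic here
  (void)RewriteToNonVPOp;
}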
1741 /// \returns Whether a 32-bit branch instruction is available in Arm or Thumb
1742 /// state.
1743 ///
1744 /// Used by the LowerTypeTests pass, which constructs an IR inline assembler
1745 /// node containing a jump table in a format suitable for the target, so it
1746 /// needs to know what format of jump table it can legally use.
1747 ///
1748 /// For non-Arm targets, this function isn't used. It defaults to returning
1749 /// false, but it shouldn't matter what it returns anyway.
1750 bool hasArmWideBranch(bool Thumb) const;
1751
1752 /// \return The maximum number of function arguments the target supports.
1753 unsigned getMaxNumArgs() const;
1754
1755 /// @}
1756
1757private:
1758 /// The abstract base class used to type erase specific TTI
1759 /// implementations.
1760 class Concept;
1761
1762 /// The template model for the base class which wraps a concrete
1763 /// implementation in a type erased interface.
1764 template <typename T> class Model;
1765
1766 std::unique_ptr<Concept> TTIImpl;
1767};
1768
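// --- Illustrative sketch (editorial addition, not part of the LLVM header):
// the Concept/Model pair used below is the classic type-erasure idiom: an
// abstract Concept declares the interface, a templated Model forwards each
// call to a concrete implementation, and the owning class holds a
// std::unique_ptr<Concept>. The names ShapeConcept/ShapeModel/AnyShape are
// hypothetical and only show the pattern in miniature; <memory> and <utility>
// are assumed to be available.
struct ShapeConcept {
  virtual ~ShapeConcept() = default;
  virtual double area() const = 0; // the type-erased interface
};
template <typename T> struct ShapeModel final : ShapeConcept {
  T Impl;
  ShapeModel(T Impl) : Impl(std::move(Impl)) {}
  double area() const override { return Impl.area(); } // forward to Impl
};
class AnyShape {
  std::unique_ptr<ShapeConcept> Ptr;

public:
  template <typename T>
  AnyShape(T Impl) : Ptr(std::make_unique<ShapeModel<T>>(std::move(Impl))) {}
  double area() const { return Ptr->area(); }
};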
1769 class TargetTransformInfo::Concept {
1770 public:
1771 virtual ~Concept() = 0;
1772 virtual const DataLayout &getDataLayout() const = 0;
1773 virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
1775 Type *AccessType,
1777 virtual InstructionCost
1779 const TTI::PointersChainInfo &Info, Type *AccessTy,
1781 virtual unsigned getInliningThresholdMultiplier() const = 0;
1783 virtual unsigned
1785 virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
1786 virtual int getInlinerVectorBonusPercent() const = 0;
1787 virtual unsigned getCallerAllocaCost(const CallBase *CB,
1788 const AllocaInst *AI) const = 0;
1791 virtual unsigned
1793 ProfileSummaryInfo *PSI,
1794 BlockFrequencyInfo *BFI) = 0;
1799 virtual bool hasBranchDivergence(const Function *F = nullptr) = 0;
1800 virtual bool isSourceOfDivergence(const Value *V) = 0;
1801 virtual bool isAlwaysUniform(const Value *V) = 0;
1802 virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1803 virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const = 0;
1804 virtual unsigned getFlatAddressSpace() = 0;
1806 Intrinsic::ID IID) const = 0;
1807 virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1808 virtual bool
1810 virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
1811 virtual bool isSingleThreaded() const = 0;
1812 virtual std::pair<const Value *, unsigned>
1813 getPredicatedAddrSpace(const Value *V) const = 0;
1815 Value *OldV,
1816 Value *NewV) const = 0;
1817 virtual bool isLoweredToCall(const Function *F) = 0;
1820 OptimizationRemarkEmitter *ORE) = 0;
1822 PeelingPreferences &PP) = 0;
1824 AssumptionCache &AC,
1825 TargetLibraryInfo *LibInfo,
1826 HardwareLoopInfo &HWLoopInfo) = 0;
1827 virtual bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) = 0;
1828 virtual TailFoldingStyle
1829 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) = 0;
1830 virtual std::optional<Instruction *> instCombineIntrinsic(
1831 InstCombiner &IC, IntrinsicInst &II) = 0;
1832 virtual std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
1833 InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask,
1834 KnownBits & Known, bool &KnownBitsComputed) = 0;
1835 virtual std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
1836 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts,
1837 APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
1838 std::function<void(Instruction *, unsigned, APInt, APInt &)>
1839 SimplifyAndSetOp) = 0;
1840 virtual bool isLegalAddImmediate(int64_t Imm) = 0;
1841 virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
1842 virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
1843 int64_t BaseOffset, bool HasBaseReg,
1844 int64_t Scale, unsigned AddrSpace,
1845 Instruction *I) = 0;
1847 const TargetTransformInfo::LSRCost &C2) = 0;
1848 virtual bool isNumRegsMajorCostOfLSR() = 0;
1851 virtual bool canMacroFuseCmp() = 0;
1852 virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
1854 TargetLibraryInfo *LibInfo) = 0;
1855 virtual AddressingModeKind
1857 virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
1858 virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
1859 virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
1860 virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
1861 virtual bool isLegalBroadcastLoad(Type *ElementTy,
1862 ElementCount NumElements) const = 0;
1863 virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
1864 virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
1866 Align Alignment) = 0;
1868 Align Alignment) = 0;
1869 virtual bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) = 0;
1870 virtual bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) = 0;
1871 virtual bool isLegalStridedLoadStore(Type *DataType, Align Alignment) = 0;
1872 virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
1873 unsigned Opcode1,
1874 const SmallBitVector &OpcodeMask) const = 0;
1875 virtual bool enableOrderedReductions() = 0;
1876 virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
1877 virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
1880 int64_t BaseOffset,
1881 bool HasBaseReg, int64_t Scale,
1882 unsigned AddrSpace) = 0;
1883 virtual bool LSRWithInstrQueries() = 0;
1884 virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
1886 virtual bool useAA() = 0;
1887 virtual bool isTypeLegal(Type *Ty) = 0;
1888 virtual unsigned getRegUsageForType(Type *Ty) = 0;
1889 virtual bool shouldBuildLookupTables() = 0;
1891 virtual bool shouldBuildRelLookupTables() = 0;
1892 virtual bool useColdCCForColdCall(Function &F) = 0;
1894 const APInt &DemandedElts,
1895 bool Insert, bool Extract,
1897 virtual InstructionCost
1899 ArrayRef<Type *> Tys,
1902 virtual bool supportsTailCalls() = 0;
1903 virtual bool supportsTailCallFor(const CallBase *CB) = 0;
1904 virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
1906 enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
1907 virtual bool enableSelectOptimize() = 0;
1913 unsigned BitWidth,
1914 unsigned AddressSpace,
1915 Align Alignment,
1916 unsigned *Fast) = 0;
1917 virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
1918 virtual bool haveFastSqrt(Type *Ty) = 0;
1920 virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
1922 virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
1923 const APInt &Imm, Type *Ty) = 0;
1924 virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
1926 virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
1927 const APInt &Imm, Type *Ty,
1929 Instruction *Inst = nullptr) = 0;
1931 const APInt &Imm, Type *Ty,
1934 const Function &Fn) const = 0;
1935 virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
1936 virtual unsigned getRegisterClassForType(bool Vector,
1937 Type *Ty = nullptr) const = 0;
1938 virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
1940 virtual unsigned getMinVectorRegisterBitWidth() const = 0;
1941 virtual std::optional<unsigned> getMaxVScale() const = 0;
1942 virtual std::optional<unsigned> getVScaleForTuning() const = 0;
1943 virtual bool isVScaleKnownToBeAPowerOfTwo() const = 0;
1944 virtual bool
1946 virtual ElementCount getMinimumVF(unsigned ElemWidth,
1947 bool IsScalable) const = 0;
1948 virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
1949 virtual unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
1950 Type *ScalarValTy) const = 0;
1952 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1953 virtual unsigned getCacheLineSize() const = 0;
1954 virtual std::optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
1955 virtual std::optional<unsigned> getCacheAssociativity(CacheLevel Level)
1956 const = 0;
1957 virtual std::optional<unsigned> getMinPageSize() const = 0;
1958
1959 /// \return How much before a load we should place the prefetch
1960 /// instruction. This is currently measured in number of
1961 /// instructions.
1962 virtual unsigned getPrefetchDistance() const = 0;
1963
1964 /// \return Some HW prefetchers can handle accesses up to a certain
1965 /// constant stride. This is the minimum stride in bytes where it
1966 /// makes sense to start adding SW prefetches. The default is 1,
1967 /// i.e. prefetch with any stride. Sometimes prefetching is beneficial
1968 /// even below the HW prefetcher limit, and the arguments provided are
1969 /// meant to serve as a basis for deciding this for a particular loop.
1970 virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1971 unsigned NumStridedMemAccesses,
1972 unsigned NumPrefetches,
1973 bool HasCall) const = 0;
1974
1975 /// \return The maximum number of iterations to prefetch ahead. If
1976 /// the required number of iterations is more than this number, no
1977 /// prefetching is performed.
1978 virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
1979
1980 /// \return True if prefetching should also be done for writes.
1981 virtual bool enableWritePrefetching() const = 0;
1982
1983 /// \return True if the target wants to issue a prefetch in address space \p AS.
1984 virtual bool shouldPrefetchAddressSpace(unsigned AS) const = 0;
1985
1986 virtual unsigned getMaxInterleaveFactor(ElementCount VF) = 0;
1988 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
1989 OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
1990 ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
1992 VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
1993 const SmallBitVector &OpcodeMask,
1995
1997 ArrayRef<int> Mask,
1999 int Index, VectorType *SubTp,
2000 ArrayRef<const Value *> Args) = 0;
2001 virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
2002 Type *Src, CastContextHint CCH,
2004 const Instruction *I) = 0;
2005 virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
2006 VectorType *VecTy,
2007 unsigned Index) = 0;
2008 virtual InstructionCost getCFInstrCost(unsigned Opcode,
2010 const Instruction *I = nullptr) = 0;
2011 virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
2012 Type *CondTy,
2013 CmpInst::Predicate VecPred,
2015 const Instruction *I) = 0;
2016 virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
2018 unsigned Index, Value *Op0,
2019 Value *Op1) = 0;
2022 unsigned Index) = 0;
2023
2024 virtual InstructionCost
2025 getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
2026 const APInt &DemandedDstElts,
2028
2029 virtual InstructionCost
2030 getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2032 OperandValueInfo OpInfo, const Instruction *I) = 0;
2033 virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src,
2034 Align Alignment,
2035 unsigned AddressSpace,
2037 const Instruction *I) = 0;
2038 virtual InstructionCost
2039 getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2040 unsigned AddressSpace,
2042 virtual InstructionCost
2043 getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2044 bool VariableMask, Align Alignment,
2046 const Instruction *I = nullptr) = 0;
2047 virtual InstructionCost
2048 getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2049 bool VariableMask, Align Alignment,
2051 const Instruction *I = nullptr) = 0;
2052
2054 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
2055 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
2056 bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
2057 virtual InstructionCost
2059 std::optional<FastMathFlags> FMF,
2061 virtual InstructionCost
2065 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
2066 FastMathFlags FMF,
2069 bool IsUnsigned, Type *ResTy, VectorType *Ty,
2071 virtual InstructionCost
2075 ArrayRef<Type *> Tys,
2077 virtual unsigned getNumberOfParts(Type *Tp) = 0;
2078 virtual InstructionCost
2080 virtual InstructionCost
2083 MemIntrinsicInfo &Info) = 0;
2084 virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
2086 Type *ExpectedType) = 0;
2088 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
2089 unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
2090 std::optional<uint32_t> AtomicElementSize) const = 0;
2091
2093 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
2094 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
2095 unsigned SrcAlign, unsigned DestAlign,
2096 std::optional<uint32_t> AtomicCpySize) const = 0;
2097 virtual bool areInlineCompatible(const Function *Caller,
2098 const Function *Callee) const = 0;
2099 virtual unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
2100 unsigned DefaultCallPenalty) const = 0;
2101 virtual bool areTypesABICompatible(const Function *Caller,
2102 const Function *Callee,
2103 const ArrayRef<Type *> &Types) const = 0;
2104 virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
2105 virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
2106 virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
2107 virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
2108 virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
2109 virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
2110 Align Alignment,
2111 unsigned AddrSpace) const = 0;
2112 virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
2113 Align Alignment,
2114 unsigned AddrSpace) const = 0;
2116 ElementCount VF) const = 0;
2117 virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0;
2118 virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
2119 unsigned ChainSizeInBytes,
2120 VectorType *VecTy) const = 0;
2121 virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
2122 unsigned ChainSizeInBytes,
2123 VectorType *VecTy) const = 0;
2124 virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty,
2125 ReductionFlags) const = 0;
2126 virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
2127 ReductionFlags) const = 0;
2128 virtual bool preferEpilogueVectorization() const = 0;
2129
2130 virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
2131 virtual unsigned getGISelRematGlobalCost() const = 0;
2132 virtual unsigned getMinTripCountTailFoldingThreshold() const = 0;
2133 virtual bool enableScalableVectorization() const = 0;
2134 virtual bool supportsScalableVectors() const = 0;
2135 virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
2136 Align Alignment) const = 0;
2137 virtual VPLegalization
2139 virtual bool hasArmWideBranch(bool Thumb) const = 0;
2140 virtual unsigned getMaxNumArgs() const = 0;
2141};
2142
2143template <typename T>
2144class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
2145 T Impl;
2146
2147public:
2148 Model(T Impl) : Impl(std::move(Impl)) {}
2149 ~Model() override = default;
2150
2151 const DataLayout &getDataLayout() const override {
2152 return Impl.getDataLayout();
2153 }
2154
2155 InstructionCost
2156 getGEPCost(Type *PointeeType, const Value *Ptr,
2157 ArrayRef<const Value *> Operands, Type *AccessType,
2159 return Impl.getGEPCost(PointeeType, Ptr, Operands, AccessType, CostKind);
2160 }
2161 InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
2162 const Value *Base,
2163 const PointersChainInfo &Info,
2164 Type *AccessTy,
2165 TargetCostKind CostKind) override {
2166 return Impl.getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
2167 }
2168 unsigned getInliningThresholdMultiplier() const override {
2169 return Impl.getInliningThresholdMultiplier();
2170 }
2171 unsigned adjustInliningThreshold(const CallBase *CB) override {
2172 return Impl.adjustInliningThreshold(CB);
2173 }
2174 unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const override {
2175 return Impl.getInliningCostBenefitAnalysisSavingsMultiplier();
2176 }
2177 unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const override {
2178 return Impl.getInliningCostBenefitAnalysisProfitableMultiplier();
2179 }
2180 int getInlinerVectorBonusPercent() const override {
2181 return Impl.getInlinerVectorBonusPercent();
2182 }
2183 unsigned getCallerAllocaCost(const CallBase *CB,
2184 const AllocaInst *AI) const override {
2185 return Impl.getCallerAllocaCost(CB, AI);
2186 }
2187 InstructionCost getMemcpyCost(const Instruction *I) override {
2188 return Impl.getMemcpyCost(I);
2189 }
2190
2191 uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {
2192 return Impl.getMaxMemIntrinsicInlineSizeThreshold();
2193 }
2194
2195 InstructionCost getInstructionCost(const User *U,
2196 ArrayRef<const Value *> Operands,
2197 TargetCostKind CostKind) override {
2198 return Impl.getInstructionCost(U, Operands, CostKind);
2199 }
2200 BranchProbability getPredictableBranchThreshold() override {
2201 return Impl.getPredictableBranchThreshold();
2202 }
2203 bool hasBranchDivergence(const Function *F = nullptr) override {
2204 return Impl.hasBranchDivergence(F);
2205 }
2206 bool isSourceOfDivergence(const Value *V) override {
2207 return Impl.isSourceOfDivergence(V);
2208 }
2209
2210 bool isAlwaysUniform(const Value *V) override {
2211 return Impl.isAlwaysUniform(V);
2212 }
2213
2214 bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
2215 return Impl.isValidAddrSpaceCast(FromAS, ToAS);
2216 }
2217
2218 bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override {
2219 return Impl.addrspacesMayAlias(AS0, AS1);
2220 }
2221
2222 unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
2223
2224 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
2225 Intrinsic::ID IID) const override {
2226 return Impl.collectFlatAddressOperands(OpIndexes, IID);
2227 }
2228
2229 bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
2230 return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
2231 }
2232
2233 bool
2234 canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
2235 return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
2236 }
2237
2238 unsigned getAssumedAddrSpace(const Value *V) const override {
2239 return Impl.getAssumedAddrSpace(V);
2240 }
2241
2242 bool isSingleThreaded() const override { return Impl.isSingleThreaded(); }
2243
2244 std::pair<const Value *, unsigned>
2245 getPredicatedAddrSpace(const Value *V) const override {
2246 return Impl.getPredicatedAddrSpace(V);
2247 }
2248
2249 Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
2250 Value *NewV) const override {
2251 return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
2252 }
2253
2254 bool isLoweredToCall(const Function *F) override {
2255 return Impl.isLoweredToCall(F);
2256 }
2257 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
2258 UnrollingPreferences &UP,
2259 OptimizationRemarkEmitter *ORE) override {
2260 return Impl.getUnrollingPreferences(L, SE, UP, ORE);
2261 }
2262 void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
2263 PeelingPreferences &PP) override {
2264 return Impl.getPeelingPreferences(L, SE, PP);
2265 }
2266 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
2267 AssumptionCache &AC, TargetLibraryInfo *LibInfo,
2268 HardwareLoopInfo &HWLoopInfo) override {
2269 return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
2270 }
2271 bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) override {
2272 return Impl.preferPredicateOverEpilogue(TFI);
2273 }
2274 TailFoldingStyle
2275 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override {
2276 return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
2277 }
2278 std::optional<Instruction *>
2279 instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {
2280 return Impl.instCombineIntrinsic(IC, II);
2281 }
2282 std::optional<Value *>
2283 simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
2284 APInt DemandedMask, KnownBits &Known,
2285 bool &KnownBitsComputed) override {
2286 return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
2287 KnownBitsComputed);
2288 }
2289 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
2290 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
2291 APInt &UndefElts2, APInt &UndefElts3,
2292 std::function<void(Instruction *, unsigned, APInt, APInt &)>
2293 SimplifyAndSetOp) override {
2294 return Impl.simplifyDemandedVectorEltsIntrinsic(
2295 IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
2296 SimplifyAndSetOp);
2297 }
2298 bool isLegalAddImmediate(int64_t Imm) override {
2299 return Impl.isLegalAddImmediate(Imm);
2300 }
2301 bool isLegalICmpImmediate(int64_t Imm) override {
2302 return Impl.isLegalICmpImmediate(Imm);
2303 }
2304 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
2305 bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
2306 Instruction *I) override {
2307 return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2308 AddrSpace, I);
2309 }
2310 bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
2311 const TargetTransformInfo::LSRCost &C2) override {
2312 return Impl.isLSRCostLess(C1, C2);
2313 }
2314 bool isNumRegsMajorCostOfLSR() override {
2315 return Impl.isNumRegsMajorCostOfLSR();
2316 }
2317 bool shouldFoldTerminatingConditionAfterLSR() const override {
2318 return Impl.shouldFoldTerminatingConditionAfterLSR();
2319 }
2320 bool isProfitableLSRChainElement(Instruction *I) override {
2321 return Impl.isProfitableLSRChainElement(I);
2322 }
2323 bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
2324 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
2325 DominatorTree *DT, AssumptionCache *AC,
2326 TargetLibraryInfo *LibInfo) override {
2327 return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
2328 }
2329 AddressingModeKind
2330 getPreferredAddressingMode(const Loop *L,
2331 ScalarEvolution *SE) const override {
2332 return Impl.getPreferredAddressingMode(L, SE);
2333 }
2334 bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
2335 return Impl.isLegalMaskedStore(DataType, Alignment);
2336 }
2337 bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
2338 return Impl.isLegalMaskedLoad(DataType, Alignment);
2339 }
2340 bool isLegalNTStore(Type *DataType, Align Alignment) override {
2341 return Impl.isLegalNTStore(DataType, Alignment);
2342 }
2343 bool isLegalNTLoad(Type *DataType, Align Alignment) override {
2344 return Impl.isLegalNTLoad(DataType, Alignment);
2345 }
2346 bool isLegalBroadcastLoad(Type *ElementTy,
2347 ElementCount NumElements) const override {
2348 return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
2349 }
2350 bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
2351 return Impl.isLegalMaskedScatter(DataType, Alignment);
2352 }
2353 bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
2354 return Impl.isLegalMaskedGather(DataType, Alignment);
2355 }
2356 bool forceScalarizeMaskedGather(VectorType *DataType,
2357 Align Alignment) override {
2358 return Impl.forceScalarizeMaskedGather(DataType, Alignment);
2359 }
2360 bool forceScalarizeMaskedScatter(VectorType *DataType,
2361 Align Alignment) override {
2362 return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
2363 }
2364 bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) override {
2365 return Impl.isLegalMaskedCompressStore(DataType, Alignment);
2366 }
2367 bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) override {
2368 return Impl.isLegalMaskedExpandLoad(DataType, Alignment);
2369 }
2370 bool isLegalStridedLoadStore(Type *DataType, Align Alignment) override {
2371 return Impl.isLegalStridedLoadStore(DataType, Alignment);
2372 }
2373 bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
2374 const SmallBitVector &OpcodeMask) const override {
2375 return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
2376 }
2377 bool enableOrderedReductions() override {
2378 return Impl.enableOrderedReductions();
2379 }
2380 bool hasDivRemOp(Type *DataType, bool IsSigned) override {
2381 return Impl.hasDivRemOp(DataType, IsSigned);
2382 }
2383 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
2384 return Impl.hasVolatileVariant(I, AddrSpace);
2385 }
2386 bool prefersVectorizedAddressing() override {
2387 return Impl.prefersVectorizedAddressing();
2388 }
2389 InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
2390 int64_t BaseOffset, bool HasBaseReg,
2391 int64_t Scale,
2392 unsigned AddrSpace) override {
2393 return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2394 AddrSpace);
2395 }
2396 bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
2397 bool isTruncateFree(Type *Ty1, Type *Ty2) override {
2398 return Impl.isTruncateFree(Ty1, Ty2);
2399 }
2400 bool isProfitableToHoist(Instruction *I) override {
2401 return Impl.isProfitableToHoist(I);
2402 }
2403 bool useAA() override { return Impl.useAA(); }
2404 bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
2405 unsigned getRegUsageForType(Type *Ty) override {
2406 return Impl.getRegUsageForType(Ty);
2407 }
2408 bool shouldBuildLookupTables() override {
2409 return Impl.shouldBuildLookupTables();
2410 }
2411 bool shouldBuildLookupTablesForConstant(Constant *C) override {
2412 return Impl.shouldBuildLookupTablesForConstant(C);
2413 }
2414 bool shouldBuildRelLookupTables() override {
2415 return Impl.shouldBuildRelLookupTables();
2416 }
2417 bool useColdCCForColdCall(Function &F) override {
2418 return Impl.useColdCCForColdCall(F);
2419 }
2420
2421 InstructionCost getScalarizationOverhead(VectorType *Ty,
2422 const APInt &DemandedElts,
2423 bool Insert, bool Extract,
2424 TargetCostKind CostKind) override {
2425 return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
2426 CostKind);
2427 }
2428 InstructionCost
2429 getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
2430 ArrayRef<Type *> Tys,
2431 TargetCostKind CostKind) override {
2432 return Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind);
2433 }
2434
2435 bool supportsEfficientVectorElementLoadStore() override {
2436 return Impl.supportsEfficientVectorElementLoadStore();
2437 }
2438
2439 bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
2440 bool supportsTailCallFor(const CallBase *CB) override {
2441 return Impl.supportsTailCallFor(CB);
2442 }
2443
2444 bool enableAggressiveInterleaving(bool LoopHasReductions) override {
2445 return Impl.enableAggressiveInterleaving(LoopHasReductions);
2446 }
2447 MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
2448 bool IsZeroCmp) const override {
2449 return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
2450 }
2451 bool enableSelectOptimize() override {
2452 return Impl.enableSelectOptimize();
2453 }
2454 bool shouldTreatInstructionLikeSelect(const Instruction *I) override {
2455 return Impl.shouldTreatInstructionLikeSelect(I);
2456 }
2457 bool enableInterleavedAccessVectorization() override {
2458 return Impl.enableInterleavedAccessVectorization();
2459 }
2460 bool enableMaskedInterleavedAccessVectorization() override {
2461 return Impl.enableMaskedInterleavedAccessVectorization();
2462 }
2463 bool isFPVectorizationPotentiallyUnsafe() override {
2464 return Impl.isFPVectorizationPotentiallyUnsafe();
2465 }
2466 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
2467 unsigned AddressSpace, Align Alignment,
2468 unsigned *Fast) override {
2469 return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
2470 Alignment, Fast);
2471 }
2472 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
2473 return Impl.getPopcntSupport(IntTyWidthInBit);
2474 }
2475 bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
2476
2477 bool isExpensiveToSpeculativelyExecute(const Instruction* I) override {
2478 return Impl.isExpensiveToSpeculativelyExecute(I);
2479 }
2480
2481 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
2482 return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
2483 }
2484
2485 InstructionCost getFPOpCost(Type *Ty) override {
2486 return Impl.getFPOpCost(Ty);
2487 }
2488
2489 InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
2490 const APInt &Imm, Type *Ty) override {
2491 return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
2492 }
2493 InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
2494 TargetCostKind CostKind) override {
2495 return Impl.getIntImmCost(Imm, Ty, CostKind);
2496 }
2497 InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
2498 const APInt &Imm, Type *Ty,
2499 TargetCostKind CostKind,
2500 Instruction *Inst = nullptr) override {
2501 return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
2502 }
2503 InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
2504 const APInt &Imm, Type *Ty,
2505 TargetCostKind CostKind) override {
2506 return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
2507 }
2508 bool preferToKeepConstantsAttached(const Instruction &Inst,
2509 const Function &Fn) const override {
2510 return Impl.preferToKeepConstantsAttached(Inst, Fn);
2511 }
2512 unsigned getNumberOfRegisters(unsigned ClassID) const override {
2513 return Impl.getNumberOfRegisters(ClassID);
2514 }
2515 unsigned getRegisterClassForType(bool Vector,
2516 Type *Ty = nullptr) const override {
2517 return Impl.getRegisterClassForType(Vector, Ty);
2518 }
2519 const char *getRegisterClassName(unsigned ClassID) const override {
2520 return Impl.getRegisterClassName(ClassID);
2521 }
2522 TypeSize getRegisterBitWidth(RegisterKind K) const override {
2523 return Impl.getRegisterBitWidth(K);
2524 }
2525 unsigned getMinVectorRegisterBitWidth() const override {
2526 return Impl.getMinVectorRegisterBitWidth();
2527 }
2528 std::optional<unsigned> getMaxVScale() const override {
2529 return Impl.getMaxVScale();
2530 }
2531 std::optional<unsigned> getVScaleForTuning() const override {
2532 return Impl.getVScaleForTuning();
2533 }
2534 bool isVScaleKnownToBeAPowerOfTwo() const override {
2535 return Impl.isVScaleKnownToBeAPowerOfTwo();
2536 }
2537 bool shouldMaximizeVectorBandwidth(
2538 TargetTransformInfo::RegisterKind K) const override {
2539 return Impl.shouldMaximizeVectorBandwidth(K);
2540 }
2541 ElementCount getMinimumVF(unsigned ElemWidth,
2542 bool IsScalable) const override {
2543 return Impl.getMinimumVF(ElemWidth, IsScalable);
2544 }
2545 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
2546 return Impl.getMaximumVF(ElemWidth, Opcode);
2547 }
2548 unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
2549 Type *ScalarValTy) const override {
2550 return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
2551 }
2552 bool shouldConsiderAddressTypePromotion(
2553 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
2554 return Impl.shouldConsiderAddressTypePromotion(
2555 I, AllowPromotionWithoutCommonHeader);
2556 }
2557 unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
2558 std::optional<unsigned> getCacheSize(CacheLevel Level) const override {
2559 return Impl.getCacheSize(Level);
2560 }
2561 std::optional<unsigned>
2562 getCacheAssociativity(CacheLevel Level) const override {
2563 return Impl.getCacheAssociativity(Level);
2564 }
2565
2566 std::optional<unsigned> getMinPageSize() const override {
2567 return Impl.getMinPageSize();
2568 }
2569
2570 /// Return the preferred prefetch distance in terms of instructions.
2571 ///
2572 unsigned getPrefetchDistance() const override {
2573 return Impl.getPrefetchDistance();
2574 }
2575
2576 /// Return the minimum stride necessary to trigger software
2577 /// prefetching.
2578 ///
2579 unsigned getMinPrefetchStride(unsigned NumMemAccesses,
2580 unsigned NumStridedMemAccesses,
2581 unsigned NumPrefetches,
2582 bool HasCall) const override {
2583 return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
2584 NumPrefetches, HasCall);
2585 }
2586
2587 /// Return the maximum prefetch distance in terms of loop
2588 /// iterations.
2589 ///
2590 unsigned getMaxPrefetchIterationsAhead() const override {
2591 return Impl.getMaxPrefetchIterationsAhead();
2592 }
2593
2594 /// \return True if prefetching should also be done for writes.
2595 bool enableWritePrefetching() const override {
2596 return Impl.enableWritePrefetching();
2597 }
2598
2599 /// \return True if the target wants to issue a prefetch in address space \p AS.
2600 bool shouldPrefetchAddressSpace(unsigned AS) const override {
2601 return Impl.shouldPrefetchAddressSpace(AS);
2602 }
2603
2604 unsigned getMaxInterleaveFactor(ElementCount VF) override {
2605 return Impl.getMaxInterleaveFactor(VF);
2606 }
2607 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
2608 unsigned &JTSize,
2609 ProfileSummaryInfo *PSI,
2610 BlockFrequencyInfo *BFI) override {
2611 return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
2612 }
2613 InstructionCost getArithmeticInstrCost(
2614 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
2615 OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
2616 ArrayRef<const Value *> Args,
2617 const Instruction *CxtI = nullptr) override {
2618 return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
2619 Args, CxtI);
2620 }
2621 InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
2622 unsigned Opcode1,
2623 const SmallBitVector &OpcodeMask,
2624 TTI::TargetCostKind CostKind) const override {
2625 return Impl.getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
2626 }
2627
2628 InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
2629 ArrayRef<int> Mask,
2630 TTI::TargetCostKind CostKind, int Index,
2631 VectorType *SubTp,
2632 ArrayRef<const Value *> Args) override {
2633 return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
2634 }
2635 InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
2636 CastContextHint CCH,
2637 TTI::TargetCostKind CostKind,
2638 const Instruction *I) override {
2639 return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
2640 }
2641 InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
2642 VectorType *VecTy,
2643 unsigned Index) override {
2644 return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
2645 }
2646 InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
2647 const Instruction *I = nullptr) override {
2648 return Impl.getCFInstrCost(Opcode, CostKind, I);
2649 }
2650 InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
2651 CmpInst::Predicate VecPred,
2652 TTI::TargetCostKind CostKind,
2653 const Instruction *I) override {
2654 return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
2655 }
2656 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
2657 TTI::TargetCostKind CostKind,
2658 unsigned Index, Value *Op0,
2659 Value *Op1) override {
2660 return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
2661 }
2662 InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
2663 TTI::TargetCostKind CostKind,
2664 unsigned Index) override {
2665 return Impl.getVectorInstrCost(I, Val, CostKind, Index);
2666 }
2667 InstructionCost
2668 getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
2669 const APInt &DemandedDstElts,
2670 TTI::TargetCostKind CostKind) override {
2671 return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
2672 DemandedDstElts, CostKind);
2673 }
2674 InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2675 unsigned AddressSpace,
2676 TTI::TargetCostKind CostKind,
2677 OperandValueInfo OpInfo,
2678 const Instruction *I) override {
2679 return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind,
2680 OpInfo, I);
2681 }
2682 InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2683 unsigned AddressSpace,
2684 TTI::TargetCostKind CostKind,
2685 const Instruction *I) override {
2686 return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2687 CostKind, I);
2688 }
2689 InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
2690 Align Alignment, unsigned AddressSpace,
2691 TTI::TargetCostKind CostKind) override {
2692 return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2693 CostKind);
2694 }
2695 InstructionCost
2696 getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2697 bool VariableMask, Align Alignment,
2698 TTI::TargetCostKind CostKind,
2699 const Instruction *I = nullptr) override {
2700 return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
2701 Alignment, CostKind, I);
2702 }
2703 InstructionCost
2704 getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2705 bool VariableMask, Align Alignment,
2706 TTI::TargetCostKind CostKind,
2707 const Instruction *I = nullptr) override {
2708 return Impl.getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
2709 Alignment, CostKind, I);
2710 }
2711 InstructionCost getInterleavedMemoryOpCost(
2712 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
2713 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
2714 bool UseMaskForCond, bool UseMaskForGaps) override {
2715 return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
2716 Alignment, AddressSpace, CostKind,
2717 UseMaskForCond, UseMaskForGaps);
2718 }
2719 InstructionCost
2720 getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
2721 std::optional<FastMathFlags> FMF,
2722 TTI::TargetCostKind CostKind) override {
2723 return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
2724 }
2725 InstructionCost
2726 getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
2727 TTI::TargetCostKind CostKind) override {
2728 return Impl.getMinMaxReductionCost(IID, Ty, FMF, CostKind);
2729 }
2730 InstructionCost
2731 getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
2732 VectorType *Ty, FastMathFlags FMF,
2733 TTI::TargetCostKind CostKind) override {
2734 return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
2735 CostKind);
2736 }
2737 InstructionCost
2738 getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty,
2739 TTI::TargetCostKind CostKind) override {
2740 return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind);
2741 }
2742 InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
2743 TTI::TargetCostKind CostKind) override {
2744 return Impl.getIntrinsicInstrCost(ICA, CostKind);
2745 }
2746 InstructionCost getCallInstrCost(Function *F, Type *RetTy,
2747 ArrayRef<Type *> Tys,
2748 TTI::TargetCostKind CostKind) override {
2749 return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
2750 }
2751 unsigned getNumberOfParts(Type *Tp) override {
2752 return Impl.getNumberOfParts(Tp);
2753 }
2754 InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
2755 const SCEV *Ptr) override {
2756 return Impl.getAddressComputationCost(Ty, SE, Ptr);
2757 }
2758 InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
2759 return Impl.getCostOfKeepingLiveOverCall(Tys);
2760 }
2761 bool getTgtMemIntrinsic(IntrinsicInst *Inst,
2762 MemIntrinsicInfo &Info) override {
2763 return Impl.getTgtMemIntrinsic(Inst, Info);
2764 }
2765 unsigned getAtomicMemIntrinsicMaxElementSize() const override {
2766 return Impl.getAtomicMemIntrinsicMaxElementSize();
2767 }
2768 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
2769 Type *ExpectedType) override {
2770 return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
2771 }
2772 Type *getMemcpyLoopLoweringType(
2773 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
2774 unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
2775 std::optional<uint32_t> AtomicElementSize) const override {
2776 return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
2777 DestAddrSpace, SrcAlign, DestAlign,
2778 AtomicElementSize);
2779 }
2780 void getMemcpyLoopResidualLoweringType(
2781 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
2782 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
2783 unsigned SrcAlign, unsigned DestAlign,
2784 std::optional<uint32_t> AtomicCpySize) const override {
2785 Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
2786 SrcAddrSpace, DestAddrSpace,
2787 SrcAlign, DestAlign, AtomicCpySize);
2788 }
2789 bool areInlineCompatible(const Function *Caller,
2790 const Function *Callee) const override {
2791 return Impl.areInlineCompatible(Caller, Callee);
2792 }
2793 unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
2794 unsigned DefaultCallPenalty) const override {
2795 return Impl.getInlineCallPenalty(F, Call, DefaultCallPenalty);
2796 }
2797 bool areTypesABICompatible(const Function *Caller, const Function *Callee,
2798 const ArrayRef<Type *> &Types) const override {
2799 return Impl.areTypesABICompatible(Caller, Callee, Types);
2800 }
2801 bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
2802 return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
2803 }
2804 bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
2805 return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
2806 }
2807 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
2808 return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
2809 }
2810 bool isLegalToVectorizeLoad(LoadInst *LI) const override {
2811 return Impl.isLegalToVectorizeLoad(LI);
2812 }
2813 bool isLegalToVectorizeStore(StoreInst *SI) const override {
2814 return Impl.isLegalToVectorizeStore(SI);
2815 }
2816 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
2817 unsigned AddrSpace) const override {
2818 return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
2819 AddrSpace);
2820 }
2821 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
2822 unsigned AddrSpace) const override {
2823 return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
2824 AddrSpace);
2825 }
2826 bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
2827 ElementCount VF) const override {
2828 return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
2829 }
2830 bool isElementTypeLegalForScalableVector(Type *Ty) const override {
2831 return Impl.isElementTypeLegalForScalableVector(Ty);
2832 }
2833 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
2834 unsigned ChainSizeInBytes,
2835 VectorType *VecTy) const override {
2836 return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
2837 }
2838 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
2839 unsigned ChainSizeInBytes,
2840 VectorType *VecTy) const override {
2841 return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
2842 }
2843 bool preferInLoopReduction(unsigned Opcode, Type *Ty,
2844 ReductionFlags Flags) const override {
2845 return Impl.preferInLoopReduction(Opcode, Ty, Flags);
2846 }
2847 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
2848 ReductionFlags Flags) const override {
2849 return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
2850 }
2851 bool preferEpilogueVectorization() const override {
2852 return Impl.preferEpilogueVectorization();
2853 }
2854
2855 bool shouldExpandReduction(const IntrinsicInst *II) const override {
2856 return Impl.shouldExpandReduction(II);
2857 }
2858
2859 unsigned getGISelRematGlobalCost() const override {
2860 return Impl.getGISelRematGlobalCost();
2861 }
2862
2863 unsigned getMinTripCountTailFoldingThreshold() const override {
2864 return Impl.getMinTripCountTailFoldingThreshold();
2865 }
2866
2867 bool supportsScalableVectors() const override {
2868 return Impl.supportsScalableVectors();
2869 }
2870
2871 bool enableScalableVectorization() const override {
2872 return Impl.enableScalableVectorization();
2873 }
2874
2875 bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
2876 Align Alignment) const override {
2877 return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
2878 }
2879
2880 VPLegalization
2881 getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
2882 return Impl.getVPLegalizationStrategy(PI);
2883 }
2884
2885 bool hasArmWideBranch(bool Thumb) const override {
2886 return Impl.hasArmWideBranch(Thumb);
2887 }
2888
2889 unsigned getMaxNumArgs() const override {
2890 return Impl.getMaxNumArgs();
2891 }
2892};
2893
2894 template <typename T>
2895 TargetTransformInfo::TargetTransformInfo(T Impl)
2896 : TTIImpl(new Model<T>(Impl)) {}
2897
2898/// Analysis pass providing the \c TargetTransformInfo.
2899///
2900/// The core idea of the TargetIRAnalysis is to expose an interface through
2901/// which LLVM targets can analyze and provide information about the middle
2902/// end's target-independent IR. This supports use cases such as target-aware
2903/// cost modeling of IR constructs.
2904///
2905/// This is a function analysis because much of the cost modeling for targets
2906 /// is done in a subtarget-specific way, and LLVM supports compiling different
2907/// functions targeting different subtargets in order to support runtime
2908/// dispatch according to the observed subtarget.
2909class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
2910 public:
2911 typedef TargetTransformInfo Result;
2912
2913 /// Default construct a target IR analysis.
2914 ///
2915 /// This will use the module's datalayout to construct a baseline
2916 /// conservative TTI result.
2917 TargetIRAnalysis();
2918
2919 /// Construct an IR analysis pass around a target-provided callback.
2920 ///
2921 /// The callback will be called with a particular function for which the TTI
2922 /// is needed and must return a TTI object for that function.
2923 TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
2924
2925 // Value semantics. We spell out the constructors for MSVC.
2926 TargetIRAnalysis(const TargetIRAnalysis &Arg)
2927 : TTICallback(Arg.TTICallback) {}
2928 TargetIRAnalysis(TargetIRAnalysis &&Arg)
2929 : TTICallback(std::move(Arg.TTICallback)) {}
2930 TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
2931 TTICallback = RHS.TTICallback;
2932 return *this;
2933 }
2934 TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
2935 TTICallback = std::move(RHS.TTICallback);
2936 return *this;
2937 }
2938
2939 Result run(const Function &F, FunctionAnalysisManager &);
2940
2941 private:
2942 friend AnalysisInfoMixin<TargetIRAnalysis>;
2943 static AnalysisKey Key;
2944
2945 /// The callback used to produce a result.
2946 ///
2947 /// We use a completely opaque callback so that targets can provide whatever
2948 /// mechanism they desire for constructing the TTI for a given function.
2949 ///
2950 /// FIXME: Should we really use std::function? It's relatively inefficient.
2951 /// It might be possible to arrange for even stateful callbacks to outlive
2952 /// the analysis and thus use a function_ref which would be lighter weight.
2953 /// This may also be less error prone as the callback is likely to reference
2954 /// the external TargetMachine, and that reference needs to never dangle.
2955 std::function<Result(const Function &)> TTICallback;
2956
2957 /// Helper function used as the callback in the default constructor.
2958 static Result getDefaultTTI(const Function &F);
2959};
2960
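The sketch below is not part of this header; it illustrates the typical new-pass-manager flow: register TargetIRAnalysis with a FunctionAnalysisManager, usually via the target-provided callback obtained from a TargetMachine, and query the per-function result. The TargetMachine pointer TM, the helper function name, and the specific query are assumptions about the caller's setup, not anything defined here.

// Illustrative sketch only; assumes a configured TargetMachine *TM.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/Target/TargetMachine.h"

static void queryTTIExample(llvm::TargetMachine *TM, llvm::Function &F) {
  llvm::FunctionAnalysisManager FAM;
  // Targets supply the per-function callback through the TargetMachine; a
  // default-constructed TargetIRAnalysis() would instead produce the
  // conservative, DataLayout-only baseline result.
  FAM.registerPass([&] { return TM->getTargetIRAnalysis(); });
  const llvm::TargetTransformInfo &TTI =
      FAM.getResult<llvm::TargetIRAnalysis>(F);
  // Example query: does the target have a fast sqrt for float?
  bool HasFastSqrt =
      TTI.haveFastSqrt(llvm::Type::getFloatTy(F.getContext()));
  (void)HasFastSqrt;
}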
2961/// Wrapper pass for TargetTransformInfo.
2962///
2963/// This pass can be constructed from a TTI object which it stores internally
2964/// and is queried by passes.
2965 class TargetTransformInfoWrapperPass : public ImmutablePass {
2966 TargetIRAnalysis TIRA;
2967 std::optional<TargetTransformInfo> TTI;
2968
2969 virtual void anchor();
2970
2971public:
2972 static char ID;
2973
2974 /// We must provide a default constructor for the pass but it should
2975 /// never be used.
2976 ///
2977 /// Use the constructor below or call one of the creation routines.
2978 TargetTransformInfoWrapperPass();
2979
2980 explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
2981
2982 TargetTransformInfo &getTTI(const Function &F);
2983};
2984
2985/// Create an analysis pass wrapper around a TTI object.
2986///
2987/// This analysis pass just holds the TTI instance and makes it available to
2988/// clients.
2989 ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
2990
2991} // namespace llvm
2992
2993#endif
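For the legacy pass manager, the wrapper pass above is consumed through the usual getAnalysisUsage/getAnalysis plumbing. Below is a minimal sketch under those assumptions; ExampleLegacyPass and its example query are hypothetical, and only TargetTransformInfoWrapperPass::getTTI comes from this header.

// Illustrative sketch only; ExampleLegacyPass is a hypothetical client pass.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"

namespace {
struct ExampleLegacyPass : public llvm::FunctionPass {
  static char ID;
  ExampleLegacyPass() : llvm::FunctionPass(ID) {}

  void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
    // Request the wrapper so the legacy pass manager schedules it first.
    AU.addRequired<llvm::TargetTransformInfoWrapperPass>();
    AU.setPreservesAll();
  }

  bool runOnFunction(llvm::Function &F) override {
    // getTTI lazily builds the per-function TTI from the stored TargetIRAnalysis.
    llvm::TargetTransformInfo &TTI =
        getAnalysis<llvm::TargetTransformInfoWrapperPass>().getTTI(F);
    // Example query: a reciprocal-throughput estimate for a 32-bit add.
    (void)TTI.getArithmeticInstrCost(llvm::Instruction::Add,
                                     llvm::Type::getInt32Ty(F.getContext()));
    return false; // Analysis-only: the IR is not modified.
  }
};
} // namespace

char ExampleLegacyPass::ID = 0;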