LLVM 22.0.0git
NVPTXTargetTransformInfo.h
Go to the documentation of this file.
1//===-- NVPTXTargetTransformInfo.h - NVPTX specific TTI ---------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
/// This file defines a TargetTransformInfoImplBase conforming object specific to the
10/// NVPTX target machine. It uses the target's detailed information to
11/// provide more precise answers to certain TTI queries, while letting the
12/// target independent and default TTI implementations handle the rest.
13///
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
17#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
18
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXUtilities.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/TargetLowering.h"
#include <optional>
26
27namespace llvm {
28
29class NVPTXTTIImpl final : public BasicTTIImplBase<NVPTXTTIImpl> {
31 typedef TargetTransformInfo TTI;
32 friend BaseT;
33
34 const NVPTXSubtarget *ST;
35 const NVPTXTargetLowering *TLI;
36
37 const NVPTXSubtarget *getST() const { return ST; };
38 const NVPTXTargetLowering *getTLI() const { return TLI; };
39
40 /// \returns true if the result of the value could potentially be
41 /// different across threads in a warp.
42 bool isSourceOfDivergence(const Value *V) const;
43
44public:
45 explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM, const Function &F)
46 : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl()),
47 TLI(ST->getTargetLowering()) {}
48
49 bool hasBranchDivergence(const Function *F = nullptr) const override {
50 return true;
51 }
52
53 unsigned getFlatAddressSpace() const override {
54 return AddressSpace::ADDRESS_SPACE_GENERIC;
55 }
56
57 bool
59 return AS != AddressSpace::ADDRESS_SPACE_SHARED &&
60 AS != AddressSpace::ADDRESS_SPACE_LOCAL && AS != ADDRESS_SPACE_PARAM;
61 }
62
63 std::optional<Instruction *>
65
66 // Loads and stores can be vectorized if the alignment is at least as big as
67 // the load/store we want to vectorize.
68 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
69 unsigned AddrSpace) const override {
70 return Alignment >= ChainSizeInBytes;
71 }
72 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
73 unsigned AddrSpace) const override {
74 return isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace);
75 }
76
77 // NVPTX has infinite registers of all kinds, but the actual machine doesn't.
78 // We conservatively return 1 here which is just enough to enable the
79 // vectorizers but disables heuristics based on the number of registers.
80 // FIXME: Return a more reasonable number, while keeping an eye on
81 // LoopVectorizer's unrolling heuristics.
82 unsigned getNumberOfRegisters(unsigned ClassID) const override { return 1; }
83
84 // Only <2 x half> should be vectorized, so always return 32 for the vector
85 // register size.
90 unsigned getMinVectorRegisterBitWidth() const override { return 32; }
91
92 bool shouldExpandReduction(const IntrinsicInst *II) const override {
93 // Turn off ExpandReductions pass for NVPTX, which doesn't have advanced
94 // swizzling operations. Our backend/Selection DAG can expand these
95 // reductions with less movs.
96 return false;
97 }
98
99 // We don't want to prevent inlining because of target-cpu and -features
100 // attributes that were added to newer versions of LLVM/Clang: There are
101 // no incompatible functions in PTX, ptxas will throw errors in such cases.
102 bool areInlineCompatible(const Function *Caller,
103 const Function *Callee) const override {
104 return true;
105 }
106
107 // Increase the inlining cost threshold by a factor of 11, reflecting that
108 // calls are particularly expensive in NVPTX.
109 unsigned getInliningThresholdMultiplier() const override { return 11; }
110
113 TTI::TargetCostKind CostKind) const override;
114
116 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
120 const Instruction *CxtI = nullptr) const override;
121
123 VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract,
124 TTI::TargetCostKind CostKind, bool ForPoisonSrc = true,
125 ArrayRef<Value *> VL = {}) const override {
126 if (!InTy->getElementCount().isFixed())
128
129 auto VT = getTLI()->getValueType(DL, InTy);
130 auto NumElements = InTy->getElementCount().getFixedValue();
132 if (Insert && !VL.empty()) {
133 bool AllConstant = all_of(seq(NumElements), [&](int Idx) {
134 return !DemandedElts[Idx] || isa<Constant>(VL[Idx]);
135 });
136 if (AllConstant) {
137 Cost += TTI::TCC_Free;
138 Insert = false;
139 }
140 }
141 if (Insert && NVPTX::isPackedVectorTy(VT) && VT.is32BitVector()) {
142 // Can be built in a single 32-bit mov (64-bit regs are emulated in SASS
143 // with 2x 32-bit regs)
144 Cost += 1;
145 Insert = false;
146 }
147 if (Insert && VT == MVT::v4i8) {
148 InstructionCost Cost = 3; // 3 x PRMT
149 for (auto Idx : seq(NumElements))
150 if (DemandedElts[Idx])
151 Cost += 1; // zext operand to i32
152 Insert = false;
153 }
154 return Cost + BaseT::getScalarizationOverhead(InTy, DemandedElts, Insert,
155 Extract, CostKind,
156 ForPoisonSrc, VL);
157 }
158
159 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
161 OptimizationRemarkEmitter *ORE) const override;
162
163 void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
164 TTI::PeelingPreferences &PP) const override;
165
166 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const override {
167 // Volatile loads/stores are only supported for shared and global address
168 // spaces, or for generic AS that maps to them.
169 if (!(AddrSpace == llvm::ADDRESS_SPACE_GENERIC ||
170 AddrSpace == llvm::ADDRESS_SPACE_GLOBAL ||
171 AddrSpace == llvm::ADDRESS_SPACE_SHARED))
172 return false;
173
174 switch(I->getOpcode()){
175 default:
176 return false;
177 case Instruction::Load:
178 case Instruction::Store:
179 return true;
180 }
181 }
182
184 Intrinsic::ID IID) const override;
185
186 bool isLegalMaskedStore(Type *DataType, Align Alignment, unsigned AddrSpace,
187 TTI::MaskKind MaskKind) const override;
188
189 bool isLegalMaskedLoad(Type *DataType, Align Alignment, unsigned AddrSpace,
190 TTI::MaskKind MaskKind) const override;
191
192 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override;
193
195 Value *NewV) const override;
196 unsigned getAssumedAddrSpace(const Value *V) const override;
197
199 const Function &F,
200 SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const override;
201
202 bool shouldBuildRelLookupTables() const override {
203 // Self-referential globals are not supported.
204 return false;
205 }
206
208};
209
210} // end namespace llvm
211
212#endif
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
uint64_t IntrinsicInst * II
This file describes how to lower LLVM code to machine code.
This pass exposes codegen information to IR-level passes.
Class for arbitrary precision integers.
Definition APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
The core instruction combiner logic.
static InstructionCost getInvalid(CostType Val=0)
A wrapper class for inspecting calls to intrinsic functions.
unsigned getNumberOfRegisters(unsigned ClassID) const override
InstructionUniformity getInstructionUniformity(const Value *V) const override
unsigned getFlatAddressSpace() const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
bool isLegalMaskedStore(Type *DataType, Align Alignment, unsigned AddrSpace, TTI::MaskKind MaskKind) const override
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const override
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const override
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind) const override
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
bool shouldBuildRelLookupTables() const override
unsigned getInliningThresholdMultiplier() const override
bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
bool shouldExpandReduction(const IntrinsicInst *II) const override
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
NVPTXTTIImpl(const NVPTXTargetMachine *TM, const Function &F)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const override
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
Estimate the overhead of scalarizing an instruction.
unsigned getAssumedAddrSpace(const Value *V) const override
void collectKernelLaunchBounds(const Function &F, SmallVectorImpl< std::pair< StringRef, int64_t > > &LB) const override
bool isLegalMaskedLoad(Type *DataType, Align Alignment, unsigned AddrSpace, TTI::MaskKind MaskKind) const override
unsigned getMinVectorRegisterBitWidth() const override
bool hasBranchDivergence(const Function *F=nullptr) const override
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual const DataLayout & getDataLayout() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
MaskKind
Some targets only support masked load/store with a constant mask.
TargetCostKind
The kind of cost model.
@ TCC_Free
Expected to fold away in lowering.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM Value Representation.
Definition Value.h:75
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
Definition TypeSize.h:171
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
bool isPackedVectorTy(EVT VT)
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
InstructionCost Cost
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
Definition Uniformity.h:18
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Parameters that control the generic loop unrolling transformation.