#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "AMDGPUtti"

    "amdgpu-unroll-threshold-private",
    cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"),

    "amdgpu-unroll-threshold-local",
    cl::desc("Unroll threshold for AMDGPU if local memory used in a loop"),

    "amdgpu-unroll-threshold-if",
    cl::desc("Unroll threshold increment for AMDGPU for each if statement inside loop"),

    "amdgpu-unroll-runtime-local",
    cl::desc("Allow runtime unroll for AMDGPU if local memory used in a loop"),

    "amdgpu-use-legacy-divergence-analysis",
    cl::desc("Enable legacy divergence analysis for AMDGPU"),

    "amdgpu-unroll-max-block-to-analyze",
    cl::desc("Inner loop block size threshold to analyze in unroll for AMDGPU"),

    cl::desc("Cost of alloca argument"));

    cl::desc("Maximum alloca size to use for inline cost"));

    cl::desc("Maximum number of BBs allowed in a function after inlining"
             " (compile time constraint)"));
  for (const Value *V : I->operand_values()) {
    if (const PHINode *PHI = dyn_cast<PHINode>(V)) {
          return SubLoop->contains(PHI); }))
      TargetTriple(TM->getTargetTriple()),
      TLI(ST->getTargetLowering()) {}
  const Function &F = *L->getHeader()->getParent();
  F.getFnAttributeAsParsedInteger("amdgpu-unroll-threshold", 300);
  UP.MaxCount = std::numeric_limits<unsigned>::max();

  const unsigned MaxAlloca = (256 - 16) * 4;

  if (MDNode *LoopUnrollThreshold =
    if (LoopUnrollThreshold->getNumOperands() == 2) {
      ConstantInt *MetaThresholdValue = mdconst::extract_or_null<ConstantInt>(
          LoopUnrollThreshold->getOperand(1));
      if (MetaThresholdValue) {

  ThresholdPrivate = std::min(ThresholdPrivate, UP.Threshold);
  ThresholdLocal = std::min(ThresholdLocal, UP.Threshold);
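// A minimal sketch of how a two-operand loop-metadata node of the form
// !{!"<name>", i32 <value>} can be read, following the extract_or_null
// pattern above; the helper name and the Default parameter are illustrative
// assumptions, not part of this file:
static unsigned readLoopThresholdMD(const Loop *L, StringRef Name,
                                    unsigned Default) {
  if (MDNode *MD = findOptionMDForLoop(L, Name))
    if (MD->getNumOperands() == 2)
      if (ConstantInt *CI =
              mdconst::extract_or_null<ConstantInt>(MD->getOperand(1)))
        return static_cast<unsigned>(CI->getZExtValue());
  return Default;
}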
  unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal);

    const DataLayout &DL = BB->getModule()->getDataLayout();
    unsigned LocalGEPsSeen = 0;

          return SubLoop->contains(BB); }))

      if (const BranchInst *Br = dyn_cast<BranchInst>(&I)) {
        if (UP.Threshold < MaxBoost && Br->isConditional()) {
          if ((L->contains(Succ0) && L->isLoopExiting(Succ0)) ||
              (L->contains(Succ1) && L->isLoopExiting(Succ1)))
                     << *L << " due to " << *Br << '\n');
      unsigned AS = GEP->getAddressSpace();
      unsigned Threshold = 0;
        Threshold = ThresholdPrivate;
        Threshold = ThresholdLocal;

        if (AllocaSize > MaxAlloca)

        if (LocalGEPsSeen > 1 || L->getLoopDepth() > 2 ||
            (!isa<GlobalVariable>(GEP->getPointerOperand()) &&
             !isa<Argument>(GEP->getPointerOperand())))
                   << *L << " due to LDS use.\n");

      bool HasLoopDef = false;
      for (const Value *Op : GEP->operands()) {
        const Instruction *Inst = dyn_cast<Instruction>(Op);
        if (!Inst || L->isLoopInvariant(Op))
              return SubLoop->contains(Inst); }))
                 << *L << " due to " << *GEP << '\n');
    AMDGPU::FeatureEnableLoadStoreOpt, AMDGPU::FeatureEnableSIScheduler,
    AMDGPU::FeatureEnableUnsafeDSOffsetFolding, AMDGPU::FeatureFlatForGlobal,
    AMDGPU::FeaturePromoteAlloca, AMDGPU::FeatureUnalignedScratchAccess,
    AMDGPU::FeatureUnalignedAccessMode,

    AMDGPU::FeatureAutoWaitcntBeforeBarrier,

    AMDGPU::FeatureSGPRInitBug, AMDGPU::FeatureXNACK,
    AMDGPU::FeatureTrapHandler,

    AMDGPU::FeatureSRAMECC,

    AMDGPU::FeatureFastFMAF32, AMDGPU::HalfRate64Ops};
      TLI(ST->getTargetLowering()), CommonTTI(TM, F),
      IsGraphics(AMDGPU::isGraphics(F.getCallingConv())) {
  HasFP32Denormals = Mode.allFP32Denormals();
  HasFP64FP16Denormals = Mode.allFP64FP16Denormals();
  if (Opcode == Instruction::Load || Opcode == Instruction::Store)
    return 32 * 4 / ElemWidth;
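// Assuming ElemWidth is the element size in bits, the load/store case above
// caps the maximum VF at a 32 * 4 = 128-bit access: ElemWidth == 32 gives a
// VF of 4, ElemWidth == 16 gives 8, and ElemWidth == 8 gives 16.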
                                           unsigned ChainSizeInBytes,
  unsigned VecRegBitWidth = VF * LoadSize;
    return 128 / LoadSize;

                                            unsigned ChainSizeInBytes,
  unsigned VecRegBitWidth = VF * StoreSize;
  if (VecRegBitWidth > 128)
    return 128 / StoreSize;
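// Both vector factors above apply the same clamp: shrink the requested VF so
// the resulting vector register stays within 128 bits. A standalone sketch of
// that arithmetic (the helper name is an assumption; the 128-bit limit is
// taken from the returns above):
static unsigned clampVFTo128Bits(unsigned VF, unsigned ElemSizeInBits) {
  unsigned VecRegBitWidth = VF * ElemSizeInBits;
  if (VecRegBitWidth > 128)
    return 128 / ElemSizeInBits; // widest factor that still fits
  return VF;
}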
                                            unsigned AddrSpace) const {

                                            unsigned AddrSpace) const {

                                            unsigned AddrSpace) const {

    unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
    std::optional<uint32_t> AtomicElementSize) const {

  if (AtomicElementSize)

  unsigned MinAlign = std::min(SrcAlign, DestAlign);
    unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
    unsigned SrcAlign, unsigned DestAlign,
    std::optional<uint32_t> AtomicCpySize) const {
  assert(RemainingBytes < 16);

        OpsOut, Context, RemainingBytes, SrcAddrSpace, DestAddrSpace, SrcAlign,
        DestAlign, AtomicCpySize);

  unsigned MinAlign = std::min(SrcAlign, DestAlign);

  while (RemainingBytes >= 8) {

  while (RemainingBytes >= 4) {

  while (RemainingBytes >= 2) {

  while (RemainingBytes) {
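// The loops above greedily cover the residual bytes of a lowered memcpy with
// the widest integer type that still fits. A minimal sketch of that pattern,
// ignoring the alignment capping applied above (the helper name is an
// assumption):
static void coverResidualBytes(LLVMContext &Ctx, unsigned RemainingBytes,
                               SmallVectorImpl<Type *> &OpsOut) {
  while (RemainingBytes >= 8) {
    OpsOut.push_back(Type::getInt64Ty(Ctx));
    RemainingBytes -= 8;
  }
  while (RemainingBytes >= 4) {
    OpsOut.push_back(Type::getInt32Ty(Ctx));
    RemainingBytes -= 4;
  }
  while (RemainingBytes >= 2) {
    OpsOut.push_back(Type::getInt16Ty(Ctx));
    RemainingBytes -= 2;
  }
  while (RemainingBytes) {
    OpsOut.push_back(Type::getInt8Ty(Ctx));
    --RemainingBytes;
  }
}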
  case Intrinsic::amdgcn_atomic_inc:
  case Intrinsic::amdgcn_atomic_dec:
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap:
  case Intrinsic::amdgcn_ds_fadd:
  case Intrinsic::amdgcn_ds_fmin:
  case Intrinsic::amdgcn_ds_fmax: {
    auto *Ordering = dyn_cast<ConstantInt>(Inst->getArgOperand(2));
    auto *Volatile = dyn_cast<ConstantInt>(Inst->getArgOperand(4));
    if (!Ordering || !Volatile)

    unsigned OrderingVal = Ordering->getZExtValue();

    Info.WriteMem = true;
    Info.IsVolatile = !Volatile->isZero();
  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);

  unsigned NElts = LT.second.isVector() ?
    LT.second.getVectorNumElements() : 1;

      return get64BitInstrCost(CostKind) * LT.first * NElts;

      NElts = (NElts + 1) / 2;

    return getFullRateInstrCost() * LT.first * NElts;

      return 2 * getFullRateInstrCost() * LT.first * NElts;

      NElts = (NElts + 1) / 2;

    return LT.first * NElts * getFullRateInstrCost();

    const int QuarterRateCost = getQuarterRateInstrCost(CostKind);
    const int FullRateCost = getFullRateInstrCost();
    return (4 * QuarterRateCost + (2 * 2) * FullRateCost) * LT.first * NElts;
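// Rough reading of the formula above (an interpretation, not an upstream
// comment): a legalized 64-bit integer multiply is priced as four quarter-rate
// 32-bit multiplies plus four full-rate ALU operations, scaled by the number
// of legalization steps (LT.first) and vector elements (NElts).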
      NElts = (NElts + 1) / 2;

    return QuarterRateCost * NElts * LT.first;

      if (const auto *FAdd = dyn_cast<BinaryOperator>(*CxtI->user_begin())) {

      NElts = (NElts + 1) / 2;

      return LT.first * NElts * get64BitInstrCost(CostKind);

      NElts = (NElts + 1) / 2;

      return LT.first * NElts * getFullRateInstrCost();

        Cost += 3 * getFullRateInstrCost();

      return LT.first * Cost * NElts;

      if ((SLT == MVT::f32 && !HasFP32Denormals) ||

        return LT.first * getQuarterRateInstrCost(CostKind) * NElts;

          4 * getFullRateInstrCost() + 2 * getQuarterRateInstrCost(CostKind);
      return LT.first * Cost * NElts;

      int Cost = (SLT == MVT::f16 ? 14 : 10) * getFullRateInstrCost() +
                 1 * getQuarterRateInstrCost(CostKind);

      if (!HasFP32Denormals) {
        Cost += 2 * getFullRateInstrCost();

      return LT.first * NElts * Cost;
  case Intrinsic::round:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:

  if (ICA.getID() == Intrinsic::fabs)

  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);

  unsigned NElts = LT.second.isVector() ?
    LT.second.getVectorNumElements() : 1;

    return LT.first * NElts * get64BitInstrCost(CostKind);

    NElts = (NElts + 1) / 2;

  unsigned InstRate = getQuarterRateInstrCost(CostKind);

  switch (ICA.getID()) {
               : getQuarterRateInstrCost(CostKind);
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
    if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; }))

  return LT.first * NElts * InstRate;
  assert((I == nullptr || I->getOpcode() == Opcode) &&
         "Opcode should reflect passed instruction.");

  const int CBrCost = SCost ? 5 : 7;

  case Instruction::Br: {
    auto BI = dyn_cast_or_null<BranchInst>(I);
    if (BI && BI->isUnconditional())
      return SCost ? 1 : 4;

  case Instruction::Switch: {
    auto SI = dyn_cast_or_null<SwitchInst>(I);
    return (SI ? (SI->getNumCases() + 1) : 4) * (CBrCost + 1);

  case Instruction::Ret:
    return SCost ? 1 : 10;
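// Worked example for the switch cost above: with the size/latency cost kind
// (SCost), CBrCost is 5, so a switch with three cases is priced at
// (3 + 1) * (5 + 1) = 24, i.e. roughly one compare-and-branch per case plus
// the default edge.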
                                           std::optional<FastMathFlags> FMF,

  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
  return LT.first * getFullRateInstrCost();

  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
  return LT.first * getHalfRateInstrCost(CostKind);
  case Instruction::ExtractElement:
  case Instruction::InsertElement: {

    return Index == ~0u ? 2 : 0;

  if (Indices.size() > 1)

  const int TargetOutputIdx = Indices.empty() ? -1 : Indices[0];

  for (auto &TC : TargetConstraints) {

    if (TargetOutputIdx != -1 && TargetOutputIdx != OutputIdx++)

        TRI, TC.ConstraintCode, TC.ConstraintVT).second;

    if (!RC || !TRI->isSGPRClass(RC))

      cast<MetadataAsValue>(ReadReg->getArgOperand(0))->getMetadata();
      cast<MDString>(cast<MDNode>(MD)->getOperand(0))->getString();
  if (const Argument *A = dyn_cast<Argument>(V))

  if (const LoadInst *Load = dyn_cast<LoadInst>(V))

  if (isa<AtomicRMWInst>(V) || isa<AtomicCmpXchgInst>(V))

  if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
    if (Intrinsic->getIntrinsicID() == Intrinsic::read_register)

  if (const CallInst *CI = dyn_cast<CallInst>(V)) {
    if (CI->isInlineAsm())

  if (isa<InvokeInst>(V))

  if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V))

  if (const CallInst *CI = dyn_cast<CallInst>(V)) {
    if (CI->isInlineAsm())

  if (match(V, m_LShr(m_Intrinsic<Intrinsic::amdgcn_workitem_id_x>(),
      match(V, m_AShr(m_Intrinsic<Intrinsic::amdgcn_workitem_id_x>(),
    const Function *F = cast<Instruction>(V)->getFunction();

  if (match(V, m_c_And(m_Intrinsic<Intrinsic::amdgcn_workitem_id_x>(),
    const Function *F = cast<Instruction>(V)->getFunction();
  if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(CI)) {
    switch (Intrinsic->getIntrinsicID()) {
    case Intrinsic::amdgcn_if:
    case Intrinsic::amdgcn_else: {
      return Indices.size() == 1 && Indices[0] == 1;

  case Intrinsic::amdgcn_atomic_inc:
  case Intrinsic::amdgcn_atomic_dec:
  case Intrinsic::amdgcn_ds_fadd:
  case Intrinsic::amdgcn_ds_fmin:
  case Intrinsic::amdgcn_ds_fmax:
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
  case Intrinsic::amdgcn_flat_atomic_fadd:
  case Intrinsic::amdgcn_flat_atomic_fmax:
  case Intrinsic::amdgcn_flat_atomic_fmin:
                                                    Value *NewV) const {
  case Intrinsic::amdgcn_atomic_inc:
  case Intrinsic::amdgcn_atomic_dec:
  case Intrinsic::amdgcn_ds_fadd:
  case Intrinsic::amdgcn_ds_fmin:
  case Intrinsic::amdgcn_ds_fmax: {
    if (!IsVolatile->isZero())

  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private: {
    unsigned TrueAS = IntrID == Intrinsic::amdgcn_is_shared ?

  case Intrinsic::ptrmask: {
    bool DoTruncate = false;

    if (!TM.isNoopAddrSpaceCast(OldAS, NewAS)) {

      MaskTy = B.getInt32Ty();
      MaskOp = B.CreateTrunc(MaskOp, MaskTy);

    return B.CreateIntrinsic(Intrinsic::ptrmask, {NewV->getType(), MaskTy},

  case Intrinsic::amdgcn_flat_atomic_fadd:
  case Intrinsic::amdgcn_flat_atomic_fmax:
  case Intrinsic::amdgcn_flat_atomic_fmin: {
      {DestTy, SrcTy, DestTy});
  if (cast<FixedVectorType>(VT)->getNumElements() == 2 &&

      = static_cast<const GCNSubtarget *>(TM.getSubtargetImpl(*Caller));

  const FeatureBitset &CallerBits = CallerST->getFeatureBits();
  const FeatureBitset &CalleeBits = CalleeST->getFeatureBits();

  FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
  FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
  if ((RealCallerBits & RealCalleeBits) != RealCalleeBits)
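// The check above rejects inlining unless every non-ignored feature the
// callee requires is also present in the caller, i.e. the callee's bits form
// a subset of the caller's. A minimal illustration of that subset test using
// std::bitset (purely illustrative; the real code uses FeatureBitset):
#include <bitset>
static bool isFeatureSubset(const std::bitset<64> &CalleeBits,
                            const std::bitset<64> &CallerBits) {
  return (CallerBits & CalleeBits) == CalleeBits;
}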
  if (Callee->hasFnAttribute(Attribute::AlwaysInline) ||
      Callee->hasFnAttribute(Attribute::InlineHint))

  size_t BBSize = Caller->size() + Callee->size() - 1;
  const int NrOfSGPRUntilSpill = 26;
  const int NrOfVGPRUntilSpill = 32;

  unsigned adjustThreshold = 0;

    for (auto ArgVT : ValueVTs) {

        SGPRsInUse += CCRegNum;

        VGPRsInUse += CCRegNum;

      ArgStackCost += const_cast<GCNTTIImpl *>(TTIImpl)->getMemoryOpCost(

      ArgStackCost += const_cast<GCNTTIImpl *>(TTIImpl)->getMemoryOpCost(

  adjustThreshold += std::max(0, SGPRsInUse - NrOfSGPRUntilSpill) *
  adjustThreshold += std::max(0, VGPRsInUse - NrOfVGPRUntilSpill) *
  return adjustThreshold;
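// Reading of the adjustment above: only argument values beyond the first
// 26 SGPRs / 32 VGPRs are assumed to spill to the stack at the call site, so
// the inlining threshold is raised in proportion to the number of registers
// exceeding those limits (the elided multiplier is the estimated
// stack-traffic cost per spilled argument, derived from getMemoryOpCost).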
  unsigned adjustThreshold = 0;

    PointerType *Ty = dyn_cast<PointerType>(PtrArg->getType());

    if (const AllocaInst *AI = dyn_cast<AllocaInst>(PtrArg)) {
      if (!AI->isStaticAlloca() || !AIVisited.insert(AI).second)

  return adjustThreshold;
             ? getFullRateInstrCost()
             : ST->hasHalfRate64Ops() ? getHalfRateInstrCost(CostKind)
                                      : getQuarterRateInstrCost(CostKind);

std::pair<InstructionCost, MVT>
GCNTTIImpl::getTypeLegalizationCost(Type *Ty) const {