#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-atomic-optimizer"
struct ReplacementInfo {
  Instruction *I;
  AtomicRMWInst::BinOp Op;
  unsigned ValIdx;
  bool ValDivergent;
};
class AMDGPUAtomicOptimizerImpl
    : public InstVisitor<AMDGPUAtomicOptimizerImpl> {
private:
  SmallVector<ReplacementInfo, 8> ToReplace;
  const UniformityInfo *UA;
  const DataLayout *DL;
  DomTreeUpdater &DTU;
  const GCNSubtarget *ST;
  bool IsPixelShader;
  ScanOptions ScanImpl;

  Value *buildReduction(IRBuilder<> &B, AtomicRMWInst::BinOp Op, Value *V,
                        Value *const Identity) const;
  Value *buildScan(IRBuilder<> &B, AtomicRMWInst::BinOp Op, Value *V,
                   Value *const Identity) const;
  Value *buildShiftRight(IRBuilder<> &B, Value *V,
                         Value *const Identity) const;

  std::pair<Value *, Value *>
  buildScanIteratively(IRBuilder<> &B, AtomicRMWInst::BinOp Op,
                       Value *const Identity, Value *V, Instruction &I,
                       BasicBlock *ComputeLoop, BasicBlock *ComputeEnd) const;

  void optimizeAtomic(Instruction &I, AtomicRMWInst::BinOp Op, unsigned ValIdx,
                      bool ValDivergent) const;

public:
  AMDGPUAtomicOptimizerImpl() = delete;

  AMDGPUAtomicOptimizerImpl(const UniformityInfo *UA, const DataLayout *DL,
                            DomTreeUpdater &DTU, const GCNSubtarget *ST,
                            bool IsPixelShader, ScanOptions ScanImpl)
      : UA(UA), DL(DL), DTU(DTU), ST(ST), IsPixelShader(IsPixelShader),
        ScanImpl(ScanImpl) {}

  bool run(Function &F);

  void visitAtomicRMWInst(AtomicRMWInst &I);
  void visitIntrinsicInst(IntrinsicInst &I);
};
char AMDGPUAtomicOptimizer::ID = 0;
bool AMDGPUAtomicOptimizer::runOnFunction(Function &F) {
  if (skipFunction(F)) {
    return false;
  }

  const UniformityInfo *UA =
      &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
  const DataLayout *DL = &F.getParent()->getDataLayout();

  DominatorTreeWrapperPass *const DTW =
      getAnalysisIfAvailable<DominatorTreeWrapperPass>();
  DomTreeUpdater DTU(DTW ? &DTW->getDomTree() : nullptr,
                     DomTreeUpdater::UpdateStrategy::Lazy);

  const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
  const TargetMachine &TM = TPC.getTM<TargetMachine>();
  const GCNSubtarget *ST = &TM.getSubtarget<GCNSubtarget>(F);

  bool IsPixelShader = F.getCallingConv() == CallingConv::AMDGPU_PS;

  return AMDGPUAtomicOptimizerImpl(UA, DL, DTU, ST, IsPixelShader, ScanImpl)
      .run(F);
}

PreservedAnalyses AMDGPUAtomicOptimizerPass::run(Function &F,
                                                 FunctionAnalysisManager &AM) {
  const auto *UA = &AM.getResult<UniformityInfoAnalysis>(F);
  const DataLayout *DL = &F.getParent()->getDataLayout();

  DomTreeUpdater DTU(&AM.getResult<DominatorTreeAnalysis>(F),
                     DomTreeUpdater::UpdateStrategy::Lazy);
  const GCNSubtarget *ST = &TM.getSubtarget<GCNSubtarget>(F);

  bool IsPixelShader = F.getCallingConv() == CallingConv::AMDGPU_PS;

  bool IsChanged =
      AMDGPUAtomicOptimizerImpl(UA, DL, DTU, ST, IsPixelShader, ScanImpl)
          .run(F);

  if (!IsChanged) {
    return PreservedAnalyses::all();
  }

  PreservedAnalyses PA;
  PA.preserve<DominatorTreeAnalysis>();
  return PA;
}
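// Illustrative usage sketch (not part of the original source): a target that
// wants this optimization in a legacy pass pipeline would typically create it
// via the factory declared in AMDGPU.h, e.g.
//
//   PM.add(llvm::createAMDGPUAtomicOptimizerPass(ScanOptions::DPP));
//
// where ScanOptions selects between the DPP-based and the iterative scan
// strategies implemented below.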
bool AMDGPUAtomicOptimizerImpl::run(Function &F) {

  // Scan option None disables the pass.
  if (ScanImpl == ScanOptions::None) {
    return false;
  }

  visit(F);

  const bool Changed = !ToReplace.empty();

  for (ReplacementInfo &Info : ToReplace) {
    optimizeAtomic(*Info.I, Info.Op, Info.ValIdx, Info.ValDivergent);
  }

  ToReplace.clear();

  return Changed;
}
void AMDGPUAtomicOptimizerImpl::visitAtomicRMWInst(AtomicRMWInst &I) {
  // Early exit for unhandled address space atomic instructions.
  switch (I.getPointerAddressSpace()) {
  default:
    return;
  case AMDGPUAS::GLOBAL_ADDRESS:
  case AMDGPUAS::LOCAL_ADDRESS:
    break;
  }

  AtomicRMWInst::BinOp Op = I.getOperation();

  switch (Op) {
  default:
    return;
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::And:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin:
  case AtomicRMWInst::FAdd:
  case AtomicRMWInst::FSub:
  case AtomicRMWInst::FMax:
  case AtomicRMWInst::FMin:
    break;
  }

  // Only 32-bit and 64-bit floating point atomic operations are supported.
  if (AtomicRMWInst::isFPOperation(Op) &&
      !(I.getType()->isFloatTy() || I.getType()->isDoubleTy())) {
    return;
  }

  const unsigned PtrIdx = 0;
  const unsigned ValIdx = 1;

  // If the pointer operand is divergent, then each lane is doing an atomic
  // operation on a different address, and we cannot optimize that.
  if (UA->isDivergentUse(I.getOperandUse(PtrIdx))) {
    return;
  }

  const bool ValDivergent = UA->isDivergentUse(I.getOperandUse(ValIdx));

  // If the value operand is divergent, each lane is contributing a different
  // value to the atomic calculation. We can only optimize divergent values if
  // we have DPP available on our subtarget, and the atomic operation is 32
  // bits.
  if (ValDivergent &&
      (!ST->hasDPP() || DL->getTypeSizeInBits(I.getType()) != 32)) {
    return;
  }

  // If we get here, we can optimize the atomic using a single wavefront-wide
  // atomic operation to do the calculation for the entire wavefront, so
  // remember the instruction so we can come back to it.
  const ReplacementInfo Info = {&I, Op, ValIdx, ValDivergent};

  ToReplace.push_back(Info);
}
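// Worked example (a sketch, not from the source): if n lanes of a wavefront
// execute
//
//   %old = atomicrmw add ptr addrspace(1) %p, i32 %v syncscope("agent") monotonic
//
// with a uniform %p and a uniform %v, optimizeAtomic below arranges for a
// single lane to issue one atomic add of %v * n, and each lane then recovers
// its own %old as broadcast_result + %v * (number of active lanes below it).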
void AMDGPUAtomicOptimizerImpl::visitIntrinsicInst(IntrinsicInst &I) {
  AtomicRMWInst::BinOp Op;

  switch (I.getIntrinsicID()) {
  default:
    return;
  case Intrinsic::amdgcn_buffer_atomic_add:
  case Intrinsic::amdgcn_struct_buffer_atomic_add:
  case Intrinsic::amdgcn_struct_ptr_buffer_atomic_add:
  case Intrinsic::amdgcn_raw_buffer_atomic_add:
  case Intrinsic::amdgcn_raw_ptr_buffer_atomic_add:
    Op = AtomicRMWInst::Add;
    break;
  case Intrinsic::amdgcn_buffer_atomic_sub:
  case Intrinsic::amdgcn_struct_buffer_atomic_sub:
  case Intrinsic::amdgcn_struct_ptr_buffer_atomic_sub:
  case Intrinsic::amdgcn_raw_buffer_atomic_sub:
  case Intrinsic::amdgcn_raw_ptr_buffer_atomic_sub:
    Op = AtomicRMWInst::Sub;
    break;
  case Intrinsic::amdgcn_buffer_atomic_and:
  case Intrinsic::amdgcn_struct_buffer_atomic_and:
  case Intrinsic::amdgcn_struct_ptr_buffer_atomic_and:
  case Intrinsic::amdgcn_raw_buffer_atomic_and:
  case Intrinsic::amdgcn_raw_ptr_buffer_atomic_and:
    Op = AtomicRMWInst::And;
    break;
  case Intrinsic::amdgcn_buffer_atomic_or:
  case Intrinsic::amdgcn_struct_buffer_atomic_or:
  case Intrinsic::amdgcn_struct_ptr_buffer_atomic_or:
  case Intrinsic::amdgcn_raw_buffer_atomic_or:
  case Intrinsic::amdgcn_raw_ptr_buffer_atomic_or:
    Op = AtomicRMWInst::Or;
    break;
  case Intrinsic::amdgcn_buffer_atomic_xor:
  case Intrinsic::amdgcn_struct_buffer_atomic_xor:
  case Intrinsic::amdgcn_struct_ptr_buffer_atomic_xor:
  case Intrinsic::amdgcn_raw_buffer_atomic_xor:
  case Intrinsic::amdgcn_raw_ptr_buffer_atomic_xor:
    Op = AtomicRMWInst::Xor;
    break;
  case Intrinsic::amdgcn_buffer_atomic_smin:
  case Intrinsic::amdgcn_struct_buffer_atomic_smin:
  case Intrinsic::amdgcn_struct_ptr_buffer_atomic_smin:
  case Intrinsic::amdgcn_raw_buffer_atomic_smin:
  case Intrinsic::amdgcn_raw_ptr_buffer_atomic_smin:
    Op = AtomicRMWInst::Min;
    break;
  case Intrinsic::amdgcn_buffer_atomic_umin:
  case Intrinsic::amdgcn_struct_buffer_atomic_umin:
  case Intrinsic::amdgcn_struct_ptr_buffer_atomic_umin:
  case Intrinsic::amdgcn_raw_buffer_atomic_umin:
  case Intrinsic::amdgcn_raw_ptr_buffer_atomic_umin:
    Op = AtomicRMWInst::UMin;
    break;
  case Intrinsic::amdgcn_buffer_atomic_smax:
  case Intrinsic::amdgcn_struct_buffer_atomic_smax:
  case Intrinsic::amdgcn_struct_ptr_buffer_atomic_smax:
  case Intrinsic::amdgcn_raw_buffer_atomic_smax:
  case Intrinsic::amdgcn_raw_ptr_buffer_atomic_smax:
    Op = AtomicRMWInst::Max;
    break;
  case Intrinsic::amdgcn_buffer_atomic_umax:
  case Intrinsic::amdgcn_struct_buffer_atomic_umax:
  case Intrinsic::amdgcn_struct_ptr_buffer_atomic_umax:
  case Intrinsic::amdgcn_raw_buffer_atomic_umax:
  case Intrinsic::amdgcn_raw_ptr_buffer_atomic_umax:
    Op = AtomicRMWInst::UMax;
    break;
  }

  const unsigned ValIdx = 0;

  const bool ValDivergent = UA->isDivergentUse(I.getOperandUse(ValIdx));

  // If the value operand is divergent, each lane is contributing a different
  // value to the atomic calculation. We can only optimize divergent values if
  // we have DPP available on our subtarget, and the atomic operation is 32
  // bits.
  if (ValDivergent &&
      (!ST->hasDPP() || DL->getTypeSizeInBits(I.getType()) != 32)) {
    return;
  }

  // If any of the other arguments to the intrinsic are divergent, we can't
  // optimize the operation.
  for (unsigned Idx = 1; Idx < I.getNumOperands(); Idx++) {
    if (UA->isDivergentUse(I.getOperandUse(Idx))) {
      return;
    }
  }

  // If we get here, we can optimize the atomic using a single wavefront-wide
  // atomic operation to do the calculation for the entire wavefront, so
  // remember the instruction so we can come back to it.
  const ReplacementInfo Info = {&I, Op, ValIdx, ValDivergent};

  ToReplace.push_back(Info);
}
static Value *buildNonAtomicBinOp(IRBuilder<> &B, AtomicRMWInst::BinOp Op,
                                  Value *LHS, Value *RHS) {
  CmpInst::Predicate Pred;

  switch (Op) {
  default:
    llvm_unreachable("Unhandled atomic op");
  case AtomicRMWInst::Add:
    return B.CreateBinOp(Instruction::Add, LHS, RHS);
  case AtomicRMWInst::FAdd:
    return B.CreateFAdd(LHS, RHS);
  case AtomicRMWInst::Sub:
    return B.CreateBinOp(Instruction::Sub, LHS, RHS);
  case AtomicRMWInst::FSub:
    return B.CreateFSub(LHS, RHS);
  case AtomicRMWInst::And:
    return B.CreateBinOp(Instruction::And, LHS, RHS);
  case AtomicRMWInst::Or:
    return B.CreateBinOp(Instruction::Or, LHS, RHS);
  case AtomicRMWInst::Xor:
    return B.CreateBinOp(Instruction::Xor, LHS, RHS);
  case AtomicRMWInst::Max:
    Pred = CmpInst::ICMP_SGT;
    break;
  case AtomicRMWInst::Min:
    Pred = CmpInst::ICMP_SLT;
    break;
  case AtomicRMWInst::UMax:
    Pred = CmpInst::ICMP_UGT;
    break;
  case AtomicRMWInst::UMin:
    Pred = CmpInst::ICMP_ULT;
    break;
  case AtomicRMWInst::FMax:
    return B.CreateMaxNum(LHS, RHS);
  case AtomicRMWInst::FMin:
    return B.CreateMinNum(LHS, RHS);
  }
  Value *Cond = B.CreateICmp(Pred, LHS, RHS);
  return B.CreateSelect(Cond, LHS, RHS);
}
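// For the integer min/max cases the helper falls through the switch above and
// emits a compare plus select; e.g. AtomicRMWInst::Max lowers to (sketch):
//
//   %cond = icmp sgt i32 %lhs, %rhs
//   %res  = select i1 %cond, i32 %lhs, i32 %rhs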
// Use the builder to create a reduction of V across the wavefront, with all
// lanes active, returning the same result in all lanes.
Value *AMDGPUAtomicOptimizerImpl::buildReduction(IRBuilder<> &B,
                                                 AtomicRMWInst::BinOp Op,
                                                 Value *V,
                                                 Value *const Identity) const {
  Type *AtomicTy = V->getType();
  Type *IntNTy = B.getIntNTy(AtomicTy->getPrimitiveSizeInBits());
  Module *M = B.GetInsertBlock()->getModule();
  Function *UpdateDPP =
      Intrinsic::getDeclaration(M, Intrinsic::amdgcn_update_dpp, AtomicTy);

  // Reduce within each row of 16 lanes.
  for (unsigned Idx = 0; Idx < 4; Idx++) {
    V = buildNonAtomicBinOp(
        B, Op, V,
        B.CreateCall(UpdateDPP,
                     {Identity, V, B.getInt32(DPP::ROW_XMASK0 | 1 << Idx),
                      B.getInt32(0xf), B.getInt32(0xf), B.getFalse()}));
  }

  // Reduce within each pair of rows (i.e. 32 lanes).
  V = B.CreateBitCast(V, IntNTy);
  Value *Permlanex16Call = B.CreateIntrinsic(
      Intrinsic::amdgcn_permlanex16, {},
      {V, V, B.getInt32(-1), B.getInt32(-1), B.getFalse(), B.getFalse()});
  V = buildNonAtomicBinOp(B, Op, B.CreateBitCast(V, AtomicTy),
                          B.CreateBitCast(Permlanex16Call, AtomicTy));
  if (ST->isWave32()) {
    return V;
  }

  if (ST->hasPermLane64()) {
    // Reduce across the upper and lower 32 lanes.
    V = B.CreateBitCast(V, IntNTy);
    Value *Permlane64Call =
        B.CreateIntrinsic(Intrinsic::amdgcn_permlane64, {}, V);
    return buildNonAtomicBinOp(B, Op, B.CreateBitCast(V, AtomicTy),
                               B.CreateBitCast(Permlane64Call, AtomicTy));
  }

  // Pick an arbitrary lane from 0..31 and an arbitrary lane from 32..63 and
  // combine them with a scalar operation.
  Function *ReadLane =
      Intrinsic::getDeclaration(M, Intrinsic::amdgcn_readlane, {});
  V = B.CreateBitCast(V, IntNTy);
  Value *Lane0 = B.CreateCall(ReadLane, {V, B.getInt32(0)});
  Value *Lane32 = B.CreateCall(ReadLane, {V, B.getInt32(32)});
  return buildNonAtomicBinOp(B, Op, B.CreateBitCast(Lane0, AtomicTy),
                             B.CreateBitCast(Lane32, AtomicTy));
}
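// Illustrative shape of the reduction above (with Op = add): each ROW_XMASK
// step combines a lane with the lane 2^Idx away, so after the four DPP steps
// every lane of a 16-lane row holds the row total; permlanex16 then folds the
// two rows of each 32-lane half together, and permlane64 (or the two
// readlanes) folds the halves on wave64, leaving the full wavefront sum in
// every lane.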
// Use the builder to create an inclusive scan of V across the wavefront, with
// all lanes active.
Value *AMDGPUAtomicOptimizerImpl::buildScan(IRBuilder<> &B,
                                            AtomicRMWInst::BinOp Op, Value *V,
                                            Value *Identity) const {
  Type *AtomicTy = V->getType();
  Type *IntNTy = B.getIntNTy(AtomicTy->getPrimitiveSizeInBits());

  Module *M = B.GetInsertBlock()->getModule();
  Function *UpdateDPP =
      Intrinsic::getDeclaration(M, Intrinsic::amdgcn_update_dpp, AtomicTy);

  for (unsigned Idx = 0; Idx < 4; Idx++) {
    V = buildNonAtomicBinOp(
        B, Op, V,
        B.CreateCall(UpdateDPP,
                     {Identity, V, B.getInt32(DPP::ROW_SHR0 | 1 << Idx),
                      B.getInt32(0xf), B.getInt32(0xf), B.getFalse()}));
  }
  if (ST->hasDPPBroadcasts()) {
    // GFX9 has DPP row broadcast operations.
    V = buildNonAtomicBinOp(
        B, Op, V,
        B.CreateCall(UpdateDPP,
                     {Identity, V, B.getInt32(DPP::BCAST15), B.getInt32(0xa),
                      B.getInt32(0xf), B.getFalse()}));
    V = buildNonAtomicBinOp(
        B, Op, V,
        B.CreateCall(UpdateDPP,
                     {Identity, V, B.getInt32(DPP::BCAST31), B.getInt32(0xc),
                      B.getInt32(0xf), B.getFalse()}));
  } else {
    // On GFX10 all DPP operations are confined to a single row. To get cross-
    // row operations we have to use permlane or readlane.

    // Combine lane 15 into lanes 16..31 (and, for wave 64, lane 47 into lanes
    // 48..63).
    V = B.CreateBitCast(V, IntNTy);
    Value *PermX = B.CreateIntrinsic(
        Intrinsic::amdgcn_permlanex16, {},
        {V, V, B.getInt32(-1), B.getInt32(-1), B.getFalse(), B.getFalse()});

    Value *UpdateDPPCall =
        B.CreateCall(UpdateDPP, {Identity, B.CreateBitCast(PermX, AtomicTy),
                                 B.getInt32(DPP::QUAD_PERM_ID), B.getInt32(0xa),
                                 B.getInt32(0xf), B.getFalse()});
    V = buildNonAtomicBinOp(B, Op, B.CreateBitCast(V, AtomicTy),
                            UpdateDPPCall);

    if (!ST->isWave32()) {
      // Combine lane 31 into lanes 32..63.
      V = B.CreateBitCast(V, IntNTy);
      Value *const Lane31 = B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {},
                                              {V, B.getInt32(31)});

      Value *UpdateDPPCall = B.CreateCall(
          UpdateDPP, {Identity, Lane31, B.getInt32(DPP::QUAD_PERM_ID),
                      B.getInt32(0xc), B.getInt32(0xf), B.getFalse()});

      V = buildNonAtomicBinOp(B, Op, B.CreateBitCast(V, AtomicTy),
                              UpdateDPPCall);
    }
  }
  return V;
}
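// Illustrative trace (not from the source): for four lanes carrying
// {a, b, c, d} with Op = add, the ROW_SHR steps produce the inclusive scan
// {a, a+b, a+b+c, a+b+c+d}; the row broadcasts (or permlanex16/readlane on
// subtargets without them) extend the scan across row boundaries, so the last
// lane ends up holding the total for the whole wavefront.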
// Use the builder to create a shift right of V across the wavefront, with all
// lanes active, to turn an inclusive scan into an exclusive scan.
Value *AMDGPUAtomicOptimizerImpl::buildShiftRight(IRBuilder<> &B, Value *V,
                                                  Value *Identity) const {
  Type *AtomicTy = V->getType();
  Type *IntNTy = B.getIntNTy(AtomicTy->getPrimitiveSizeInBits());
  Module *M = B.GetInsertBlock()->getModule();
  Function *UpdateDPP =
      Intrinsic::getDeclaration(M, Intrinsic::amdgcn_update_dpp, AtomicTy);
  if (ST->hasDPPWavefrontShifts()) {
    // GFX9 has DPP wavefront shift operations.
    V = B.CreateCall(UpdateDPP,
                     {Identity, V, B.getInt32(DPP::WAVE_SHR1), B.getInt32(0xf),
                      B.getInt32(0xf), B.getFalse()});
  } else {
    Function *ReadLane =
        Intrinsic::getDeclaration(M, Intrinsic::amdgcn_readlane, {});
    Function *WriteLane =
        Intrinsic::getDeclaration(M, Intrinsic::amdgcn_writelane, {});

    // On GFX10 all DPP operations are confined to a single row. To get cross-
    // row operations we have to use permlane or readlane.
    Value *Old = V;
    V = B.CreateCall(UpdateDPP,
                     {Identity, V, B.getInt32(DPP::ROW_SHR0 + 1),
                      B.getInt32(0xf), B.getInt32(0xf), B.getFalse()});

    // Copy the old lane 15 to the new lane 16.
    V = B.CreateCall(
        WriteLane,
        {B.CreateCall(ReadLane, {B.CreateBitCast(Old, IntNTy), B.getInt32(15)}),
         B.getInt32(16), B.CreateBitCast(V, IntNTy)});
    V = B.CreateBitCast(V, AtomicTy);
    if (!ST->isWave32()) {
      V = B.CreateBitCast(V, IntNTy);
      // Copy the old lane 31 to the new lane 32.
      V = B.CreateCall(
          WriteLane,
          {B.CreateCall(ReadLane,
                        {B.CreateBitCast(Old, IntNTy), B.getInt32(31)}),
           B.getInt32(32), V});

      // Copy the old lane 47 to the new lane 48.
      V = B.CreateCall(
          WriteLane,
          {B.CreateCall(ReadLane, {Old, B.getInt32(47)}), B.getInt32(48), V});
      V = B.CreateBitCast(V, AtomicTy);
    }
  }

  return V;
}
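// Continuing the sketch above: shifting the inclusive scan right by one lane
// and seeding lane 0 with the identity turns {a, a+b, a+b+c, a+b+c+d} into
// the exclusive scan {identity, a, a+b, a+b+c}, which is exactly each lane's
// offset into the single combined atomic result.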
// Use the builder to create an exclusive scan and compute the final reduced
// value using an iterative approach. This provides an alternative in
// situations where DPP operations are not supported.
std::pair<Value *, Value *> AMDGPUAtomicOptimizerImpl::buildScanIteratively(
    IRBuilder<> &B, AtomicRMWInst::BinOp Op, Value *const Identity, Value *V,
    Instruction &I, BasicBlock *ComputeLoop, BasicBlock *ComputeEnd) const {
  auto *Ty = I.getType();
  auto *WaveTy = B.getIntNTy(ST->getWavefrontSize());
  auto *EntryBB = I.getParent();
  auto NeedResult = !I.use_empty();

  auto *Ballot =
      B.CreateIntrinsic(Intrinsic::amdgcn_ballot, WaveTy, B.getTrue());

  // Start inserting instructions for the ComputeLoop block.
  B.SetInsertPoint(ComputeLoop);
  // Phi nodes for the accumulator, the scan result destination, and the
  // remaining active lanes.
  auto *Accumulator = B.CreatePHI(Ty, 2, "Accumulator");
  Accumulator->addIncoming(Identity, EntryBB);
  PHINode *OldValuePhi = nullptr;
  if (NeedResult) {
    OldValuePhi = B.CreatePHI(Ty, 2, "OldValuePhi");
    OldValuePhi->addIncoming(PoisonValue::get(Ty), EntryBB);
  }
  auto *ActiveBits = B.CreatePHI(WaveTy, 2, "ActiveBits");
  ActiveBits->addIncoming(Ballot, EntryBB);

  // Use the llvm.cttz intrinsic to find the lowest remaining active lane.
  auto *FF1 =
      B.CreateIntrinsic(Intrinsic::cttz, WaveTy, {ActiveBits, B.getTrue()});

  Type *IntNTy = B.getIntNTy(Ty->getPrimitiveSizeInBits());
  auto *LaneIdxInt = B.CreateTrunc(FF1, IntNTy);

  // Get the value required for the atomic operation.
  V = B.CreateBitCast(V, IntNTy);
  Value *LaneValue =
      B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {}, {V, LaneIdxInt});
  LaneValue = B.CreateBitCast(LaneValue, Ty);

  // Perform a writelane if intermediate scan results are required later in
  // the kernel computations.
  Value *OldValue = nullptr;
  if (NeedResult) {
    OldValue =
        B.CreateIntrinsic(Intrinsic::amdgcn_writelane, {},
                          {B.CreateBitCast(Accumulator, IntNTy), LaneIdxInt,
                           B.CreateBitCast(OldValuePhi, IntNTy)});
    OldValue = B.CreateBitCast(OldValue, Ty);
    OldValuePhi->addIncoming(OldValue, ComputeLoop);
  }

  // Accumulate the results.
  auto *NewAccumulator = buildNonAtomicBinOp(B, Op, Accumulator, LaneValue);
  Accumulator->addIncoming(NewAccumulator, ComputeLoop);

  // Set the bit of the current active lane to zero so that on the next
  // iteration llvm.cttz returns the next active lane.
  auto *Mask = B.CreateShl(ConstantInt::get(WaveTy, 1), FF1);
  auto *InverseMask = B.CreateXor(Mask, ConstantInt::get(WaveTy, -1));
  auto *NewActiveBits = B.CreateAnd(ActiveBits, InverseMask);
  ActiveBits->addIncoming(NewActiveBits, ComputeLoop);

  // Branch out of the loop when all lanes are processed.
  auto *IsEnd = B.CreateICmpEQ(NewActiveBits, ConstantInt::get(WaveTy, 0));
  B.CreateCondBr(IsEnd, ComputeEnd, ComputeLoop);

  B.SetInsertPoint(ComputeEnd);

  return {OldValue, NewAccumulator};
}
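// Illustrative trace of the loop above (assumed values, not from the source):
// with Op = add, an active mask of 0b1011, and lane values {v0, v1, -, v3},
// cttz visits lanes 0, 1, 3 in turn. The writelane builds the exclusive scan
// {identity, v0, -, v0+v1}, NewAccumulator finishes as v0+v1+v3, and each
// iteration clears one bit of ActiveBits until it reaches zero.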
void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
                                               AtomicRMWInst::BinOp Op,
                                               unsigned ValIdx,
                                               bool ValDivergent) const {
  // Start building just before the instruction.
  IRBuilder<> B(&I);

  if (AtomicRMWInst::isFPOperation(Op)) {
    B.setIsFPConstrained(I.getFunction()->hasFnAttribute(Attribute::StrictFP));
  }

  // If we are in a pixel shader, because of how we have to mask out helper
  // lane invocations, we need to record the entry and exit BBs.
  BasicBlock *PixelEntryBB = nullptr;
  BasicBlock *PixelExitBB = nullptr;

  // If we're optimizing an atomic within a pixel shader, we need to wrap the
  // entire atomic operation in a helper-lane check. We do not want any helper
  // lanes that are around only for the purposes of derivatives to take part
  // in any cross-lane communication, and we use a branch on whether the lane
  // is live to do this.
  if (IsPixelShader) {
    // Record I's original position as the entry block.
    PixelEntryBB = I.getParent();

    Value *const Cond = B.CreateIntrinsic(Intrinsic::amdgcn_ps_live, {}, {});
    Instruction *const NonHelperTerminator =
        SplitBlockAndInsertIfThen(Cond, &I, false, nullptr, &DTU, nullptr);

    // Record I's new position as the exit block.
    PixelExitBB = I.getParent();

    I.moveBefore(NonHelperTerminator);
    B.SetInsertPoint(&I);
  }
  Type *const Ty = I.getType();
  Type *Int32Ty = B.getInt32Ty();
  Type *IntNTy = B.getIntNTy(Ty->getPrimitiveSizeInBits());
  bool isAtomicFloatingPointTy = Ty->isFloatingPointTy();
  const unsigned TyBitWidth = DL->getTypeSizeInBits(Ty);
  auto *const VecTy = FixedVectorType::get(Int32Ty, 2);

  // This is the value in the atomic operation we need to combine in order to
  // reduce the number of atomic operations.
  Value *V = I.getOperand(ValIdx);

  // We need to know how many lanes are active within the wavefront, and we do
  // this by doing a ballot of active lanes.
  Type *const WaveTy = B.getIntNTy(ST->getWavefrontSize());
  CallInst *const Ballot =
      B.CreateIntrinsic(Intrinsic::amdgcn_ballot, WaveTy, B.getTrue());

  // We need to know how many lanes are active within the wavefront that are
  // below us. If we counted each lane linearly starting from 0, a lane is
  // below us only if its associated index was less than ours. We do this by
  // using the mbcnt intrinsic.
  Value *Mbcnt;
  if (ST->isWave32()) {
    Mbcnt = B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_lo, {},
                              {Ballot, B.getInt32(0)});
  } else {
    Value *const ExtractLo = B.CreateTrunc(Ballot, Int32Ty);
    Value *const ExtractHi = B.CreateTrunc(B.CreateLShr(Ballot, 32), Int32Ty);
    Mbcnt = B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_lo, {},
                              {ExtractLo, B.getInt32(0)});
    Mbcnt =
        B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_hi, {}, {ExtractHi, Mbcnt});
  }
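  // Example (illustrative): on wave32 with a ballot mask of 0b...0101101, the
  // lane with absolute index 3 has two set bits below it, so its Mbcnt is 2.
  // Mbcnt == 0 therefore identifies the lowest active lane, which is used
  // further down to elect the single lane that performs the real atomic.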
  Function *F = I.getFunction();
  LLVMContext &C = F->getContext();

  // For atomic sub, perform scan with add operation and allow one lane to
  // subtract the reduced value later.
  AtomicRMWInst::BinOp ScanOp = Op;
  if (Op == AtomicRMWInst::Sub) {
    ScanOp = AtomicRMWInst::Add;
  } else if (Op == AtomicRMWInst::FSub) {
    ScanOp = AtomicRMWInst::FAdd;
  }
  Value *Identity = getIdentityValueForAtomicOp(Ty, ScanOp);

  Value *ExclScan = nullptr;
  Value *NewV = nullptr;

  const bool NeedResult = !I.use_empty();

  BasicBlock *ComputeLoop = nullptr;
  BasicBlock *ComputeEnd = nullptr;
  // If we have a divergent value in each lane, we need to combine the value
  // using DPP.
  if (ValDivergent) {
    if (ScanImpl == ScanOptions::DPP) {
      // First we need to set all inactive invocations to the identity value,
      // so that they can correctly contribute to the final result.
      V = B.CreateBitCast(V, IntNTy);
      Identity = B.CreateBitCast(Identity, IntNTy);
      NewV = B.CreateIntrinsic(Intrinsic::amdgcn_set_inactive, IntNTy,
                               {V, Identity});
      NewV = B.CreateBitCast(NewV, Ty);
      V = B.CreateBitCast(V, Ty);
      Identity = B.CreateBitCast(Identity, Ty);
      if (!NeedResult && ST->hasPermLaneX16()) {
        // On GFX10 the permlanex16 instruction helps us build a reduction
        // without too many readlanes and writelanes, which are generally bad
        // for performance.
        NewV = buildReduction(B, ScanOp, NewV, Identity);
      } else {
        NewV = buildScan(B, ScanOp, NewV, Identity);
        if (NeedResult)
          ExclScan = buildShiftRight(B, NewV, Identity);
        // Read the value from the last lane, which has accumulated the values
        // of each active lane in the wavefront. This will be our new value
        // which we will provide to the atomic operation.
        Value *const LastLaneIdx = B.getInt32(ST->getWavefrontSize() - 1);
        NewV = B.CreateBitCast(NewV, IntNTy);
        NewV = B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {},
                                 {NewV, LastLaneIdx});
        NewV = B.CreateBitCast(NewV, Ty);
      }
      // Finally mark the readlanes in the WWM section.
      NewV = B.CreateIntrinsic(Intrinsic::amdgcn_strict_wwm, Ty, NewV);
    } else if (ScanImpl == ScanOptions::Iterative) {
      // Alternative implementation for the scan.
      ComputeLoop = BasicBlock::Create(C, "ComputeLoop", F);
      ComputeEnd = BasicBlock::Create(C, "ComputeEnd", F);
      std::tie(ExclScan, NewV) = buildScanIteratively(B, ScanOp, Identity, V, I,
                                                      ComputeLoop, ComputeEnd);
    } else {
      llvm_unreachable("Atomic Optimizer is disabled for None strategy");
    }
  } else {
    switch (Op) {
    default:
      llvm_unreachable("Unhandled atomic op");

    case AtomicRMWInst::Add:
    case AtomicRMWInst::Sub: {
      // The new value we will be contributing to the atomic operation is the
      // old value times the number of active lanes.
      Value *const Ctpop = B.CreateIntCast(
          B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot), Ty, false);
      NewV = buildMul(B, V, Ctpop);
      break;
    }
    case AtomicRMWInst::FAdd:
    case AtomicRMWInst::FSub: {
      Value *const Ctpop = B.CreateIntCast(
          B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot), Int32Ty, false);
      Value *const CtpopFP = B.CreateUIToFP(Ctpop, Ty);
      NewV = B.CreateFMul(V, CtpopFP);
      break;
    }
    case AtomicRMWInst::And:
    case AtomicRMWInst::Or:
    case AtomicRMWInst::Max:
    case AtomicRMWInst::Min:
    case AtomicRMWInst::UMax:
    case AtomicRMWInst::UMin:
    case AtomicRMWInst::FMin:
    case AtomicRMWInst::FMax:
      // These operations with a uniform value are idempotent: doing the
      // atomic operation multiple times has the same effect as doing it once.
      NewV = V;
      break;

    case AtomicRMWInst::Xor: {
      // The new value we will be contributing to the atomic operation is the
      // old value times the parity of the number of active lanes.
      Value *const Ctpop = B.CreateIntCast(
          B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot), Ty, false);
      NewV = buildMul(B, V, B.CreateAnd(Ctpop, 1));
      break;
    }
    }
  }

  // We only want a single lane to enter our new control flow, and we do this
  // by checking if the lane index is the lowest active lane.
  Value *const Cond = B.CreateICmpEQ(Mbcnt, B.getInt32(0));
  // Store I's original basic block before we split the block.
  BasicBlock *const OriginalBB = I.getParent();

  // We need to introduce some new control flow to force a single lane to be
  // active.
  Instruction *const SingleLaneTerminator =
      SplitBlockAndInsertIfThen(Cond, &I, false, nullptr, &DTU, nullptr);

  // For the iterative scan we additionally have to splice in the ComputeLoop
  // and ComputeEnd blocks and rewire the terminators and the dominator tree.
  BasicBlock *Predecessor = nullptr;
  if (ValDivergent && ScanImpl == ScanOptions::Iterative) {
    // Move the terminator from I's block to the ComputeEnd block. OriginalBB
    // is known to end in a branch because SplitBlockAndInsertIfThen inserted
    // one.
    BranchInst *Terminator = cast<BranchInst>(OriginalBB->getTerminator());
    B.SetInsertPoint(ComputeEnd);
    Terminator->removeFromParent();
    B.Insert(Terminator);

    // Branch to the ComputeLoop block unconditionally from I's block.
    B.SetInsertPoint(OriginalBB);
    B.CreateBr(ComputeLoop);

    // Update the dominator tree for the new control flow.
    SmallVector<DominatorTree::UpdateType, 6> DomTreeUpdates(
        {{DominatorTree::Insert, OriginalBB, ComputeLoop},
         {DominatorTree::Insert, ComputeLoop, ComputeEnd}});
    for (auto *Succ : Terminator->successors()) {
      DomTreeUpdates.push_back({DominatorTree::Insert, ComputeEnd, Succ});
      DomTreeUpdates.push_back({DominatorTree::Delete, OriginalBB, Succ});
    }

    DTU.applyUpdates(DomTreeUpdates);

    Predecessor = ComputeEnd;
  } else {
    Predecessor = OriginalBB;
  }
  // Move the IR builder into single_lane next.
  B.SetInsertPoint(SingleLaneTerminator);

  // Clone the original atomic operation into single_lane, replacing the
  // original value with our newly created one.
  Instruction *const NewI = I.clone();
  B.Insert(NewI);
  NewI->setOperand(ValIdx, NewV);

  // Move the IR builder into exit next, and start inserting just before the
  // original instruction.
  B.SetInsertPoint(&I);

  if (NeedResult) {
    // Create a PHI node to get our new atomic result into the exit block.
    PHINode *const PHI = B.CreatePHI(Ty, 2);
    PHI->addIncoming(PoisonValue::get(Ty), Predecessor);
    PHI->addIncoming(NewI, SingleLaneTerminator->getParent());
    // We need to broadcast the value from the lowest active lane (the first
    // lane) to all other lanes in the wavefront. We use an intrinsic for
    // this, but have to handle 64-bit broadcasts with two calls to it.
    Value *BroadcastI = nullptr;

    if (TyBitWidth == 64) {
      Value *CastedPhi = B.CreateBitCast(PHI, IntNTy);
      Value *const ExtractLo = B.CreateTrunc(CastedPhi, Int32Ty);
      Value *const ExtractHi =
          B.CreateTrunc(B.CreateLShr(CastedPhi, 32), Int32Ty);
      CallInst *const ReadFirstLaneLo =
          B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractLo);
      CallInst *const ReadFirstLaneHi =
          B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractHi);
      Value *const PartialInsert = B.CreateInsertElement(
          PoisonValue::get(VecTy), ReadFirstLaneLo, B.getInt32(0));
      Value *const Insert =
          B.CreateInsertElement(PartialInsert, ReadFirstLaneHi, B.getInt32(1));
      BroadcastI = B.CreateBitCast(Insert, Ty);
    } else if (TyBitWidth == 32) {
      Value *CastedPhi = B.CreateBitCast(PHI, IntNTy);
      BroadcastI =
          B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, CastedPhi);
      BroadcastI = B.CreateBitCast(BroadcastI, Ty);
    } else {
      llvm_unreachable("Unhandled atomic bit width");
    }
    // Now that we have the result of our single atomic operation, we need to
    // get our individual lane's slice into the result. We use the lane offset
    // we previously calculated combined with the atomic result value we got
    // from the first lane to compute our lane's share of the atomic result.
    Value *LaneOffset = nullptr;
    if (ValDivergent) {
      if (ScanImpl == ScanOptions::DPP) {
        LaneOffset =
            B.CreateIntrinsic(Intrinsic::amdgcn_strict_wwm, Ty, ExclScan);
      } else if (ScanImpl == ScanOptions::Iterative) {
        LaneOffset = ExclScan;
      } else {
        llvm_unreachable("Atomic Optimizer is disabled for None strategy");
      }
    } else {
      Mbcnt = isAtomicFloatingPointTy ? B.CreateUIToFP(Mbcnt, Ty)
                                      : B.CreateIntCast(Mbcnt, Ty, false);
      switch (Op) {
      default:
        llvm_unreachable("Unhandled atomic op");
      case AtomicRMWInst::Add:
      case AtomicRMWInst::Sub:
        LaneOffset = buildMul(B, V, Mbcnt);
        break;
      case AtomicRMWInst::And:
      case AtomicRMWInst::Or:
      case AtomicRMWInst::Max:
      case AtomicRMWInst::Min:
      case AtomicRMWInst::UMax:
      case AtomicRMWInst::UMin:
      case AtomicRMWInst::FMin:
      case AtomicRMWInst::FMax:
        LaneOffset = B.CreateSelect(Cond, Identity, V);
        break;
      case AtomicRMWInst::Xor:
        LaneOffset = buildMul(B, V, B.CreateAnd(Mbcnt, 1));
        break;
      case AtomicRMWInst::FAdd:
      case AtomicRMWInst::FSub:
        LaneOffset = B.CreateFMul(V, Mbcnt);
        break;
      }
    }
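    // Worked example (sketch): for an integer add of a uniform V, a lane with
    // mbcnt m reads BroadcastI + V * m as its old value; for xor only the
    // parity of m matters, hence the `and` with 1; for the idempotent
    // and/or/min/max cases every lane but the first contributes nothing,
    // hence the select of the identity.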
    Value *const Result = buildNonAtomicBinOp(B, Op, BroadcastI, LaneOffset);

    if (IsPixelShader) {
      // Need a final PHI to reconverge to above the helper lane branch mask.
      B.SetInsertPoint(PixelExitBB, PixelExitBB->getFirstNonPHIIt());

      PHINode *const PHI = B.CreatePHI(Ty, 2);
      PHI->addIncoming(PoisonValue::get(Ty), PixelEntryBB);
      PHI->addIncoming(Result, I.getParent());
      I.replaceAllUsesWith(PHI);
    } else {
      // Replace the original atomic instruction with the new one.
      I.replaceAllUsesWith(Result);
    }
  }

  // And delete the original.
  I.eraseFromParent();
}
1023 "AMDGPU atomic optimizations",
false,
false)
1030 return new AMDGPUAtomicOptimizer(ScanStrategy);
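// Usage note (a sketch, assuming a recent LLVM build): the pass can be
// exercised in isolation with opt, e.g.
//
//   opt -passes=amdgpu-atomic-optimizer -mtriple=amdgcn-- -S in.ll
//
// and the scan strategy can be steered with
// -amdgpu-atomic-optimizer-strategy={DPP,Iterative,None}.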