//===- AMDGPUWaitSGPRHazards.cpp - Insert waits for SGPR read hazards -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Insert s_wait_alu instructions to mitigate SGPR read hazards on GFX12.
//
//===----------------------------------------------------------------------===//

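// Illustrative sketch of the hazard this pass mitigates (hypothetical GFX12
// assembly; the exact hazard rules are defined by the hardware): once a VALU
// has read an SGPR, a later SALU write to that SGPR is not immediately
// visible, so a subsequent read needs an s_wait_alu:
//
//   v_add_f32 v0, s0, v1          ; VALU reads s0 -> its bank becomes "tracked"
//   s_mov_b32 s0, 1               ; SALU writes s0 -> uncommitted value
//   s_wait_alu depctr_sa_sdst(0)  ; inserted by this pass
//   s_add_u32 s1, s0, s2          ; read of s0 is now safe
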
#include "AMDGPUWaitSGPRHazards.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "llvm/ADT/SetVector.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-wait-sgpr-hazards"

26 "amdgpu-sgpr-hazard-wait", cl::init(true), cl::Hidden,
27 cl::desc("Enable required s_wait_alu on SGPR hazards"));
28
30 "amdgpu-sgpr-hazard-boundary-cull", cl::init(false), cl::Hidden,
31 cl::desc("Cull hazards on function boundaries"));
32
33static cl::opt<bool>
34 GlobalCullSGPRHazardsAtMemWait("amdgpu-sgpr-hazard-mem-wait-cull",
35 cl::init(false), cl::Hidden,
36 cl::desc("Cull hazards on memory waits"));
37
39 "amdgpu-sgpr-hazard-mem-wait-cull-threshold", cl::init(8), cl::Hidden,
40 cl::desc("Number of tracked SGPRs before initiating hazard cull on memory "
41 "wait"));
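
// Example (hypothetical llc invocation; these flags are cl::Hidden escape
// hatches, so they do not appear in --help):
//   llc -mtriple=amdgcn -mcpu=gfx1200 \
//       -amdgpu-sgpr-hazard-boundary-cull \
//       -amdgpu-sgpr-hazard-mem-wait-cull \
//       -amdgpu-sgpr-hazard-mem-wait-cull-threshold=4 input.ll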

namespace {

class AMDGPUWaitSGPRHazards {
public:
  const GCNSubtarget *ST;
  const SIInstrInfo *TII;
  const SIRegisterInfo *TRI;
  const MachineRegisterInfo *MRI;
  unsigned DsNopCount;

  bool EnableSGPRHazardWaits;
  bool CullSGPRHazardsOnFunctionBoundary;
  bool CullSGPRHazardsAtMemWait;
  unsigned CullSGPRHazardsMemWaitThreshold;

  AMDGPUWaitSGPRHazards() = default;

  // Return the numeric ID 0-127 for a given SGPR.
  static std::optional<unsigned> sgprNumber(Register Reg,
                                            const SIRegisterInfo &TRI) {
    switch (Reg) {
    case AMDGPU::M0:
    case AMDGPU::EXEC:
    case AMDGPU::EXEC_LO:
    case AMDGPU::EXEC_HI:
    case AMDGPU::SGPR_NULL:
    case AMDGPU::SGPR_NULL64:
      return {};
    default:
      break;
    }
    unsigned RegN = TRI.getHWRegIndex(Reg);
    if (RegN > 127)
      return {};
    return RegN;
  }
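
  // Example: sgprNumber(AMDGPU::SGPR3, TRI) yields 3, since an SGPR's
  // hardware register index equals its number; special registers such as
  // M0 and EXEC are rejected and return an empty optional.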

  static inline bool isVCC(Register Reg) {
    return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::VCC_HI;
  }

  // Adjust global offsets for instructions bundled with S_GETPC_B64 after
  // insertion of a new instruction.
  static void updateGetPCBundle(MachineInstr *NewMI) {
    if (!NewMI->isBundled())
      return;

    // Find start of bundle.
    auto I = NewMI->getIterator();
    while (I->isBundledWithPred())
      I--;
    if (I->isBundle())
      I++;

    // Bail if this is not an S_GETPC bundle.
    if (I->getOpcode() != AMDGPU::S_GETPC_B64)
      return;

    // Update offsets of any references in the bundle.
    const unsigned NewBytes = 4;
    assert(NewMI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
           "Unexpected instruction insertion in bundle");
    auto NextMI = std::next(NewMI->getIterator());
    auto End = NewMI->getParent()->end();
    while (NextMI != End && NextMI->isBundledWithPred()) {
      for (auto &Operand : NextMI->operands()) {
        if (Operand.isGlobal())
          Operand.setOffset(Operand.getOffset() + NewBytes);
      }
      NextMI++;
    }
  }
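
  // Illustrative shape of an affected bundle (simplified MIR; @sym is a
  // hypothetical global). Inserting the 4-byte S_WAITCNT_DEPCTR inside the
  // bundle shifts the PC-relative offsets, so each global operand after the
  // insertion point must be advanced by 4:
  //
  //   BUNDLE {
  //     $sgpr0_sgpr1 = S_GETPC_B64
  //     $sgpr0 = S_ADD_U32 $sgpr0, target-flags(amdgpu-rel32-lo) @sym + 4, ...
  //     $sgpr1 = S_ADDC_U32 $sgpr1, target-flags(amdgpu-rel32-hi) @sym + 12, ...
  //   }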

  struct HazardState {
    static constexpr unsigned None = 0;
    static constexpr unsigned SALU = (1 << 0);
    static constexpr unsigned VALU = (1 << 1);

    std::bitset<64> Tracked;      // SGPR banks ever read by VALU
    std::bitset<128> SALUHazards; // SGPRs with uncommitted values from SALU
    std::bitset<128> VALUHazards; // SGPRs with uncommitted values from VALU
    unsigned VCCHazard = None;    // Source of current VCC writes
    bool ActiveFlat = false;      // Has unwaited flat instructions

    bool merge(const HazardState &RHS) {
      HazardState Orig(*this);
      *this |= RHS;
      return (*this != Orig);
    }

    bool operator==(const HazardState &RHS) const {
      return Tracked == RHS.Tracked && SALUHazards == RHS.SALUHazards &&
             VALUHazards == RHS.VALUHazards && VCCHazard == RHS.VCCHazard &&
             ActiveFlat == RHS.ActiveFlat;
    }

    bool operator!=(const HazardState &RHS) const { return !(*this == RHS); }

    void operator|=(const HazardState &RHS) {
      Tracked |= RHS.Tracked;
      SALUHazards |= RHS.SALUHazards;
      VALUHazards |= RHS.VALUHazards;
      VCCHazard |= RHS.VCCHazard;
      ActiveFlat |= RHS.ActiveFlat;
    }
  };
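
  // A minimal sketch of the merge semantics (illustrative only): merging is a
  // pure union, so state only ever grows, which is what guarantees the
  // dataflow in run() reaches a fixed point.
  //
  //   HazardState A, B;
  //   A.SALUHazards.set(4);            // path A: s4 has uncommitted SALU write
  //   B.VCCHazard = HazardState::VALU; // path B: VCC written by VALU
  //   bool Grew = A.merge(B);          // A is now the union; Grew == true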

  struct BlockHazardState {
    HazardState In;
    HazardState Out;
  };

  DenseMap<const MachineBasicBlock *, BlockHazardState> BlockState;

  static constexpr unsigned WAVE32_NOPS = 4;
  static constexpr unsigned WAVE64_NOPS = 8;

  void insertHazardCull(MachineBasicBlock &MBB,
                        MachineBasicBlock::instr_iterator MI) {
    assert(!MI->isBundled());
    unsigned Count = DsNopCount;
    while (Count--)
      BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::DS_NOP));
  }
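
  // A cull is a run of DsNopCount DS_NOPs; the scan in runOnMachineBasicBlock
  // treats such a run as committing all outstanding SGPR reads and clears the
  // Tracked set (see the DS_NOP handling there).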

  unsigned mergeMasks(unsigned Mask1, unsigned Mask2) {
    unsigned Mask = 0; // Every depctr field is overwritten below.
    Mask = AMDGPU::DepCtr::encodeFieldSaSdst(
        Mask, std::min(AMDGPU::DepCtr::decodeFieldSaSdst(Mask1),
                       AMDGPU::DepCtr::decodeFieldSaSdst(Mask2)));
    Mask = AMDGPU::DepCtr::encodeFieldVaVcc(
        Mask, std::min(AMDGPU::DepCtr::decodeFieldVaVcc(Mask1),
                       AMDGPU::DepCtr::decodeFieldVaVcc(Mask2)));
    Mask = AMDGPU::DepCtr::encodeFieldVmVsrc(
        Mask, std::min(AMDGPU::DepCtr::decodeFieldVmVsrc(Mask1),
                       AMDGPU::DepCtr::decodeFieldVmVsrc(Mask2)));
    Mask = AMDGPU::DepCtr::encodeFieldVaSdst(
        Mask, std::min(AMDGPU::DepCtr::decodeFieldVaSdst(Mask1),
                       AMDGPU::DepCtr::decodeFieldVaSdst(Mask2)));
    Mask = AMDGPU::DepCtr::encodeFieldVaVdst(
        Mask, std::min(AMDGPU::DepCtr::decodeFieldVaVdst(Mask1),
                       AMDGPU::DepCtr::decodeFieldVaVdst(Mask2)));
    Mask = AMDGPU::DepCtr::encodeFieldHoldCnt(
        Mask, std::min(AMDGPU::DepCtr::decodeFieldHoldCnt(Mask1),
                       AMDGPU::DepCtr::decodeFieldHoldCnt(Mask2)));
    Mask = AMDGPU::DepCtr::encodeFieldVaSsrc(
        Mask, std::min(AMDGPU::DepCtr::decodeFieldVaSsrc(Mask1),
                       AMDGPU::DepCtr::decodeFieldVaSsrc(Mask2)));
    return Mask;
  }
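
  // Example (illustrative): each depctr field is an upper bound on an
  // outstanding-op counter, so taking the per-field minimum yields a single
  // wait at least as strong as either input:
  //
  //   unsigned A = AMDGPU::DepCtr::encodeFieldSaSdst(Default, 0); // sa_sdst(0)
  //   unsigned B = AMDGPU::DepCtr::encodeFieldVaVcc(Default, 0);  // va_vcc(0)
  //   unsigned M = mergeMasks(A, B); // waits on both sa_sdst(0) and va_vcc(0)
  //
  // (Default stands for the subtarget's default depctr encoding.)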

  bool mergeConsecutiveWaitAlus(MachineBasicBlock::instr_iterator &MI,
                                unsigned Mask) {
    auto MBB = MI->getParent();
    if (MI == MBB->instr_begin())
      return false;

    auto It = prev_nodbg(MI, MBB->instr_begin());
    if (It->getOpcode() != AMDGPU::S_WAITCNT_DEPCTR)
      return false;

    It->getOperand(0).setImm(mergeMasks(Mask, It->getOperand(0).getImm()));
    return true;
  }

  bool runOnMachineBasicBlock(MachineBasicBlock &MBB, bool Emit) {
    enum { WA_VALU = 0x1, WA_SALU = 0x2, WA_VCC = 0x4 };

    HazardState State = BlockState[&MBB].In;
    SmallSet<Register, 8> SeenRegs;
    bool Emitted = false;
    unsigned DsNops = 0;

    for (MachineBasicBlock::instr_iterator MI = MBB.instr_begin(),
                                           E = MBB.instr_end();
         MI != E; ++MI) {
      if (MI->isMetaInstruction())
        continue;

      // Clear tracked SGPRs if sufficient DS_NOPs occur
      if (MI->getOpcode() == AMDGPU::DS_NOP) {
        if (++DsNops >= DsNopCount)
          State.Tracked.reset();
        continue;
      }
      DsNops = 0;

      // Snoop FLAT instructions to avoid adding culls before scratch/lds loads.
      // Culls could be disproportionate in cost to load time.
      if (SIInstrInfo::isFLAT(*MI) && !SIInstrInfo::isFLATGlobal(*MI))
        State.ActiveFlat = true;

      // SMEM or VMEM clears hazards
      // FIXME: adapt to add FLAT without VALU (so !isLDSDMA())?
      if ((SIInstrInfo::isVMEM(*MI) && !SIInstrInfo::isFLAT(*MI)) ||
          SIInstrInfo::isSMRD(*MI)) {
        State.VCCHazard = HazardState::None;
        State.SALUHazards.reset();
        State.VALUHazards.reset();
        continue;
      }

      // Existing S_WAITALU can clear hazards
      if (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) {
        unsigned int Mask = MI->getOperand(0).getImm();
        if (AMDGPU::DepCtr::decodeFieldVaVcc(Mask) == 0)
          State.VCCHazard &= ~HazardState::VALU;
        if (AMDGPU::DepCtr::decodeFieldSaSdst(Mask) == 0) {
          State.SALUHazards.reset();
          State.VCCHazard &= ~HazardState::SALU;
        }
        if (AMDGPU::DepCtr::decodeFieldVaSdst(Mask) == 0)
          State.VALUHazards.reset();
        continue;
      }

      // Snoop counter waits to insert culls
      if (CullSGPRHazardsAtMemWait &&
          (MI->getOpcode() == AMDGPU::S_WAIT_LOADCNT ||
           MI->getOpcode() == AMDGPU::S_WAIT_SAMPLECNT ||
           MI->getOpcode() == AMDGPU::S_WAIT_BVHCNT) &&
          (MI->getOperand(0).isImm() && MI->getOperand(0).getImm() == 0) &&
          (State.Tracked.count() >= CullSGPRHazardsMemWaitThreshold)) {
        if (MI->getOpcode() == AMDGPU::S_WAIT_LOADCNT && State.ActiveFlat) {
          State.ActiveFlat = false;
        } else {
          State.Tracked.reset();
          if (Emit)
            insertHazardCull(MBB, MI);
          continue;
        }
      }

      // Process only VALUs and SALUs
      bool IsVALU = SIInstrInfo::isVALU(*MI);
      bool IsSALU = SIInstrInfo::isSALU(*MI);
      if (!IsVALU && !IsSALU)
        continue;

      unsigned Wait = 0;

      auto processOperand = [&](const MachineOperand &Op, bool IsUse) {
        if (!Op.isReg())
          return;
        Register Reg = Op.getReg();
        assert(!Op.getSubReg());
        if (!TRI->isSGPRReg(*MRI, Reg))
          return;

        // Only visit each register once
        if (!SeenRegs.insert(Reg).second)
          return;

        auto RegNumber = sgprNumber(Reg, *TRI);
        if (!RegNumber)
          return;

        // Track SGPRs by pair -- numeric ID of a 64b SGPR pair,
        // i.e. SGPR0 = SGPR0_SGPR1 = 0, SGPR3 = SGPR2_SGPR3 = 1, etc.
        unsigned RegN = *RegNumber;
        unsigned PairN = (RegN >> 1) & 0x3f;

        // Read/write of an untracked register is safe; but must record any new
        // reads.
        if (!State.Tracked[PairN]) {
          if (IsVALU && IsUse)
            State.Tracked.set(PairN);
          return;
        }

        uint8_t SGPRCount =
            AMDGPU::getRegBitWidth(*TRI->getRegClassForReg(*MRI, Reg)) / 32;

        if (IsUse) {
          // SALU reading SGPR clears VALU hazards
          if (IsSALU) {
            if (isVCC(Reg)) {
              if (State.VCCHazard & HazardState::VALU)
                State.VCCHazard = HazardState::None;
            } else {
              State.VALUHazards.reset();
            }
          }
          // Compute required waits
          for (uint8_t RegIdx = 0; RegIdx < SGPRCount; ++RegIdx) {
            Wait |= State.SALUHazards[RegN + RegIdx] ? WA_SALU : 0;
            Wait |= IsVALU && State.VALUHazards[RegN + RegIdx] ? WA_VALU : 0;
          }
          if (isVCC(Reg) && State.VCCHazard) {
            // Note: it's possible for both SALU and VALU hazards to exist if
            // VCC was updated differently by merged predecessors.
            if (State.VCCHazard & HazardState::SALU)
              Wait |= WA_SALU;
            if (State.VCCHazard & HazardState::VALU)
              Wait |= WA_VCC;
          }
        } else {
          // Update hazards
          if (isVCC(Reg)) {
            State.VCCHazard = IsSALU ? HazardState::SALU : HazardState::VALU;
          } else {
            for (uint8_t RegIdx = 0; RegIdx < SGPRCount; ++RegIdx) {
              if (IsSALU)
                State.SALUHazards.set(RegN + RegIdx);
              else
                State.VALUHazards.set(RegN + RegIdx);
            }
          }
        }
      };

      const bool IsSetPC =
          (MI->isCall() || MI->isReturn() || MI->isIndirectBranch()) &&
          MI->getOpcode() != AMDGPU::S_ENDPGM &&
          MI->getOpcode() != AMDGPU::S_ENDPGM_SAVED;

      // Only consider implicit VCC specified by instruction descriptor.
      const bool HasImplicitVCC =
          llvm::any_of(MI->getDesc().implicit_uses(), isVCC) ||
          llvm::any_of(MI->getDesc().implicit_defs(), isVCC);

      if (IsSetPC) {
        // All SGPR writes before a call/return must be flushed as the
        // callee/caller will not see the hazard chain.
        if (State.VCCHazard & HazardState::VALU)
          Wait |= WA_VCC;
        if (State.SALUHazards.any() || (State.VCCHazard & HazardState::SALU))
          Wait |= WA_SALU;
        if (State.VALUHazards.any())
          Wait |= WA_VALU;
        if (CullSGPRHazardsOnFunctionBoundary && State.Tracked.any()) {
          State.Tracked.reset();
          if (Emit)
            insertHazardCull(MBB, MI);
        }
      } else {
        // Process uses to determine required wait.
        SeenRegs.clear();
        for (const MachineOperand &Op : MI->all_uses()) {
          if (Op.isImplicit() &&
              (!HasImplicitVCC || !Op.isReg() || !isVCC(Op.getReg())))
            continue;
          processOperand(Op, true);
        }
      }

      // Apply wait
      if (Wait) {
        unsigned Mask = AMDGPU::DepCtr::getDefaultDepCtrEncoding(*ST);
        if (Wait & WA_VCC) {
          State.VCCHazard &= ~HazardState::VALU;
          Mask = AMDGPU::DepCtr::encodeFieldVaVcc(Mask, 0);
        }
        if (Wait & WA_SALU) {
          State.SALUHazards.reset();
          State.VCCHazard &= ~HazardState::SALU;
          Mask = AMDGPU::DepCtr::encodeFieldSaSdst(Mask, 0);
        }
        if (Wait & WA_VALU) {
          State.VALUHazards.reset();
          Mask = AMDGPU::DepCtr::encodeFieldVaSdst(Mask, 0);
        }
        if (Emit) {
          if (!mergeConsecutiveWaitAlus(MI, Mask)) {
            auto NewMI = BuildMI(MBB, MI, MI->getDebugLoc(),
                                 TII->get(AMDGPU::S_WAITCNT_DEPCTR))
                             .addImm(Mask);
            updateGetPCBundle(NewMI);
          }
          Emitted = true;
        }
      }

      // On return from a call, SGPR state is unknown, so assume all potential
      // hazards exist.
      if (MI->isCall() && !CullSGPRHazardsOnFunctionBoundary)
        State.Tracked.set();

      // Update hazards based on defs.
      SeenRegs.clear();
      for (const MachineOperand &Op : MI->all_defs()) {
        if (Op.isImplicit() &&
            (!HasImplicitVCC || !Op.isReg() || !isVCC(Op.getReg())))
          continue;
        processOperand(Op, false);
      }
    }

    BlockHazardState &BS = BlockState[&MBB];
    bool Changed = State != BS.Out;
    if (Emit) {
      assert(!Changed && "Hazard state should not change on emit pass");
      return Emitted;
    }
    if (Changed)
      BS.Out = State;
    return Changed;
  }

  bool run(MachineFunction &MF) {
    ST = &MF.getSubtarget<GCNSubtarget>();
    if (!ST->hasVALUReadSGPRHazard())
      return false;

    // Parse settings
    EnableSGPRHazardWaits = GlobalEnableSGPRHazardWaits;
    CullSGPRHazardsOnFunctionBoundary = GlobalCullSGPRHazardsOnFunctionBoundary;
    CullSGPRHazardsAtMemWait = GlobalCullSGPRHazardsAtMemWait;
    CullSGPRHazardsMemWaitThreshold = GlobalCullSGPRHazardsMemWaitThreshold;

    if (!GlobalEnableSGPRHazardWaits.getNumOccurrences())
      EnableSGPRHazardWaits = MF.getFunction().getFnAttributeAsParsedInteger(
          "amdgpu-sgpr-hazard-wait", EnableSGPRHazardWaits);
    if (!GlobalCullSGPRHazardsOnFunctionBoundary.getNumOccurrences())
      CullSGPRHazardsOnFunctionBoundary =
          MF.getFunction().hasFnAttribute("amdgpu-sgpr-hazard-boundary-cull");
    if (!GlobalCullSGPRHazardsAtMemWait.getNumOccurrences())
      CullSGPRHazardsAtMemWait =
          MF.getFunction().hasFnAttribute("amdgpu-sgpr-hazard-mem-wait-cull");
    if (!GlobalCullSGPRHazardsMemWaitThreshold.getNumOccurrences())
      CullSGPRHazardsMemWaitThreshold =
          MF.getFunction().getFnAttributeAsParsedInteger(
              "amdgpu-sgpr-hazard-mem-wait-cull-threshold",
              CullSGPRHazardsMemWaitThreshold);

    // Bail if disabled
    if (!EnableSGPRHazardWaits)
      return false;

    TII = ST->getInstrInfo();
    TRI = ST->getRegisterInfo();
    MRI = &MF.getRegInfo();
    DsNopCount = ST->isWave64() ? WAVE64_NOPS : WAVE32_NOPS;

    auto CallingConv = MF.getFunction().getCallingConv();
    if (!AMDGPU::isEntryFunctionCC(CallingConv) &&
        !CullSGPRHazardsOnFunctionBoundary) {
      // Callee must consider all SGPRs as tracked.
      LLVM_DEBUG(dbgs() << "Is called function, track all SGPRs.\n");
      MachineBasicBlock &EntryBlock = MF.front();
      BlockState[&EntryBlock].In.Tracked.set();
    }

    // Calculate the hazard state for each basic block.
    // Iterate until a fixed point is reached.
    // The fixed point is guaranteed as the merge function only ever increases
    // the hazard set, and all backedges will cause a merge.
    //
    // Note: we have to take care of the entry block as this technically
    // has an edge from outside the function. Failure to treat this as
    // a merge could prevent the fixed point being reached.
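    //
    // Example: in a loop { A -> B -> A }, if processing B grows Out(B), the
    // backedge merge grows In(A), A is re-queued, and iteration repeats until
    // no block's output changes; termination follows because states only grow.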
    SetVector<MachineBasicBlock *> Worklist;
    for (auto &MBB : reverse(MF))
      Worklist.insert(&MBB);
    while (!Worklist.empty()) {
      auto &MBB = *Worklist.pop_back_val();
      bool Changed = runOnMachineBasicBlock(MBB, false);
      if (Changed) {
        // Note: take a copy of the state here in case it is reallocated by
        // the map
        HazardState NewState = BlockState[&MBB].Out;
        // Propagate to all successor blocks
        for (auto Succ : MBB.successors()) {
          // We only need to merge hazards at CFG merge points.
          auto &SuccState = BlockState[Succ];
          if (Succ->getSinglePredecessor() && !Succ->isEntryBlock()) {
            if (SuccState.In != NewState) {
              SuccState.In = NewState;
              Worklist.insert(Succ);
            }
          } else if (SuccState.In.merge(NewState)) {
            Worklist.insert(Succ);
          }
        }
      }
    }

    LLVM_DEBUG(dbgs() << "Emit s_wait_alu instructions\n");

    // Final pass to emit the wait instructions.
    bool Changed = false;
    for (auto &MBB : MF)
      Changed |= runOnMachineBasicBlock(MBB, true);

    BlockState.clear();
    return Changed;
  }
528};
529
530class AMDGPUWaitSGPRHazardsLegacy : public MachineFunctionPass {
531public:
532 static char ID;
533
534 AMDGPUWaitSGPRHazardsLegacy() : MachineFunctionPass(ID) {}
535
536 bool runOnMachineFunction(MachineFunction &MF) override {
537 return AMDGPUWaitSGPRHazards().run(MF);
538 }
539
540 void getAnalysisUsage(AnalysisUsage &AU) const override {
541 AU.setPreservesCFG();
543 }
544};
545
546} // namespace

char AMDGPUWaitSGPRHazardsLegacy::ID = 0;

char &llvm::AMDGPUWaitSGPRHazardsLegacyID = AMDGPUWaitSGPRHazardsLegacy::ID;

INITIALIZE_PASS(AMDGPUWaitSGPRHazardsLegacy, DEBUG_TYPE,
                "AMDGPU Insert waits for SGPR read hazards", false, false)

PreservedAnalyses
AMDGPUWaitSGPRHazardsPass::run(MachineFunction &MF,
                               MachineFunctionAnalysisManager &MFAM) {
  if (AMDGPUWaitSGPRHazards().run(MF))
    return PreservedAnalyses::none();
  return PreservedAnalyses::all();
}