LLVM  16.0.0git
AMDGPUInsertDelayAlu.cpp
Go to the documentation of this file.
1 //===- AMDGPUInsertDelayAlu.cpp - Insert s_delay_alu instructions ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Insert s_delay_alu instructions to avoid stalls on GFX11+.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "GCNSubtarget.h"
17 #include "SIInstrInfo.h"
18 #include "llvm/ADT/SetVector.h"
19 
20 using namespace llvm;
21 
22 #define DEBUG_TYPE "amdgpu-insert-delay-alu"
23 
24 namespace {
25 
26 class AMDGPUInsertDelayAlu : public MachineFunctionPass {
27 public:
28  static char ID;
29 
30  const SIInstrInfo *SII;
31  const TargetRegisterInfo *TRI;
32 
33  TargetSchedModel SchedModel;
34 
35  AMDGPUInsertDelayAlu() : MachineFunctionPass(ID) {}
36 
37  void getAnalysisUsage(AnalysisUsage &AU) const override {
38  AU.setPreservesCFG();
40  }
41 
42  // Return true if MI waits for all outstanding VALU instructions to complete.
43  static bool instructionWaitsForVALU(const MachineInstr &MI) {
44  // These instruction types wait for VA_VDST==0 before issuing.
45  const uint64_t VA_VDST_0 = SIInstrFlags::DS | SIInstrFlags::EXP |
48  if (MI.getDesc().TSFlags & VA_VDST_0)
49  return true;
50  if (MI.getOpcode() == AMDGPU::S_SENDMSG_RTN_B32 ||
51  MI.getOpcode() == AMDGPU::S_SENDMSG_RTN_B64)
52  return true;
53  if (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
54  (MI.getOperand(0).getImm() & 0xf000) == 0)
55  return true;
56  return false;
57  }
58 
59  // Types of delay that can be encoded in an s_delay_alu instruction.
60  enum DelayType { VALU, TRANS, SALU, OTHER };
61 
62  // Get the delay type for an instruction with the specified TSFlags.
63  static DelayType getDelayType(uint64_t TSFlags) {
65  return TRANS;
67  return VALU;
69  return SALU;
70  return OTHER;
71  }
72 
73  // Information about the last instruction(s) that wrote to a particular
74  // regunit. In straight-line code there will only be one such instruction, but
75  // when control flow converges we merge the delay information from each path
76  // to represent the union of the worst-case delays of each type.
77  struct DelayInfo {
78  // One larger than the maximum number of (non-TRANS) VALU instructions we
79  // can encode in an s_delay_alu instruction.
80  static const unsigned VALU_MAX = 5;
81 
82  // One larger than the maximum number of TRANS instructions we can encode in
83  // an s_delay_alu instruction.
84  static const unsigned TRANS_MAX = 4;
85 
86  // If it was written by a (non-TRANS) VALU, remember how many clock cycles
87  // are left until it completes, and how many other (non-TRANS) VALU we have
88  // seen since it was issued.
89  uint8_t VALUCycles = 0;
90  uint8_t VALUNum = VALU_MAX;
91 
92  // If it was written by a TRANS, remember how many clock cycles are left
93  // until it completes, and how many other TRANS we have seen since it was
94  // issued.
95  uint8_t TRANSCycles = 0;
96  uint8_t TRANSNum = TRANS_MAX;
97  // Also remember how many other (non-TRANS) VALU we have seen since it was
98  // issued. When an instruction depends on both a prior TRANS and a prior
99  // non-TRANS VALU, this is used to decide whether to encode a wait for just
100  // one or both of them.
101  uint8_t TRANSNumVALU = VALU_MAX;
102 
103  // If it was written by an SALU, remember how many clock cycles are left
104  // until it completes.
105  uint8_t SALUCycles = 0;
106 
107  DelayInfo() = default;
108 
109  DelayInfo(DelayType Type, unsigned Cycles) {
110  switch (Type) {
111  default:
112  llvm_unreachable("unexpected type");
113  case VALU:
114  VALUCycles = Cycles;
115  VALUNum = 0;
116  break;
117  case TRANS:
118  TRANSCycles = Cycles;
119  TRANSNum = 0;
120  TRANSNumVALU = 0;
121  break;
122  case SALU:
123  SALUCycles = Cycles;
124  break;
125  }
126  }
127 
128  bool operator==(const DelayInfo &RHS) const {
129  return VALUCycles == RHS.VALUCycles && VALUNum == RHS.VALUNum &&
130  TRANSCycles == RHS.TRANSCycles && TRANSNum == RHS.TRANSNum &&
131  TRANSNumVALU == RHS.TRANSNumVALU && SALUCycles == RHS.SALUCycles;
132  }
133 
134  bool operator!=(const DelayInfo &RHS) const { return !(*this == RHS); }
135 
136  // Merge another DelayInfo into this one, to represent the union of the
137  // worst-case delays of each type.
138  void merge(const DelayInfo &RHS) {
139  VALUCycles = std::max(VALUCycles, RHS.VALUCycles);
140  VALUNum = std::min(VALUNum, RHS.VALUNum);
141  TRANSCycles = std::max(TRANSCycles, RHS.TRANSCycles);
142  TRANSNum = std::min(TRANSNum, RHS.TRANSNum);
143  TRANSNumVALU = std::min(TRANSNumVALU, RHS.TRANSNumVALU);
144  SALUCycles = std::max(SALUCycles, RHS.SALUCycles);
145  }
146 
147  // Update this DelayInfo after issuing an instruction. IsVALU should be 1
148  // when issuing a (non-TRANS) VALU, else 0. IsTRANS should be 1 when issuing
149  // a TRANS, else 0. Cycles is the number of cycles it takes to issue the
150  // instruction. Return true if there is no longer any useful delay info.
151  bool advance(DelayType Type, unsigned Cycles) {
152  bool Erase = true;
153 
154  VALUNum += (Type == VALU);
155  if (VALUNum >= VALU_MAX || VALUCycles <= Cycles) {
156  // Forget about the VALU instruction. It was too far back or has
157  // definitely completed by now.
158  VALUNum = VALU_MAX;
159  VALUCycles = 0;
160  } else {
161  VALUCycles -= Cycles;
162  Erase = false;
163  }
164 
165  TRANSNum += (Type == TRANS);
166  TRANSNumVALU += (Type == VALU);
167  if (TRANSNum >= TRANS_MAX || TRANSCycles <= Cycles) {
168  // Forget about any TRANS instruction. It was too far back or has
169  // definitely completed by now.
170  TRANSNum = TRANS_MAX;
171  TRANSNumVALU = VALU_MAX;
172  TRANSCycles = 0;
173  } else {
174  TRANSCycles -= Cycles;
175  Erase = false;
176  }
177 
178  if (SALUCycles <= Cycles) {
179  // Forget about any SALU instruction. It has definitely completed by
180  // now.
181  SALUCycles = 0;
182  } else {
183  SALUCycles -= Cycles;
184  Erase = false;
185  }
186 
187  return Erase;
188  }
189 
190 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
191  void dump() const {
192  if (VALUCycles)
193  dbgs() << " VALUCycles=" << (int)VALUCycles;
194  if (VALUNum < VALU_MAX)
195  dbgs() << " VALUNum=" << (int)VALUNum;
196  if (TRANSCycles)
197  dbgs() << " TRANSCycles=" << (int)TRANSCycles;
198  if (TRANSNum < TRANS_MAX)
199  dbgs() << " TRANSNum=" << (int)TRANSNum;
200  if (TRANSNumVALU < VALU_MAX)
201  dbgs() << " TRANSNumVALU=" << (int)TRANSNumVALU;
202  if (SALUCycles)
203  dbgs() << " SALUCycles=" << (int)SALUCycles;
204  }
205 #endif
206  };
207 
208  // A map from regunits to the delay info for that regunit.
209  struct DelayState : DenseMap<unsigned, DelayInfo> {
210  // Merge another DelayState into this one by merging the delay info for each
211  // regunit.
212  void merge(const DelayState &RHS) {
213  for (const auto &KV : RHS) {
214  iterator It;
215  bool Inserted;
216  std::tie(It, Inserted) = insert(KV);
217  if (!Inserted)
218  It->second.merge(KV.second);
219  }
220  }
221 
222  // Advance the delay info for each regunit, erasing any that are no longer
223  // useful.
224  void advance(DelayType Type, unsigned Cycles) {
225  iterator Next;
226  for (auto I = begin(), E = end(); I != E; I = Next) {
227  Next = std::next(I);
228  if (I->second.advance(Type, Cycles))
229  erase(I);
230  }
231  }
232 
233 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
234  void dump(const TargetRegisterInfo *TRI) const {
235  if (empty()) {
236  dbgs() << " empty\n";
237  return;
238  }
239 
240  // Dump DelayInfo for each RegUnit in numerical order.
242  Order.reserve(size());
243  for (const_iterator I = begin(), E = end(); I != E; ++I)
244  Order.push_back(I);
245  llvm::sort(Order, [](const const_iterator &A, const const_iterator &B) {
246  return A->first < B->first;
247  });
248  for (const_iterator I : Order) {
249  dbgs() << " " << printRegUnit(I->first, TRI);
250  I->second.dump();
251  dbgs() << "\n";
252  }
253  }
254 #endif
255  };
256 
257  // The saved delay state at the end of each basic block.
259 
260  // Emit an s_delay_alu instruction if necessary before MI.
261  MachineInstr *emitDelayAlu(MachineInstr &MI, DelayInfo Delay,
262  MachineInstr *LastDelayAlu) {
263  unsigned Imm = 0;
264 
265  // Wait for a TRANS instruction.
266  if (Delay.TRANSNum < DelayInfo::TRANS_MAX)
267  Imm |= 4 + Delay.TRANSNum;
268 
269  // Wait for a VALU instruction (if it's more recent than any TRANS
270  // instruction that we're also waiting for).
271  if (Delay.VALUNum < DelayInfo::VALU_MAX &&
272  Delay.VALUNum <= Delay.TRANSNumVALU) {
273  if (Imm & 0xf)
274  Imm |= Delay.VALUNum << 7;
275  else
276  Imm |= Delay.VALUNum;
277  }
278 
279  // Wait for an SALU instruction.
280  if (Delay.SALUCycles) {
281  if (Imm & 0x780) {
282  // We have already encoded a VALU and a TRANS delay. There's no room in
283  // the encoding for an SALU delay as well, so just drop it.
284  } else if (Imm & 0xf) {
285  Imm |= (Delay.SALUCycles + 8) << 7;
286  } else {
287  Imm |= Delay.SALUCycles + 8;
288  }
289  }
290 
291  // Don't emit the s_delay_alu instruction if there's nothing to wait for.
292  if (!Imm)
293  return LastDelayAlu;
294 
295  // If we only need to wait for one instruction, try encoding it in the last
296  // s_delay_alu that we emitted.
297  if (!(Imm & 0x780) && LastDelayAlu) {
298  unsigned Skip = 0;
299  for (auto I = MachineBasicBlock::instr_iterator(LastDelayAlu),
301  ++I != E;) {
302  if (!I->isBundle() && !I->isMetaInstruction())
303  ++Skip;
304  }
305  if (Skip < 6) {
306  MachineOperand &Op = LastDelayAlu->getOperand(0);
307  unsigned LastImm = Op.getImm();
308  assert((LastImm & ~0xf) == 0 &&
309  "Remembered an s_delay_alu with no room for another delay!");
310  LastImm |= Imm << 7 | Skip << 4;
311  Op.setImm(LastImm);
312  return nullptr;
313  }
314  }
315 
316  auto &MBB = *MI.getParent();
317  MachineInstr *DelayAlu =
318  BuildMI(MBB, MI, DebugLoc(), SII->get(AMDGPU::S_DELAY_ALU)).addImm(Imm);
319  // Remember the s_delay_alu for next time if there is still room in it to
320  // encode another delay.
321  return (Imm & 0x780) ? nullptr : DelayAlu;
322  }
323 
324  bool runOnMachineBasicBlock(MachineBasicBlock &MBB, bool Emit) {
325  DelayState State;
326  for (auto *Pred : MBB.predecessors())
327  State.merge(BlockState[Pred]);
328 
329  LLVM_DEBUG(dbgs() << " State at start of " << printMBBReference(MBB)
330  << "\n";
331  State.dump(TRI););
332 
333  bool Changed = false;
334  MachineInstr *LastDelayAlu = nullptr;
335 
336  // Iterate over the contents of bundles, but don't emit any instructions
337  // inside a bundle.
338  for (auto &MI : MBB.instrs()) {
339  if (MI.isBundle() || MI.isMetaInstruction())
340  continue;
341 
342  // Ignore some more instructions that do not generate any code.
343  switch (MI.getOpcode()) {
344  case AMDGPU::SI_RETURN_TO_EPILOG:
345  continue;
346  }
347 
348  DelayType Type = getDelayType(MI.getDesc().TSFlags);
349 
350  if (instructionWaitsForVALU(MI)) {
351  // Forget about all outstanding VALU delays.
352  State = DelayState();
353  } else if (Type != OTHER) {
354  DelayInfo Delay;
355  // TODO: Scan implicit uses too?
356  for (const auto &Op : MI.explicit_uses()) {
357  if (Op.isReg()) {
358  // One of the operands of the writelane is also the output operand.
359  // This creates the insertion of redundant delays. Hence, we have to
360  // ignore this operand.
361  if (MI.getOpcode() == AMDGPU::V_WRITELANE_B32 && Op.isTied())
362  continue;
363  for (MCRegUnitIterator UI(Op.getReg(), TRI); UI.isValid(); ++UI) {
364  auto It = State.find(*UI);
365  if (It != State.end()) {
366  Delay.merge(It->second);
367  State.erase(*UI);
368  }
369  }
370  }
371  }
372  if (Emit && !MI.isBundledWithPred()) {
373  // TODO: For VALU->SALU delays should we use s_delay_alu or s_nop or
374  // just ignore them?
375  LastDelayAlu = emitDelayAlu(MI, Delay, LastDelayAlu);
376  }
377  }
378 
379  if (Type != OTHER) {
380  // TODO: Scan implicit defs too?
381  for (const auto &Op : MI.defs()) {
382  unsigned Latency = SchedModel.computeOperandLatency(
383  &MI, MI.getOperandNo(&Op), nullptr, 0);
384  for (MCRegUnitIterator UI(Op.getReg(), TRI); UI.isValid(); ++UI)
385  State[*UI] = DelayInfo(Type, Latency);
386  }
387  }
388 
389  // Advance by the number of cycles it takes to issue this instruction.
390  // TODO: Use a more advanced model that accounts for instructions that
391  // take multiple cycles to issue on a particular pipeline.
392  unsigned Cycles = SIInstrInfo::getNumWaitStates(MI);
393  // TODO: In wave64 mode, double the number of cycles for VALU and VMEM
394  // instructions on the assumption that they will usually have to be issued
395  // twice?
396  State.advance(Type, Cycles);
397 
398  LLVM_DEBUG(dbgs() << " State after " << MI; State.dump(TRI););
399  }
400 
401  if (Emit) {
402  assert(State == BlockState[&MBB] &&
403  "Basic block state should not have changed on final pass!");
404  } else if (State != BlockState[&MBB]) {
405  BlockState[&MBB] = std::move(State);
406  Changed = true;
407  }
408  return Changed;
409  }
410 
411  bool runOnMachineFunction(MachineFunction &MF) override {
412  if (skipFunction(MF.getFunction()))
413  return false;
414 
415  LLVM_DEBUG(dbgs() << "AMDGPUInsertDelayAlu running on " << MF.getName()
416  << "\n");
417 
418  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
419  if (!ST.hasDelayAlu())
420  return false;
421 
422  SII = ST.getInstrInfo();
423  TRI = ST.getRegisterInfo();
424 
425  SchedModel.init(&ST);
426 
427  // Calculate the delay state for each basic block, iterating until we reach
428  // a fixed point.
430  for (auto &MBB : reverse(MF))
431  WorkList.insert(&MBB);
432  while (!WorkList.empty()) {
433  auto &MBB = *WorkList.pop_back_val();
434  bool Changed = runOnMachineBasicBlock(MBB, false);
435  if (Changed)
436  WorkList.insert(MBB.succ_begin(), MBB.succ_end());
437  }
438 
439  LLVM_DEBUG(dbgs() << "Final pass over all BBs\n");
440 
441  // Make one last pass over all basic blocks to emit s_delay_alu
442  // instructions.
443  bool Changed = false;
444  for (auto &MBB : MF)
445  Changed |= runOnMachineBasicBlock(MBB, true);
446  return Changed;
447  }
448 };
449 
450 } // namespace
451 
452 char AMDGPUInsertDelayAlu::ID = 0;
453 
455 
456 INITIALIZE_PASS(AMDGPUInsertDelayAlu, DEBUG_TYPE, "AMDGPU Insert Delay ALU",
457  false, false)
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:109
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::SIInstrFlags::DS
@ DS
Definition: SIDefines.h:60
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:818
llvm::Latency
@ Latency
Definition: SIMachineScheduler.h:34
DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPUInsertDelayAlu.cpp:22
llvm::MachineBasicBlock::instrs
instr_range instrs()
Definition: MachineBasicBlock.h:300
llvm::SIInstrInfo::getNumWaitStates
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
Definition: SIInstrInfo.cpp:1840
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::printMBBReference
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Definition: MachineBasicBlock.cpp:117
llvm::SIInstrFlags::FLAT
@ FLAT
Definition: SIDefines.h:59
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:237
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
llvm::operator!=
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2012
llvm::SIInstrFlags::VALU
@ VALU
Definition: SIDefines.h:30
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:877
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1628
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:167
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::SIInstrFlags::EXP
@ EXP
Definition: SIDefines.h:58
llvm::TargetSchedModel::init
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
Definition: TargetSchedule.cpp:47
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::SIInstrFlags::SALU
@ SALU
Definition: SIDefines.h:29
GCNSubtarget.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineBasicBlock::succ_end
succ_iterator succ_end()
Definition: MachineBasicBlock.h:371
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:37
Emit
dxil metadata DXIL Metadata Emit
Definition: DXILTranslateMetadata.cpp:65
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
merge
static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B)
Definition: LoopDeletion.cpp:53
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
llvm::SetVector::empty
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
llvm::printRegUnit
Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI)
Create Printable object to print register units on a raw_ostream.
Definition: TargetRegisterInfo.cpp:142
llvm::SIInstrFlags::MTBUF
@ MTBUF
Definition: SIDefines.h:55
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1682
TSFlags
uint64_t TSFlags
Definition: RISCVInsertVSETVLI.cpp:595
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:657
AMDGPUMCTargetDesc.h
llvm::TargetSchedModel
Provide an instruction scheduling machine model to CodeGen passes.
Definition: TargetSchedule.h:30
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
uint64_t
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::DenseMap
Definition: DenseMap.h:714
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::MachineFunction::getName
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Definition: MachineFunction.cpp:567
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MachineBasicBlock::succ_begin
succ_iterator succ_begin()
Definition: MachineBasicBlock.h:369
llvm::operator==
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:2010
llvm::MachineBasicBlock::predecessors
iterator_range< pred_iterator > predecessors()
Definition: MachineBasicBlock.h:386
llvm::TargetSchedModel::computeOperandLatency
unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
Definition: TargetSchedule.cpp:168
SIInstrInfo.h
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1715
llvm::SetVector::pop_back_val
T pop_back_val()
Definition: SetVector.h:232
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:265
AMDGPU.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::SIInstrFlags::MUBUF
@ MUBUF
Definition: SIDefines.h:54
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::SIInstrFlags::MIMG
@ MIMG
Definition: SIDefines.h:57
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:623
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
llvm::ilist_iterator
Iterator for intrusive lists based on ilist_node.
Definition: ilist_iterator.h:57
llvm::MachineBasicBlock::instr_iterator
Instructions::iterator instr_iterator
Definition: MachineBasicBlock.h:264
llvm::RISCVMatInt::Imm
@ Imm
Definition: RISCVMatInt.h:23
llvm::SIInstrInfo
Definition: SIInstrInfo.h:44
llvm::MCRegUnitIterator
Definition: MCRegisterInfo.h:680
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:357
llvm::MCRegisterInfo::DiffListIterator::isValid
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
Definition: MCRegisterInfo.h:224
llvm::reverse
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:485
llvm::SIInstrFlags::TRANS
@ TRANS
Definition: SIDefines.h:51
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::AMDGPUInsertDelayAluID
char & AMDGPUInsertDelayAluID
Definition: AMDGPUInsertDelayAlu.cpp:454
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:667
SetVector.h