//===-- SILowerControlFlow.cpp - Use predicates for control flow ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass lowers the pseudo control flow instructions to real
/// machine instructions.
///
/// All control flow is handled using predicated instructions and
/// a predicate stack. Each Scalar ALU controls the operations of 64 Vector
/// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs
/// by writing to the 64-bit EXEC register (each bit corresponds to a
/// single vector ALU). Typically, for predicates, a vector ALU will write
/// to its bit of the VCC register (like EXEC, VCC is 64 bits, one for each
/// Vector ALU) and then the Scalar ALU will AND the VCC register with the
/// EXEC register to update the predicates.
///
/// For example:
/// %vcc = V_CMP_GT_F32 %vgpr1, %vgpr2
/// %sgpr0 = SI_IF %vcc
///   %vgpr0 = V_ADD_F32 %vgpr0, %vgpr0
/// %sgpr0 = SI_ELSE %sgpr0
///   %vgpr0 = V_SUB_F32 %vgpr0, %vgpr0
/// SI_END_CF %sgpr0
///
/// becomes:
///
/// %sgpr0 = S_AND_SAVEEXEC_B64 %vcc   // Save and update the exec mask
/// %sgpr0 = S_XOR_B64 %sgpr0, %exec   // Clear live bits from saved exec mask
/// S_CBRANCH_EXECZ label0             // This instruction is an optional
///                                    // optimization which allows us to
///                                    // branch if all the bits of
///                                    // EXEC are zero.
/// %vgpr0 = V_ADD_F32 %vgpr0, %vgpr0  // Do the IF block of the branch
///
/// label0:
/// %sgpr0 = S_OR_SAVEEXEC_B64 %sgpr0  // Restore the exec mask for the Then
///                                    // block
/// %exec = S_XOR_B64 %sgpr0, %exec    // Update the exec mask
/// S_CBRANCH_EXECZ label1             // Use our branch optimization
///                                    // instruction again.
/// %vgpr0 = V_SUB_F32 %vgpr0, %vgpr0  // Do the ELSE block
/// label1:
/// %exec = S_OR_B64 %exec, %sgpr0     // Re-enable saved exec mask bits
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"

using namespace llvm;

#define DEBUG_TYPE "si-lower-control-flow"

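// Gates the optimizeEndCf() cleanup below, which drops an exec-mask restore
// that is immediately subsumed by an outer one; a hidden escape hatch in case
// that optimization ever misbehaves.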
static cl::opt<bool>
RemoveRedundantEndcf("amdgpu-remove-redundant-endcf",
    cl::init(true), cl::ReallyHidden);

namespace {

class SILowerControlFlow : public MachineFunctionPass {
private:
  const SIRegisterInfo *TRI = nullptr;
  const SIInstrInfo *TII = nullptr;
  LiveIntervals *LIS = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  SetVector<MachineInstr*> LoweredEndCf;
  DenseSet<Register> LoweredIf;
  SmallSet<MachineBasicBlock *, 4> KillBlocks;

  const TargetRegisterClass *BoolRC = nullptr;
  unsigned AndOpc;
  unsigned OrOpc;
  unsigned XorOpc;
  unsigned MovTermOpc;
  unsigned Andn2TermOpc;
  unsigned XorTermrOpc;
  unsigned OrTermrOpc;
  unsigned OrSaveExecOpc;
  unsigned Exec;

  bool hasKill(const MachineBasicBlock *Begin, const MachineBasicBlock *End);

  void emitIf(MachineInstr &MI);
  void emitElse(MachineInstr &MI);
  void emitIfBreak(MachineInstr &MI);
  void emitLoop(MachineInstr &MI);

  MachineBasicBlock *emitEndCf(MachineInstr &MI);

  void lowerInitExec(MachineBasicBlock *MBB, MachineInstr &MI);

  void findMaskOperands(MachineInstr &MI, unsigned OpNo,
                        SmallVectorImpl<MachineOperand> &Src) const;

  void combineMasks(MachineInstr &MI);

  bool removeMBBifRedundant(MachineBasicBlock &MBB);

  MachineBasicBlock *process(MachineInstr &MI);

  // Skip to the next instruction, ignoring debug instructions and trivial
  // block boundaries (blocks that have one (typically fallthrough) successor,
  // where the successor has one predecessor).
  MachineBasicBlock::iterator
  skipIgnoreExecInstsTrivialSucc(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator It) const;

  /// Find the insertion point for a new conditional branch.
  MachineBasicBlock::iterator
  skipToUncondBrOrEnd(MachineBasicBlock &MBB,
                      MachineBasicBlock::iterator I) const {
    assert(I->isTerminator());

    // FIXME: What if we had multiple pre-existing conditional branches?
    MachineBasicBlock::iterator End = MBB.end();
    while (I != End && !I->isUnconditionalBranch())
      ++I;
    return I;
  }

  // Remove redundant SI_END_CF instructions.
  void optimizeEndCf();

public:
  static char ID;

  SILowerControlFlow() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "SI Lower control flow pseudo instructions";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Should preserve the same set that TwoAddressInstructions does.
    AU.addPreserved<SlotIndexes>();
    AU.addPreserved<LiveIntervals>();
    AU.addPreservedID(LiveVariablesID);
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char SILowerControlFlow::ID = 0;

INITIALIZE_PASS(SILowerControlFlow, DEBUG_TYPE,
                "SI lower control flow", false, false)

static void setImpSCCDefDead(MachineInstr &MI, bool IsDead) {
  MachineOperand &ImpDefSCC = MI.getOperand(3);
  assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef());

  ImpDefSCC.setIsDead(IsDead);
}

char &llvm::SILowerControlFlowID = SILowerControlFlow::ID;

bool SILowerControlFlow::hasKill(const MachineBasicBlock *Begin,
                                 const MachineBasicBlock *End) {
  DenseSet<const MachineBasicBlock*> Visited;
  SmallVector<MachineBasicBlock *, 4> Worklist(Begin->successors());

  while (!Worklist.empty()) {
    MachineBasicBlock *MBB = Worklist.pop_back_val();

    if (MBB == End || !Visited.insert(MBB).second)
      continue;
    if (KillBlocks.contains(MBB))
      return true;

    Worklist.append(MBB->succ_begin(), MBB->succ_end());
  }

  return false;
}

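// A "simple" SI_IF is one whose saved-exec result is consumed only by the
// matching SI_END_CF; emitIf() can then return the full saved exec mask
// rather than just the bits it cleared.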
static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI) {
  Register SaveExecReg = MI.getOperand(0).getReg();
  auto U = MRI->use_instr_nodbg_begin(SaveExecReg);

  if (U == MRI->use_instr_nodbg_end() ||
      std::next(U) != MRI->use_instr_nodbg_end() ||
      U->getOpcode() != AMDGPU::SI_END_CF)
    return false;

  return true;
}

void SILowerControlFlow::emitIf(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();
  MachineBasicBlock::iterator I(&MI);
  Register SaveExecReg = MI.getOperand(0).getReg();
  MachineOperand &Cond = MI.getOperand(1);
  assert(Cond.getSubReg() == AMDGPU::NoSubRegister);

  MachineOperand &ImpDefSCC = MI.getOperand(4);
  assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef());

  // If there is only one use of the save exec register and that use is
  // SI_END_CF, we can optimize SI_IF by returning the full saved exec mask
  // instead of just the cleared bits.
  bool SimpleIf = isSimpleIf(MI, MRI);

  if (SimpleIf) {
    // Check for SI_KILL_*_TERMINATOR on the path from if to endif.
    // If there is any such terminator, the simplification is not safe.
    auto UseMI = MRI->use_instr_nodbg_begin(SaveExecReg);
    SimpleIf = !hasKill(MI.getParent(), UseMI->getParent());
  }

  // Add an implicit def of exec to discourage scheduling VALU after this which
  // will interfere with trying to form s_and_saveexec_b64 later.
  Register CopyReg = SimpleIf ? SaveExecReg
                              : MRI->createVirtualRegister(BoolRC);
  MachineInstr *CopyExec =
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), CopyReg)
          .addReg(Exec)
          .addReg(Exec, RegState::ImplicitDefine);
  LoweredIf.insert(CopyReg);

  Register Tmp = MRI->createVirtualRegister(BoolRC);

  MachineInstr *And =
      BuildMI(MBB, I, DL, TII->get(AndOpc), Tmp)
          .addReg(CopyReg)
          .add(Cond);

  setImpSCCDefDead(*And, true);

  MachineInstr *Xor = nullptr;
  if (!SimpleIf) {
    Xor =
        BuildMI(MBB, I, DL, TII->get(XorOpc), SaveExecReg)
            .addReg(Tmp)
            .addReg(CopyReg);
    setImpSCCDefDead(*Xor, ImpDefSCC.isDead());
  }

  // Use a copy that is a terminator to get correct spill code placement with
  // fast regalloc.
  MachineInstr *SetExec =
      BuildMI(MBB, I, DL, TII->get(MovTermOpc), Exec)
          .addReg(Tmp, RegState::Kill);

  // Skip ahead to the unconditional branch in case there are other terminators
  // present.
  I = skipToUncondBrOrEnd(MBB, I);

  // Insert the S_CBRANCH_EXECZ instruction, which will be optimized later
  // during SIRemoveShortExecBranches.
  MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
                            .add(MI.getOperand(2));

  if (!LIS) {
    MI.eraseFromParent();
    return;
  }

  LIS->InsertMachineInstrInMaps(*CopyExec);

  // Replace MI with the AND so we don't need to fix the live interval for the
  // condition register.
  LIS->ReplaceMachineInstrInMaps(MI, *And);

  if (!SimpleIf)
    LIS->InsertMachineInstrInMaps(*Xor);
  LIS->InsertMachineInstrInMaps(*SetExec);
  LIS->InsertMachineInstrInMaps(*NewBr);

  LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
  MI.eraseFromParent();

  // FIXME: Is there a better way of adjusting the liveness? It shouldn't be
  // hard to add another def here but I'm not sure how to correctly update the
  // valno.
  LIS->removeInterval(SaveExecReg);
  LIS->createAndComputeVirtRegInterval(SaveExecReg);
  LIS->createAndComputeVirtRegInterval(Tmp);
  if (!SimpleIf)
    LIS->createAndComputeVirtRegInterval(CopyReg);
}

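// Lower SI_ELSE. As in the file header's example, the emitted sequence
// restores the mask saved by SI_IF with S_OR_SAVEEXEC, re-masks it against
// the current exec (which the "then" block may have changed), flips exec to
// the complementary lanes with a terminator XOR, and branches past the else
// block if no lanes remain active.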
void SILowerControlFlow::emitElse(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();

  Register DstReg = MI.getOperand(0).getReg();

  MachineBasicBlock::iterator Start = MBB.begin();

  // This must be inserted before phis and any spill code inserted before the
  // else.
  Register SaveReg = MRI->createVirtualRegister(BoolRC);
  MachineInstr *OrSaveExec =
      BuildMI(MBB, Start, DL, TII->get(OrSaveExecOpc), SaveReg)
          .add(MI.getOperand(1)); // Saved EXEC

  MachineBasicBlock *DestBB = MI.getOperand(2).getMBB();

  MachineBasicBlock::iterator ElsePt(MI);

  // This accounts for any modification of the EXEC mask within the block and
  // can be optimized out pre-RA when not required.
  MachineInstr *And = BuildMI(MBB, ElsePt, DL, TII->get(AndOpc), DstReg)
                          .addReg(Exec)
                          .addReg(SaveReg);

  if (LIS)
    LIS->InsertMachineInstrInMaps(*And);

  MachineInstr *Xor =
      BuildMI(MBB, ElsePt, DL, TII->get(XorTermrOpc), Exec)
          .addReg(Exec)
          .addReg(DstReg);

  // Skip ahead to the unconditional branch in case there are other terminators
  // present.
  ElsePt = skipToUncondBrOrEnd(MBB, ElsePt);

  MachineInstr *Branch =
      BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
          .addMBB(DestBB);

  if (!LIS) {
    MI.eraseFromParent();
    return;
  }

  LIS->RemoveMachineInstrFromMaps(MI);
  MI.eraseFromParent();

  LIS->InsertMachineInstrInMaps(*OrSaveExec);
  LIS->InsertMachineInstrInMaps(*Xor);
  LIS->InsertMachineInstrInMaps(*Branch);

  LIS->removeInterval(DstReg);
  LIS->createAndComputeVirtRegInterval(DstReg);
  LIS->createAndComputeVirtRegInterval(SaveReg);

  // Let this be recomputed.
  LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
}

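// Lower SI_IF_BREAK, which accumulates the lanes that want to leave a loop.
// In wave64 form this is roughly (illustrative register names):
//   %and = S_AND_B64 %exec, %break_cond  // Limit the condition to live lanes
//   %dst = S_OR_B64 %and, %old_exit_mask
// with the AND elided when the condition is already masked by exec (see
// below).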
void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();
  auto Dst = MI.getOperand(0).getReg();

  // Skip ANDing with exec if the break condition is already masked by exec
  // because it is a V_CMP in the same basic block. (We know the break
  // condition operand was an i1 in IR, so if it is a VALU instruction it must
  // be one with a carry-out.)
  bool SkipAnding = false;
  if (MI.getOperand(1).isReg()) {
    if (MachineInstr *Def = MRI->getUniqueVRegDef(MI.getOperand(1).getReg())) {
      SkipAnding = Def->getParent() == MI.getParent()
                   && SIInstrInfo::isVALU(*Def);
    }
  }

  // AND the break condition operand with exec, then OR that into the "loop
  // exit" mask.
  MachineInstr *And = nullptr, *Or = nullptr;
  if (!SkipAnding) {
    Register AndReg = MRI->createVirtualRegister(BoolRC);
    And = BuildMI(MBB, &MI, DL, TII->get(AndOpc), AndReg)
              .addReg(Exec)
              .add(MI.getOperand(1));
    Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
             .addReg(AndReg)
             .add(MI.getOperand(2));
    if (LIS)
      LIS->createAndComputeVirtRegInterval(AndReg);
  } else
    Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
             .add(MI.getOperand(1))
             .add(MI.getOperand(2));

  if (LIS) {
    if (And)
      LIS->InsertMachineInstrInMaps(*And);
    LIS->ReplaceMachineInstrInMaps(MI, *Or);
  }

  MI.eraseFromParent();
}

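// Lower SI_LOOP: clear the exiting lanes from exec with a terminator ANDN2
// and keep looping while any lanes remain, roughly (wave64, illustrative
// names):
//   %exec = S_ANDN2_B64_term %exec, %exit_mask
//   S_CBRANCH_EXECNZ %loop_header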
void SILowerControlFlow::emitLoop(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();

  MachineInstr *AndN2 =
      BuildMI(MBB, &MI, DL, TII->get(Andn2TermOpc), Exec)
          .addReg(Exec)
          .add(MI.getOperand(0));

  auto BranchPt = skipToUncondBrOrEnd(MBB, MI.getIterator());
  MachineInstr *Branch =
      BuildMI(MBB, BranchPt, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
          .add(MI.getOperand(1));

  if (LIS) {
    LIS->ReplaceMachineInstrInMaps(MI, *AndN2);
    LIS->InsertMachineInstrInMaps(*Branch);
  }

  MI.eraseFromParent();
}

MachineBasicBlock::iterator
SILowerControlFlow::skipIgnoreExecInstsTrivialSucc(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const {

  SmallSet<const MachineBasicBlock *, 4> Visited;
  MachineBasicBlock *B = &MBB;
  do {
    if (!Visited.insert(B).second)
      return MBB.end();

    auto E = B->end();
    for ( ; It != E; ++It) {
      if (TII->mayReadEXEC(*MRI, *It))
        break;
    }

    if (It != E)
      return It;

    if (B->succ_size() != 1)
      return MBB.end();

    // If there is one trivial successor, advance to the next block.
    MachineBasicBlock *Succ = *B->succ_begin();

    It = Succ->begin();
    B = Succ;
  } while (true);
}

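// Lower SI_END_CF: re-enable the lanes disabled at the matching SI_IF or
// SI_ELSE, i.e. "%exec = S_OR_B64 %exec, %saved_mask" (the _term form when
// the block must be split for correct spill placement).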
MachineBasicBlock *SILowerControlFlow::emitEndCf(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();

  MachineBasicBlock::iterator InsPt = MBB.begin();

  // If we have instructions that aren't prolog instructions, split the block
  // and emit a terminator instruction. This ensures correct spill placement.
  // FIXME: We should unconditionally split the block here.
  bool NeedBlockSplit = false;
  Register DataReg = MI.getOperand(0).getReg();
  for (MachineBasicBlock::iterator I = InsPt, E = MI.getIterator();
       I != E; ++I) {
    if (I->modifiesRegister(DataReg, TRI)) {
      NeedBlockSplit = true;
      break;
    }
  }

  unsigned Opcode = OrOpc;
  MachineBasicBlock *SplitBB = &MBB;
  if (NeedBlockSplit) {
    SplitBB = MBB.splitAt(MI, /*UpdateLiveIns*/true, LIS);
    Opcode = OrTermrOpc;
    InsPt = MI;
  }

  MachineInstr *NewMI =
      BuildMI(MBB, InsPt, DL, TII->get(Opcode), Exec)
          .addReg(Exec)
          .add(MI.getOperand(0));

  LoweredEndCf.insert(NewMI);

  if (LIS)
    LIS->ReplaceMachineInstrInMaps(MI, *NewMI);

  MI.eraseFromParent();

  if (LIS)
    LIS->handleMove(*NewMI);
  return SplitBB;
}

// Returns the replacement operands for a logical operation: either a single
// result for exec, or two operands if the source was another equivalent
// operation.
void SILowerControlFlow::findMaskOperands(MachineInstr &MI, unsigned OpNo,
       SmallVectorImpl<MachineOperand> &Src) const {
  MachineOperand &Op = MI.getOperand(OpNo);
  if (!Op.isReg() || !Op.getReg().isVirtual()) {
    Src.push_back(Op);
    return;
  }

  MachineInstr *Def = MRI->getUniqueVRegDef(Op.getReg());
  if (!Def || Def->getParent() != MI.getParent() ||
      !(Def->isFullCopy() || (Def->getOpcode() == MI.getOpcode())))
    return;

  // Make sure we do not modify exec between def and use.
  // A copy with an implicitly defined exec inserted earlier is an exception:
  // it does not really modify exec.
  for (auto I = Def->getIterator(); I != MI.getIterator(); ++I)
    if (I->modifiesRegister(AMDGPU::EXEC, TRI) &&
        !(I->isCopy() && I->getOperand(0).getReg() != Exec))
      return;

  for (const auto &SrcOp : Def->explicit_operands())
    if (SrcOp.isReg() && SrcOp.isUse() &&
        (SrcOp.getReg().isVirtual() || SrcOp.getReg() == Exec))
      Src.push_back(SrcOp);
}

// Search for and combine pairs of equivalent instructions, like
// S_AND_B64 x, (S_AND_B64 x, y) => S_AND_B64 x, y
// S_OR_B64 x, (S_OR_B64 x, y) => S_OR_B64 x, y
// One of the operands is the exec mask.
void SILowerControlFlow::combineMasks(MachineInstr &MI) {
  assert(MI.getNumExplicitOperands() == 3);
  SmallVector<MachineOperand, 4> Ops;
  unsigned OpToReplace = 1;
  findMaskOperands(MI, 1, Ops);
  if (Ops.size() == 1) OpToReplace = 2; // First operand can be exec or its copy
  findMaskOperands(MI, 2, Ops);
  if (Ops.size() != 3) return;

  unsigned UniqueOpndIdx;
  if (Ops[0].isIdenticalTo(Ops[1])) UniqueOpndIdx = 2;
  else if (Ops[0].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
  else if (Ops[1].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
  else return;

  Register Reg = MI.getOperand(OpToReplace).getReg();
  MI.RemoveOperand(OpToReplace);
  MI.addOperand(Ops[UniqueOpndIdx]);
  if (MRI->use_empty(Reg))
    MRI->getUniqueVRegDef(Reg)->eraseFromParent();
}

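// Sketch of the redundancy being removed (wave64, illustrative names):
//   %exec = S_OR_B64 %exec, %inner_saved  // Lowered inner SI_END_CF
//   %exec = S_OR_B64 %exec, %outer_saved  // Lowered outer SI_END_CF
// When %outer_saved is the unmodified exec copy made at an SI_IF, the outer
// restore re-enables a superset of the lanes the inner one would, so the
// inner restore can be erased.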
void SILowerControlFlow::optimizeEndCf() {
  // If the only instruction immediately following this END_CF is another
  // END_CF in the only successor, we can avoid emitting the exec mask
  // restore here.
  if (!RemoveRedundantEndcf)
    return;

  for (MachineInstr *MI : LoweredEndCf) {
    MachineBasicBlock &MBB = *MI->getParent();
    auto Next =
        skipIgnoreExecInstsTrivialSucc(MBB, std::next(MI->getIterator()));
    if (Next == MBB.end() || !LoweredEndCf.count(&*Next))
      continue;
    // Only skip the inner END_CF if the outer END_CF belongs to an SI_IF.
    // If it belongs to an SI_ELSE then the saved mask has an inverted value.
    Register SavedExec
        = TII->getNamedOperand(*Next, AMDGPU::OpName::src1)->getReg();
    assert(SavedExec.isVirtual() && "Expected saved exec to be src1!");

    const MachineInstr *Def = MRI->getUniqueVRegDef(SavedExec);
    if (Def && LoweredIf.count(SavedExec)) {
      LLVM_DEBUG(dbgs() << "Skip redundant "; MI->dump());
      if (LIS)
        LIS->RemoveMachineInstrFromMaps(*MI);
      MI->eraseFromParent();
      removeMBBifRedundant(MBB);
    }
  }
}

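// Dispatch one pseudo to its lowering routine, then fold any exec-mask bit
// manipulations the lowering exposed. Returns the block that now holds the
// lowered code, since emitEndCf() may have split the original block.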
MachineBasicBlock *SILowerControlFlow::process(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  MachineBasicBlock::iterator I(MI);
  MachineInstr *Prev = (I != MBB.begin()) ? &*(std::prev(I)) : nullptr;

  MachineBasicBlock *SplitBB = &MBB;

  switch (MI.getOpcode()) {
  case AMDGPU::SI_IF:
    emitIf(MI);
    break;

  case AMDGPU::SI_ELSE:
    emitElse(MI);
    break;

  case AMDGPU::SI_IF_BREAK:
    emitIfBreak(MI);
    break;

  case AMDGPU::SI_LOOP:
    emitLoop(MI);
    break;

  case AMDGPU::SI_WATERFALL_LOOP:
    MI.setDesc(TII->get(AMDGPU::S_CBRANCH_EXECNZ));
    break;

  case AMDGPU::SI_END_CF:
    SplitBB = emitEndCf(MI);
    break;

  default:
    assert(false && "Attempt to process unsupported instruction");
    break;
  }

  MachineBasicBlock::iterator Next;
  for (I = Prev ? Prev->getIterator() : MBB.begin(); I != MBB.end(); I = Next) {
    Next = std::next(I);
    MachineInstr &MaskMI = *I;
    switch (MaskMI.getOpcode()) {
    case AMDGPU::S_AND_B64:
    case AMDGPU::S_OR_B64:
    case AMDGPU::S_AND_B32:
    case AMDGPU::S_OR_B32:
      // Clean up bit manipulations on the exec mask.
      combineMasks(MaskMI);
      break;
    default:
      I = MBB.end();
      break;
    }
  }

  return SplitBB;
}

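// Lower SI_INIT_EXEC and SI_INIT_EXEC_FROM_INPUT. For the FROM_INPUT form
// the thread count is extracted with S_BFE_U32, whose immediate operand
// packs the field offset in its low bits and the field width starting at
// bit 16; the "| 0x70000" below therefore requests a 7-bit count field at
// the given shift.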
void SILowerControlFlow::lowerInitExec(MachineBasicBlock *MBB,
                                       MachineInstr &MI) {
  MachineFunction &MF = *MBB->getParent();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  bool IsWave32 = ST.isWave32();

  if (MI.getOpcode() == AMDGPU::SI_INIT_EXEC) {
    // This should be before all vector instructions.
    BuildMI(*MBB, MBB->begin(), MI.getDebugLoc(),
            TII->get(IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64), Exec)
        .addImm(MI.getOperand(0).getImm());
    if (LIS)
      LIS->RemoveMachineInstrFromMaps(MI);
    MI.eraseFromParent();
    return;
  }

  // Extract the thread count from an SGPR input and set EXEC accordingly.
  // Since BFM can't shift by 64, handle that case with CMP + CMOV.
  //
  // S_BFE_U32 count, input, {shift, 7}
  // S_BFM_B64 exec, count, 0
  // S_CMP_EQ_U32 count, 64
  // S_CMOV_B64 exec, -1
  Register InputReg = MI.getOperand(0).getReg();
  MachineInstr *FirstMI = &*MBB->begin();
  if (InputReg.isVirtual()) {
    MachineInstr *DefInstr = MRI->getVRegDef(InputReg);
    assert(DefInstr && DefInstr->isCopy());
    if (DefInstr->getParent() == MBB) {
      if (DefInstr != FirstMI) {
        // If the `InputReg` is defined in the current block, we also need to
        // move that instruction to the beginning of the block.
        DefInstr->removeFromParent();
        MBB->insert(FirstMI, DefInstr);
        if (LIS)
          LIS->handleMove(*DefInstr);
      } else {
        // If the first instruction is the definition, then move the pointer
        // past it.
        FirstMI = &*std::next(FirstMI->getIterator());
      }
    }
  }

  // Insert instruction sequence at block beginning (before vector operations).
  const DebugLoc DL = MI.getDebugLoc();
  const unsigned WavefrontSize = ST.getWavefrontSize();
  const unsigned Mask = (WavefrontSize << 1) - 1;
  Register CountReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  auto BfeMI = BuildMI(*MBB, FirstMI, DL, TII->get(AMDGPU::S_BFE_U32), CountReg)
                   .addReg(InputReg)
                   .addImm((MI.getOperand(1).getImm() & Mask) | 0x70000);
  auto BfmMI =
      BuildMI(*MBB, FirstMI, DL,
              TII->get(IsWave32 ? AMDGPU::S_BFM_B32 : AMDGPU::S_BFM_B64), Exec)
          .addReg(CountReg)
          .addImm(0);
  auto CmpMI = BuildMI(*MBB, FirstMI, DL, TII->get(AMDGPU::S_CMP_EQ_U32))
                   .addReg(CountReg, RegState::Kill)
                   .addImm(WavefrontSize);
  auto CmovMI =
      BuildMI(*MBB, FirstMI, DL,
              TII->get(IsWave32 ? AMDGPU::S_CMOV_B32 : AMDGPU::S_CMOV_B64),
              Exec)
          .addImm(-1);

  if (!LIS) {
    MI.eraseFromParent();
    return;
  }

  LIS->RemoveMachineInstrFromMaps(MI);
  MI.eraseFromParent();

  LIS->InsertMachineInstrInMaps(*BfeMI);
  LIS->InsertMachineInstrInMaps(*BfmMI);
  LIS->InsertMachineInstrInMaps(*CmpMI);
  LIS->InsertMachineInstrInMaps(*CmovMI);

  LIS->removeInterval(InputReg);
  LIS->createAndComputeVirtRegInterval(InputReg);
  LIS->createAndComputeVirtRegInterval(CountReg);
}

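// A block is removable once lowering leaves it with only debug instructions
// and/or an unconditional branch: fold it into its single successor, rewire
// the predecessors, and keep fallthrough correct by splicing blocks or
// inserting an explicit S_BRANCH.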
bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) {
  auto GetFallThroughSucc = [=](MachineBasicBlock *B) -> MachineBasicBlock * {
    auto *S = B->getNextNode();
    if (!S)
      return nullptr;
    if (B->isSuccessor(S)) {
      // The only fallthrough candidate
      MachineBasicBlock::iterator I(B->getFirstInstrTerminator());
      MachineBasicBlock::iterator E = B->end();
      for (; I != E; I++) {
        if (I->isBranch() && TII->getBranchDestBlock(*I) == S)
          // We have an unoptimized branch to the layout successor.
          return nullptr;
      }
    }
    return S;
  };

  for (auto &I : MBB.instrs()) {
    if (!I.isDebugInstr() && !I.isUnconditionalBranch())
      return false;
  }

  assert(MBB.succ_size() == 1 && "MBB has more than one successor");

  MachineBasicBlock *Succ = *MBB.succ_begin();
  MachineBasicBlock *FallThrough = nullptr;

  while (!MBB.predecessors().empty()) {
    MachineBasicBlock *P = *MBB.pred_begin();
    if (GetFallThroughSucc(P) == &MBB)
      FallThrough = P;
    P->ReplaceUsesOfBlockWith(&MBB, Succ);
  }
  MBB.removeSuccessor(Succ);
  if (LIS) {
    for (auto &I : MBB.instrs())
      LIS->RemoveMachineInstrFromMaps(I);
  }
  MBB.clear();
  MBB.eraseFromParent();
  if (FallThrough && !FallThrough->isLayoutSuccessor(Succ)) {
    if (!GetFallThroughSucc(Succ)) {
      MachineFunction *MF = FallThrough->getParent();
      MachineFunction::iterator FallThroughPos(FallThrough);
      MF->splice(std::next(FallThroughPos), Succ);
    } else
      BuildMI(*FallThrough, FallThrough->end(),
              FallThrough->findBranchDebugLoc(), TII->get(AMDGPU::S_BRANCH))
          .addMBB(Succ);
  }

  return true;
}

bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();
  TRI = &TII->getRegisterInfo();

  // This doesn't actually need LiveIntervals, but we can preserve them.
  LIS = getAnalysisIfAvailable<LiveIntervals>();
  MRI = &MF.getRegInfo();
  BoolRC = TRI->getBoolRC();

  if (ST.isWave32()) {
    AndOpc = AMDGPU::S_AND_B32;
    OrOpc = AMDGPU::S_OR_B32;
    XorOpc = AMDGPU::S_XOR_B32;
    MovTermOpc = AMDGPU::S_MOV_B32_term;
    Andn2TermOpc = AMDGPU::S_ANDN2_B32_term;
    XorTermrOpc = AMDGPU::S_XOR_B32_term;
    OrTermrOpc = AMDGPU::S_OR_B32_term;
    OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B32;
    Exec = AMDGPU::EXEC_LO;
  } else {
    AndOpc = AMDGPU::S_AND_B64;
    OrOpc = AMDGPU::S_OR_B64;
    XorOpc = AMDGPU::S_XOR_B64;
    MovTermOpc = AMDGPU::S_MOV_B64_term;
    Andn2TermOpc = AMDGPU::S_ANDN2_B64_term;
    XorTermrOpc = AMDGPU::S_XOR_B64_term;
    OrTermrOpc = AMDGPU::S_OR_B64_term;
    OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B64;
    Exec = AMDGPU::EXEC;
  }

  // Compute the set of blocks with kills. For pixel shaders, SI_DEMOTE_I1
  // can also disable lanes, so demote blocks count as kill blocks here.
  const bool CanDemote =
      MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS;
  for (auto &MBB : MF) {
    bool IsKillBlock = false;
    for (auto &Term : MBB.terminators()) {
      if (TII->isKillTerminator(Term.getOpcode())) {
        KillBlocks.insert(&MBB);
        IsKillBlock = true;
        break;
      }
    }
    if (CanDemote && !IsKillBlock) {
      for (auto &MI : MBB) {
        if (MI.getOpcode() == AMDGPU::SI_DEMOTE_I1) {
          KillBlocks.insert(&MBB);
          break;
        }
      }
    }
  }

  MachineFunction::iterator NextBB;
  for (MachineFunction::iterator BI = MF.begin();
       BI != MF.end(); BI = NextBB) {
    NextBB = std::next(BI);
    MachineBasicBlock *MBB = &*BI;

    MachineBasicBlock::iterator I, E, Next;
    E = MBB->end();
    for (I = MBB->begin(); I != E; I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;
      MachineBasicBlock *SplitMBB = MBB;

      switch (MI.getOpcode()) {
      case AMDGPU::SI_IF:
      case AMDGPU::SI_ELSE:
      case AMDGPU::SI_IF_BREAK:
      case AMDGPU::SI_WATERFALL_LOOP:
      case AMDGPU::SI_LOOP:
      case AMDGPU::SI_END_CF:
        SplitMBB = process(MI);
        break;

      // FIXME: find a better place for this
      case AMDGPU::SI_INIT_EXEC:
      case AMDGPU::SI_INIT_EXEC_FROM_INPUT:
        lowerInitExec(MBB, MI);
        if (LIS)
          LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
        break;

      default:
        break;
      }

      if (SplitMBB != MBB) {
        MBB = Next->getParent();
        E = MBB->end();
      }
    }
  }

  optimizeEndCf();

  LoweredEndCf.clear();
  LoweredIf.clear();
  KillBlocks.clear();

  return true;
}