LLVM  14.0.0git
SIFixSGPRCopies.cpp
Go to the documentation of this file.
1 //===- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Copies from VGPR to SGPR registers are illegal and the register coalescer
11 /// will sometimes generate these illegal copies in situations like this:
12 ///
13 /// Register Class <vsrc> is the union of <vgpr> and <sgpr>
14 ///
15 /// BB0:
16 /// %0 <sgpr> = SCALAR_INST
17 /// %1 <vsrc> = COPY %0 <sgpr>
18 /// ...
19 /// BRANCH %cond BB1, BB2
20 /// BB1:
21 /// %2 <vgpr> = VECTOR_INST
22 /// %3 <vsrc> = COPY %2 <vgpr>
23 /// BB2:
24 /// %4 <vsrc> = PHI %1 <vsrc>, <%bb.0>, %3 <vsrc>, <%bb.1>
25 /// %5 <vgpr> = VECTOR_INST %4 <vsrc>
26 ///
27 ///
28 /// The coalescer will begin at BB0 and eliminate its copy, then the resulting
29 /// code will look like this:
30 ///
31 /// BB0:
32 /// %0 <sgpr> = SCALAR_INST
33 /// ...
34 /// BRANCH %cond BB1, BB2
35 /// BB1:
36 /// %2 <vgpr> = VECTOR_INST
37 /// %3 <vsrc> = COPY %2 <vgpr>
38 /// BB2:
39 /// %4 <sgpr> = PHI %0 <sgpr>, <%bb.0>, %3 <vsrc>, <%bb.1>
40 /// %5 <vgpr> = VECTOR_INST %4 <sgpr>
41 ///
42 /// Now that the result of the PHI instruction is an SGPR, the register
43 /// allocator is now forced to constrain the register class of %3 to
44 /// <sgpr> so we end up with final code like this:
45 ///
46 /// BB0:
47 /// %0 <sgpr> = SCALAR_INST
48 /// ...
49 /// BRANCH %cond BB1, BB2
50 /// BB1:
51 /// %2 <vgpr> = VECTOR_INST
52 /// %3 <sgpr> = COPY %2 <vgpr>
53 /// BB2:
54 /// %4 <sgpr> = PHI %0 <sgpr>, <%bb.0>, %3 <sgpr>, <%bb.1>
55 /// %5 <vgpr> = VECTOR_INST %4 <sgpr>
56 ///
57 /// Now this code contains an illegal copy from a VGPR to an SGPR.
58 ///
59 /// In order to avoid this problem, this pass searches for PHI instructions
60 /// which define a <vsrc> register and constrains its definition class to
61 /// <vgpr> if the user of the PHI's definition register is a vector instruction.
62 /// If the PHI's definition class is constrained to <vgpr> then the coalescer
63 /// will be unable to perform the COPY removal from the above example which
64 /// ultimately led to the creation of an illegal COPY.
65 //===----------------------------------------------------------------------===//
66 
67 #include "AMDGPU.h"
68 #include "GCNSubtarget.h"
71 #include "llvm/InitializePasses.h"
73 
74 using namespace llvm;
75 
76 #define DEBUG_TYPE "si-fix-sgpr-copies"
77 
79  "amdgpu-enable-merge-m0",
80  cl::desc("Merge and hoist M0 initializations"),
81  cl::init(true));
82 
83 namespace {
84 
85 class SIFixSGPRCopies : public MachineFunctionPass {
87 
88 public:
89  static char ID;
90 
92  const SIRegisterInfo *TRI;
93  const SIInstrInfo *TII;
94 
95  SIFixSGPRCopies() : MachineFunctionPass(ID) {}
96 
97  bool runOnMachineFunction(MachineFunction &MF) override;
98 
99  MachineBasicBlock *processPHINode(MachineInstr &MI);
100 
101  StringRef getPassName() const override { return "SI Fix SGPR copies"; }
102 
103  void getAnalysisUsage(AnalysisUsage &AU) const override {
106  AU.setPreservesCFG();
108  }
109 };
110 
111 } // end anonymous namespace
112 
113 INITIALIZE_PASS_BEGIN(SIFixSGPRCopies, DEBUG_TYPE,
114  "SI Fix SGPR copies", false, false)
117  "SI Fix SGPR copies", false, false)
118 
119 char SIFixSGPRCopies::ID = 0;
120 
121 char &llvm::SIFixSGPRCopiesID = SIFixSGPRCopies::ID;
122 
124  return new SIFixSGPRCopies();
125 }
126 
127 static bool hasVectorOperands(const MachineInstr &MI,
128  const SIRegisterInfo *TRI) {
129  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
130  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
131  if (!MI.getOperand(i).isReg() || !MI.getOperand(i).getReg().isVirtual())
132  continue;
133 
134  if (TRI->hasVectorRegisters(MRI.getRegClass(MI.getOperand(i).getReg())))
135  return true;
136  }
137  return false;
138 }
139 
140 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
142  const SIRegisterInfo &TRI,
143  const MachineRegisterInfo &MRI) {
144  Register DstReg = Copy.getOperand(0).getReg();
145  Register SrcReg = Copy.getOperand(1).getReg();
146 
147  const TargetRegisterClass *SrcRC = SrcReg.isVirtual()
148  ? MRI.getRegClass(SrcReg)
149  : TRI.getPhysRegClass(SrcReg);
150 
151  // We don't really care about the subregister here.
152  // SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg());
153 
154  const TargetRegisterClass *DstRC = DstReg.isVirtual()
155  ? MRI.getRegClass(DstReg)
156  : TRI.getPhysRegClass(DstReg);
157 
158  return std::make_pair(SrcRC, DstRC);
159 }
160 
161 static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC,
162  const TargetRegisterClass *DstRC,
163  const SIRegisterInfo &TRI) {
164  return SrcRC != &AMDGPU::VReg_1RegClass && TRI.isSGPRClass(DstRC) &&
165  TRI.hasVectorRegisters(SrcRC);
166 }
167 
168 static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
169  const TargetRegisterClass *DstRC,
170  const SIRegisterInfo &TRI) {
171  return DstRC != &AMDGPU::VReg_1RegClass && TRI.isSGPRClass(SrcRC) &&
172  TRI.hasVectorRegisters(DstRC);
173 }
174 
176  const SIRegisterInfo *TRI,
177  const SIInstrInfo *TII) {
178  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
179  auto &Src = MI.getOperand(1);
180  Register DstReg = MI.getOperand(0).getReg();
181  Register SrcReg = Src.getReg();
182  if (!SrcReg.isVirtual() || !DstReg.isVirtual())
183  return false;
184 
185  for (const auto &MO : MRI.reg_nodbg_operands(DstReg)) {
186  const auto *UseMI = MO.getParent();
187  if (UseMI == &MI)
188  continue;
189  if (MO.isDef() || UseMI->getParent() != MI.getParent() ||
190  UseMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
191  return false;
192 
193  unsigned OpIdx = UseMI->getOperandNo(&MO);
194  if (OpIdx >= UseMI->getDesc().getNumOperands() ||
195  !TII->isOperandLegal(*UseMI, OpIdx, &Src))
196  return false;
197  }
198  // Change VGPR to SGPR destination.
199  MRI.setRegClass(DstReg, TRI->getEquivalentSGPRClass(MRI.getRegClass(DstReg)));
200  return true;
201 }
202 
203 // Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE.
204 //
205 // SGPRx = ...
206 // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
207 // VGPRz = COPY SGPRy
208 //
209 // ==>
210 //
211 // VGPRx = COPY SGPRx
212 // VGPRz = REG_SEQUENCE VGPRx, sub0
213 //
214 // This exposes immediate folding opportunities when materializing 64-bit
215 // immediates.
217  const SIRegisterInfo *TRI,
218  const SIInstrInfo *TII,
220  assert(MI.isRegSequence());
221 
222  Register DstReg = MI.getOperand(0).getReg();
223  if (!TRI->isSGPRClass(MRI.getRegClass(DstReg)))
224  return false;
225 
226  if (!MRI.hasOneUse(DstReg))
227  return false;
228 
229  MachineInstr &CopyUse = *MRI.use_instr_begin(DstReg);
230  if (!CopyUse.isCopy())
231  return false;
232 
233  // It is illegal to have vreg inputs to a physreg defining reg_sequence.
234  if (CopyUse.getOperand(0).getReg().isPhysical())
235  return false;
236 
237  const TargetRegisterClass *SrcRC, *DstRC;
238  std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI);
239 
240  if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI))
241  return false;
242 
243  if (tryChangeVGPRtoSGPRinCopy(CopyUse, TRI, TII))
244  return true;
245 
246  // TODO: Could have multiple extracts?
247  unsigned SubReg = CopyUse.getOperand(1).getSubReg();
248  if (SubReg != AMDGPU::NoSubRegister)
249  return false;
250 
251  MRI.setRegClass(DstReg, DstRC);
252 
253  // SGPRx = ...
254  // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
255  // VGPRz = COPY SGPRy
256 
257  // =>
258  // VGPRx = COPY SGPRx
259  // VGPRz = REG_SEQUENCE VGPRx, sub0
260 
261  MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());
262  bool IsAGPR = TRI->hasAGPRs(DstRC);
263 
264  for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
265  Register SrcReg = MI.getOperand(I).getReg();
266  unsigned SrcSubReg = MI.getOperand(I).getSubReg();
267 
268  const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
269  assert(TRI->isSGPRClass(SrcRC) &&
270  "Expected SGPR REG_SEQUENCE to only have SGPR inputs");
271 
272  SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg);
273  const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC);
274 
275  Register TmpReg = MRI.createVirtualRegister(NewSrcRC);
276 
277  BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY),
278  TmpReg)
279  .add(MI.getOperand(I));
280 
281  if (IsAGPR) {
282  const TargetRegisterClass *NewSrcRC = TRI->getEquivalentAGPRClass(SrcRC);
283  Register TmpAReg = MRI.createVirtualRegister(NewSrcRC);
284  unsigned Opc = NewSrcRC == &AMDGPU::AGPR_32RegClass ?
285  AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::COPY;
286  BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(Opc),
287  TmpAReg)
288  .addReg(TmpReg, RegState::Kill);
289  TmpReg = TmpAReg;
290  }
291 
292  MI.getOperand(I).setReg(TmpReg);
293  }
294 
295  CopyUse.eraseFromParent();
296  return true;
297 }
298 
299 static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
300  const MachineInstr *MoveImm,
301  const SIInstrInfo *TII,
302  unsigned &SMovOp,
303  int64_t &Imm) {
304  if (Copy->getOpcode() != AMDGPU::COPY)
305  return false;
306 
307  if (!MoveImm->isMoveImmediate())
308  return false;
309 
310  const MachineOperand *ImmOp =
311  TII->getNamedOperand(*MoveImm, AMDGPU::OpName::src0);
312  if (!ImmOp->isImm())
313  return false;
314 
315  // FIXME: Handle copies with sub-regs.
316  if (Copy->getOperand(0).getSubReg())
317  return false;
318 
319  switch (MoveImm->getOpcode()) {
320  default:
321  return false;
322  case AMDGPU::V_MOV_B32_e32:
323  SMovOp = AMDGPU::S_MOV_B32;
324  break;
325  case AMDGPU::V_MOV_B64_PSEUDO:
326  SMovOp = AMDGPU::S_MOV_B64;
327  break;
328  }
329  Imm = ImmOp->getImm();
330  return true;
331 }
332 
333 template <class UnaryPredicate>
335  const MachineBasicBlock *CutOff,
336  UnaryPredicate Predicate) {
337  if (MBB == CutOff)
338  return false;
339 
342 
343  while (!Worklist.empty()) {
344  MachineBasicBlock *MBB = Worklist.pop_back_val();
345 
346  if (!Visited.insert(MBB).second)
347  continue;
348  if (MBB == CutOff)
349  continue;
350  if (Predicate(MBB))
351  return true;
352 
353  Worklist.append(MBB->pred_begin(), MBB->pred_end());
354  }
355 
356  return false;
357 }
358 
359 // Checks if there is potential path From instruction To instruction.
360 // If CutOff is specified and it sits in between of that path we ignore
361 // a higher portion of the path and report it is not reachable.
362 static bool isReachable(const MachineInstr *From,
363  const MachineInstr *To,
364  const MachineBasicBlock *CutOff,
365  MachineDominatorTree &MDT) {
366  if (MDT.dominates(From, To))
367  return true;
368 
369  const MachineBasicBlock *MBBFrom = From->getParent();
370  const MachineBasicBlock *MBBTo = To->getParent();
371 
372  // Do predecessor search.
373  // We should almost never get here since we do not usually produce M0 stores
374  // other than -1.
375  return searchPredecessors(MBBTo, CutOff, [MBBFrom]
376  (const MachineBasicBlock *MBB) { return MBB == MBBFrom; });
377 }
378 
379 // Return the first non-prologue instruction in the block.
383  while (I != MBB->end() && TII->isBasicBlockPrologue(*I))
384  ++I;
385 
386  return I;
387 }
388 
389 // Hoist and merge identical SGPR initializations into a common predecessor.
390 // This is intended to combine M0 initializations, but can work with any
391 // SGPR. A VGPR cannot be processed since we cannot guarantee vector
392 // execution.
393 static bool hoistAndMergeSGPRInits(unsigned Reg,
394  const MachineRegisterInfo &MRI,
395  const TargetRegisterInfo *TRI,
397  const TargetInstrInfo *TII) {
398  // List of inits by immediate value.
399  using InitListMap = std::map<unsigned, std::list<MachineInstr *>>;
400  InitListMap Inits;
401  // List of clobbering instructions.
403  // List of instructions marked for deletion.
404  SmallSet<MachineInstr*, 8> MergedInstrs;
405 
406  bool Changed = false;
407 
408  for (auto &MI : MRI.def_instructions(Reg)) {
409  MachineOperand *Imm = nullptr;
410  for (auto &MO : MI.operands()) {
411  if ((MO.isReg() && ((MO.isDef() && MO.getReg() != Reg) || !MO.isDef())) ||
412  (!MO.isImm() && !MO.isReg()) || (MO.isImm() && Imm)) {
413  Imm = nullptr;
414  break;
415  } else if (MO.isImm())
416  Imm = &MO;
417  }
418  if (Imm)
419  Inits[Imm->getImm()].push_front(&MI);
420  else
421  Clobbers.push_back(&MI);
422  }
423 
424  for (auto &Init : Inits) {
425  auto &Defs = Init.second;
426 
427  for (auto I1 = Defs.begin(), E = Defs.end(); I1 != E; ) {
428  MachineInstr *MI1 = *I1;
429 
430  for (auto I2 = std::next(I1); I2 != E; ) {
431  MachineInstr *MI2 = *I2;
432 
433  // Check any possible interference
434  auto interferes = [&](MachineBasicBlock::iterator From,
435  MachineBasicBlock::iterator To) -> bool {
436 
437  assert(MDT.dominates(&*To, &*From));
438 
439  auto interferes = [&MDT, From, To](MachineInstr* &Clobber) -> bool {
440  const MachineBasicBlock *MBBFrom = From->getParent();
441  const MachineBasicBlock *MBBTo = To->getParent();
442  bool MayClobberFrom = isReachable(Clobber, &*From, MBBTo, MDT);
443  bool MayClobberTo = isReachable(Clobber, &*To, MBBTo, MDT);
444  if (!MayClobberFrom && !MayClobberTo)
445  return false;
446  if ((MayClobberFrom && !MayClobberTo) ||
447  (!MayClobberFrom && MayClobberTo))
448  return true;
449  // Both can clobber, this is not an interference only if both are
450  // dominated by Clobber and belong to the same block or if Clobber
451  // properly dominates To, given that To >> From, so it dominates
452  // both and located in a common dominator.
453  return !((MBBFrom == MBBTo &&
454  MDT.dominates(Clobber, &*From) &&
455  MDT.dominates(Clobber, &*To)) ||
456  MDT.properlyDominates(Clobber->getParent(), MBBTo));
457  };
458 
459  return (llvm::any_of(Clobbers, interferes)) ||
460  (llvm::any_of(Inits, [&](InitListMap::value_type &C) {
461  return C.first != Init.first &&
462  llvm::any_of(C.second, interferes);
463  }));
464  };
465 
466  if (MDT.dominates(MI1, MI2)) {
467  if (!interferes(MI2, MI1)) {
468  LLVM_DEBUG(dbgs()
469  << "Erasing from "
470  << printMBBReference(*MI2->getParent()) << " " << *MI2);
471  MergedInstrs.insert(MI2);
472  Changed = true;
473  ++I2;
474  continue;
475  }
476  } else if (MDT.dominates(MI2, MI1)) {
477  if (!interferes(MI1, MI2)) {
478  LLVM_DEBUG(dbgs()
479  << "Erasing from "
480  << printMBBReference(*MI1->getParent()) << " " << *MI1);
481  MergedInstrs.insert(MI1);
482  Changed = true;
483  ++I1;
484  break;
485  }
486  } else {
487  auto *MBB = MDT.findNearestCommonDominator(MI1->getParent(),
488  MI2->getParent());
489  if (!MBB) {
490  ++I2;
491  continue;
492  }
493 
495  if (!interferes(MI1, I) && !interferes(MI2, I)) {
496  LLVM_DEBUG(dbgs()
497  << "Erasing from "
498  << printMBBReference(*MI1->getParent()) << " " << *MI1
499  << "and moving from "
500  << printMBBReference(*MI2->getParent()) << " to "
501  << printMBBReference(*I->getParent()) << " " << *MI2);
502  I->getParent()->splice(I, MI2->getParent(), MI2);
503  MergedInstrs.insert(MI1);
504  Changed = true;
505  ++I1;
506  break;
507  }
508  }
509  ++I2;
510  }
511  ++I1;
512  }
513  }
514 
515  // Remove initializations that were merged into another.
516  for (auto &Init : Inits) {
517  auto &Defs = Init.second;
518  auto I = Defs.begin();
519  while (I != Defs.end()) {
520  if (MergedInstrs.count(*I)) {
521  (*I)->eraseFromParent();
522  I = Defs.erase(I);
523  } else
524  ++I;
525  }
526  }
527 
528  // Try to schedule SGPR initializations as early as possible in the MBB.
529  for (auto &Init : Inits) {
530  auto &Defs = Init.second;
531  for (auto MI : Defs) {
532  auto MBB = MI->getParent();
533  MachineInstr &BoundaryMI = *getFirstNonPrologue(MBB, TII);
535  // Check if B should actually be a boundary. If not set the previous
536  // instruction as the boundary instead.
537  if (!TII->isBasicBlockPrologue(*B))
538  B++;
539 
540  auto R = std::next(MI->getReverseIterator());
541  const unsigned Threshold = 50;
542  // Search until B or Threshold for a place to insert the initialization.
543  for (unsigned I = 0; R != B && I < Threshold; ++R, ++I)
544  if (R->readsRegister(Reg, TRI) || R->definesRegister(Reg, TRI) ||
546  break;
547 
548  // Move to directly after R.
549  if (&*--R != MI)
550  MBB->splice(*R, MBB, MI);
551  }
552  }
553 
554  if (Changed)
556 
557  return Changed;
558 }
559 
560 bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
561  // Only need to run this in SelectionDAG path.
562  if (MF.getProperties().hasProperty(
564  return false;
565 
566  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
567  MRI = &MF.getRegInfo();
568  TRI = ST.getRegisterInfo();
569  TII = ST.getInstrInfo();
570  MDT = &getAnalysis<MachineDominatorTree>();
571 
572  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
573  BI != BE; ++BI) {
574  MachineBasicBlock *MBB = &*BI;
575  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
576  ++I) {
577  MachineInstr &MI = *I;
578 
579  switch (MI.getOpcode()) {
580  default:
581  continue;
582  case AMDGPU::COPY:
583  case AMDGPU::WQM:
584  case AMDGPU::STRICT_WQM:
585  case AMDGPU::SOFT_WQM:
586  case AMDGPU::STRICT_WWM: {
587  Register DstReg = MI.getOperand(0).getReg();
588  const TargetRegisterClass *SrcRC, *DstRC;
589  std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, *MRI);
590 
591  if (MI.isCopy()) {
592  Register SrcReg = MI.getOperand(1).getReg();
593  if (SrcReg == AMDGPU::SCC) {
595  TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID));
596  I = BuildMI(*MI.getParent(),
597  std::next(MachineBasicBlock::iterator(MI)),
598  MI.getDebugLoc(),
599  TII->get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
600  : AMDGPU::S_CSELECT_B64),
601  SCCCopy)
602  .addImm(-1)
603  .addImm(0);
604  I = BuildMI(*MI.getParent(), std::next(I), I->getDebugLoc(),
605  TII->get(AMDGPU::COPY), DstReg)
606  .addReg(SCCCopy);
607  MI.eraseFromParent();
608  continue;
609  } else if (DstReg == AMDGPU::SCC) {
610  unsigned Opcode =
611  ST.isWave64() ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
612  Register Exec = ST.isWave64() ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
613  Register Tmp = MRI->createVirtualRegister(TRI->getBoolRC());
614  I = BuildMI(*MI.getParent(),
615  std::next(MachineBasicBlock::iterator(MI)),
616  MI.getDebugLoc(), TII->get(Opcode))
617  .addReg(Tmp, getDefRegState(true))
618  .addReg(SrcReg)
619  .addReg(Exec);
620  MI.eraseFromParent();
621  continue;
622  }
623  }
624 
625  if (!DstReg.isVirtual()) {
626  // If the destination register is a physical register there isn't
627  // really much we can do to fix this.
628  // Some special instructions use M0 as an input. Some even only use
629  // the first lane. Insert a readfirstlane and hope for the best.
630  if (DstReg == AMDGPU::M0 && TRI->hasVectorRegisters(SrcRC)) {
631  Register TmpReg
632  = MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
633 
634  BuildMI(*MBB, MI, MI.getDebugLoc(),
635  TII->get(AMDGPU::V_READFIRSTLANE_B32), TmpReg)
636  .add(MI.getOperand(1));
637  MI.getOperand(1).setReg(TmpReg);
638  }
639 
640  continue;
641  }
642 
643  if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
644  Register SrcReg = MI.getOperand(1).getReg();
645  if (!SrcReg.isVirtual()) {
646  MachineBasicBlock *NewBB = TII->moveToVALU(MI, MDT);
647  if (NewBB && NewBB != MBB) {
648  MBB = NewBB;
649  E = MBB->end();
651  BE = MF.end();
652  }
653  assert((!NewBB || NewBB == I->getParent()) &&
654  "moveToVALU did not return the right basic block");
655  break;
656  }
657 
658  MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
659  unsigned SMovOp;
660  int64_t Imm;
661  // If we are just copying an immediate, we can replace the copy with
662  // s_mov_b32.
663  if (isSafeToFoldImmIntoCopy(&MI, DefMI, TII, SMovOp, Imm)) {
664  MI.getOperand(1).ChangeToImmediate(Imm);
665  MI.addImplicitDefUseOperands(MF);
666  MI.setDesc(TII->get(SMovOp));
667  break;
668  }
669  MachineBasicBlock *NewBB = TII->moveToVALU(MI, MDT);
670  if (NewBB && NewBB != MBB) {
671  MBB = NewBB;
672  E = MBB->end();
674  BE = MF.end();
675  }
676  assert((!NewBB || NewBB == I->getParent()) &&
677  "moveToVALU did not return the right basic block");
678  } else if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) {
680  }
681 
682  break;
683  }
684  case AMDGPU::PHI: {
685  MachineBasicBlock *NewBB = processPHINode(MI);
686  if (NewBB && NewBB != MBB) {
687  MBB = NewBB;
688  E = MBB->end();
690  BE = MF.end();
691  }
692  assert((!NewBB || NewBB == I->getParent()) &&
693  "moveToVALU did not return the right basic block");
694  break;
695  }
696  case AMDGPU::REG_SEQUENCE: {
697  if (TRI->hasVectorRegisters(TII->getOpRegClass(MI, 0)) ||
698  !hasVectorOperands(MI, TRI)) {
700  continue;
701  }
702 
703  LLVM_DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);
704 
705  MachineBasicBlock *NewBB = TII->moveToVALU(MI, MDT);
706  if (NewBB && NewBB != MBB) {
707  MBB = NewBB;
708  E = MBB->end();
710  BE = MF.end();
711  }
712  assert((!NewBB || NewBB == I->getParent()) &&
713  "moveToVALU did not return the right basic block");
714  break;
715  }
716  case AMDGPU::INSERT_SUBREG: {
717  const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
718  DstRC = MRI->getRegClass(MI.getOperand(0).getReg());
719  Src0RC = MRI->getRegClass(MI.getOperand(1).getReg());
720  Src1RC = MRI->getRegClass(MI.getOperand(2).getReg());
721  if (TRI->isSGPRClass(DstRC) &&
722  (TRI->hasVectorRegisters(Src0RC) ||
723  TRI->hasVectorRegisters(Src1RC))) {
724  LLVM_DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
725  MachineBasicBlock *NewBB = TII->moveToVALU(MI, MDT);
726  if (NewBB && NewBB != MBB) {
727  MBB = NewBB;
728  E = MBB->end();
730  BE = MF.end();
731  }
732  assert((!NewBB || NewBB == I->getParent()) &&
733  "moveToVALU did not return the right basic block");
734  }
735  break;
736  }
737  case AMDGPU::V_WRITELANE_B32: {
738  // Some architectures allow more than one constant bus access without
739  // SGPR restriction
740  if (ST.getConstantBusLimit(MI.getOpcode()) != 1)
741  break;
742 
743  // Writelane is special in that it can use SGPR and M0 (which would
744  // normally count as using the constant bus twice - but in this case it
745  // is allowed since the lane selector doesn't count as a use of the
746  // constant bus). However, it is still required to abide by the 1 SGPR
747  // rule. Apply a fix here as we might have multiple SGPRs after
748  // legalizing VGPRs to SGPRs
749  int Src0Idx =
750  AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
751  int Src1Idx =
752  AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
753  MachineOperand &Src0 = MI.getOperand(Src0Idx);
754  MachineOperand &Src1 = MI.getOperand(Src1Idx);
755 
756  // Check to see if the instruction violates the 1 SGPR rule
757  if ((Src0.isReg() && TRI->isSGPRReg(*MRI, Src0.getReg()) &&
758  Src0.getReg() != AMDGPU::M0) &&
759  (Src1.isReg() && TRI->isSGPRReg(*MRI, Src1.getReg()) &&
760  Src1.getReg() != AMDGPU::M0)) {
761 
762  // Check for trivially easy constant prop into one of the operands
763  // If this is the case then perform the operation now to resolve SGPR
764  // issue. If we don't do that here we will always insert a mov to m0
765  // that can't be resolved in later operand folding pass
766  bool Resolved = false;
767  for (MachineOperand *MO : {&Src0, &Src1}) {
768  if (MO->getReg().isVirtual()) {
769  MachineInstr *DefMI = MRI->getVRegDef(MO->getReg());
770  if (DefMI && TII->isFoldableCopy(*DefMI)) {
771  const MachineOperand &Def = DefMI->getOperand(0);
772  if (Def.isReg() &&
773  MO->getReg() == Def.getReg() &&
774  MO->getSubReg() == Def.getSubReg()) {
775  const MachineOperand &Copied = DefMI->getOperand(1);
776  if (Copied.isImm() &&
777  TII->isInlineConstant(APInt(64, Copied.getImm(), true))) {
778  MO->ChangeToImmediate(Copied.getImm());
779  Resolved = true;
780  break;
781  }
782  }
783  }
784  }
785  }
786 
787  if (!Resolved) {
788  // Haven't managed to resolve by replacing an SGPR with an immediate
789  // Move src1 to be in M0
790  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
791  TII->get(AMDGPU::COPY), AMDGPU::M0)
792  .add(Src1);
793  Src1.ChangeToRegister(AMDGPU::M0, false);
794  }
795  }
796  break;
797  }
798  }
799  }
800  }
801 
804 
805  return true;
806 }
807 
808 MachineBasicBlock *SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
809  unsigned numVGPRUses = 0;
810  bool AllAGPRUses = true;
813  SetVector<MachineInstr *> PHIOperands;
814  MachineBasicBlock *CreatedBB = nullptr;
815  worklist.insert(&MI);
816  Visited.insert(&MI);
817  while (!worklist.empty()) {
818  const MachineInstr *Instr = worklist.pop_back_val();
819  Register Reg = Instr->getOperand(0).getReg();
820  for (const auto &Use : MRI->use_operands(Reg)) {
821  const MachineInstr *UseMI = Use.getParent();
822  AllAGPRUses &= (UseMI->isCopy() &&
823  TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg())) ||
824  TRI->isAGPR(*MRI, Use.getReg());
825  if (UseMI->isCopy() || UseMI->isRegSequence()) {
826  if (UseMI->isCopy() &&
827  UseMI->getOperand(0).getReg().isPhysical() &&
828  !TRI->isSGPRReg(*MRI, UseMI->getOperand(0).getReg())) {
829  numVGPRUses++;
830  }
831  if (Visited.insert(UseMI).second)
832  worklist.insert(UseMI);
833 
834  continue;
835  }
836 
837  if (UseMI->isPHI()) {
838  const TargetRegisterClass *UseRC = MRI->getRegClass(Use.getReg());
839  if (!TRI->isSGPRReg(*MRI, Use.getReg()) &&
840  UseRC != &AMDGPU::VReg_1RegClass)
841  numVGPRUses++;
842  continue;
843  }
844 
845  const TargetRegisterClass *OpRC =
846  TII->getOpRegClass(*UseMI, UseMI->getOperandNo(&Use));
847  if (!TRI->isSGPRClass(OpRC) && OpRC != &AMDGPU::VS_32RegClass &&
848  OpRC != &AMDGPU::VS_64RegClass) {
849  numVGPRUses++;
850  }
851  }
852  }
853 
854  Register PHIRes = MI.getOperand(0).getReg();
855  const TargetRegisterClass *RC0 = MRI->getRegClass(PHIRes);
856  if (AllAGPRUses && numVGPRUses && !TRI->hasAGPRs(RC0)) {
857  LLVM_DEBUG(dbgs() << "Moving PHI to AGPR: " << MI);
858  MRI->setRegClass(PHIRes, TRI->getEquivalentAGPRClass(RC0));
859  for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
860  MachineInstr *DefMI = MRI->getVRegDef(MI.getOperand(I).getReg());
861  if (DefMI && DefMI->isPHI())
862  PHIOperands.insert(DefMI);
863  }
864  }
865 
866  bool hasVGPRInput = false;
867  for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
868  Register InputReg = MI.getOperand(i).getReg();
869  MachineInstr *Def = MRI->getVRegDef(InputReg);
870  if (TRI->isVectorRegister(*MRI, InputReg)) {
871  if (Def->isCopy()) {
872  Register SrcReg = Def->getOperand(1).getReg();
873  const TargetRegisterClass *RC =
874  TRI->getRegClassForReg(*MRI, SrcReg);
875  if (TRI->isSGPRClass(RC))
876  continue;
877  }
878  hasVGPRInput = true;
879  break;
880  }
881  else if (Def->isCopy() &&
882  TRI->isVectorRegister(*MRI, Def->getOperand(1).getReg())) {
883  Register SrcReg = Def->getOperand(1).getReg();
884  MachineInstr *SrcDef = MRI->getVRegDef(SrcReg);
885  unsigned SMovOp;
886  int64_t Imm;
887  if (!isSafeToFoldImmIntoCopy(Def, SrcDef, TII, SMovOp, Imm)) {
888  hasVGPRInput = true;
889  break;
890  } else {
891  // Formally, if we did not do this right away
892  // it would be done on the next iteration of the
893  // runOnMachineFunction main loop. But why not if we can?
894  MachineFunction *MF = MI.getParent()->getParent();
895  Def->getOperand(1).ChangeToImmediate(Imm);
896  Def->addImplicitDefUseOperands(*MF);
897  Def->setDesc(TII->get(SMovOp));
898  }
899  }
900  }
901 
902  if ((!TRI->isVectorRegister(*MRI, PHIRes) &&
903  RC0 != &AMDGPU::VReg_1RegClass) &&
904  (hasVGPRInput || numVGPRUses > 1)) {
905  LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
906  CreatedBB = TII->moveToVALU(MI);
907  }
908  else {
909  LLVM_DEBUG(dbgs() << "Legalizing PHI: " << MI);
910  TII->legalizeOperands(MI, MDT);
911  }
912 
913  // Propagate register class back to PHI operands which are PHI themselves.
914  while (!PHIOperands.empty()) {
915  processPHINode(*PHIOperands.pop_back_val());
916  }
917  return CreatedBB;
918 }
llvm::MachineDominatorTree::findNearestCommonDominator
MachineBasicBlock * findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B)
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B.
Definition: MachineDominators.h:155
i
i
Definition: README.txt:29
llvm::TargetMachine::getOptLevel
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Definition: TargetMachine.cpp:185
llvm::HexagonInstrInfo::isSchedulingBoundary
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
Test if the given instruction should be considered a scheduling boundary.
Definition: HexagonInstrInfo.cpp:1706
llvm::MachineFunctionProperties::hasProperty
bool hasProperty(Property P) const
Definition: MachineFunction.h:165
llvm::MachineBasicBlock::pred_begin
pred_iterator pred_begin()
Definition: MachineBasicBlock.h:316
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:103
llvm::MachineInstr::getOperandNo
unsigned getOperandNo(const_mop_iterator I) const
Returns the number of the operand iterator I points to.
Definition: MachineInstr.h:683
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
isReachable
static bool isReachable(const MachineInstr *From, const MachineInstr *To, const MachineBasicBlock *CutOff, MachineDominatorTree &MDT)
Definition: SIFixSGPRCopies.cpp:362
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
UseMI
MachineInstrBuilder & UseMI
Definition: AArch64ExpandPseudoInsts.cpp:102
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:158
llvm::createSIFixSGPRCopiesPass
FunctionPass * createSIFixSGPRCopiesPass()
Definition: SIFixSGPRCopies.cpp:123
llvm::MachineDominatorTree::properlyDominates
bool properlyDominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
Definition: MachineDominators.h:141
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::MachineInstrBuilder::add
const MachineInstrBuilder & add(const MachineOperand &MO) const
Definition: MachineInstrBuilder.h:224
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::MachineFunction::end
iterator end()
Definition: MachineFunction.h:810
llvm::printMBBReference
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Definition: MachineBasicBlock.cpp:119
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:233
llvm::MachineInstr::getDesc
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:486
llvm::MachineRegisterInfo::use_operands
iterator_range< use_iterator > use_operands(Register Reg) const
Definition: MachineRegisterInfo.h:469
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
llvm::MachineInstr::isCopy
bool isCopy() const
Definition: MachineInstr.h:1291
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::SIInstrFlags::WQM
@ WQM
Definition: SIDefines.h:70
llvm::MachineDominatorTree::dominates
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
Definition: MachineDominators.h:109
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
llvm::AMDGPU::getNamedOperandIdx
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::MachineFunction::iterator
BasicBlockListType::iterator iterator
Definition: MachineFunction.h:790
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::Register::isPhysical
bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:97
llvm::getDefRegState
unsigned getDefRegState(bool B)
Definition: MachineInstrBuilder.h:502
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:636
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:97
llvm::MachineFunctionProperties::Property::Selected
@ Selected
TargetMachine.h
llvm::MachineOperand::ChangeToRegister
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
Definition: MachineOperand.cpp:241
GCNSubtarget.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:648
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:537
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:499
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::MachineFunction::getProperties
const MachineFunctionProperties & getProperties() const
Get the function properties.
Definition: MachineFunction.h:717
llvm::MachineRegisterInfo::reg_nodbg_operands
iterator_range< reg_nodbg_iterator > reg_nodbg_operands(Register Reg) const
Definition: MachineRegisterInfo.h:337
false
Definition: StackSlotColoring.cpp:142
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::MachineOperand::ChangeToImmediate
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
Definition: MachineOperand.cpp:156
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
getFirstNonPrologue
static MachineBasicBlock::iterator getFirstNonPrologue(MachineBasicBlock *MBB, const TargetInstrInfo *TII)
Definition: SIFixSGPRCopies.cpp:381
copies
SI Fix SGPR copies
Definition: SIFixSGPRCopies.cpp:117
llvm::M0
unsigned M0(unsigned Val)
Definition: VE.h:371
llvm::MachineFunction::begin
iterator begin()
Definition: MachineFunction.h:808
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(SIFixSGPRCopies, DEBUG_TYPE, "SI Fix SGPR copies", false, false) INITIALIZE_PASS_END(SIFixSGPRCopies
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:28
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:400
llvm::SetVector::empty
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:634
llvm::Register::isVirtual
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
llvm::MachineRegisterInfo::clearKillFlags
void clearKillFlags(Register Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
Definition: MachineRegisterInfo.cpp:431
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:626
llvm::cl::opt< bool >
llvm::PPC::Predicate
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
llvm::SmallSet::count
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164
AMDGPUMCTargetDesc.h
llvm::MachineBasicBlock::pred_end
pred_iterator pred_end()
Definition: MachineBasicBlock.h:318
llvm::TargetRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
Definition: TargetRegisterInfo.h:739
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:321
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::MachineRegisterInfo::use_instr_begin
use_instr_iterator use_instr_begin(Register RegNo) const
Definition: MachineRegisterInfo.h:477
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
DEBUG_TYPE
#define DEBUG_TYPE
Definition: SIFixSGPRCopies.cpp:76
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::MachineRegisterInfo::def_instructions
iterator_range< def_instr_iterator > def_instructions(Register Reg) const
Definition: MachineRegisterInfo.h:405
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::MachineBasicBlock::getFirstNonPHI
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: MachineBasicBlock.cpp:200
llvm::SIFixSGPRCopiesID
char & SIFixSGPRCopiesID
Definition: SIFixSGPRCopies.cpp:121
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:225
llvm::MachineInstr::isPHI
bool isPHI() const
Definition: MachineInstr.h:1255
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:360
llvm::MachineBasicBlock::predecessors
iterator_range< pred_iterator > predecessors()
Definition: MachineBasicBlock.h:349
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::MachineFunction
Definition: MachineFunction.h:230
llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::CodeGenOpt::None
@ None
Definition: CodeGen.h:53
llvm::AMDGPU::CPol::SCC
@ SCC
Definition: SIDefines.h:292
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1558
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:253
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::MachineBasicBlock::splice
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Definition: MachineBasicBlock.h:950
AMDGPU.h
llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:489
isSGPRToVGPRCopy
static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC, const TargetRegisterClass *DstRC, const SIRegisterInfo &TRI)
Definition: SIFixSGPRCopies.cpp:168
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::MCID::MoveImm
@ MoveImm
Definition: MCInstrDesc.h:159
llvm::SmallSet::insert
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:180
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:286
llvm::Init
Definition: Record.h:271
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::MachineOperand::getSubReg
unsigned getSubReg() const
Definition: MachineOperand.h:365
hoistAndMergeSGPRInits
static bool hoistAndMergeSGPRInits(unsigned Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo *TRI, MachineDominatorTree &MDT, const TargetInstrInfo *TII)
Definition: SIFixSGPRCopies.cpp:393
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
foldVGPRCopyIntoRegSequence
static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, const SIRegisterInfo *TRI, const SIInstrInfo *TII, MachineRegisterInfo &MRI)
Definition: SIFixSGPRCopies.cpp:216
llvm::MachineInstr::isRegSequence
bool isRegSequence() const
Definition: MachineInstr.h:1283
hasVectorOperands
static bool hasVectorOperands(const MachineInstr &MI, const SIRegisterInfo *TRI)
Definition: SIFixSGPRCopies.cpp:127
tryChangeVGPRtoSGPRinCopy
static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI, const SIRegisterInfo *TRI, const SIInstrInfo *TII)
Definition: SIFixSGPRCopies.cpp:175
llvm::MachineFunction::getTarget
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Definition: MachineFunction.h:622
llvm::ilist_iterator
Iterator for intrusive lists based on ilist_node.
Definition: ilist_iterator.h:57
isSafeToFoldImmIntoCopy
static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy, const MachineInstr *MoveImm, const SIInstrInfo *TII, unsigned &SMovOp, int64_t &Imm)
Definition: SIFixSGPRCopies.cpp:299
llvm::MachineOperand::isImm
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Definition: MachineOperand.h:323
llvm::SIInstrInfo
Definition: SIInstrInfo.h:38
Predicate
isVGPRToSGPRCopy
static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC, const TargetRegisterClass *DstRC, const SIRegisterInfo &TRI)
Definition: SIFixSGPRCopies.cpp:161
llvm::MachineRegisterInfo::hasOneUse
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
Definition: MachineRegisterInfo.h:510
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:268
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
N
#define N
DefMI
MachineInstrBuilder MachineInstrBuilder & DefMI
Definition: AArch64ExpandPseudoInsts.cpp:103
llvm::RegState::Kill
@ Kill
The last use of a register.
Definition: MachineInstrBuilder.h:48
Fix
Falkor HW Prefetch Fix
Definition: AArch64FalkorHWPFFix.cpp:114
Threshold
static cl::opt< unsigned > Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden)
getCopyRegClasses
static std::pair< const TargetRegisterClass *, const TargetRegisterClass * > getCopyRegClasses(const MachineInstr &Copy, const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI)
Definition: SIFixSGPRCopies.cpp:141
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::orc::SymbolState::Resolved
@ Resolved
Queried, materialization begun.
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
From
BlockVerifier::State From
Definition: BlockVerifier.cpp:55
searchPredecessors
bool searchPredecessors(const MachineBasicBlock *MBB, const MachineBasicBlock *CutOff, UnaryPredicate Predicate)
Definition: SIFixSGPRCopies.cpp:334
llvm::cl::desc
Definition: CommandLine.h:414
EnableM0Merge
static cl::opt< bool > EnableM0Merge("amdgpu-enable-merge-m0", cl::desc("Merge and hoist M0 initializations"), cl::init(true))
llvm::MachineDominatorTree
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Definition: MachineDominators.h:45
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
llvm::MachineInstr::eraseFromParent
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Definition: MachineInstr.cpp:677
llvm::MachineInstrBundleIterator
MachineBasicBlock iterator that automatically skips over MIs that are inside bundles (i....
Definition: MachineInstrBundleIterator.h:108
InitializePasses.h
llvm::SetVector::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SetVector.h:232
llvm::MCInstrDesc::getNumOperands
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:228
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:270
SubReg
unsigned SubReg
Definition: AArch64AdvSIMDScalarPass.cpp:104
llvm::MachineRegisterInfo::setRegClass
void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
Definition: MachineRegisterInfo.cpp:58
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
MachineDominators.h
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37