LLVM  14.0.0git
GCNDPPCombine.cpp
Go to the documentation of this file.
1 //=======- GCNDPPCombine.cpp - optimization for DPP instructions ---==========//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // The pass combines V_MOV_B32_dpp instruction with its VALU uses as a DPP src0
9 // operand. If any of the use instructions cannot be combined with the mov, the
10 // whole sequence is reverted.
11 //
12 // $old = ...
13 // $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
14 // dpp_controls..., $row_mask, $bank_mask, $bound_ctrl
15 // $res = VALU $dpp_value [, src1]
16 //
17 // to
18 //
19 // $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,]
20 // dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl
21 //
22 // Combining rules :
23 //
24 // if $row_mask and $bank_mask are fully enabled (0xF) and
25 // $bound_ctrl==DPP_BOUND_ZERO or $old==0
26 // -> $combined_old = undef,
27 // $combined_bound_ctrl = DPP_BOUND_ZERO
28 //
29 // if the VALU op is binary and
30 // $bound_ctrl==DPP_BOUND_OFF and
31 // $old==identity value (immediate) for the VALU op
32 // -> $combined_old = src1,
33 // $combined_bound_ctrl = DPP_BOUND_OFF
34 //
35 // Otherwise cancel.
36 //
37 // The mov_dpp instruction should reside in the same BB as all its uses
38 //===----------------------------------------------------------------------===//
39 
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <limits>
45 
46 using namespace llvm;
47 
// Identifier of this pass; it is also handed to INITIALIZE_PASS below as the
// pass argument.
48 #define DEBUG_TYPE "gcn-dpp-combine"
49 
// Counter bumped by runOnMachineFunction every time combineDPPMov succeeds.
50 STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");
51 
52 namespace {
53 
54 class GCNDPPCombine : public MachineFunctionPass {
56  const SIInstrInfo *TII;
57  const GCNSubtarget *ST;
58 
60 
61  MachineOperand *getOldOpndValue(MachineOperand &OldOpnd) const;
62 
63  MachineInstr *createDPPInst(MachineInstr &OrigMI, MachineInstr &MovMI,
64  RegSubRegPair CombOldVGPR,
65  MachineOperand *OldOpnd, bool CombBCZ,
66  bool IsShrinkable) const;
67 
68  MachineInstr *createDPPInst(MachineInstr &OrigMI, MachineInstr &MovMI,
69  RegSubRegPair CombOldVGPR, bool CombBCZ,
70  bool IsShrinkable) const;
71 
72  bool hasNoImmOrEqual(MachineInstr &MI,
73  unsigned OpndName,
74  int64_t Value,
75  int64_t Mask = -1) const;
76 
77  bool combineDPPMov(MachineInstr &MI) const;
78 
79 public:
80  static char ID;
81 
82  GCNDPPCombine() : MachineFunctionPass(ID) {
84  }
85 
86  bool runOnMachineFunction(MachineFunction &MF) override;
87 
88  StringRef getPassName() const override { return "GCN DPP Combine"; }
89 
90  void getAnalysisUsage(AnalysisUsage &AU) const override {
91  AU.setPreservesCFG();
93  }
94 
95  MachineFunctionProperties getRequiredProperties() const override {
98  }
99 
100 private:
101  int getDPPOp(unsigned Op, bool IsShrinkable) const;
102  bool isShrinkable(MachineInstr &MI) const;
103 };
104 
105 } // end anonymous namespace
106 
// Registers the pass: DEBUG_TYPE is the pass argument and "GCN DPP Combine"
// the displayed name; the two trailing flags are the macro's cfg and analysis
// parameters.
107 INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)
108 
// Storage for the pass ID that the constructor passes to MachineFunctionPass.
109 char GCNDPPCombine::ID = 0;
110 
// Reference to the pass ID, exported in namespace llvm.
111 char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;
112 
114  return new GCNDPPCombine();
115 }
116 
117 bool GCNDPPCombine::isShrinkable(MachineInstr &MI) const {
118  unsigned Op = MI.getOpcode();
119  if (!TII->isVOP3(Op)) {
120  return false;
121  }
122  if (!TII->hasVALU32BitEncoding(Op)) {
123  LLVM_DEBUG(dbgs() << " Inst hasn't e32 equivalent\n");
124  return false;
125  }
126  if (const auto *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst)) {
127  // Give up if there are any uses of the carry-out from instructions like
128  // V_ADD_CO_U32. The shrunken form of the instruction would write it to vcc
129  // instead of to a virtual register.
130  if (!MRI->use_nodbg_empty(SDst->getReg()))
131  return false;
132  }
133  // check if other than abs|neg modifiers are set (opsel for example)
134  const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG);
135  if (!hasNoImmOrEqual(MI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
136  !hasNoImmOrEqual(MI, AMDGPU::OpName::src1_modifiers, 0, Mask) ||
137  !hasNoImmOrEqual(MI, AMDGPU::OpName::clamp, 0) ||
138  !hasNoImmOrEqual(MI, AMDGPU::OpName::omod, 0)) {
139  LLVM_DEBUG(dbgs() << " Inst has non-default modifiers\n");
140  return false;
141  }
142  return true;
143 }
144 
145 int GCNDPPCombine::getDPPOp(unsigned Op, bool IsShrinkable) const {
146  auto DPP32 = AMDGPU::getDPPOp32(Op);
147  if (IsShrinkable) {
148  assert(DPP32 == -1);
149  auto E32 = AMDGPU::getVOPe32(Op);
150  DPP32 = (E32 == -1) ? -1 : AMDGPU::getDPPOp32(E32);
151  }
152  return (DPP32 == -1 || TII->pseudoToMCOpcode(DPP32) == -1) ? -1 : DPP32;
153 }
154 
155 // tracks the register operand definition and returns:
156 // 1. immediate operand used to initialize the register if found
157 // 2. nullptr if the register operand is undef
158 // 3. the operand itself otherwise
159 MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
160  auto *Def = getVRegSubRegDef(getRegSubRegPair(OldOpnd), *MRI);
161  if (!Def)
162  return nullptr;
163 
164  switch(Def->getOpcode()) {
165  default: break;
166  case AMDGPU::IMPLICIT_DEF:
167  return nullptr;
168  case AMDGPU::COPY:
169  case AMDGPU::V_MOV_B32_e32:
170  case AMDGPU::V_MOV_B64_PSEUDO: {
171  auto &Op1 = Def->getOperand(1);
172  if (Op1.isImm())
173  return &Op1;
174  break;
175  }
176  }
177  return &OldOpnd;
178 }
179 
180 MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
181  MachineInstr &MovMI,
182  RegSubRegPair CombOldVGPR,
183  bool CombBCZ,
184  bool IsShrinkable) const {
185  assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||
186  MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
187 
188  auto OrigOp = OrigMI.getOpcode();
189  auto DPPOp = getDPPOp(OrigOp, IsShrinkable);
190  if (DPPOp == -1) {
191  LLVM_DEBUG(dbgs() << " failed: no DPP opcode\n");
192  return nullptr;
193  }
194 
195  auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
196  OrigMI.getDebugLoc(), TII->get(DPPOp))
197  .setMIFlags(OrigMI.getFlags());
198 
199  bool Fail = false;
200  do {
201  auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst);
202  assert(Dst);
203  DPPInst.add(*Dst);
204  int NumOperands = 1;
205 
206  const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
207  if (OldIdx != -1) {
208  assert(OldIdx == NumOperands);
210  CombOldVGPR,
211  *MRI->getRegClass(
212  TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg()),
213  *MRI));
214  auto *Def = getVRegSubRegDef(CombOldVGPR, *MRI);
215  DPPInst.addReg(CombOldVGPR.Reg, Def ? 0 : RegState::Undef,
216  CombOldVGPR.SubReg);
217  ++NumOperands;
218  } else {
219  // TODO: this discards MAC/FMA instructions for now, let's add it later
220  LLVM_DEBUG(dbgs() << " failed: no old operand in DPP instruction,"
221  " TBD\n");
222  Fail = true;
223  break;
224  }
225 
226  if (auto *Mod0 = TII->getNamedOperand(OrigMI,
227  AMDGPU::OpName::src0_modifiers)) {
228  assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
229  AMDGPU::OpName::src0_modifiers));
230  assert(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
231  DPPInst.addImm(Mod0->getImm());
232  ++NumOperands;
233  } else if (AMDGPU::getNamedOperandIdx(DPPOp,
234  AMDGPU::OpName::src0_modifiers) != -1) {
235  DPPInst.addImm(0);
236  ++NumOperands;
237  }
238  auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
239  assert(Src0);
240  if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
241  LLVM_DEBUG(dbgs() << " failed: src0 is illegal\n");
242  Fail = true;
243  break;
244  }
245  DPPInst.add(*Src0);
246  DPPInst->getOperand(NumOperands).setIsKill(false);
247  ++NumOperands;
248 
249  if (auto *Mod1 = TII->getNamedOperand(OrigMI,
250  AMDGPU::OpName::src1_modifiers)) {
251  assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
252  AMDGPU::OpName::src1_modifiers));
253  assert(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
254  DPPInst.addImm(Mod1->getImm());
255  ++NumOperands;
256  } else if (AMDGPU::getNamedOperandIdx(DPPOp,
257  AMDGPU::OpName::src1_modifiers) != -1) {
258  DPPInst.addImm(0);
259  ++NumOperands;
260  }
261  if (auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
262  if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) {
263  LLVM_DEBUG(dbgs() << " failed: src1 is illegal\n");
264  Fail = true;
265  break;
266  }
267  DPPInst.add(*Src1);
268  ++NumOperands;
269  }
270 
271  if (auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2)) {
272  if (!TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) ||
273  !TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
274  LLVM_DEBUG(dbgs() << " failed: src2 is illegal\n");
275  Fail = true;
276  break;
277  }
278  DPPInst.add(*Src2);
279  }
280 
281  DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
282  DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
283  DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
284  DPPInst.addImm(CombBCZ ? 1 : 0);
285  } while (false);
286 
287  if (Fail) {
288  DPPInst.getInstr()->eraseFromParent();
289  return nullptr;
290  }
291  LLVM_DEBUG(dbgs() << " combined: " << *DPPInst.getInstr());
292  return DPPInst.getInstr();
293 }
294 
295 static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd) {
296  assert(OldOpnd->isImm());
297  switch (OrigMIOp) {
298  default: break;
299  case AMDGPU::V_ADD_U32_e32:
300  case AMDGPU::V_ADD_U32_e64:
301  case AMDGPU::V_ADD_CO_U32_e32:
302  case AMDGPU::V_ADD_CO_U32_e64:
303  case AMDGPU::V_OR_B32_e32:
304  case AMDGPU::V_OR_B32_e64:
305  case AMDGPU::V_SUBREV_U32_e32:
306  case AMDGPU::V_SUBREV_U32_e64:
307  case AMDGPU::V_SUBREV_CO_U32_e32:
308  case AMDGPU::V_SUBREV_CO_U32_e64:
309  case AMDGPU::V_MAX_U32_e32:
310  case AMDGPU::V_MAX_U32_e64:
311  case AMDGPU::V_XOR_B32_e32:
312  case AMDGPU::V_XOR_B32_e64:
313  if (OldOpnd->getImm() == 0)
314  return true;
315  break;
316  case AMDGPU::V_AND_B32_e32:
317  case AMDGPU::V_AND_B32_e64:
318  case AMDGPU::V_MIN_U32_e32:
319  case AMDGPU::V_MIN_U32_e64:
320  if (static_cast<uint32_t>(OldOpnd->getImm()) ==
322  return true;
323  break;
324  case AMDGPU::V_MIN_I32_e32:
325  case AMDGPU::V_MIN_I32_e64:
326  if (static_cast<int32_t>(OldOpnd->getImm()) ==
328  return true;
329  break;
330  case AMDGPU::V_MAX_I32_e32:
331  case AMDGPU::V_MAX_I32_e64:
332  if (static_cast<int32_t>(OldOpnd->getImm()) ==
334  return true;
335  break;
336  case AMDGPU::V_MUL_I32_I24_e32:
337  case AMDGPU::V_MUL_I32_I24_e64:
338  case AMDGPU::V_MUL_U32_U24_e32:
339  case AMDGPU::V_MUL_U32_U24_e64:
340  if (OldOpnd->getImm() == 1)
341  return true;
342  break;
343  }
344  return false;
345 }
346 
347 MachineInstr *GCNDPPCombine::createDPPInst(
348  MachineInstr &OrigMI, MachineInstr &MovMI, RegSubRegPair CombOldVGPR,
349  MachineOperand *OldOpndValue, bool CombBCZ, bool IsShrinkable) const {
350  assert(CombOldVGPR.Reg);
351  if (!CombBCZ && OldOpndValue && OldOpndValue->isImm()) {
352  auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
353  if (!Src1 || !Src1->isReg()) {
354  LLVM_DEBUG(dbgs() << " failed: no src1 or it isn't a register\n");
355  return nullptr;
356  }
357  if (!isIdentityValue(OrigMI.getOpcode(), OldOpndValue)) {
358  LLVM_DEBUG(dbgs() << " failed: old immediate isn't an identity\n");
359  return nullptr;
360  }
361  CombOldVGPR = getRegSubRegPair(*Src1);
362  auto MovDst = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
363  const TargetRegisterClass *RC = MRI->getRegClass(MovDst->getReg());
364  if (!isOfRegClass(CombOldVGPR, *RC, *MRI)) {
365  LLVM_DEBUG(dbgs() << " failed: src1 has wrong register class\n");
366  return nullptr;
367  }
368  }
369  return createDPPInst(OrigMI, MovMI, CombOldVGPR, CombBCZ, IsShrinkable);
370 }
371 
372 // returns true if MI doesn't have OpndName immediate operand or the
373 // operand has Value
374 bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
375  int64_t Value, int64_t Mask) const {
376  auto *Imm = TII->getNamedOperand(MI, OpndName);
377  if (!Imm)
378  return true;
379 
380  assert(Imm->isImm());
381  return (Imm->getImm() & Mask) == Value;
382 }
383 
384 bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
385  assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||
386  MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
387  LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);
388 
389  auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
390  assert(DstOpnd && DstOpnd->isReg());
391  auto DPPMovReg = DstOpnd->getReg();
392  if (DPPMovReg.isPhysical()) {
393  LLVM_DEBUG(dbgs() << " failed: dpp move writes physreg\n");
394  return false;
395  }
396  if (execMayBeModifiedBeforeAnyUse(*MRI, DPPMovReg, MovMI)) {
397  LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same"
398  " for all uses\n");
399  return false;
400  }
401 
402  if (MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO) {
403  auto *DppCtrl = TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl);
404  assert(DppCtrl && DppCtrl->isImm());
405  if (!AMDGPU::isLegal64BitDPPControl(DppCtrl->getImm())) {
406  LLVM_DEBUG(dbgs() << " failed: 64 bit dpp move uses unsupported"
407  " control value\n");
408  // Let it split, then control may become legal.
409  return false;
410  }
411  }
412 
413  auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
414  assert(RowMaskOpnd && RowMaskOpnd->isImm());
415  auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
416  assert(BankMaskOpnd && BankMaskOpnd->isImm());
417  const bool MaskAllLanes = RowMaskOpnd->getImm() == 0xF &&
418  BankMaskOpnd->getImm() == 0xF;
419 
420  auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);
421  assert(BCZOpnd && BCZOpnd->isImm());
422  bool BoundCtrlZero = BCZOpnd->getImm();
423 
424  auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old);
425  auto *SrcOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
426  assert(OldOpnd && OldOpnd->isReg());
427  assert(SrcOpnd && SrcOpnd->isReg());
428  if (OldOpnd->getReg().isPhysical() || SrcOpnd->getReg().isPhysical()) {
429  LLVM_DEBUG(dbgs() << " failed: dpp move reads physreg\n");
430  return false;
431  }
432 
433  auto * const OldOpndValue = getOldOpndValue(*OldOpnd);
434  // OldOpndValue is either undef (IMPLICIT_DEF) or immediate or something else
435  // We could use: assert(!OldOpndValue || OldOpndValue->isImm())
436  // but the third option is used to distinguish undef from non-immediate
437  // to reuse IMPLICIT_DEF instruction later
438  assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd);
439 
440  bool CombBCZ = false;
441 
442  if (MaskAllLanes && BoundCtrlZero) { // [1]
443  CombBCZ = true;
444  } else {
445  if (!OldOpndValue || !OldOpndValue->isImm()) {
446  LLVM_DEBUG(dbgs() << " failed: the DPP mov isn't combinable\n");
447  return false;
448  }
449 
450  if (OldOpndValue->getParent()->getParent() != MovMI.getParent()) {
451  LLVM_DEBUG(dbgs() <<
452  " failed: old reg def and mov should be in the same BB\n");
453  return false;
454  }
455 
456  if (OldOpndValue->getImm() == 0) {
457  if (MaskAllLanes) {
458  assert(!BoundCtrlZero); // by check [1]
459  CombBCZ = true;
460  }
461  } else if (BoundCtrlZero) {
462  assert(!MaskAllLanes); // by check [1]
463  LLVM_DEBUG(dbgs() <<
464  " failed: old!=0 and bctrl:0 and not all lanes isn't combinable\n");
465  return false;
466  }
467  }
468 
469  LLVM_DEBUG(dbgs() << " old=";
470  if (!OldOpndValue)
471  dbgs() << "undef";
472  else
473  dbgs() << *OldOpndValue;
474  dbgs() << ", bound_ctrl=" << CombBCZ << '\n');
475 
476  SmallVector<MachineInstr*, 4> OrigMIs, DPPMIs;
478  auto CombOldVGPR = getRegSubRegPair(*OldOpnd);
479  // try to reuse previous old reg if its undefined (IMPLICIT_DEF)
480  if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef
481  const TargetRegisterClass *RC = MRI->getRegClass(DPPMovReg);
482  CombOldVGPR = RegSubRegPair(
484  auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(),
485  TII->get(AMDGPU::IMPLICIT_DEF), CombOldVGPR.Reg);
486  DPPMIs.push_back(UndefInst.getInstr());
487  }
488 
489  OrigMIs.push_back(&MovMI);
490  bool Rollback = true;
492 
493  for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) {
494  Uses.push_back(&Use);
495  }
496 
497  while (!Uses.empty()) {
498  MachineOperand *Use = Uses.pop_back_val();
499  Rollback = true;
500 
501  auto &OrigMI = *Use->getParent();
502  LLVM_DEBUG(dbgs() << " try: " << OrigMI);
503 
504  auto OrigOp = OrigMI.getOpcode();
505  if (OrigOp == AMDGPU::REG_SEQUENCE) {
506  Register FwdReg = OrigMI.getOperand(0).getReg();
507  unsigned FwdSubReg = 0;
508 
509  if (execMayBeModifiedBeforeAnyUse(*MRI, FwdReg, OrigMI)) {
510  LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same"
511  " for all uses\n");
512  break;
513  }
514 
515  unsigned OpNo, E = OrigMI.getNumOperands();
516  for (OpNo = 1; OpNo < E; OpNo += 2) {
517  if (OrigMI.getOperand(OpNo).getReg() == DPPMovReg) {
518  FwdSubReg = OrigMI.getOperand(OpNo + 1).getImm();
519  break;
520  }
521  }
522 
523  if (!FwdSubReg)
524  break;
525 
526  for (auto &Op : MRI->use_nodbg_operands(FwdReg)) {
527  if (Op.getSubReg() == FwdSubReg)
528  Uses.push_back(&Op);
529  }
530  RegSeqWithOpNos[&OrigMI].push_back(OpNo);
531  continue;
532  }
533 
534  bool IsShrinkable = isShrinkable(OrigMI);
535  if (!(IsShrinkable || TII->isVOP1(OrigOp) || TII->isVOP2(OrigOp))) {
536  LLVM_DEBUG(dbgs() << " failed: not VOP1/2/3\n");
537  break;
538  }
539 
540  auto *Src0 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0);
541  auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
542  if (Use != Src0 && !(Use == Src1 && OrigMI.isCommutable())) { // [1]
543  LLVM_DEBUG(dbgs() << " failed: no suitable operands\n");
544  break;
545  }
546 
547  assert(Src0 && "Src1 without Src0?");
548  if (Src1 && Src1->isIdenticalTo(*Src0)) {
549  assert(Src1->isReg());
550  LLVM_DEBUG(
551  dbgs()
552  << " " << OrigMI
553  << " failed: DPP register is used more than once per instruction\n");
554  break;
555  }
556 
557  LLVM_DEBUG(dbgs() << " combining: " << OrigMI);
558  if (Use == Src0) {
559  if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
560  OldOpndValue, CombBCZ, IsShrinkable)) {
561  DPPMIs.push_back(DPPInst);
562  Rollback = false;
563  }
564  } else {
565  assert(Use == Src1 && OrigMI.isCommutable()); // by check [1]
566  auto *BB = OrigMI.getParent();
567  auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);
568  BB->insert(OrigMI, NewMI);
569  if (TII->commuteInstruction(*NewMI)) {
570  LLVM_DEBUG(dbgs() << " commuted: " << *NewMI);
571  if (auto *DPPInst =
572  createDPPInst(*NewMI, MovMI, CombOldVGPR, OldOpndValue, CombBCZ,
573  IsShrinkable)) {
574  DPPMIs.push_back(DPPInst);
575  Rollback = false;
576  }
577  } else
578  LLVM_DEBUG(dbgs() << " failed: cannot be commuted\n");
579  NewMI->eraseFromParent();
580  }
581  if (Rollback)
582  break;
583  OrigMIs.push_back(&OrigMI);
584  }
585 
586  Rollback |= !Uses.empty();
587 
588  for (auto *MI : *(Rollback? &DPPMIs : &OrigMIs))
589  MI->eraseFromParent();
590 
591  if (!Rollback) {
592  for (auto &S : RegSeqWithOpNos) {
593  if (MRI->use_nodbg_empty(S.first->getOperand(0).getReg())) {
594  S.first->eraseFromParent();
595  continue;
596  }
597  while (!S.second.empty())
598  S.first->getOperand(S.second.pop_back_val()).setIsUndef(true);
599  }
600  }
601 
602  return !Rollback;
603 }
604 
605 bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
606  ST = &MF.getSubtarget<GCNSubtarget>();
607  if (!ST->hasDPP() || skipFunction(MF.getFunction()))
608  return false;
609 
610  MRI = &MF.getRegInfo();
611  TII = ST->getInstrInfo();
612 
613  bool Changed = false;
614  for (auto &MBB : MF) {
615  for (auto I = MBB.rbegin(), E = MBB.rend(); I != E;) {
616  auto &MI = *I++;
617  if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
618  Changed = true;
619  ++NumDPPMovsCombined;
620  } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO) {
621  if (ST->has64BitDPP() && combineDPPMov(MI)) {
622  Changed = true;
623  ++NumDPPMovsCombined;
624  } else {
625  auto Split = TII->expandMovDPP64(MI);
626  for (auto M : { Split.first, Split.second }) {
627  if (M && combineDPPMov(*M))
628  ++NumDPPMovsCombined;
629  }
630  Changed = true;
631  }
632  }
633  }
634  }
635  return Changed;
636 }
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:103
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:158
llvm::getRegSubRegPair
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
Definition: SIInstrInfo.h:1153
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::SmallVector< MachineInstr *, 4 >
Statistic.h
llvm::AMDGPU::isLegal64BitDPPControl
LLVM_READNONE bool isLegal64BitDPPControl(unsigned DC)
Definition: AMDGPUBaseInfo.h:903
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
Fail
#define Fail
Definition: AArch64Disassembler.cpp:261
llvm::MachineFunctionProperties::Property::IsSSA
@ IsSSA
llvm::MachineFunctionProperties
Properties which a MachineFunction may have at a given point in time.
Definition: MachineFunction.h:111
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::AMDGPU::getNamedOperandIdx
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::initializeGCNDPPCombinePass
void initializeGCNDPPCombinePass(PassRegistry &)
llvm::TargetInstrInfo::RegSubRegPair
A pair composed of a register and a sub-register index.
Definition: TargetInstrInfo.h:477
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:579
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::MachineInstr::getFlags
uint16_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:325
llvm::SISrcMods::ABS
@ ABS
Definition: SIDefines.h:207
llvm::Register::isPhysical
bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:97
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:636
llvm::MachineRegisterInfo::use_nodbg_operands
iterator_range< use_nodbg_iterator > use_nodbg_operands(Register Reg) const
Definition: MachineRegisterInfo.h:526
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
GCNSubtarget.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:537
llvm::RegState::Undef
@ Undef
Value of the register doesn't matter.
Definition: MachineInstrBuilder.h:52
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:499
old
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM ID Predecessors according to mbb< bb27, 0x8b0a7c0 > Note ADDri is not a two address instruction its result reg1037 is an operand of the PHI node in bb76 and its operand reg1039 is the result of the PHI node We should treat it as a two address code and make sure the ADDri is scheduled after any node that reads reg1039 Use info(i.e. register scavenger) to assign it a free register to allow reuse the collector could move the objects and invalidate the derived pointer This is bad enough in the first but safe points can crop up unpredictably **array_addr i32 n old
Definition: README.txt:123
INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:37
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::SISrcMods::NEG
@ NEG
Definition: SIDefines.h:206
llvm::AMDGPU::getDPPOp32
LLVM_READONLY int getDPPOp32(uint16_t Opcode)
llvm::AMDGPU::getVOPe32
LLVM_READONLY int getVOPe32(uint16_t Opcode)
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::MachineFunctionProperties::set
MachineFunctionProperties & set(Property P)
Definition: MachineFunction.h:169
llvm::MachineBasicBlock::rend
reverse_iterator rend()
Definition: MachineBasicBlock.h:278
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::getVRegSubRegDef
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
Definition: SIInstrInfo.cpp:7823
llvm::MachineOperand::getParent
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Definition: MachineOperand.h:238
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:634
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:626
llvm::MachineInstr::getDebugLoc
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:418
AMDGPUMCTargetDesc.h
llvm::TargetInstrInfo::RegSubRegPair::Reg
Register Reg
Definition: TargetInstrInfo.h:478
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:321
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::execMayBeModifiedBeforeAnyUse
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
Definition: SIInstrInfo.cpp:7891
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::MachineInstr::isCommutable
bool isCommutable(QueryType Type=IgnoreBundle) const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z,...
Definition: MachineInstr.h:1057
llvm::AMDGPU::DPP::DppCtrl
DppCtrl
Definition: SIDefines.h:682
llvm::DenseMap
Definition: DenseMap.h:714
I
#define I(x, y, z)
Definition: MD5.cpp:59
MachineFunctionPass.h
llvm::GCNDPPCombineID
char & GCNDPPCombineID
Definition: GCNDPPCombine.cpp:111
RegSubRegPair
TargetInstrInfo::RegSubRegPair RegSubRegPair
Definition: PeepholeOptimizer.cpp:101
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:360
isIdentityValue
static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd)
Definition: GCNDPPCombine.cpp:295
llvm::MachineFunction
Definition: MachineFunction.h:230
llvm::MachineRegisterInfo::use_nodbg_empty
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
Definition: MachineRegisterInfo.h:566
llvm::TargetInstrInfo::RegSubRegPair::SubReg
unsigned SubReg
Definition: TargetInstrInfo.h:479
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:253
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::MachineBasicBlock::rbegin
reverse_iterator rbegin()
Definition: MachineBasicBlock.h:272
AMDGPU.h
llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:489
uint32_t
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:286
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::createGCNDPPCombinePass
FunctionPass * createGCNDPPCombinePass()
Definition: GCNDPPCombine.cpp:113
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:592
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:321
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::MachineOperand::isImm
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Definition: MachineOperand.h:323
llvm::SIInstrInfo
Definition: SIInstrInfo.h:38
llvm::MachineInstrBuilder::setMIFlags
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
Definition: MachineInstrBuilder.h:273
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
llvm::MachineInstr::getNumOperands
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:492
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
DEBUG_TYPE
#define DEBUG_TYPE
Definition: GCNDPPCombine.cpp:48
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::isOfRegClass
bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P, const TargetRegisterClass &TRC, MachineRegisterInfo &MRI)
Returns true if a reg:subreg pair P has a TRC class.
Definition: SIInstrInfo.h:1141
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37