//===- AMDGPURegisterBankInfo.cpp -------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the RegisterBankInfo class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPURegisterBankInfo.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/IR/Constants.h"

#define GET_TARGET_REGBANK_IMPL
#include "AMDGPUGenRegisterBank.inc"

// This file will be TableGen'ed at some point.
#include "AMDGPUGenRegisterBankInfo.def"

using namespace llvm;

namespace {

// Observer to apply a register bank to new registers created by
// LegalizerHelper.
class ApplyRegBankMapping final : public GISelChangeObserver {
private:
  MachineRegisterInfo &MRI;
  const RegisterBank *NewBank;
  SmallVector<MachineInstr *, 4> NewInsts;

public:
  ApplyRegBankMapping(MachineRegisterInfo &MRI_, const RegisterBank *RB)
    : MRI(MRI_), NewBank(RB) {}

  ~ApplyRegBankMapping() {
    for (MachineInstr *MI : NewInsts)
      applyBank(*MI);
  }

  /// Assign \p NewBank to any register that doesn't yet have a register class
  /// or bank.
  void applyBank(MachineInstr &MI) {
    for (MachineOperand &Op : MI.operands()) {
      if (!Op.isReg())
        continue;

      Register Reg = Op.getReg();
      if (MRI.getRegClassOrRegBank(Reg))
        continue;

      const RegisterBank *RB = NewBank;
      // FIXME: This might not be enough to detect when SCC should be used.
      if (MRI.getType(Reg) == LLT::scalar(1))
        RB = (NewBank == &AMDGPU::SGPRRegBank ?
              &AMDGPU::SCCRegBank : &AMDGPU::VCCRegBank);

      MRI.setRegBank(Reg, *RB);
    }
  }

  void erasingInstr(MachineInstr &MI) override {}

  void createdInstr(MachineInstr &MI) override {
    // At this point, the instruction was just inserted and has no operands.
    NewInsts.push_back(&MI);
  }

  void changingInstr(MachineInstr &MI) override {}
  void changedInstr(MachineInstr &MI) override {}
};
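
// ApplyRegBankMapping is used together with a GISelObserverWrapper and a
// LegalizerHelper (see applyMappingWideLoad below), so that registers created
// while re-legalizing an instruction inherit the intended bank.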

}

AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI)
    : AMDGPUGenRegisterBankInfo(),
      TRI(static_cast<const SIRegisterInfo*>(&TRI)) {

  // HACK: Until this is fully tablegen'd.
  static bool AlreadyInit = false;
  if (AlreadyInit)
    return;

  AlreadyInit = true;

  const RegisterBank &RBSGPR = getRegBank(AMDGPU::SGPRRegBankID);
  (void)RBSGPR;
  assert(&RBSGPR == &AMDGPU::SGPRRegBank);

  const RegisterBank &RBVGPR = getRegBank(AMDGPU::VGPRRegBankID);
  (void)RBVGPR;
  assert(&RBVGPR == &AMDGPU::VGPRRegBank);

}

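// Returning std::numeric_limits<unsigned>::max() below marks a copy as
// effectively impossible for RegBankSelect; e.g. a plain copy from a VGPR to
// an SGPR is rejected since it would need a v_readfirstlane and is only valid
// for uniform values.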
unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
                                          const RegisterBank &Src,
                                          unsigned Size) const {
  // TODO: Should there be a UniformVGPRRegBank which can use readfirstlane?
  if (Dst.getID() == AMDGPU::SGPRRegBankID &&
      Src.getID() == AMDGPU::VGPRRegBankID) {
    return std::numeric_limits<unsigned>::max();
  }

  // Bool values are tricky, because the meaning is based on context. The SCC
  // and VCC banks are for the natural scalar and vector conditions produced by
  // a compare.
  //
  // Legalization doesn't know about the necessary context, so an s1 use may
  // have been a truncate from an arbitrary value, in which case a copy (lowered
  // as a compare with 0) needs to be inserted.
  if (Size == 1 &&
      (Dst.getID() == AMDGPU::SCCRegBankID ||
       Dst.getID() == AMDGPU::SGPRRegBankID) &&
      (Src.getID() == AMDGPU::SGPRRegBankID ||
       Src.getID() == AMDGPU::VGPRRegBankID ||
       Src.getID() == AMDGPU::VCCRegBankID))
    return std::numeric_limits<unsigned>::max();

  if (Dst.getID() == AMDGPU::SCCRegBankID &&
      Src.getID() == AMDGPU::VCCRegBankID)
    return std::numeric_limits<unsigned>::max();

  return RegisterBankInfo::copyCost(Dst, Src, Size);
}

unsigned AMDGPURegisterBankInfo::getBreakDownCost(
    const ValueMapping &ValMapping,
    const RegisterBank *CurBank) const {
  // Check if this is a breakdown for G_LOAD to move the pointer from SGPR to
  // VGPR.
  // FIXME: Is there a better way to do this?
  if (ValMapping.NumBreakDowns >= 2 || ValMapping.BreakDown[0].Length >= 64)
    return 10; // This is expensive.

  assert(ValMapping.NumBreakDowns == 2 &&
         ValMapping.BreakDown[0].Length == 32 &&
         ValMapping.BreakDown[0].StartIdx == 0 &&
         ValMapping.BreakDown[1].Length == 32 &&
         ValMapping.BreakDown[1].StartIdx == 32 &&
         ValMapping.BreakDown[0].RegBank == ValMapping.BreakDown[1].RegBank);

  // 32-bit extract of a 64-bit value is just access of a subregister, so free.
  // TODO: Cost of 0 hits assert, though it's not clear it's what we really
  // want.

  // TODO: 32-bit insert to a 64-bit SGPR may incur a non-free copy due to SGPR
  // alignment restrictions, but this probably isn't important.
  return 1;
}

const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass(
    const TargetRegisterClass &RC) const {

  if (TRI->isSGPRClass(&RC))
    return getRegBank(AMDGPU::SGPRRegBankID);

  return getRegBank(AMDGPU::VGPRRegBankID);
}

template <unsigned NumOps>
RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::addMappingFromTable(
    const MachineInstr &MI, const MachineRegisterInfo &MRI,
    const std::array<unsigned, NumOps> RegSrcOpIdx,
    ArrayRef<OpRegBankEntry<NumOps>> Table) const {

  InstructionMappings AltMappings;

  SmallVector<const ValueMapping *, 10> Operands(MI.getNumOperands());

  unsigned Sizes[NumOps];
  for (unsigned I = 0; I < NumOps; ++I) {
    Register Reg = MI.getOperand(RegSrcOpIdx[I]).getReg();
    Sizes[I] = getSizeInBits(Reg, MRI, *TRI);
  }

  for (unsigned I = 0, E = MI.getNumExplicitDefs(); I != E; ++I) {
    unsigned SizeI = getSizeInBits(MI.getOperand(I).getReg(), MRI, *TRI);
    Operands[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeI);
  }

  unsigned MappingID = 0;
  for (const auto &Entry : Table) {
    for (unsigned I = 0; I < NumOps; ++I) {
      int OpIdx = RegSrcOpIdx[I];
      Operands[OpIdx] = AMDGPU::getValueMapping(Entry.RegBanks[I], Sizes[I]);
    }

    AltMappings.push_back(&getInstructionMapping(MappingID++, Entry.Cost,
                                                 getOperandsMapping(Operands),
                                                 Operands.size()));
  }

  return AltMappings;
}

RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsic(
    const MachineInstr &MI, const MachineRegisterInfo &MRI) const {
  switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
  case Intrinsic::amdgcn_readlane: {
    static const OpRegBankEntry<3> Table[2] = {
      // Perfectly legal.
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },

      // Need a readfirstlane for the index.
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
    };

    const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  case Intrinsic::amdgcn_writelane: {
    static const OpRegBankEntry<4> Table[4] = {
      // Perfectly legal.
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },

      // Need readfirstlane of first op
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },

      // Need readfirstlane of second op
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },

      // Need readfirstlane of both ops
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 3 }
    };

    // dst, value, lane select, old vdst
    const std::array<unsigned, 4> RegSrcOpIdx = { { 0, 2, 3, 4 } };
    return addMappingFromTable<4>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  default:
    return RegisterBankInfo::getInstrAlternativeMappings(MI);
  }
}

RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects(
    const MachineInstr &MI, const MachineRegisterInfo &MRI) const {

  switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
  case Intrinsic::amdgcn_buffer_load: {
    static const OpRegBankEntry<3> Table[4] = {
      // Perfectly legal.
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },

      // Waterfall loop needed for rsrc. In the worst case this will execute
      // approximately an extra 10 * wavesize + 2 instructions.
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1000 }
    };

    // rsrc, voffset, offset
    const std::array<unsigned, 3> RegSrcOpIdx = { { 2, 3, 4 } };
    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  case Intrinsic::amdgcn_s_buffer_load: {
    static const OpRegBankEntry<2> Table[4] = {
      // Perfectly legal.
      { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },

      // Only need 1 register in loop
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 300 },

      // Have to waterfall the resource.
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },

      // Have to waterfall the resource, and the offset.
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1500 }
    };

    // rsrc, offset
    const std::array<unsigned, 2> RegSrcOpIdx = { { 2, 3 } };
    return addMappingFromTable<2>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap: {
    // VGPR = M0, VGPR
    static const OpRegBankEntry<3> Table[2] = {
      // Perfectly legal.
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },

      // Need a readfirstlane for m0
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
    };

    const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  case Intrinsic::amdgcn_s_sendmsg:
  case Intrinsic::amdgcn_s_sendmsghalt: {
    static const OpRegBankEntry<1> Table[2] = {
      // Perfectly legal.
      { { AMDGPU::SGPRRegBankID }, 1 },

      // Need readlane
      { { AMDGPU::VGPRRegBankID }, 3 }
    };

    const std::array<unsigned, 1> RegSrcOpIdx = { { 2 } };
    return addMappingFromTable<1>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  default:
    return RegisterBankInfo::getInstrAlternativeMappings(MI);
  }
}

static bool isInstrUniform(const MachineInstr &MI) {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  return AMDGPUInstrInfo::isUniformMMO(MMO);
}

RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::getInstrAlternativeMappings(
    const MachineInstr &MI) const {

  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  InstructionMappings AltMappings;
  switch (MI.getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);

    if (Size == 1) {
      // s_{and|or|xor}_b32 set scc when the result of the 32-bit op is not 0.
      const InstructionMapping &SCCMapping = getInstructionMapping(
        1, 1, getOperandsMapping(
          {AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
        3); // Num Operands
      AltMappings.push_back(&SCCMapping);

      const InstructionMapping &SGPRMapping = getInstructionMapping(
        1, 1, getOperandsMapping(
          {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
        3); // Num Operands
      AltMappings.push_back(&SGPRMapping);

      const InstructionMapping &VCCMapping0 = getInstructionMapping(
        2, 10, getOperandsMapping(
          {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size)}),
        3); // Num Operands
      AltMappings.push_back(&VCCMapping0);
      return AltMappings;
    }

    if (Size != 64)
      break;

    const InstructionMapping &SSMapping = getInstructionMapping(
      1, 1, getOperandsMapping(
        {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
      3); // Num Operands
    AltMappings.push_back(&SSMapping);

    const InstructionMapping &VVMapping = getInstructionMapping(
      2, 2, getOperandsMapping(
        {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
      3); // Num Operands
    AltMappings.push_back(&VVMapping);

    const InstructionMapping &SVMapping = getInstructionMapping(
      3, 3, getOperandsMapping(
        {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
      3); // Num Operands
    AltMappings.push_back(&SVMapping);

    // SGPR in LHS is slightly preferable, so make VS more expensive than SV.
    const InstructionMapping &VSMapping = getInstructionMapping(
      3, 4, getOperandsMapping(
        {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size)}),
      3); // Num Operands
    AltMappings.push_back(&VSMapping);
    break;
  }
  case TargetOpcode::G_LOAD: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
    // FIXME: Should we be hard coding the size for these mappings?
    if (isInstrUniform(MI)) {
      const InstructionMapping &SSMapping = getInstructionMapping(
        1, 1, getOperandsMapping(
          {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
        2); // Num Operands
      AltMappings.push_back(&SSMapping);
    }

    const InstructionMapping &VVMapping = getInstructionMapping(
      2, 1, getOperandsMapping(
        {AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy),
         AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}),
      2); // Num Operands
    AltMappings.push_back(&VVMapping);

    // It may be possible to have a vgpr = load sgpr mapping here, because
    // the mubuf instructions support this kind of load, but probably for only
    // gfx7 and older. However, the addressing mode matching in the instruction
    // selector should be able to do a better job of detecting and selecting
    // these kinds of loads from the vgpr = load vgpr mapping.

    return AltMappings;

  }
  case TargetOpcode::G_ICMP: {
    unsigned Size = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI);
    const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
                          nullptr, // Predicate operand.
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&SSMapping);

    const InstructionMapping &SVMapping = getInstructionMapping(2, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
                          nullptr, // Predicate operand.
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&SVMapping);

    const InstructionMapping &VSMapping = getInstructionMapping(3, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
                          nullptr, // Predicate operand.
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&VSMapping);

    const InstructionMapping &VVMapping = getInstructionMapping(4, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
                          nullptr, // Predicate operand.
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&VVMapping);

    return AltMappings;
  }
  case TargetOpcode::G_SELECT: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&SSMapping);

    const InstructionMapping &VVMapping = getInstructionMapping(2, 1,
      getOperandsMapping({AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
                          AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&VVMapping);

    return AltMappings;
  }
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX: {
    static const OpRegBankEntry<3> Table[4] = {
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },

      // Scalar requires cmp+select, and extends if 16-bit.
      // FIXME: Should there be separate costs for 32 and 16-bit
      { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 3 }
    };

    const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 1, 2 } };
    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
      getOperandsMapping(
        {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1)}),
      5); // Num Operands
    AltMappings.push_back(&SSMapping);

    const InstructionMapping &VVMapping = getInstructionMapping(2, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1)}),
      5); // Num Operands
    AltMappings.push_back(&VVMapping);
    return AltMappings;
  }
  case AMDGPU::G_BRCOND: {
    assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);

    const InstructionMapping &SMapping = getInstructionMapping(
      1, 1, getOperandsMapping(
        {AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1), nullptr}),
      2); // Num Operands
    AltMappings.push_back(&SMapping);

    const InstructionMapping &VMapping = getInstructionMapping(
      1, 1, getOperandsMapping(
        {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1), nullptr }),
      2); // Num Operands
    AltMappings.push_back(&VMapping);
    return AltMappings;
  }
  case AMDGPU::G_INTRINSIC:
    return getInstrAlternativeMappingsIntrinsic(MI, MRI);
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
    return getInstrAlternativeMappingsIntrinsicWSideEffects(MI, MRI);
  default:
    break;
  }
  return RegisterBankInfo::getInstrAlternativeMappings(MI);
}

void AMDGPURegisterBankInfo::split64BitValueForMapping(
    MachineIRBuilder &B,
    SmallVector<Register, 2> &Regs,
    LLT HalfTy,
    Register Reg) const {
  assert(HalfTy.getSizeInBits() == 32);
  MachineRegisterInfo *MRI = B.getMRI();
  Register LoLHS = MRI->createGenericVirtualRegister(HalfTy);
  Register HiLHS = MRI->createGenericVirtualRegister(HalfTy);
  const RegisterBank *Bank = getRegBank(Reg, *MRI, *TRI);
  MRI->setRegBank(LoLHS, *Bank);
  MRI->setRegBank(HiLHS, *Bank);

  Regs.push_back(LoLHS);
  Regs.push_back(HiLHS);

  B.buildInstr(AMDGPU::G_UNMERGE_VALUES)
    .addDef(LoLHS)
    .addDef(HiLHS)
    .addUse(Reg);
}

/// Replace the current type each register in \p Regs has with \p NewTy
static void setRegsToType(MachineRegisterInfo &MRI, ArrayRef<Register> Regs,
                          LLT NewTy) {
  for (Register Reg : Regs) {
    assert(MRI.getType(Reg).getSizeInBits() == NewTy.getSizeInBits());
    MRI.setType(Reg, NewTy);
  }
}

static LLT getHalfSizedType(LLT Ty) {
  if (Ty.isVector()) {
    assert(Ty.getNumElements() % 2 == 0);
    return LLT::scalarOrVector(Ty.getNumElements() / 2, Ty.getElementType());
  }

  assert(Ty.getSizeInBits() % 2 == 0);
  return LLT::scalar(Ty.getSizeInBits() / 2);
}
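
// For example, getHalfSizedType(<4 x s32>) is <2 x s32>, and
// getHalfSizedType(s64) is s32.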

/// Legalize instruction \p MI where operands in \p OpIndices must be SGPRs. If
/// any of the required SGPR operands are VGPRs, perform a waterfall loop to
/// execute the instruction for each unique combination of values in all lanes
/// in the wave. The block will be split such that rest of the instructions are
/// moved to a new block.
///
/// Essentially performs this loop:
///
/// Save Execution Mask
/// For (Lane : Wavefront) {
///   Enable Lane, Disable all other lanes
///   SGPR = read SGPR value for current lane from VGPR
///   VGPRResult[Lane] = use_op SGPR
/// }
/// Restore Execution Mask
///
/// There is additional complexity in comparing the operand values to identify
/// the unique values actually used, so each one is only processed once.
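///
/// Roughly, the emitted structure looks like this (an illustrative sketch of
/// the code below, not the exact MIR):
///
///   entry:
///     %saveexec = S_MOV_B64_term $exec
///   loop:
///     %sgpr = V_READFIRSTLANE_B32 %vgpr_op
///     %cond = V_CMP_EQ_U32_e64 %sgpr, %vgpr_op
///     %newexec = S_AND_SAVEEXEC_B64 %cond
///     ... rewritten instruction using %sgpr ...
///     $exec = S_XOR_B64_term $exec, %newexec
///     S_CBRANCH_EXECNZ loop
///   restore:
///     $exec = S_MOV_B64_term %saveexec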
void AMDGPURegisterBankInfo::executeInWaterfallLoop(
    MachineInstr &MI, MachineRegisterInfo &MRI,
    ArrayRef<unsigned> OpIndices) const {
  MachineFunction *MF = MI.getParent()->getParent();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineBasicBlock::iterator I(MI);

  MachineBasicBlock &MBB = *MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();

  // Use a set to avoid extra readfirstlanes in the case where multiple operands
  // are the same register.
  SmallSet<Register, 4> SGPROperandRegs;
  for (unsigned Op : OpIndices) {
    assert(MI.getOperand(Op).isUse());
    Register Reg = MI.getOperand(Op).getReg();
    const RegisterBank *OpBank = getRegBank(Reg, MRI, *TRI);
    if (OpBank->getID() == AMDGPU::VGPRRegBankID)
      SGPROperandRegs.insert(Reg);
  }

  // No operands need to be replaced, so no need to loop.
  if (SGPROperandRegs.empty())
    return;

  MachineIRBuilder B(MI);
  SmallVector<Register, 4> ResultRegs;
  SmallVector<Register, 4> InitResultRegs;
  SmallVector<Register, 4> PhiRegs;
  for (MachineOperand &Def : MI.defs()) {
    LLT ResTy = MRI.getType(Def.getReg());
    const RegisterBank *DefBank = getRegBank(Def.getReg(), MRI, *TRI);
    ResultRegs.push_back(Def.getReg());
    Register InitReg = B.buildUndef(ResTy).getReg(0);
    Register PhiReg = MRI.createGenericVirtualRegister(ResTy);
    InitResultRegs.push_back(InitReg);
    PhiRegs.push_back(PhiReg);
    MRI.setRegBank(PhiReg, *DefBank);
    MRI.setRegBank(InitReg, *DefBank);
  }

  Register SaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
  Register InitSaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);

  // Don't bother using generic instructions/registers for the exec mask.
  B.buildInstr(TargetOpcode::IMPLICIT_DEF)
    .addDef(InitSaveExecReg);

  Register PhiExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  Register NewExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

  // To insert the loop we need to split the block. Move everything before this
  // point to a new block, and insert a new empty block before this instruction.
  MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock();
  MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock();
  MachineBasicBlock *RestoreExecBB = MF->CreateMachineBasicBlock();
  MachineFunction::iterator MBBI(MBB);
  ++MBBI;
  MF->insert(MBBI, LoopBB);
  MF->insert(MBBI, RestoreExecBB);
  MF->insert(MBBI, RemainderBB);

  LoopBB->addSuccessor(RestoreExecBB);
  LoopBB->addSuccessor(LoopBB);

  // Move the rest of the block into a new block.
  RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
  RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());

  MBB.addSuccessor(LoopBB);
  RestoreExecBB->addSuccessor(RemainderBB);
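
  // The resulting CFG (LoopBB also branches back to itself while EXEC is
  // still nonzero):
  //
  //   MBB -> LoopBB -> RestoreExecBB -> RemainderBB
  //           ^   |
  //           +---+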

  B.setInsertPt(*LoopBB, LoopBB->end());

  B.buildInstr(TargetOpcode::PHI)
    .addDef(PhiExec)
    .addReg(InitSaveExecReg)
    .addMBB(&MBB)
    .addReg(NewExec)
    .addMBB(LoopBB);

  for (auto Result : zip(InitResultRegs, ResultRegs, PhiRegs)) {
    B.buildInstr(TargetOpcode::G_PHI)
      .addDef(std::get<2>(Result))
      .addReg(std::get<0>(Result)) // Initial value / implicit_def
      .addMBB(&MBB)
      .addReg(std::get<1>(Result)) // Mid-loop value.
      .addMBB(LoopBB);
  }

  // Move the instruction into the loop.
  LoopBB->splice(LoopBB->end(), &MBB, I);
  I = std::prev(LoopBB->end());

  B.setInstr(*I);

  Register CondReg;

  for (MachineOperand &Op : MI.uses()) {
    if (!Op.isReg())
      continue;

    assert(!Op.isDef());
    if (SGPROperandRegs.count(Op.getReg())) {
      LLT OpTy = MRI.getType(Op.getReg());
      unsigned OpSize = OpTy.getSizeInBits();

      // Can only do a readlane of 32-bit pieces.
      if (OpSize == 32) {
        // Avoid extra copies in the simple case of one 32-bit register.
        Register CurrentLaneOpReg
          = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
        MRI.setType(CurrentLaneOpReg, OpTy);

        constrainGenericRegister(Op.getReg(), AMDGPU::VGPR_32RegClass, MRI);
        // Read the next variant <- also loop target.
        BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
                CurrentLaneOpReg)
          .addReg(Op.getReg());

        Register NewCondReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
        bool First = CondReg == AMDGPU::NoRegister;
        if (First)
          CondReg = NewCondReg;

        // Compare the just-read scalar against the per-lane values to find
        // every lane holding the same value.
        B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64)
          .addDef(NewCondReg)
          .addReg(CurrentLaneOpReg)
          .addReg(Op.getReg());
        Op.setReg(CurrentLaneOpReg);

        if (!First) {
          Register AndReg
            = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);

          // If there are multiple operands to consider, AND the conditions
          // together.
          B.buildInstr(AMDGPU::S_AND_B64)
            .addDef(AndReg)
            .addReg(NewCondReg)
            .addReg(CondReg);
          CondReg = AndReg;
        }
      } else {
        LLT S32 = LLT::scalar(32);
        SmallVector<Register, 8> ReadlanePieces;

        // The compares can be done as 64-bit, but the extract needs to be done
        // in 32-bit pieces.

        bool Is64 = OpSize % 64 == 0;

        LLT UnmergeTy = Is64 ? LLT::scalar(64) : LLT::scalar(32);
        unsigned CmpOp = Is64 ? AMDGPU::V_CMP_EQ_U64_e64
                              : AMDGPU::V_CMP_EQ_U32_e64;

        // Insert the unmerge before the loop.

        B.setMBB(MBB);
        auto Unmerge = B.buildUnmerge(UnmergeTy, Op.getReg());
        B.setInstr(*I);

        unsigned NumPieces = Unmerge->getNumOperands() - 1;
        for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx) {
          unsigned UnmergePiece = Unmerge.getReg(PieceIdx);

          Register CurrentLaneOpReg;
          if (Is64) {
            Register CurrentLaneOpRegLo = MRI.createGenericVirtualRegister(S32);
            Register CurrentLaneOpRegHi = MRI.createGenericVirtualRegister(S32);

            MRI.setRegClass(UnmergePiece, &AMDGPU::VReg_64RegClass);
            MRI.setRegClass(CurrentLaneOpRegLo, &AMDGPU::SReg_32_XM0RegClass);
            MRI.setRegClass(CurrentLaneOpRegHi, &AMDGPU::SReg_32_XM0RegClass);

            // Read the next variant <- also loop target.
            BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
                    CurrentLaneOpRegLo)
              .addReg(UnmergePiece, 0, AMDGPU::sub0);

            // Read the next variant <- also loop target.
            BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
                    CurrentLaneOpRegHi)
              .addReg(UnmergePiece, 0, AMDGPU::sub1);

            CurrentLaneOpReg =
              B.buildMerge(LLT::scalar(64),
                           {CurrentLaneOpRegLo, CurrentLaneOpRegHi})
               .getReg(0);

            MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_64_XEXECRegClass);

            if (OpTy.getScalarSizeInBits() == 64) {
              // If we need to produce a 64-bit element vector, use the
              // merged pieces.
              ReadlanePieces.push_back(CurrentLaneOpReg);
            } else {
              // 32-bit element type.
              ReadlanePieces.push_back(CurrentLaneOpRegLo);
              ReadlanePieces.push_back(CurrentLaneOpRegHi);
            }
          } else {
            CurrentLaneOpReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
            MRI.setRegClass(UnmergePiece, &AMDGPU::VGPR_32RegClass);
            MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_32_XM0RegClass);

            // Read the next variant <- also loop target.
            BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
                    CurrentLaneOpReg)
              .addReg(UnmergePiece);
            ReadlanePieces.push_back(CurrentLaneOpReg);
          }

          Register NewCondReg
            = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
          bool First = CondReg == AMDGPU::NoRegister;
          if (First)
            CondReg = NewCondReg;

          B.buildInstr(CmpOp)
            .addDef(NewCondReg)
            .addReg(CurrentLaneOpReg)
            .addReg(UnmergePiece);

          if (!First) {
            Register AndReg
              = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);

            // If there are multiple operands to consider, AND the conditions
            // together.
            B.buildInstr(AMDGPU::S_AND_B64)
              .addDef(AndReg)
              .addReg(NewCondReg)
              .addReg(CondReg);
            CondReg = AndReg;
          }
        }

        // FIXME: Build merge seems to switch to CONCAT_VECTORS but not
        // BUILD_VECTOR
        if (OpTy.isVector()) {
          auto Merge = B.buildBuildVector(OpTy, ReadlanePieces);
          Op.setReg(Merge.getReg(0));
        } else {
          auto Merge = B.buildMerge(OpTy, ReadlanePieces);
          Op.setReg(Merge.getReg(0));
        }

        MRI.setRegBank(Op.getReg(), getRegBank(AMDGPU::SGPRRegBankID));
      }
    }
  }

  B.setInsertPt(*LoopBB, LoopBB->end());

  // Update EXEC, saving the original EXEC value to NewExec.
  B.buildInstr(AMDGPU::S_AND_SAVEEXEC_B64)
    .addDef(NewExec)
    .addReg(CondReg, RegState::Kill);

  MRI.setSimpleHint(NewExec, CondReg);

  // Update EXEC, switch all done bits to 0 and all todo bits to 1.
  B.buildInstr(AMDGPU::S_XOR_B64_term)
    .addDef(AMDGPU::EXEC)
    .addReg(AMDGPU::EXEC)
    .addReg(NewExec);

  // XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
  // s_cbranch_scc0?

  // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover.
  B.buildInstr(AMDGPU::S_CBRANCH_EXECNZ)
    .addMBB(LoopBB);

  // Save the EXEC mask before the loop.
  BuildMI(MBB, MBB.end(), DL, TII->get(AMDGPU::S_MOV_B64_term), SaveExecReg)
    .addReg(AMDGPU::EXEC);

  // Restore the EXEC mask after the loop.
  B.setMBB(*RestoreExecBB);
  B.buildInstr(AMDGPU::S_MOV_B64_term)
    .addDef(AMDGPU::EXEC)
    .addReg(SaveExecReg);
}

// Legalize an operand that must be an SGPR by inserting a readfirstlane.
void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane(
    MachineInstr &MI, MachineRegisterInfo &MRI, unsigned OpIdx) const {
  Register Reg = MI.getOperand(OpIdx).getReg();
  const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI);
  if (Bank != &AMDGPU::VGPRRegBank)
    return;

  MachineIRBuilder B(MI);
  Register SGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  B.buildInstr(AMDGPU::V_READFIRSTLANE_B32)
    .addDef(SGPR)
    .addReg(Reg);

  const TargetRegisterClass *Constrained =
      constrainGenericRegister(Reg, AMDGPU::VGPR_32RegClass, MRI);
  (void)Constrained;
  assert(Constrained && "Failed to constrain readfirstlane src reg");

  MI.getOperand(OpIdx).setReg(SGPR);
}

// When regbankselect repairs registers, it will insert a repair instruction
// which defines the repaired register. Then it calls applyMapping and expects
// that the targets will either delete or rewrite the instructions that
// originally wrote to the repaired registers. Because of this, we end up in a
// situation where we have 2 instructions defining the same registers.
static MachineInstr *getOtherVRegDef(const MachineRegisterInfo &MRI,
                                     Register Reg,
                                     const MachineInstr &MI) {
  // Is there some way we can assert that there are exactly 2 def instructions?
  for (MachineInstr &Other : MRI.def_instructions(Reg)) {
    if (&Other != &MI)
      return &Other;
  }

  return nullptr;
}

bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI,
                        const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
                                                 MachineRegisterInfo &MRI) const {
  Register DstReg = MI.getOperand(0).getReg();
  const LLT LoadTy = MRI.getType(DstReg);
  unsigned LoadSize = LoadTy.getSizeInBits();
  const unsigned MaxNonSmrdLoadSize = 128;
  // 128-bit loads are supported for all instruction types.
  if (LoadSize <= MaxNonSmrdLoadSize)
    return false;

  SmallVector<unsigned, 16> DefRegs(OpdMapper.getVRegs(0));
  SmallVector<unsigned, 1> SrcRegs(OpdMapper.getVRegs(1));

  // If the pointer is an SGPR, we have nothing to do.
  if (SrcRegs.empty())
    return false;

  assert(LoadSize % MaxNonSmrdLoadSize == 0);
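
  // For example, a 256-bit <8 x s32> load is split here into two 128-bit
  // <4 x s32> loads, whose results are recombined below.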

  // We want to get the repair instruction now, because it will help us
  // determine which instruction the legalizer inserts that will also
  // write to DstReg.
  MachineInstr *RepairInst = getOtherVRegDef(MRI, DstReg, MI);

  // RegBankSelect only emits scalar types, so we need to reset the pointer
  // operand to a pointer type.
  Register BasePtrReg = SrcRegs[0];
  LLT PtrTy = MRI.getType(MI.getOperand(1).getReg());
  MRI.setType(BasePtrReg, PtrTy);

  MachineIRBuilder B(MI);

  unsigned SplitElts =
      MaxNonSmrdLoadSize / LoadTy.getScalarType().getSizeInBits();
  const LLT LoadSplitTy = LLT::vector(SplitElts, LoadTy.getScalarType());
  ApplyRegBankMapping O(MRI, &AMDGPU::VGPRRegBank);
  GISelObserverWrapper Observer(&O);
  B.setChangeObserver(Observer);
  LegalizerHelper Helper(B.getMF(), Observer, B);
  if (Helper.fewerElementsVector(MI, 0, LoadSplitTy) != LegalizerHelper::Legalized)
    return false;

  // At this point, the legalizer has split the original load into smaller
  // loads. At the end of lowering, it inserts an instruction (LegalizedInst)
  // that combines the outputs of the lower loads and writes it to DstReg.
  // The register bank selector has also added the RepairInst which writes to
  // DstReg as well.

  MachineInstr *LegalizedInst = getOtherVRegDef(MRI, DstReg, *RepairInst);

  // Replace the output of the LegalizedInst with a temporary register, since
  // RepairInst already defines DstReg.
  Register TmpReg = MRI.createGenericVirtualRegister(MRI.getType(DstReg));
  LegalizedInst->getOperand(0).setReg(TmpReg);
  B.setInsertPt(*RepairInst->getParent(), RepairInst);

  for (unsigned DefIdx = 0, e = DefRegs.size(); DefIdx != e; ++DefIdx) {
    Register IdxReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
    B.buildConstant(IdxReg, DefIdx);
    MRI.setRegBank(IdxReg, getRegBank(AMDGPU::VGPRRegBankID));
    B.buildExtractVectorElement(DefRegs[DefIdx], TmpReg, IdxReg);
  }

  MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
  return true;
}

// For cases where only a single copy is inserted for matching register banks,
// replace the register in the instruction operand.
static void substituteSimpleCopyRegs(
    const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, unsigned OpIdx) {
  SmallVector<unsigned, 1> SrcReg(OpdMapper.getVRegs(OpIdx));
  if (!SrcReg.empty()) {
    assert(SrcReg.size() == 1);
    OpdMapper.getMI().getOperand(OpIdx).setReg(SrcReg[0]);
  }
}

void AMDGPURegisterBankInfo::applyMappingImpl(
    const OperandsMapper &OpdMapper) const {
  MachineInstr &MI = OpdMapper.getMI();
  unsigned Opc = MI.getOpcode();
  MachineRegisterInfo &MRI = OpdMapper.getMRI();
  switch (Opc) {
  case AMDGPU::G_SELECT: {
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy.getSizeInBits() != 64)
      break;

    LLT HalfTy = getHalfSizedType(DstTy);

    SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
    SmallVector<Register, 1> Src0Regs(OpdMapper.getVRegs(1));
    SmallVector<Register, 2> Src1Regs(OpdMapper.getVRegs(2));
    SmallVector<Register, 2> Src2Regs(OpdMapper.getVRegs(3));

    // All inputs are SGPRs, nothing special to do.
    if (DefRegs.empty()) {
      assert(Src1Regs.empty() && Src2Regs.empty());
      break;
    }

    MachineIRBuilder B(MI);
    if (Src0Regs.empty())
      Src0Regs.push_back(MI.getOperand(1).getReg());
    else {
      assert(Src0Regs.size() == 1);
    }

    if (Src1Regs.empty())
      split64BitValueForMapping(B, Src1Regs, HalfTy, MI.getOperand(2).getReg());
    else {
      setRegsToType(MRI, Src1Regs, HalfTy);
    }

    if (Src2Regs.empty())
      split64BitValueForMapping(B, Src2Regs, HalfTy, MI.getOperand(3).getReg());
    else
      setRegsToType(MRI, Src2Regs, HalfTy);

    setRegsToType(MRI, DefRegs, HalfTy);

    B.buildSelect(DefRegs[0], Src0Regs[0], Src1Regs[0], Src2Regs[0]);
    B.buildSelect(DefRegs[1], Src0Regs[0], Src1Regs[1], Src2Regs[1]);

    MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
    MI.eraseFromParent();
    return;
  }
  case AMDGPU::G_AND:
  case AMDGPU::G_OR:
  case AMDGPU::G_XOR: {
    // 64-bit and is only available on the SALU, so split into 2 32-bit ops if
    // there is a VGPR input.
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy.getSizeInBits() != 64)
      break;

    LLT HalfTy = getHalfSizedType(DstTy);
    SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
    SmallVector<Register, 2> Src0Regs(OpdMapper.getVRegs(1));
    SmallVector<Register, 2> Src1Regs(OpdMapper.getVRegs(2));

    // All inputs are SGPRs, nothing special to do.
    if (DefRegs.empty()) {
      assert(Src0Regs.empty() && Src1Regs.empty());
      break;
    }

    assert(DefRegs.size() == 2);
    assert(Src0Regs.size() == Src1Regs.size() &&
           (Src0Regs.empty() || Src0Regs.size() == 2));

    // Depending on where the source registers came from, the generic code may
    // have decided to split the inputs already or not. If not, we still need to
    // extract the values.
    MachineIRBuilder B(MI);

    if (Src0Regs.empty())
      split64BitValueForMapping(B, Src0Regs, HalfTy, MI.getOperand(1).getReg());
    else
      setRegsToType(MRI, Src0Regs, HalfTy);

    if (Src1Regs.empty())
      split64BitValueForMapping(B, Src1Regs, HalfTy, MI.getOperand(2).getReg());
    else
      setRegsToType(MRI, Src1Regs, HalfTy);

    setRegsToType(MRI, DefRegs, HalfTy);

    B.buildInstr(Opc)
      .addDef(DefRegs[0])
      .addUse(Src0Regs[0])
      .addUse(Src1Regs[0]);

    B.buildInstr(Opc)
      .addDef(DefRegs[1])
      .addUse(Src0Regs[1])
      .addUse(Src1Regs[1]);

    MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
    MI.eraseFromParent();
    return;
  }
  case AMDGPU::G_ADD:
  case AMDGPU::G_SUB:
  case AMDGPU::G_MUL: {
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy != LLT::scalar(16))
      break;

    const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
    if (DstBank == &AMDGPU::VGPRRegBank)
      break;

    // 16-bit operations are VALU only, but can be promoted to 32-bit SALU.
    MachineFunction *MF = MI.getParent()->getParent();
    MachineIRBuilder B(MI);
    ApplyRegBankMapping ApplySALU(MRI, &AMDGPU::SGPRRegBank);
    GISelObserverWrapper Observer(&ApplySALU);
    LegalizerHelper Helper(*MF, Observer, B);

    if (Helper.widenScalar(MI, 0, LLT::scalar(32)) !=
        LegalizerHelper::Legalized)
      llvm_unreachable("widen scalar should have succeeded");
    return;
  }
  case AMDGPU::G_SMIN:
  case AMDGPU::G_SMAX:
  case AMDGPU::G_UMIN:
  case AMDGPU::G_UMAX: {
    Register DstReg = MI.getOperand(0).getReg();
    const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
    if (DstBank == &AMDGPU::VGPRRegBank)
      break;

    MachineFunction *MF = MI.getParent()->getParent();
    MachineIRBuilder B(MI);
    ApplyRegBankMapping ApplySALU(MRI, &AMDGPU::SGPRRegBank);
    GISelObserverWrapper Observer(&ApplySALU);
    LegalizerHelper Helper(*MF, Observer, B);

    // Turn scalar min/max into a compare and select.
    LLT Ty = MRI.getType(DstReg);
    LLT S32 = LLT::scalar(32);
    LLT S16 = LLT::scalar(16);

    if (Ty == S16) {
      // Need to widen to s32, and expand as cmp + select.
      if (Helper.widenScalar(MI, 0, S32) != LegalizerHelper::Legalized)
        llvm_unreachable("widenScalar should have succeeded");

      // FIXME: This is relying on widenScalar leaving MI in place.
      if (Helper.lower(MI, 0, S32) != LegalizerHelper::Legalized)
        llvm_unreachable("lower should have succeeded");
    } else {
      if (Helper.lower(MI, 0, Ty) != LegalizerHelper::Legalized)
        llvm_unreachable("lower should have succeeded");
    }

    return;
  }
  case AMDGPU::G_SEXT:
  case AMDGPU::G_ZEXT: {
    Register SrcReg = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    bool Signed = Opc == AMDGPU::G_SEXT;

    MachineIRBuilder B(MI);
    const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);

    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy.isScalar() &&
        SrcBank != &AMDGPU::SGPRRegBank &&
        SrcBank != &AMDGPU::SCCRegBank &&
        SrcBank != &AMDGPU::VCCRegBank &&
        // FIXME: Should handle any type that rounds to s64 when irregular
        // breakdowns are supported.
        DstTy.getSizeInBits() == 64 &&
        SrcTy.getSizeInBits() <= 32) {
      const LLT S32 = LLT::scalar(32);
      SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));

      // Extend to 32-bit, and then extend the low half.
      if (Signed) {
        // TODO: Should really be buildSExtOrCopy
        B.buildSExtOrTrunc(DefRegs[0], SrcReg);

        // Replicate sign bit from 32-bit extended part.
        auto ShiftAmt = B.buildConstant(S32, 31);
        MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank);
        B.buildAShr(DefRegs[1], DefRegs[0], ShiftAmt);
      } else {
        B.buildZExtOrTrunc(DefRegs[0], SrcReg);
        B.buildConstant(DefRegs[1], 0);
      }
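
      // For example, sext s16 -> s64 becomes: lo = G_SEXT to s32,
      // hi = lo >>s 31; the zext form instead pairs the low half with a
      // constant 0 high half.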

      MRI.setRegBank(DstReg, *SrcBank);
      MI.eraseFromParent();
      return;
    }

    if (SrcTy != LLT::scalar(1))
      return;

    if (SrcBank == &AMDGPU::SCCRegBank || SrcBank == &AMDGPU::VCCRegBank) {
      SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));

      const RegisterBank *DstBank = SrcBank == &AMDGPU::SCCRegBank ?
        &AMDGPU::SGPRRegBank : &AMDGPU::VGPRRegBank;

      unsigned DstSize = DstTy.getSizeInBits();
      // 64-bit select is SGPR only
      const bool UseSel64 = DstSize > 32 &&
        SrcBank->getID() == AMDGPU::SCCRegBankID;

      // TODO: Should s16 select be legal?
      LLT SelType = UseSel64 ? LLT::scalar(64) : LLT::scalar(32);
      auto True = B.buildConstant(SelType, Signed ? -1 : 1);
      auto False = B.buildConstant(SelType, 0);

      MRI.setRegBank(True.getReg(0), *DstBank);
      MRI.setRegBank(False.getReg(0), *DstBank);
      MRI.setRegBank(DstReg, *DstBank);

      if (DstSize > 32 && SrcBank->getID() != AMDGPU::SCCRegBankID) {
        B.buildSelect(DefRegs[0], SrcReg, True, False);
        B.buildCopy(DefRegs[1], DefRegs[0]);
      } else if (DstSize < 32) {
        auto Sel = B.buildSelect(SelType, SrcReg, True, False);
        MRI.setRegBank(Sel.getReg(0), *DstBank);
        B.buildTrunc(DstReg, Sel);
      } else {
        B.buildSelect(DstReg, SrcReg, True, False);
      }

      MI.eraseFromParent();
      return;
    }

    // Fixup the case with an s1 src that isn't a condition register. Use
    // shifts instead of introducing a compare to avoid an unnecessary
    // condition register (and since there are no scalar 16-bit compares).
    auto Ext = B.buildAnyExt(DstTy, SrcReg);
    auto ShiftAmt = B.buildConstant(LLT::scalar(32), DstTy.getSizeInBits() - 1);
    auto Shl = B.buildShl(DstTy, Ext, ShiftAmt);

    if (MI.getOpcode() == AMDGPU::G_SEXT)
      B.buildAShr(DstReg, Shl, ShiftAmt);
    else
      B.buildLShr(DstReg, Shl, ShiftAmt);
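
    // For a 32-bit result this computes (x << 31) >>s 31 for sext (0 or -1),
    // or (x << 31) >>u 31 for zext (0 or 1).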

    MRI.setRegBank(DstReg, *SrcBank);
    MRI.setRegBank(Ext.getReg(0), *SrcBank);
    MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank);
    MRI.setRegBank(Shl.getReg(0), *SrcBank);
    MI.eraseFromParent();
    return;
  }
  case AMDGPU::G_EXTRACT_VECTOR_ELT:
    applyDefaultMapping(OpdMapper);
    executeInWaterfallLoop(MI, MRI, { 2 });
    return;
  case AMDGPU::G_INTRINSIC: {
    switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
    case Intrinsic::amdgcn_s_buffer_load: {
      // FIXME: Move to G_INTRINSIC_W_SIDE_EFFECTS
      executeInWaterfallLoop(MI, MRI, { 2, 3 });
      return;
    }
    case Intrinsic::amdgcn_readlane: {
      substituteSimpleCopyRegs(OpdMapper, 2);

      assert(empty(OpdMapper.getVRegs(0)));
      assert(empty(OpdMapper.getVRegs(3)));

      // Make sure the index is an SGPR. It doesn't make sense to run this in a
      // waterfall loop, so assume it's a uniform value.
      constrainOpWithReadfirstlane(MI, MRI, 3); // Index
      return;
    }
    case Intrinsic::amdgcn_writelane: {
      assert(empty(OpdMapper.getVRegs(0)));
      assert(empty(OpdMapper.getVRegs(2)));
      assert(empty(OpdMapper.getVRegs(3)));

      substituteSimpleCopyRegs(OpdMapper, 4); // VGPR input val
      constrainOpWithReadfirstlane(MI, MRI, 2); // Source value
      constrainOpWithReadfirstlane(MI, MRI, 3); // Index
      return;
    }
    default:
      break;
    }
    break;
  }
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
    switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
    case Intrinsic::amdgcn_buffer_load: {
      executeInWaterfallLoop(MI, MRI, { 2 });
      return;
    }
    case Intrinsic::amdgcn_ds_ordered_add:
    case Intrinsic::amdgcn_ds_ordered_swap: {
      // This is only allowed to execute with 1 lane, so readfirstlane is safe.
      assert(empty(OpdMapper.getVRegs(0)));
      substituteSimpleCopyRegs(OpdMapper, 3);
      constrainOpWithReadfirstlane(MI, MRI, 2); // M0
      return;
    }
    case Intrinsic::amdgcn_s_sendmsg:
    case Intrinsic::amdgcn_s_sendmsghalt: {
      // FIXME: Should this use a waterfall loop?
      constrainOpWithReadfirstlane(MI, MRI, 2); // M0
      return;
    }
    default:
      break;
    }
    break;
  }
  case AMDGPU::G_LOAD: {
    if (applyMappingWideLoad(MI, OpdMapper, MRI))
      return;
    break;
  }
  default:
    break;
  }

  return applyDefaultMapping(OpdMapper);
}

bool AMDGPURegisterBankInfo::isSALUMapping(const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    if (!MI.getOperand(i).isReg())
      continue;
    Register Reg = MI.getOperand(i).getReg();
    if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) {
      if (Bank->getID() == AMDGPU::VGPRRegBankID)
        return false;

      assert(Bank->getID() == AMDGPU::SGPRRegBankID ||
             Bank->getID() == AMDGPU::SCCRegBankID);
    }
  }
  return true;
}

const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getDefaultMappingSOP(const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());

  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
    unsigned BankID = Size == 1 ? AMDGPU::SCCRegBankID : AMDGPU::SGPRRegBankID;
    OpdsMapping[i] = AMDGPU::getValueMapping(BankID, Size);
  }
  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
                               MI.getNumOperands());
}

const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getDefaultMappingVOP(const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
  unsigned OpdIdx = 0;

  unsigned Size0 = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
  OpdsMapping[OpdIdx++] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size0);

  if (MI.getOperand(OpdIdx).isIntrinsicID())
    OpdsMapping[OpdIdx++] = nullptr;

  Register Reg1 = MI.getOperand(OpdIdx).getReg();
  unsigned Size1 = getSizeInBits(Reg1, MRI, *TRI);

  unsigned DefaultBankID = Size1 == 1 ?
    AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
  unsigned Bank1 = getRegBankID(Reg1, MRI, *TRI, DefaultBankID);

  OpdsMapping[OpdIdx++] = AMDGPU::getValueMapping(Bank1, Size1);

  for (unsigned e = MI.getNumOperands(); OpdIdx != e; ++OpdIdx) {
    const MachineOperand &MO = MI.getOperand(OpdIdx);
    if (!MO.isReg())
      continue;

    unsigned Size = getSizeInBits(MO.getReg(), MRI, *TRI);
    unsigned BankID = Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
    OpdsMapping[OpdIdx] = AMDGPU::getValueMapping(BankID, Size);
  }

  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
                               MI.getNumOperands());
}

const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());

  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
    const MachineOperand &Op = MI.getOperand(I);
    if (!Op.isReg())
      continue;

    unsigned Size = getSizeInBits(Op.getReg(), MRI, *TRI);
    OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
  }

  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
                               MI.getNumOperands());
}

const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {

  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
  unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
  LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned PtrSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);

  const ValueMapping *ValMapping;
  const ValueMapping *PtrMapping;

  if (isInstrUniform(MI)) {
    // We have a uniform instruction so we want to use an SMRD load
    ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
    PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
  } else {
    ValMapping = AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy);
    // FIXME: What would happen if we used SGPRRegBankID here?
    PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
  }

  OpdsMapping[0] = ValMapping;
  OpdsMapping[1] = PtrMapping;
  const RegisterBankInfo::InstructionMapping &Mapping = getInstructionMapping(
      1, 1, getOperandsMapping(OpdsMapping), MI.getNumOperands());
  return Mapping;

  // FIXME: Do we want to add a mapping for FLAT load, or should we just
  // handle that during instruction selection?
}

unsigned
AMDGPURegisterBankInfo::getRegBankID(Register Reg,
                                     const MachineRegisterInfo &MRI,
                                     const TargetRegisterInfo &TRI,
                                     unsigned Default) const {

  const RegisterBank *Bank = getRegBank(Reg, MRI, TRI);
  return Bank ? Bank->getID() : Default;
}

///
/// This function must return a legal mapping, because
/// AMDGPURegisterBankInfo::getInstrAlternativeMappings() is not called
/// in RegBankSelect::Mode::Fast. Any mapping that would cause a
/// VGPR-to-SGPR copy to be generated is illegal.
///
const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  if (MI.isRegSequence()) {
    // If any input is a VGPR, the result must be a VGPR. The default handling
    // assumes any copy between banks is legal.
    unsigned BankID = AMDGPU::SGPRRegBankID;

    for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
      auto OpBank = getRegBankID(MI.getOperand(I).getReg(), MRI, *TRI);
      // It doesn't make sense to use vcc or scc banks here, so just ignore
      // them.
      if (OpBank != AMDGPU::SGPRRegBankID) {
        BankID = AMDGPU::VGPRRegBankID;
        break;
      }
    }
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);

    const ValueMapping &ValMap = getValueMapping(0, Size, getRegBank(BankID));
    return getInstructionMapping(
        1, /*Cost*/ 1,
        /*OperandsMapping*/ getOperandsMapping({&ValMap}), 1);
  }

  // The default handling is broken and doesn't handle illegal SGPR->VGPR copies
  // properly.
  //
  // TODO: There are additional exec masking dependencies to analyze.
  if (MI.getOpcode() == TargetOpcode::G_PHI) {
    // TODO: Generate proper invalid bank enum.
    int ResultBank = -1;

    for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
      unsigned Reg = MI.getOperand(I).getReg();
      const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI);

      // FIXME: Assuming VGPR for any undetermined inputs.
      if (!Bank || Bank->getID() == AMDGPU::VGPRRegBankID) {
        ResultBank = AMDGPU::VGPRRegBankID;
        break;
      }

      unsigned OpBank = Bank->getID();
      // scc, scc -> sgpr
      if (OpBank == AMDGPU::SCCRegBankID) {
        // There's only one SCC register, so a phi requires copying to SGPR.
        OpBank = AMDGPU::SGPRRegBankID;
      } else if (OpBank == AMDGPU::VCCRegBankID) {
        // vcc, vcc -> vcc
        // vcc, sgpr -> vgpr
        if (ResultBank != -1 && ResultBank != AMDGPU::VCCRegBankID) {
          ResultBank = AMDGPU::VGPRRegBankID;
          break;
        }
      }

      ResultBank = OpBank;
    }

    assert(ResultBank != -1);

    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

    const ValueMapping &ValMap =
        getValueMapping(0, Size, getRegBank(ResultBank));
    return getInstructionMapping(
        1, /*Cost*/ 1,
        /*OperandsMapping*/ getOperandsMapping({&ValMap}), 1);
  }

  const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI);
  if (Mapping.isValid())
    return Mapping;

  SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());

  switch (MI.getOpcode()) {
  default:
    return getInvalidInstructionMapping();

1586  case AMDGPU::G_AND:
1587  case AMDGPU::G_OR:
1588  case AMDGPU::G_XOR: {
1589  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1590  if (Size == 1) {
1591  const RegisterBank *DstBank
1592  = getRegBank(MI.getOperand(0).getReg(), MRI, *TRI);
1593 
1594  unsigned TargetBankID = -1;
1595  unsigned BankLHS = -1;
1596  unsigned BankRHS = -1;
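// For s1 values the bank encodes the boolean convention, roughly: VCC for a
// per-lane (divergent) condition, SCC/SGPR for a wave-uniform one. Pick
// operand banks to match the destination when its bank is already known.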
1597  if (DstBank) {
1598  TargetBankID = DstBank->getID();
1599  if (DstBank == &AMDGPU::VCCRegBank) {
1600  TargetBankID = AMDGPU::VCCRegBankID;
1601  BankLHS = AMDGPU::VCCRegBankID;
1602  BankRHS = AMDGPU::VCCRegBankID;
1603  } else if (DstBank == &AMDGPU::SCCRegBank) {
1604  TargetBankID = AMDGPU::SCCRegBankID;
1605  BankLHS = AMDGPU::SGPRRegBankID;
1606  BankRHS = AMDGPU::SGPRRegBankID;
1607  } else {
1608  BankLHS = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
1609  AMDGPU::SGPRRegBankID);
1610  BankRHS = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
1611  AMDGPU::SGPRRegBankID);
1612  }
1613  } else {
1614  BankLHS = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
1615  AMDGPU::VCCRegBankID);
1616  BankRHS = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
1617  AMDGPU::VCCRegBankID);
1618 
1619  // Both inputs should be true booleans to produce a boolean result.
1620  if (BankLHS == AMDGPU::VGPRRegBankID || BankRHS == AMDGPU::VGPRRegBankID) {
1621  TargetBankID = AMDGPU::VGPRRegBankID;
1622  } else if (BankLHS == AMDGPU::VCCRegBankID || BankRHS == AMDGPU::VCCRegBankID) {
1623  TargetBankID = AMDGPU::VCCRegBankID;
1624  BankLHS = AMDGPU::VCCRegBankID;
1625  BankRHS = AMDGPU::VCCRegBankID;
1626  } else if (BankLHS == AMDGPU::SGPRRegBankID && BankRHS == AMDGPU::SGPRRegBankID) {
1627  TargetBankID = AMDGPU::SGPRRegBankID;
1628  } else if (BankLHS == AMDGPU::SCCRegBankID || BankRHS == AMDGPU::SCCRegBankID) {
1629  // The operation must be done on a 32-bit register, but it will set
1630  // scc. The result type could interchangeably be SCC or SGPR, since
1631  // both values will be produced.
1632  TargetBankID = AMDGPU::SCCRegBankID;
1633  BankLHS = AMDGPU::SGPRRegBankID;
1634  BankRHS = AMDGPU::SGPRRegBankID;
1635  }
1636  }
1637 
1638  OpdsMapping[0] = AMDGPU::getValueMapping(TargetBankID, Size);
1639  OpdsMapping[1] = AMDGPU::getValueMapping(BankLHS, Size);
1640  OpdsMapping[2] = AMDGPU::getValueMapping(BankRHS, Size);
1641  break;
1642  }
1643 
1644  if (Size == 64) {
1645 
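// Only the SALU has native 64-bit bitwise ops (e.g. S_AND_B64); for VGPRs
// the SGPR64-only mapping describes the value as two 32-bit halves.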
1646  if (isSALUMapping(MI)) {
1647  OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size);
1648  OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0];
1649  } else {
1650  OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size);
1651  unsigned Bank1 = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI/*, DefaultBankID*/);
1652  OpdsMapping[1] = AMDGPU::getValueMapping(Bank1, Size);
1653 
1654  unsigned Bank2 = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI/*, DefaultBankID*/);
1655  OpdsMapping[2] = AMDGPU::getValueMapping(Bank2, Size);
1656  }
1657 
1658  break;
1659  }
1660 
1661  LLVM_FALLTHROUGH;
1662  }
1663 
1664  case AMDGPU::G_GEP:
1665  case AMDGPU::G_ADD:
1666  case AMDGPU::G_SUB:
1667  case AMDGPU::G_MUL:
1668  case AMDGPU::G_SHL:
1669  case AMDGPU::G_LSHR:
1670  case AMDGPU::G_ASHR:
1671  case AMDGPU::G_UADDO:
1672  case AMDGPU::G_SADDO:
1673  case AMDGPU::G_USUBO:
1674  case AMDGPU::G_SSUBO:
1675  case AMDGPU::G_UADDE:
1676  case AMDGPU::G_SADDE:
1677  case AMDGPU::G_USUBE:
1678  case AMDGPU::G_SSUBE:
1679  case AMDGPU::G_UMULH:
1680  case AMDGPU::G_SMULH:
1681  case AMDGPU::G_SMIN:
1682  case AMDGPU::G_SMAX:
1683  case AMDGPU::G_UMIN:
1684  case AMDGPU::G_UMAX:
1685  if (isSALUMapping(MI))
1686  return getDefaultMappingSOP(MI);
1687  LLVM_FALLTHROUGH;
1688 
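// These targets have no scalar FP instructions, so FP operations always take
// the default VALU mapping.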
1689  case AMDGPU::G_FADD:
1690  case AMDGPU::G_FSUB:
1691  case AMDGPU::G_FPTOSI:
1692  case AMDGPU::G_FPTOUI:
1693  case AMDGPU::G_FMUL:
1694  case AMDGPU::G_FMA:
1695  case AMDGPU::G_FSQRT:
1696  case AMDGPU::G_SITOFP:
1697  case AMDGPU::G_UITOFP:
1698  case AMDGPU::G_FPTRUNC:
1699  case AMDGPU::G_FPEXT:
1700  case AMDGPU::G_FEXP2:
1701  case AMDGPU::G_FLOG2:
1702  case AMDGPU::G_FCANONICALIZE:
1703  case AMDGPU::G_INTRINSIC_TRUNC:
1704  case AMDGPU::G_INTRINSIC_ROUND:
1705  return getDefaultMappingVOP(MI);
1706  case AMDGPU::G_IMPLICIT_DEF: {
1707  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1708  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
1709  break;
1710  }
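// Constants and addresses are uniform values; default them to SGPR, since an
// SGPR->VGPR copy is cheap if a VALU use needs them.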
1711  case AMDGPU::G_FCONSTANT:
1712  case AMDGPU::G_CONSTANT:
1713  case AMDGPU::G_FRAME_INDEX:
1714  case AMDGPU::G_BLOCK_ADDR: {
1715  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1716  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
1717  break;
1718  }
1719  case AMDGPU::G_INSERT: {
1720  unsigned BankID = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID :
1721  AMDGPU::VGPRRegBankID;
1722  unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
1723  unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
1724  unsigned EltSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI);
1725  OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
1726  OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
1727  OpdsMapping[2] = AMDGPU::getValueMapping(BankID, EltSize);
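// Operand 3 is the immediate insert offset, so it gets no value mapping.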
1728  OpdsMapping[3] = nullptr;
1729  break;
1730  }
1731  case AMDGPU::G_EXTRACT: {
1732  unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
1733  unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
1734  unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
1735  OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
1736  OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
1737  OpdsMapping[2] = nullptr;
1738  break;
1739  }
1740  case AMDGPU::G_MERGE_VALUES:
1741  case AMDGPU::G_BUILD_VECTOR:
1742  case AMDGPU::G_CONCAT_VECTORS: {
1743  unsigned Bank = isSALUMapping(MI) ?
1744  AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
1745  unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1746  unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1747 
1748  OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
1749  // Dst and all source operands should use the same register bank.
1750  for (unsigned i = 1, e = MI.getNumOperands(); i != e; ++i)
1751  OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize);
1752  break;
1753  }
1754  case AMDGPU::G_BITCAST:
1755  case AMDGPU::G_INTTOPTR:
1756  case AMDGPU::G_PTRTOINT:
1757  case AMDGPU::G_CTLZ:
1758  case AMDGPU::G_CTLZ_ZERO_UNDEF:
1759  case AMDGPU::G_CTTZ:
1760  case AMDGPU::G_CTTZ_ZERO_UNDEF:
1761  case AMDGPU::G_CTPOP:
1762  case AMDGPU::G_BSWAP:
1763  case AMDGPU::G_FABS:
1764  case AMDGPU::G_FNEG: {
1765  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1766  unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
1767  OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
1768  break;
1769  }
1770  case AMDGPU::G_TRUNC: {
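// A truncate is free on either bank, so keep the source's bank for the
// result and avoid introducing a cross-bank copy.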
1771  Register Dst = MI.getOperand(0).getReg();
1772  Register Src = MI.getOperand(1).getReg();
1773  unsigned Bank = getRegBankID(Src, MRI, *TRI);
1774  unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
1775  unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);
1776  OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
1777  OpdsMapping[1] = AMDGPU::getValueMapping(Bank, SrcSize);
1778  break;
1779  }
1780  case AMDGPU::G_ZEXT:
1781  case AMDGPU::G_SEXT:
1782  case AMDGPU::G_ANYEXT: {
1783  Register Dst = MI.getOperand(0).getReg();
1784  Register Src = MI.getOperand(1).getReg();
1785  unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
1786  unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);
1787 
1788  unsigned DstBank;
1789  const RegisterBank *SrcBank = getRegBank(Src, MRI, *TRI);
1790  assert(SrcBank);
1791  switch (SrcBank->getID()) {
1792  case AMDGPU::SCCRegBankID:
1793  case AMDGPU::SGPRRegBankID:
1794  DstBank = AMDGPU::SGPRRegBankID;
1795  break;
1796  default:
1797  DstBank = AMDGPU::VGPRRegBankID;
1798  break;
1799  }
1800 
1801  // TODO: Should anyext be split into 32-bit parts as well?
1802  if (MI.getOpcode() == AMDGPU::G_ANYEXT) {
1803  OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, DstSize);
1804  OpdsMapping[1] = AMDGPU::getValueMapping(SrcBank->getID(), SrcSize);
1805  } else {
1806  // Scalar extend can use 64-bit BFE, but VGPRs require extending to
1807  // 32 bits, and then to 64.
1808  OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
1809  OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->getID(),
1810  SrcSize);
1811  }
1812  break;
1813  }
1814  case AMDGPU::G_FCMP: {
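// FP compares only exist on the VALU, so the result is always a per-lane VCC
// bool; note that operand 3 is forced to VGPR below.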
1815  unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
1816  unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
1817  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
1818  OpdsMapping[1] = nullptr; // Predicate Operand.
1819  OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size);
1820  OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
1821  break;
1822  }
1823  case AMDGPU::G_STORE: {
1824  assert(MI.getOperand(0).isReg());
1825  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1826  // FIXME: We need to specify a different reg bank once scalar stores
1827  // are supported.
1828  const ValueMapping *ValMapping =
1829  AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
1830  // FIXME: Depending on the type of store, the pointer could be in
1831  // the SGPR Reg bank.
1832  // FIXME: Pointer size should be based on the address space.
1833  const ValueMapping *PtrMapping =
1834  AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
1835 
1836  OpdsMapping[0] = ValMapping;
1837  OpdsMapping[1] = PtrMapping;
1838  break;
1839  }
1840 
1841  case AMDGPU::G_ICMP: {
1842  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1843  unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
1844  unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
1845  unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
1846 
1847  bool CanUseSCC = Op2Bank == AMDGPU::SGPRRegBankID &&
1848  Op3Bank == AMDGPU::SGPRRegBankID &&
1849  (Size == 32 || (Size == 64 &&
1850  (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) &&
1851  MF.getSubtarget<GCNSubtarget>().hasScalarCompareEq64()));
1852 
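// S_CMP writes SCC, but only for compares the SALU supports: any 32-bit
// compare, or 64-bit eq/ne when the subtarget has scalar 64-bit compares.
// Anything else becomes a VALU compare producing VCC.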
1853  unsigned Op0Bank = CanUseSCC ? AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
1854 
1855  OpdsMapping[0] = AMDGPU::getValueMapping(Op0Bank, 1);
1856  OpdsMapping[1] = nullptr; // Predicate Operand.
1857  OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size);
1858  OpdsMapping[3] = AMDGPU::getValueMapping(Op3Bank, Size);
1859  break;
1860  }
1861  case AMDGPU::G_EXTRACT_VECTOR_ELT: {
1862  unsigned OutputBankID = isSALUMapping(MI) ?
1863  AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
1864  unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1865  unsigned IdxSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
1866  unsigned IdxBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
1867 
1868  OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, SrcSize);
1869  OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, SrcSize);
1870 
1871  // The index can be in either bank if the source vector is VGPR.
1872  OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize);
1873  break;
1874  }
1875  case AMDGPU::G_INSERT_VECTOR_ELT: {
1876  unsigned OutputBankID = isSALUMapping(MI) ?
1877  AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
1878 
1879  unsigned VecSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1880  unsigned InsertSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
1881  unsigned IdxSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
1882  unsigned InsertEltBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
1883  unsigned IdxBank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
1884 
1885  OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
1886  OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);
1887  OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBank, InsertSize);
1888 
1889  // The index can be in either bank if the source vector is VGPR.
1890  OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
1891  break;
1892  }
1893  case AMDGPU::G_UNMERGE_VALUES: {
1894  unsigned Bank = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID :
1895  AMDGPU::VGPRRegBankID;
1896 
1897  // The source and all results should use the same register bank.
1898  // FIXME: Shouldn't this be the default? Why do we need to handle this?
1899  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1900  unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
1901  OpdsMapping[i] = AMDGPU::getValueMapping(Bank, Size);
1902  }
1903  break;
1904  }
1905  case AMDGPU::G_INTRINSIC: {
1906  switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
1907  default:
1908  return getInvalidInstructionMapping();
1909  case Intrinsic::maxnum:
1910  case Intrinsic::minnum:
1911  case Intrinsic::amdgcn_div_fmas:
1912  case Intrinsic::amdgcn_trig_preop:
1913  case Intrinsic::amdgcn_sin:
1914  case Intrinsic::amdgcn_cos:
1915  case Intrinsic::amdgcn_log_clamp:
1916  case Intrinsic::amdgcn_rcp:
1917  case Intrinsic::amdgcn_rcp_legacy:
1918  case Intrinsic::amdgcn_rsq:
1919  case Intrinsic::amdgcn_rsq_legacy:
1920  case Intrinsic::amdgcn_rsq_clamp:
1921  case Intrinsic::amdgcn_ldexp:
1922  case Intrinsic::amdgcn_frexp_mant:
1923  case Intrinsic::amdgcn_frexp_exp:
1924  case Intrinsic::amdgcn_fract:
1925  case Intrinsic::amdgcn_cvt_pkrtz:
1926  case Intrinsic::amdgcn_cvt_pknorm_i16:
1927  case Intrinsic::amdgcn_cvt_pknorm_u16:
1928  case Intrinsic::amdgcn_cvt_pk_i16:
1929  case Intrinsic::amdgcn_cvt_pk_u16:
1930  case Intrinsic::amdgcn_fmed3:
1931  case Intrinsic::amdgcn_cubeid:
1932  case Intrinsic::amdgcn_cubema:
1933  case Intrinsic::amdgcn_cubesc:
1934  case Intrinsic::amdgcn_cubetc:
1935  case Intrinsic::amdgcn_sffbh:
1936  case Intrinsic::amdgcn_fmad_ftz:
1937  case Intrinsic::amdgcn_mbcnt_lo:
1938  case Intrinsic::amdgcn_mbcnt_hi:
1939  case Intrinsic::amdgcn_ubfe:
1940  case Intrinsic::amdgcn_sbfe:
1941  case Intrinsic::amdgcn_lerp:
1942  case Intrinsic::amdgcn_sad_u8:
1943  case Intrinsic::amdgcn_msad_u8:
1944  case Intrinsic::amdgcn_sad_hi_u8:
1945  case Intrinsic::amdgcn_sad_u16:
1946  case Intrinsic::amdgcn_qsad_pk_u16_u8:
1947  case Intrinsic::amdgcn_mqsad_pk_u16_u8:
1948  case Intrinsic::amdgcn_mqsad_u32_u8:
1949  case Intrinsic::amdgcn_cvt_pk_u8_f32:
1950  case Intrinsic::amdgcn_alignbit:
1951  case Intrinsic::amdgcn_alignbyte:
1952  case Intrinsic::amdgcn_fdot2:
1953  case Intrinsic::amdgcn_sdot2:
1954  case Intrinsic::amdgcn_udot2:
1955  case Intrinsic::amdgcn_sdot4:
1956  case Intrinsic::amdgcn_udot4:
1957  case Intrinsic::amdgcn_sdot8:
1958  case Intrinsic::amdgcn_udot8:
1959  case Intrinsic::amdgcn_fdiv_fast:
1960  case Intrinsic::amdgcn_wwm:
1961  case Intrinsic::amdgcn_wqm:
1962  return getDefaultMappingVOP(MI);
1963  case Intrinsic::amdgcn_ds_permute:
1964  case Intrinsic::amdgcn_ds_bpermute:
1965  case Intrinsic::amdgcn_update_dpp:
1966  return getDefaultMappingAllVGPR(MI);
1967  case Intrinsic::amdgcn_kernarg_segment_ptr:
1968  case Intrinsic::amdgcn_s_getpc:
1969  case Intrinsic::amdgcn_groupstaticsize: {
1970  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1971  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
1972  break;
1973  }
1974  case Intrinsic::amdgcn_wqm_vote: {
1975  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1976  OpdsMapping[0] = OpdsMapping[2]
1977  = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size);
1978  break;
1979  }
1980  case Intrinsic::amdgcn_s_buffer_load: {
1981  // FIXME: This should be moved to G_INTRINSIC_W_SIDE_EFFECTS
1982  Register RSrc = MI.getOperand(2).getReg(); // SGPR
1983  Register Offset = MI.getOperand(3).getReg(); // SGPR/imm
1984 
1985  unsigned Size0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1986  unsigned Size2 = MRI.getType(RSrc).getSizeInBits();
1987  unsigned Size3 = MRI.getType(Offset).getSizeInBits();
1988 
1989  unsigned RSrcBank = getRegBankID(RSrc, MRI, *TRI);
1990  unsigned OffsetBank = getRegBankID(Offset, MRI, *TRI);
1991 
1992  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size0);
1993  OpdsMapping[1] = nullptr; // intrinsic id
1994 
1995  // Lie and claim everything is legal, even though some need to be
1996  // SGPRs. applyMapping will have to deal with it as a waterfall loop.
1997  OpdsMapping[2] = AMDGPU::getValueMapping(RSrcBank, Size2); // rsrc
1998  OpdsMapping[3] = AMDGPU::getValueMapping(OffsetBank, Size3);
1999  OpdsMapping[4] = nullptr;
2000  break;
2001  }
2002  case Intrinsic::amdgcn_div_scale: {
2003  unsigned Dst0Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2004  unsigned Dst1Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
2005  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Dst0Size);
2006  OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Dst1Size);
2007 
2008  unsigned SrcSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
2009  OpdsMapping[3] = AMDGPU::getValueMapping(
2010  getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI), SrcSize);
2011  OpdsMapping[4] = AMDGPU::getValueMapping(
2012  getRegBankID(MI.getOperand(4).getReg(), MRI, *TRI), SrcSize);
2013 
2014  break;
2015  }
2016  case Intrinsic::amdgcn_class: {
2017  Register Src0Reg = MI.getOperand(2).getReg();
2018  Register Src1Reg = MI.getOperand(3).getReg();
2019  unsigned Src0Size = MRI.getType(Src0Reg).getSizeInBits();
2020  unsigned Src1Size = MRI.getType(Src1Reg).getSizeInBits();
2021  unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2022  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
2023  OpdsMapping[2] = AMDGPU::getValueMapping(getRegBankID(Src0Reg, MRI, *TRI),
2024  Src0Size);
2025  OpdsMapping[3] = AMDGPU::getValueMapping(getRegBankID(Src1Reg, MRI, *TRI),
2026  Src1Size);
2027  break;
2028  }
2029  case Intrinsic::amdgcn_icmp:
2030  case Intrinsic::amdgcn_fcmp: {
2031  unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2032  // This is not VCCRegBank because this is not used in boolean contexts.
2033  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
2034  unsigned OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
2035  unsigned Op1Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
2036  unsigned Op2Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
2037  OpdsMapping[2] = AMDGPU::getValueMapping(Op1Bank, OpSize);
2038  OpdsMapping[3] = AMDGPU::getValueMapping(Op2Bank, OpSize);
2039  break;
2040  }
2041  case Intrinsic::amdgcn_readlane: {
2042  // This must be an SGPR, but accept a VGPR.
2043  unsigned IdxReg = MI.getOperand(3).getReg();
2044  unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();
2045  unsigned IdxBank = getRegBankID(IdxReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
2046  OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
2047  LLVM_FALLTHROUGH;
2048  }
2049  case Intrinsic::amdgcn_readfirstlane: {
2050  unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2051  unsigned SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
2052  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
2053  OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
2054  break;
2055  }
2056  case Intrinsic::amdgcn_writelane: {
2057  unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2058  unsigned SrcReg = MI.getOperand(2).getReg();
2059  unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
2060  unsigned SrcBank = getRegBankID(SrcReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
2061  unsigned IdxReg = MI.getOperand(3).getReg();
2062  unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();
2063  unsigned IdxBank = getRegBankID(IdxReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
2064  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
2065 
2066  // These two must be SGPRs, but accept VGPRs. A readfirstlane will be
2067  // inserted to legalize them.
2068  OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, SrcSize);
2069  OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
2070  OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
2071  break;
2072  }
2073  case Intrinsic::amdgcn_if_break: {
2074  unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
2075  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
2076  OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
2077  OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
2078  break;
2079  }
2080  }
2081  break;
2082  }
2083  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
2084  switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
2085  default:
2086  return getInvalidInstructionMapping();
2087  case Intrinsic::amdgcn_s_getreg:
2088  case Intrinsic::amdgcn_s_memtime:
2089  case Intrinsic::amdgcn_s_memrealtime:
2090  case Intrinsic::amdgcn_s_get_waveid_in_workgroup: {
2091  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2092  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
2093  break;
2094  }
2095  case Intrinsic::amdgcn_ds_append:
2096  case Intrinsic::amdgcn_ds_consume:
2097  case Intrinsic::amdgcn_ds_fadd:
2098  case Intrinsic::amdgcn_ds_fmin:
2099  case Intrinsic::amdgcn_ds_fmax:
2100  case Intrinsic::amdgcn_atomic_inc:
2101  case Intrinsic::amdgcn_atomic_dec:
2102  return getDefaultMappingAllVGPR(MI);
2103  case Intrinsic::amdgcn_ds_ordered_add:
2104  case Intrinsic::amdgcn_ds_ordered_swap: {
2105  unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2106  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
2107  unsigned M0Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
2108  AMDGPU::SGPRRegBankID);
2109  OpdsMapping[2] = AMDGPU::getValueMapping(M0Bank, 32);
2110  OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
2111  break;
2112  }
2113  case Intrinsic::amdgcn_exp_compr:
2114  OpdsMapping[0] = nullptr; // IntrinsicID
2115  // FIXME: These are immediate values which can't be read from registers.
2116  OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
2117  OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
2118  // FIXME: Could we support packed types here?
2119  OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
2120  OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
2121  // FIXME: These are immediate values which can't be read from registers.
2122  OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
2123  OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
2124  break;
2125  case Intrinsic::amdgcn_exp:
2126  OpdsMapping[0] = nullptr; // IntrinsicID
2127  // FIXME: These are immediate values which can't be read from registers.
2128  OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
2129  OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
2130  // FIXME: Could we support packed types here?
2131  OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
2132  OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
2133  OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
2134  OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
2135  // FIXME: These are immediate values which can't be read from registers.
2136  OpdsMapping[7] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
2137  OpdsMapping[8] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
2138  break;
2139  case Intrinsic::amdgcn_buffer_load: {
2140  Register RSrc = MI.getOperand(2).getReg(); // SGPR
2141  Register VIndex = MI.getOperand(3).getReg(); // VGPR
2142  Register Offset = MI.getOperand(4).getReg(); // SGPR/VGPR/imm
2143 
2144  unsigned Size0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2145  unsigned Size2 = MRI.getType(RSrc).getSizeInBits();
2146  unsigned Size3 = MRI.getType(VIndex).getSizeInBits();
2147  unsigned Size4 = MRI.getType(Offset).getSizeInBits();
2148 
2149  unsigned RSrcBank = getRegBankID(RSrc, MRI, *TRI);
2150  unsigned OffsetBank = getRegBankID(Offset, MRI, *TRI);
2151 
2152  OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size0);
2153  OpdsMapping[1] = nullptr; // intrinsic id
2154 
2155  // Lie and claim everything is legal, even though some need to be
2156  // SGPRs. applyMapping will have to deal with it as a waterfall loop.
2157  OpdsMapping[2] = AMDGPU::getValueMapping(RSrcBank, Size2); // rsrc
2158  OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size3);
2159  OpdsMapping[4] = AMDGPU::getValueMapping(OffsetBank, Size4);
2160  OpdsMapping[5] = nullptr;
2161  OpdsMapping[6] = nullptr;
2162  break;
2163  }
2164  case Intrinsic::amdgcn_s_sendmsg:
2165  case Intrinsic::amdgcn_s_sendmsghalt: {
2166  // This must be an SGPR, but accept a VGPR.
2167  unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
2168  AMDGPU::SGPRRegBankID);
2169  OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
2170  break;
2171  }
2172  case Intrinsic::amdgcn_end_cf: {
2173  unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
2174  OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
2175  break;
2176  }
2177  }
2178  break;
2179  }
2180  case AMDGPU::G_SELECT: {
2181  unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2182  unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
2183  AMDGPU::SGPRRegBankID);
2184  unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI,
2185  AMDGPU::SGPRRegBankID);
2186  bool SGPRSrcs = Op2Bank == AMDGPU::SGPRRegBankID &&
2187  Op3Bank == AMDGPU::SGPRRegBankID;
2188 
2189  unsigned CondBankDefault = SGPRSrcs ?
2190  AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
2191  unsigned CondBank = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
2192  CondBankDefault);
2193  if (CondBank == AMDGPU::SGPRRegBankID)
2194  CondBank = SGPRSrcs ? AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
2195  else if (CondBank == AMDGPU::VGPRRegBankID)
2196  CondBank = AMDGPU::VCCRegBankID;
2197 
2198  unsigned Bank = SGPRSrcs && CondBank == AMDGPU::SCCRegBankID ?
2199  AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
2200 
2201  assert(CondBank == AMDGPU::VCCRegBankID || CondBank == AMDGPU::SCCRegBankID);
2202 
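// 64-bit selects have no single VALU instruction (V_CNDMASK_B32 is 32-bit),
// so the SGPR64-only mapping again splits non-SGPR values into two 32-bit
// halves.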
2203  if (Size == 64) {
2204  OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
2205  OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
2206  OpdsMapping[2] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
2207  OpdsMapping[3] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
2208  } else {
2209  OpdsMapping[0] = AMDGPU::getValueMapping(Bank, Size);
2210  OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
2211  OpdsMapping[2] = AMDGPU::getValueMapping(Bank, Size);
2212  OpdsMapping[3] = AMDGPU::getValueMapping(Bank, Size);
2213  }
2214 
2215  break;
2216  }
2217 
2218  case AMDGPU::G_LOAD:
2219  return getInstrMappingForLoad(MI);
2220 
2221  case AMDGPU::G_ATOMICRMW_XCHG:
2222  case AMDGPU::G_ATOMICRMW_ADD:
2223  case AMDGPU::G_ATOMICRMW_SUB:
2224  case AMDGPU::G_ATOMICRMW_AND:
2225  case AMDGPU::G_ATOMICRMW_OR:
2226  case AMDGPU::G_ATOMICRMW_XOR:
2227  case AMDGPU::G_ATOMICRMW_MAX:
2228  case AMDGPU::G_ATOMICRMW_MIN:
2229  case AMDGPU::G_ATOMICRMW_UMAX:
2230  case AMDGPU::G_ATOMICRMW_UMIN:
2231  case AMDGPU::G_ATOMIC_CMPXCHG: {
2232  return getDefaultMappingAllVGPR(MI);
2233  }
2234  case AMDGPU::G_BRCOND: {
2235  unsigned Bank = getRegBankID(MI.getOperand(0).getReg(), MRI, *TRI,
2236  AMDGPU::SGPRRegBankID);
2237  assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);
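// An SCC condition selects a uniform scalar branch; any other bank is
// treated as divergent and routed through VCC.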
2238  if (Bank != AMDGPU::SCCRegBankID)
2239  Bank = AMDGPU::VCCRegBankID;
2240 
2241  OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);
2242  break;
2243  }
2244  }
2245 
2246  return getInstructionMapping(/*ID*/1, /*Cost*/1,
2247  getOperandsMapping(OpdsMapping),
2248  MI.getNumOperands());
2249 }
2250 