LLVM 9.0.0svn
AMDGPUISelDAGToDAG.cpp
//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUInstrInfo.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#ifdef EXPENSIVE_CHECKS
#include "llvm/IR/Dominators.h"
#endif
#include "llvm/IR/Instruction.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <new>
#include <vector>

#define DEBUG_TYPE "isel"

using namespace llvm;

namespace llvm {

class R600InstrInfo;

} // end namespace llvm

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

static bool isNullConstantOrUndef(SDValue V) {
  if (V.isUndef())
    return true;

  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
  return Const != nullptr && Const->isNullValue();
}

static bool getConstantValue(SDValue N, uint32_t &Out) {
  // This is only used for packed vectors, where using 0 for undef should
  // always be good.
  if (N.isUndef()) {
    Out = 0;
    return true;
  }

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
    Out = C->getAPIntValue().getSExtValue();
    return true;
  }

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
    Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
    return true;
  }

  return false;
}
102 // TODO: Handle undef as zero
103 static SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
104  bool Negate = false) {
105  assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
106  uint32_t LHSVal, RHSVal;
107  if (getConstantValue(N->getOperand(0), LHSVal) &&
108  getConstantValue(N->getOperand(1), RHSVal)) {
109  SDLoc SL(N);
110  uint32_t K = Negate ?
111  (-LHSVal & 0xffff) | (-RHSVal << 16) :
112  (LHSVal & 0xffff) | (RHSVal << 16);
113  return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
114  DAG.getTargetConstant(K, SL, MVT::i32));
115  }
116 
117  return nullptr;
118 }
119 
120 static SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
121  return packConstantV2I16(N, DAG, true);
122 }
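
// Worked example (an illustrative note, not part of the original source):
// (build_vector (i16 1), (i16 2)) packs into the single dword
// K = (1 & 0xffff) | (2 << 16) = 0x00020001, emitted as one s_mov_b32.
// With Negate set, each half is negated before packing so a packed
// constant -V can be materialized directly.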

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const GCNSubtarget *Subtarget;
  bool EnableLateStructurizeCFG;

public:
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
    : SelectionDAGISel(*TM, OptLevel) {
    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
  }
  ~AMDGPUDAGToDAGISel() override = default;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AMDGPUArgumentUsageInfo>();
    AU.addRequired<LegacyDivergenceAnalysis>();
#ifdef EXPENSIVE_CHECKS
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
#endif
    SelectionDAGISel::getAnalysisUsage(AU);
  }

  bool matchLoadD16FromBuildVector(SDNode *N) const;

  bool runOnMachineFunction(MachineFunction &MF) override;
  void PreprocessISelDAG() override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

protected:
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
  bool isNegInlineImmediate(const SDNode *N) const {
    return isInlineImmediate(N, true);
  }

  bool isVGPRImm(const SDNode *N) const;
  bool isUniformLoad(const SDNode *N) const;
  bool isUniformBr(const SDNode *N) const;

  MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;

  SDNode *glueCopyToM0LDSInit(SDNode *N) const;
  SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(SDValue Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE, SDValue &DLC) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE, SDValue &DLC) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratchOffen(SDNode *Parent,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Parent,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE, SDValue &DLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;

  bool SelectFlatAtomic(SDNode *N, SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatAtomicSigned(SDNode *N, SDValue Addr, SDValue &VAddr,
                              SDValue &Offset, SDValue &SLC) const;

  template <bool IsSigned>
  bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;

  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  SDValue Expand32BitAddress(SDValue Addr) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods_f32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Clamp) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  SDValue getHi16Elt(SDValue In) const;

  void SelectADD_SUB_I64(SDNode *N);
  void SelectAddcSubb(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectDIV_FMAS(SDNode *N);
  void SelectMAD_64_32(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD_FMA(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);
  void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
  void SelectDS_GWS(SDNode *N, unsigned IntrID);
  void SelectINTRINSIC_W_CHAIN(SDNode *N);
  void SelectINTRINSIC_VOID(SDNode *N);

protected:
  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
  const R600Subtarget *Subtarget;

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
      AMDGPUDAGToDAGISel(TM, OptLevel) {}

  void Select(SDNode *N) override;

  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;

  bool runOnMachineFunction(MachineFunction &MF) override;

  void PreprocessISelDAG() override {}

protected:
  // Include the pieces autogenerated from the target description.
#include "R600GenDAGISel.inc"
};

static SDValue stripBitcast(SDValue Val) {
  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}

// Figure out if this is really an extract of the high 16-bits of a dword.
static bool isExtractHiElt(SDValue In, SDValue &Out) {
  In = stripBitcast(In);
  if (In.getOpcode() != ISD::TRUNCATE)
    return false;

  SDValue Srl = In.getOperand(0);
  if (Srl.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
      if (ShiftAmt->getZExtValue() == 16) {
        Out = stripBitcast(Srl.getOperand(0));
        return true;
      }
    }
  }

  return false;
}
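
// Illustrative cases (not part of the original source): for a 32-bit %x,
//   (trunc (srl %x, 16))            matches, with Out = %x
//   (trunc (bitcast (srl %x, 16)))  also matches after stripBitcast
// i.e. the helper recognizes "the high half of %x" however it is spelled.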

// Look through operations that obscure just looking at the low 16-bits of the
// same register.
static SDValue stripExtractLoElt(SDValue In) {
  if (In.getOpcode() == ISD::TRUNCATE) {
    SDValue Src = In.getOperand(0);
    if (Src.getValueType().getSizeInBits() == 32)
      return stripBitcast(Src);
  }

  return In;
}

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
#ifdef EXPENSIVE_CHECKS
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
#endif
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

/// This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

/// This pass converts a legalized DAG into an R600-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
                                      CodeGenOpt::Level OptLevel) {
  return new R600DAGToDAGISel(TM, OptLevel);
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
#ifdef EXPENSIVE_CHECKS
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  for (auto &L : LI->getLoopsInPreorder()) {
    assert(L->isLCSSAForm(DT));
  }
#endif
  Subtarget = &MF.getSubtarget<GCNSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
  assert(Subtarget->d16PreservesUnusedBits());
  MVT VT = N->getValueType(0).getSimpleVT();
  if (VT != MVT::v2i16 && VT != MVT::v2f16)
    return false;

  SDValue Lo = N->getOperand(0);
  SDValue Hi = N->getOperand(1);

  LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));

  // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
  // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
  // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo

  // Need to check for possible indirect dependencies on the other half of the
  // vector to avoid introducing a cycle.
  if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);

    SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
    SDValue Ops[] = {
      LdHi->getChain(), LdHi->getBasePtr(), TiedIn
    };

    unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
    if (LdHi->getMemoryVT() == MVT::i8) {
      LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
        AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
    } else {
      assert(LdHi->getMemoryVT() == MVT::i16);
    }

    SDValue NewLoadHi =
      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
                                  Ops, LdHi->getMemoryVT(),
                                  LdHi->getMemOperand());

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
    return true;
  }

  // build_vector (load ptr), hi -> load_d16_lo ptr, hi
  // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
  // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
  LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
  if (LdLo && Lo.hasOneUse()) {
    SDValue TiedIn = getHi16Elt(Hi);
    if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
      return false;

    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
    unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
    if (LdLo->getMemoryVT() == MVT::i8) {
      LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
        AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
    } else {
      assert(LdLo->getMemoryVT() == MVT::i16);
    }

    TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);

    SDValue Ops[] = {
      LdLo->getChain(), LdLo->getBasePtr(), TiedIn
    };

    SDValue NewLoadLo =
      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
                                  Ops, LdLo->getMemoryVT(),
                                  LdLo->getMemOperand());

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
    return true;
  }

  return false;
}
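
// Note (illustrative, not part of the original source): the isPredecessorOf
// checks above are what prevent DAG cycles. The folded load gains the other
// vector half as a tied operand, so if that half already depended on the
// load transitively, the fold would make the new node its own ancestor.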

void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->d16PreservesUnusedBits())
    return;

  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty())
      continue;

    switch (N->getOpcode()) {
    case ISD::BUILD_VECTOR:
      MadeChange |= matchLoadD16FromBuildVector(N);
      break;
    default:
      break;
    }
  }

  if (MadeChange) {
    CurDAG->RemoveDeadNodes();
    LLVM_DEBUG(dbgs() << "After PreProcess:\n";
               CurDAG->dump(););
  }
}

bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (N->getFlags().isDefined())
    return N->getFlags().hasNoNaNs();

  return CurDAG->isKnownNeverNaN(N);
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
                                           bool Negated) const {
  if (N->isUndef())
    return true;

  const SIInstrInfo *TII = Subtarget->getInstrInfo();
  if (Negated) {
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
      return TII->isInlineConstant(-C->getAPIntValue());

    if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
      return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());

  } else {
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
      return TII->isInlineConstant(C->getAPIntValue());

    if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
      return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
  }

  return false;
}
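
// Background (illustrative note, not part of the original source): an inline
// immediate is a value the hardware encodes for free in the instruction
// word, e.g. small integers in [-16, 64] and floats such as +-0.5, +-1.0,
// +-2.0 and +-4.0. The Negated form asks whether -V is inline, letting a
// negation fold into the consuming instruction's source modifier.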

/// Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::CopyToReg) {
      unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      const SIRegisterInfo *TRI
        = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");

  SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N),
                                 Val);

  SDValue Glue = M0.getValue(1);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(M0); // Replace the chain.
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
    Ops.push_back(N->getOperand(i));

  Ops.push_back(Glue);
  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
  unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
  if (AS == AMDGPUAS::LOCAL_ADDRESS) {
    if (Subtarget->ldsRequiresM0Init())
      return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
  } else if (AS == AMDGPUAS::REGION_ADDRESS) {
    MachineFunction &MF = CurDAG->getMachineFunction();
    unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
    return
        glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
  }
  return N;
}

MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
                                                  EVT VT) const {
  SDNode *Lo = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32,
      CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
  SDNode *Hi =
      CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                             CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
  const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};

  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
}
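
// Worked example (illustrative, not part of the original source):
// Imm = 0x0000000100000002 is built as
//   %lo = S_MOV_B32 2
//   %hi = S_MOV_B32 1
//   REG_SEQUENCE SReg_64, %lo, sub0, %hi, sub1
// i.e. one 64-bit scalar move synthesized from two 32-bit moves.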

static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32_XM0RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 3:
    return AMDGPU::SGPR_96RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 5:
    return AMDGPU::SGPR_160RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  case 32:
    return AMDGPU::SReg_1024RegClassID;
  }

  llvm_unreachable("invalid vector size");
}

void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  unsigned NumVectorElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDLoc DL(N);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

  if (NumVectorElts == 1) {
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         RegClass);
    return;
  }

  assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
                                "supported yet");
  // 32 = Max Num Vector Elements
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
  // 1 = Vector Register Class
  SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // XXX: Why is this here?
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      IsRegSeq = false;
      break;
    }
    unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // Fill in the missing undef elements if this was a scalar_to_vector.
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   DL, EltVT);
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(Sub, DL, MVT::i32);
    }
  }

  if (!IsRegSeq)
    SelectCode(N);
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}

void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
       Opc == ISD::ATOMIC_LOAD_FADD ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMAX))
    N = glueCopyToM0LDSInit(N);

  switch (Opc) {
  default:
    break;
  // We are selecting i64 ADD here instead of custom lowering it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::ADDCARRY:
  case ISD::SUBCARRY:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectAddcSubb(N);
    return;
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    if (VT.getScalarSizeInBits() == 16) {
      if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
        if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
          ReplaceNode(N, Packed);
          return;
        }
      }

      break;
    }

    assert(VT.getVectorElementType().bitsEq(MVT::i32));
    unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    SelectBuildVector(N, RegClassID);
    return;
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE:
  case ISD::ATOMIC_LOAD:
  case ISD::ATOMIC_STORE: {
    N = glueCopyToM0LDSInit(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case AMDGPUISD::DIV_FMAS: {
    SelectDIV_FMAS(N);
    return;
  }
  case AMDGPUISD::MAD_I64_I32:
  case AMDGPUISD::MAD_U64_U32: {
    SelectMAD_64_32(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  case ISD::FMAD:
  case ISD::FMA:
    SelectFMAD_FMA(N);
    return;
  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  case AMDGPUISD::CVT_PKRTZ_F16_F32:
  case AMDGPUISD::CVT_PKNORM_I16_F32:
  case AMDGPUISD::CVT_PKNORM_U16_F32:
  case AMDGPUISD::CVT_PK_U16_U32:
  case AMDGPUISD::CVT_PK_I16_I32: {
    // Hack around using a legal type if f16 is illegal.
    if (N->getValueType(0) == MVT::i32) {
      MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
      N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
                              { N->getOperand(0), N->getOperand(1) });
      SelectCode(N);
      return;
    }

    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    SelectINTRINSIC_W_CHAIN(N);
    return;
  }
  case ISD::INTRINSIC_VOID: {
    SelectINTRINSIC_VOID(N);
    return;
  }
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

// FIXME: Should only handle addcarry/subcarry
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo, 0),
    Sub0,
    SDValue(AddHi, 0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  ReplaceNode(N, RegSequence);
}
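
// Sketch of the expansion (illustrative, not part of the original source):
//   %r = add i64 %a, %b
// becomes roughly
//   %r.lo = S_ADD_U32  %a.sub0, %b.sub0   ; carry-out threaded via glue
//   %r.hi = S_ADDC_U32 %a.sub1, %b.sub1   ; consumes the glued carry
//   %r    = REG_SEQUENCE SReg_64, %r.lo, sub0, %r.hi, sub1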

void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue CI = N->getOperand(2);

  unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
                                                 : AMDGPU::V_SUBB_U32_e64;
  CurDAG->SelectNodeTo(
      N, Opc, N->getVTList(),
      {LHS, RHS, CI, CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The names of these opcodes are misleading. v_add_i32/v_sub_i32 have an
  // unsigned carry out despite the _i32 name. These were renamed in VI to _U32.
  // FIXME: We should probably rename the opcodes here.
  unsigned Opc = N->getOpcode() == ISD::UADDO ?
      AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;

  CurDAG->SelectNodeTo(
      N, Opc, N->getVTList(),
      {N->getOperand(0), N->getOperand(1),
       CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
}

void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectDIV_FMAS(SDNode *N) {
  const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
  const SIRegisterInfo *TRI = ST->getRegisterInfo();

  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_FMAS_F64 : AMDGPU::V_DIV_FMAS_F32;

  SDValue CarryIn = N->getOperand(3);
  // V_DIV_FMAS implicitly reads VCC.
  SDValue VCC = CurDAG->getCopyToReg(CurDAG->getEntryNode(), SL,
                                     TRI->getVCC(), CarryIn, SDValue());

  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);

  Ops[8] = VCC;
  Ops[9] = VCC.getValue(1);

  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}
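
// Note (illustrative, not part of the original source): the carry-in is
// copied into VCC because v_div_fmas reads VCC implicitly; threading the
// copy's glue result through Ops[9] keeps the CopyToReg adjacent to the
// selected instruction so VCC is not clobbered in between.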

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  SDLoc SL(N);
  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;

  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                    Clamp };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->hasUsableDSOffset() ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));

          // FIXME: Select to VOP3 version for with-carry.
          unsigned SubOp = AMDGPU::V_SUB_I32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          MachineSDNode *MachineSub =
              CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

// TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));
          unsigned SubOp = AMDGPU::V_SUB_I32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}
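
// Worked example (illustrative, not part of the original source): a 64-bit
// DS access at byte offset 8 from %p selects as base = %p, offset0 = 2,
// offset1 = 3 -- read2/write2 offsets are counted in dwords, and each must
// fit in 8 bits, hence the isUInt<8> checks above.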

bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE, SDValue &DLC) const {
  // Subtarget prefers to use flat instructions.
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
  DLC = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  ConstantSDNode *C1 = nullptr;
  SDValue N0 = Addr;
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    C1 = cast<ConstantSDNode>(Addr.getOperand(1));
    if (isUInt<32>(C1->getZExtValue()))
      N0 = Addr.getOperand(0);
    else
      C1 = nullptr;
  }

  if (N0.getOpcode() == ISD::ADD) {
    // (add N2, N3) -> addr64, or
    // (add (add N2, N3), C1) -> addr64
    SDValue N2 = N0.getOperand(0);
    SDValue N3 = N0.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);

    if (N2->isDivergent()) {
      if (N3->isDivergent()) {
        // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
        // addr64, and construct the resource from a 0 address.
        Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
        VAddr = N0;
      } else {
        // N2 is divergent, N3 is not.
        Ptr = N3;
        VAddr = N2;
      }
    } else {
      // N2 is not divergent.
      Ptr = N2;
      VAddr = N3;
    }
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  } else if (N0->isDivergent()) {
    // N0 is divergent. Use it as the addr64, and construct the resource from a
    // 0 address.
    Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
    VAddr = N0;
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
  } else {
    // N0 -> offset, or
    // (N0 + C1) -> offset
    VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Ptr = N0;
  }

  if (!C1) {
    // No offset.
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
    // Legal offset for instruction.
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    return true;
  }

  // Illegal offset, store it in soffset.
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  SOffset =
      SDValue(CurDAG->getMachineNode(
                  AMDGPU::S_MOV_B32, DL, MVT::i32,
                  CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
              0);
  return true;
}
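
// Summary of the split (illustrative, not part of the original source): any
// uniform part of the address becomes the SGPR resource pointer (Ptr), a
// divergent part goes to VAddr with addr64 set, and a constant tail lands
// in the immediate offset when it fits, else it is moved into soffset.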

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE,
                                           SDValue &DLC) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (!Subtarget->hasAddr64())
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE, DLC))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE, DLC;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC);
}

static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                              FI->getValueType(0));

    // If we can resolve this to a frame index access, this will be relative to
    // either the stack or frame pointer SGPR.
    return std::make_pair(
        TFI, CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32));
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
                                               MVT::i32));
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned Imm = CAddr->getZExtValue();

    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                        DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);

    // In a call sequence, stores to the argument stack area are relative to the
    // stack pointer.
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
        Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive if range checking is enabled.
    //
    // The total computation of vaddr + soffset + offset must not overflow. If
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    //
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    // always perform a range check. If a negative vaddr base index was used,
    // this would fail the range check. The overall address computation would
    // compute a valid address, but this doesn't happen due to the range
    // check. For out-of-bounds MUBUF loads, a 0 is returned.
    //
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    // MUBUF vaddr, but not on older subtargets which can only do this if the
    // sign bit is known 0.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
    return false;

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
      Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

  // FIXME: Get from MachinePointerInfo? We should only be using the frame
  // offset if we know this is in a call sequence.
  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE, SDValue &DLC) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE, DLC))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset,
                                           SDValue &Offset) const {
  SDValue GLC, SLC, TFE, DLC;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &SLC) const {
  SDValue GLC, TFE, DLC;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
}

template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N,
                                          SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return static_cast<const SITargetLowering*>(getTargetLowering())->
      SelectFlatOffset(IsSigned, *CurDAG, N, Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N,
                                          SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return SelectFlatOffset<false>(N, Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N,
                                                SDValue Addr,
                                                SDValue &VAddr,
                                                SDValue &Offset,
                                                SDValue &SLC) const {
  return SelectFlatOffset<true>(N, Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {

  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  GCNSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);

  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit Immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}
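
// Background (illustrative, not part of the original source): the raw byte
// offset has to be re-encoded per generation -- e.g. SI encodes scalar-load
// immediates in dwords while VI+ use bytes -- which is what
// AMDGPU::getSMRDEncodedOffset abstracts away above.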
1659 
1660 SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
1661  if (Addr.getValueType() != MVT::i32)
1662  return Addr;
1663 
1664  // Zero-extend a 32-bit address.
1665  SDLoc SL(Addr);
1666 
1667  const MachineFunction &MF = CurDAG->getMachineFunction();
1668  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1669  unsigned AddrHiVal = Info->get32BitAddressHighBits();
1670  SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
1671 
1672  const SDValue Ops[] = {
1673  CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
1674  Addr,
1675  CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
1676  SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
1677  0),
1678  CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
1679  };
1680 
1681  return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
1682  Ops), 0);
1683 }
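// Sketch of the node built above (assumed MIR shape, for illustration):
//   %hi:sgpr_32         = S_MOV_B32 <get32BitAddressHighBits()>
//   %addr:sreg_64_xexec = REG_SEQUENCE %addr32, sub0, %hi, sub1
// The 32-bit pointer becomes the low half and the function-wide high bits
// become the high half of a 64-bit SGPR pair usable as an SMRD base.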
1684 
1685 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1686  SDValue &Offset, bool &Imm) const {
1687  SDLoc SL(Addr);
1688 
1689  // A 32-bit (address + offset) should not cause unsigned 32-bit integer
1690  // wraparound, because s_load instructions perform the addition in 64 bits.
1691  if ((Addr.getValueType() != MVT::i32 ||
1692  Addr->getFlags().hasNoUnsignedWrap()) &&
1693  CurDAG->isBaseWithConstantOffset(Addr)) {
1694  SDValue N0 = Addr.getOperand(0);
1695  SDValue N1 = Addr.getOperand(1);
1696 
1697  if (SelectSMRDOffset(N1, Offset, Imm)) {
1698  SBase = Expand32BitAddress(N0);
1699  return true;
1700  }
1701  }
1702  SBase = Expand32BitAddress(Addr);
1703  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
1704  Imm = true;
1705  return true;
1706 }
1707 
1708 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1709  SDValue &Offset) const {
1710  bool Imm;
1711  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1712 }
1713 
1714 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1715  SDValue &Offset) const {
1716 
1717  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1718  return false;
1719 
1720  bool Imm;
1721  if (!SelectSMRD(Addr, SBase, Offset, Imm))
1722  return false;
1723 
1724  return !Imm && isa<ConstantSDNode>(Offset);
1725 }
1726 
1727 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
1728  SDValue &Offset) const {
1729  bool Imm;
1730  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
1731  !isa<ConstantSDNode>(Offset);
1732 }
1733 
1734 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
1735  SDValue &Offset) const {
1736  bool Imm;
1737  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
1738 }
1739 
1740 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
1741  SDValue &Offset) const {
1742  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1743  return false;
1744 
1745  bool Imm;
1746  if (!SelectSMRDOffset(Addr, Offset, Imm))
1747  return false;
1748 
1749  return !Imm && isa<ConstantSDNode>(Offset);
1750 }
1751 
1752 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
1753  SDValue &Base,
1754  SDValue &Offset) const {
1755  SDLoc DL(Index);
1756 
1757  if (CurDAG->isBaseWithConstantOffset(Index)) {
1758  SDValue N0 = Index.getOperand(0);
1759  SDValue N1 = Index.getOperand(1);
1760  ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1761 
1762  // (add n0, c0)
1763  // Don't peel off the offset (c0) if doing so could possibly lead
1764  // the base (n0) to be negative.
1765  if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0)) {
1766  Base = N0;
1767  Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
1768  return true;
1769  }
1770  }
1771 
1772  if (isa<ConstantSDNode>(Index))
1773  return false;
1774 
1775  Base = Index;
1776  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1777  return true;
1778 }
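// Worked example (illustrative): for Index = (add n0, 4), the constant is
// peeled into Offset only if n0 is provably non-negative (SignBitIsZero) or
// the constant is <= 0. If n0 might be negative, splitting would leave a
// negative MOVREL base, so the whole add is kept as Base with Offset = 0.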
1779 
1780 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
1781  SDValue Val, uint32_t Offset,
1782  uint32_t Width) {
1783  // Transformation function, pack the offset and width of a BFE into
1784  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1785  // source, bits [5:0] contain the offset and bits [22:16] the width.
1786  uint32_t PackedVal = Offset | (Width << 16);
1787  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1788 
1789  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1790 }
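// Compile-time check of the packing above (added for illustration):
// extracting Width = 8 bits starting at Offset = 16 packs as 16 | (8 << 16).
static_assert((16u | (8u << 16)) == 0x00080010u,
              "S_BFE offset/width packing example");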
1791 
1792 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
1793  // "((a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)"
1794  // "((a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)"
1795  // Predicate: 0 < b <= c < 32
1796 
1797  const SDValue &Shl = N->getOperand(0);
1798  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
1799  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
1800 
1801  if (B && C) {
1802  uint32_t BVal = B->getZExtValue();
1803  uint32_t CVal = C->getZExtValue();
1804 
1805  if (0 < BVal && BVal <= CVal && CVal < 32) {
1806  bool Signed = N->getOpcode() == ISD::SRA;
1807  unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
1808 
1809  ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
1810  32 - CVal));
1811  return;
1812  }
1813  }
1814  SelectCode(N);
1815 }
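// Worked example (illustrative): with b = 8 and c = 24, "(a << 8) srl 24"
// keeps original bits [23:16] of a. The rewrite above produces
// S_BFE_U32 a, offset = 24 - 8 = 16, width = 32 - 24 = 8, which extracts
// exactly those bits (S_BFE_I32 sign-extends them for the sra form).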
1816 
1817 void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
1818  switch (N->getOpcode()) {
1819  case ISD::AND:
1820  if (N->getOperand(0).getOpcode() == ISD::SRL) {
1821  // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
1822  // Predicate: isMask(mask)
1823  const SDValue &Srl = N->getOperand(0);
1824  ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
1825  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
1826 
1827  if (Shift && Mask) {
1828  uint32_t ShiftVal = Shift->getZExtValue();
1829  uint32_t MaskVal = Mask->getZExtValue();
1830 
1831  if (isMask_32(MaskVal)) {
1832  uint32_t WidthVal = countPopulation(MaskVal);
1833 
1834  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1835  Srl.getOperand(0), ShiftVal, WidthVal));
1836  return;
1837  }
1838  }
1839  }
1840  break;
1841  case ISD::SRL:
1842  if (N->getOperand(0).getOpcode() == ISD::AND) {
1843  // "((a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
1844  // Predicate: isMask(mask >> b)
1845  const SDValue &And = N->getOperand(0);
1846  ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
1847  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
1848 
1849  if (Shift && Mask) {
1850  uint32_t ShiftVal = Shift->getZExtValue();
1851  uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
1852 
1853  if (isMask_32(MaskVal)) {
1854  uint32_t WidthVal = countPopulation(MaskVal);
1855 
1856  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1857  And.getOperand(0), ShiftVal, WidthVal));
1858  return;
1859  }
1860  }
1861  } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
1862  SelectS_BFEFromShifts(N);
1863  return;
1864  }
1865  break;
1866  case ISD::SRA:
1867  if (N->getOperand(0).getOpcode() == ISD::SHL) {
1868  SelectS_BFEFromShifts(N);
1869  return;
1870  }
1871  break;
1872 
1873  case ISD::SIGN_EXTEND_INREG: {
1874  // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
1875  SDValue Src = N->getOperand(0);
1876  if (Src.getOpcode() != ISD::SRL)
1877  break;
1878 
1879  const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
1880  if (!Amt)
1881  break;
1882 
1883  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
1884  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
1885  Amt->getZExtValue(), Width));
1886  return;
1887  }
1888  }
1889 
1890  SelectCode(N);
1891 }
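// Worked example (illustrative): in the ISD::AND case above,
// "(x srl 4) & 0xff" satisfies isMask_32(0xff) with
// countPopulation(0xff) == 8, so the node becomes S_BFE_U32 x, 4, 8 -- an
// unsigned extract of bits [11:4] of x.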
1892 
1893 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
1894  assert(N->getOpcode() == ISD::BRCOND);
1895  if (!N->hasOneUse())
1896  return false;
1897 
1898  SDValue Cond = N->getOperand(1);
1899  if (Cond.getOpcode() == ISD::CopyToReg)
1900  Cond = Cond.getOperand(2);
1901 
1902  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
1903  return false;
1904 
1905  MVT VT = Cond.getOperand(0).getSimpleValueType();
1906  if (VT == MVT::i32)
1907  return true;
1908 
1909  if (VT == MVT::i64) {
1910  auto ST = static_cast<const GCNSubtarget *>(Subtarget);
1911 
1912  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
1913  return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
1914  }
1915 
1916  return false;
1917 }
1918 
1919 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
1920  SDValue Cond = N->getOperand(1);
1921 
1922  if (Cond.isUndef()) {
1923  CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
1924  N->getOperand(2), N->getOperand(0));
1925  return;
1926  }
1927 
1928  const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
1929  const SIRegisterInfo *TRI = ST->getRegisterInfo();
1930 
1931  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
1932  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
1933  unsigned CondReg = UseSCCBr ? (unsigned)AMDGPU::SCC : TRI->getVCC();
1934  SDLoc SL(N);
1935 
1936  if (!UseSCCBr) {
1937  // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
1938  // analyzed what generates the vcc value, so we do not know whether vcc
1939  // bits for disabled lanes are 0. Thus we need to mask out bits for
1940  // disabled lanes.
1941  //
1942  // For the case that we select S_CBRANCH_SCC1 and it later gets changed
1943  // to S_CBRANCH_VCCNZ in SIFixSGPRCopies, that pass calls
1944  // SIInstrInfo::moveToVALU, which inserts the S_AND.
1945  //
1946  // We could add an analysis of what generates the vcc value here and omit
1947  // the S_AND when it is unnecessary. But it would be better to add a separate
1948  // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
1949  // catches both cases.
1950  Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32
1951  : AMDGPU::S_AND_B64,
1952  SL, MVT::i1,
1953  CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO
1954  : AMDGPU::EXEC,
1955  MVT::i1),
1956  Cond),
1957  0);
1958  }
1959 
1960  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
1961  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
1962  N->getOperand(2), // Basic Block
1963  VCC.getValue(0));
1964 }
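// Sketch of the divergent-branch case above (assumed MIR shape, for
// illustration, wave64):
//   %masked = S_AND_B64 $exec, %cond
//   $vcc    = COPY %masked
//   S_CBRANCH_VCCNZ %target-bb
// Lanes disabled in EXEC are masked out of %cond, so they cannot make VCC
// appear non-zero and take the branch.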
1965 
1966 void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
1967  MVT VT = N->getSimpleValueType(0);
1968  bool IsFMA = N->getOpcode() == ISD::FMA;
1969  if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
1970  !Subtarget->hasFmaMixInsts()) ||
1971  ((IsFMA && Subtarget->hasMadMixInsts()) ||
1972  (!IsFMA && Subtarget->hasFmaMixInsts()))) {
1973  SelectCode(N);
1974  return;
1975  }
1976 
1977  SDValue Src0 = N->getOperand(0);
1978  SDValue Src1 = N->getOperand(1);
1979  SDValue Src2 = N->getOperand(2);
1980  unsigned Src0Mods, Src1Mods, Src2Mods;
1981 
1982  // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
1983  // using the conversion from f16.
1984  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
1985  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
1986  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
1987 
1988  assert((IsFMA || !Subtarget->hasFP32Denormals()) &&
1989  "fmad selected with denormals enabled");
1990  // TODO: We can select this with f32 denormals enabled if all the sources are
1991  // converted from f16 (in which case fmad isn't legal).
1992 
1993  if (Sel0 || Sel1 || Sel2) {
1994  // For dummy operands.
1995  SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
1996  SDValue Ops[] = {
1997  CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
1998  CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
1999  CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
2000  CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
2001  Zero, Zero
2002  };
2003 
2004  CurDAG->SelectNodeTo(N,
2005  IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
2006  MVT::f32, Ops);
2007  } else {
2008  SelectCode(N);
2009  }
2010 }
2011 
2012 // This is here because there isn't a way to use the generated sub0_sub1 as the
2013 // subreg index to EXTRACT_SUBREG in tablegen.
2014 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
2015  MemSDNode *Mem = cast<MemSDNode>(N);
2016  unsigned AS = Mem->getAddressSpace();
2017  if (AS == AMDGPUAS::FLAT_ADDRESS) {
2018  SelectCode(N);
2019  return;
2020  }
2021 
2022  MVT VT = N->getSimpleValueType(0);
2023  bool Is32 = (VT == MVT::i32);
2024  SDLoc SL(N);
2025 
2026  MachineSDNode *CmpSwap = nullptr;
2027  if (Subtarget->hasAddr64()) {
2028  SDValue SRsrc, VAddr, SOffset, Offset, SLC;
2029 
2030  if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
2031  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
2032  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
2033  SDValue CmpVal = Mem->getOperand(2);
2034 
2035  // XXX - Do we care about glue operands?
2036 
2037  SDValue Ops[] = {
2038  CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
2039  };
2040 
2041  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
2042  }
2043  }
2044 
2045  if (!CmpSwap) {
2046  SDValue SRsrc, SOffset, Offset, SLC;
2047  if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
2048  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
2049  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
2050 
2051  SDValue CmpVal = Mem->getOperand(2);
2052  SDValue Ops[] = {
2053  CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
2054  };
2055 
2056  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
2057  }
2058  }
2059 
2060  if (!CmpSwap) {
2061  SelectCode(N);
2062  return;
2063  }
2064 
2065  MachineMemOperand *MMO = Mem->getMemOperand();
2066  CurDAG->setNodeMemRefs(CmpSwap, {MMO});
2067 
2068  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
2069  SDValue Extract
2070  = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
2071 
2072  ReplaceUses(SDValue(N, 0), Extract);
2073  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
2074  CurDAG->RemoveDeadNode(N);
2075 }
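// Note (believed hardware semantics, not stated in this file): the RTN
// cmpswap returns the prior memory value in the low half of its data
// register pair, so the sub0 (or sub0_sub1 for the X2 form) extract above
// yields the single result expected from the generic ATOMIC_CMP_SWAP node.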
2076 
2077 void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
2078  // The address is assumed to be uniform, so if it ends up in a VGPR, it will
2079  // be copied to an SGPR with readfirstlane.
2080  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2081  AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2082 
2083  SDValue Chain = N->getOperand(0);
2084  SDValue Ptr = N->getOperand(2);
2085  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2086  MachineMemOperand *MMO = M->getMemOperand();
2087  bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
2088 
2089  SDValue Offset;
2090  if (CurDAG->isBaseWithConstantOffset(Ptr)) {
2091  SDValue PtrBase = Ptr.getOperand(0);
2092  SDValue PtrOffset = Ptr.getOperand(1);
2093 
2094  const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
2095  if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue(), 16)) {
2096  N = glueCopyToM0(N, PtrBase);
2097  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
2098  }
2099  }
2100 
2101  if (!Offset) {
2102  N = glueCopyToM0(N, Ptr);
2103  Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2104  }
2105 
2106  SDValue Ops[] = {
2107  Offset,
2108  CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
2109  Chain,
2110  N->getOperand(N->getNumOperands() - 1) // New glue
2111  };
2112 
2113  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2114  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2115 }
2116 
2117 static unsigned gwsIntrinToOpcode(unsigned IntrID) {
2118  switch (IntrID) {
2119  case Intrinsic::amdgcn_ds_gws_init:
2120  return AMDGPU::DS_GWS_INIT;
2121  case Intrinsic::amdgcn_ds_gws_barrier:
2122  return AMDGPU::DS_GWS_BARRIER;
2123  case Intrinsic::amdgcn_ds_gws_sema_v:
2124  return AMDGPU::DS_GWS_SEMA_V;
2125  case Intrinsic::amdgcn_ds_gws_sema_br:
2126  return AMDGPU::DS_GWS_SEMA_BR;
2127  case Intrinsic::amdgcn_ds_gws_sema_p:
2128  return AMDGPU::DS_GWS_SEMA_P;
2129  case Intrinsic::amdgcn_ds_gws_sema_release_all:
2130  return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
2131  default:
2132  llvm_unreachable("not a gws intrinsic");
2133  }
2134 }
2135 
2136 void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
2137  if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
2138  !Subtarget->hasGWSSemaReleaseAll()) {
2139  // Let this error.
2140  SelectCode(N);
2141  return;
2142  }
2143 
2144  // Chain, intrinsic ID, vsrc, offset
2145  const bool HasVSrc = N->getNumOperands() == 4;
2146  assert(HasVSrc || N->getNumOperands() == 3);
2147 
2148  SDLoc SL(N);
2149  SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
2150  int ImmOffset = 0;
2151  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2152  MachineMemOperand *MMO = M->getMemOperand();
2153 
2154  // Don't worry if the offset ends up in a VGPR. Only one lane will have an
2155  // effect, so SIFixSGPRCopies will validly insert readfirstlane.
2156 
2157  // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
2158  // offset field) % 64. Some versions of the programming guide omit the m0
2159  // part, or claim it's from offset 0.
2160  if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
2161  // If we have a constant offset, try to use the default value for m0 as a
2162  // base to possibly avoid setting it up.
2163  glueCopyToM0(N, CurDAG->getTargetConstant(-1, SL, MVT::i32));
2164  ImmOffset = ConstOffset->getZExtValue() + 1;
2165  } else {
2166  if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
2167  ImmOffset = BaseOffset.getConstantOperandVal(1);
2168  BaseOffset = BaseOffset.getOperand(0);
2169  }
2170 
2171  // Prefer to do the shift in an SGPR since it should be possible to use m0
2172  // as the result directly. If it's already an SGPR, it will be eliminated
2173  // later.
2174  SDNode *SGPROffset
2175  = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
2176  BaseOffset);
2177  // Shift to offset in m0
2178  SDNode *M0Base
2179  = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
2180  SDValue(SGPROffset, 0),
2181  CurDAG->getTargetConstant(16, SL, MVT::i32));
2182  glueCopyToM0(N, SDValue(M0Base, 0));
2183  }
2184 
2185  SDValue V0;
2186  SDValue Chain = N->getOperand(0);
2187  SDValue Glue;
2188  if (HasVSrc) {
2189  SDValue VSrc0 = N->getOperand(2);
2190 
2191  // The manual doesn't mention this, but it seems only v0 works.
2192  V0 = CurDAG->getRegister(AMDGPU::VGPR0, MVT::i32);
2193 
2194  SDValue CopyToV0 = CurDAG->getCopyToReg(
2195  N->getOperand(0), SL, V0, VSrc0,
2196  N->getOperand(N->getNumOperands() - 1));
2197  Chain = CopyToV0;
2198  Glue = CopyToV0.getValue(1);
2199  }
2200 
2201  SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
2202 
2203  // TODO: Can this just be removed from the instruction?
2204  SDValue GDS = CurDAG->getTargetConstant(1, SL, MVT::i1);
2205 
2206  const unsigned Opc = gwsIntrinToOpcode(IntrID);
2207  SmallVector<SDValue, 5> Ops;
2208  if (HasVSrc)
2209  Ops.push_back(V0);
2210  Ops.push_back(OffsetField);
2211  Ops.push_back(GDS);
2212  Ops.push_back(Chain);
2213 
2214  if (HasVSrc)
2215  Ops.push_back(Glue);
2216 
2217  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2218  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2219 }
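// Worked arithmetic for the constant-offset trick above: m0 = 0xffffffff
// gives M0[21:16] = 63, and a constant offset C is encoded as
// ImmOffset = C + 1, so the hardware computes
//   (base + 63 + (C + 1)) % 64 == (base + C) % 64
// which is the resource id the intrinsic requested.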
2220 
2221 void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
2222  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2223  switch (IntrID) {
2224  case Intrinsic::amdgcn_ds_append:
2225  case Intrinsic::amdgcn_ds_consume: {
2226  if (N->getValueType(0) != MVT::i32)
2227  break;
2228  SelectDSAppendConsume(N, IntrID);
2229  return;
2230  }
2231  }
2232 
2233  SelectCode(N);
2234 }
2235 
2236 void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
2237  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2238  switch (IntrID) {
2239  case Intrinsic::amdgcn_ds_gws_init:
2240  case Intrinsic::amdgcn_ds_gws_barrier:
2241  case Intrinsic::amdgcn_ds_gws_sema_v:
2242  case Intrinsic::amdgcn_ds_gws_sema_br:
2243  case Intrinsic::amdgcn_ds_gws_sema_p:
2244  case Intrinsic::amdgcn_ds_gws_sema_release_all:
2245  SelectDS_GWS(N, IntrID);
2246  return;
2247  default:
2248  break;
2249  }
2250 
2251  SelectCode(N);
2252 }
2253 
2254 bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
2255  unsigned &Mods) const {
2256  Mods = 0;
2257  Src = In;
2258 
2259  if (Src.getOpcode() == ISD::FNEG) {
2260  Mods |= SISrcMods::NEG;
2261  Src = Src.getOperand(0);
2262  }
2263 
2264  if (Src.getOpcode() == ISD::FABS) {
2265  Mods |= SISrcMods::ABS;
2266  Src = Src.getOperand(0);
2267  }
2268 
2269  return true;
2270 }
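// Worked example (illustrative): for In = (fneg (fabs x)), the two peels
// above leave Src = x with Mods == (SISrcMods::NEG | SISrcMods::ABS),
// encoding -|x| via source modifiers. Order matters: (fabs (fneg x)) only
// peels the fabs, because fneg is checked before fabs.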
2271 
2272 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
2273  SDValue &SrcMods) const {
2274  unsigned Mods;
2275  if (SelectVOP3ModsImpl(In, Src, Mods)) {
2276  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2277  return true;
2278  }
2279 
2280  return false;
2281 }
2282 
2283 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
2284  SDValue &SrcMods) const {
2285  SelectVOP3Mods(In, Src, SrcMods);
2286  return isNoNanSrc(Src);
2287 }
2288 
2289 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_f32(SDValue In, SDValue &Src,
2290  SDValue &SrcMods) const {
2291  if (In.getValueType() == MVT::f32)
2292  return SelectVOP3Mods(In, Src, SrcMods);
2293  Src = In;
2294  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2295  return true;
2296 }
2297 
2298 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
2299  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
2300  return false;
2301 
2302  Src = In;
2303  return true;
2304 }
2305 
2306 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
2307  SDValue &SrcMods, SDValue &Clamp,
2308  SDValue &Omod) const {
2309  SDLoc DL(In);
2310  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2311  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2312 
2313  return SelectVOP3Mods(In, Src, SrcMods);
2314 }
2315 
2316 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
2317  SDValue &SrcMods,
2318  SDValue &Clamp,
2319  SDValue &Omod) const {
2320  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2321  return SelectVOP3Mods(In, Src, SrcMods);
2322 }
2323 
2324 bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
2325  SDValue &Clamp, SDValue &Omod) const {
2326  Src = In;
2327 
2328  SDLoc DL(In);
2329  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2330  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2331 
2332  return true;
2333 }
2334 
2335 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
2336  SDValue &SrcMods) const {
2337  unsigned Mods = 0;
2338  Src = In;
2339 
2340  if (Src.getOpcode() == ISD::FNEG) {
2341  Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
2342  Src = Src.getOperand(0);
2343  }
2344 
2345  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
2346  unsigned VecMods = Mods;
2347 
2348  SDValue Lo = stripBitcast(Src.getOperand(0));
2349  SDValue Hi = stripBitcast(Src.getOperand(1));
2350 
2351  if (Lo.getOpcode() == ISD::FNEG) {
2352  Lo = stripBitcast(Lo.getOperand(0));
2353  Mods ^= SISrcMods::NEG;
2354  }
2355 
2356  if (Hi.getOpcode() == ISD::FNEG) {
2357  Hi = stripBitcast(Hi.getOperand(0));
2358  Mods ^= SISrcMods::NEG_HI;
2359  }
2360 
2361  if (isExtractHiElt(Lo, Lo))
2362  Mods |= SISrcMods::OP_SEL_0;
2363 
2364  if (isExtractHiElt(Hi, Hi))
2365  Mods |= SISrcMods::OP_SEL_1;
2366 
2367  Lo = stripExtractLoElt(Lo);
2368  Hi = stripExtractLoElt(Hi);
2369 
2370  if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
2371  // Really a scalar input. Just select from the low half of the register to
2372  // avoid packing.
2373 
2374  Src = Lo;
2375  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2376  return true;
2377  }
2378 
2379  Mods = VecMods;
2380  }
2381 
2382  // Packed instructions do not have abs modifiers.
2383  Mods |= SISrcMods::OP_SEL_1;
2384 
2385  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2386  return true;
2387 }
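// Illustrative note (the op_sel_hi rationale here is an inference): for a
// v2f16 source built as (build_vector (fneg lo), hi), the walk above records
// SISrcMods::NEG for the low half only; OP_SEL_1 is then set unconditionally
// because packed operations source the second lane from the high 16 bits by
// default.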
2388 
2389 bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
2390  SDValue &SrcMods,
2391  SDValue &Clamp) const {
2392  SDLoc SL(In);
2393 
2394  // FIXME: Handle clamp and op_sel
2395  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2396 
2397  return SelectVOP3PMods(In, Src, SrcMods);
2398 }
2399 
2400 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
2401  SDValue &SrcMods) const {
2402  Src = In;
2403  // FIXME: Handle op_sel
2404  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2405  return true;
2406 }
2407 
2408 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
2409  SDValue &SrcMods,
2410  SDValue &Clamp) const {
2411  SDLoc SL(In);
2412 
2413  // FIXME: Handle clamp
2414  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2415 
2416  return SelectVOP3OpSel(In, Src, SrcMods);
2417 }
2418 
2419 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
2420  SDValue &SrcMods) const {
2421  // FIXME: Handle op_sel
2422  return SelectVOP3Mods(In, Src, SrcMods);
2423 }
2424 
2425 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
2426  SDValue &SrcMods,
2427  SDValue &Clamp) const {
2428  SDLoc SL(In);
2429 
2430  // FIXME: Handle clamp
2431  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2432 
2433  return SelectVOP3OpSelMods(In, Src, SrcMods);
2434 }
2435 
2436 // The return value is not whether the match is possible (which it always is),
2437  // but whether or not a conversion is really used.
2438 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
2439  unsigned &Mods) const {
2440  Mods = 0;
2441  SelectVOP3ModsImpl(In, Src, Mods);
2442 
2443  if (Src.getOpcode() == ISD::FP_EXTEND) {
2444  Src = Src.getOperand(0);
2445  assert(Src.getValueType() == MVT::f16);
2446  Src = stripBitcast(Src);
2447 
2448  // Be careful about folding modifiers if we already have an abs. fneg is
2449  // applied last, so we don't want to apply an earlier fneg.
2450  if ((Mods & SISrcMods::ABS) == 0) {
2451  unsigned ModsTmp;
2452  SelectVOP3ModsImpl(Src, Src, ModsTmp);
2453 
2454  if ((ModsTmp & SISrcMods::NEG) != 0)
2455  Mods ^= SISrcMods::NEG;
2456 
2457  if ((ModsTmp & SISrcMods::ABS) != 0)
2458  Mods |= SISrcMods::ABS;
2459  }
2460 
2461  // op_sel/op_sel_hi decide the source type and source.
2462  // If the source's op_sel_hi is set, the source is converted from fp16.
2463  // If the source's op_sel is set, it picks the high half of the source
2464  // register.
2465 
2466  Mods |= SISrcMods::OP_SEL_1;
2467  if (isExtractHiElt(Src, Src)) {
2468  Mods |= SISrcMods::OP_SEL_0;
2469 
2470  // TODO: Should we try to look for neg/abs here?
2471  }
2472 
2473  return true;
2474  }
2475 
2476  return false;
2477 }
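// Worked example (illustrative): for In = (fp_extend f16:x), the match above
// strips the extend and sets OP_SEL_1 to mark the source as f16, plus
// OP_SEL_0 if x is the high half of a 32-bit register; the mix instruction
// then folds the f16 -> f32 conversion into the multiply-add.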
2478 
2479 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2480  SDValue &SrcMods) const {
2481  unsigned Mods = 0;
2482  SelectVOP3PMadMixModsImpl(In, Src, Mods);
2483  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2484  return true;
2485 }
2486 
2487 SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
2488  if (In.isUndef())
2489  return CurDAG->getUNDEF(MVT::i32);
2490 
2491  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2492  SDLoc SL(In);
2493  return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
2494  }
2495 
2496  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2497  SDLoc SL(In);
2498  return CurDAG->getConstant(
2499  C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2500  }
2501 
2502  SDValue Src;
2503  if (isExtractHiElt(In, Src))
2504  return Src;
2505 
2506  return SDValue();
2507 }
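// Compile-time check of the constant path above (added for illustration):
// the f16 bit pattern of 1.0 (0x3c00) placed in the high half.
static_assert((0x3c00u << 16) == 0x3c000000u, "hi16 constant placement");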
2508 
2509 bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
2510  assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);
2511 
2512  const SIRegisterInfo *SIRI =
2513  static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
2514  const SIInstrInfo * SII =
2515  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2516 
2517  unsigned Limit = 0;
2518  bool AllUsesAcceptSReg = true;
2519  for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
2520  Limit < 10 && U != E; ++U, ++Limit) {
2521  const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
2522 
2523  // If the register class is unknown, it could be a class that needs
2524  // to be an SGPR, e.g. one imposed by an inline asm
2525  // constraint.
2526  if (!RC || SIRI->isSGPRClass(RC))
2527  return false;
2528 
2529  if (RC != &AMDGPU::VS_32RegClass) {
2530  AllUsesAcceptSReg = false;
2531  SDNode * User = *U;
2532  if (User->isMachineOpcode()) {
2533  unsigned Opc = User->getMachineOpcode();
2534  MCInstrDesc Desc = SII->get(Opc);
2535  if (Desc.isCommutable()) {
2536  unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
2537  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
2538  if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
2539  unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
2540  const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
2541  if (CommutedRC == &AMDGPU::VS_32RegClass)
2542  AllUsesAcceptSReg = true;
2543  }
2544  }
2545  }
2546  // If "AllUsesAcceptSReg == false" so far, we haven't succeeded in
2547  // commuting the current user. This means we have at least one use
2548  // that strictly requires a VGPR. Thus, we will not attempt to commute
2549  // other user instructions.
2550  if (!AllUsesAcceptSReg)
2551  break;
2552  }
2553  }
2554  return !AllUsesAcceptSReg && (Limit < 10);
2555 }
2556 
2557 bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
2558  auto Ld = cast<LoadSDNode>(N);
2559 
2560  return Ld->getAlignment() >= 4 &&
2561  (
2562  (
2563  (
2564  Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2565  Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT
2566  )
2567  &&
2568  !N->isDivergent()
2569  )
2570  ||
2571  (
2572  Subtarget->getScalarizeGlobalBehavior() &&
2573  Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
2574  !Ld->isVolatile() &&
2575  !N->isDivergent() &&
2576  static_cast<const SITargetLowering *>(
2577  getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)
2578  )
2579  );
2580 }
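// Summary of the predicate above (restating the same logic): a load is
// treated as uniform when it is at least 4-byte aligned and either
//   (a) it reads CONSTANT/CONSTANT_32BIT address space non-divergently, or
//   (b) the subtarget scalarizes global loads and the load is global,
//       non-volatile, non-divergent, and its memory operand is known not
//       to be clobbered.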
2581 
2582 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
2583  const AMDGPUTargetLowering& Lowering =
2584  *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
2585  bool IsModified = false;
2586  do {
2587  IsModified = false;
2588 
2589  // Go over all selected nodes and try to fold them a bit more
2590  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
2591  while (Position != CurDAG->allnodes_end()) {
2592  SDNode *Node = &*Position++;
2593  MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
2594  if (!MachineNode)
2595  continue;
2596 
2597  SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
2598  if (ResNode != Node) {
2599  if (ResNode)
2600  ReplaceUses(Node, ResNode);
2601  IsModified = true;
2602  }
2603  }
2604  CurDAG->RemoveDeadNodes();
2605  } while (IsModified);
2606 }
2607 
2608 bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
2609  Subtarget = &MF.getSubtarget<R600Subtarget>();
2610  return SelectionDAGISel::runOnMachineFunction(MF);
2611 }
2612 
2613 bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
2614  if (!N->readMem())
2615  return false;
2616  if (CbId == -1)
2617  return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2618  N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
2619 
2620  return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
2621 }
2622 
2623 bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
2624  SDValue& IntPtr) {
2625  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
2626  IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
2627  true);
2628  return true;
2629  }
2630  return false;
2631 }
2632 
2633 bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
2634  SDValue& BaseReg, SDValue &Offset) {
2635  if (!isa<ConstantSDNode>(Addr)) {
2636  BaseReg = Addr;
2637  Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
2638  return true;
2639  }
2640  return false;
2641 }
2642 
2643 void R600DAGToDAGISel::Select(SDNode *N) {
2644  unsigned int Opc = N->getOpcode();
2645  if (N->isMachineOpcode()) {
2646  N->setNodeId(-1);
2647  return; // Already selected.
2648  }
2649 
2650  switch (Opc) {
2651  default: break;
2652  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
2653  case ISD::SCALAR_TO_VECTOR:
2654  case ISD::BUILD_VECTOR: {
2655  EVT VT = N->getValueType(0);
2656  unsigned NumVectorElts = VT.getVectorNumElements();
2657  unsigned RegClassID;
2658  // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
2659  // that adds a 128 bits reg copy when going through TwoAddressInstructions
2660  // pass. We want to avoid 128 bits copies as much as possible because they
2661  // can't be bundled by our scheduler.
2662  switch(NumVectorElts) {
2663  case 2: RegClassID = R600::R600_Reg64RegClassID; break;
2664  case 4:
2665  if (Subtarget->hasVertexCache())
2666  RegClassID = R600::R600_Reg128VerticalRegClassID;
2667  else
2668  RegClassID = R600::R600_Reg128RegClassID;
2669  break;
2670  default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
2671  }
2672  SelectBuildVector(N, RegClassID);
2673  return;
2674  }
2675  }
2676 
2677  SelectCode(N);
2678 }
2679 
2680 bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
2681  SDValue &Offset) {
2682  ConstantSDNode *C;
2683  SDLoc DL(Addr);
2684 
2685  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
2686  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2687  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2688  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
2689  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
2690  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2691  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2692  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
2693  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
2694  Base = Addr.getOperand(0);
2695  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2696  } else {
2697  Base = Addr;
2698  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2699  }
2700 
2701  return true;
2702 }
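// Worked examples of the cases above (illustrative):
//   Addr = 16            -> Base = INDIRECT_BASE_ADDR, Offset = 16
//   Addr = DWORDADDR(16) -> Base = INDIRECT_BASE_ADDR, Offset = 16
//   Addr = (add x, 8)    -> Base = x,                  Offset = 8
//   otherwise            -> Base = Addr,               Offset = 0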
2703 
2704 bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
2705  SDValue &Offset) {
2706  ConstantSDNode *IMMOffset;
2707 
2708  if (Addr.getOpcode() == ISD::ADD
2709  && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
2710  && isInt<16>(IMMOffset->getZExtValue())) {
2711 
2712  Base = Addr.getOperand(0);
2713  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2714  MVT::i32);
2715  return true;
2716  // If the pointer address is constant, we can move it to the offset field.
2717  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
2718  && isInt<16>(IMMOffset->getZExtValue())) {
2719  Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
2720  SDLoc(CurDAG->getEntryNode()),
2721  R600::ZERO, MVT::i32);
2722  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2723  MVT::i32);
2724  return true;
2725  }
2726 
2727  // Default case, no offset
2728  Base = Addr;
2729  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2730  return true;
2731 }