LLVM 10.0.0svn
AMDGPUISelDAGToDAG.cpp
1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Defines an instruction selector for the AMDGPU target.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "AMDGPUArgumentUsageInfo.h"
16 #include "AMDGPUISelLowering.h" // For AMDGPUISD
17 #include "AMDGPUInstrInfo.h"
18 #include "AMDGPUPerfHintAnalysis.h"
19 #include "AMDGPURegisterInfo.h"
20 #include "AMDGPUSubtarget.h"
21 #include "AMDGPUTargetMachine.h"
22 #include "SIDefines.h"
23 #include "SIISelLowering.h"
24 #include "SIInstrInfo.h"
25 #include "SIMachineFunctionInfo.h"
26 #include "SIRegisterInfo.h"
27 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/ADT/StringRef.h"
31 #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
32 #include "llvm/Analysis/ValueTracking.h"
33 #include "llvm/CodeGen/FunctionLoweringInfo.h"
34 #include "llvm/CodeGen/ISDOpcodes.h"
35 #include "llvm/CodeGen/MachineFunction.h"
36 #include "llvm/CodeGen/MachineRegisterInfo.h"
37 #include "llvm/CodeGen/SelectionDAG.h"
38 #include "llvm/CodeGen/SelectionDAGISel.h"
39 #include "llvm/CodeGen/SelectionDAGNodes.h"
40 #include "llvm/CodeGen/ValueTypes.h"
41 #include "llvm/IR/BasicBlock.h"
42 #ifdef EXPENSIVE_CHECKS
43 #include "llvm/IR/Dominators.h"
44 #endif
45 #include "llvm/IR/Instruction.h"
46 #include "llvm/MC/MCInstrDesc.h"
47 #include "llvm/Support/Casting.h"
48 #include "llvm/Support/CodeGen.h"
49 #include "llvm/Support/ErrorHandling.h"
50 #include "llvm/Support/MachineValueType.h"
51 #include "llvm/Support/MathExtras.h"
52 #include <cassert>
53 #include <cstdint>
54 #include <new>
55 #include <vector>
56 
57 #define DEBUG_TYPE "isel"
58 
59 using namespace llvm;
60 
61 namespace llvm {
62 
63 class R600InstrInfo;
64 
65 } // end namespace llvm
66 
67 //===----------------------------------------------------------------------===//
68 // Instruction Selector Implementation
69 //===----------------------------------------------------------------------===//
70 
71 namespace {
72 
73 static bool isNullConstantOrUndef(SDValue V) {
74  if (V.isUndef())
75  return true;
76 
77  const ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
78  return Const != nullptr && Const->isNullValue();
79 }
80 
81 static bool getConstantValue(SDValue N, uint32_t &Out) {
82  // This is only used for packed vectors, where using 0 for undef should
83  // always be good.
84  if (N.isUndef()) {
85  Out = 0;
86  return true;
87  }
88 
89  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
90  Out = C->getAPIntValue().getSExtValue();
91  return true;
92  }
93 
94  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
95  Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
96  return true;
97  }
98 
99  return false;
100 }
101 
102 // TODO: Handle undef as zero
103 static SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
104  bool Negate = false) {
105  assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
106  uint32_t LHSVal, RHSVal;
107  if (getConstantValue(N->getOperand(0), LHSVal) &&
108  getConstantValue(N->getOperand(1), RHSVal)) {
109  SDLoc SL(N);
110  uint32_t K = Negate ?
111  (-LHSVal & 0xffff) | (-RHSVal << 16) :
112  (LHSVal & 0xffff) | (RHSVal << 16);
113  return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
114  DAG.getTargetConstant(K, SL, MVT::i32));
115  }
116 
117  return nullptr;
118 }
119 
120 static SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
121  return packConstantV2I16(N, DAG, true);
122 }
123 
124 /// AMDGPU specific code to select AMDGPU machine instructions for
125 /// SelectionDAG operations.
126 class AMDGPUDAGToDAGISel : public SelectionDAGISel {
127  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
128  // make the right decision when generating code for different targets.
129  const GCNSubtarget *Subtarget;
130  bool EnableLateStructurizeCFG;
131 
132 public:
133  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
134  CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
135  : SelectionDAGISel(*TM, OptLevel) {
136  EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
137  }
138  ~AMDGPUDAGToDAGISel() override = default;
139 
140  void getAnalysisUsage(AnalysisUsage &AU) const override {
141  AU.addRequired<AMDGPUArgumentUsageInfo>();
142  AU.addRequired<LegacyDivergenceAnalysis>();
143 #ifdef EXPENSIVE_CHECKS
144  AU.addRequired<DominatorTreeWrapperPass>();
145  AU.addRequired<LoopInfoWrapperPass>();
146 #endif
147  SelectionDAGISel::getAnalysisUsage(AU);
148  }
149 
150  bool matchLoadD16FromBuildVector(SDNode *N) const;
151 
152  bool runOnMachineFunction(MachineFunction &MF) override;
153  void PreprocessISelDAG() override;
154  void Select(SDNode *N) override;
155  StringRef getPassName() const override;
156  void PostprocessISelDAG() override;
157 
158 protected:
159  void SelectBuildVector(SDNode *N, unsigned RegClassID);
160 
161 private:
162  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
163  bool isNoNanSrc(SDValue N) const;
164  bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
165  bool isNegInlineImmediate(const SDNode *N) const {
166  return isInlineImmediate(N, true);
167  }
168 
169  bool isVGPRImm(const SDNode *N) const;
170  bool isUniformLoad(const SDNode *N) const;
171  bool isUniformBr(const SDNode *N) const;
172 
173  MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
174 
175  SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
176  SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
177  SDNode *glueCopyToM0LDSInit(SDNode *N) const;
178 
179  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
180  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
181  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
182  bool isDSOffsetLegal(SDValue Base, unsigned Offset,
183  unsigned OffsetBits) const;
184  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
185  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
186  SDValue &Offset1) const;
187  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
188  SDValue &SOffset, SDValue &Offset, SDValue &Offen,
189  SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
190  SDValue &TFE, SDValue &DLC, SDValue &SWZ) const;
191  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
192  SDValue &SOffset, SDValue &Offset, SDValue &GLC,
193  SDValue &SLC, SDValue &TFE, SDValue &DLC,
194  SDValue &SWZ) const;
195  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
196  SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
197  SDValue &SLC) const;
198  bool SelectMUBUFScratchOffen(SDNode *Parent,
199  SDValue Addr, SDValue &RSrc, SDValue &VAddr,
200  SDValue &SOffset, SDValue &ImmOffset) const;
201  bool SelectMUBUFScratchOffset(SDNode *Parent,
202  SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
203  SDValue &Offset) const;
204 
205  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
206  SDValue &Offset, SDValue &GLC, SDValue &SLC,
207  SDValue &TFE, SDValue &DLC, SDValue &SWZ) const;
208  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
209  SDValue &Offset, SDValue &SLC) const;
210  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
211  SDValue &Offset) const;
212 
213  template <bool IsSigned>
214  bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
215  SDValue &Offset, SDValue &SLC) const;
216  bool SelectFlatAtomic(SDNode *N, SDValue Addr, SDValue &VAddr,
217  SDValue &Offset, SDValue &SLC) const;
218  bool SelectFlatAtomicSigned(SDNode *N, SDValue Addr, SDValue &VAddr,
219  SDValue &Offset, SDValue &SLC) const;
220 
221  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
222  bool &Imm) const;
223  SDValue Expand32BitAddress(SDValue Addr) const;
224  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
225  bool &Imm) const;
226  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
227  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
228  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
229  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
230  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
231  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
232 
233  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
234  bool SelectVOP3Mods_f32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
235  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
236  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
237  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
238  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
239  SDValue &Clamp, SDValue &Omod) const;
240  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
241  SDValue &Clamp, SDValue &Omod) const;
242 
243  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
244  SDValue &Clamp,
245  SDValue &Omod) const;
246 
247  bool SelectVOP3OMods(SDValue In, SDValue &Src,
248  SDValue &Clamp, SDValue &Omod) const;
249 
250  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
251  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
252  SDValue &Clamp) const;
253 
254  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
255  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
256  SDValue &Clamp) const;
257 
258  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
259  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
260  SDValue &Clamp) const;
261  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
262  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
263 
264  SDValue getHi16Elt(SDValue In) const;
265 
266  SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;
267 
268  void SelectADD_SUB_I64(SDNode *N);
269  void SelectAddcSubb(SDNode *N);
270  void SelectUADDO_USUBO(SDNode *N);
271  void SelectDIV_SCALE(SDNode *N);
272  void SelectDIV_FMAS(SDNode *N);
273  void SelectMAD_64_32(SDNode *N);
274  void SelectFMA_W_CHAIN(SDNode *N);
275  void SelectFMUL_W_CHAIN(SDNode *N);
276 
277  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
278  uint32_t Offset, uint32_t Width);
279  void SelectS_BFEFromShifts(SDNode *N);
280  void SelectS_BFE(SDNode *N);
281  bool isCBranchSCC(const SDNode *N) const;
282  void SelectBRCOND(SDNode *N);
283  void SelectFMAD_FMA(SDNode *N);
284  void SelectATOMIC_CMP_SWAP(SDNode *N);
285  void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
286  void SelectDS_GWS(SDNode *N, unsigned IntrID);
287  void SelectINTRINSIC_W_CHAIN(SDNode *N);
288  void SelectINTRINSIC_WO_CHAIN(SDNode *N);
289  void SelectINTRINSIC_VOID(SDNode *N);
290 
291 protected:
292  // Include the pieces autogenerated from the target description.
293 #include "AMDGPUGenDAGISel.inc"
294 };
295 
296 class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
297  const R600Subtarget *Subtarget;
298 
299  bool isConstantLoad(const MemSDNode *N, int cbID) const;
300  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
301  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
302  SDValue& Offset);
303 public:
304  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
305  AMDGPUDAGToDAGISel(TM, OptLevel) {}
306 
307  void Select(SDNode *N) override;
308 
309  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
310  SDValue &Offset) override;
311  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
312  SDValue &Offset) override;
313 
314  bool runOnMachineFunction(MachineFunction &MF) override;
315 
316  void PreprocessISelDAG() override {}
317 
318 protected:
319  // Include the pieces autogenerated from the target description.
320 #include "R600GenDAGISel.inc"
321 };
322 
323 static SDValue stripBitcast(SDValue Val) {
324  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
325 }
326 
327 // Figure out if this is really an extract of the high 16-bits of a dword.
328 static bool isExtractHiElt(SDValue In, SDValue &Out) {
329  In = stripBitcast(In);
330  if (In.getOpcode() != ISD::TRUNCATE)
331  return false;
332 
333  SDValue Srl = In.getOperand(0);
334  if (Srl.getOpcode() == ISD::SRL) {
335  if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
336  if (ShiftAmt->getZExtValue() == 16) {
337  Out = stripBitcast(Srl.getOperand(0));
338  return true;
339  }
340  }
341  }
342 
343  return false;
344 }
345 
346 // Look through operations that obscure just looking at the low 16-bits of the
347 // same register.
348 static SDValue stripExtractLoElt(SDValue In) {
349  if (In.getOpcode() == ISD::TRUNCATE) {
350  SDValue Src = In.getOperand(0);
351  if (Src.getValueType().getSizeInBits() == 32)
352  return stripBitcast(Src);
353  }
354 
355  return In;
356 }
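// The DAG shapes matched above correspond to this plain integer arithmetic on
// a 32-bit dword (a standalone sketch, hypothetical helper names):
#include <cstdint>

static uint16_t extractHiEltSketch(uint32_t Dword) { return uint16_t(Dword >> 16); } // (trunc (srl x, 16))
static uint16_t extractLoEltSketch(uint32_t Dword) { return uint16_t(Dword); }       // plain truncate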
357 
358 } // end anonymous namespace
359 
360 INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
361  "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
362 INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
363 INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
364 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
365 #ifdef EXPENSIVE_CHECKS
366 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
367 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
368 #endif
369 INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
370  "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
371 
372 /// This pass converts a legalized DAG into an AMDGPU-specific
373 /// DAG, ready for instruction scheduling.
374 FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
375  CodeGenOpt::Level OptLevel) {
376  return new AMDGPUDAGToDAGISel(TM, OptLevel);
377 }
378 
379 /// This pass converts a legalized DAG into an R600-specific
380 /// DAG, ready for instruction scheduling.
381 FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
382  CodeGenOpt::Level OptLevel) {
383  return new R600DAGToDAGISel(TM, OptLevel);
384 }
385 
386 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
387 #ifdef EXPENSIVE_CHECKS
388  DominatorTree & DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
389  LoopInfo * LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
390  for (auto &L : LI->getLoopsInPreorder()) {
391  assert(L->isLCSSAForm(DT));
392  }
393 #endif
394  Subtarget = &MF.getSubtarget<GCNSubtarget>();
395  return SelectionDAGISel::runOnMachineFunction(MF);
396 }
397 
398 bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
399  assert(Subtarget->d16PreservesUnusedBits());
400  MVT VT = N->getValueType(0).getSimpleVT();
401  if (VT != MVT::v2i16 && VT != MVT::v2f16)
402  return false;
403 
404  SDValue Lo = N->getOperand(0);
405  SDValue Hi = N->getOperand(1);
406 
407  LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));
408 
409  // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
410  // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
411  // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo
412 
413  // Need to check for possible indirect dependencies on the other half of the
414  // vector to avoid introducing a cycle.
415  if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
416  SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
417 
418  SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
419  SDValue Ops[] = {
420  LdHi->getChain(), LdHi->getBasePtr(), TiedIn
421  };
422 
423  unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
424  if (LdHi->getMemoryVT() == MVT::i8) {
425  LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
426  AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
427  } else {
428  assert(LdHi->getMemoryVT() == MVT::i16);
429  }
430 
431  SDValue NewLoadHi =
432  CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
433  Ops, LdHi->getMemoryVT(),
434  LdHi->getMemOperand());
435 
436  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
437  CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
438  return true;
439  }
440 
441  // build_vector (load ptr), hi -> load_d16_lo ptr, hi
442  // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
443  // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
444  LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
445  if (LdLo && Lo.hasOneUse()) {
446  SDValue TiedIn = getHi16Elt(Hi);
447  if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
448  return false;
449 
450  SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
451  unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
452  if (LdLo->getMemoryVT() == MVT::i8) {
453  LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
454  AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
455  } else {
456  assert(LdLo->getMemoryVT() == MVT::i16);
457  }
458 
459  TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);
460 
461  SDValue Ops[] = {
462  LdLo->getChain(), LdLo->getBasePtr(), TiedIn
463  };
464 
465  SDValue NewLoadLo =
466  CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
467  Ops, LdLo->getMemoryVT(),
468  LdLo->getMemOperand());
469 
470  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
471  CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
472  return true;
473  }
474 
475  return false;
476 }
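// Semantics of the d16_hi/d16_lo loads formed above, as a standalone sketch
// (hypothetical helpers): the loaded 16 bits replace one half of the 32-bit
// result while the tied-in operand supplies the other half unchanged.
#include <cstdint>

static uint32_t loadD16HiSketch(uint16_t Loaded, uint32_t TiedIn) {
  return (uint32_t(Loaded) << 16) | (TiedIn & 0xffffu);
}
static uint32_t loadD16LoSketch(uint16_t Loaded, uint32_t TiedIn) {
  return (TiedIn & 0xffff0000u) | Loaded;
}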
477 
478 void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
479  if (!Subtarget->d16PreservesUnusedBits())
480  return;
481 
482  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
483 
484  bool MadeChange = false;
485  while (Position != CurDAG->allnodes_begin()) {
486  SDNode *N = &*--Position;
487  if (N->use_empty())
488  continue;
489 
490  switch (N->getOpcode()) {
491  case ISD::BUILD_VECTOR:
492  MadeChange |= matchLoadD16FromBuildVector(N);
493  break;
494  default:
495  break;
496  }
497  }
498 
499  if (MadeChange) {
500  CurDAG->RemoveDeadNodes();
501  LLVM_DEBUG(dbgs() << "After PreProcess:\n";
502  CurDAG->dump(););
503  }
504 }
505 
506 bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
507  if (TM.Options.NoNaNsFPMath)
508  return true;
509 
510  // TODO: Move into isKnownNeverNaN
511  if (N->getFlags().isDefined())
512  return N->getFlags().hasNoNaNs();
513 
514  return CurDAG->isKnownNeverNaN(N);
515 }
516 
517 bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
518  bool Negated) const {
519  if (N->isUndef())
520  return true;
521 
522  const SIInstrInfo *TII = Subtarget->getInstrInfo();
523  if (Negated) {
524  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
525  return TII->isInlineConstant(-C->getAPIntValue());
526 
527  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
528  return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());
529 
530  } else {
531  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
532  return TII->isInlineConstant(C->getAPIntValue());
533 
534  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
535  return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
536  }
537 
538  return false;
539 }
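// Sketch of the Negated path above: a constant whose negation is an inline
// immediate lets the selector fold the negation into the instruction (see
// isNegInlineImmediate). A host-side analogue for integer constants, assuming
// the usual AMDGPU integer inline range of [-16, 64] (an assumption here; the
// authoritative check is TII->isInlineConstant):
#include <cstdint>

static bool isIntInlineImmSketch(int64_t V) { return V >= -16 && V <= 64; }
static bool isNegIntInlineImmSketch(int64_t V) { return isIntInlineImmSketch(-V); }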
540 
541 /// Determine the register class for \p OpNo
542 /// \returns The register class of the virtual register that will be used for
543 /// the given operand number \p OpNo or NULL if the register class cannot be
544 /// determined.
545 const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
546  unsigned OpNo) const {
547  if (!N->isMachineOpcode()) {
548  if (N->getOpcode() == ISD::CopyToReg) {
549  unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
550  if (Register::isVirtualRegister(Reg)) {
551  MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
552  return MRI.getRegClass(Reg);
553  }
554 
555  const SIRegisterInfo *TRI
556  = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
557  return TRI->getPhysRegClass(Reg);
558  }
559 
560  return nullptr;
561  }
562 
563  switch (N->getMachineOpcode()) {
564  default: {
565  const MCInstrDesc &Desc =
566  Subtarget->getInstrInfo()->get(N->getMachineOpcode());
567  unsigned OpIdx = Desc.getNumDefs() + OpNo;
568  if (OpIdx >= Desc.getNumOperands())
569  return nullptr;
570  int RegClass = Desc.OpInfo[OpIdx].RegClass;
571  if (RegClass == -1)
572  return nullptr;
573 
574  return Subtarget->getRegisterInfo()->getRegClass(RegClass);
575  }
576  case AMDGPU::REG_SEQUENCE: {
577  unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
578  const TargetRegisterClass *SuperRC =
579  Subtarget->getRegisterInfo()->getRegClass(RCID);
580 
581  SDValue SubRegOp = N->getOperand(OpNo + 1);
582  unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
583  return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
584  SubRegIdx);
585  }
586  }
587 }
588 
589 SDNode *AMDGPUDAGToDAGISel::glueCopyToOp(SDNode *N, SDValue NewChain,
590  SDValue Glue) const {
591  SmallVector<SDValue, 8> Ops;
592  Ops.push_back(NewChain); // Replace the chain.
593  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
594  Ops.push_back(N->getOperand(i));
595 
596  Ops.push_back(Glue);
597  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
598 }
599 
600 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
601  const SITargetLowering& Lowering =
602  *static_cast<const SITargetLowering*>(getTargetLowering());
603 
604  assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
605 
606  SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), Val);
607  return glueCopyToOp(N, M0, M0.getValue(1));
608 }
609 
610 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
611  unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
612  if (AS == AMDGPUAS::LOCAL_ADDRESS) {
613  if (Subtarget->ldsRequiresM0Init())
614  return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
615  } else if (AS == AMDGPUAS::REGION_ADDRESS) {
616  MachineFunction &MF = CurDAG->getMachineFunction();
617  unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
618  return
619  glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
620  }
621  return N;
622 }
623 
624 MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
625  EVT VT) const {
626  SDNode *Lo = CurDAG->getMachineNode(
627  AMDGPU::S_MOV_B32, DL, MVT::i32,
628  CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
629  SDNode *Hi =
630  CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
631  CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32));
632  const SDValue Ops[] = {
633  CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
634  SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
635  SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};
636 
637  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
638 }
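// buildSMovImm64 splits the 64-bit immediate into two 32-bit halves, emits an
// S_MOV_B32 for each, and recombines them with a REG_SEQUENCE (sub0 = low,
// sub1 = high). The split itself is plain integer arithmetic (standalone
// sketch, hypothetical helper):
#include <cstdint>

struct ImmHalves { uint32_t Lo, Hi; };
static ImmHalves splitImm64(uint64_t Imm) {
  return {static_cast<uint32_t>(Imm & 0xFFFFFFFF),  // -> S_MOV_B32 (sub0)
          static_cast<uint32_t>(Imm >> 32)};        // -> S_MOV_B32 (sub1)
}
// e.g. splitImm64(0x123456789ABCDEF0) -> {0x9ABCDEF0, 0x12345678}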
639 
640 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
641  switch (NumVectorElts) {
642  case 1:
643  return AMDGPU::SReg_32RegClassID;
644  case 2:
645  return AMDGPU::SReg_64RegClassID;
646  case 3:
647  return AMDGPU::SGPR_96RegClassID;
648  case 4:
649  return AMDGPU::SGPR_128RegClassID;
650  case 5:
651  return AMDGPU::SGPR_160RegClassID;
652  case 8:
653  return AMDGPU::SReg_256RegClassID;
654  case 16:
655  return AMDGPU::SReg_512RegClassID;
656  case 32:
657  return AMDGPU::SReg_1024RegClassID;
658  }
659 
660  llvm_unreachable("invalid vector size");
661 }
662 
663 void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
664  EVT VT = N->getValueType(0);
665  unsigned NumVectorElts = VT.getVectorNumElements();
666  EVT EltVT = VT.getVectorElementType();
667  SDLoc DL(N);
668  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
669 
670  if (NumVectorElts == 1) {
671  CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
672  RegClass);
673  return;
674  }
675 
676  assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
677  "supported yet");
678  // 32 = Max Num Vector Elements
679  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
680  // 1 = Vector Register Class
681  SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
682 
683  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
684  bool IsRegSeq = true;
685  unsigned NOps = N->getNumOperands();
686  for (unsigned i = 0; i < NOps; i++) {
687  // XXX: Why is this here?
688  if (isa<RegisterSDNode>(N->getOperand(i))) {
689  IsRegSeq = false;
690  break;
691  }
692  unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
693  RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
694  RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
695  }
696  if (NOps != NumVectorElts) {
697  // Fill in the missing undef elements if this was a scalar_to_vector.
698  assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
699  MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
700  DL, EltVT);
701  for (unsigned i = NOps; i < NumVectorElts; ++i) {
702  unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
703  RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
704  RegSeqArgs[1 + (2 * i) + 1] =
705  CurDAG->getTargetConstant(Sub, DL, MVT::i32);
706  }
707  }
708 
709  if (!IsRegSeq)
710  SelectCode(N);
711  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
712 }
713 
714 void AMDGPUDAGToDAGISel::Select(SDNode *N) {
715  unsigned int Opc = N->getOpcode();
716  if (N->isMachineOpcode()) {
717  N->setNodeId(-1);
718  return; // Already selected.
719  }
720 
721  // isa<MemSDNode> almost works but is slightly too permissive for some DS
722  // intrinsics.
723  if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N) ||
724  (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
725  Opc == ISD::ATOMIC_LOAD_FADD ||
726  Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
727  Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) {
728  N = glueCopyToM0LDSInit(N);
729  SelectCode(N);
730  return;
731  }
732 
733  switch (Opc) {
734  default:
735  break;
736  // We are selecting i64 ADD here instead of custom lower it during
737  // DAG legalization, so we can fold some i64 ADDs used for address
738  // calculation into the LOAD and STORE instructions.
739  case ISD::ADDC:
740  case ISD::ADDE:
741  case ISD::SUBC:
742  case ISD::SUBE: {
743  if (N->getValueType(0) != MVT::i64)
744  break;
745 
746  SelectADD_SUB_I64(N);
747  return;
748  }
749  case ISD::ADDCARRY:
750  case ISD::SUBCARRY:
751  if (N->getValueType(0) != MVT::i32)
752  break;
753 
754  SelectAddcSubb(N);
755  return;
756  case ISD::UADDO:
757  case ISD::USUBO: {
758  SelectUADDO_USUBO(N);
759  return;
760  }
761  case AMDGPUISD::FMUL_W_CHAIN: {
762  SelectFMUL_W_CHAIN(N);
763  return;
764  }
765  case AMDGPUISD::FMA_W_CHAIN: {
766  SelectFMA_W_CHAIN(N);
767  return;
768  }
769 
770  case ISD::SCALAR_TO_VECTOR:
771  case ISD::BUILD_VECTOR: {
772  EVT VT = N->getValueType(0);
773  unsigned NumVectorElts = VT.getVectorNumElements();
774  if (VT.getScalarSizeInBits() == 16) {
775  if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
776  if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
777  ReplaceNode(N, Packed);
778  return;
779  }
780  }
781 
782  break;
783  }
784 
785  assert(VT.getVectorElementType().bitsEq(MVT::i32));
786  unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
787  SelectBuildVector(N, RegClassID);
788  return;
789  }
790  case ISD::BUILD_PAIR: {
791  SDValue RC, SubReg0, SubReg1;
792  SDLoc DL(N);
793  if (N->getValueType(0) == MVT::i128) {
794  RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
795  SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
796  SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
797  } else if (N->getValueType(0) == MVT::i64) {
798  RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
799  SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
800  SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
801  } else {
802  llvm_unreachable("Unhandled value type for BUILD_PAIR");
803  }
804  const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
805  N->getOperand(1), SubReg1 };
806  ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
807  N->getValueType(0), Ops));
808  return;
809  }
810 
811  case ISD::Constant:
812  case ISD::ConstantFP: {
813  if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
814  break;
815 
816  uint64_t Imm;
817  if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
818  Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
819  else {
820  ConstantSDNode *C = cast<ConstantSDNode>(N);
821  Imm = C->getZExtValue();
822  }
823 
824  SDLoc DL(N);
825  ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
826  return;
827  }
828  case AMDGPUISD::BFE_I32:
829  case AMDGPUISD::BFE_U32: {
830  // There is a scalar version available, but unlike the vector version which
831  // has a separate operand for the offset and width, the scalar version packs
832  // the width and offset into a single operand. Try to move to the scalar
833  // version if the offsets are constant, so that we can try to keep extended
834  // loads of kernel arguments in SGPRs.
835 
836  // TODO: Technically we could try to pattern match scalar bitshifts of
837  // dynamic values, but it's probably not useful.
838  ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
839  if (!Offset)
840  break;
841 
842  ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
843  if (!Width)
844  break;
845 
846  bool Signed = Opc == AMDGPUISD::BFE_I32;
847 
848  uint32_t OffsetVal = Offset->getZExtValue();
849  uint32_t WidthVal = Width->getZExtValue();
850 
851  ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
852  SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
853  return;
854  }
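// getS_BFE (defined later in the full file) packs the two constants into the
// single source operand the scalar S_BFE expects. Assuming the usual encoding
// of offset in the low bits and width at bit 16, the packing is (standalone
// sketch):
#include <cstdint>

static uint32_t packBFEOperandSketch(uint32_t Offset, uint32_t Width) {
  return Offset | (Width << 16);  // offset in bits [5:0], width in [22:16]
}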
855  case AMDGPUISD::DIV_SCALE: {
856  SelectDIV_SCALE(N);
857  return;
858  }
859  case AMDGPUISD::DIV_FMAS: {
860  SelectDIV_FMAS(N);
861  return;
862  }
863  case AMDGPUISD::MAD_I64_I32:
864  case AMDGPUISD::MAD_U64_U32: {
865  SelectMAD_64_32(N);
866  return;
867  }
868  case ISD::CopyToReg: {
869  const SITargetLowering& Lowering =
870  *static_cast<const SITargetLowering*>(getTargetLowering());
871  N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
872  break;
873  }
874  case ISD::AND:
875  case ISD::SRL:
876  case ISD::SRA:
877  case ISD::SIGN_EXTEND_INREG:
878  if (N->getValueType(0) != MVT::i32)
879  break;
880 
881  SelectS_BFE(N);
882  return;
883  case ISD::BRCOND:
884  SelectBRCOND(N);
885  return;
886  case ISD::FMAD:
887  case ISD::FMA:
888  SelectFMAD_FMA(N);
889  return;
890  case AMDGPUISD::ATOMIC_CMP_SWAP:
891  SelectATOMIC_CMP_SWAP(N);
892  return;
893  case AMDGPUISD::CVT_PKRTZ_F16_F32:
894  case AMDGPUISD::CVT_PKNORM_I16_F32:
895  case AMDGPUISD::CVT_PKNORM_U16_F32:
896  case AMDGPUISD::CVT_PK_U16_U32:
897  case AMDGPUISD::CVT_PK_I16_I32: {
898  // Hack around using a legal type if f16 is illegal.
899  if (N->getValueType(0) == MVT::i32) {
900  MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
901  N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
902  { N->getOperand(0), N->getOperand(1) });
903  SelectCode(N);
904  return;
905  }
906 
907  break;
908  }
909  case ISD::INTRINSIC_W_CHAIN: {
910  SelectINTRINSIC_W_CHAIN(N);
911  return;
912  }
913  case ISD::INTRINSIC_WO_CHAIN: {
914  SelectINTRINSIC_WO_CHAIN(N);
915  return;
916  }
917  case ISD::INTRINSIC_VOID: {
918  SelectINTRINSIC_VOID(N);
919  return;
920  }
921  }
922 
923  SelectCode(N);
924 }
925 
926 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
927  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
928  const Instruction *Term = BB->getTerminator();
929  return Term->getMetadata("amdgpu.uniform") ||
930  Term->getMetadata("structurizecfg.uniform");
931 }
932 
933 StringRef AMDGPUDAGToDAGISel::getPassName() const {
934  return "AMDGPU DAG->DAG Pattern Instruction Selection";
935 }
936 
937 //===----------------------------------------------------------------------===//
938 // Complex Patterns
939 //===----------------------------------------------------------------------===//
940 
941 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
942  SDValue &Offset) {
943  return false;
944 }
945 
946 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
947  SDValue &Offset) {
948  ConstantSDNode *C;
949  SDLoc DL(Addr);
950 
951  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
952  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
953  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
954  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
955  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
956  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
957  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
958  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
959  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
960  Base = Addr.getOperand(0);
961  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
962  } else {
963  Base = Addr;
964  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
965  }
966 
967  return true;
968 }
969 
970 SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
971  const SDLoc &DL) const {
972  SDNode *Mov = CurDAG->getMachineNode(
973  AMDGPU::S_MOV_B32, DL, MVT::i32,
974  CurDAG->getTargetConstant(Val, DL, MVT::i32));
975  return SDValue(Mov, 0);
976 }
977 
978 // FIXME: Should only handle addcarry/subcarry
979 void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
980  SDLoc DL(N);
981  SDValue LHS = N->getOperand(0);
982  SDValue RHS = N->getOperand(1);
983 
984  unsigned Opcode = N->getOpcode();
985  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
986  bool ProduceCarry =
987  ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
988  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;
989 
990  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
991  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
992 
993  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
994  DL, MVT::i32, LHS, Sub0);
995  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
996  DL, MVT::i32, LHS, Sub1);
997 
998  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
999  DL, MVT::i32, RHS, Sub0);
1000  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1001  DL, MVT::i32, RHS, Sub1);
1002 
1003  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
1004 
1005  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
1006  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
1007 
1008  SDNode *AddLo;
1009  if (!ConsumeCarry) {
1010  SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
1011  AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
1012  } else {
1013  SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
1014  AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
1015  }
1016  SDValue AddHiArgs[] = {
1017  SDValue(Hi0, 0),
1018  SDValue(Hi1, 0),
1019  SDValue(AddLo, 1)
1020  };
1021  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);
1022 
1023  SDValue RegSequenceArgs[] = {
1024  CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
1025  SDValue(AddLo,0),
1026  Sub0,
1027  SDValue(AddHi,0),
1028  Sub1,
1029  };
1030  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
1031  MVT::i64, RegSequenceArgs);
1032 
1033  if (ProduceCarry) {
1034  // Replace the carry-use
1035  ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
1036  }
1037 
1038  // Replace the remaining uses.
1039  ReplaceNode(N, RegSequence);
1040 }
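// The selected sequence computes a 64-bit sum exactly like this standalone
// sketch (hypothetical helper): add the low halves (S_ADD_U32 produces a
// carry), then add the high halves plus that carry (S_ADDC_U32), and
// recombine the two halves, mirroring the REG_SEQUENCE above.
#include <cstdint>

static uint64_t add64ViaHalves(uint64_t A, uint64_t B) {
  uint32_t Lo = uint32_t(A) + uint32_t(B);
  uint32_t Carry = Lo < uint32_t(A) ? 1 : 0;           // carry-out of low add
  uint32_t Hi = uint32_t(A >> 32) + uint32_t(B >> 32) + Carry;
  return (uint64_t(Hi) << 32) | Lo;                    // sub1:sub0
}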
1041 
1042 void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
1043  SDLoc DL(N);
1044  SDValue LHS = N->getOperand(0);
1045  SDValue RHS = N->getOperand(1);
1046  SDValue CI = N->getOperand(2);
1047 
1048  unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
1049  : AMDGPU::V_SUBB_U32_e64;
1050  CurDAG->SelectNodeTo(
1051  N, Opc, N->getVTList(),
1052  {LHS, RHS, CI, CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
1053 }
1054 
1055 void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
1056  // The names of these opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
1057  // carry out despite the _i32 name. These were renamed in VI to _U32.
1058  // FIXME: We should probably rename the opcodes here.
1059  unsigned Opc = N->getOpcode() == ISD::UADDO ?
1060  AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
1061 
1062  CurDAG->SelectNodeTo(
1063  N, Opc, N->getVTList(),
1064  {N->getOperand(0), N->getOperand(1),
1065  CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
1066 }
1067 
1068 void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
1069  SDLoc SL(N);
1070  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
1071  SDValue Ops[10];
1072 
1073  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
1074  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
1075  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
1076  Ops[8] = N->getOperand(0);
1077  Ops[9] = N->getOperand(4);
1078 
1079  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
1080 }
1081 
1082 void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
1083  SDLoc SL(N);
1084  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
1085  SDValue Ops[8];
1086 
1087  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
1088  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
1089  Ops[6] = N->getOperand(0);
1090  Ops[7] = N->getOperand(3);
1091 
1092  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
1093 }
1094 
1095 // We need to handle this here because tablegen doesn't support matching
1096 // instructions with multiple outputs.
1097 void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
1098  SDLoc SL(N);
1099  EVT VT = N->getValueType(0);
1100 
1101  assert(VT == MVT::f32 || VT == MVT::f64);
1102 
1103  unsigned Opc
1104  = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
1105 
1106  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
1107  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
1108 }
1109 
1110 void AMDGPUDAGToDAGISel::SelectDIV_FMAS(SDNode *N) {
1111  const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
1112  const SIRegisterInfo *TRI = ST->getRegisterInfo();
1113 
1114  SDLoc SL(N);
1115  EVT VT = N->getValueType(0);
1116 
1117  assert(VT == MVT::f32 || VT == MVT::f64);
1118 
1119  unsigned Opc
1120  = (VT == MVT::f64) ? AMDGPU::V_DIV_FMAS_F64 : AMDGPU::V_DIV_FMAS_F32;
1121 
1122  SDValue CarryIn = N->getOperand(3);
1123  // V_DIV_FMAS implicitly reads VCC.
1124  SDValue VCC = CurDAG->getCopyToReg(CurDAG->getEntryNode(), SL,
1125  TRI->getVCC(), CarryIn, SDValue());
1126 
1127  SDValue Ops[10];
1128 
1129  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
1130  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
1131  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
1132 
1133  Ops[8] = VCC;
1134  Ops[9] = VCC.getValue(1);
1135 
1136  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
1137 }
1138 
1139 // We need to handle this here because tablegen doesn't support matching
1140 // instructions with multiple outputs.
1141 void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
1142  SDLoc SL(N);
1143  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
1144  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;
1145 
1146  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
1147  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
1148  Clamp };
1149  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
1150 }
1151 
1152 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset,
1153  unsigned OffsetBits) const {
1154  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
1155  (OffsetBits == 8 && !isUInt<8>(Offset)))
1156  return false;
1157 
1158  if (Subtarget->hasUsableDSOffset() ||
1159  Subtarget->unsafeDSOffsetFoldingEnabled())
1160  return true;
1161 
1162  // On Southern Islands, instructions with a negative base value and an offset
1163  // don't seem to work.
1164  return CurDAG->SignBitIsZero(Base);
1165 }
1166 
1167 bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
1168  SDValue &Offset) const {
1169  SDLoc DL(Addr);
1170  if (CurDAG->isBaseWithConstantOffset(Addr)) {
1171  SDValue N0 = Addr.getOperand(0);
1172  SDValue N1 = Addr.getOperand(1);
1173  ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1174  if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
1175  // (add n0, c0)
1176  Base = N0;
1177  Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1178  return true;
1179  }
1180  } else if (Addr.getOpcode() == ISD::SUB) {
1181  // sub C, x -> add (sub 0, x), C
1182  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
1183  int64_t ByteOffset = C->getSExtValue();
1184  if (isUInt<16>(ByteOffset)) {
1185  SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1186 
1187  // XXX - This is kind of hacky. Create a dummy sub node so we can check
1188  // the known bits in isDSOffsetLegal. We need to emit the selected node
1189  // here, so this is thrown away.
1190  SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
1191  Zero, Addr.getOperand(1));
1192 
1193  if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
1194  SmallVector<SDValue, 3> Opnds;
1195  Opnds.push_back(Zero);
1196  Opnds.push_back(Addr.getOperand(1));
1197 
1198  // FIXME: Select to VOP3 version for with-carry.
1199  unsigned SubOp = AMDGPU::V_SUB_I32_e32;
1200  if (Subtarget->hasAddNoCarry()) {
1201  SubOp = AMDGPU::V_SUB_U32_e64;
1202  Opnds.push_back(
1203  CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
1204  }
1205 
1206  MachineSDNode *MachineSub =
1207  CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);
1208 
1209  Base = SDValue(MachineSub, 0);
1210  Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
1211  return true;
1212  }
1213  }
1214  }
1215  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1216  // If we have a constant address, prefer to put the constant into the
1217  // offset. This can save moves to load the constant address since multiple
1218  // operations can share the zero base address register, and enables merging
1219  // into read2 / write2 instructions.
1220 
1221  SDLoc DL(Addr);
1222 
1223  if (isUInt<16>(CAddr->getZExtValue())) {
1224  SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1225  MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
1226  DL, MVT::i32, Zero);
1227  Base = SDValue(MovZero, 0);
1228  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
1229  return true;
1230  }
1231  }
1232 
1233  // default case
1234  Base = Addr;
1235  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
1236  return true;
1237 }
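// The "sub C, x -> add (sub 0, x), C" rewrite above relies on the modular
// identity C - x == (0 - x) + C, which frees C to live in the 16-bit DS
// offset field. A standalone sketch (hypothetical helper):
#include <cstdint>

static uint32_t subAsBasePlusOffset(uint32_t C, uint32_t X) {
  uint32_t Base = 0u - X;  // materialized by the V_SUB_{I,U}32 (0, x) above
  return Base + C;         // C then becomes the immediate offset
}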
1238 
1239 // TODO: If offset is too big, put low 16-bit into offset.
1240 bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
1241  SDValue &Offset0,
1242  SDValue &Offset1) const {
1243  SDLoc DL(Addr);
1244 
1245  if (CurDAG->isBaseWithConstantOffset(Addr)) {
1246  SDValue N0 = Addr.getOperand(0);
1247  SDValue N1 = Addr.getOperand(1);
1248  ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1249  unsigned DWordOffset0 = C1->getZExtValue() / 4;
1250  unsigned DWordOffset1 = DWordOffset0 + 1;
1251  // (add n0, c0)
1252  if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
1253  Base = N0;
1254  Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
1255  Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
1256  return true;
1257  }
1258  } else if (Addr.getOpcode() == ISD::SUB) {
1259  // sub C, x -> add (sub 0, x), C
1260  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
1261  unsigned DWordOffset0 = C->getZExtValue() / 4;
1262  unsigned DWordOffset1 = DWordOffset0 + 1;
1263 
1264  if (isUInt<8>(DWordOffset0)) {
1265  SDLoc DL(Addr);
1266  SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1267 
1268  // XXX - This is kind of hacky. Create a dummy sub node so we can check
1269  // the known bits in isDSOffsetLegal. We need to emit the selected node
1270  // here, so this is thrown away.
1271  SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
1272  Zero, Addr.getOperand(1));
1273 
1274  if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
1275  SmallVector<SDValue, 3> Opnds;
1276  Opnds.push_back(Zero);
1277  Opnds.push_back(Addr.getOperand(1));
1278  unsigned SubOp = AMDGPU::V_SUB_I32_e32;
1279  if (Subtarget->hasAddNoCarry()) {
1280  SubOp = AMDGPU::V_SUB_U32_e64;
1281  Opnds.push_back(
1282  CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
1283  }
1284 
1285  MachineSDNode *MachineSub
1286  = CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);
1287 
1288  Base = SDValue(MachineSub, 0);
1289  Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
1290  Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
1291  return true;
1292  }
1293  }
1294  }
1295  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1296  unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
1297  unsigned DWordOffset1 = DWordOffset0 + 1;
1298  assert(4 * DWordOffset0 == CAddr->getZExtValue());
1299 
1300  if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
1301  SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1302  MachineSDNode *MovZero
1303  = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
1304  DL, MVT::i32, Zero);
1305  Base = SDValue(MovZero, 0);
1306  Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
1307  Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
1308  return true;
1309  }
1310  }
1311 
1312  // default case
1313 
1314  Base = Addr;
1315  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
1316  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
1317  return true;
1318 }
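// The read2/write2 offsets selected above are expressed in 4-byte elements,
// so a byte offset maps to a pair of consecutive dword offsets (standalone
// sketch, hypothetical helper):
static void byteToDWordOffsets(unsigned ByteOffset, unsigned &Off0,
                               unsigned &Off1) {
  Off0 = ByteOffset / 4;  // first dword of the 64-bit access
  Off1 = Off0 + 1;        // second dword; both must fit in 8 bits
}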
1319 
1320 bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
1321  SDValue &VAddr, SDValue &SOffset,
1322  SDValue &Offset, SDValue &Offen,
1323  SDValue &Idxen, SDValue &Addr64,
1324  SDValue &GLC, SDValue &SLC,
1325  SDValue &TFE, SDValue &DLC,
1326  SDValue &SWZ) const {
1327  // Subtarget prefers to use flat instructions.
1328  if (Subtarget->useFlatForGlobal())
1329  return false;
1330 
1331  SDLoc DL(Addr);
1332 
1333  if (!GLC.getNode())
1334  GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
1335  if (!SLC.getNode())
1336  SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
1337  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
1338  DLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
1339  SWZ = CurDAG->getTargetConstant(0, DL, MVT::i1);
1340 
1341  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
1342  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
1343  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
1344  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1345 
1346  ConstantSDNode *C1 = nullptr;
1347  SDValue N0 = Addr;
1348  if (CurDAG->isBaseWithConstantOffset(Addr)) {
1349  C1 = cast<ConstantSDNode>(Addr.getOperand(1));
1350  if (isUInt<32>(C1->getZExtValue()))
1351  N0 = Addr.getOperand(0);
1352  else
1353  C1 = nullptr;
1354  }
1355 
1356  if (N0.getOpcode() == ISD::ADD) {
1357  // (add N2, N3) -> addr64, or
1358  // (add (add N2, N3), C1) -> addr64
1359  SDValue N2 = N0.getOperand(0);
1360  SDValue N3 = N0.getOperand(1);
1361  Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
1362 
1363  if (N2->isDivergent()) {
1364  if (N3->isDivergent()) {
1365  // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
1366  // addr64, and construct the resource from a 0 address.
1367  Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
1368  VAddr = N0;
1369  } else {
1370  // N2 is divergent, N3 is not.
1371  Ptr = N3;
1372  VAddr = N2;
1373  }
1374  } else {
1375  // N2 is not divergent.
1376  Ptr = N2;
1377  VAddr = N3;
1378  }
1379  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1380  } else if (N0->isDivergent()) {
1381  // N0 is divergent. Use it as the addr64, and construct the resource from a
1382  // 0 address.
1383  Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
1384  VAddr = N0;
1385  Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
1386  } else {
1387  // N0 -> offset, or
1388  // (N0 + C1) -> offset
1389  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
1390  Ptr = N0;
1391  }
1392 
1393  if (!C1) {
1394  // No offset.
1395  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1396  return true;
1397  }
1398 
1399  if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
1400  // Legal offset for instruction.
1401  Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1402  return true;
1403  }
1404 
1405  // Illegal offset, store it in soffset.
1406  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1407  SOffset =
1408  SDValue(CurDAG->getMachineNode(
1409  AMDGPU::S_MOV_B32, DL, MVT::i32,
1410  CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
1411  0);
1412  return true;
1413 }
1414 
1415 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1416  SDValue &VAddr, SDValue &SOffset,
1417  SDValue &Offset, SDValue &GLC,
1418  SDValue &SLC, SDValue &TFE,
1419  SDValue &DLC, SDValue &SWZ) const {
1420  SDValue Ptr, Offen, Idxen, Addr64;
1421 
1422  // The addr64 bit was removed for Volcanic Islands.
1423  if (!Subtarget->hasAddr64())
1424  return false;
1425 
1426  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1427  GLC, SLC, TFE, DLC, SWZ))
1428  return false;
1429 
1430  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
1431  if (C->getSExtValue()) {
1432  SDLoc DL(Addr);
1433 
1434  const SITargetLowering& Lowering =
1435  *static_cast<const SITargetLowering*>(getTargetLowering());
1436 
1437  SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
1438  return true;
1439  }
1440 
1441  return false;
1442 }
1443 
1444 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1445  SDValue &VAddr, SDValue &SOffset,
1446  SDValue &Offset,
1447  SDValue &SLC) const {
1448  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
1449  SDValue GLC, TFE, DLC, SWZ;
1450 
1451  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC, SWZ);
1452 }
1453 
1454 static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
1455  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
1456  return PSV && PSV->isStack();
1457 }
1458 
1459 std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
1460  const MachineFunction &MF = CurDAG->getMachineFunction();
1461  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1462 
1463  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
1464  SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
1465  FI->getValueType(0));
1466 
1467  // If we can resolve this to a frame index access, this will be relative to
1468  // either the stack or frame pointer SGPR.
1469  return std::make_pair(
1470  TFI, CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32));
1471  }
1472 
1473  // If we don't know this private access is a local stack object, it needs to
1474  // be relative to the entry point's scratch wave offset register.
1475  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
1476  MVT::i32));
1477 }
1478 
1479 bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
1480  SDValue Addr, SDValue &Rsrc,
1481  SDValue &VAddr, SDValue &SOffset,
1482  SDValue &ImmOffset) const {
1483 
1484  SDLoc DL(Addr);
1485  MachineFunction &MF = CurDAG->getMachineFunction();
1486  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1487 
1488  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1489 
1490  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1491  unsigned Imm = CAddr->getZExtValue();
1492 
1493  SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
1494  MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
1495  DL, MVT::i32, HighBits);
1496  VAddr = SDValue(MovHighBits, 0);
1497 
1498  // In a call sequence, stores to the argument stack area are relative to the
1499  // stack pointer.
1500  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
1501  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
1502  Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();
1503 
1504  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
1505  ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
1506  return true;
1507  }
1508 
1509  if (CurDAG->isBaseWithConstantOffset(Addr)) {
1510  // (add n0, c1)
1511 
1512  SDValue N0 = Addr.getOperand(0);
1513  SDValue N1 = Addr.getOperand(1);
1514 
1515  // Offsets in vaddr must be positive if range checking is enabled.
1516  //
1517  // The total computation of vaddr + soffset + offset must not overflow. If
1518  // vaddr is negative, even if offset is 0 the sgpr offset add will end up
1519  // overflowing.
1520  //
1521  // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
1522  // always perform a range check. If a negative vaddr base index was used,
1523  // this would fail the range check. The overall address computation would
1524  // compute a valid address, but this doesn't happen due to the range
1525  // check. For out-of-bounds MUBUF loads, a 0 is returned.
1526  //
1527  // Therefore it should be safe to fold any VGPR offset on gfx9 into the
1528  // MUBUF vaddr, but not on older subtargets which can only do this if the
1529  // sign bit is known 0.
1530  ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1531  if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
1532  (!Subtarget->privateMemoryResourceIsRangeChecked() ||
1533  CurDAG->SignBitIsZero(N0))) {
1534  std::tie(VAddr, SOffset) = foldFrameIndex(N0);
1535  ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1536  return true;
1537  }
1538  }
1539 
1540  // (node)
1541  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
1542  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1543  return true;
1544 }
1545 
1546 bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
1547  SDValue Addr,
1548  SDValue &SRsrc,
1549  SDValue &SOffset,
1550  SDValue &Offset) const {
1551  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
1552  if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
1553  return false;
1554 
1555  SDLoc DL(Addr);
1556  MachineFunction &MF = CurDAG->getMachineFunction();
1557  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1558 
1559  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1560 
1561  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
1562  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
1563  Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();
1564 
1565  // FIXME: Get from MachinePointerInfo? We should only be using the frame
1566  // offset if we know this is in a call sequence.
1567  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
1568 
1569  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
1570  return true;
1571 }
1572 
1573 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1574  SDValue &SOffset, SDValue &Offset,
1575  SDValue &GLC, SDValue &SLC,
1576  SDValue &TFE, SDValue &DLC,
1577  SDValue &SWZ) const {
1578  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
1579  const SIInstrInfo *TII =
1580  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
1581 
1582  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1583  GLC, SLC, TFE, DLC, SWZ))
1584  return false;
1585 
1586  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
1587  !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
1588  !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
1589  uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
1590  APInt::getAllOnesValue(32).getZExtValue(); // Size
1591  SDLoc DL(Addr);
1592 
1593  const SITargetLowering& Lowering =
1594  *static_cast<const SITargetLowering*>(getTargetLowering());
1595 
1596  SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
1597  return true;
1598  }
1599  return false;
1600 }
1601 
1602 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1603  SDValue &Soffset, SDValue &Offset
1604  ) const {
1605  SDValue GLC, SLC, TFE, DLC, SWZ;
1606 
1607  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ);
1608 }
1609 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1610  SDValue &Soffset, SDValue &Offset,
1611  SDValue &SLC) const {
1612  SDValue GLC, TFE, DLC, SWZ;
1613 
1614  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ);
1615 }
1616 
1617 // Find a load or store from corresponding pattern root.
1618 // Roots may be build_vector, bitconvert or their combinations.
1619 static MemSDNode* findMemSDNode(SDNode *N) {
1620  N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode();
1621  if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
1622  return MN;
1623  assert(isa<BuildVectorSDNode>(N));
1624  for (SDValue V : N->op_values())
1625  if (MemSDNode *MN =
1626  dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
1627  return MN;
1628  llvm_unreachable("cannot find MemSDNode in the pattern!");
1629 }
1630 
1631 template <bool IsSigned>
1632 bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N,
1633  SDValue Addr,
1634  SDValue &VAddr,
1635  SDValue &Offset,
1636  SDValue &SLC) const {
1637  int64_t OffsetVal = 0;
1638 
1639  if (Subtarget->hasFlatInstOffsets() &&
1640  (!Subtarget->hasFlatSegmentOffsetBug() ||
1641  findMemSDNode(N)->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS) &&
1642  CurDAG->isBaseWithConstantOffset(Addr)) {
1643  SDValue N0 = Addr.getOperand(0);
1644  SDValue N1 = Addr.getOperand(1);
1645  uint64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
1646 
1647  const SIInstrInfo *TII = Subtarget->getInstrInfo();
1648  unsigned AS = findMemSDNode(N)->getAddressSpace();
1649  if (TII->isLegalFLATOffset(COffsetVal, AS, IsSigned)) {
1650  Addr = N0;
1651  OffsetVal = COffsetVal;
1652  } else {
1653  // If the offset doesn't fit, put the low bits into the offset field and
1654  // add the rest.
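  // A worked example, assuming an unsigned 12-bit offset field: a combined
  // offset of 0x1234 splits into ImmField = 0x234 and
  // RemainderOffset = 0x1000; the remainder is materialized and added to
  // the base by the V_ADD_I32/V_ADDC_U32 sequence below.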
1655 
1656  SDLoc DL(N);
1657  uint64_t ImmField;
1658  const unsigned NumBits = TII->getNumFlatOffsetBits(AS, IsSigned);
1659  if (IsSigned) {
1660  ImmField = SignExtend64(COffsetVal, NumBits);
1661 
1662  // Don't use a negative offset field if the base offset is positive.
1663  // Since the scheduler currently relies on the offset field, doing so
1664  // could result in strange scheduling decisions.
1665 
1666  // TODO: Should we not do this in the opposite direction as well?
1667  if (static_cast<int64_t>(COffsetVal) > 0) {
1668  if (static_cast<int64_t>(ImmField) < 0) {
1669  const uint64_t OffsetMask = maskTrailingOnes<uint64_t>(NumBits - 1);
1670  ImmField = COffsetVal & OffsetMask;
1671  }
1672  }
1673  } else {
1674  // TODO: Should we do this for a negative offset?
1675  const uint64_t OffsetMask = maskTrailingOnes<uint64_t>(NumBits);
1676  ImmField = COffsetVal & OffsetMask;
1677  }
1678 
1679  uint64_t RemainderOffset = COffsetVal - ImmField;
1680 
1681  assert(TII->isLegalFLATOffset(ImmField, AS, IsSigned));
1682  assert(RemainderOffset + ImmField == COffsetVal);
1683 
1684  OffsetVal = ImmField;
1685 
1686  // TODO: Should this try to use a scalar add pseudo if the base address is
1687  // uniform and saddr is usable?
1688  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
1689  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
1690 
1691  SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1692  DL, MVT::i32, N0, Sub0);
1693  SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1694  DL, MVT::i32, N0, Sub1);
1695 
1696  SDValue AddOffsetLo
1697  = getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
1698  SDValue AddOffsetHi
1699  = getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
1700 
1701  SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
1702  SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1703 
1704  SDNode *Add = CurDAG->getMachineNode(
1705  AMDGPU::V_ADD_I32_e64, DL, VTs,
1706  {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
1707 
1708  SDNode *Addc = CurDAG->getMachineNode(
1709  AMDGPU::V_ADDC_U32_e64, DL, VTs,
1710  {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});
1711 
1712  SDValue RegSequenceArgs[] = {
1713  CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
1714  SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1
1715  };
1716 
1717  Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
1718  MVT::i64, RegSequenceArgs), 0);
1719  }
1720  }
1721 
1722  VAddr = Addr;
1723  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
1724  SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
1725  return true;
1726 }
1727 
1728 bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N,
1729  SDValue Addr,
1730  SDValue &VAddr,
1731  SDValue &Offset,
1732  SDValue &SLC) const {
1733  return SelectFlatOffset<false>(N, Addr, VAddr, Offset, SLC);
1734 }
1735 
1736 bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N,
1737  SDValue Addr,
1738  SDValue &VAddr,
1739  SDValue &Offset,
1740  SDValue &SLC) const {
1741  return SelectFlatOffset<true>(N, Addr, VAddr, Offset, SLC);
1742 }
1743 
1744 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1745  SDValue &Offset, bool &Imm) const {
1746 
1747  // FIXME: Handle non-constant offsets.
1748  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1749  if (!C)
1750  return false;
1751 
1752  SDLoc SL(ByteOffsetNode);
1753  GCNSubtarget::Generation Gen = Subtarget->getGeneration();
1754  int64_t ByteOffset = C->getSExtValue();
1755  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);
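  // (On SI/CI getSMRDEncodedOffset returns a dword offset, i.e.
  // ByteOffset >> 2; on VI and newer it returns the byte offset unchanged.)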
1756 
1757  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
1758  Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1759  Imm = true;
1760  return true;
1761  }
1762 
1763  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
1764  return false;
1765 
1766  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
1767  // 32-bit Immediates are supported on Sea Islands.
1768  Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1769  } else {
1770  SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1771  Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
1772  C32Bit), 0);
1773  }
1774  Imm = false;
1775  return true;
1776 }
1777 
1778 SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
1779  if (Addr.getValueType() != MVT::i32)
1780  return Addr;
1781 
1782  // Zero-extend a 32-bit address.
1783  SDLoc SL(Addr);
1784 
1785  const MachineFunction &MF = CurDAG->getMachineFunction();
1786  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1787  unsigned AddrHiVal = Info->get32BitAddressHighBits();
1788  SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
1789 
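  // The 64-bit address is assembled as a REG_SEQUENCE: the original 32-bit
  // address goes in sub0 and the fixed high bits in sub1.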
1790  const SDValue Ops[] = {
1791  CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
1792  Addr,
1793  CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
1794  SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
1795  0),
1796  CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
1797  };
1798 
1799  return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
1800  Ops), 0);
1801 }
1802 
1803 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1804  SDValue &Offset, bool &Imm) const {
1805  SDLoc SL(Addr);
1806 
1807  // A 32-bit (address + offset) should not cause unsigned 32-bit integer
1808  // wraparound, because s_load instructions perform the addition in 64 bits.
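  // For example, base 0xfffffff0 plus offset 0x20 wraps in 32-bit arithmetic
  // but not in the hardware's 64-bit add, so a 32-bit (base + offset) is
  // only split when the add is known not to wrap.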
1809  if ((Addr.getValueType() != MVT::i32 ||
1810  Addr->getFlags().hasNoUnsignedWrap()) &&
1811  CurDAG->isBaseWithConstantOffset(Addr)) {
1812  SDValue N0 = Addr.getOperand(0);
1813  SDValue N1 = Addr.getOperand(1);
1814 
1815  if (SelectSMRDOffset(N1, Offset, Imm)) {
1816  SBase = Expand32BitAddress(N0);
1817  return true;
1818  }
1819  }
1820  SBase = Expand32BitAddress(Addr);
1821  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
1822  Imm = true;
1823  return true;
1824 }
1825 
1826 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1827  SDValue &Offset) const {
1828  bool Imm;
1829  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1830 }
1831 
1832 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1833  SDValue &Offset) const {
1834 
1835  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1836  return false;
1837 
1838  bool Imm;
1839  if (!SelectSMRD(Addr, SBase, Offset, Imm))
1840  return false;
1841 
1842  return !Imm && isa<ConstantSDNode>(Offset);
1843 }
1844 
1845 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
1846  SDValue &Offset) const {
1847  bool Imm;
1848  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
1849  !isa<ConstantSDNode>(Offset);
1850 }
1851 
1852 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
1853  SDValue &Offset) const {
1854  bool Imm;
1855  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
1856 }
1857 
1858 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
1859  SDValue &Offset) const {
1860  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1861  return false;
1862 
1863  bool Imm;
1864  if (!SelectSMRDOffset(Addr, Offset, Imm))
1865  return false;
1866 
1867  return !Imm && isa<ConstantSDNode>(Offset);
1868 }
1869 
1870 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
1871  SDValue &Base,
1872  SDValue &Offset) const {
1873  SDLoc DL(Index);
1874 
1875  if (CurDAG->isBaseWithConstantOffset(Index)) {
1876  SDValue N0 = Index.getOperand(0);
1877  SDValue N1 = Index.getOperand(1);
1878  ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1879 
1880  // (add n0, c0)
1881  // Don't peel off the offset (c0) if doing so could possibly lead
1882  // the base (n0) to be negative.
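  // For example, for (add n0, 16) the 16 is moved into the offset field
  // only when n0 is provably non-negative (SignBitIsZero below).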
1883  if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0)) {
1884  Base = N0;
1885  Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
1886  return true;
1887  }
1888  }
1889 
1890  if (isa<ConstantSDNode>(Index))
1891  return false;
1892 
1893  Base = Index;
1894  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1895  return true;
1896 }
1897 
1898 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
1899  SDValue Val, uint32_t Offset,
1900  uint32_t Width) {
1901  // Transformation function, pack the offset and width of a BFE into
1902  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1903  // source, bits [5:0] contain the offset and bits [22:16] the width.
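  // For example, Offset = 8 and Width = 16 pack to (16 << 16) | 8 = 0x100008.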
1904  uint32_t PackedVal = Offset | (Width << 16);
1905  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1906 
1907  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1908 }
1909 
1910 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
1911  // "((a << b) srl c)" ---> "BFE_U32 a, (c - b), (32 - c)"
1912  // "((a << b) sra c)" ---> "BFE_I32 a, (c - b), (32 - c)"
1913  // Predicate: 0 < b <= c < 32
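  // For example, with b = 8 and c = 16, "((a << 8) srl 16)" extracts bits
  // [23:8] of a, i.e. BFE_U32 a, 8, 16 (offset c - b, width 32 - c).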
1914 
1915  const SDValue &Shl = N->getOperand(0);
1916  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
1917  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
1918 
1919  if (B && C) {
1920  uint32_t BVal = B->getZExtValue();
1921  uint32_t CVal = C->getZExtValue();
1922 
1923  if (0 < BVal && BVal <= CVal && CVal < 32) {
1924  bool Signed = N->getOpcode() == ISD::SRA;
1925  unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
1926 
1927  ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
1928  32 - CVal));
1929  return;
1930  }
1931  }
1932  SelectCode(N);
1933 }
1934 
1935 void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
1936  switch (N->getOpcode()) {
1937  case ISD::AND:
1938  if (N->getOperand(0).getOpcode() == ISD::SRL) {
1939  // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
1940  // Predicate: isMask(mask)
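      // For example, "(a srl 3) & 0xff" becomes BFE_U32 a, 3, 8,
      // since popcount(0xff) = 8.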
1941  const SDValue &Srl = N->getOperand(0);
1942  ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
1943  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
1944 
1945  if (Shift && Mask) {
1946  uint32_t ShiftVal = Shift->getZExtValue();
1947  uint32_t MaskVal = Mask->getZExtValue();
1948 
1949  if (isMask_32(MaskVal)) {
1950  uint32_t WidthVal = countPopulation(MaskVal);
1951 
1952  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1953  Srl.getOperand(0), ShiftVal, WidthVal));
1954  return;
1955  }
1956  }
1957  }
1958  break;
1959  case ISD::SRL:
1960  if (N->getOperand(0).getOpcode() == ISD::AND) {
1961  // "((a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
1962  // Predicate: isMask(mask >> b)
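      // For example, "((a & 0xff0) srl 4)" becomes BFE_U32 a, 4, 8,
      // since 0xff0 >> 4 = 0xff is a mask with popcount 8.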
1963  const SDValue &And = N->getOperand(0);
1964  ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
1965  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
1966 
1967  if (Shift && Mask) {
1968  uint32_t ShiftVal = Shift->getZExtValue();
1969  uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
1970 
1971  if (isMask_32(MaskVal)) {
1972  uint32_t WidthVal = countPopulation(MaskVal);
1973 
1974  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1975  And.getOperand(0), ShiftVal, WidthVal));
1976  return;
1977  }
1978  }
1979  } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
1980  SelectS_BFEFromShifts(N);
1981  return;
1982  }
1983  break;
1984  case ISD::SRA:
1985  if (N->getOperand(0).getOpcode() == ISD::SHL) {
1986  SelectS_BFEFromShifts(N);
1987  return;
1988  }
1989  break;
1990 
1991  case ISD::SIGN_EXTEND_INREG: {
1992  // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
1993  SDValue Src = N->getOperand(0);
1994  if (Src.getOpcode() != ISD::SRL)
1995  break;
1996 
1997  const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
1998  if (!Amt)
1999  break;
2000 
2001  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2002  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
2003  Amt->getZExtValue(), Width));
2004  return;
2005  }
2006  }
2007 
2008  SelectCode(N);
2009 }
2010 
2011 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
2012  assert(N->getOpcode() == ISD::BRCOND);
2013  if (!N->hasOneUse())
2014  return false;
2015 
2016  SDValue Cond = N->getOperand(1);
2017  if (Cond.getOpcode() == ISD::CopyToReg)
2018  Cond = Cond.getOperand(2);
2019 
2020  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
2021  return false;
2022 
2023  MVT VT = Cond.getOperand(0).getSimpleValueType();
2024  if (VT == MVT::i32)
2025  return true;
2026 
2027  if (VT == MVT::i64) {
2028  auto ST = static_cast<const GCNSubtarget *>(Subtarget);
2029 
2030  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
2031  return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
2032  }
2033 
2034  return false;
2035 }
2036 
2037 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
2038  SDValue Cond = N->getOperand(1);
2039 
2040  if (Cond.isUndef()) {
2041  CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
2042  N->getOperand(2), N->getOperand(0));
2043  return;
2044  }
2045 
2046  const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
2047  const SIRegisterInfo *TRI = ST->getRegisterInfo();
2048 
2049  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
2050  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
2051  unsigned CondReg = UseSCCBr ? (unsigned)AMDGPU::SCC : TRI->getVCC();
2052  SDLoc SL(N);
2053 
2054  if (!UseSCCBr) {
2055  // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
2056  // analyzed what generates the vcc value, so we do not know whether vcc
2057  // bits for disabled lanes are 0. Thus we need to mask out bits for
2058  // disabled lanes.
2059  //
2060  // For the case that we select S_CBRANCH_SCC1 and it gets
2061  // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
2062  // SIInstrInfo::moveToVALU, which inserts the S_AND.
2063  //
2064  // We could add an analysis of what generates the vcc value here and omit
2065  // the S_AND when it is unnecessary. But it would be better to add a separate
2066  // pass after SIFixSGPRCopies that removes the unnecessary S_AND, so it
2067  // catches both cases.
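    // Roughly, the sequence selected here is:
    //   vcc = S_AND_B32/B64 exec, cond
    //   S_CBRANCH_VCCNZ <bb>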
2068  Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32
2069  : AMDGPU::S_AND_B64,
2070  SL, MVT::i1,
2071  CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO
2072  : AMDGPU::EXEC,
2073  MVT::i1),
2074  Cond),
2075  0);
2076  }
2077 
2078  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
2079  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
2080  N->getOperand(2), // Basic Block
2081  VCC.getValue(0));
2082 }
2083 
2084 void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
2085  MVT VT = N->getSimpleValueType(0);
2086  bool IsFMA = N->getOpcode() == ISD::FMA;
2087  if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
2088  !Subtarget->hasFmaMixInsts()) ||
2089  ((IsFMA && Subtarget->hasMadMixInsts()) ||
2090  (!IsFMA && Subtarget->hasFmaMixInsts()))) {
2091  SelectCode(N);
2092  return;
2093  }
2094 
2095  SDValue Src0 = N->getOperand(0);
2096  SDValue Src1 = N->getOperand(1);
2097  SDValue Src2 = N->getOperand(2);
2098  unsigned Src0Mods, Src1Mods, Src2Mods;
2099 
2100  // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
2101  // using the conversion from f16.
2102  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
2103  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
2104  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
2105 
2106  assert((IsFMA || !Subtarget->hasFP32Denormals()) &&
2107  "fmad selected with denormals enabled");
2108  // TODO: We can select this with f32 denormals enabled if all the sources are
2109  // converted from f16 (in which case fmad isn't legal).
2110 
2111  if (Sel0 || Sel1 || Sel2) {
2112  // For dummy operands.
2113  SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2114  SDValue Ops[] = {
2115  CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
2116  CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
2117  CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
2118  CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
2119  Zero, Zero
2120  };
2121 
2122  CurDAG->SelectNodeTo(N,
2123  IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
2124  MVT::f32, Ops);
2125  } else {
2126  SelectCode(N);
2127  }
2128 }
2129 
2130 // This is here because there isn't a way to use the generated sub0_sub1 as the
2131 // subreg index to EXTRACT_SUBREG in tablegen.
2132 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
2133  MemSDNode *Mem = cast<MemSDNode>(N);
2134  unsigned AS = Mem->getAddressSpace();
2135  if (AS == AMDGPUAS::FLAT_ADDRESS) {
2136  SelectCode(N);
2137  return;
2138  }
2139 
2140  MVT VT = N->getSimpleValueType(0);
2141  bool Is32 = (VT == MVT::i32);
2142  SDLoc SL(N);
2143 
2144  MachineSDNode *CmpSwap = nullptr;
2145  if (Subtarget->hasAddr64()) {
2146  SDValue SRsrc, VAddr, SOffset, Offset, SLC;
2147 
2148  if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
2149  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
2150  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
2151  SDValue CmpVal = Mem->getOperand(2);
2152 
2153  // XXX - Do we care about glue operands?
2154 
2155  SDValue Ops[] = {
2156  CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
2157  };
2158 
2159  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
2160  }
2161  }
2162 
2163  if (!CmpSwap) {
2164  SDValue SRsrc, SOffset, Offset, SLC;
2165  if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
2166  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
2167  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
2168 
2169  SDValue CmpVal = Mem->getOperand(2);
2170  SDValue Ops[] = {
2171  CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
2172  };
2173 
2174  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
2175  }
2176  }
2177 
2178  if (!CmpSwap) {
2179  SelectCode(N);
2180  return;
2181  }
2182 
2183  MachineMemOperand *MMO = Mem->getMemOperand();
2184  CurDAG->setNodeMemRefs(CmpSwap, {MMO});
2185 
2186  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
2187  SDValue Extract
2188  = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
2189 
2190  ReplaceUses(SDValue(N, 0), Extract);
2191  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
2192  CurDAG->RemoveDeadNode(N);
2193 }
2194 
2195 void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
2196  // The address is assumed to be uniform, so if it ends up in a VGPR, it will
2197  // be copied to an SGPR with readfirstlane.
2198  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2199  AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2200 
2201  SDValue Chain = N->getOperand(0);
2202  SDValue Ptr = N->getOperand(2);
2203  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2204  MachineMemOperand *MMO = M->getMemOperand();
2205  bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
2206 
2207  SDValue Offset;
2208  if (CurDAG->isBaseWithConstantOffset(Ptr)) {
2209  SDValue PtrBase = Ptr.getOperand(0);
2210  SDValue PtrOffset = Ptr.getOperand(1);
2211 
2212  const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
2213  if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue(), 16)) {
2214  N = glueCopyToM0(N, PtrBase);
2215  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
2216  }
2217  }
2218 
2219  if (!Offset) {
2220  N = glueCopyToM0(N, Ptr);
2221  Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2222  }
2223 
2224  SDValue Ops[] = {
2225  Offset,
2226  CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
2227  Chain,
2228  N->getOperand(N->getNumOperands() - 1) // New glue
2229  };
2230 
2231  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2232  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2233 }
2234 
2235 static unsigned gwsIntrinToOpcode(unsigned IntrID) {
2236  switch (IntrID) {
2237  case Intrinsic::amdgcn_ds_gws_init:
2238  return AMDGPU::DS_GWS_INIT;
2239  case Intrinsic::amdgcn_ds_gws_barrier:
2240  return AMDGPU::DS_GWS_BARRIER;
2241  case Intrinsic::amdgcn_ds_gws_sema_v:
2242  return AMDGPU::DS_GWS_SEMA_V;
2243  case Intrinsic::amdgcn_ds_gws_sema_br:
2244  return AMDGPU::DS_GWS_SEMA_BR;
2245  case Intrinsic::amdgcn_ds_gws_sema_p:
2246  return AMDGPU::DS_GWS_SEMA_P;
2247  case Intrinsic::amdgcn_ds_gws_sema_release_all:
2248  return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
2249  default:
2250  llvm_unreachable("not a gws intrinsic");
2251  }
2252 }
2253 
2254 void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
2255  if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
2256  !Subtarget->hasGWSSemaReleaseAll()) {
2257  // Let this error.
2258  SelectCode(N);
2259  return;
2260  }
2261 
2262  // Chain, intrinsic ID, vsrc, offset
2263  const bool HasVSrc = N->getNumOperands() == 4;
2264  assert(HasVSrc || N->getNumOperands() == 3);
2265 
2266  SDLoc SL(N);
2267  SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
2268  int ImmOffset = 0;
2269  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2270  MachineMemOperand *MMO = M->getMemOperand();
2271 
2272  // Don't worry if the offset ends up in a VGPR. Only one lane will have an
2273  // effect, so SIFixSGPRCopies can validly insert a readfirstlane.
2274 
2275  // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
2276  // offset field) % 64. Some versions of the programming guide omit the m0
2277  // part, or claim it's from offset 0.
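  // For example, with m0 = 0 (as set below for the constant-offset case),
  // the resource id reduces to (<isa opaque base> + offset field) % 64.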
2278  if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
2279  // If we have a constant offset, try to use the 0 in m0 as the base.
2280  // TODO: Look into changing the default m0 initialization value. If the
2281  // default -1 only set the low 16-bits, we could leave it as-is and add 1 to
2282  // the immediate offset.
2283  glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
2284  ImmOffset = ConstOffset->getZExtValue();
2285  } else {
2286  if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
2287  ImmOffset = BaseOffset.getConstantOperandVal(1);
2288  BaseOffset = BaseOffset.getOperand(0);
2289  }
2290 
2291  // Prefer to do the shift in an SGPR since it should be possible to use m0
2292  // as the result directly. If it's already an SGPR, it will be eliminated
2293  // later.
2294  SDNode *SGPROffset
2295  = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
2296  BaseOffset);
2297  // Shift to offset in m0
2298  SDNode *M0Base
2299  = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
2300  SDValue(SGPROffset, 0),
2301  CurDAG->getTargetConstant(16, SL, MVT::i32));
2302  glueCopyToM0(N, SDValue(M0Base, 0));
2303  }
2304 
2305  SDValue Chain = N->getOperand(0);
2306  SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
2307 
2308  // TODO: Can this just be removed from the instruction?
2309  SDValue GDS = CurDAG->getTargetConstant(1, SL, MVT::i1);
2310 
2311  const unsigned Opc = gwsIntrinToOpcode(IntrID);
2312  SmallVector<SDValue, 5> Ops;
2313  if (HasVSrc)
2314  Ops.push_back(N->getOperand(2));
2315  Ops.push_back(OffsetField);
2316  Ops.push_back(GDS);
2317  Ops.push_back(Chain);
2318 
2319  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2320  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2321 }
2322 
2323 void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
2324  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2325  switch (IntrID) {
2326  case Intrinsic::amdgcn_ds_append:
2327  case Intrinsic::amdgcn_ds_consume: {
2328  if (N->getValueType(0) != MVT::i32)
2329  break;
2330  SelectDSAppendConsume(N, IntrID);
2331  return;
2332  }
2333  }
2334 
2335  SelectCode(N);
2336 }
2337 
2338 void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
2339  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
2340  unsigned Opcode;
2341  switch (IntrID) {
2342  case Intrinsic::amdgcn_wqm:
2343  Opcode = AMDGPU::WQM;
2344  break;
2345  case Intrinsic::amdgcn_softwqm:
2346  Opcode = AMDGPU::SOFT_WQM;
2347  break;
2348  case Intrinsic::amdgcn_wwm:
2349  Opcode = AMDGPU::WWM;
2350  break;
2351  default:
2352  SelectCode(N);
2353  return;
2354  }
2355 
2356  SDValue Src = N->getOperand(1);
2357  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
2358 }
2359 
2360 void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
2361  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2362  switch (IntrID) {
2363  case Intrinsic::amdgcn_ds_gws_init:
2364  case Intrinsic::amdgcn_ds_gws_barrier:
2365  case Intrinsic::amdgcn_ds_gws_sema_v:
2366  case Intrinsic::amdgcn_ds_gws_sema_br:
2367  case Intrinsic::amdgcn_ds_gws_sema_p:
2368  case Intrinsic::amdgcn_ds_gws_sema_release_all:
2369  SelectDS_GWS(N, IntrID);
2370  return;
2371  default:
2372  break;
2373  }
2374 
2375  SelectCode(N);
2376 }
2377 
2378 bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
2379  unsigned &Mods) const {
2380  Mods = 0;
2381  Src = In;
2382 
2383  if (Src.getOpcode() == ISD::FNEG) {
2384  Mods |= SISrcMods::NEG;
2385  Src = Src.getOperand(0);
2386  }
2387 
2388  if (Src.getOpcode() == ISD::FABS) {
2389  Mods |= SISrcMods::ABS;
2390  Src = Src.getOperand(0);
2391  }
2392 
2393  return true;
2394 }
2395 
2396 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
2397  SDValue &SrcMods) const {
2398  unsigned Mods;
2399  if (SelectVOP3ModsImpl(In, Src, Mods)) {
2400  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2401  return true;
2402  }
2403 
2404  return false;
2405 }
2406 
2407 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
2408  SDValue &SrcMods) const {
2409  SelectVOP3Mods(In, Src, SrcMods);
2410  return isNoNanSrc(Src);
2411 }
2412 
2413 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_f32(SDValue In, SDValue &Src,
2414  SDValue &SrcMods) const {
2415  if (In.getValueType() == MVT::f32)
2416  return SelectVOP3Mods(In, Src, SrcMods);
2417  Src = In;
2418  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2419  return true;
2420 }
2421 
2422 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
2423  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
2424  return false;
2425 
2426  Src = In;
2427  return true;
2428 }
2429 
2430 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
2431  SDValue &SrcMods, SDValue &Clamp,
2432  SDValue &Omod) const {
2433  SDLoc DL(In);
2434  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2435  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2436 
2437  return SelectVOP3Mods(In, Src, SrcMods);
2438 }
2439 
2440 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
2441  SDValue &SrcMods,
2442  SDValue &Clamp,
2443  SDValue &Omod) const {
2444  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2445  return SelectVOP3Mods(In, Src, SrcMods);
2446 }
2447 
2448 bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
2449  SDValue &Clamp, SDValue &Omod) const {
2450  Src = In;
2451 
2452  SDLoc DL(In);
2453  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2454  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2455 
2456  return true;
2457 }
2458 
2459 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
2460  SDValue &SrcMods) const {
2461  unsigned Mods = 0;
2462  Src = In;
2463 
2464  if (Src.getOpcode() == ISD::FNEG) {
2465  Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
2466  Src = Src.getOperand(0);
2467  }
2468 
2469  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
2470  unsigned VecMods = Mods;
2471 
2472  SDValue Lo = stripBitcast(Src.getOperand(0));
2473  SDValue Hi = stripBitcast(Src.getOperand(1));
2474 
2475  if (Lo.getOpcode() == ISD::FNEG) {
2476  Lo = stripBitcast(Lo.getOperand(0));
2477  Mods ^= SISrcMods::NEG;
2478  }
2479 
2480  if (Hi.getOpcode() == ISD::FNEG) {
2481  Hi = stripBitcast(Hi.getOperand(0));
2482  Mods ^= SISrcMods::NEG_HI;
2483  }
2484 
2485  if (isExtractHiElt(Lo, Lo))
2486  Mods |= SISrcMods::OP_SEL_0;
2487 
2488  if (isExtractHiElt(Hi, Hi))
2489  Mods |= SISrcMods::OP_SEL_1;
2490 
2491  Lo = stripExtractLoElt(Lo);
2492  Hi = stripExtractLoElt(Hi);
2493 
2494  if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
2495  // Really a scalar input. Just select from the low half of the register to
2496  // avoid packing.
2497 
2498  Src = Lo;
2499  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2500  return true;
2501  }
2502 
2503  Mods = VecMods;
2504  }
2505 
2506  // Packed instructions do not have abs modifiers.
2507  Mods |= SISrcMods::OP_SEL_1;
2508 
2509  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2510  return true;
2511 }
2512 
2513 bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
2514  SDValue &SrcMods,
2515  SDValue &Clamp) const {
2516  SDLoc SL(In);
2517 
2518  // FIXME: Handle clamp and op_sel
2519  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2520 
2521  return SelectVOP3PMods(In, Src, SrcMods);
2522 }
2523 
2524 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
2525  SDValue &SrcMods) const {
2526  Src = In;
2527  // FIXME: Handle op_sel
2528  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2529  return true;
2530 }
2531 
2532 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
2533  SDValue &SrcMods,
2534  SDValue &Clamp) const {
2535  SDLoc SL(In);
2536 
2537  // FIXME: Handle clamp
2538  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2539 
2540  return SelectVOP3OpSel(In, Src, SrcMods);
2541 }
2542 
2543 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
2544  SDValue &SrcMods) const {
2545  // FIXME: Handle op_sel
2546  return SelectVOP3Mods(In, Src, SrcMods);
2547 }
2548 
2549 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
2550  SDValue &SrcMods,
2551  SDValue &Clamp) const {
2552  SDLoc SL(In);
2553 
2554  // FIXME: Handle clamp
2555  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2556 
2557  return SelectVOP3OpSelMods(In, Src, SrcMods);
2558 }
2559 
2560 // The return value is not whether the match is possible (which it always is),
2561 // but whether or not a conversion is actually used.
2562 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
2563  unsigned &Mods) const {
2564  Mods = 0;
2565  SelectVOP3ModsImpl(In, Src, Mods);
2566 
2567  if (Src.getOpcode() == ISD::FP_EXTEND) {
2568  Src = Src.getOperand(0);
2569  assert(Src.getValueType() == MVT::f16);
2570  Src = stripBitcast(Src);
2571 
2572  // Be careful about folding modifiers if we already have an abs. fneg is
2573  // applied last, so we don't want to apply an earlier fneg.
2574  if ((Mods & SISrcMods::ABS) == 0) {
2575  unsigned ModsTmp;
2576  SelectVOP3ModsImpl(Src, Src, ModsTmp);
2577 
2578  if ((ModsTmp & SISrcMods::NEG) != 0)
2579  Mods ^= SISrcMods::NEG;
2580 
2581  if ((ModsTmp & SISrcMods::ABS) != 0)
2582  Mods |= SISrcMods::ABS;
2583  }
2584 
2585  // op_sel/op_sel_hi decide the source type and source.
2586  // If the source's op_sel_hi is set, it indicates that a conversion from
2587  // f16 should be done. If the source's op_sel is set, it picks the high
2588  // half of the source register.
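  // For example, folding (fp_extend (extract_hi v)) sets both OP_SEL_1 (f16
  // conversion) and OP_SEL_0 (take the high half) for this source.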
2589 
2590  Mods |= SISrcMods::OP_SEL_1;
2591  if (isExtractHiElt(Src, Src)) {
2592  Mods |= SISrcMods::OP_SEL_0;
2593 
2594  // TODO: Should we try to look for neg/abs here?
2595  }
2596 
2597  return true;
2598  }
2599 
2600  return false;
2601 }
2602 
2603 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2604  SDValue &SrcMods) const {
2605  unsigned Mods = 0;
2606  SelectVOP3PMadMixModsImpl(In, Src, Mods);
2607  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2608  return true;
2609 }
2610 
2611 SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
2612  if (In.isUndef())
2613  return CurDAG->getUNDEF(MVT::i32);
2614 
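  // For example, a constant 0x1234 is materialized as the i32 constant
  // 0x12340000, i.e. the value placed in the high half.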
2615  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2616  SDLoc SL(In);
2617  return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
2618  }
2619 
2620  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2621  SDLoc SL(In);
2622  return CurDAG->getConstant(
2623  C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2624  }
2625 
2626  SDValue Src;
2627  if (isExtractHiElt(In, Src))
2628  return Src;
2629 
2630  return SDValue();
2631 }
2632 
2633 bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
2634  assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);
2635 
2636  const SIRegisterInfo *SIRI =
2637  static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
2638  const SIInstrInfo * SII =
2639  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2640 
2641  unsigned Limit = 0;
2642  bool AllUsesAcceptSReg = true;
2643  for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
2644  Limit < 10 && U != E; ++U, ++Limit) {
2645  const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
2646 
2647  // If the register class is unknown, it could be an unknown
2648  // register class that needs to be an SGPR, e.g. an inline asm
2649  // constraint
2650  if (!RC || SIRI->isSGPRClass(RC))
2651  return false;
2652 
2653  if (RC != &AMDGPU::VS_32RegClass) {
2654  AllUsesAcceptSReg = false;
2655  SDNode * User = *U;
2656  if (User->isMachineOpcode()) {
2657  unsigned Opc = User->getMachineOpcode();
2658  MCInstrDesc Desc = SII->get(Opc);
2659  if (Desc.isCommutable()) {
2660  unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
2661  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
2662  if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
2663  unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
2664  const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
2665  if (CommutedRC == &AMDGPU::VS_32RegClass)
2666  AllUsesAcceptSReg = true;
2667  }
2668  }
2669  }
2670  // If AllUsesAcceptSReg is still false, we have not succeeded in
2671  // commuting the current user, which means at least one use strictly
2672  // requires a VGPR. Thus, we will not attempt to commute any other
2673  // user instructions.
2674  if (!AllUsesAcceptSReg)
2675  break;
2676  }
2677  }
2678  return !AllUsesAcceptSReg && (Limit < 10);
2679 }
2680 
2681 bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
2682  auto Ld = cast<LoadSDNode>(N);
2683 
2684  return Ld->getAlignment() >= 4 &&
2685  (
2686  (
2687  (
2688  Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2689  Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT
2690  )
2691  &&
2692  !N->isDivergent()
2693  )
2694  ||
2695  (
2696  Subtarget->getScalarizeGlobalBehavior() &&
2697  Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
2698  !Ld->isVolatile() &&
2699  !N->isDivergent() &&
2700  static_cast<const SITargetLowering *>(
2701  getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)
2702  )
2703  );
2704 }
2705 
2706 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
2707  const AMDGPUTargetLowering& Lowering =
2708  *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
2709  bool IsModified = false;
2710  do {
2711  IsModified = false;
2712 
2713  // Go over all selected nodes and try to fold them a bit more
2714  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
2715  while (Position != CurDAG->allnodes_end()) {
2716  SDNode *Node = &*Position++;
2717  MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
2718  if (!MachineNode)
2719  continue;
2720 
2721  SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
2722  if (ResNode != Node) {
2723  if (ResNode)
2724  ReplaceUses(Node, ResNode);
2725  IsModified = true;
2726  }
2727  }
2728  CurDAG->RemoveDeadNodes();
2729  } while (IsModified);
2730 }
2731 
2732 bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
2733  Subtarget = &MF.getSubtarget<R600Subtarget>();
2734  return SelectionDAGISel::runOnMachineFunction(MF);
2735 }
2736 
2737 bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
2738  if (!N->readMem())
2739  return false;
2740  if (CbId == -1)
2741  return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2742  N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
2743 
2744  return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
2745 }
2746 
2747 bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
2748  SDValue& IntPtr) {
2749  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
2750  IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
2751  true);
2752  return true;
2753  }
2754  return false;
2755 }
2756 
2757 bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
2758  SDValue& BaseReg, SDValue &Offset) {
2759  if (!isa<ConstantSDNode>(Addr)) {
2760  BaseReg = Addr;
2761  Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
2762  return true;
2763  }
2764  return false;
2765 }
2766 
2767 void R600DAGToDAGISel::Select(SDNode *N) {
2768  unsigned int Opc = N->getOpcode();
2769  if (N->isMachineOpcode()) {
2770  N->setNodeId(-1);
2771  return; // Already selected.
2772  }
2773 
2774  switch (Opc) {
2775  default: break;
2776  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
2777  case ISD::SCALAR_TO_VECTOR:
2778  case ISD::BUILD_VECTOR: {
2779  EVT VT = N->getValueType(0);
2780  unsigned NumVectorElts = VT.getVectorNumElements();
2781  unsigned RegClassID;
2782  // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
2783  // sequence, which adds a 128-bit register copy when going through the
2784  // TwoAddressInstructions pass. We want to avoid 128-bit copies as much as
2785  // possible because they can't be bundled by our scheduler.
2786  switch(NumVectorElts) {
2787  case 2: RegClassID = R600::R600_Reg64RegClassID; break;
2788  case 4:
2789  if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
2790  RegClassID = R600::R600_Reg128VerticalRegClassID;
2791  else
2792  RegClassID = R600::R600_Reg128RegClassID;
2793  break;
2794  default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
2795  }
2796  SelectBuildVector(N, RegClassID);
2797  return;
2798  }
2799  }
2800 
2801  SelectCode(N);
2802 }
2803 
2804 bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
2805  SDValue &Offset) {
2806  ConstantSDNode *C;
2807  SDLoc DL(Addr);
2808 
2809  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
2810  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2811  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2812  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
2813  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
2814  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2815  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2816  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
2817  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
2818  Base = Addr.getOperand(0);
2819  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2820  } else {
2821  Base = Addr;
2822  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2823  }
2824 
2825  return true;
2826 }
2827 
2828 bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
2829  SDValue &Offset) {
2830  ConstantSDNode *IMMOffset;
2831 
2832  if (Addr.getOpcode() == ISD::ADD
2833  && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
2834  && isInt<16>(IMMOffset->getZExtValue())) {
2835 
2836  Base = Addr.getOperand(0);
2837  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2838  MVT::i32);
2839  return true;
2840  // If the pointer address is constant, we can move it to the offset field.
2841  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
2842  && isInt<16>(IMMOffset->getZExtValue())) {
2843  Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
2844  SDLoc(CurDAG->getEntryNode()),
2845  R600::ZERO, MVT::i32);
2846  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2847  MVT::i32);
2848  return true;
2849  }
2850 
2851  // Default case, no offset
2852  Base = Addr;
2853  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2854  return true;
2855 }