LLVM 13.0.0git
AMDGPUISelDAGToDAG.cpp
1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Defines an instruction selector for the AMDGPU target.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "AMDGPUTargetMachine.h"
16 #include "SIMachineFunctionInfo.h"
23 #include "llvm/IR/IntrinsicsAMDGPU.h"
24 #include "llvm/InitializePasses.h"
25 
26 #ifdef EXPENSIVE_CHECKS
27 #include "llvm/Analysis/LoopInfo.h"
28 #include "llvm/IR/Dominators.h"
29 #endif
30 
31 #define DEBUG_TYPE "isel"
32 
33 using namespace llvm;
34 
35 namespace llvm {
36 
37 class R600InstrInfo;
38 
39 } // end namespace llvm
40 
41 //===----------------------------------------------------------------------===//
42 // Instruction Selector Implementation
43 //===----------------------------------------------------------------------===//
44 
45 namespace {
46 
47 static bool isNullConstantOrUndef(SDValue V) {
48  if (V.isUndef())
49  return true;
50 
51  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
52  return Const != nullptr && Const->isNullValue();
53 }
54 
55 static bool getConstantValue(SDValue N, uint32_t &Out) {
56  // This is only used for packed vectors, where using 0 for undef should
57  // always be good.
58  if (N.isUndef()) {
59  Out = 0;
60  return true;
61  }
62 
63  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
64  Out = C->getAPIntValue().getSExtValue();
65  return true;
66  }
67 
68  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
69  Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
70  return true;
71  }
72 
73  return false;
74 }
75 
76 // TODO: Handle undef as zero
77 static SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
78  bool Negate = false) {
79  assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
80  uint32_t LHSVal, RHSVal;
81  if (getConstantValue(N->getOperand(0), LHSVal) &&
82  getConstantValue(N->getOperand(1), RHSVal)) {
83  SDLoc SL(N);
84  uint32_t K = Negate ?
85  (-LHSVal & 0xffff) | (-RHSVal << 16) :
86  (LHSVal & 0xffff) | (RHSVal << 16);
87  return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
88  DAG.getTargetConstant(K, SL, MVT::i32));
89  }
90 
91  return nullptr;
92 }
93 
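// A worked example of the packing above: a v2i16 BUILD_VECTOR of the
// constants 1 and 2 folds to K = (1 & 0xffff) | (2 << 16) = 0x00020001 in a
// single S_MOV_B32, and with Negate set the same operands give
// K = (-1 & 0xffff) | (-2 << 16) = 0xfffeffff.
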
94 static SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
95  return packConstantV2I16(N, DAG, true);
96 }
97 
98 /// AMDGPU specific code to select AMDGPU machine instructions for
99 /// SelectionDAG operations.
100 class AMDGPUDAGToDAGISel : public SelectionDAGISel {
101  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
102  // make the right decision when generating code for different targets.
103  const GCNSubtarget *Subtarget;
104 
105  // Default FP mode for the current function.
106  AMDGPU::SIModeRegisterDefaults Mode;
107 
108  bool EnableLateStructurizeCFG;
109 
110 public:
111  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
112  CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
113  : SelectionDAGISel(*TM, OptLevel) {
114  EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
115  }
116  ~AMDGPUDAGToDAGISel() override = default;
117 
118  void getAnalysisUsage(AnalysisUsage &AU) const override {
119  AU.addRequired<AMDGPUArgumentUsageInfo>();
120  AU.addRequired<LegacyDivergenceAnalysis>();
121 #ifdef EXPENSIVE_CHECKS
122  AU.addRequired<DominatorTreeWrapperPass>();
123  AU.addRequired<LoopInfoWrapperPass>();
124 #endif
125  SelectionDAGISel::getAnalysisUsage(AU);
126  }
127 
128  bool matchLoadD16FromBuildVector(SDNode *N) const;
129 
130  bool runOnMachineFunction(MachineFunction &MF) override;
131  void PreprocessISelDAG() override;
132  void Select(SDNode *N) override;
133  StringRef getPassName() const override;
134  void PostprocessISelDAG() override;
135 
136 protected:
137  void SelectBuildVector(SDNode *N, unsigned RegClassID);
138 
139 private:
140  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
141  bool isNoNanSrc(SDValue N) const;
142  bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
143  bool isNegInlineImmediate(const SDNode *N) const {
144  return isInlineImmediate(N, true);
145  }
146 
147  bool isInlineImmediate16(int64_t Imm) const {
148  return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm());
149  }
150 
151  bool isInlineImmediate32(int64_t Imm) const {
152  return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm());
153  }
154 
155  bool isInlineImmediate64(int64_t Imm) const {
156  return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm());
157  }
158 
159  bool isInlineImmediate(const APFloat &Imm) const {
160  return Subtarget->getInstrInfo()->isInlineConstant(Imm);
161  }
162 
163  bool isVGPRImm(const SDNode *N) const;
164  bool isUniformLoad(const SDNode *N) const;
165  bool isUniformBr(const SDNode *N) const;
166 
167  bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
168  SDValue &RHS) const;
169 
170  MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
171 
172  SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
173  SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
174  SDNode *glueCopyToM0LDSInit(SDNode *N) const;
175 
176  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
177  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
178  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
179  bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
180  bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
181  unsigned Size) const;
182  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
183  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
184  SDValue &Offset1) const;
185  bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
186  SDValue &Offset1) const;
187  bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0,
188  SDValue &Offset1, unsigned Size) const;
189  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
190  SDValue &SOffset, SDValue &Offset, SDValue &Offen,
191  SDValue &Idxen, SDValue &Addr64) const;
192  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
193  SDValue &SOffset, SDValue &Offset) const;
194  bool SelectMUBUFScratchOffen(SDNode *Parent,
195  SDValue Addr, SDValue &RSrc, SDValue &VAddr,
196  SDValue &SOffset, SDValue &ImmOffset) const;
197  bool SelectMUBUFScratchOffset(SDNode *Parent,
198  SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
199  SDValue &Offset) const;
200 
201  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
202  SDValue &Offset) const;
203 
204  bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr,
205  SDValue &Offset, uint64_t FlatVariant) const;
206  bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
207  SDValue &Offset) const;
208  bool SelectGlobalOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
209  SDValue &Offset) const;
210  bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
211  SDValue &Offset) const;
212  bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
213  SDValue &VOffset, SDValue &Offset) const;
214  bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
215  SDValue &Offset) const;
216 
217  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
218  bool &Imm) const;
219  SDValue Expand32BitAddress(SDValue Addr) const;
220  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
221  bool &Imm) const;
222  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
223  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
224  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
225  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
226  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
227  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
228 
229  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
230  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
231  bool AllowAbs = true) const;
232  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
233  bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
234  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
235  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
236  SDValue &Clamp, SDValue &Omod) const;
237  bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
238  SDValue &Clamp, SDValue &Omod) const;
239  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
240  SDValue &Clamp, SDValue &Omod) const;
241 
242  bool SelectVOP3OMods(SDValue In, SDValue &Src,
243  SDValue &Clamp, SDValue &Omod) const;
244 
245  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
246 
247  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
248 
249  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
250  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
251  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
252 
253  SDValue getHi16Elt(SDValue In) const;
254 
255  SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;
256 
257  void SelectADD_SUB_I64(SDNode *N);
258  void SelectAddcSubb(SDNode *N);
259  void SelectUADDO_USUBO(SDNode *N);
260  void SelectDIV_SCALE(SDNode *N);
261  void SelectMAD_64_32(SDNode *N);
262  void SelectFMA_W_CHAIN(SDNode *N);
263  void SelectFMUL_W_CHAIN(SDNode *N);
264 
265  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
266  uint32_t Offset, uint32_t Width);
267  void SelectS_BFEFromShifts(SDNode *N);
268  void SelectS_BFE(SDNode *N);
269  bool isCBranchSCC(const SDNode *N) const;
270  void SelectBRCOND(SDNode *N);
271  void SelectFMAD_FMA(SDNode *N);
272  void SelectATOMIC_CMP_SWAP(SDNode *N);
273  void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
274  void SelectDS_GWS(SDNode *N, unsigned IntrID);
275  void SelectInterpP1F16(SDNode *N);
276  void SelectINTRINSIC_W_CHAIN(SDNode *N);
277  void SelectINTRINSIC_WO_CHAIN(SDNode *N);
278  void SelectINTRINSIC_VOID(SDNode *N);
279 
280 protected:
281  // Include the pieces autogenerated from the target description.
282 #include "AMDGPUGenDAGISel.inc"
283 };
284 
285 class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
286  const R600Subtarget *Subtarget;
287 
288  bool isConstantLoad(const MemSDNode *N, int cbID) const;
289  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
290  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
291  SDValue& Offset);
292 public:
293  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
294  AMDGPUDAGToDAGISel(TM, OptLevel) {}
295 
296  void Select(SDNode *N) override;
297 
298  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
299  SDValue &Offset) override;
300  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
301  SDValue &Offset) override;
302 
303  bool runOnMachineFunction(MachineFunction &MF) override;
304 
305  void PreprocessISelDAG() override {}
306 
307 protected:
308  // Include the pieces autogenerated from the target description.
309 #include "R600GenDAGISel.inc"
310 };
311 
312 static SDValue stripBitcast(SDValue Val) {
313  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
314 }
315 
316 // Figure out if this is really an extract of the high 16 bits of a dword.
317 static bool isExtractHiElt(SDValue In, SDValue &Out) {
318  In = stripBitcast(In);
319 
320  if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
321  if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
322  if (!Idx->isOne())
323  return false;
324  Out = In.getOperand(0);
325  return true;
326  }
327  }
328 
329  if (In.getOpcode() != ISD::TRUNCATE)
330  return false;
331 
332  SDValue Srl = In.getOperand(0);
333  if (Srl.getOpcode() == ISD::SRL) {
334  if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
335  if (ShiftAmt->getZExtValue() == 16) {
336  Out = stripBitcast(Srl.getOperand(0));
337  return true;
338  }
339  }
340  }
341 
342  return false;
343 }
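// For example, both (extract_vector_elt (v2i16 V), 1) and
// (trunc (srl (i32 V), 16)) are recognized here as reads of V's high half.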
344 
345 // Look through operations that obscure just looking at the low 16 bits of the
346 // same register.
347 static SDValue stripExtractLoElt(SDValue In) {
348  if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
349  if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
350  if (Idx->isNullValue() && In.getValueSizeInBits() <= 32)
351  return In.getOperand(0);
352  }
353  }
354 
355  if (In.getOpcode() == ISD::TRUNCATE) {
356  SDValue Src = In.getOperand(0);
357  if (Src.getValueType().getSizeInBits() == 32)
358  return stripBitcast(Src);
359  }
360 
361  return In;
362 }
363 
364 } // end anonymous namespace
365 
366 INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
367  "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
368 INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
369 INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
370 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
371 #ifdef EXPENSIVE_CHECKS
372 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
373 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
374 #endif
375 INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
376  "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
377 
378 /// This pass converts a legalized DAG into an AMDGPU-specific
379 // DAG, ready for instruction scheduling.
380 FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
381  CodeGenOpt::Level OptLevel) {
382  return new AMDGPUDAGToDAGISel(TM, OptLevel);
383 }
384 
385 /// This pass converts a legalized DAG into an R600-specific
386 // DAG, ready for instruction scheduling.
387 FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
388  CodeGenOpt::Level OptLevel) {
389  return new R600DAGToDAGISel(TM, OptLevel);
390 }
391 
392 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
393 #ifdef EXPENSIVE_CHECKS
394  DominatorTree & DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
395  LoopInfo * LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
396  for (auto &L : LI->getLoopsInPreorder()) {
397  assert(L->isLCSSAForm(DT));
398  }
399 #endif
400  Subtarget = &MF.getSubtarget<GCNSubtarget>();
401  Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction());
402  return SelectionDAGISel::runOnMachineFunction(MF);
403 }
404 
405 bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
406  assert(Subtarget->d16PreservesUnusedBits());
407  MVT VT = N->getValueType(0).getSimpleVT();
408  if (VT != MVT::v2i16 && VT != MVT::v2f16)
409  return false;
410 
411  SDValue Lo = N->getOperand(0);
412  SDValue Hi = N->getOperand(1);
413 
414  LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));
415 
416  // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
417  // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
418  // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo
419 
420  // Need to check for possible indirect dependencies on the other half of the
421  // vector to avoid introducing a cycle.
422  if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
423  SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
424 
425  SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
426  SDValue Ops[] = {
427  LdHi->getChain(), LdHi->getBasePtr(), TiedIn
428  };
429 
430  unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
431  if (LdHi->getMemoryVT() == MVT::i8) {
432  LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
433  AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
434  } else {
435  assert(LdHi->getMemoryVT() == MVT::i16);
436  }
437 
438  SDValue NewLoadHi =
439  CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
440  Ops, LdHi->getMemoryVT(),
441  LdHi->getMemOperand());
442 
443  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
444  CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
445  return true;
446  }
447 
448  // build_vector (load ptr), hi -> load_d16_lo ptr, hi
449  // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
450  // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
451  LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
452  if (LdLo && Lo.hasOneUse()) {
453  SDValue TiedIn = getHi16Elt(Hi);
454  if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
455  return false;
456 
457  SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
458  unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
459  if (LdLo->getMemoryVT() == MVT::i8) {
460  LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
461  AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
462  } else {
463  assert(LdLo->getMemoryVT() == MVT::i16);
464  }
465 
466  TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);
467 
468  SDValue Ops[] = {
469  LdLo->getChain(), LdLo->getBasePtr(), TiedIn
470  };
471 
472  SDValue NewLoadLo =
473  CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
474  Ops, LdLo->getMemoryVT(),
475  LdLo->getMemOperand());
476 
477  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
478  CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
479  return true;
480  }
481 
482  return false;
483 }
484 
485 void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
486  if (!Subtarget->d16PreservesUnusedBits())
487  return;
488 
489  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
490 
491  bool MadeChange = false;
492  while (Position != CurDAG->allnodes_begin()) {
493  SDNode *N = &*--Position;
494  if (N->use_empty())
495  continue;
496 
497  switch (N->getOpcode()) {
498  case ISD::BUILD_VECTOR:
499  MadeChange |= matchLoadD16FromBuildVector(N);
500  break;
501  default:
502  break;
503  }
504  }
505 
506  if (MadeChange) {
507  CurDAG->RemoveDeadNodes();
508  LLVM_DEBUG(dbgs() << "After PreProcess:\n";
509  CurDAG->dump(););
510  }
511 }
512 
513 bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
514  if (TM.Options.NoNaNsFPMath)
515  return true;
516 
517  // TODO: Move into isKnownNeverNaN
518  if (N->getFlags().hasNoNaNs())
519  return true;
520 
521  return CurDAG->isKnownNeverNaN(N);
522 }
523 
524 bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
525  bool Negated) const {
526  if (N->isUndef())
527  return true;
528 
529  const SIInstrInfo *TII = Subtarget->getInstrInfo();
530  if (Negated) {
531  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
532  return TII->isInlineConstant(-C->getAPIntValue());
533 
534  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
535  return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());
536 
537  } else {
538  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
539  return TII->isInlineConstant(C->getAPIntValue());
540 
541  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
542  return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
543  }
544 
545  return false;
546 }
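
// For reference, the inline immediates accepted above are the integers in
// [-16, 64] and the values 0.0, +/-0.5, +/-1.0, +/-2.0, +/-4.0 (plus
// 1/(2*pi) on subtargets with hasInv2PiInlineImm()), so e.g. a 64-bit
// constant of 64 selects without a literal while 65 does not.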
547 
548 /// Determine the register class for \p OpNo
549 /// \returns The register class of the virtual register that will be used for
550 /// the given operand number \p OpNo or NULL if the register class cannot be
551 /// determined.
552 const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
553  unsigned OpNo) const {
554  if (!N->isMachineOpcode()) {
555  if (N->getOpcode() == ISD::CopyToReg) {
556  Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
557  if (Reg.isVirtual()) {
558  MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
559  return MRI.getRegClass(Reg);
560  }
561 
562  const SIRegisterInfo *TRI
563  = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
564  return TRI->getPhysRegClass(Reg);
565  }
566 
567  return nullptr;
568  }
569 
570  switch (N->getMachineOpcode()) {
571  default: {
572  const MCInstrDesc &Desc =
573  Subtarget->getInstrInfo()->get(N->getMachineOpcode());
574  unsigned OpIdx = Desc.getNumDefs() + OpNo;
575  if (OpIdx >= Desc.getNumOperands())
576  return nullptr;
577  int RegClass = Desc.OpInfo[OpIdx].RegClass;
578  if (RegClass == -1)
579  return nullptr;
580 
581  return Subtarget->getRegisterInfo()->getRegClass(RegClass);
582  }
583  case AMDGPU::REG_SEQUENCE: {
584  unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
585  const TargetRegisterClass *SuperRC =
586  Subtarget->getRegisterInfo()->getRegClass(RCID);
587 
588  SDValue SubRegOp = N->getOperand(OpNo + 1);
589  unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
590  return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
591  SubRegIdx);
592  }
593  }
594 }
595 
596 SDNode *AMDGPUDAGToDAGISel::glueCopyToOp(SDNode *N, SDValue NewChain,
597  SDValue Glue) const {
598  SmallVector<SDValue, 8> Ops;
599  Ops.push_back(NewChain); // Replace the chain.
600  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
601  Ops.push_back(N->getOperand(i));
602 
603  Ops.push_back(Glue);
604  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
605 }
606 
607 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
608  const SITargetLowering& Lowering =
609  *static_cast<const SITargetLowering*>(getTargetLowering());
610 
611  assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
612 
613  SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), Val);
614  return glueCopyToOp(N, M0, M0.getValue(1));
615 }
616 
617 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
618  unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
619  if (AS == AMDGPUAS::LOCAL_ADDRESS) {
620  if (Subtarget->ldsRequiresM0Init())
621  return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
622  } else if (AS == AMDGPUAS::REGION_ADDRESS) {
623  MachineFunction &MF = CurDAG->getMachineFunction();
624  unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
625  return
626  glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
627  }
628  return N;
629 }
630 
631 MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
632  EVT VT) const {
633  SDNode *Lo = CurDAG->getMachineNode(
634  AMDGPU::S_MOV_B32, DL, MVT::i32,
635  CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
636  SDNode *Hi =
637  CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
638  CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32));
639  const SDValue Ops[] = {
640  CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
641  SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
642  SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};
643 
644  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
645 }
646 
647 void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
648  EVT VT = N->getValueType(0);
649  unsigned NumVectorElts = VT.getVectorNumElements();
650  EVT EltVT = VT.getVectorElementType();
651  SDLoc DL(N);
652  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
653 
654  if (NumVectorElts == 1) {
655  CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
656  RegClass);
657  return;
658  }
659 
660  assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
661  "supported yet");
662  // 32 = Max Num Vector Elements
663  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
664  // 1 = Vector Register Class
665  SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
666 
667  bool IsGCN = CurDAG->getSubtarget().getTargetTriple().getArch() ==
668  Triple::amdgcn;
669  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
670  bool IsRegSeq = true;
671  unsigned NOps = N->getNumOperands();
672  for (unsigned i = 0; i < NOps; i++) {
673  // XXX: Why is this here?
674  if (isa<RegisterSDNode>(N->getOperand(i))) {
675  IsRegSeq = false;
676  break;
677  }
678  unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
679  : R600RegisterInfo::getSubRegFromChannel(i);
680  RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
681  RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
682  }
683  if (NOps != NumVectorElts) {
684  // Fill in the missing undef elements if this was a scalar_to_vector.
685  assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
686  MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
687  DL, EltVT);
688  for (unsigned i = NOps; i < NumVectorElts; ++i) {
689  unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
690  : R600RegisterInfo::getSubRegFromChannel(i);
691  RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
692  RegSeqArgs[1 + (2 * i) + 1] =
693  CurDAG->getTargetConstant(Sub, DL, MVT::i32);
694  }
695  }
696 
697  if (!IsRegSeq)
698  SelectCode(N);
699  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
700 }
701 
702 void AMDGPUDAGToDAGISel::Select(SDNode *N) {
703  unsigned int Opc = N->getOpcode();
704  if (N->isMachineOpcode()) {
705  N->setNodeId(-1);
706  return; // Already selected.
707  }
708 
709  // isa<MemSDNode> almost works but is slightly too permissive for some DS
710  // intrinsics.
711  if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N) ||
712  (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
713  Opc == ISD::ATOMIC_LOAD_FADD ||
714  Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
715  Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) {
716  N = glueCopyToM0LDSInit(N);
717  SelectCode(N);
718  return;
719  }
720 
721  switch (Opc) {
722  default:
723  break;
724  // We are selecting i64 ADD here instead of custom lower it during
725  // DAG legalization, so we can fold some i64 ADDs used for address
726  // calculation into the LOAD and STORE instructions.
727  case ISD::ADDC:
728  case ISD::ADDE:
729  case ISD::SUBC:
730  case ISD::SUBE: {
731  if (N->getValueType(0) != MVT::i64)
732  break;
733 
734  SelectADD_SUB_I64(N);
735  return;
736  }
737  case ISD::ADDCARRY:
738  case ISD::SUBCARRY:
739  if (N->getValueType(0) != MVT::i32)
740  break;
741 
742  SelectAddcSubb(N);
743  return;
744  case ISD::UADDO:
745  case ISD::USUBO: {
746  SelectUADDO_USUBO(N);
747  return;
748  }
749  case AMDGPUISD::FMUL_W_CHAIN: {
750  SelectFMUL_W_CHAIN(N);
751  return;
752  }
753  case AMDGPUISD::FMA_W_CHAIN: {
754  SelectFMA_W_CHAIN(N);
755  return;
756  }
757 
758  case ISD::SCALAR_TO_VECTOR:
759  case ISD::BUILD_VECTOR: {
760  EVT VT = N->getValueType(0);
761  unsigned NumVectorElts = VT.getVectorNumElements();
762  if (VT.getScalarSizeInBits() == 16) {
763  if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
764  if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
765  ReplaceNode(N, Packed);
766  return;
767  }
768  }
769 
770  break;
771  }
772 
773  assert(VT.getVectorElementType().bitsEq(MVT::i32));
774  unsigned RegClassID =
775  SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID();
776  SelectBuildVector(N, RegClassID);
777  return;
778  }
779  case ISD::BUILD_PAIR: {
780  SDValue RC, SubReg0, SubReg1;
781  SDLoc DL(N);
782  if (N->getValueType(0) == MVT::i128) {
783  RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
784  SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
785  SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
786  } else if (N->getValueType(0) == MVT::i64) {
787  RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
788  SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
789  SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
790  } else {
791  llvm_unreachable("Unhandled value type for BUILD_PAIR");
792  }
793  const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
794  N->getOperand(1), SubReg1 };
795  ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
796  N->getValueType(0), Ops));
797  return;
798  }
799 
800  case ISD::Constant:
801  case ISD::ConstantFP: {
802  if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
803  break;
804 
805  uint64_t Imm;
806  if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
807  Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
808  else {
809  ConstantSDNode *C = cast<ConstantSDNode>(N);
810  Imm = C->getZExtValue();
811  }
812 
813  SDLoc DL(N);
814  ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
815  return;
816  }
817  case AMDGPUISD::BFE_I32:
818  case AMDGPUISD::BFE_U32: {
819  // There is a scalar version available, but unlike the vector version which
820  // has a separate operand for the offset and width, the scalar version packs
821  // the width and offset into a single operand. Try to move to the scalar
822  // version if the offsets are constant, so that we can try to keep extended
823  // loads of kernel arguments in SGPRs.
824 
825  // TODO: Technically we could try to pattern match scalar bitshifts of
826  // dynamic values, but it's probably not useful.
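
  // For example, with OffsetVal = 8 and WidthVal = 8 the scalar form below
  // takes a single packed source operand; assuming the offset-in-low-bits,
  // width-in-high-bits layout built by getS_BFE, that operand would be
  // 0x00080008.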
827  ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
828  if (!Offset)
829  break;
830 
831  ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
832  if (!Width)
833  break;
834 
835  bool Signed = Opc == AMDGPUISD::BFE_I32;
836 
837  uint32_t OffsetVal = Offset->getZExtValue();
838  uint32_t WidthVal = Width->getZExtValue();
839 
840  ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
841  SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
842  return;
843  }
844  case AMDGPUISD::DIV_SCALE: {
845  SelectDIV_SCALE(N);
846  return;
847  }
848  case AMDGPUISD::MAD_I64_I32:
849  case AMDGPUISD::MAD_U64_U32: {
850  SelectMAD_64_32(N);
851  return;
852  }
853  case ISD::CopyToReg: {
854  const SITargetLowering& Lowering =
855  *static_cast<const SITargetLowering*>(getTargetLowering());
856  N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
857  break;
858  }
859  case ISD::AND:
860  case ISD::SRL:
861  case ISD::SRA:
862  case ISD::SIGN_EXTEND_INREG:
863  if (N->getValueType(0) != MVT::i32)
864  break;
865 
866  SelectS_BFE(N);
867  return;
868  case ISD::BRCOND:
869  SelectBRCOND(N);
870  return;
871  case ISD::FMAD:
872  case ISD::FMA:
873  SelectFMAD_FMA(N);
874  return;
875  case AMDGPUISD::ATOMIC_CMP_SWAP:
876  SelectATOMIC_CMP_SWAP(N);
877  return;
878  case AMDGPUISD::CVT_PKRTZ_F16_F32:
879  case AMDGPUISD::CVT_PKNORM_I16_F32:
880  case AMDGPUISD::CVT_PKNORM_U16_F32:
881  case AMDGPUISD::CVT_PK_U16_U32:
882  case AMDGPUISD::CVT_PK_I16_I32: {
883  // Hack around using a legal type if f16 is illegal.
884  if (N->getValueType(0) == MVT::i32) {
885  MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
886  N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
887  { N->getOperand(0), N->getOperand(1) });
888  SelectCode(N);
889  return;
890  }
891 
892  break;
893  }
894  case ISD::INTRINSIC_W_CHAIN: {
895  SelectINTRINSIC_W_CHAIN(N);
896  return;
897  }
898  case ISD::INTRINSIC_WO_CHAIN: {
899  SelectINTRINSIC_WO_CHAIN(N);
900  return;
901  }
902  case ISD::INTRINSIC_VOID: {
903  SelectINTRINSIC_VOID(N);
904  return;
905  }
906  }
907 
908  SelectCode(N);
909 }
910 
911 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
912  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
913  const Instruction *Term = BB->getTerminator();
914  return Term->getMetadata("amdgpu.uniform") ||
915  Term->getMetadata("structurizecfg.uniform");
916 }
917 
918 static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr,
919  SDValue &N0, SDValue &N1) {
920  if (Addr.getValueType() == MVT::i64 && Addr.getOpcode() == ISD::BITCAST &&
921  Addr.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
922  // Because we split 64-bit `or` earlier, this is a complicated pattern to match, i.e.
923  // (i64 (bitcast (v2i32 (build_vector
924  // (or (extract_vector_elt V, 0), OFFSET),
925  // (extract_vector_elt V, 1)))))
926  SDValue Lo = Addr.getOperand(0).getOperand(0);
927  if (Lo.getOpcode() == ISD::OR && DAG.isBaseWithConstantOffset(Lo)) {
928  SDValue BaseLo = Lo.getOperand(0);
929  SDValue BaseHi = Addr.getOperand(0).getOperand(1);
930  // Check that the split base halves (Lo and Hi) are extracted from the same vector.
931  if (BaseLo.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
932  BaseHi.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
933  BaseLo.getOperand(0) == BaseHi.getOperand(0) &&
934  // Lo is statically extracted from index 0.
935  isa<ConstantSDNode>(BaseLo.getOperand(1)) &&
936  BaseLo.getConstantOperandVal(1) == 0 &&
937  // Hi is statically extracted from index 1.
938  isa<ConstantSDNode>(BaseHi.getOperand(1)) &&
939  BaseHi.getConstantOperandVal(1) == 1) {
940  N0 = BaseLo.getOperand(0).getOperand(0);
941  N1 = Lo.getOperand(1);
942  return true;
943  }
944  }
945  }
946  return false;
947 }
948 
949 bool AMDGPUDAGToDAGISel::isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
950  SDValue &RHS) const {
951  if (CurDAG->isBaseWithConstantOffset(Addr)) {
952  LHS = Addr.getOperand(0);
953  RHS = Addr.getOperand(1);
954  return true;
955  }
956 
957  if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, LHS, RHS)) {
958  assert(LHS && RHS && isa<ConstantSDNode>(RHS));
959  return true;
960  }
961 
962  return false;
963 }
964 
965 StringRef AMDGPUDAGToDAGISel::getPassName() const {
966  return "AMDGPU DAG->DAG Pattern Instruction Selection";
967 }
968 
969 //===----------------------------------------------------------------------===//
970 // Complex Patterns
971 //===----------------------------------------------------------------------===//
972 
973 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
974  SDValue &Offset) {
975  return false;
976 }
977 
978 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
979  SDValue &Offset) {
980  ConstantSDNode *C;
981  SDLoc DL(Addr);
982 
983  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
984  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
985  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
986  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
987  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
988  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
989  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
990  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
991  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
992  Base = Addr.getOperand(0);
993  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
994  } else {
995  Base = Addr;
996  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
997  }
998 
999  return true;
1000 }
1001 
1002 SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
1003  const SDLoc &DL) const {
1004  SDNode *Mov = CurDAG->getMachineNode(
1005  AMDGPU::S_MOV_B32, DL, MVT::i32,
1006  CurDAG->getTargetConstant(Val, DL, MVT::i32));
1007  return SDValue(Mov, 0);
1008 }
1009 
1010 // FIXME: Should only handle addcarry/subcarry
1011 void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
1012  SDLoc DL(N);
1013  SDValue LHS = N->getOperand(0);
1014  SDValue RHS = N->getOperand(1);
1015 
1016  unsigned Opcode = N->getOpcode();
1017  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
1018  bool ProduceCarry =
1019  ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
1020  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;
1021 
1022  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
1023  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
1024 
1025  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1026  DL, MVT::i32, LHS, Sub0);
1027  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1028  DL, MVT::i32, LHS, Sub1);
1029 
1030  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1031  DL, MVT::i32, RHS, Sub0);
1032  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1033  DL, MVT::i32, RHS, Sub1);
1034 
1035  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
1036 
1037  static const unsigned OpcMap[2][2][2] = {
1038  {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
1039  {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
1040  {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
1041  {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
1042 
1043  unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
1044  unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];
1045 
1046  SDNode *AddLo;
1047  if (!ConsumeCarry) {
1048  SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
1049  AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
1050  } else {
1051  SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
1052  AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
1053  }
1054  SDValue AddHiArgs[] = {
1055  SDValue(Hi0, 0),
1056  SDValue(Hi1, 0),
1057  SDValue(AddLo, 1)
1058  };
1059  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);
1060 
1061  SDValue RegSequenceArgs[] = {
1062  CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
1063  SDValue(AddLo,0),
1064  Sub0,
1065  SDValue(AddHi,0),
1066  Sub1,
1067  };
1068  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
1069  MVT::i64, RegSequenceArgs);
1070 
1071  if (ProduceCarry) {
1072  // Replace the carry-use
1073  ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
1074  }
1075 
1076  // Replace the remaining uses.
1077  ReplaceNode(N, RegSequence);
1078 }
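
// For instance, a uniform (non-divergent) i64 ISD::ADDC selects to S_ADD_U32
// on the sub0 halves followed by S_ADDC_U32 on the sub1 halves, with the
// carry glued between them and the two 32-bit results recombined by the
// REG_SEQUENCE; the divergent path uses the V_ADD_CO_U32 / V_ADDC_U32
// column of OpcMap instead.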
1079 
1080 void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
1081  SDLoc DL(N);
1082  SDValue LHS = N->getOperand(0);
1083  SDValue RHS = N->getOperand(1);
1084  SDValue CI = N->getOperand(2);
1085 
1086  if (N->isDivergent()) {
1087  unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
1088  : AMDGPU::V_SUBB_U32_e64;
1089  CurDAG->SelectNodeTo(
1090  N, Opc, N->getVTList(),
1091  {LHS, RHS, CI,
1092  CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
1093  } else {
1094  unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::S_ADD_CO_PSEUDO
1095  : AMDGPU::S_SUB_CO_PSEUDO;
1096  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
1097  }
1098 }
1099 
1100 void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
1101  // The names of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
1102  // carry out despite the _i32 name. These were renamed in VI to _U32.
1103  // FIXME: We should probably rename the opcodes here.
1104  bool IsAdd = N->getOpcode() == ISD::UADDO;
1105  bool IsVALU = N->isDivergent();
1106 
1107  for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E;
1108  ++UI)
1109  if (UI.getUse().getResNo() == 1) {
1110  if ((IsAdd && (UI->getOpcode() != ISD::ADDCARRY)) ||
1111  (!IsAdd && (UI->getOpcode() != ISD::SUBCARRY))) {
1112  IsVALU = true;
1113  break;
1114  }
1115  }
1116 
1117  if (IsVALU) {
1118  unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
1119 
1120  CurDAG->SelectNodeTo(
1121  N, Opc, N->getVTList(),
1122  {N->getOperand(0), N->getOperand(1),
1123  CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
1124  } else {
1125  unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
1126  : AMDGPU::S_USUBO_PSEUDO;
1127 
1128  CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
1129  {N->getOperand(0), N->getOperand(1)});
1130  }
1131 }
1132 
1133 void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
1134  SDLoc SL(N);
1135  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
1136  SDValue Ops[10];
1137 
1138  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
1139  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
1140  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
1141  Ops[8] = N->getOperand(0);
1142  Ops[9] = N->getOperand(4);
1143 
1144  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32_e64, N->getVTList(), Ops);
1145 }
1146 
1147 void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
1148  SDLoc SL(N);
1149  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
1150  SDValue Ops[8];
1151 
1152  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
1153  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
1154  Ops[6] = N->getOperand(0);
1155  Ops[7] = N->getOperand(3);
1156 
1157  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
1158 }
1159 
1160 // We need to handle this here because tablegen doesn't support matching
1161 // instructions with multiple outputs.
1162 void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
1163  SDLoc SL(N);
1164  EVT VT = N->getValueType(0);
1165 
1166  assert(VT == MVT::f32 || VT == MVT::f64);
1167 
1168  unsigned Opc
1169  = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;
1170 
1171  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
1172  // omod
1173  SDValue Ops[8];
1174  SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
1175  SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
1176  SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
1177  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
1178 }
1179 
1180 // We need to handle this here because tablegen doesn't support matching
1181 // instructions with multiple outputs.
1182 void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
1183  SDLoc SL(N);
1184  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
1185  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1186 
1187  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
1188  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
1189  Clamp };
1190  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
1191 }
1192 
1193 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset) const {
1194  if (!isUInt<16>(Offset))
1195  return false;
1196 
1197  if (!Base || Subtarget->hasUsableDSOffset() ||
1198  Subtarget->unsafeDSOffsetFoldingEnabled())
1199  return true;
1200 
1201  // On Southern Islands, instructions with a negative base value and an
1202  // offset don't seem to work.
1203  return CurDAG->SignBitIsZero(Base);
1204 }
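
// For example, given the 16-bit field checked above, a DS access at
// base + 65535 can keep the constant in the offset operand, while
// base + 65536 fails isUInt<16> and must be folded into the base register.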
1205 
1206 bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
1207  SDValue &Offset) const {
1208  SDLoc DL(Addr);
1209  if (CurDAG->isBaseWithConstantOffset(Addr)) {
1210  SDValue N0 = Addr.getOperand(0);
1211  SDValue N1 = Addr.getOperand(1);
1212  ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1213  if (isDSOffsetLegal(N0, C1->getSExtValue())) {
1214  // (add n0, c0)
1215  Base = N0;
1216  Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1217  return true;
1218  }
1219  } else if (Addr.getOpcode() == ISD::SUB) {
1220  // sub C, x -> add (sub 0, x), C
1221  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
1222  int64_t ByteOffset = C->getSExtValue();
1223  if (isDSOffsetLegal(SDValue(), ByteOffset)) {
1224  SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1225 
1226  // XXX - This is kind of hacky. Create a dummy sub node so we can check
1227  // the known bits in isDSOffsetLegal. We need to emit the selected node
1228  // here, so this is thrown away.
1229  SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
1230  Zero, Addr.getOperand(1));
1231 
1232  if (isDSOffsetLegal(Sub, ByteOffset)) {
1233  SmallVector<SDValue, 3> Opnds;
1234  Opnds.push_back(Zero);
1235  Opnds.push_back(Addr.getOperand(1));
1236 
1237  // FIXME: Select to VOP3 version for with-carry.
1238  unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1239  if (Subtarget->hasAddNoCarry()) {
1240  SubOp = AMDGPU::V_SUB_U32_e64;
1241  Opnds.push_back(
1242  CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
1243  }
1244 
1245  MachineSDNode *MachineSub =
1246  CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);
1247 
1248  Base = SDValue(MachineSub, 0);
1249  Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
1250  return true;
1251  }
1252  }
1253  }
1254  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1255  // If we have a constant address, prefer to put the constant into the
1256  // offset. This can save moves to load the constant address since multiple
1257  // operations can share the zero base address register, and enables merging
1258  // into read2 / write2 instructions.
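
    // For example, two dword slots at absolute LDS addresses 0 and 4 can
    // share a single v_mov_b32 of 0 as the base and later merge into one
    // ds_read2_b32 with offset0 = 0 and offset1 = 1.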
1259 
1260  SDLoc DL(Addr);
1261 
1262  if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
1263  SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1264  MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
1265  DL, MVT::i32, Zero);
1266  Base = SDValue(MovZero, 0);
1267  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
1268  return true;
1269  }
1270  }
1271 
1272  // default case
1273  Base = Addr;
1274  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
1275  return true;
1276 }
1277 
1278 bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
1279  unsigned Offset1,
1280  unsigned Size) const {
1281  if (Offset0 % Size != 0 || Offset1 % Size != 0)
1282  return false;
1283  if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
1284  return false;
1285 
1286  if (!Base || Subtarget->hasUsableDSOffset() ||
1287  Subtarget->unsafeDSOffsetFoldingEnabled())
1288  return true;
1289 
1290  // On Southern Islands, instructions with a negative base value and an
1291  // offset don't seem to work.
1292  return CurDAG->SignBitIsZero(Base);
1293 }
1294 
1295 // TODO: If the offset is too big, put the low 16 bits into the offset.
1296 bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
1297  SDValue &Offset0,
1298  SDValue &Offset1) const {
1299  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
1300 }
1301 
1302 bool AMDGPUDAGToDAGISel::SelectDS128Bit8ByteAligned(SDValue Addr, SDValue &Base,
1303  SDValue &Offset0,
1304  SDValue &Offset1) const {
1305  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);
1306 }
1307 
1308 bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
1309  SDValue &Offset0, SDValue &Offset1,
1310  unsigned Size) const {
1311  SDLoc DL(Addr);
1312 
1313  if (CurDAG->isBaseWithConstantOffset(Addr)) {
1314  SDValue N0 = Addr.getOperand(0);
1315  SDValue N1 = Addr.getOperand(1);
1316  ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1317  unsigned OffsetValue0 = C1->getZExtValue();
1318  unsigned OffsetValue1 = OffsetValue0 + Size;
1319 
1320  // (add n0, c0)
1321  if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
1322  Base = N0;
1323  Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
1324  Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
1325  return true;
1326  }
1327  } else if (Addr.getOpcode() == ISD::SUB) {
1328  // sub C, x -> add (sub 0, x), C
1329  if (const ConstantSDNode *C =
1330  dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
1331  unsigned OffsetValue0 = C->getZExtValue();
1332  unsigned OffsetValue1 = OffsetValue0 + Size;
1333 
1334  if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
1335  SDLoc DL(Addr);
1336  SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1337 
1338  // XXX - This is kind of hacky. Create a dummy sub node so we can check
1339  // the known bits in isDSOffset2Legal. We need to emit the selected node
1340  // here, so this is thrown away.
1341  SDValue Sub =
1342  CurDAG->getNode(ISD::SUB, DL, MVT::i32, Zero, Addr.getOperand(1));
1343 
1344  if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
1345  SmallVector<SDValue, 3> Opnds;
1346  Opnds.push_back(Zero);
1347  Opnds.push_back(Addr.getOperand(1));
1348  unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1349  if (Subtarget->hasAddNoCarry()) {
1350  SubOp = AMDGPU::V_SUB_U32_e64;
1351  Opnds.push_back(
1352  CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
1353  }
1354 
1355  MachineSDNode *MachineSub = CurDAG->getMachineNode(
1356  SubOp, DL, MVT::getIntegerVT(Size * 8), Opnds);
1357 
1358  Base = SDValue(MachineSub, 0);
1359  Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
1360  Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
1361  return true;
1362  }
1363  }
1364  }
1365  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1366  unsigned OffsetValue0 = CAddr->getZExtValue();
1367  unsigned OffsetValue1 = OffsetValue0 + Size;
1368 
1369  if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
1370  SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1371  MachineSDNode *MovZero =
1372  CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, Zero);
1373  Base = SDValue(MovZero, 0);
1374  Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
1375  Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
1376  return true;
1377  }
1378  }
1379 
1380  // default case
1381 
1382  Base = Addr;
1383  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
1384  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
1385  return true;
1386 }
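
// For example, a 64-bit, 4-byte-aligned DS access at (add n0, 40) reaches
// SelectDSReadWrite2 with Size = 4 and yields Offset0 = 10 and Offset1 = 11,
// the dword-scaled encodings of byte offsets 40 and 44.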
1387 
1388 bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
1389  SDValue &SOffset, SDValue &Offset,
1390  SDValue &Offen, SDValue &Idxen,
1391  SDValue &Addr64) const {
1392  // The subtarget prefers to use flat instructions.
1393  // FIXME: This should be a pattern predicate and not reach here
1394  if (Subtarget->useFlatForGlobal())
1395  return false;
1396 
1397  SDLoc DL(Addr);
1398 
1399  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
1400  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
1401  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
1402  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1403 
1404  ConstantSDNode *C1 = nullptr;
1405  SDValue N0 = Addr;
1406  if (CurDAG->isBaseWithConstantOffset(Addr)) {
1407  C1 = cast<ConstantSDNode>(Addr.getOperand(1));
1408  if (isUInt<32>(C1->getZExtValue()))
1409  N0 = Addr.getOperand(0);
1410  else
1411  C1 = nullptr;
1412  }
1413 
1414  if (N0.getOpcode() == ISD::ADD) {
1415  // (add N2, N3) -> addr64, or
1416  // (add (add N2, N3), C1) -> addr64
1417  SDValue N2 = N0.getOperand(0);
1418  SDValue N3 = N0.getOperand(1);
1419  Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
1420 
1421  if (N2->isDivergent()) {
1422  if (N3->isDivergent()) {
1423  // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
1424  // addr64, and construct the resource from a 0 address.
1425  Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
1426  VAddr = N0;
1427  } else {
1428  // N2 is divergent, N3 is not.
1429  Ptr = N3;
1430  VAddr = N2;
1431  }
1432  } else {
1433  // N2 is not divergent.
1434  Ptr = N2;
1435  VAddr = N3;
1436  }
1437  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1438  } else if (N0->isDivergent()) {
1439  // N0 is divergent. Use it as the addr64, and construct the resource from a
1440  // 0 address.
1441  Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
1442  VAddr = N0;
1443  Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
1444  } else {
1445  // N0 -> offset, or
1446  // (N0 + C1) -> offset
1447  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
1448  Ptr = N0;
1449  }
1450 
1451  if (!C1) {
1452  // No offset.
1453  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1454  return true;
1455  }
1456 
1457  if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
1458  // Legal offset for instruction.
1459  Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1460  return true;
1461  }
1462 
1463  // Illegal offset, store it in soffset.
1464  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1465  SOffset =
1466  SDValue(CurDAG->getMachineNode(
1467  AMDGPU::S_MOV_B32, DL, MVT::i32,
1468  CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
1469  0);
1470  return true;
1471 }
1472 
1473 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1474  SDValue &VAddr, SDValue &SOffset,
1475  SDValue &Offset) const {
1476  SDValue Ptr, Offen, Idxen, Addr64;
1477 
1478  // The addr64 bit was removed for Volcanic Islands.
1479  // FIXME: This should be a pattern predicate and not reach here
1480  if (!Subtarget->hasAddr64())
1481  return false;
1482 
1483  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
1484  return false;
1485 
1486  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
1487  if (C->getSExtValue()) {
1488  SDLoc DL(Addr);
1489 
1490  const SITargetLowering& Lowering =
1491  *static_cast<const SITargetLowering*>(getTargetLowering());
1492 
1493  SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
1494  return true;
1495  }
1496 
1497  return false;
1498 }
1499 
1500 static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
1501  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
1502  return PSV && PSV->isStack();
1503 }
1504 
1505 std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
1506  SDLoc DL(N);
1507 
1508  auto *FI = dyn_cast<FrameIndexSDNode>(N);
1509  SDValue TFI =
1510  FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N;
1511 
1512  // We rebase the base address into an absolute stack address and hence
1513  // use constant 0 for soffset. This value must be retained until
1514  // frame elimination, at which point eliminateFrameIndex will choose the
1515  // appropriate frame register if need be.
1516  return std::make_pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32));
1517 }
1518 
1519 bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
1520  SDValue Addr, SDValue &Rsrc,
1521  SDValue &VAddr, SDValue &SOffset,
1522  SDValue &ImmOffset) const {
1523 
1524  SDLoc DL(Addr);
1525  MachineFunction &MF = CurDAG->getMachineFunction();
1526  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1527 
1528  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1529 
1530  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1531  int64_t Imm = CAddr->getSExtValue();
1532  const int64_t NullPtr =
1533  AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
1534  // Don't fold null pointer.
1535  if (Imm != NullPtr) {
1536  SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
1537  MachineSDNode *MovHighBits = CurDAG->getMachineNode(
1538  AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
1539  VAddr = SDValue(MovHighBits, 0);
1540 
1541  // In a call sequence, stores to the argument stack area are relative to the
1542  // stack pointer.
1543  const MachinePointerInfo &PtrInfo
1544  = cast<MemSDNode>(Parent)->getPointerInfo();
1545  SOffset = isStackPtrRelative(PtrInfo)
1546  ? CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32)
1547  : CurDAG->getTargetConstant(0, DL, MVT::i32);
1548  ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
1549  return true;
1550  }
1551  }
1552 
1553  if (CurDAG->isBaseWithConstantOffset(Addr)) {
1554  // (add n0, c1)
1555 
1556  SDValue N0 = Addr.getOperand(0);
1557  SDValue N1 = Addr.getOperand(1);
1558 
1559  // Offsets in vaddr must be positive if range checking is enabled.
1560  //
1561  // The total computation of vaddr + soffset + offset must not overflow. If
1562  // vaddr is negative, even if offset is 0 the sgpr offset add will end up
1563  // overflowing.
1564  //
1565  // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
1566  // always perform a range check. If a negative vaddr base index was used,
1567  // this would fail the range check. The overall address computation would
1568  // compute a valid address, but this doesn't happen due to the range
1569  // check. For out-of-bounds MUBUF loads, a 0 is returned.
1570  //
1571  // Therefore it should be safe to fold any VGPR offset on gfx9 into the
1572  // MUBUF vaddr, but not on older subtargets which can only do this if the
1573  // sign bit is known 0.
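
    // This is why, e.g., (add frameindex, 16) folds to VAddr = frameindex
    // with ImmOffset = 16 whenever the subtarget does not range check
    // private memory accesses, and otherwise only when SignBitIsZero proves
    // the base is non-negative.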
1574  ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1575  if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
1576  (!Subtarget->privateMemoryResourceIsRangeChecked() ||
1577  CurDAG->SignBitIsZero(N0))) {
1578  std::tie(VAddr, SOffset) = foldFrameIndex(N0);
1579  ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1580  return true;
1581  }
1582  }
1583 
1584  // (node)
1585  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
1586  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1587  return true;
1588 }
1589 
1590 bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
1591  SDValue Addr,
1592  SDValue &SRsrc,
1593  SDValue &SOffset,
1594  SDValue &Offset) const {
1595  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
1596  if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
1597  return false;
1598 
1599  SDLoc DL(Addr);
1600  MachineFunction &MF = CurDAG->getMachineFunction();
1601  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1602 
1603  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1604 
1605  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
1606 
1607  // FIXME: Get from MachinePointerInfo? We should only be using the frame
1608  // offset if we know this is in a call sequence.
1609  SOffset = isStackPtrRelative(PtrInfo)
1610  ? CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32)
1611  : CurDAG->getTargetConstant(0, DL, MVT::i32);
1612 
1613  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
1614  return true;
1615 }
1616 
1617 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1618  SDValue &SOffset, SDValue &Offset
1619  ) const {
1620  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
1621  const SIInstrInfo *TII =
1622  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
1623 
1624  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
1625  return false;
1626 
1627  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
1628  !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
1629  !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
1630  uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
1631  APInt::getAllOnesValue(32).getZExtValue(); // Size
1632  SDLoc DL(Addr);
1633 
1634  const SITargetLowering& Lowering =
1635  *static_cast<const SITargetLowering*>(getTargetLowering());
1636 
1637  SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
1638  return true;
1639  }
1640  return false;
1641 }
1642 
1643 // Find a load or store from the corresponding pattern root.
1644 // Roots may be build_vector, bitconvert or their combinations.
1645 static MemSDNode* findMemSDNode(SDNode *N) {
1646  N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode();
1647  if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
1648  return MN;
1649  assert(isa<BuildVectorSDNode>(N));
1650  for (SDValue V : N->op_values())
1651  if (MemSDNode *MN =
1652  dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
1653  return MN;
1654  llvm_unreachable("cannot find MemSDNode in the pattern!");
1655 }
1656 
1657 bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
1658  SDValue &VAddr, SDValue &Offset,
1659  uint64_t FlatVariant) const {
1660  int64_t OffsetVal = 0;
1661 
1662  unsigned AS = findMemSDNode(N)->getAddressSpace();
1663 
1664  bool CanHaveFlatSegmentOffsetBug =
1665  Subtarget->hasFlatSegmentOffsetBug() &&
1666  FlatVariant == SIInstrFlags::FLAT &&
1667  (AS == AMDGPUAS::FLAT_ADDRESS || AS == AMDGPUAS::GLOBAL_ADDRESS);
1668 
1669  if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
1670  SDValue N0, N1;
1671  if (isBaseWithConstantOffset64(Addr, N0, N1)) {
1672  int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
1673 
1674  const SIInstrInfo *TII = Subtarget->getInstrInfo();
1675  if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
1676  Addr = N0;
1677  OffsetVal = COffsetVal;
1678  } else {
1679  // If the offset doesn't fit, put the low bits into the offset field and
1680  // add the rest.
1681  //
1682  // For a FLAT instruction the hardware decides whether to access
1683  // global/scratch/shared memory based on the high bits of vaddr,
1684  // ignoring the offset field, so we have to ensure that when we add
1685  // remainder to vaddr it still points into the same underlying object.
1686  // The easiest way to do that is to make sure that we split the offset
1687  // into two pieces that are both >= 0 or both <= 0.
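// Illustration (immediate field width varies by subtarget): a positive
// COffsetVal = 0x1234 that does not fit an 11-bit field could split into
// OffsetVal = 0x234 and RemainderOffset = 0x1000; both pieces are >= 0,
// so vaddr + RemainderOffset stays inside the original object.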
1688 
1689  SDLoc DL(N);
1690  uint64_t RemainderOffset;
1691 
1692  std::tie(OffsetVal, RemainderOffset) =
1693  TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
1694 
1695  SDValue AddOffsetLo =
1696  getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
1697  SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1698 
1699  if (Addr.getValueType().getSizeInBits() == 32) {
1700  SmallVector<SDValue, 3> Opnds;
1701  Opnds.push_back(N0);
1702  Opnds.push_back(AddOffsetLo);
1703  unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
1704  if (Subtarget->hasAddNoCarry()) {
1705  AddOp = AMDGPU::V_ADD_U32_e64;
1706  Opnds.push_back(Clamp);
1707  }
1708  Addr = SDValue(CurDAG->getMachineNode(AddOp, DL, MVT::i32, Opnds), 0);
1709  } else {
1710  // TODO: Should this try to use a scalar add pseudo if the base address
1711  // is uniform and saddr is usable?
1712  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
1713  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
1714 
1715  SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1716  DL, MVT::i32, N0, Sub0);
1717  SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1718  DL, MVT::i32, N0, Sub1);
1719 
1720  SDValue AddOffsetHi =
1721  getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
1722 
1723  SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
1724 
1725  SDNode *Add =
1726  CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64, DL, VTs,
1727  {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
1728 
1729  SDNode *Addc = CurDAG->getMachineNode(
1730  AMDGPU::V_ADDC_U32_e64, DL, VTs,
1731  {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});
1732 
1733  SDValue RegSequenceArgs[] = {
1734  CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
1735  SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1};
1736 
1737  Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
1738  MVT::i64, RegSequenceArgs),
1739  0);
1740  }
1741  }
1742  }
1743  }
1744 
1745  VAddr = Addr;
1746  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
1747  return true;
1748 }
1749 
1750 bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, SDValue Addr,
1751  SDValue &VAddr,
1752  SDValue &Offset) const {
1753  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FLAT);
1754 }
1755 
1756 bool AMDGPUDAGToDAGISel::SelectGlobalOffset(SDNode *N, SDValue Addr,
1757  SDValue &VAddr,
1758  SDValue &Offset) const {
1759  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FlatGlobal);
1760 }
1761 
1762 bool AMDGPUDAGToDAGISel::SelectScratchOffset(SDNode *N, SDValue Addr,
1763  SDValue &VAddr,
1764  SDValue &Offset) const {
1765  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,
1766  SIInstrFlags::FlatScratch);
1767 }
1768 
1769 // If this matches zero_extend i32:x, return x
1770 static SDValue matchZExtFromI32(SDValue Op) {
1771  if (Op.getOpcode() != ISD::ZERO_EXTEND)
1772  return SDValue();
1773 
1774  SDValue ExtSrc = Op.getOperand(0);
1775  return (ExtSrc.getValueType() == MVT::i32) ? ExtSrc : SDValue();
1776 }
1777 
1778 // Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
1779 bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
1780  SDValue Addr,
1781  SDValue &SAddr,
1782  SDValue &VOffset,
1783  SDValue &Offset) const {
1784  int64_t ImmOffset = 0;
1785 
1786  // Match the immediate offset first, which canonically is moved as low as
1787  // possible.
1788 
1789  SDValue LHS, RHS;
1790  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
1791  int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
1792  const SIInstrInfo *TII = Subtarget->getInstrInfo();
1793 
1794  if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
1795  SIInstrFlags::FlatGlobal)) {
1796  Addr = LHS;
1797  ImmOffset = COffsetVal;
1798  } else if (!LHS->isDivergent()) {
1799  if (COffsetVal > 0) {
1800  SDLoc SL(N);
1801  // saddr + large_offset -> saddr +
1802  // (voffset = large_offset & ~MaxOffset) +
1803  // (large_offset & MaxOffset);
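// For example, with MaxOffset = 4095: large_offset = 0x12345 yields
// voffset = 0x12000 and an immediate offset of 0x345.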
1804  int64_t SplitImmOffset, RemainderOffset;
1805  std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
1806  COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);
1807 
1808  if (isUInt<32>(RemainderOffset)) {
1809  SDNode *VMov = CurDAG->getMachineNode(
1810  AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
1811  CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
1812  VOffset = SDValue(VMov, 0);
1813  SAddr = LHS;
1814  Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
1815  return true;
1816  }
1817  }
1818 
1819  // We are adding a 64 bit SGPR and a constant. If the constant bus limit
1820  // is 1 we would need to perform 1 or 2 extra moves for each half of
1821  // the constant and it is better to do a scalar add and then issue a
1822  // single VALU instruction to materialize zero. Otherwise it takes fewer
1823  // instructions to perform VALU adds with immediates or inline literals.
1824  unsigned NumLiterals =
1825  !TII->isInlineConstant(APInt(32, COffsetVal & 0xffffffff)) +
1826  !TII->isInlineConstant(APInt(32, COffsetVal >> 32));
1827  if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
1828  return false;
1829  }
1830  }
1831 
1832  // Match the variable offset.
1833  if (Addr.getOpcode() == ISD::ADD) {
1834  LHS = Addr.getOperand(0);
1835  RHS = Addr.getOperand(1);
1836 
1837  if (!LHS->isDivergent()) {
1838  // add (i64 sgpr), (zero_extend (i32 vgpr))
1839  if (SDValue ZextRHS = matchZExtFromI32(RHS)) {
1840  SAddr = LHS;
1841  VOffset = ZextRHS;
1842  }
1843  }
1844 
1845  if (!SAddr && !RHS->isDivergent()) {
1846  // add (zero_extend (i32 vgpr)), (i64 sgpr)
1847  if (SDValue ZextLHS = matchZExtFromI32(LHS)) {
1848  SAddr = RHS;
1849  VOffset = ZextLHS;
1850  }
1851  }
1852 
1853  if (SAddr) {
1854  Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
1855  return true;
1856  }
1857  }
1858 
1859  if (Addr->isDivergent() || Addr.getOpcode() == ISD::UNDEF ||
1860  isa<ConstantSDNode>(Addr))
1861  return false;
1862 
1863  // It's cheaper to materialize a single 32-bit zero for vaddr than the two
1864  // moves required to copy a 64-bit SGPR to a VGPR.
1865  SAddr = Addr;
1866  SDNode *VMov =
1867  CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
1868  CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
1869  VOffset = SDValue(VMov, 0);
1870  Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
1871  return true;
1872 }
1873 
1874 static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr) {
1875  if (auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
1876  SAddr = CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
1877  } else if (SAddr.getOpcode() == ISD::ADD &&
1878  isa<FrameIndexSDNode>(SAddr.getOperand(0))) {
1879  // Materialize this with a scalar move so the address stays scalar and
1880  // avoids a readfirstlane.
1881  auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
1882  SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
1883  FI->getValueType(0));
1884  SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_U32, SDLoc(SAddr),
1885  MVT::i32, TFI, SAddr.getOperand(1)),
1886  0);
1887  }
1888 
1889  return SAddr;
1890 }
1891 
1892 // Match (32-bit SGPR base) + sext(imm offset)
1893 bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *N,
1894  SDValue Addr,
1895  SDValue &SAddr,
1896  SDValue &Offset) const {
1897  if (Addr->isDivergent())
1898  return false;
1899 
1900  SAddr = Addr;
1901  int64_t COffsetVal = 0;
1902 
1903  if (CurDAG->isBaseWithConstantOffset(Addr)) {
1904  COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
1905  SAddr = Addr.getOperand(0);
1906  }
1907 
1908  SAddr = SelectSAddrFI(CurDAG, SAddr);
1909 
1910  const SIInstrInfo *TII = Subtarget->getInstrInfo();
1911 
1912  if (!TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS,
1913  SIInstrFlags::FlatScratch)) {
1914  int64_t SplitImmOffset, RemainderOffset;
1915  std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
1916  COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, SIInstrFlags::FlatScratch);
1917 
1918  COffsetVal = SplitImmOffset;
1919 
1920  SDLoc DL(N);
1921  SDValue AddOffset =
1922  getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
1923  SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_U32, DL, MVT::i32,
1924  SAddr, AddOffset), 0);
1925  }
1926 
1927  Offset = CurDAG->getTargetConstant(COffsetVal, SDLoc(), MVT::i16);
1928 
1929  return true;
1930 }
1931 
1932 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1933  SDValue &Offset, bool &Imm) const {
1934  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1935  if (!C) {
1936  if (ByteOffsetNode.getValueType().isScalarInteger() &&
1937  ByteOffsetNode.getValueType().getSizeInBits() == 32) {
1938  Offset = ByteOffsetNode;
1939  Imm = false;
1940  return true;
1941  }
1942  if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
1943  if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) {
1944  Offset = ByteOffsetNode.getOperand(0);
1945  Imm = false;
1946  return true;
1947  }
1948  }
1949  return false;
1950  }
1951 
1952  SDLoc SL(ByteOffsetNode);
1953  // GFX9 and GFX10 have signed byte immediate offsets.
1954  int64_t ByteOffset = C->getSExtValue();
1955  Optional<int64_t> EncodedOffset =
1956  AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, false);
1957  if (EncodedOffset) {
1958  Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
1959  Imm = true;
1960  return true;
1961  }
1962 
1963  // SGPR and literal offsets are unsigned.
1964  if (ByteOffset < 0)
1965  return false;
1966 
1967  EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset);
1968  if (EncodedOffset) {
1969  Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
1970  return true;
1971  }
1972 
1973  if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
1974  return false;
1975 
1976  SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1977  Offset = SDValue(
1978  CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
1979 
1980  return true;
1981 }
1982 
1983 SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
1984  if (Addr.getValueType() != MVT::i32)
1985  return Addr;
1986 
1987  // Zero-extend a 32-bit address.
1988  SDLoc SL(Addr);
1989 
1990  const MachineFunction &MF = CurDAG->getMachineFunction();
1991  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1992  unsigned AddrHiVal = Info->get32BitAddressHighBits();
1993  SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
1994 
1995  const SDValue Ops[] = {
1996  CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
1997  Addr,
1998  CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
1999  SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
2000  0),
2001  CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
2002  };
2003 
2004  return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
2005  Ops), 0);
2006 }
2007 
2008 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
2009  SDValue &Offset, bool &Imm) const {
2010  SDLoc SL(Addr);
2011 
2012  // A 32-bit (address + offset) should not cause unsigned 32-bit integer
2013  // wraparound, because s_load instructions perform the addition in 64 bits.
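// For example, folding base = 0xFFFFFFF0, offset = 0x20 would be wrong
// without the no-unsigned-wrap flag: the 32-bit add wraps to 0x10, while
// the hardware's 64-bit add yields 0x100000010.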
2014  if ((Addr.getValueType() != MVT::i32 ||
2015  Addr->getFlags().hasNoUnsignedWrap())) {
2016  SDValue N0, N1;
2017  // Extract the base and offset if possible.
2018  if (CurDAG->isBaseWithConstantOffset(Addr) ||
2019  Addr.getOpcode() == ISD::ADD) {
2020  N0 = Addr.getOperand(0);
2021  N1 = Addr.getOperand(1);
2022  } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
2023  assert(N0 && N1 && isa<ConstantSDNode>(N1));
2024  }
2025  if (N0 && N1) {
2026  if (SelectSMRDOffset(N1, Offset, Imm)) {
2027  SBase = Expand32BitAddress(N0);
2028  return true;
2029  }
2030  }
2031  }
2032  SBase = Expand32BitAddress(Addr);
2033  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
2034  Imm = true;
2035  return true;
2036 }
2037 
2038 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
2039  SDValue &Offset) const {
2040  bool Imm = false;
2041  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
2042 }
2043 
2044 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
2045  SDValue &Offset) const {
2046 
2047  assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
2048 
2049  bool Imm = false;
2050  if (!SelectSMRD(Addr, SBase, Offset, Imm))
2051  return false;
2052 
2053  return !Imm && isa<ConstantSDNode>(Offset);
2054 }
2055 
2056 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
2057  SDValue &Offset) const {
2058  bool Imm = false;
2059  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
2060  !isa<ConstantSDNode>(Offset);
2061 }
2062 
2063 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
2064  SDValue &Offset) const {
2065  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
2066  // The immediate offset for S_BUFFER instructions is unsigned.
2067  if (auto Imm =
2068  AMDGPU::getSMRDEncodedOffset(*Subtarget, C->getZExtValue(), true)) {
2069  Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32);
2070  return true;
2071  }
2072  }
2073 
2074  return false;
2075 }
2076 
2077 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
2078  SDValue &Offset) const {
2079  assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
2080 
2081  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
2082  if (auto Imm = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget,
2083  C->getZExtValue())) {
2084  Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32);
2085  return true;
2086  }
2087  }
2088 
2089  return false;
2090 }
2091 
2092 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
2093  SDValue &Base,
2094  SDValue &Offset) const {
2095  SDLoc DL(Index);
2096 
2097  if (CurDAG->isBaseWithConstantOffset(Index)) {
2098  SDValue N0 = Index.getOperand(0);
2099  SDValue N1 = Index.getOperand(1);
2100  ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
2101 
2102  // (add n0, c0)
2103  // Don't peel off the offset (c0) if doing so could possibly cause
2104  // the base (n0) to become negative.
2105  // (or n0, |c0|) can never change the sign given isBaseWithConstantOffset.
2106  if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0) ||
2107  (Index->getOpcode() == ISD::OR && C1->getSExtValue() >= 0)) {
2108  Base = N0;
2109  Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
2110  return true;
2111  }
2112  }
2113 
2114  if (isa<ConstantSDNode>(Index))
2115  return false;
2116 
2117  Base = Index;
2118  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2119  return true;
2120 }
2121 
2122 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
2123  SDValue Val, uint32_t Offset,
2124  uint32_t Width) {
2125  // Transformation function: pack the offset and width of a BFE into
2126  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
2127  // source, bits [5:0] contain the offset and bits [22:16] the width.
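// For example, Offset = 8 and Width = 16 pack to 0x00100008.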
2128  uint32_t PackedVal = Offset | (Width << 16);
2129  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
2130 
2131  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
2132 }
2133 
2134 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
2135  // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
2136  // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
2137  // Predicate: 0 < b <= c < 32
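// For example, "(a << 8) srl 24" extracts bits [23:16] of a and becomes
// BFE_U32 a, 16, 8 (offset = 24 - 8, width = 32 - 24).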
2138 
2139  const SDValue &Shl = N->getOperand(0);
2140  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
2141  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2142 
2143  if (B && C) {
2144  uint32_t BVal = B->getZExtValue();
2145  uint32_t CVal = C->getZExtValue();
2146 
2147  if (0 < BVal && BVal <= CVal && CVal < 32) {
2148  bool Signed = N->getOpcode() == ISD::SRA;
2149  unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2150 
2151  ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
2152  32 - CVal));
2153  return;
2154  }
2155  }
2156  SelectCode(N);
2157 }
2158 
2159 void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
2160  switch (N->getOpcode()) {
2161  case ISD::AND:
2162  if (N->getOperand(0).getOpcode() == ISD::SRL) {
2163  // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
2164  // Predicate: isMask(mask)
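// For example, "(a srl 4) & 0xff" becomes BFE_U32 a, 4, 8.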
2165  const SDValue &Srl = N->getOperand(0);
2166  ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
2167  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
2168 
2169  if (Shift && Mask) {
2170  uint32_t ShiftVal = Shift->getZExtValue();
2171  uint32_t MaskVal = Mask->getZExtValue();
2172 
2173  if (isMask_32(MaskVal)) {
2174  uint32_t WidthVal = countPopulation(MaskVal);
2175 
2176  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
2177  Srl.getOperand(0), ShiftVal, WidthVal));
2178  return;
2179  }
2180  }
2181  }
2182  break;
2183  case ISD::SRL:
2184  if (N->getOperand(0).getOpcode() == ISD::AND) {
2185  // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
2186  // Predicate: isMask(mask >> b)
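// For example, "(a & 0xff0) srl 4" becomes BFE_U32 a, 4, 8
// (0xff0 >> 4 = 0xff, whose popcount is 8).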
2187  const SDValue &And = N->getOperand(0);
2188  ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
2189  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
2190 
2191  if (Shift && Mask) {
2192  uint32_t ShiftVal = Shift->getZExtValue();
2193  uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
2194 
2195  if (isMask_32(MaskVal)) {
2196  uint32_t WidthVal = countPopulation(MaskVal);
2197 
2198  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
2199  And.getOperand(0), ShiftVal, WidthVal));
2200  return;
2201  }
2202  }
2203  } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
2204  SelectS_BFEFromShifts(N);
2205  return;
2206  }
2207  break;
2208  case ISD::SRA:
2209  if (N->getOperand(0).getOpcode() == ISD::SHL) {
2210  SelectS_BFEFromShifts(N);
2211  return;
2212  }
2213  break;
2214 
2215  case ISD::SIGN_EXTEND_INREG: {
2216  // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
2217  SDValue Src = N->getOperand(0);
2218  if (Src.getOpcode() != ISD::SRL)
2219  break;
2220 
2221  const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
2222  if (!Amt)
2223  break;
2224 
2225  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2226  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
2227  Amt->getZExtValue(), Width));
2228  return;
2229  }
2230  }
2231 
2232  SelectCode(N);
2233 }
2234 
2235 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
2236  assert(N->getOpcode() == ISD::BRCOND);
2237  if (!N->hasOneUse())
2238  return false;
2239 
2240  SDValue Cond = N->getOperand(1);
2241  if (Cond.getOpcode() == ISD::CopyToReg)
2242  Cond = Cond.getOperand(2);
2243 
2244  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
2245  return false;
2246 
2247  MVT VT = Cond.getOperand(0).getSimpleValueType();
2248  if (VT == MVT::i32)
2249  return true;
2250 
2251  if (VT == MVT::i64) {
2252  auto ST = static_cast<const GCNSubtarget *>(Subtarget);
2253 
2254  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
2255  return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
2256  }
2257 
2258  return false;
2259 }
2260 
2261 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
2262  SDValue Cond = N->getOperand(1);
2263 
2264  if (Cond.isUndef()) {
2265  CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
2266  N->getOperand(2), N->getOperand(0));
2267  return;
2268  }
2269 
2270  const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
2271  const SIRegisterInfo *TRI = ST->getRegisterInfo();
2272 
2273  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
2274  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
2275  Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
2276  SDLoc SL(N);
2277 
2278  if (!UseSCCBr) {
2279  // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
2280  // analyzed what generates the vcc value, so we do not know whether vcc
2281  // bits for disabled lanes are 0. Thus we need to mask out bits for
2282  // disabled lanes.
2283  //
2284  // For the case that we select S_CBRANCH_SCC1 and it gets
2285  // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
2286  // SIInstrInfo::moveToVALU, which inserts the S_AND.
2287  //
2288  // We could add an analysis of what generates the vcc value here and omit
2289  // the S_AND when it is unnecessary. But it would be better to add a separate
2290  // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
2291  // catches both cases.
2292  Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32
2293  : AMDGPU::S_AND_B64,
2294  SL, MVT::i1,
2295  CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO
2296  : AMDGPU::EXEC,
2297  MVT::i1),
2298  Cond),
2299  0);
2300  }
2301 
2302  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
2303  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
2304  N->getOperand(2), // Basic Block
2305  VCC.getValue(0));
2306 }
2307 
2308 void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
2309  MVT VT = N->getSimpleValueType(0);
2310  bool IsFMA = N->getOpcode() == ISD::FMA;
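// Fall back to normal selection unless the type is f32 and the subtarget
// has the mix instruction matching this opcode: v_fma_mix_f32 for FMA,
// v_mad_mix_f32 for FMAD (a subtarget provides one or the other).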
2311  if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
2312  !Subtarget->hasFmaMixInsts()) ||
2313  ((IsFMA && Subtarget->hasMadMixInsts()) ||
2314  (!IsFMA && Subtarget->hasFmaMixInsts()))) {
2315  SelectCode(N);
2316  return;
2317  }
2318 
2319  SDValue Src0 = N->getOperand(0);
2320  SDValue Src1 = N->getOperand(1);
2321  SDValue Src2 = N->getOperand(2);
2322  unsigned Src0Mods, Src1Mods, Src2Mods;
2323 
2324  // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
2325  // using the conversion from f16.
2326  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
2327  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
2328  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
2329 
2330  assert((IsFMA || !Mode.allFP32Denormals()) &&
2331  "fmad selected with denormals enabled");
2332  // TODO: We can select this with f32 denormals enabled if all the sources are
2333  // converted from f16 (in which case fmad isn't legal).
2334 
2335  if (Sel0 || Sel1 || Sel2) {
2336  // For dummy operands.
2337  SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2338  SDValue Ops[] = {
2339  CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
2340  CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
2341  CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
2342  CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
2343  Zero, Zero
2344  };
2345 
2346  CurDAG->SelectNodeTo(N,
2347  IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
2348  MVT::f32, Ops);
2349  } else {
2350  SelectCode(N);
2351  }
2352 }
2353 
2354 // This is here because there isn't a way to use the generated sub0_sub1 as the
2355 // subreg index to EXTRACT_SUBREG in tablegen.
2356 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
2357  MemSDNode *Mem = cast<MemSDNode>(N);
2358  unsigned AS = Mem->getAddressSpace();
2359  if (AS == AMDGPUAS::FLAT_ADDRESS) {
2360  SelectCode(N);
2361  return;
2362  }
2363 
2364  MVT VT = N->getSimpleValueType(0);
2365  bool Is32 = (VT == MVT::i32);
2366  SDLoc SL(N);
2367 
2368  MachineSDNode *CmpSwap = nullptr;
2369  if (Subtarget->hasAddr64()) {
2370  SDValue SRsrc, VAddr, SOffset, Offset;
2371 
2372  if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset)) {
2373  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
2374  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
2375  SDValue CmpVal = Mem->getOperand(2);
2376  SDValue CPol = CurDAG->getTargetConstant(AMDGPU::CPol::GLC, SL, MVT::i32);
2377 
2378  // XXX - Do we care about glue operands?
2379 
2380  SDValue Ops[] = {CmpVal, VAddr, SRsrc, SOffset, Offset, CPol,
2381  Mem->getChain()};
2382 
2383  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
2384  }
2385  }
2386 
2387  if (!CmpSwap) {
2388  SDValue SRsrc, SOffset, Offset;
2389  if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset)) {
2390  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
2391  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
2392 
2393  SDValue CmpVal = Mem->getOperand(2);
2394  SDValue CPol = CurDAG->getTargetConstant(AMDGPU::CPol::GLC, SL, MVT::i32);
2395  SDValue Ops[] = {CmpVal, SRsrc, SOffset, Offset, CPol, Mem->getChain()};
2396 
2397  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
2398  }
2399  }
2400 
2401  if (!CmpSwap) {
2402  SelectCode(N);
2403  return;
2404  }
2405 
2406  MachineMemOperand *MMO = Mem->getMemOperand();
2407  CurDAG->setNodeMemRefs(CmpSwap, {MMO});
2408 
2409  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
2410  SDValue Extract
2411  = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
2412 
2413  ReplaceUses(SDValue(N, 0), Extract);
2414  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
2415  CurDAG->RemoveDeadNode(N);
2416 }
2417 
2418 void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
2419  // The address is assumed to be uniform, so if it ends up in a VGPR, it will
2420  // be copied to an SGPR with readfirstlane.
2421  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2422  AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2423 
2424  SDValue Chain = N->getOperand(0);
2425  SDValue Ptr = N->getOperand(2);
2426  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2427  MachineMemOperand *MMO = M->getMemOperand();
2428  bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
2429 
2430  SDValue Offset;
2431  if (CurDAG->isBaseWithConstantOffset(Ptr)) {
2432  SDValue PtrBase = Ptr.getOperand(0);
2433  SDValue PtrOffset = Ptr.getOperand(1);
2434 
2435  const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
2436  if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
2437  N = glueCopyToM0(N, PtrBase);
2438  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
2439  }
2440  }
2441 
2442  if (!Offset) {
2443  N = glueCopyToM0(N, Ptr);
2444  Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2445  }
2446 
2447  SDValue Ops[] = {
2448  Offset,
2449  CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
2450  Chain,
2451  N->getOperand(N->getNumOperands() - 1) // New glue
2452  };
2453 
2454  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2455  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2456 }
2457 
2458 static unsigned gwsIntrinToOpcode(unsigned IntrID) {
2459  switch (IntrID) {
2460  case Intrinsic::amdgcn_ds_gws_init:
2461  return AMDGPU::DS_GWS_INIT;
2462  case Intrinsic::amdgcn_ds_gws_barrier:
2463  return AMDGPU::DS_GWS_BARRIER;
2464  case Intrinsic::amdgcn_ds_gws_sema_v:
2465  return AMDGPU::DS_GWS_SEMA_V;
2466  case Intrinsic::amdgcn_ds_gws_sema_br:
2467  return AMDGPU::DS_GWS_SEMA_BR;
2468  case Intrinsic::amdgcn_ds_gws_sema_p:
2469  return AMDGPU::DS_GWS_SEMA_P;
2470  case Intrinsic::amdgcn_ds_gws_sema_release_all:
2471  return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
2472  default:
2473  llvm_unreachable("not a gws intrinsic");
2474  }
2475 }
2476 
2477 void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
2478  if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
2479  !Subtarget->hasGWSSemaReleaseAll()) {
2480  // Let this error.
2481  SelectCode(N);
2482  return;
2483  }
2484 
2485  // Chain, intrinsic ID, vsrc, offset
2486  const bool HasVSrc = N->getNumOperands() == 4;
2487  assert(HasVSrc || N->getNumOperands() == 3);
2488 
2489  SDLoc SL(N);
2490  SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
2491  int ImmOffset = 0;
2492  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2493  MachineMemOperand *MMO = M->getMemOperand();
2494 
2495  // Don't worry if the offset ends up in a VGPR. Only one lane's value will
2496  // take effect, so SIFixSGPRCopies will validly insert readfirstlane.
2497 
2498  // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
2499  // offset field) % 64. Some versions of the programming guide omit the m0
2500  // part, or claim it's from offset 0.
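// Consequently, a variable offset is shifted left by 16 below so it lands
// in M0[21:16], while a constant offset sets M0 to 0 and uses the
// instruction's offset field instead.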
2501  if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
2502  // If we have a constant offset, try to use the 0 in m0 as the base.
2503  // TODO: Look into changing the default m0 initialization value. If the
2504  // default -1 set only the low 16 bits, we could leave it as-is and add 1 to
2505  // the immediate offset.
2506  glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
2507  ImmOffset = ConstOffset->getZExtValue();
2508  } else {
2509  if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
2510  ImmOffset = BaseOffset.getConstantOperandVal(1);
2511  BaseOffset = BaseOffset.getOperand(0);
2512  }
2513 
2514  // Prefer to do the shift in an SGPR since it should be possible to use m0
2515  // as the result directly. If it's already an SGPR, it will be eliminated
2516  // later.
2517  SDNode *SGPROffset
2518  = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
2519  BaseOffset);
2520  // Shift to offset in m0
2521  SDNode *M0Base
2522  = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
2523  SDValue(SGPROffset, 0),
2524  CurDAG->getTargetConstant(16, SL, MVT::i32));
2525  glueCopyToM0(N, SDValue(M0Base, 0));
2526  }
2527 
2528  SDValue Chain = N->getOperand(0);
2529  SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
2530 
2531  const unsigned Opc = gwsIntrinToOpcode(IntrID);
2532  SmallVector<SDValue, 5> Ops;
2533  if (HasVSrc)
2534  Ops.push_back(N->getOperand(2));
2535  Ops.push_back(OffsetField);
2536  Ops.push_back(Chain);
2537 
2538  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2539  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2540 }
2541 
2542 void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
2543  if (Subtarget->getLDSBankCount() != 16) {
2544  // This is a single instruction with a pattern.
2545  SelectCode(N);
2546  return;
2547  }
2548 
2549  SDLoc DL(N);
2550 
2551  // This requires 2 instructions. It is possible to write a pattern to support
2552  // this, but the generated isel emitter doesn't correctly deal with multiple
2553  // output instructions using the same physical register input. The copy to m0
2554  // is incorrectly placed before the second instruction.
2555  //
2556  // TODO: Match source modifiers.
2557  //
2558  // def : Pat <
2559  // (int_amdgcn_interp_p1_f16
2560  // (VOP3Mods f32:$src0, i32:$src0_modifiers),
2561  // (i32 timm:$attrchan), (i32 timm:$attr),
2562  // (i1 timm:$high), M0),
2563  // (V_INTERP_P1LV_F16 $src0_modifiers, VGPR_32:$src0, timm:$attr,
2564  // timm:$attrchan, 0,
2565  // (V_INTERP_MOV_F32 2, timm:$attr, timm:$attrchan), timm:$high)> {
2566  // let Predicates = [has16BankLDS];
2567  // }
2568 
2569  // 16 bank LDS
2570  SDValue ToM0 = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, AMDGPU::M0,
2571  N->getOperand(5), SDValue());
2572 
2573  SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::Other);
2574 
2575  SDNode *InterpMov =
2576  CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs, {
2577  CurDAG->getTargetConstant(2, DL, MVT::i32), // P0
2578  N->getOperand(3), // Attr
2579  N->getOperand(2), // Attrchan
2580  ToM0.getValue(1) // In glue
2581  });
2582 
2583  SDNode *InterpP1LV =
2584  CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32, {
2585  CurDAG->getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
2586  N->getOperand(1), // Src0
2587  N->getOperand(3), // Attr
2588  N->getOperand(2), // Attrchan
2589  CurDAG->getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
2590  SDValue(InterpMov, 0), // Src2 - holds two f16 values selected by high
2591  N->getOperand(4), // high
2592  CurDAG->getTargetConstant(0, DL, MVT::i1), // $clamp
2593  CurDAG->getTargetConstant(0, DL, MVT::i32), // $omod
2594  SDValue(InterpMov, 1)
2595  });
2596 
2597  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), SDValue(InterpP1LV, 0));
2598 }
2599 
2600 void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
2601  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2602  switch (IntrID) {
2603  case Intrinsic::amdgcn_ds_append:
2604  case Intrinsic::amdgcn_ds_consume: {
2605  if (N->getValueType(0) != MVT::i32)
2606  break;
2607  SelectDSAppendConsume(N, IntrID);
2608  return;
2609  }
2610  }
2611 
2612  SelectCode(N);
2613 }
2614 
2615 void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
2616  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
2617  unsigned Opcode;
2618  switch (IntrID) {
2619  case Intrinsic::amdgcn_wqm:
2620  Opcode = AMDGPU::WQM;
2621  break;
2622  case Intrinsic::amdgcn_softwqm:
2623  Opcode = AMDGPU::SOFT_WQM;
2624  break;
2625  case Intrinsic::amdgcn_wwm:
2626  case Intrinsic::amdgcn_strict_wwm:
2627  Opcode = AMDGPU::STRICT_WWM;
2628  break;
2629  case Intrinsic::amdgcn_strict_wqm:
2630  Opcode = AMDGPU::STRICT_WQM;
2631  break;
2632  case Intrinsic::amdgcn_interp_p1_f16:
2633  SelectInterpP1F16(N);
2634  return;
2635  default:
2636  SelectCode(N);
2637  return;
2638  }
2639 
2640  SDValue Src = N->getOperand(1);
2641  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
2642 }
2643 
2644 void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
2645  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2646  switch (IntrID) {
2647  case Intrinsic::amdgcn_ds_gws_init:
2648  case Intrinsic::amdgcn_ds_gws_barrier:
2649  case Intrinsic::amdgcn_ds_gws_sema_v:
2650  case Intrinsic::amdgcn_ds_gws_sema_br:
2651  case Intrinsic::amdgcn_ds_gws_sema_p:
2652  case Intrinsic::amdgcn_ds_gws_sema_release_all:
2653  SelectDS_GWS(N, IntrID);
2654  return;
2655  default:
2656  break;
2657  }
2658 
2659  SelectCode(N);
2660 }
2661 
2662 bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
2663  unsigned &Mods,
2664  bool AllowAbs) const {
2665  Mods = 0;
2666  Src = In;
2667 
2668  if (Src.getOpcode() == ISD::FNEG) {
2669  Mods |= SISrcMods::NEG;
2670  Src = Src.getOperand(0);
2671  }
2672 
2673  if (AllowAbs && Src.getOpcode() == ISD::FABS) {
2674  Mods |= SISrcMods::ABS;
2675  Src = Src.getOperand(0);
2676  }
2677 
2678  return true;
2679 }
2680 
2681 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
2682  SDValue &SrcMods) const {
2683  unsigned Mods;
2684  if (SelectVOP3ModsImpl(In, Src, Mods)) {
2685  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2686  return true;
2687  }
2688 
2689  return false;
2690 }
2691 
2692 bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
2693  SDValue &SrcMods) const {
2694  unsigned Mods;
2695  if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {
2696  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2697  return true;
2698  }
2699 
2700  return false;
2701 }
2702 
2703 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
2704  SDValue &SrcMods) const {
2705  SelectVOP3Mods(In, Src, SrcMods);
2706  return isNoNanSrc(Src);
2707 }
2708 
2709 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
2710  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
2711  return false;
2712 
2713  Src = In;
2714  return true;
2715 }
2716 
2717 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
2718  SDValue &SrcMods, SDValue &Clamp,
2719  SDValue &Omod) const {
2720  SDLoc DL(In);
2721  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2722  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2723 
2724  return SelectVOP3Mods(In, Src, SrcMods);
2725 }
2726 
2727 bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
2728  SDValue &SrcMods, SDValue &Clamp,
2729  SDValue &Omod) const {
2730  SDLoc DL(In);
2731  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2732  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2733 
2734  return SelectVOP3BMods(In, Src, SrcMods);
2735 }
2736 
2737 bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
2738  SDValue &Clamp, SDValue &Omod) const {
2739  Src = In;
2740 
2741  SDLoc DL(In);
2742  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2743  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2744 
2745  return true;
2746 }
2747 
2748 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
2749  SDValue &SrcMods) const {
2750  unsigned Mods = 0;
2751  Src = In;
2752 
2753  if (Src.getOpcode() == ISD::FNEG) {
2754  Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
2755  Src = Src.getOperand(0);
2756  }
2757 
2758  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
2759  unsigned VecMods = Mods;
2760 
2761  SDValue Lo = stripBitcast(Src.getOperand(0));
2762  SDValue Hi = stripBitcast(Src.getOperand(1));
2763 
2764  if (Lo.getOpcode() == ISD::FNEG) {
2765  Lo = stripBitcast(Lo.getOperand(0));
2766  Mods ^= SISrcMods::NEG;
2767  }
2768 
2769  if (Hi.getOpcode() == ISD::FNEG) {
2770  Hi = stripBitcast(Hi.getOperand(0));
2771  Mods ^= SISrcMods::NEG_HI;
2772  }
2773 
2774  if (isExtractHiElt(Lo, Lo))
2775  Mods |= SISrcMods::OP_SEL_0;
2776 
2777  if (isExtractHiElt(Hi, Hi))
2778  Mods |= SISrcMods::OP_SEL_1;
2779 
2780  unsigned VecSize = Src.getValueSizeInBits();
2781  Lo = stripExtractLoElt(Lo);
2782  Hi = stripExtractLoElt(Hi);
2783 
2784  if (Lo.getValueSizeInBits() > VecSize) {
2785  Lo = CurDAG->getTargetExtractSubreg(
2786  (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
2787  MVT::getIntegerVT(VecSize), Lo);
2788  }
2789 
2790  if (Hi.getValueSizeInBits() > VecSize) {
2791  Hi = CurDAG->getTargetExtractSubreg(
2792  (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
2793  MVT::getIntegerVT(VecSize), Hi);
2794  }
2795 
2796  assert(Lo.getValueSizeInBits() <= VecSize &&
2797  Hi.getValueSizeInBits() <= VecSize);
2798 
2799  if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
2800  // Really a scalar input. Just select from the low half of the register to
2801  // avoid packing.
2802 
2803  if (VecSize == 32 || VecSize == Lo.getValueSizeInBits()) {
2804  Src = Lo;
2805  } else {
2806  assert(Lo.getValueSizeInBits() == 32 && VecSize == 64);
2807 
2808  SDLoc SL(In);
2809  SDValue Undef = SDValue(
2810  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL,
2811  Lo.getValueType()), 0);
2812  auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
2813  : AMDGPU::SReg_64RegClassID;
2814  const SDValue Ops[] = {
2815  CurDAG->getTargetConstant(RC, SL, MVT::i32),
2816  Lo, CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
2817  Undef, CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) };
2818 
2819  Src = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL,
2820  Src.getValueType(), Ops), 0);
2821  }
2822  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2823  return true;
2824  }
2825 
2826  if (VecSize == 64 && Lo == Hi && isa<ConstantFPSDNode>(Lo)) {
2827  uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF()
2828  .bitcastToAPInt().getZExtValue();
2829  if (AMDGPU::isInlinableLiteral32(Lit, Subtarget->hasInv2PiInlineImm())) {
2830  Src = CurDAG->getTargetConstant(Lit, SDLoc(In), MVT::i64);
2831  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2832  return true;
2833  }
2834  }
2835 
2836  Mods = VecMods;
2837  }
2838 
2839  // Packed instructions do not have abs modifiers.
2840  Mods |= SISrcMods::OP_SEL_1;
2841 
2842  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2843  return true;
2844 }
2845 
2846 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
2847  SDValue &SrcMods) const {
2848  Src = In;
2849  // FIXME: Handle op_sel
2850  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2851  return true;
2852 }
2853 
2854 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
2855  SDValue &SrcMods) const {
2856  // FIXME: Handle op_sel
2857  return SelectVOP3Mods(In, Src, SrcMods);
2858 }
2859 
2860 // The return value is not whether the match is possible (which it always is),
2861  // but whether or not a conversion is really used.
2862 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
2863  unsigned &Mods) const {
2864  Mods = 0;
2865  SelectVOP3ModsImpl(In, Src, Mods);
2866 
2867  if (Src.getOpcode() == ISD::FP_EXTEND) {
2868  Src = Src.getOperand(0);
2869  assert(Src.getValueType() == MVT::f16);
2870  Src = stripBitcast(Src);
2871 
2872  // Be careful about folding modifiers if we already have an abs. fneg is
2873  // applied last, so we don't want to apply an earlier fneg.
2874  if ((Mods & SISrcMods::ABS) == 0) {
2875  unsigned ModsTmp;
2876  SelectVOP3ModsImpl(Src, Src, ModsTmp);
2877 
2878  if ((ModsTmp & SISrcMods::NEG) != 0)
2879  Mods ^= SISrcMods::NEG;
2880 
2881  if ((ModsTmp & SISrcMods::ABS) != 0)
2882  Mods |= SISrcMods::ABS;
2883  }
2884 
2885  // op_sel/op_sel_hi decide the source type and source.
2886  // If the source's op_sel_hi is set, a conversion from fp16 is performed.
2887  // If the source's op_sel is set, it picks the high half of the source
2888  // register.
2889 
2890  Mods |= SISrcMods::OP_SEL_1;
2891  if (isExtractHiElt(Src, Src)) {
2892  Mods |= SISrcMods::OP_SEL_0;
2893 
2894  // TODO: Should we try to look for neg/abs here?
2895  }
2896 
2897  return true;
2898  }
2899 
2900  return false;
2901 }
2902 
2903 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2904  SDValue &SrcMods) const {
2905  unsigned Mods = 0;
2906  SelectVOP3PMadMixModsImpl(In, Src, Mods);
2907  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2908  return true;
2909 }
2910 
2911 SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
2912  if (In.isUndef())
2913  return CurDAG->getUNDEF(MVT::i32);
2914 
2915  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2916  SDLoc SL(In);
2917  return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
2918  }
2919 
2920  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2921  SDLoc SL(In);
2922  return CurDAG->getConstant(
2923  C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2924  }
2925 
2926  SDValue Src;
2927  if (isExtractHiElt(In, Src))
2928  return Src;
2929 
2930  return SDValue();
2931 }
2932 
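// Heuristic: returns true if some use of this immediate cannot accept an
// SGPR operand even after trying to commute the use. Gives up (returns
// false) once more than 10 uses have been inspected.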
2933 bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
2934  assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);
2935 
2936  const SIRegisterInfo *SIRI =
2937  static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
2938  const SIInstrInfo * SII =
2939  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2940 
2941  unsigned Limit = 0;
2942  bool AllUsesAcceptSReg = true;
2943  for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
2944  Limit < 10 && U != E; ++U, ++Limit) {
2945  const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
2946 
2947  // If the register class is unknown, it could be a register class
2948  // that needs to be an SGPR, e.g. an inline asm
2949  // constraint.
2950  if (!RC || SIRI->isSGPRClass(RC))
2951  return false;
2952 
2953  if (RC != &AMDGPU::VS_32RegClass) {
2954  AllUsesAcceptSReg = false;
2955  SDNode * User = *U;
2956  if (User->isMachineOpcode()) {
2957  unsigned Opc = User->getMachineOpcode();
2958  MCInstrDesc Desc = SII->get(Opc);
2959  if (Desc.isCommutable()) {
2960  unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
2961  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
2962  if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
2963  unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
2964  const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
2965  if (CommutedRC == &AMDGPU::VS_32RegClass)
2966  AllUsesAcceptSReg = true;
2967  }
2968  }
2969  }
2970  // If AllUsesAcceptSReg is still false, we haven't succeeded in
2971  // commuting the current user. This means at least one use strictly
2972  // requires a VGPR, so we will not attempt to commute other user
2973  // instructions.
2974  if (!AllUsesAcceptSReg)
2975  break;
2976  }
2977  }
2978  return !AllUsesAcceptSReg && (Limit < 10);
2979 }
2980 
2981 bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
2982  auto Ld = cast<LoadSDNode>(N);
2983 
2984  return Ld->getAlignment() >= 4 &&
2985  (((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2986  Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
2987  !N->isDivergent()) ||
2988  (Subtarget->getScalarizeGlobalBehavior() &&
2989  Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
2990  Ld->isSimple() && !N->isDivergent() &&
2991  static_cast<const SITargetLowering *>(
2992  getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)));
3004 }
3005 
3006 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
3007  const AMDGPUTargetLowering& Lowering =
3008  *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
3009  bool IsModified = false;
3010  do {
3011  IsModified = false;
3012 
3013  // Go over all selected nodes and try to fold them a bit more
3014  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
3015  while (Position != CurDAG->allnodes_end()) {
3016  SDNode *Node = &*Position++;
3017  MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
3018  if (!MachineNode)
3019  continue;
3020 
3021  SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
3022  if (ResNode != Node) {
3023  if (ResNode)
3024  ReplaceUses(Node, ResNode);
3025  IsModified = true;
3026  }
3027  }
3028  CurDAG->RemoveDeadNodes();
3029  } while (IsModified);
3030 }
3031 
3032 bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
3033  Subtarget = &MF.getSubtarget<R600Subtarget>();
3034  return SelectionDAGISel::runOnMachineFunction(MF);
3035 }
3036 
3037 bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
3038  if (!N->readMem())
3039  return false;
3040  if (CbId == -1)
3041  return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
3042  N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
3043 
3044  return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
3045 }
3046 
3047 bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
3048  SDValue& IntPtr) {
3049  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
3050  IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
3051  true);
3052  return true;
3053  }
3054  return false;
3055 }
3056 
3057 bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
3058  SDValue& BaseReg, SDValue &Offset) {
3059  if (!isa<ConstantSDNode>(Addr)) {
3060  BaseReg = Addr;
3061  Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
3062  return true;
3063  }
3064  return false;
3065 }
3066 
3067 void R600DAGToDAGISel::Select(SDNode *N) {
3068  unsigned int Opc = N->getOpcode();
3069  if (N->isMachineOpcode()) {
3070  N->setNodeId(-1);
3071  return; // Already selected.
3072  }
3073 
3074  switch (Opc) {
3075  default: break;
3076  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
3077  case ISD::SCALAR_TO_VECTOR:
3078  case ISD::BUILD_VECTOR: {
3079  EVT VT = N->getValueType(0);
3080  unsigned NumVectorElts = VT.getVectorNumElements();
3081  unsigned RegClassID;
3082  // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG,
3083  // which adds a 128-bit register copy when going through the
3084  // TwoAddressInstructions pass. We want to avoid 128-bit copies as much as
3085  // possible because they can't be bundled by our scheduler.
3086  switch(NumVectorElts) {
3087  case 2: RegClassID = R600::R600_Reg64RegClassID; break;
3088  case 4:
3089  if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
3090  RegClassID = R600::R600_Reg128VerticalRegClassID;
3091  else
3092  RegClassID = R600::R600_Reg128RegClassID;
3093  break;
3094  default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
3095  }
3096  SelectBuildVector(N, RegClassID);
3097  return;
3098  }
3099  }
3100 
3101  SelectCode(N);
3102 }
3103 
3104 bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
3105  SDValue &Offset) {
3106  ConstantSDNode *C;
3107  SDLoc DL(Addr);
3108 
3109  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
3110  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
3111  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
3112  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
3113  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
3114  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
3115  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
3116  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
3117  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
3118  Base = Addr.getOperand(0);
3119  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
3120  } else {
3121  Base = Addr;
3122  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
3123  }
3124 
3125  return true;
3126 }
3127 
3128 bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
3129  SDValue &Offset) {
3130  ConstantSDNode *IMMOffset;
3131 
3132  if (Addr.getOpcode() == ISD::ADD
3133  && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
3134  && isInt<16>(IMMOffset->getZExtValue())) {
3135 
3136  Base = Addr.getOperand(0);
3137  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
3138  MVT::i32);
3139  return true;
3140  // If the pointer address is constant, we can move it to the offset field.
3141  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
3142  && isInt<16>(IMMOffset->getZExtValue())) {
3143  Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
3144  SDLoc(CurDAG->getEntryNode()),
3145  R600::ZERO, MVT::i32);
3146  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
3147  MVT::i32);
3148  return true;
3149  }
3150 
3151  // Default case, no offset
3152  Base = Addr;
3153  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
3154  return true;
3155 }
llvm::ISD::SUB
@ SUB
Definition: ISDOpcodes.h:233
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
i
i
Definition: README.txt:29
llvm::MCInstrDesc::getNumDefs
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:243
CmpMode::FP
@ FP
llvm::ConstantSDNode
Definition: SelectionDAGNodes.h:1532
Lowering
Shadow Stack GC Lowering
Definition: ShadowStackGCLowering.cpp:98
Signed
@ Signed
Definition: NVPTXISelLowering.cpp:4543
llvm::ISD::INTRINSIC_VOID
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:192
llvm::AMDGPUSubtarget::hasInv2PiInlineImm
bool hasInv2PiInlineImm() const
Definition: AMDGPUSubtarget.h:164
llvm::TargetRegisterClass::getID
unsigned getID() const
Return the register class ID number.
Definition: TargetRegisterInfo.h:69
llvm
Definition: AllocatorList.h:23
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::M68kBeads::Term
@ Term
Definition: M68kBaseInfo.h:71
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::EVT::isScalarInteger
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:144
llvm::AMDGPUISD::DIV_SCALE
@ DIV_SCALE
Definition: AMDGPUISelLowering.h:396
llvm::SystemZISD::TM
@ TM
Definition: SystemZISelLowering.h:65
v2i32
gets compiled into this on rsp movaps rsp movaps rsp movaps rsp movaps rsp movaps rsp movaps rsp movaps rsp movaps rsp movq rsp movq rsp movq rsp movq rsp movq rsp rax movq rsp rax movq rsp rsp rsp eax eax jbe LBB1_3 rcx rax movq rsp eax rsp ret ecx eax rcx movl rsp jmp LBB1_2 gcc rsp rax movq rsp rsp movq rsp rax movq rsp eax eax jb L6 rdx eax rsp ret p2align edx rdx eax movl rsp eax rsp ret and it gets compiled into this on ebp esp eax movl ebp eax movl ebp eax esp popl ebp ret gcc ebp eax popl ebp ret Teach tblgen not to check bitconvert source type in some cases This allows us to consolidate the following patterns in X86InstrMMX v2i32(MMX_MOVDQ2Qrr VR128:$src))>
llvm::SDLoc
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Definition: SelectionDAGNodes.h:1078
llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:623
llvm::ISD::BITCAST
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:838
llvm::ISD::SETNE
@ SETNE
Definition: ISDOpcodes.h:1366
llvm::AMDGPUTargetLowering
Definition: AMDGPUISelLowering.h:27
llvm::SIRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(unsigned RCID) const
Definition: SIRegisterInfo.cpp:2367
SIMachineFunctionInfo.h
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::SDValue::getNode
SDNode * getNode() const
get the SDNode which holds the desired result
Definition: SelectionDAGNodes.h:152
llvm::ISD::ConstantFP
@ ConstantFP
Definition: ISDOpcodes.h:70
llvm::AMDGPU::getSMRDEncodedOffset
Optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer)
Definition: AMDGPUBaseInfo.cpp:1811
llvm::ISD::UADDO
@ UADDO
Definition: ISDOpcodes.h:304
llvm::AMDGPUSubtarget::hasMadMixInsts
bool hasMadMixInsts() const
Definition: AMDGPUSubtarget.h:136
llvm::MVT::i128
@ i128
Definition: MachineValueType.h:45
llvm::ISD::ADDC
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:262
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::ISD::SETEQ
@ SETEQ
Definition: ISDOpcodes.h:1361
llvm::AMDGPUISD::ATOMIC_CMP_SWAP
@ ATOMIC_CMP_SWAP
Definition: AMDGPUISelLowering.h:492
llvm::Triple::amdgcn
@ amdgcn
Definition: Triple.h:72
llvm::AMDGPUISD::CVT_PKNORM_I16_F32
@ CVT_PKNORM_I16_F32
Definition: AMDGPUISelLowering.h:451
llvm::MipsISD::Lo
@ Lo
Definition: MipsISelLowering.h:79
llvm::MachineSDNode
An SDNode that represents everything that will be needed to construct a MachineInstr.
Definition: SelectionDAGNodes.h:2518
llvm::AMDGPUISD::FMUL_W_CHAIN
@ FMUL_W_CHAIN
Definition: AMDGPUISelLowering.h:376
llvm::AMDGPUISD::LOAD_D16_HI_I8
@ LOAD_D16_HI_I8
Definition: AMDGPUISelLowering.h:480
llvm::GCNSubtarget::hasFlatSegmentOffsetBug
bool hasFlatSegmentOffsetBug() const
Definition: GCNSubtarget.h:585
llvm::MemSDNode::getMemoryVT
EVT getMemoryVT() const
Return the type of the in-memory value.
Definition: SelectionDAGNodes.h:1321
ValueTracking.h
llvm::MemSDNode::getChain
const SDValue & getChain() const
Definition: SelectionDAGNodes.h:1344
llvm::SDNode
Represents one node in the SelectionDAG.
Definition: SelectionDAGNodes.h:455
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:460
llvm::AMDGPU::getSMRDEncodedLiteralOffset32
Optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
Definition: AMDGPUBaseInfo.cpp:1828
llvm::SIInstrFlags::FlatScratch
@ FlatScratch
Definition: SIDefines.h:109
llvm::LoadSDNode
This class is used to represent ISD::LOAD nodes.
Definition: SelectionDAGNodes.h:2251
llvm::MVT::Glue
@ Glue
Definition: MachineValueType.h:249
llvm::SDNode::use_iterator
This class provides iterator support for SDUse operands that use a specific SDNode.
Definition: SelectionDAGNodes.h:719
Shift
bool Shift
Definition: README.txt:468
llvm::MemSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:1346
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:127
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1258
llvm::SelectionDAG::isBaseWithConstantOffset
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
Definition: SelectionDAG.cpp:4245
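A sketch of the base-plus-constant-offset split this predicate guards; Addr is an assumed SDValue operand:
 if (CurDAG->isBaseWithConstantOffset(Addr)) {
   SDValue Base = Addr.getOperand(0);
   int64_t COffset =
       cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
   // ... check that COffset fits the instruction's offset field
   // before folding it; otherwise keep the full ADD.
 }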
llvm::ISD::SETCC
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:692
llvm::Optional< int64_t >
llvm::SDNode::isPredecessorOf
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
Definition: SelectionDAGNodes.h:805
llvm::AMDGPU::SIModeRegisterDefaults
Definition: AMDGPUBaseInfo.h:893
llvm::createAMDGPUISelDag
FunctionPass * createAMDGPUISelDag(TargetMachine *TM=nullptr, CodeGenOpt::Level OptLevel=CodeGenOpt::Default)
This pass converts a legalized DAG into an AMDGPU-specific DAG, ready for instruction scheduling.
Definition: AMDGPUISelDAGToDAG.cpp:380
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::MemSDNode
This is an abstract virtual class for memory operations.
Definition: SelectionDAGNodes.h:1246
llvm::GCNSubtarget
Definition: GCNSubtarget.h:38
matchZExtFromI32
static SDValue matchZExtFromI32(SDValue Op)
Definition: AMDGPUISelDAGToDAG.cpp:1770
llvm::SDNode::isDivergent
bool isDivergent() const
Definition: SelectionDAGNodes.h:686
llvm::Lo_32
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition: MathExtras.h:353
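A minimal example of splitting a 64-bit immediate into the two 32-bit halves (Hi_32 is listed further below in this index); the value is arbitrary:
 uint64_t Imm = 0x123456789ABCDEF0ULL;
 uint32_t LoHalf = Lo_32(Imm); // 0x9ABCDEF0
 uint32_t HiHalf = Hi_32(Imm); // 0x12345678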
SelectionDAG.h
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::GCNSubtarget::getRegisterInfo
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:228
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
llvm::ISD::FABS
@ FABS
Definition: ISDOpcodes.h:857
llvm::ISD::ADDCARRY
@ ADDCARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:283
llvm::ISD::BRCOND
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:946
llvm::SDNode::getVTList
SDVTList getVTList() const
Definition: SelectionDAGNodes.h:924
llvm::GCNSubtarget::getInstrInfo
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:216
llvm::GCNSubtarget::unsafeDSOffsetFoldingEnabled
bool unsafeDSOffsetFoldingEnabled() const
Definition: GCNSubtarget.h:403
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::MipsISD::Hi
@ Hi
Definition: MipsISelLowering.h:75
llvm::R600RegisterInfo
Definition: R600RegisterInfo.h:22
llvm::GCNSubtarget::hasGWSSemaReleaseAll
bool hasGWSSemaReleaseAll() const
Definition: GCNSubtarget.h:621
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
SelectSAddrFI
static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr)
Definition: AMDGPUISelDAGToDAG.cpp:1874
llvm::AArch64ISD::NEG
@ NEG
Definition: AArch64ISelLowering.h:173
llvm::AMDGPUAS::CONSTANT_ADDRESS_32BIT
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
Definition: AMDGPU.h:367
llvm::AMDGPU::CPol::CPol
CPol
Definition: SIDefines.h:281
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:40
llvm::SelectionDAG::getTargetFrameIndex
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:688
llvm::RecurKind::And
@ And
Bitwise or logical AND of integers.
llvm::SDValue::getValueType
EVT getValueType() const
Return the ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:1113
llvm::LoopInfoBase::getLoopsInPreorder
SmallVector< LoopT *, 4 > getLoopsInPreorder()
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
Definition: LoopInfoImpl.h:577
llvm::MemIntrinsicSDNode
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
Definition: SelectionDAGNodes.h:1448
llvm::AMDGPUSubtarget::get
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Definition: AMDGPUSubtarget.cpp:977
llvm::SelectionDAG
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:216
SelectionDAGNodes.h
llvm::ISD::Constant
@ Constant
Definition: ISDOpcodes.h:69
llvm::ISD::ZERO_EXTEND
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:719
llvm::SIInstrInfo::isInlineConstant
bool isInlineConstant(const APInt &Imm) const
Definition: SIInstrInfo.cpp:3313
llvm::GCNSubtarget::hasAddr64
bool hasAddr64() const
Definition: GCNSubtarget.h:314
llvm::ISD::ABS
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:630
FunctionLoweringInfo.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::GCNSubtarget::useFlatForGlobal
bool useFlatForGlobal() const
Definition: GCNSubtarget.h:462
llvm::GCNSubtarget::hasFmaMixInsts
bool hasFmaMixInsts() const
Definition: GCNSubtarget.h:364
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:656
llvm::PseudoSourceValue::isStack
bool isStack() const
Definition: PseudoSourceValue.h:68
llvm::User
Definition: User.h:44
llvm::AMDGPUISD::CVT_PKNORM_U16_F32
@ CVT_PKNORM_U16_F32
Definition: AMDGPUISelLowering.h:452
llvm::ISD::SIGN_EXTEND_INREG
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:737
llvm::ISD::CopyToReg
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:196
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:35
llvm::EVT::getVectorNumElements
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:301
llvm::SISrcMods::OP_SEL_0
@ OP_SEL_0
Definition: SIDefines.h:203
llvm::AMDGPUISD::LOAD_D16_LO_I8
@ LOAD_D16_LO_I8
Definition: AMDGPUISelLowering.h:482
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::ISD::TRUNCATE
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:725
llvm::AMDGPUISD::ATOMIC_LOAD_FMAX
@ ATOMIC_LOAD_FMAX
Definition: AMDGPUISelLowering.h:496
llvm::ISD::SRA
@ SRA
Definition: ISDOpcodes.h:648
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:360
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:195
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MCInstrDesc::isCommutable
bool isCommutable() const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z,...
Definition: MCInstrDesc.h:472
llvm::SISrcMods::NEG_HI
@ NEG_HI
Definition: SIDefines.h:202
llvm::M0
unsigned M0(unsigned Val)
Definition: VE.h:371
llvm::Instruction
Definition: Instruction.h:45
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:281
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1631
llvm::SIInstrInfo::findCommutedOpIndices
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override
Definition: SIInstrInfo.cpp:2138
llvm::LegacyDivergenceAnalysis
Definition: LegacyDivergenceAnalysis.h:31
llvm::SIRegisterInfo::getSubRegFromChannel
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
Definition: SIRegisterInfo.cpp:427
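A small sketch of the mapping this helper performs; the concrete subregister names are assumptions about the generated enum:
 // Channel 1 of a tuple, one 32-bit register wide (e.g. AMDGPU::sub1).
 unsigned SubIdx32 = SIRegisterInfo::getSubRegFromChannel(1);
 // Channels 2..3 as a single 64-bit subregister index.
 unsigned SubIdx64 = SIRegisterInfo::getSubRegFromChannel(2, /*NumRegs=*/2);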
llvm::AMDGPUISD::CVT_PK_U16_U32
@ CVT_PK_U16_U32
Definition: AMDGPUISelLowering.h:454
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:29
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:622
llvm::PseudoSourceValue
Special value supplied for machine level alias analysis.
Definition: PseudoSourceValue.h:35
llvm::MCOperandInfo::RegClass
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Definition: MCInstrDesc.h:89
llvm::AMDGPUAS::CONSTANT_BUFFER_0
@ CONSTANT_BUFFER_0
Definition: AMDGPU.h:382
llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPU.h:361
llvm::SDValue::getConstantOperandVal
uint64_t getConstantOperandVal(unsigned i) const
Definition: SelectionDAGNodes.h:1125
llvm::SDValue::getValueSizeInBits
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
Definition: SelectionDAGNodes.h:192
llvm::MCID::RegSequence
@ RegSequence
Definition: MCInstrDesc.h:179
llvm::AMDGPUISD::FMA_W_CHAIN
@ FMA_W_CHAIN
Definition: AMDGPUISelLowering.h:375
llvm::GCNSubtarget::getScalarizeGlobalBehavior
bool getScalarizeGlobalBehavior() const
Definition: GCNSubtarget.h:767
llvm::ISD::USUBO
@ USUBO
Definition: ISDOpcodes.h:308
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
findMemSDNode
static MemSDNode * findMemSDNode(SDNode *N)
Definition: AMDGPUISelDAGToDAG.cpp:1645
llvm::Hi_32
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition: MathExtras.h:348
LoopInfo.h
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:365
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:634
llvm::ISD::ATOMIC_LOAD_FADD
@ ATOMIC_LOAD_FADD
Definition: ISDOpcodes.h:1137
llvm::AMDGPUISD::BUILD_VERTICAL_VECTOR
@ BUILD_VERTICAL_VECTOR
This node is for VLIW targets and it is used to represent a vector that is stored in consecutive regi...
Definition: AMDGPUISelLowering.h:471
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:558
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::APFloat
Definition: APFloat.h:701
llvm::R600Subtarget
Definition: R600Subtarget.h:36
llvm::countPopulation
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:567
llvm::CodeGenOpt::Default
@ Default
Definition: CodeGen.h:55
llvm::isInt< 32 >
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:373
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:333
llvm::ISD::FMAD
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:464
llvm::isUInt< 16 >
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:408
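Sketch of the typical offset-legality checks these templates support during selection; Offset is an assumed int64_t byte offset and the field widths are illustrative, not tied to a specific instruction:
 bool FitsS32 = isInt<32>(Offset);  // representable as a signed 32-bit literal
 bool FitsU16 = isUInt<16>(Offset); // fits an unsigned 16-bit offset field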
llvm::AMDGPU::CPol::GLC
@ GLC
Definition: SIDefines.h:282
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::MVT::v2f16
@ v2f16
Definition: MachineValueType.h:125
llvm::SelectionDAGISel::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: SelectionDAGISel.cpp:330
llvm::ConstantFPSDNode
Definition: SelectionDAGNodes.h:1575
llvm::MemSDNode::getMemOperand
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Definition: SelectionDAGNodes.h:1325
llvm::ISD::LOAD
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:911
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:80
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::AMDGPUISD::LOAD_D16_HI
@ LOAD_D16_HI
Definition: AMDGPUISelLowering.h:478
getBaseWithOffsetUsingSplitOR
static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr, SDValue &N0, SDValue &N1)
Definition: AMDGPUISelDAGToDAG.cpp:918
llvm::PointerUnion::dyn_cast
T dyn_cast() const
Returns the current pointer if it is of the specified pointer type, otherwise returns null.
Definition: PointerUnion.h:194
llvm::MachinePointerInfo
This class contains a discriminated union of information about pointers in memory operands,...
Definition: MachineMemOperand.h:37
llvm::GCNSubtarget::hasAddNoCarry
bool hasAddNoCarry() const
Definition: GCNSubtarget.h:629
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::GCNSubtarget::privateMemoryResourceIsRangeChecked
bool privateMemoryResourceIsRangeChecked() const
Definition: GCNSubtarget.h:484
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:505
llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition: SelectionDAGNodes.h:896
llvm::AMDGPUISD::ATOMIC_DEC
@ ATOMIC_DEC
Definition: AMDGPUISelLowering.h:494
llvm::AMDGPUISD::CVT_PK_I16_I32
@ CVT_PK_I16_I32
Definition: AMDGPUISelLowering.h:453
llvm::AMDGPUISD::BFE_I32
@ BFE_I32
Definition: AMDGPUISelLowering.h:417
llvm::LoadSDNode::getExtensionType
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Definition: SelectionDAGNodes.h:2266
llvm::AMDGPUArgumentUsageInfo
Definition: AMDGPUArgumentUsageInfo.h:158
isStackPtrRelative
static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo)
Definition: AMDGPUISelDAGToDAG.cpp:1500
llvm::isUInt< 32 >
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:411
llvm::SDValue::getValue
SDValue getValue(unsigned R) const
Definition: SelectionDAGNodes.h:172
llvm::MVT::i8
@ i8
Definition: MachineValueType.h:41
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
llvm::RegState::Undef
@ Undef
Value of the register doesn't matter.
Definition: MachineInstrBuilder.h:52
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:39
llvm::isUInt< 8 >
constexpr bool isUInt< 8 >(uint64_t x)
Definition: MathExtras.h:405
llvm::ConstantSDNode::getZExtValue
uint64_t getZExtValue() const
Definition: SelectionDAGNodes.h:1547
llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1341
llvm::MCInstrDesc::OpInfo
const MCOperandInfo * OpInfo
Definition: MCInstrDesc.h:206
Mode
SI Whole Quad Mode
Definition: SIWholeQuadMode.cpp:262
llvm::SelectionDAG::getMachineNode
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
Definition: SelectionDAG.cpp:8491
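For illustration, a hedged sketch of creating a machine node during selection; SL, K, and CurDAG are assumed locals/members, and the opcode choice is just an example:
 // Materialize the 32-bit constant K in a VGPR.
 MachineSDNode *Mov = CurDAG->getMachineNode(
     AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
     CurDAG->getTargetConstant(K, SL, MVT::i32));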
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:30
llvm::ISD::SUBCARRY
@ SUBCARRY
Definition: ISDOpcodes.h:284
llvm::GCNSubtarget::getLDSBankCount
int getLDSBankCount() const
Definition: GCNSubtarget.h:276
llvm::SIRegisterInfo::isSGPRClass
bool isSGPRClass(const TargetRegisterClass *RC) const
Definition: SIRegisterInfo.h:154
llvm::isMask_32
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:467
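Two compile-time examples of what isMask_32 accepts (values chosen for illustration):
 static_assert(isMask_32(0x0000FFFFu), "contiguous ones from the LSB");
 static_assert(!isMask_32(0x00FF00FFu), "gap above the LSB run, not a mask");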
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:70
llvm::MachineFunction
Definition: MachineFunction.h:230
llvm::AMDGPU::isInlinableLiteral16
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi)
Definition: AMDGPUBaseInfo.cpp:1691
llvm::AMDGPU::CPol::SCC
@ SCC
Definition: SIDefines.h:285
SelectionDAGISel.h
llvm::LoopInfo
Definition: LoopInfo.h:1080
llvm::MachinePointerInfo::V
PointerUnion< const Value *, const PseudoSourceValue * > V
This is the IR pointer value for the access, or it is null if unknown.
Definition: MachineMemOperand.h:41
llvm::MVT::i64
@ i64
Definition: MachineValueType.h:44
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:167
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::GCNSubtarget::hasFlatInstOffsets
bool hasFlatInstOffsets() const
Definition: GCNSubtarget.h:550
llvm::EVT::getScalarSizeInBits
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:345
AMDGPU.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::AMDGPUISD::LOAD_D16_LO_U8
@ LOAD_D16_LO_U8
Definition: AMDGPUISelLowering.h:483
uint32_t
llvm::SDValue::getOperand
const SDValue & getOperand(unsigned i) const
Definition: SelectionDAGNodes.h:1121
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::ISD::UNDEF
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:204
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::CodeGenOpt::Level
Level
Definition: CodeGen.h:52
llvm::SDVTList
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Definition: SelectionDAGNodes.h:79
llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition: ISDOpcodes.h:1321
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::ISD::INTRINSIC_WO_CHAIN
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:177
llvm::LoadSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2270
llvm::MCID::Select
@ Select
Definition: MCInstrDesc.h:162
llvm::isInt< 16 >
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:370
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:524
llvm::AMDGPU::isInlinableLiteral64
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
Definition: AMDGPUBaseInfo.cpp:1648
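A minimal sketch of the usual call-site shape; C is an assumed ConstantSDNode and Subtarget the selector's GCNSubtarget:
 bool IsInline = AMDGPU::isInlinableLiteral64(
     C->getSExtValue(), Subtarget->hasInv2PiInlineImm());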
llvm::AMDGPUISD::LOAD_D16_HI_U8
@ LOAD_D16_HI_U8
Definition: AMDGPUISelLowering.h:481
llvm::AMDGPUISD::MAD_U64_U32
@ MAD_U64_U32
Definition: AMDGPUISelLowering.h:429
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:314
llvm::MemSDNode::getAddressSpace
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Definition: SelectionDAGNodes.h:1332
llvm::ilist_iterator
Iterator for intrusive lists based on ilist_node.
Definition: ilist_iterator.h:57
llvm::AMDGPUISD::ATOMIC_LOAD_FMIN
@ ATOMIC_LOAD_FMIN
Definition: AMDGPUISelLowering.h:495
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:364
llvm::ISD::BUILD_VECTOR
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:485
llvm::SITargetLowering
Definition: SIISelLowering.h:30
llvm::ISD::SUBC
@ SUBC
Definition: ISDOpcodes.h:263
llvm::MVT::i32
@ i32
Definition: MachineValueType.h:43
llvm::GCNSubtarget::ldsRequiresM0Init
bool ldsRequiresM0Init() const
Return true if most LDS instructions have an m0 use that requires m0 to be initialized.
Definition: GCNSubtarget.h:607
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition: SelectionDAGNodes.h:138
llvm::R600InstrInfo
Definition: R600InstrInfo.h:39
gwsIntrinToOpcode
static unsigned gwsIntrinToOpcode(unsigned IntrID)
Definition: AMDGPUISelDAGToDAG.cpp:2458
llvm::SIInstrFlags::FlatGlobal
@ FlatGlobal
Definition: SIDefines.h:94
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:183
llvm::AMDGPUPerfHintAnalysis
Definition: AMDGPUPerfHintAnalysis.h:23
llvm::ISD::STORE
@ STORE
Definition: ISDOpcodes.h:912
llvm::AMDGPUISD::CVT_PKRTZ_F16_F32
@ CVT_PKRTZ_F16_F32
Definition: AMDGPUISelLowering.h:450
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:403
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:232
llvm::SDValue::isUndef
bool isUndef() const
Definition: SelectionDAGNodes.h:1149
llvm::codeview::ModifierOptions::Const
@ Const
llvm::GCNSubtarget::getGeneration
Generation getGeneration() const
Definition: GCNSubtarget.h:267
llvm::SIInstrInfo
Definition: SIInstrInfo.h:38
llvm::AMDGPU::isInlinableLiteral32
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
Definition: AMDGPUBaseInfo.cpp:1665
llvm::EVT::getVectorElementType
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:293
llvm::ISD::FP_EXTEND
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:823
llvm::ISD::SHL
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:647
LegacyDivergenceAnalysis.h
llvm::SelectionDAGISel
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
Definition: SelectionDAGISel.h:39
llvm::GCNSubtarget::getConstantBusLimit
unsigned getConstantBusLimit(unsigned Opcode) const
Definition: AMDGPUSubtarget.cpp:332
llvm::AMDGPUISD::MAD_I64_I32
@ MAD_I64_I32
Definition: AMDGPUISelLowering.h:430
Dominators.h
N
#define N
llvm::ISD::SUBE
@ SUBE
Definition: ISDOpcodes.h:273
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:335
llvm::ISD::SRL
@ SRL
Definition: ISDOpcodes.h:649
llvm::GCNSubtarget::d16PreservesUnusedBits
bool d16PreservesUnusedBits() const
Definition: GCNSubtarget.h:597
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel", "AMDGPU DAG->DAG Pattern Instruction Selection", false, false) INITIALIZE_PASS_END(AMDGPUDAGToDAGISel
llvm::SDValue::getOpcode
unsigned getOpcode() const
Definition: SelectionDAGNodes.h:1109
llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:637
llvm::MVT::i16
@ i16
Definition: MachineValueType.h:42
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::ISD::INTRINSIC_W_CHAIN
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:185
llvm::ISD::FNEG
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:856
llvm::AMDGPUAS::FLAT_ADDRESS
@ FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:359
llvm::ISD::BUILD_PAIR
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:222
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::AMDGPUISD::DWORDADDR
@ DWORDADDR
Definition: AMDGPUISelLowering.h:360
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:363
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::SISrcMods::OP_SEL_1
@ OP_SEL_1
Definition: SIDefines.h:204
llvm::SelectionDAGISel::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
Definition: SelectionDAGISel.cpp:411
llvm::createR600ISelDag
FunctionPass * createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel)
This pass converts a legalized DAG into an R600-specific DAG, ready for instruction scheduling.
Definition: AMDGPUISelDAGToDAG.cpp:387
llvm::AMDGPUISD::ATOMIC_INC
@ ATOMIC_INC
Definition: AMDGPUISelLowering.h:493
llvm::MVT::v2i16
@ v2i16
Definition: MachineValueType.h:84
InitializePasses.h
llvm::GCNSubtarget::hasUsableDSOffset
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition: GCNSubtarget.h:399
llvm::ISD::ADDE
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:272
llvm::MCInstrDesc::getNumOperands
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:228
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::SIInstrFlags::WQM
@ WQM
Definition: SIDefines.h:63
llvm::AMDGPUTargetMachine::EnableLateStructurizeCFG
static bool EnableLateStructurizeCFG
Definition: AMDGPUTargetMachine.h:35
llvm::EVT::bitsEq
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:221
AMDGPUTargetMachine.h
SubReg
unsigned SubReg
Definition: AArch64AdvSIMDScalarPass.cpp:104
llvm::SIInstrFlags::FLAT
@ FLAT
Definition: SIDefines.h:51
llvm::AMDGPUISD::BFE_U32
@ BFE_U32
Definition: AMDGPUISelLowering.h:416
llvm::ISD::SCALAR_TO_VECTOR
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:577
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1167
llvm::AMDGPUISD::LOAD_D16_LO
@ LOAD_D16_LO
Definition: AMDGPUISelLowering.h:479
llvm::SIRegisterInfo::getSGPRClassForBitWidth
static const LLVM_READONLY TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
Definition: SIRegisterInfo.cpp:1976