//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"

#ifdef EXPENSIVE_CHECKS
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#endif

#define DEBUG_TYPE "isel"

using namespace llvm;

namespace llvm {

class R600InstrInfo;

} // end namespace llvm

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

static bool isNullConstantOrUndef(SDValue V) {
  if (V.isUndef())
    return true;

  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
  return Const != nullptr && Const->isNullValue();
}

static bool getConstantValue(SDValue N, uint32_t &Out) {
  // This is only used for packed vectors, where using 0 for undef should
  // always be good.
  if (N.isUndef()) {
    Out = 0;
    return true;
  }

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
    Out = C->getAPIntValue().getSExtValue();
    return true;
  }

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
    Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
    return true;
  }

  return false;
}

// TODO: Handle undef as zero
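// For example, (build_vector (i16 1), (i16 2)) packs to
// K = (1 & 0xffff) | (2 << 16) = 0x00020001 and is emitted as a single
// s_mov_b32; with Negate set, both halves are negated before being packed.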
static SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
                                 bool Negate = false) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
  uint32_t LHSVal, RHSVal;
  if (getConstantValue(N->getOperand(0), LHSVal) &&
      getConstantValue(N->getOperand(1), RHSVal)) {
    SDLoc SL(N);
    uint32_t K = Negate ?
        (-LHSVal & 0xffff) | (-RHSVal << 16) :
        (LHSVal & 0xffff) | (RHSVal << 16);
    return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
                              DAG.getTargetConstant(K, SL, MVT::i32));
  }

  return nullptr;
}

static SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
  return packConstantV2I16(N, DAG, true);
}

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const GCNSubtarget *Subtarget;

  // Default FP mode for the current function.
  AMDGPU::SIModeRegisterDefaults Mode;

  bool EnableLateStructurizeCFG;

  // Instructions that will be lowered with a final instruction that zeros the
  // high result bits.
  bool fp16SrcZerosHighBits(unsigned Opc) const;

public:
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
      : SelectionDAGISel(*TM, OptLevel) {
    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
  }
  ~AMDGPUDAGToDAGISel() override = default;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AMDGPUArgumentUsageInfo>();
    AU.addRequired<LegacyDivergenceAnalysis>();
#ifdef EXPENSIVE_CHECKS
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
#endif
    SelectionDAGISel::getAnalysisUsage(AU);
  }

  bool matchLoadD16FromBuildVector(SDNode *N) const;

  bool runOnMachineFunction(MachineFunction &MF) override;
  void PreprocessISelDAG() override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

protected:
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
  bool isNegInlineImmediate(const SDNode *N) const {
    return isInlineImmediate(N, true);
  }

  bool isInlineImmediate16(int64_t Imm) const {
    return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm());
  }

  bool isInlineImmediate32(int64_t Imm) const {
    return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm());
  }

  bool isInlineImmediate64(int64_t Imm) const {
    return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm());
  }

  bool isInlineImmediate(const APFloat &Imm) const {
    return Subtarget->getInstrInfo()->isInlineConstant(Imm);
  }

  bool isVGPRImm(const SDNode *N) const;
  bool isUniformLoad(const SDNode *N) const;
  bool isUniformBr(const SDNode *N) const;

  bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
                                  SDValue &RHS) const;

  MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;

  SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
  SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
  SDNode *glueCopyToM0LDSInit(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
  bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
                        unsigned Size) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                  SDValue &Offset1) const;
  bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                          SDValue &Offset1, unsigned Size) const;
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset) const;
  bool SelectMUBUFScratchOffen(SDNode *Parent,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Parent,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;

  bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr,
                            SDValue &Offset, uint64_t FlatVariant) const;
  bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
                        SDValue &Offset) const;
  bool SelectGlobalOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
                          SDValue &Offset) const;
  bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
                           SDValue &Offset) const;
  bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
                         SDValue &VOffset, SDValue &Offset) const;
  bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
                          SDValue &Offset) const;

  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  SDValue Expand32BitAddress(SDValue Addr) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
                          bool AllowAbs = true) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  SDValue getHi16Elt(SDValue In) const;

  SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;

  void SelectADD_SUB_I64(SDNode *N);
  void SelectAddcSubb(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectMAD_64_32(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD_FMA(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);
  void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
  void SelectDS_GWS(SDNode *N, unsigned IntrID);
  void SelectInterpP1F16(SDNode *N);
  void SelectINTRINSIC_W_CHAIN(SDNode *N);
  void SelectINTRINSIC_WO_CHAIN(SDNode *N);
  void SelectINTRINSIC_VOID(SDNode *N);

protected:
  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
  const R600Subtarget *Subtarget;

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
      AMDGPUDAGToDAGISel(TM, OptLevel) {}

  void Select(SDNode *N) override;

  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;

  bool runOnMachineFunction(MachineFunction &MF) override;

  void PreprocessISelDAG() override {}

protected:
  // Include the pieces autogenerated from the target description.
#include "R600GenDAGISel.inc"
};

static SDValue stripBitcast(SDValue Val) {
  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}

// Figure out if this is really an extract of the high 16-bits of a dword.
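// E.g. both (extract_vector_elt (v2i16 V), 1) and (trunc (srl (i32 V), 16))
// match, and Out is set to the dword the high half comes from.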
static bool isExtractHiElt(SDValue In, SDValue &Out) {
  In = stripBitcast(In);

  if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
      if (!Idx->isOne())
        return false;
      Out = In.getOperand(0);
      return true;
    }
  }

  if (In.getOpcode() != ISD::TRUNCATE)
    return false;

  SDValue Srl = In.getOperand(0);
  if (Srl.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
      if (ShiftAmt->getZExtValue() == 16) {
        Out = stripBitcast(Srl.getOperand(0));
        return true;
      }
    }
  }

  return false;
}

// Look through operations that obscure just looking at the low 16-bits of the
// same register.
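// E.g. (extract_vector_elt (v2i16 V), 0) becomes V, and (trunc (i32 X))
// becomes X, since neither operation changes the low 16 bits.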
static SDValue stripExtractLoElt(SDValue In) {
  if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
      if (Idx->isNullValue() && In.getValueSizeInBits() <= 32)
        return In.getOperand(0);
    }
  }

  if (In.getOpcode() == ISD::TRUNCATE) {
    SDValue Src = In.getOperand(0);
    if (Src.getValueType().getSizeInBits() == 32)
      return stripBitcast(Src);
  }

  return In;
}

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
#ifdef EXPENSIVE_CHECKS
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
#endif
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

/// This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

/// This pass converts a legalized DAG into an R600-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
                                      CodeGenOpt::Level OptLevel) {
  return new R600DAGToDAGISel(TM, OptLevel);
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
#ifdef EXPENSIVE_CHECKS
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  for (auto &L : LI->getLoopsInPreorder()) {
    assert(L->isLCSSAForm(DT));
  }
#endif
  Subtarget = &MF.getSubtarget<GCNSubtarget>();
  Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction());
  return SelectionDAGISel::runOnMachineFunction(MF);
}

bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
  // XXX - only need to list legal operations.
  switch (Opc) {
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FMUL:
  case ISD::FDIV:
  case ISD::FREM:
  case ISD::FCANONICALIZE:
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP:
  case ISD::FABS:
    // Fabs is lowered to a bit operation, but it's an and which will clear the
    // high bits anyway.
  case ISD::FSQRT:
  case ISD::FSIN:
  case ISD::FCOS:
  case ISD::FPOWI:
  case ISD::FPOW:
  case ISD::FLOG:
  case ISD::FLOG2:
  case ISD::FLOG10:
  case ISD::FEXP:
  case ISD::FEXP2:
  case ISD::FCEIL:
  case ISD::FTRUNC:
  case ISD::FRINT:
  case ISD::FNEARBYINT:
  case ISD::FROUND:
  case ISD::FFLOOR:
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
  case AMDGPUISD::FRACT:
  case AMDGPUISD::CLAMP:
  case AMDGPUISD::COS_HW:
  case AMDGPUISD::SIN_HW:
  case AMDGPUISD::FMIN3:
  case AMDGPUISD::FMAX3:
  case AMDGPUISD::FMED3:
  case AMDGPUISD::FMAD_FTZ:
  case AMDGPUISD::RCP:
  case AMDGPUISD::RSQ:
  case AMDGPUISD::RCP_IFLAG:
  case AMDGPUISD::LDEXP:
    // On gfx10, all 16-bit instructions preserve the high bits.
    return Subtarget->getGeneration() <= AMDGPUSubtarget::GFX9;
  case ISD::FP_ROUND:
    // We may select fptrunc (fma/mad) to mad_mixlo, which does not zero the
    // high bits on gfx9.
    // TODO: If we had the source node we could see if the source was fma/mad
    return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
  case ISD::FMA:
  case ISD::FMAD:
  case AMDGPUISD::DIV_FIXUP:
    return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
  default:
    // fcopysign, select and others may be lowered to 32-bit bit operations
    // which don't zero the high bits.
    return false;
  }
}

bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
  assert(Subtarget->d16PreservesUnusedBits());
  MVT VT = N->getValueType(0).getSimpleVT();
  if (VT != MVT::v2i16 && VT != MVT::v2f16)
    return false;

  SDValue Lo = N->getOperand(0);
  SDValue Hi = N->getOperand(1);

  LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));

  // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
  // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
  // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo

  // Need to check for possible indirect dependencies on the other half of the
  // vector to avoid introducing a cycle.
  if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);

    SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
    SDValue Ops[] = {
      LdHi->getChain(), LdHi->getBasePtr(), TiedIn
    };

    unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
    if (LdHi->getMemoryVT() == MVT::i8) {
      LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
          AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
    } else {
      assert(LdHi->getMemoryVT() == MVT::i16);
    }

    SDValue NewLoadHi =
        CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
                                    Ops, LdHi->getMemoryVT(),
                                    LdHi->getMemOperand());

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
    return true;
  }

  // build_vector (load ptr), hi -> load_d16_lo ptr, hi
  // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
  // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
  LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
  if (LdLo && Lo.hasOneUse()) {
    SDValue TiedIn = getHi16Elt(Hi);
    if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
      return false;

    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
    unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
    if (LdLo->getMemoryVT() == MVT::i8) {
      LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
          AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
    } else {
      assert(LdLo->getMemoryVT() == MVT::i16);
    }

    TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);

    SDValue Ops[] = {
      LdLo->getChain(), LdLo->getBasePtr(), TiedIn
    };

    SDValue NewLoadLo =
        CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
                                    Ops, LdLo->getMemoryVT(),
                                    LdLo->getMemOperand());

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
    return true;
  }

  return false;
}

void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->d16PreservesUnusedBits())
    return;

  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty())
      continue;

    switch (N->getOpcode()) {
    case ISD::BUILD_VECTOR:
      MadeChange |= matchLoadD16FromBuildVector(N);
      break;
    default:
      break;
    }
  }

  if (MadeChange) {
    CurDAG->RemoveDeadNodes();
    LLVM_DEBUG(dbgs() << "After PreProcess:\n";
               CurDAG->dump(););
  }
}

bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (N->getFlags().hasNoNaNs())
    return true;

  return CurDAG->isKnownNeverNaN(N);
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
                                           bool Negated) const {
  if (N->isUndef())
    return true;

  const SIInstrInfo *TII = Subtarget->getInstrInfo();
  if (Negated) {
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
      return TII->isInlineConstant(-C->getAPIntValue());

    if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
      return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());

  } else {
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
      return TII->isInlineConstant(C->getAPIntValue());

    if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
      return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
  }

  return false;
}

/// Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or nullptr if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::CopyToReg) {
      Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (Reg.isVirtual()) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      const SIRegisterInfo *TRI
          = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToOp(SDNode *N, SDValue NewChain,
                                         SDValue Glue) const {
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(NewChain); // Replace the chain.
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
    Ops.push_back(N->getOperand(i));

  Ops.push_back(Glue);
  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");

  SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), Val);
  return glueCopyToOp(N, M0, M0.getValue(1));
}

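// For LDS, M0 is initialized to -1 (the largest bound) on subtargets where M0
// limits LDS addressing; region (GDS) accesses instead need the GDS size in M0.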
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
  unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
  if (AS == AMDGPUAS::LOCAL_ADDRESS) {
    if (Subtarget->ldsRequiresM0Init())
      return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
  } else if (AS == AMDGPUAS::REGION_ADDRESS) {
    MachineFunction &MF = CurDAG->getMachineFunction();
    unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
    return
        glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
  }
  return N;
}

MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
                                                  EVT VT) const {
  SDNode *Lo = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32,
      CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
  SDNode *Hi =
      CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                             CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32));
  const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};

  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
}

void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  unsigned NumVectorElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDLoc DL(N);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

  if (NumVectorElts == 1) {
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         RegClass);
    return;
  }

  assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
                                "supported yet");
  // 32 = Max Num Vector Elements
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
  // 1 = Vector Register Class
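  // E.g. a 4-element vector uses 4 * 2 + 1 = 9 entries:
  // { RC, op0, sub0, op1, sub1, op2, sub2, op3, sub3 }.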
  SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

  bool IsGCN = CurDAG->getSubtarget().getTargetTriple().getArch() ==
               Triple::amdgcn;
  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // XXX: Why is this here?
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      IsRegSeq = false;
      break;
    }
    unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
                         : R600RegisterInfo::getSubRegFromChannel(i);
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // Fill in the missing undef elements if this was a scalar_to_vector.
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   DL, EltVT);
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
                           : R600RegisterInfo::getSubRegFromChannel(i);
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(Sub, DL, MVT::i32);
    }
  }

  if (!IsRegSeq) {
    SelectCode(N);
    return;
  }
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}

void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  // isa<MemSDNode> almost works but is slightly too permissive for some DS
  // intrinsics.
  if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
       Opc == ISD::ATOMIC_LOAD_FADD ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) {
    N = glueCopyToM0LDSInit(N);
    SelectCode(N);
    return;
  }

  switch (Opc) {
  default:
    break;
  // We are selecting i64 ADD here instead of custom-lowering it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
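  // E.g. the add in (load (add i64:%ptr, 16)) can then become part of the
  // load's addressing mode.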
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::ADDCARRY:
  case ISD::SUBCARRY:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectAddcSubb(N);
    return;
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    if (VT.getScalarSizeInBits() == 16) {
      if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
        if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
          ReplaceNode(N, Packed);
          return;
        }
      }

      break;
    }

    assert(VT.getVectorElementType().bitsEq(MVT::i32));
    unsigned RegClassID =
        SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID();
    SelectBuildVector(N, RegClassID);
    return;
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
    return;
  }
  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
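    // E.g. extracting 16 bits starting at bit 8 packs to
    // (width << 16) | offset = (16 << 16) | 8 = 0x100008 in the single
    // s_bfe source operand.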
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case AMDGPUISD::MAD_I64_I32:
  case AMDGPUISD::MAD_U64_U32: {
    SelectMAD_64_32(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  case ISD::FMAD:
  case ISD::FMA:
    SelectFMAD_FMA(N);
    return;
  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  case AMDGPUISD::CVT_PKRTZ_F16_F32:
  case AMDGPUISD::CVT_PKNORM_I16_F32:
  case AMDGPUISD::CVT_PKNORM_U16_F32:
  case AMDGPUISD::CVT_PK_U16_U32:
  case AMDGPUISD::CVT_PK_I16_I32: {
    // Hack around using a legal type if f16 is illegal.
    if (N->getValueType(0) == MVT::i32) {
      MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
      N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
                              { N->getOperand(0), N->getOperand(1) });
      SelectCode(N);
      return;
    }

    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    SelectINTRINSIC_W_CHAIN(N);
    return;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    SelectINTRINSIC_WO_CHAIN(N);
    return;
  }
  case ISD::INTRINSIC_VOID: {
    SelectINTRINSIC_VOID(N);
    return;
  }
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr,
                                          SDValue &N0, SDValue &N1) {
  if (Addr.getValueType() == MVT::i64 && Addr.getOpcode() == ISD::BITCAST &&
      Addr.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
    // As we split 64-bit `or` earlier, this is a complicated pattern to match,
    // i.e.
    // (i64 (bitcast (v2i32 (build_vector
    //                        (or (extract_vector_elt V, 0), OFFSET),
    //                        (extract_vector_elt V, 1)))))
    SDValue Lo = Addr.getOperand(0).getOperand(0);
    if (Lo.getOpcode() == ISD::OR && DAG.isBaseWithConstantOffset(Lo)) {
      SDValue BaseLo = Lo.getOperand(0);
      SDValue BaseHi = Addr.getOperand(0).getOperand(1);
      // Check that the split base halves (Lo and Hi) are extracted from the
      // same vector.
      if (BaseLo.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          BaseHi.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          BaseLo.getOperand(0) == BaseHi.getOperand(0) &&
          // Lo is statically extracted from index 0.
          isa<ConstantSDNode>(BaseLo.getOperand(1)) &&
          BaseLo.getConstantOperandVal(1) == 0 &&
          // Hi is statically extracted from index 1.
          isa<ConstantSDNode>(BaseHi.getOperand(1)) &&
          BaseHi.getConstantOperandVal(1) == 1) {
        N0 = BaseLo.getOperand(0).getOperand(0);
        N1 = Lo.getOperand(1);
        return true;
      }
    }
  }
  return false;
}

bool AMDGPUDAGToDAGISel::isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
                                                    SDValue &RHS) const {
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    LHS = Addr.getOperand(0);
    RHS = Addr.getOperand(1);
    return true;
  }

  if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, LHS, RHS)) {
    assert(LHS && RHS && isa<ConstantSDNode>(RHS));
    return true;
  }

  return false;
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
                                                       const SDLoc &DL) const {
  SDNode *Mov = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32,
      CurDAG->getTargetConstant(Val, DL, MVT::i32));
  return SDValue(Mov, 0);
}

// FIXME: Should only handle addcarry/subcarry
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

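  // Opcode table indexed by [consumes carry-in][divergent][is add]; the low
  // half uses the plain forms, the high half the carry-consuming forms.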
  static const unsigned OpcMap[2][2][2] = {
      {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
       {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
      {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
       {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};

  unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
  unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo, 0),
    Sub0,
    SDValue(AddHi, 0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  ReplaceNode(N, RegSequence);
}

void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue CI = N->getOperand(2);

  if (N->isDivergent()) {
    unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
                                                   : AMDGPU::V_SUBB_U32_e64;
    CurDAG->SelectNodeTo(
        N, Opc, N->getVTList(),
        {LHS, RHS, CI,
         CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
  } else {
    unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::S_ADD_CO_PSEUDO
                                                   : AMDGPU::S_SUB_CO_PSEUDO;
    CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
  }
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The names of the opcodes are misleading. v_add_i32/v_sub_i32 have an
  // unsigned carry out despite the _i32 name. These were renamed in VI to
  // _U32.
  // FIXME: We should probably rename the opcodes here.
  bool IsAdd = N->getOpcode() == ISD::UADDO;
  bool IsVALU = N->isDivergent();

  for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E;
       ++UI)
    if (UI.getUse().getResNo() == 1) {
      if ((IsAdd && (UI->getOpcode() != ISD::ADDCARRY)) ||
          (!IsAdd && (UI->getOpcode() != ISD::SUBCARRY))) {
        IsVALU = true;
        break;
      }
    }

  if (IsVALU) {
    unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;

    CurDAG->SelectNodeTo(
        N, Opc, N->getVTList(),
        {N->getOperand(0), N->getOperand(1),
         CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
  } else {
    unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
                                                : AMDGPU::S_USUBO_PSEUDO;

    CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                         {N->getOperand(0), N->getOperand(1)});
  }
}

void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];
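  // Ops[8] and Ops[9] forward the node's incoming chain and trailing operand.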

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32_e64, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
      = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;

  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
  // omod
  SDValue Ops[8];
  SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  SDLoc SL(N);
  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;

  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                    Clamp };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset) const {
  if (!isUInt<16>(Offset))
    return false;

  if (!Base || Subtarget->hasUsableDSOffset() ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue())) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isDSOffsetLegal(SDValue(), ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));

          // FIXME: Select to VOP3 version for with-carry.
          unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          MachineSDNode *MachineSub =
              CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.
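    // E.g. two adjacent 4-byte loads at constant addresses 0x100 and 0x104
    // can share one zeroed base register and merge into a single ds_read2_b32
    // with offsets 64 and 65.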

    SDLoc DL(Addr);

    if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

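// E.g. with Size == 4, each offset must be a multiple of 4 and at most
// 255 * 4 = 1020, since the offsets are encoded as 8-bit dword counts.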
bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
                                          unsigned Offset1,
                                          unsigned Size) const {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)
    return false;
  if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
    return false;

  if (!Base || Subtarget->hasUsableDSOffset() ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

// TODO: If offset is too big, put low 16 bits into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
}

bool AMDGPUDAGToDAGISel::SelectDS128Bit8ByteAligned(SDValue Addr, SDValue &Base,
                                                    SDValue &Offset0,
                                                    SDValue &Offset1) const {
  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);
}

bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
                                            SDValue &Offset0, SDValue &Offset1,
                                            unsigned Size) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned OffsetValue0 = C1->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;

    // (add n0, c0)
    if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C =
            dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned OffsetValue0 = C->getZExtValue();
      unsigned OffsetValue1 = OffsetValue0 + Size;

      if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffset2Legal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub =
            CurDAG->getNode(ISD::SUB, DL, MVT::i32, Zero, Addr.getOperand(1));

        if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));
          unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          MachineSDNode *MachineSub = CurDAG->getMachineNode(
              SubOp, DL, MVT::getIntegerVT(Size * 8), Opnds);

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned OffsetValue0 = CAddr->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;

    if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero =
          CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
      return true;
    }
  }

  // default case

  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
                                     SDValue &SOffset, SDValue &Offset,
                                     SDValue &Offen, SDValue &Idxen,
                                     SDValue &Addr64) const {
  // Subtarget prefers to use flat instructions.
  // FIXME: This should be a pattern predicate and not reach here
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  ConstantSDNode *C1 = nullptr;
  SDValue N0 = Addr;
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    C1 = cast<ConstantSDNode>(Addr.getOperand(1));
    if (isUInt<32>(C1->getZExtValue()))
      N0 = Addr.getOperand(0);
    else
      C1 = nullptr;
  }

  if (N0.getOpcode() == ISD::ADD) {
    // (add N2, N3) -> addr64, or
    // (add (add N2, N3), C1) -> addr64
    SDValue N2 = N0.getOperand(0);
    SDValue N3 = N0.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);

    if (N2->isDivergent()) {
      if (N3->isDivergent()) {
        // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
        // addr64, and construct the resource from a 0 address.
        Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
        VAddr = N0;
      } else {
        // N2 is divergent, N3 is not.
        Ptr = N3;
        VAddr = N2;
      }
    } else {
      // N2 is not divergent.
      Ptr = N2;
      VAddr = N3;
    }
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  } else if (N0->isDivergent()) {
    // N0 is divergent. Use it as the addr64, and construct the resource from a
    // 0 address.
    Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
    VAddr = N0;
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
  } else {
    // N0 -> offset, or
    // (N0 + C1) -> offset
    VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Ptr = N0;
  }

  if (!C1) {
    // No offset.
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
    // Legal offset for instruction.
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    return true;
  }

  // Illegal offset, store it in soffset.
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  SOffset =
      SDValue(CurDAG->getMachineNode(
                  AMDGPU::S_MOV_B32, DL, MVT::i32,
                  CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
              0);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  // FIXME: This should be a pattern predicate and not reach here
  if (!Subtarget->hasAddr64())
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  SDLoc DL(N);

  auto *FI = dyn_cast<FrameIndexSDNode>(N);
  SDValue TFI =
      FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N;

  // We rebase the base address into an absolute stack address and hence
  // use constant 0 for soffset. This value must be retained until
  // frame elimination and eliminateFrameIndex will choose the appropriate
  // frame register if need be.
  return std::make_pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32));
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    int64_t Imm = CAddr->getSExtValue();
    const int64_t NullPtr =
        AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
    // Don't fold null pointer.
    if (Imm != NullPtr) {
      SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
      MachineSDNode *MovHighBits = CurDAG->getMachineNode(
          AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
      VAddr = SDValue(MovHighBits, 0);

      SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
      ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
      return true;
    }
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive if range checking is enabled.
    //
    // The total computation of vaddr + soffset + offset must not overflow. If
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    //
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    // always perform a range check. If a negative vaddr base index was used,
    // this would fail the range check. The overall address computation would
    // compute a valid address, but this doesn't happen due to the range
    // check. For out-of-bounds MUBUF loads, a 0 is returned.
    //
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    // MUBUF vaddr, but not on older subtargets which can only do this if the
    // sign bit is known 0.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}
1644 
1645 static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val) {
1646  if (Val.getOpcode() != ISD::CopyFromReg)
1647  return false;
1648  auto RC =
1649  TRI.getPhysRegClass(cast<RegisterSDNode>(Val.getOperand(1))->getReg());
1650  return RC && TRI.isSGPRClass(RC);
1651 }
1652 
1653 bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
1654  SDValue Addr,
1655  SDValue &SRsrc,
1656  SDValue &SOffset,
1657  SDValue &Offset) const {
1658  const SIRegisterInfo *TRI =
1659  static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
1660  MachineFunction &MF = CurDAG->getMachineFunction();
1661  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1662  SDLoc DL(Addr);
1663 
1664  // CopyFromReg <sgpr>
1665  if (IsCopyFromSGPR(*TRI, Addr)) {
1666  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1667  SOffset = Addr;
1668  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1669  return true;
1670  }
1671 
1672  ConstantSDNode *CAddr;
1673  if (Addr.getOpcode() == ISD::ADD) {
1674  // Add (CopyFromReg <sgpr>) <constant>
1675  CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
1676  if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
1677  return false;
1678  if (!IsCopyFromSGPR(*TRI, Addr.getOperand(0)))
1679  return false;
1680 
1681  SOffset = Addr.getOperand(0);
1682  } else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&
1683  SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue())) {
1684  // <constant>
1685  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1686  } else {
1687  return false;
1688  }
1689 
1690  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1691 
1692  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
1693  return true;
1694 }
1695 
1696 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1697  SDValue &SOffset, SDValue &Offset
1698  ) const {
1699  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
1700  const SIInstrInfo *TII =
1701  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
1702 
1703  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
1704  return false;
1705 
1706  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
1707  !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
1708  !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
1709  uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
1710  APInt::getAllOnesValue(32).getZExtValue(); // Size
1711  SDLoc DL(Addr);
1712 
1713  const SITargetLowering& Lowering =
1714  *static_cast<const SITargetLowering*>(getTargetLowering());
1715 
1716  SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
1717  return true;
1718  }
1719  return false;
1720 }
1721 
1722 // Find a load or store from the corresponding pattern root.
1723 // Roots may be build_vector, bitconvert or their combinations.
1724 static MemSDNode* findMemSDNode(SDNode *N) {
1725  N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode();
1726  if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
1727  return MN;
1728  assert(isa<BuildVectorSDNode>(N));
1729  for (SDValue V : N->op_values())
1730  if (MemSDNode *MN =
1731  dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
1732  return MN;
1733  llvm_unreachable("cannot find MemSDNode in the pattern!");
1734 }
1735 
1736 bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
1737  SDValue &VAddr, SDValue &Offset,
1738  uint64_t FlatVariant) const {
1739  int64_t OffsetVal = 0;
1740 
1741  unsigned AS = findMemSDNode(N)->getAddressSpace();
1742 
1743  bool CanHaveFlatSegmentOffsetBug =
1744  Subtarget->hasFlatSegmentOffsetBug() &&
1745  FlatVariant == SIInstrFlags::FLAT &&
1746  (AS == AMDGPUAS::FLAT_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS);
1747 
1748  if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
1749  SDValue N0, N1;
1750  if (isBaseWithConstantOffset64(Addr, N0, N1)) {
1751  int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
1752 
1753  const SIInstrInfo *TII = Subtarget->getInstrInfo();
1754  if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
1755  Addr = N0;
1756  OffsetVal = COffsetVal;
1757  } else {
1758  // If the offset doesn't fit, put the low bits into the offset field and
1759  // add the rest.
1760  //
1761  // For a FLAT instruction the hardware decides whether to access
1762  // global/scratch/shared memory based on the high bits of vaddr,
1763  // ignoring the offset field, so we have to ensure that when we add
1764  // remainder to vaddr it still points into the same underlying object.
1765  // The easiest way to do that is to make sure that we split the offset
1766  // into two pieces that are both >= 0 or both <= 0.
1767 
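 // For example (hypothetical range): if the legal immediate offset range
 // were [-2048, 2047], COffsetVal = 5000 could split into OffsetVal = 904
 // and RemainderOffset = 4096, both non-negative.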
1768  SDLoc DL(N);
1769  uint64_t RemainderOffset;
1770 
1771  std::tie(OffsetVal, RemainderOffset) =
1772  TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
1773 
1774  SDValue AddOffsetLo =
1775  getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
1776  SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1777 
1778  if (Addr.getValueType().getSizeInBits() == 32) {
1779  SmallVector<SDValue, 3> Opnds;
1780  Opnds.push_back(N0);
1781  Opnds.push_back(AddOffsetLo);
1782  unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
1783  if (Subtarget->hasAddNoCarry()) {
1784  AddOp = AMDGPU::V_ADD_U32_e64;
1785  Opnds.push_back(Clamp);
1786  }
1787  Addr = SDValue(CurDAG->getMachineNode(AddOp, DL, MVT::i32, Opnds), 0);
1788  } else {
1789  // TODO: Should this try to use a scalar add pseudo if the base address
1790  // is uniform and saddr is usable?
1791  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
1792  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
1793 
1794  SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1795  DL, MVT::i32, N0, Sub0);
1796  SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1797  DL, MVT::i32, N0, Sub1);
1798 
1799  SDValue AddOffsetHi =
1800  getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
1801 
1802  SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
1803 
1804  SDNode *Add =
1805  CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64, DL, VTs,
1806  {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
1807 
1808  SDNode *Addc = CurDAG->getMachineNode(
1809  AMDGPU::V_ADDC_U32_e64, DL, VTs,
1810  {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});
1811 
1812  SDValue RegSequenceArgs[] = {
1813  CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
1814  SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1};
1815 
1816  Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
1817  MVT::i64, RegSequenceArgs),
1818  0);
1819  }
1820  }
1821  }
1822  }
1823 
1824  VAddr = Addr;
1825  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
1826  return true;
1827 }
1828 
1829 bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, SDValue Addr,
1830  SDValue &VAddr,
1831  SDValue &Offset) const {
1832  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FLAT);
1833 }
1834 
1835 bool AMDGPUDAGToDAGISel::SelectGlobalOffset(SDNode *N, SDValue Addr,
1836  SDValue &VAddr,
1837  SDValue &Offset) const {
1838  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FlatGlobal);
1839 }
1840 
1841 bool AMDGPUDAGToDAGISel::SelectScratchOffset(SDNode *N, SDValue Addr,
1842  SDValue &VAddr,
1843  SDValue &Offset) const {
1844  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,
1845  SIInstrFlags::FlatScratch);
1846 }
1847 
1848 // If this matches zero_extend i32:x, return x
1849 static SDValue matchZExtFromI32(SDValue Op) {
1850  if (Op.getOpcode() != ISD::ZERO_EXTEND)
1851  return SDValue();
1852 
1853  SDValue ExtSrc = Op.getOperand(0);
1854  return (ExtSrc.getValueType() == MVT::i32) ? ExtSrc : SDValue();
1855 }
1856 
1857 // Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
1858 bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
1859  SDValue Addr,
1860  SDValue &SAddr,
1861  SDValue &VOffset,
1862  SDValue &Offset) const {
1863  int64_t ImmOffset = 0;
1864 
1865  // Match the immediate offset first, which canonically is moved as low as
1866  // possible.
1867 
1868  SDValue LHS, RHS;
1869  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
1870  int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
1871  const SIInstrInfo *TII = Subtarget->getInstrInfo();
1872 
1873  if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
1874  SIInstrFlags::FlatGlobal)) {
1875  Addr = LHS;
1876  ImmOffset = COffsetVal;
1877  } else if (!LHS->isDivergent()) {
1878  if (COffsetVal > 0) {
1879  SDLoc SL(N);
1880  // saddr + large_offset -> saddr +
1881  // (voffset = large_offset & ~MaxOffset) +
1882  // (large_offset & MaxOffset);
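 // e.g. with a hypothetical MaxOffset of 4095, large_offset = 0x11234
 // splits into voffset = 0x11000 and an immediate offset of 0x234.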
1883  int64_t SplitImmOffset, RemainderOffset;
1884  std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
1885  COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);
1886 
1887  if (isUInt<32>(RemainderOffset)) {
1888  SDNode *VMov = CurDAG->getMachineNode(
1889  AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
1890  CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
1891  VOffset = SDValue(VMov, 0);
1892  SAddr = LHS;
1893  Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
1894  return true;
1895  }
1896  }
1897 
1898  // We are adding a 64-bit SGPR and a constant. If the constant bus limit
1899  // is 1, we would need to perform 1 or 2 extra moves for each half of
1900  // the constant, so it is better to do a scalar add and then issue a
1901  // single VALU instruction to materialize zero. Otherwise it takes fewer
1902  // instructions to perform VALU adds with immediates or inline literals.
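 // For example, COffsetVal = 0x100000040 has halves 0x40 and 0x1, both
 // inline constants, so NumLiterals = 0 and any constant bus limit > 0
 // rejects the fold here.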
1903  unsigned NumLiterals =
1904  !TII->isInlineConstant(APInt(32, COffsetVal & 0xffffffff)) +
1905  !TII->isInlineConstant(APInt(32, COffsetVal >> 32));
1906  if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
1907  return false;
1908  }
1909  }
1910 
1911  // Match the variable offset.
1912  if (Addr.getOpcode() == ISD::ADD) {
1913  LHS = Addr.getOperand(0);
1914  RHS = Addr.getOperand(1);
1915 
1916  if (!LHS->isDivergent()) {
1917  // add (i64 sgpr), (zero_extend (i32 vgpr))
1918  if (SDValue ZextRHS = matchZExtFromI32(RHS)) {
1919  SAddr = LHS;
1920  VOffset = ZextRHS;
1921  }
1922  }
1923 
1924  if (!SAddr && !RHS->isDivergent()) {
1925  // add (zero_extend (i32 vgpr)), (i64 sgpr)
1926  if (SDValue ZextLHS = matchZExtFromI32(LHS)) {
1927  SAddr = RHS;
1928  VOffset = ZextLHS;
1929  }
1930  }
1931 
1932  if (SAddr) {
1933  Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
1934  return true;
1935  }
1936  }
1937 
1938  if (Addr->isDivergent() || Addr.getOpcode() == ISD::UNDEF ||
1939  isa<ConstantSDNode>(Addr))
1940  return false;
1941 
1942  // It's cheaper to materialize a single 32-bit zero for vaddr than the two
1943  // moves required to copy a 64-bit SGPR to VGPR.
1944  SAddr = Addr;
1945  SDNode *VMov =
1946  CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
1947  CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
1948  VOffset = SDValue(VMov, 0);
1949  Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
1950  return true;
1951 }
1952 
1953 static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr) {
1954  if (auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
1955  SAddr = CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
1956  } else if (SAddr.getOpcode() == ISD::ADD &&
1957  isa<FrameIndexSDNode>(SAddr.getOperand(0))) {
1958  // Materialize this into a scalar move for scalar address to avoid
1959  // readfirstlane.
1960  auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
1961  SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
1962  FI->getValueType(0));
1963  SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, SDLoc(SAddr),
1964  MVT::i32, TFI, SAddr.getOperand(1)),
1965  0);
1966  }
1967 
1968  return SAddr;
1969 }
1970 
1971 // Match (32-bit SGPR base) + sext(imm offset)
1972 bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
1973  SDValue &SAddr,
1974  SDValue &Offset) const {
1975  if (Addr->isDivergent())
1976  return false;
1977 
1978  SDLoc DL(Addr);
1979 
1980  int64_t COffsetVal = 0;
1981 
1982  if (CurDAG->isBaseWithConstantOffset(Addr)) {
1983  COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
1984  SAddr = Addr.getOperand(0);
1985  } else {
1986  SAddr = Addr;
1987  }
1988 
1989  SAddr = SelectSAddrFI(CurDAG, SAddr);
1990 
1991  const SIInstrInfo *TII = Subtarget->getInstrInfo();
1992 
1993  if (!TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS,
1994  SIInstrFlags::FlatScratch)) {
1995  int64_t SplitImmOffset, RemainderOffset;
1996  std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
1997  COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, SIInstrFlags::FlatScratch);
1998 
1999  COffsetVal = SplitImmOffset;
2000 
2001  SDValue AddOffset =
2002  SAddr.getOpcode() == ISD::TargetFrameIndex
2003  ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
2004  : CurDAG->getTargetConstant(RemainderOffset, DL, MVT::i32);
2005  SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, MVT::i32,
2006  SAddr, AddOffset),
2007  0);
2008  }
2009 
2010  Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i16);
2011 
2012  return true;
2013 }
2014 
2015 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
2016  SDValue &Offset, bool &Imm) const {
2017  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
2018  if (!C) {
2019  if (ByteOffsetNode.getValueType().isScalarInteger() &&
2020  ByteOffsetNode.getValueType().getSizeInBits() == 32) {
2021  Offset = ByteOffsetNode;
2022  Imm = false;
2023  return true;
2024  }
2025  if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
2026  if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) {
2027  Offset = ByteOffsetNode.getOperand(0);
2028  Imm = false;
2029  return true;
2030  }
2031  }
2032  return false;
2033  }
2034 
2035  SDLoc SL(ByteOffsetNode);
2036  // GFX9 and GFX10 have signed byte immediate offsets.
2037  int64_t ByteOffset = C->getSExtValue();
2038  Optional<int64_t> EncodedOffset =
2039  AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, false);
2040  if (EncodedOffset) {
2041  Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
2042  Imm = true;
2043  return true;
2044  }
2045 
2046  // SGPR and literal offsets are unsigned.
2047  if (ByteOffset < 0)
2048  return false;
2049 
2050  EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset);
2051  if (EncodedOffset) {
2052  Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
2053  return true;
2054  }
2055 
2056  if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
2057  return false;
2058 
2059  SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
2060  Offset = SDValue(
2061  CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
2062 
2063  return true;
2064 }
2065 
2066 SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
2067  if (Addr.getValueType() != MVT::i32)
2068  return Addr;
2069 
2070  // Zero-extend a 32-bit address.
2071  SDLoc SL(Addr);
2072 
2073  const MachineFunction &MF = CurDAG->getMachineFunction();
2074  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
2075  unsigned AddrHiVal = Info->get32BitAddressHighBits();
2076  SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
2077 
2078  const SDValue Ops[] = {
2079  CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
2080  Addr,
2081  CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
2082  SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
2083  0),
2084  CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
2085  };
2086 
2087  return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
2088  Ops), 0);
2089 }
2090 
2091 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
2092  SDValue &Offset, bool &Imm) const {
2093  SDLoc SL(Addr);
2094 
2095  // A 32-bit (address + offset) should not cause unsigned 32-bit integer
2096  // wraparound, because s_load instructions perform the addition in 64 bits.
2097  if ((Addr.getValueType() != MVT::i32 ||
2098  Addr->getFlags().hasNoUnsignedWrap())) {
2099  SDValue N0, N1;
2100  // Extract the base and offset if possible.
2101  if (CurDAG->isBaseWithConstantOffset(Addr) ||
2102  Addr.getOpcode() == ISD::ADD) {
2103  N0 = Addr.getOperand(0);
2104  N1 = Addr.getOperand(1);
2105  } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
2106  assert(N0 && N1 && isa<ConstantSDNode>(N1));
2107  }
2108  if (N0 && N1) {
2109  if (SelectSMRDOffset(N1, Offset, Imm)) {
2110  SBase = Expand32BitAddress(N0);
2111  return true;
2112  }
2113  }
2114  }
2115  SBase = Expand32BitAddress(Addr);
2116  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
2117  Imm = true;
2118  return true;
2119 }
2120 
2121 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
2122  SDValue &Offset) const {
2123  bool Imm = false;
2124  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
2125 }
2126 
2127 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
2128  SDValue &Offset) const {
2129 
2130  assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
2131 
2132  bool Imm = false;
2133  if (!SelectSMRD(Addr, SBase, Offset, Imm))
2134  return false;
2135 
2136  return !Imm && isa<ConstantSDNode>(Offset);
2137 }
2138 
2139 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
2140  SDValue &Offset) const {
2141  bool Imm = false;
2142  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
2143  !isa<ConstantSDNode>(Offset);
2144 }
2145 
2146 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
2147  SDValue &Offset) const {
2148  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
2149  // The immediate offset for S_BUFFER instructions is unsigned.
2150  if (auto Imm =
2151  AMDGPU::getSMRDEncodedOffset(*Subtarget, C->getZExtValue(), true)) {
2152  Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32);
2153  return true;
2154  }
2155  }
2156 
2157  return false;
2158 }
2159 
2160 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
2161  SDValue &Offset) const {
2162  assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
2163 
2164  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
2165  if (auto Imm = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget,
2166  C->getZExtValue())) {
2167  Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32);
2168  return true;
2169  }
2170  }
2171 
2172  return false;
2173 }
2174 
2175 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
2176  SDValue &Base,
2177  SDValue &Offset) const {
2178  SDLoc DL(Index);
2179 
2180  if (CurDAG->isBaseWithConstantOffset(Index)) {
2181  SDValue N0 = Index.getOperand(0);
2182  SDValue N1 = Index.getOperand(1);
2183  ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
2184 
2185  // (add n0, c0)
2186  // Don't peel off the offset (c0) if doing so could possibly lead
2187  // the base (n0) to be negative.
2188  // (or n0, |c0|) can never change a sign given isBaseWithConstantOffset.
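 // e.g. Index = (add n0, 4): peel to Base = n0, Offset = 4 only when
 // n0's sign bit is known zero (or the add is really an or).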
2189  if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0) ||
2190  (Index->getOpcode() == ISD::OR && C1->getSExtValue() >= 0)) {
2191  Base = N0;
2192  Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
2193  return true;
2194  }
2195  }
2196 
2197  if (isa<ConstantSDNode>(Index))
2198  return false;
2199 
2200  Base = Index;
2201  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2202  return true;
2203 }
2204 
2205 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
2206  SDValue Val, uint32_t Offset,
2207  uint32_t Width) {
2208  // Transformation function: pack the offset and width of a BFE into
2209  // the format expected by S_BFE_I32 / S_BFE_U32. In the second
2210  // source, bits [5:0] contain the offset and bits [22:16] the width.
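 // e.g. Offset = 8, Width = 16 packs to PackedVal = 8 | (16 << 16)
 // = 0x00100008.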
2211  uint32_t PackedVal = Offset | (Width << 16);
2212  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
2213 
2214  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
2215 }
2216 
2217 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
2218  // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
2219  // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
2220  // Predicate: 0 < b <= c < 32
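 // e.g. with b = 8 and c = 16, ((a << 8) srl 16) becomes
 // "BFE_U32 a, 8, 16" (offset c-b = 8, width 32-c = 16).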
2221 
2222  const SDValue &Shl = N->getOperand(0);
2223  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
2224  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2225 
2226  if (B && C) {
2227  uint32_t BVal = B->getZExtValue();
2228  uint32_t CVal = C->getZExtValue();
2229 
2230  if (0 < BVal && BVal <= CVal && CVal < 32) {
2231  bool Signed = N->getOpcode() == ISD::SRA;
2232  unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2233 
2234  ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
2235  32 - CVal));
2236  return;
2237  }
2238  }
2239  SelectCode(N);
2240 }
2241 
2242 void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
2243  switch (N->getOpcode()) {
2244  case ISD::AND:
2245  if (N->getOperand(0).getOpcode() == ISD::SRL) {
2246  // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
2247  // Predicate: isMask(mask)
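 // e.g. (a srl 4) & 0xff becomes "BFE_U32 a, 4, 8", since
 // popcount(0xff) = 8.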
2248  const SDValue &Srl = N->getOperand(0);
2249  ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
2250  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
2251 
2252  if (Shift && Mask) {
2253  uint32_t ShiftVal = Shift->getZExtValue();
2254  uint32_t MaskVal = Mask->getZExtValue();
2255 
2256  if (isMask_32(MaskVal)) {
2257  uint32_t WidthVal = countPopulation(MaskVal);
2258 
2259  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
2260  Srl.getOperand(0), ShiftVal, WidthVal));
2261  return;
2262  }
2263  }
2264  }
2265  break;
2266  case ISD::SRL:
2267  if (N->getOperand(0).getOpcode() == ISD::AND) {
2268  // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
2269  // Predicate: isMask(mask >> b)
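 // e.g. (a & 0xff0) srl 4 becomes "BFE_U32 a, 4, 8", since
 // 0xff0 >> 4 = 0xff is a mask with popcount 8.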
2270  const SDValue &And = N->getOperand(0);
2271  ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
2272  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
2273 
2274  if (Shift && Mask) {
2275  uint32_t ShiftVal = Shift->getZExtValue();
2276  uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
2277 
2278  if (isMask_32(MaskVal)) {
2279  uint32_t WidthVal = countPopulation(MaskVal);
2280 
2281  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
2282  And.getOperand(0), ShiftVal, WidthVal));
2283  return;
2284  }
2285  }
2286  } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
2287  SelectS_BFEFromShifts(N);
2288  return;
2289  }
2290  break;
2291  case ISD::SRA:
2292  if (N->getOperand(0).getOpcode() == ISD::SHL) {
2293  SelectS_BFEFromShifts(N);
2294  return;
2295  }
2296  break;
2297 
2298  case ISD::SIGN_EXTEND_INREG: {
2299  // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
2300  SDValue Src = N->getOperand(0);
2301  if (Src.getOpcode() != ISD::SRL)
2302  break;
2303 
2304  const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
2305  if (!Amt)
2306  break;
2307 
2308  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2309  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
2310  Amt->getZExtValue(), Width));
2311  return;
2312  }
2313  }
2314 
2315  SelectCode(N);
2316 }
2317 
2318 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
2319  assert(N->getOpcode() == ISD::BRCOND);
2320  if (!N->hasOneUse())
2321  return false;
2322 
2323  SDValue Cond = N->getOperand(1);
2324  if (Cond.getOpcode() == ISD::CopyToReg)
2325  Cond = Cond.getOperand(2);
2326 
2327  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
2328  return false;
2329 
2330  MVT VT = Cond.getOperand(0).getSimpleValueType();
2331  if (VT == MVT::i32)
2332  return true;
2333 
2334  if (VT == MVT::i64) {
2335  auto ST = static_cast<const GCNSubtarget *>(Subtarget);
2336 
2337  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
2338  return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
2339  }
2340 
2341  return false;
2342 }
2343 
2344 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
2345  SDValue Cond = N->getOperand(1);
2346 
2347  if (Cond.isUndef()) {
2348  CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
2349  N->getOperand(2), N->getOperand(0));
2350  return;
2351  }
2352 
2353  const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
2354  const SIRegisterInfo *TRI = ST->getRegisterInfo();
2355 
2356  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
2357  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
2358  Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
2359  SDLoc SL(N);
2360 
2361  if (!UseSCCBr) {
2362  // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
2363  // analyzed what generates the vcc value, so we do not know whether vcc
2364  // bits for disabled lanes are 0. Thus we need to mask out bits for
2365  // disabled lanes.
2366  //
2367  // For the case that we select S_CBRANCH_SCC1 and it gets
2368  // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
2369  // SIInstrInfo::moveToVALU, which inserts the S_AND.
2370  //
2371  // We could add an analysis of what generates the vcc value here and omit
2372  // the S_AND when it is unnecessary. But it would be better to add a
2373  // separate pass after SIFixSGPRCopies to do the unnecessary S_AND
2374  // removal, so it catches both cases.
2375  Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32
2376  : AMDGPU::S_AND_B64,
2377  SL, MVT::i1,
2378  CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO
2379  : AMDGPU::EXEC,
2380  MVT::i1),
2381  Cond),
2382  0);
2383  }
2384 
2385  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
2386  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
2387  N->getOperand(2), // Basic Block
2388  VCC.getValue(0));
2389 }
2390 
2391 void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
2392  MVT VT = N->getSimpleValueType(0);
2393  bool IsFMA = N->getOpcode() == ISD::FMA;
2394  if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
2395  !Subtarget->hasFmaMixInsts()) ||
2396  ((IsFMA && Subtarget->hasMadMixInsts()) ||
2397  (!IsFMA && Subtarget->hasFmaMixInsts()))) {
2398  SelectCode(N);
2399  return;
2400  }
2401 
2402  SDValue Src0 = N->getOperand(0);
2403  SDValue Src1 = N->getOperand(1);
2404  SDValue Src2 = N->getOperand(2);
2405  unsigned Src0Mods, Src1Mods, Src2Mods;
2406 
2407  // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
2408  // using the conversion from f16.
2409  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
2410  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
2411  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
2412 
2413  assert((IsFMA || !Mode.allFP32Denormals()) &&
2414  "fmad selected with denormals enabled");
2415  // TODO: We can select this with f32 denormals enabled if all the sources are
2416  // converted from f16 (in which case fmad isn't legal).
2417 
2418  if (Sel0 || Sel1 || Sel2) {
2419  // For dummy operands.
2420  SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2421  SDValue Ops[] = {
2422  CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
2423  CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
2424  CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
2425  CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
2426  Zero, Zero
2427  };
2428 
2429  CurDAG->SelectNodeTo(N,
2430  IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
2431  MVT::f32, Ops);
2432  } else {
2433  SelectCode(N);
2434  }
2435 }
2436 
2437 // This is here because there isn't a way to use the generated sub0_sub1 as the
2438 // subreg index to EXTRACT_SUBREG in tablegen.
2439 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
2440  MemSDNode *Mem = cast<MemSDNode>(N);
2441  unsigned AS = Mem->getAddressSpace();
2442  if (AS == AMDGPUAS::FLAT_ADDRESS) {
2443  SelectCode(N);
2444  return;
2445  }
2446 
2447  MVT VT = N->getSimpleValueType(0);
2448  bool Is32 = (VT == MVT::i32);
2449  SDLoc SL(N);
2450 
2451  MachineSDNode *CmpSwap = nullptr;
2452  if (Subtarget->hasAddr64()) {
2453  SDValue SRsrc, VAddr, SOffset, Offset;
2454 
2455  if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset)) {
2456  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
2457  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
2458  SDValue CmpVal = Mem->getOperand(2);
2459  SDValue CPol = CurDAG->getTargetConstant(AMDGPU::CPol::GLC, SL, MVT::i32);
2460 
2461  // XXX - Do we care about glue operands?
2462 
2463  SDValue Ops[] = {CmpVal, VAddr, SRsrc, SOffset, Offset, CPol,
2464  Mem->getChain()};
2465 
2466  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
2467  }
2468  }
2469 
2470  if (!CmpSwap) {
2471  SDValue SRsrc, SOffset, Offset;
2472  if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset)) {
2473  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
2474  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
2475 
2476  SDValue CmpVal = Mem->getOperand(2);
2477  SDValue CPol = CurDAG->getTargetConstant(AMDGPU::CPol::GLC, SL, MVT::i32);
2478  SDValue Ops[] = {CmpVal, SRsrc, SOffset, Offset, CPol, Mem->getChain()};
2479 
2480  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
2481  }
2482  }
2483 
2484  if (!CmpSwap) {
2485  SelectCode(N);
2486  return;
2487  }
2488 
2489  MachineMemOperand *MMO = Mem->getMemOperand();
2490  CurDAG->setNodeMemRefs(CmpSwap, {MMO});
2491 
2492  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
2493  SDValue Extract
2494  = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
2495 
2496  ReplaceUses(SDValue(N, 0), Extract);
2497  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
2498  CurDAG->RemoveDeadNode(N);
2499 }
2500 
2501 void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
2502  // The address is assumed to be uniform, so if it ends up in a VGPR, it will
2503  // be copied to an SGPR with readfirstlane.
2504  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2505  AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2506 
2507  SDValue Chain = N->getOperand(0);
2508  SDValue Ptr = N->getOperand(2);
2509  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2510  MachineMemOperand *MMO = M->getMemOperand();
2511  bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
2512 
2513  SDValue Offset;
2514  if (CurDAG->isBaseWithConstantOffset(Ptr)) {
2515  SDValue PtrBase = Ptr.getOperand(0);
2516  SDValue PtrOffset = Ptr.getOperand(1);
2517 
2518  const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
2519  if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
2520  N = glueCopyToM0(N, PtrBase);
2521  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
2522  }
2523  }
2524 
2525  if (!Offset) {
2526  N = glueCopyToM0(N, Ptr);
2527  Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2528  }
2529 
2530  SDValue Ops[] = {
2531  Offset,
2532  CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
2533  Chain,
2534  N->getOperand(N->getNumOperands() - 1) // New glue
2535  };
2536 
2537  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2538  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2539 }
2540 
2541 static unsigned gwsIntrinToOpcode(unsigned IntrID) {
2542  switch (IntrID) {
2543  case Intrinsic::amdgcn_ds_gws_init:
2544  return AMDGPU::DS_GWS_INIT;
2545  case Intrinsic::amdgcn_ds_gws_barrier:
2546  return AMDGPU::DS_GWS_BARRIER;
2547  case Intrinsic::amdgcn_ds_gws_sema_v:
2548  return AMDGPU::DS_GWS_SEMA_V;
2549  case Intrinsic::amdgcn_ds_gws_sema_br:
2550  return AMDGPU::DS_GWS_SEMA_BR;
2551  case Intrinsic::amdgcn_ds_gws_sema_p:
2552  return AMDGPU::DS_GWS_SEMA_P;
2553  case Intrinsic::amdgcn_ds_gws_sema_release_all:
2554  return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
2555  default:
2556  llvm_unreachable("not a gws intrinsic");
2557  }
2558 }
2559 
2560 void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
2561  if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
2562  !Subtarget->hasGWSSemaReleaseAll()) {
2563  // Let this error.
2564  SelectCode(N);
2565  return;
2566  }
2567 
2568  // Chain, intrinsic ID, vsrc, offset
2569  const bool HasVSrc = N->getNumOperands() == 4;
2570  assert(HasVSrc || N->getNumOperands() == 3);
2571 
2572  SDLoc SL(N);
2573  SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
2574  int ImmOffset = 0;
2575  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2576  MachineMemOperand *MMO = M->getMemOperand();
2577 
2578  // Don't worry if the offset ends up in a VGPR. Only one lane will have
2579  // effect, so SIFixSGPRCopies will validly insert readfirstlane.
2580 
2581  // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
2582  // offset field) % 64. Some versions of the programming guide omit the m0
2583  // part, or claim it's from offset 0.
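 // Concretely: a constant offset goes into the offset field with m0 set
 // to 0, while a variable offset is shifted left by 16 below so it lands
 // in M0[21:16].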
2584  if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
2585  // If we have a constant offset, try to use the 0 in m0 as the base.
2586  // TODO: Look into changing the default m0 initialization value. If the
2587  // default -1 only set the low 16-bits, we could leave it as-is and add 1 to
2588  // the immediate offset.
2589  glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
2590  ImmOffset = ConstOffset->getZExtValue();
2591  } else {
2592  if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
2593  ImmOffset = BaseOffset.getConstantOperandVal(1);
2594  BaseOffset = BaseOffset.getOperand(0);
2595  }
2596 
2597  // Prefer to do the shift in an SGPR since it should be possible to use m0
2598  // as the result directly. If it's already an SGPR, it will be eliminated
2599  // later.
2600  SDNode *SGPROffset
2601  = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
2602  BaseOffset);
2603  // Shift to offset in m0
2604  SDNode *M0Base
2605  = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
2606  SDValue(SGPROffset, 0),
2607  CurDAG->getTargetConstant(16, SL, MVT::i32));
2608  glueCopyToM0(N, SDValue(M0Base, 0));
2609  }
2610 
2611  SDValue Chain = N->getOperand(0);
2612  SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
2613 
2614  const unsigned Opc = gwsIntrinToOpcode(IntrID);
2615  SmallVector<SDValue, 5> Ops;
2616  if (HasVSrc)
2617  Ops.push_back(N->getOperand(2));
2618  Ops.push_back(OffsetField);
2619  Ops.push_back(Chain);
2620 
2621  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2622  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2623 }
2624 
2625 void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
2626  if (Subtarget->getLDSBankCount() != 16) {
2627  // This is a single instruction with a pattern.
2628  SelectCode(N);
2629  return;
2630  }
2631 
2632  SDLoc DL(N);
2633 
2634  // This requires 2 instructions. It is possible to write a pattern to support
2635  // this, but the generated isel emitter doesn't correctly deal with multiple
2636  // output instructions using the same physical register input. The copy to m0
2637  // is incorrectly placed before the second instruction.
2638  //
2639  // TODO: Match source modifiers.
2640  //
2641  // def : Pat <
2642  // (int_amdgcn_interp_p1_f16
2643  // (VOP3Mods f32:$src0, i32:$src0_modifiers),
2644  // (i32 timm:$attrchan), (i32 timm:$attr),
2645  // (i1 timm:$high), M0),
2646  // (V_INTERP_P1LV_F16 $src0_modifiers, VGPR_32:$src0, timm:$attr,
2647  // timm:$attrchan, 0,
2648  // (V_INTERP_MOV_F32 2, timm:$attr, timm:$attrchan), timm:$high)> {
2649  // let Predicates = [has16BankLDS];
2650  // }
2651 
2652  // 16 bank LDS
2653  SDValue ToM0 = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, AMDGPU::M0,
2654  N->getOperand(5), SDValue());
2655 
2656  SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::Other);
2657 
2658  SDNode *InterpMov =
2659  CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs, {
2660  CurDAG->getTargetConstant(2, DL, MVT::i32), // P0
2661  N->getOperand(3), // Attr
2662  N->getOperand(2), // Attrchan
2663  ToM0.getValue(1) // In glue
2664  });
2665 
2666  SDNode *InterpP1LV =
2667  CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32, {
2668  CurDAG->getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
2669  N->getOperand(1), // Src0
2670  N->getOperand(3), // Attr
2671  N->getOperand(2), // Attrchan
2672  CurDAG->getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
2673  SDValue(InterpMov, 0), // Src2 - holds two f16 values selected by high
2674  N->getOperand(4), // high
2675  CurDAG->getTargetConstant(0, DL, MVT::i1), // $clamp
2676  CurDAG->getTargetConstant(0, DL, MVT::i32), // $omod
2677  SDValue(InterpMov, 1)
2678  });
2679 
2680  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), SDValue(InterpP1LV, 0));
2681 }
2682 
2683 void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
2684  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2685  switch (IntrID) {
2686  case Intrinsic::amdgcn_ds_append:
2687  case Intrinsic::amdgcn_ds_consume: {
2688  if (N->getValueType(0) != MVT::i32)
2689  break;
2690  SelectDSAppendConsume(N, IntrID);
2691  return;
2692  }
2693  }
2694 
2695  SelectCode(N);
2696 }
2697 
2698 void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
2699  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
2700  unsigned Opcode;
2701  switch (IntrID) {
2702  case Intrinsic::amdgcn_wqm:
2703  Opcode = AMDGPU::WQM;
2704  break;
2705  case Intrinsic::amdgcn_softwqm:
2706  Opcode = AMDGPU::SOFT_WQM;
2707  break;
2708  case Intrinsic::amdgcn_wwm:
2709  case Intrinsic::amdgcn_strict_wwm:
2710  Opcode = AMDGPU::STRICT_WWM;
2711  break;
2712  case Intrinsic::amdgcn_strict_wqm:
2713  Opcode = AMDGPU::STRICT_WQM;
2714  break;
2715  case Intrinsic::amdgcn_interp_p1_f16:
2716  SelectInterpP1F16(N);
2717  return;
2718  default:
2719  SelectCode(N);
2720  return;
2721  }
2722 
2723  SDValue Src = N->getOperand(1);
2724  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
2725 }
2726 
2727 void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
2728  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2729  switch (IntrID) {
2730  case Intrinsic::amdgcn_ds_gws_init:
2731  case Intrinsic::amdgcn_ds_gws_barrier:
2732  case Intrinsic::amdgcn_ds_gws_sema_v:
2733  case Intrinsic::amdgcn_ds_gws_sema_br:
2734  case Intrinsic::amdgcn_ds_gws_sema_p:
2735  case Intrinsic::amdgcn_ds_gws_sema_release_all:
2736  SelectDS_GWS(N, IntrID);
2737  return;
2738  default:
2739  break;
2740  }
2741 
2742  SelectCode(N);
2743 }
2744 
2745 bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
2746  unsigned &Mods,
2747  bool AllowAbs) const {
2748  Mods = 0;
2749  Src = In;
2750 
2751  if (Src.getOpcode() == ISD::FNEG) {
2752  Mods |= SISrcMods::NEG;
2753  Src = Src.getOperand(0);
2754  }
2755 
2756  if (AllowAbs && Src.getOpcode() == ISD::FABS) {
2757  Mods |= SISrcMods::ABS;
2758  Src = Src.getOperand(0);
2759  }
2760 
2761  return true;
2762 }
2763 
2764 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
2765  SDValue &SrcMods) const {
2766  unsigned Mods;
2767  if (SelectVOP3ModsImpl(In, Src, Mods)) {
2768  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2769  return true;
2770  }
2771 
2772  return false;
2773 }
2774 
2775 bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
2776  SDValue &SrcMods) const {
2777  unsigned Mods;
2778  if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {
2779  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2780  return true;
2781  }
2782 
2783  return false;
2784 }
2785 
2786 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
2787  SDValue &SrcMods) const {
2788  SelectVOP3Mods(In, Src, SrcMods);
2789  return isNoNanSrc(Src);
2790 }
2791 
2792 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
2793  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
2794  return false;
2795 
2796  Src = In;
2797  return true;
2798 }
2799 
2800 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
2801  SDValue &SrcMods, SDValue &Clamp,
2802  SDValue &Omod) const {
2803  SDLoc DL(In);
2804  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2805  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2806 
2807  return SelectVOP3Mods(In, Src, SrcMods);
2808 }
2809 
2810 bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
2811  SDValue &SrcMods, SDValue &Clamp,
2812  SDValue &Omod) const {
2813  SDLoc DL(In);
2814  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2815  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2816 
2817  return SelectVOP3BMods(In, Src, SrcMods);
2818 }
2819 
2820 bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
2821  SDValue &Clamp, SDValue &Omod) const {
2822  Src = In;
2823 
2824  SDLoc DL(In);
2825  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2826  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2827 
2828  return true;
2829 }
2830 
2831 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
2832  SDValue &SrcMods) const {
2833  unsigned Mods = 0;
2834  Src = In;
2835 
2836  if (Src.getOpcode() == ISD::FNEG) {
2837  Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
2838  Src = Src.getOperand(0);
2839  }
2840 
2841  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
2842  unsigned VecMods = Mods;
2843 
2844  SDValue Lo = stripBitcast(Src.getOperand(0));
2845  SDValue Hi = stripBitcast(Src.getOperand(1));
2846 
2847  if (Lo.getOpcode() == ISD::FNEG) {
2848  Lo = stripBitcast(Lo.getOperand(0));
2849  Mods ^= SISrcMods::NEG;
2850  }
2851 
2852  if (Hi.getOpcode() == ISD::FNEG) {
2853  Hi = stripBitcast(Hi.getOperand(0));
2854  Mods ^= SISrcMods::NEG_HI;
2855  }
2856 
2857  if (isExtractHiElt(Lo, Lo))
2858  Mods |= SISrcMods::OP_SEL_0;
2859 
2860  if (isExtractHiElt(Hi, Hi))
2861  Mods |= SISrcMods::OP_SEL_1;
2862 
2863  unsigned VecSize = Src.getValueSizeInBits();
2864  Lo = stripExtractLoElt(Lo);
2865  Hi = stripExtractLoElt(Hi);
2866 
2867  if (Lo.getValueSizeInBits() > VecSize) {
2868  Lo = CurDAG->getTargetExtractSubreg(
2869  (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
2870  MVT::getIntegerVT(VecSize), Lo);
2871  }
2872 
2873  if (Hi.getValueSizeInBits() > VecSize) {
2874  Hi = CurDAG->getTargetExtractSubreg(
2875  (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
2876  MVT::getIntegerVT(VecSize), Hi);
2877  }
2878 
2879  assert(Lo.getValueSizeInBits() <= VecSize &&
2880  Hi.getValueSizeInBits() <= VecSize);
2881 
2882  if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
2883  // Really a scalar input. Just select from the low half of the register to
2884  // avoid packing.
2885 
2886  if (VecSize == 32 || VecSize == Lo.getValueSizeInBits()) {
2887  Src = Lo;
2888  } else {
2889  assert(Lo.getValueSizeInBits() == 32 && VecSize == 64);
2890 
2891  SDLoc SL(In);
2892  SDValue Undef = SDValue(
2893  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL,
2894  Lo.getValueType()), 0);
2895  auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
2896  : AMDGPU::SReg_64RegClassID;
2897  const SDValue Ops[] = {
2898  CurDAG->getTargetConstant(RC, SL, MVT::i32),
2899  Lo, CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
2900  Undef, CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) };
2901 
2902  Src = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL,
2903  Src.getValueType(), Ops), 0);
2904  }
2905  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2906  return true;
2907  }
2908 
2909  if (VecSize == 64 && Lo == Hi && isa<ConstantFPSDNode>(Lo)) {
2910  uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF()
2911  .bitcastToAPInt().getZExtValue();
2912  if (AMDGPU::isInlinableLiteral32(Lit, Subtarget->hasInv2PiInlineImm())) {
2913  Src = CurDAG->getTargetConstant(Lit, SDLoc(In), MVT::i64);
2914  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2915  return true;
2916  }
2917  }
2918 
2919  Mods = VecMods;
2920  }
2921 
2922  // Packed instructions do not have abs modifiers.
2923  Mods |= SISrcMods::OP_SEL_1;
2924 
2925  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2926  return true;
2927 }
2928 
2929 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
2930  SDValue &SrcMods) const {
2931  Src = In;
2932  // FIXME: Handle op_sel
2933  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2934  return true;
2935 }
2936 
2937 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
2938  SDValue &SrcMods) const {
2939  // FIXME: Handle op_sel
2940  return SelectVOP3Mods(In, Src, SrcMods);
2941 }
2942 
2943 // The return value is not whether the match is possible (which it always is),
2944 // but whether or not a conversion is really used.
2945 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
2946  unsigned &Mods) const {
2947  Mods = 0;
2948  SelectVOP3ModsImpl(In, Src, Mods);
2949 
2950  if (Src.getOpcode() == ISD::FP_EXTEND) {
2951  Src = Src.getOperand(0);
2952  assert(Src.getValueType() == MVT::f16);
2953  Src = stripBitcast(Src);
2954 
2955  // Be careful about folding modifiers if we already have an abs. fneg is
2956  // applied last, so we don't want to apply an earlier fneg.
2957  if ((Mods & SISrcMods::ABS) == 0) {
2958  unsigned ModsTmp;
2959  SelectVOP3ModsImpl(Src, Src, ModsTmp);
2960 
2961  if ((ModsTmp & SISrcMods::NEG) != 0)
2962  Mods ^= SISrcMods::NEG;
2963 
2964  if ((ModsTmp & SISrcMods::ABS) != 0)
2965  Mods |= SISrcMods::ABS;
2966  }
2967 
2968  // op_sel/op_sel_hi decide the source type and source.
2969  // If the source's op_sel_hi is set, it indicates to do a conversion from fp16.
2970  // If the source's op_sel is set, it picks the high half of the source
2971  // register.
2972 
2973  Mods |= SISrcMods::OP_SEL_1;
2974  if (isExtractHiElt(Src, Src)) {
2975  Mods |= SISrcMods::OP_SEL_0;
2976 
2977  // TODO: Should we try to look for neg/abs here?
2978  }
2979 
2980  return true;
2981  }
2982 
2983  return false;
2984 }
2985 
2986 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2987  SDValue &SrcMods) const {
2988  unsigned Mods = 0;
2989  SelectVOP3PMadMixModsImpl(In, Src, Mods);
2990  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2991  return true;
2992 }
2993 
2994 SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
2995  if (In.isUndef())
2996  return CurDAG->getUNDEF(MVT::i32);
2997 
2998  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2999  SDLoc SL(In);
3000  return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
3001  }
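 // e.g. a constant 0x3C00 (hypothetical f16 bit pattern for 1.0) becomes
 // 0x3C000000, i.e. the value placed in the high 16 bits.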
3002 
3003  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
3004  SDLoc SL(In);
3005  return CurDAG->getConstant(
3006  C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
3007  }
3008 
3009  SDValue Src;
3010  if (isExtractHiElt(In, Src))
3011  return Src;
3012 
3013  return SDValue();
3014 }
3015 
3016 bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
3017  assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);
3018 
3019  const SIRegisterInfo *SIRI =
3020  static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
3021  const SIInstrInfo * SII =
3022  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
3023 
3024  unsigned Limit = 0;
3025  bool AllUsesAcceptSReg = true;
3026  for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
3027  Limit < 10 && U != E; ++U, ++Limit) {
3028  const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
3029 
3030  // If the register class is unknown, it could be a register class that
3031  // needs to be an SGPR, e.g. one coming from an inline asm
3032  // constraint.
3033  if (!RC || SIRI->isSGPRClass(RC))
3034  return false;
3035 
3036  if (RC != &AMDGPU::VS_32RegClass) {
3037  AllUsesAcceptSReg = false;
3038  SDNode * User = *U;
3039  if (User->isMachineOpcode()) {
3040  unsigned Opc = User->getMachineOpcode();
3041  MCInstrDesc Desc = SII->get(Opc);
3042  if (Desc.isCommutable()) {
3043  unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
3044  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
3045  if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
3046  unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
3047  const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
3048  if (CommutedRC == &AMDGPU::VS_32RegClass)
3049  AllUsesAcceptSReg = true;
3050  }
3051  }
3052  }
3053  // If "AllUsesAcceptSReg == false" so far, we haven't succeeded in
3054  // commuting the current user. This means we have at least one use
3055  // that strictly requires a VGPR, so we will not attempt to commute
3056  // other user instructions.
3057  if (!AllUsesAcceptSReg)
3058  break;
3059  }
3060  }
3061  return !AllUsesAcceptSReg && (Limit < 10);
3062 }
3063 
3064 bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
3065  auto Ld = cast<LoadSDNode>(N);
3066 
3067  return Ld->getAlignment() >= 4 &&
3068  (
3069  (
3070  (
3071  Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
3072  Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT
3073  )
3074  &&
3075  !N->isDivergent()
3076  )
3077  ||
3078  (
3079  Subtarget->getScalarizeGlobalBehavior() &&
3080  Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
3081  Ld->isSimple() &&
3082  !N->isDivergent() &&
3083  static_cast<const SITargetLowering *>(
3084  getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)
3085  )
3086  );
3087 }
3088 
3089 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
3090  const AMDGPUTargetLowering& Lowering =
3091  *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
3092  bool IsModified = false;
3093  do {
3094  IsModified = false;
3095 
3096  // Go over all selected nodes and try to fold them a bit more
3097  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
3098  while (Position != CurDAG->allnodes_end()) {
3099  SDNode *Node = &*Position++;
3100  MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
3101  if (!MachineNode)
3102  continue;
3103 
3104  SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
3105  if (ResNode != Node) {
3106  if (ResNode)
3107  ReplaceUses(Node, ResNode);
3108  IsModified = true;
3109  }
3110  }
3111  CurDAG->RemoveDeadNodes();
3112  } while (IsModified);
3113 }
3114 
3115 bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
3116  Subtarget = &MF.getSubtarget<R600Subtarget>();
3117  return SelectionDAGISel::runOnMachineFunction(MF);
3118 }
3119 
3120 bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
3121  if (!N->readMem())
3122  return false;
3123  if (CbId == -1)
3124  return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
3125  N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
3126 
3127  return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
3128 }
3129 
3130 bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
3131  SDValue& IntPtr) {
3132  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
3133  IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
3134  true);
3135  return true;
3136  }
3137  return false;
3138 }
3139 
3140 bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
3141  SDValue& BaseReg, SDValue &Offset) {
3142  if (!isa<ConstantSDNode>(Addr)) {
3143  BaseReg = Addr;
3144  Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
3145  return true;
3146  }
3147  return false;
3148 }
3149 
3150 void R600DAGToDAGISel::Select(SDNode *N) {
3151  unsigned int Opc = N->getOpcode();
3152  if (N->isMachineOpcode()) {
3153  N->setNodeId(-1);
3154  return; // Already selected.
3155  }
3156 
3157  switch (Opc) {
3158  default: break;
3159  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
3160  case ISD::SCALAR_TO_VECTOR:
3161  case ISD::BUILD_VECTOR: {
3162  EVT VT = N->getValueType(0);
3163  unsigned NumVectorElts = VT.getVectorNumElements();
3164  unsigned RegClassID;
3165  // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREGs,
3166  // which adds a 128-bit register copy when going through the
3167  // TwoAddressInstructions pass. We want to avoid 128-bit copies as much
3168  // as possible because they can't be bundled by our scheduler.
3169  switch(NumVectorElts) {
3170  case 2: RegClassID = R600::R600_Reg64RegClassID; break;
3171  case 4:
3172  if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
3173  RegClassID = R600::R600_Reg128VerticalRegClassID;
3174  else
3175  RegClassID = R600::R600_Reg128RegClassID;
3176  break;
3177  default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
3178  }
3179  SelectBuildVector(N, RegClassID);
3180  return;
3181  }
3182  }
3183 
3184  SelectCode(N);
3185 }
3186 
3187 bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
3188  SDValue &Offset) {
3189  ConstantSDNode *C;
3190  SDLoc DL(Addr);
3191 
3192  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
3193  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
3194  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
3195  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
3196  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
3197  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
3198  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
3199  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
3200  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
3201  Base = Addr.getOperand(0);
3202  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
3203  } else {
3204  Base = Addr;
3205  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
3206  }
3207 
3208  return true;
3209 }
3210 
3211 bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
3212  SDValue &Offset) {
3213  ConstantSDNode *IMMOffset;
3214 
3215  if (Addr.getOpcode() == ISD::ADD
3216  && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
3217  && isInt<16>(IMMOffset->getZExtValue())) {
3218 
3219  Base = Addr.getOperand(0);
3220  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
3221  MVT::i32);
3222  return true;
3223  // If the pointer address is constant, we can move it to the offset field.
3224  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
3225  && isInt<16>(IMMOffset->getZExtValue())) {
3226  Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
3227  SDLoc(CurDAG->getEntryNode()),
3228  R600::ZERO, MVT::i32);
3229  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
3230  MVT::i32);
3231  return true;
3232  }
3233 
3234  // Default case, no offset
3235  Base = Addr;
3236  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
3237  return true;
3238 }
llvm::AMDGPUISD::LOAD_D16_HI_I8
@ LOAD_D16_HI_I8
Definition: AMDGPUISelLowering.h:480
llvm::GCNSubtarget::hasFlatSegmentOffsetBug
bool hasFlatSegmentOffsetBug() const
Definition: GCNSubtarget.h:594
llvm::MemSDNode::getMemoryVT
EVT getMemoryVT() const
Return the type of the in-memory value.
Definition: SelectionDAGNodes.h:1328
ValueTracking.h
llvm::ISD::FLOG2
@ FLOG2
Definition: ISDOpcodes.h:875
llvm::MemSDNode::getChain
const SDValue & getChain() const
Definition: SelectionDAGNodes.h:1351
llvm::SDNode
Represents one node in the SelectionDAG.
Definition: SelectionDAGNodes.h:455
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:466
llvm::AMDGPU::getSMRDEncodedLiteralOffset32
Optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
Definition: AMDGPUBaseInfo.cpp:1888
llvm::LoadSDNode
This class is used to represent ISD::LOAD nodes.
Definition: SelectionDAGNodes.h:2263
llvm::MVT::Glue
@ Glue
Definition: MachineValueType.h:262
llvm::SDNode::use_iterator
This class provides iterator support for SDUse operands that use a specific SDNode.
Definition: SelectionDAGNodes.h:719
Shift
bool Shift
Definition: README.txt:468
llvm::MemSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:1353
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:128
i8
Clang compiles this i8
Definition: README.txt:504
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1261
llvm::SelectionDAG::isBaseWithConstantOffset
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
Definition: SelectionDAG.cpp:4316
llvm::ISD::SETCC
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:702
llvm::Optional< int64_t >
llvm::SDNode::isPredecessorOf
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
Definition: SelectionDAGNodes.h:805
llvm::AMDGPU::SIModeRegisterDefaults
Definition: AMDGPUBaseInfo.h:907
i1
Decimal Convert From to National Zoned Signed int_ppc_altivec_bcdcfno i1
Definition: README_P9.txt:147
llvm::createAMDGPUISelDag
FunctionPass * createAMDGPUISelDag(TargetMachine *TM=nullptr, CodeGenOpt::Level OptLevel=CodeGenOpt::Default)
This pass converts a legalized DAG into a AMDGPU-specific.
Definition: AMDGPUISelDAGToDAG.cpp:384
llvm::M68kBeads::Term
@ Term
Definition: M68kBaseInfo.h:71
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::MemSDNode
This is an abstract virtual class for memory operations.
Definition: SelectionDAGNodes.h:1246
llvm::GCNSubtarget
Definition: GCNSubtarget.h:38
matchZExtFromI32
static SDValue matchZExtFromI32(SDValue Op)
Definition: AMDGPUISelDAGToDAG.cpp:1849
llvm::SDNode::isDivergent
bool isDivergent() const
Definition: SelectionDAGNodes.h:686
llvm::Lo_32
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition: MathExtras.h:353
SelectionDAG.h
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::GCNSubtarget::getRegisterInfo
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:232
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
llvm::ISD::FABS
@ FABS
Definition: ISDOpcodes.h:867
llvm::AMDGPUISD::FMIN3
@ FMIN3
Definition: AMDGPUISelLowering.h:391
llvm::ISD::ADDCARRY
@ ADDCARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:290
llvm::ISD::BRCOND
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:956
llvm::SDNode::getVTList
SDVTList getVTList() const
Definition: SelectionDAGNodes.h:924
llvm::SIInstrFlags::FLAT
@ FLAT
Definition: SIDefines.h:51
llvm::GCNSubtarget::getInstrInfo
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:220
llvm::GCNSubtarget::unsafeDSOffsetFoldingEnabled
bool unsafeDSOffsetFoldingEnabled() const
Definition: GCNSubtarget.h:412
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::MipsISD::Hi
@ Hi
Definition: MipsISelLowering.h:75
llvm::R600RegisterInfo
Definition: R600RegisterInfo.h:22
llvm::GCNSubtarget::hasGWSSemaReleaseAll
bool hasGWSSemaReleaseAll() const
Definition: GCNSubtarget.h:630
llvm::AMDGPUAS::CONSTANT_ADDRESS_32BIT
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
Definition: AMDGPU.h:389
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::ISD::FFLOOR
@ FFLOOR
Definition: ISDOpcodes.h:885
SelectSAddrFI
static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr)
Definition: AMDGPUISelDAGToDAG.cpp:1953
llvm::AMDGPU::CPol::CPol
CPol
Definition: SIDefines.h:281
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:43
i64
Clang compiles this i64
Definition: README.txt:504
llvm::SelectionDAG::getTargetFrameIndex
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:688
llvm::RecurKind::And
@ And
Bitwise or logical AND of integers.
llvm::SISrcMods::NEG_HI
@ NEG_HI
Definition: SIDefines.h:202
llvm::SDValue::getValueType
EVT getValueType() const
Return the ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:1113
llvm::LoopInfoBase::getLoopsInPreorder
SmallVector< LoopT *, 4 > getLoopsInPreorder()
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
Definition: LoopInfoImpl.h:577
llvm::MemIntrinsicSDNode
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
Definition: SelectionDAGNodes.h:1458
llvm::AMDGPUSubtarget::get
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Definition: AMDGPUSubtarget.cpp:1139
llvm::SelectionDAG
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:216
SelectionDAGNodes.h
llvm::ISD::Constant
@ Constant
Definition: ISDOpcodes.h:76
llvm::ISD::ZERO_EXTEND
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:729
llvm::SIInstrInfo::isInlineConstant
bool isInlineConstant(const APInt &Imm) const
Definition: SIInstrInfo.cpp:3349
llvm::GCNSubtarget::hasAddr64
bool hasAddr64() const
Definition: GCNSubtarget.h:323
llvm::ISD::ABS
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:640
FunctionLoweringInfo.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::GCNSubtarget::useFlatForGlobal
bool useFlatForGlobal() const
Definition: GCNSubtarget.h:471
llvm::GCNSubtarget::hasFmaMixInsts
bool hasFmaMixInsts() const
Definition: GCNSubtarget.h:373
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:720
llvm::User
Definition: User.h:44
llvm::AMDGPUISD::CVT_PKNORM_U16_F32
@ CVT_PKNORM_U16_F32
Definition: AMDGPUISelLowering.h:455
llvm::ISD::SIGN_EXTEND_INREG
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:747
llvm::ISD::CopyToReg
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
llvm::AMDGPUISD::FMED3
@ FMED3
Definition: AMDGPUISelLowering.h:394
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:35
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::EVT::getVectorNumElements
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:309
f32
So we should use XX3Form_Rcr to implement instrinsic Convert DP outs ins xscvdpsp No builtin are required Round &Convert QP DP(dword[1] is set to zero) No builtin are required Round to Quad Precision because you need to assign rounding mode in instruction Provide builtin(set f128:$vT,(int_ppc_vsx_xsrqpi f128:$vB))(set f128 yields< n x< ty > >< result > yields< ty >< result > No builtin are required Load Store load store see def memrix16 in PPCInstrInfo td Load Store Vector load store outs ins lxsdx set load store with conversion from to outs ins lxsspx set f32
Definition: README_P9.txt:522
llvm::AMDGPUISD::LOAD_D16_LO_I8
@ LOAD_D16_LO_I8
Definition: AMDGPUISelLowering.h:482
llvm::ISD::FROUND
@ FROUND
Definition: ISDOpcodes.h:883
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::SIInstrFlags::WQM
@ WQM
Definition: SIDefines.h:63
llvm::ISD::TRUNCATE
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:735
llvm::AMDGPUISD::ATOMIC_LOAD_FMAX
@ ATOMIC_LOAD_FMAX
Definition: AMDGPUISelLowering.h:496
llvm::ISD::SRA
@ SRA
Definition: ISDOpcodes.h:658
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:195
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MCInstrDesc::isCommutable
bool isCommutable() const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z,...
Definition: MCInstrDesc.h:472
llvm::M0
unsigned M0(unsigned Val)
Definition: VE.h:371
llvm::Instruction
Definition: Instruction.h:45
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:281
llvm::ISD::SINT_TO_FP
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:739
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1631
llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPU.h:383
llvm::ISD::FNEARBYINT
@ FNEARBYINT
Definition: ISDOpcodes.h:882
llvm::ISD::FRINT
@ FRINT
Definition: ISDOpcodes.h:881
llvm::SIInstrInfo::findCommutedOpIndices
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override
Definition: SIInstrInfo.cpp:2161
llvm::LegacyDivergenceAnalysis
Definition: LegacyDivergenceAnalysis.h:31
llvm::SIRegisterInfo::getSubRegFromChannel
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
Definition: SIRegisterInfo.cpp:427
llvm::AMDGPUISD::CVT_PK_U16_U32
@ CVT_PK_U16_U32
Definition: AMDGPUISelLowering.h:457
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:29
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:632
llvm::MCOperandInfo::RegClass
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Definition: MCInstrDesc.h:89
llvm::SDValue::getConstantOperandVal
uint64_t getConstantOperandVal(unsigned i) const
Definition: SelectionDAGNodes.h:1125
llvm::SDValue::getValueSizeInBits
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
Definition: SelectionDAGNodes.h:192
llvm::ISD::CopyFromReg
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:208
llvm::MCID::RegSequence
@ RegSequence
Definition: MCInstrDesc.h:179
llvm::AMDGPUISD::FMA_W_CHAIN
@ FMA_W_CHAIN
Definition: AMDGPUISelLowering.h:378
llvm::GCNSubtarget::getScalarizeGlobalBehavior
bool getScalarizeGlobalBehavior() const
Definition: GCNSubtarget.h:776
llvm::ISD::USUBO
@ USUBO
Definition: ISDOpcodes.h:315
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
findMemSDNode
static MemSDNode * findMemSDNode(SDNode *N)
Definition: AMDGPUISelDAGToDAG.cpp:1724
llvm::Hi_32
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition: MathExtras.h:348
LoopInfo.h
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:634
llvm::ISD::ATOMIC_LOAD_FADD
@ ATOMIC_LOAD_FADD
Definition: ISDOpcodes.h:1151
i32
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM ID Predecessors according to mbb< bb27, 0x8b0a7c0 > Note ADDri is not a two address instruction its result reg1037 is an operand of the PHI node in bb76 and its operand reg1039 is the result of the PHI node We should treat it as a two address code and make sure the ADDri is scheduled after any node that reads reg1039 Use info(i.e. register scavenger) to assign it a free register to allow reuse the collector could move the objects and invalidate the derived pointer This is bad enough in the first but safe points can crop up unpredictably **array_addr i32
Definition: README.txt:122
llvm::ISD::FPOW
@ FPOW
Definition: ISDOpcodes.h:873
llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:377
llvm::AMDGPUISD::BUILD_VERTICAL_VECTOR
@ BUILD_VERTICAL_VECTOR
This node is for VLIW targets and it is used to represent a vector that is stored in consecutive regi...
Definition: AMDGPUISelLowering.h:471
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:622
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::SISrcMods::NEG
@ NEG
Definition: SIDefines.h:199
llvm::APFloat
Definition: APFloat.h:701
llvm::R600Subtarget
Definition: R600Subtarget.h:36
llvm::SIInstrFlags::FlatScratch
@ FlatScratch
Definition: SIDefines.h:109
llvm::countPopulation
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:567
llvm::AMDGPUISD::LDEXP
@ LDEXP
Definition: AMDGPUISelLowering.h:414
llvm::CodeGenOpt::Default
@ Default
Definition: CodeGen.h:55
llvm::isInt< 32 >
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:373
llvm::ISD::FLOG10
@ FLOG10
Definition: ISDOpcodes.h:876
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:341
llvm::ISD::FMAD
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:470
llvm::isUInt< 16 >
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:408
llvm::AMDGPU::CPol::GLC
@ GLC
Definition: SIDefines.h:282
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::MVT::v2f16
@ v2f16
Definition: MachineValueType.h:134
uint64_t
llvm::SelectionDAGISel::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: SelectionDAGISel.cpp:334
llvm::ConstantFPSDNode
Definition: SelectionDAGNodes.h:1587
llvm::MemSDNode::getMemOperand
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Definition: SelectionDAGNodes.h:1332
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:387
llvm::ISD::LOAD
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:921
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:80
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::AMDGPUISD::LOAD_D16_HI
@ LOAD_D16_HI
Definition: AMDGPUISelLowering.h:478
getBaseWithOffsetUsingSplitOR
static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr, SDValue &N0, SDValue &N1)
Definition: AMDGPUISelDAGToDAG.cpp:984
llvm::GCNSubtarget::hasAddNoCarry
bool hasAddNoCarry() const
Definition: GCNSubtarget.h:638
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::GCNSubtarget::privateMemoryResourceIsRangeChecked
bool privateMemoryResourceIsRangeChecked() const
Definition: GCNSubtarget.h:493
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:511
llvm::AMDGPUISD::FMAD_FTZ
@ FMAD_FTZ
Definition: AMDGPUISelLowering.h:404
llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition: SelectionDAGNodes.h:896
llvm::AMDGPUISD::ATOMIC_DEC
@ ATOMIC_DEC
Definition: AMDGPUISelLowering.h:494
llvm::AMDGPUISD::CVT_PK_I16_I32
@ CVT_PK_I16_I32
Definition: AMDGPUISelLowering.h:456
llvm::AMDGPUISD::BFE_I32
@ BFE_I32
Definition: AMDGPUISelLowering.h:420
llvm::LoadSDNode::getExtensionType
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Definition: SelectionDAGNodes.h:2278
llvm::AMDGPUArgumentUsageInfo
Definition: AMDGPUArgumentUsageInfo.h:158
llvm::isUInt< 32 >
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:411
llvm::SDValue::getValue
SDValue getValue(unsigned R) const
Definition: SelectionDAGNodes.h:172
llvm::MVT::i8
@ i8
Definition: MachineValueType.h:44
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
llvm::AMDGPUISD::SIN_HW
@ SIN_HW
Definition: AMDGPUISelLowering.h:384
llvm::isUInt< 8 >
constexpr bool isUInt< 8 >(uint64_t x)
Definition: MathExtras.h:405
llvm::ConstantSDNode::getZExtValue
uint64_t getZExtValue() const
Definition: SelectionDAGNodes.h:1557
llvm::AMDGPUISD::RSQ
@ RSQ
Definition: AMDGPUISelLowering.h:409
i16
< i32 > ret i32 conv5 And the following x86 eax movsbl ecx cmpl ecx sete al movzbl eax ret It should be possible to eliminate the sign extensions LLVM misses a load store narrowing opportunity in this i16
Definition: README.txt:1493
llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1355
llvm::MCInstrDesc::OpInfo
const MCOperandInfo * OpInfo
Definition: MCInstrDesc.h:206
Mode
SI Whole Quad Mode
Definition: SIWholeQuadMode.cpp:262
llvm::SelectionDAG::getMachineNode
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
Definition: SelectionDAG.cpp:8595
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::ISD::SUBCARRY
@ SUBCARRY
Definition: ISDOpcodes.h:291
llvm::GCNSubtarget::getLDSBankCount
int getLDSBankCount() const
Definition: GCNSubtarget.h:280
v4i32
Vector Rotate Left Mask Mask v4i32
Definition: README_P9.txt:112
llvm::SIRegisterInfo::isSGPRClass
bool isSGPRClass(const TargetRegisterClass *RC) const
Definition: SIRegisterInfo.h:154
llvm::isMask_32
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:467
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:70
llvm::MachineFunction
Definition: MachineFunction.h:230
llvm::AMDGPU::isInlinableLiteral16
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi)
Definition: AMDGPUBaseInfo.cpp:1751
llvm::AMDGPU::CPol::SCC
@ SCC
Definition: SIDefines.h:285
llvm::SISrcMods::OP_SEL_0
@ OP_SEL_0
Definition: SIDefines.h:203
SelectionDAGISel.h
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:386
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm::MVT::i64
@ i64
Definition: MachineValueType.h:47
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:179
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::GCNSubtarget::hasFlatInstOffsets
bool hasFlatInstOffsets() const
Definition: GCNSubtarget.h:559
llvm::EVT::getScalarSizeInBits
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:353
AMDGPU.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::AMDGPUISD::LOAD_D16_LO_U8
@ LOAD_D16_LO_U8
Definition: AMDGPUISelLowering.h:483
uint32_t
llvm::SDValue::getOperand
const SDValue & getOperand(unsigned i) const
Definition: SelectionDAGNodes.h:1121
llvm::SIInstrFlags::FlatGlobal
@ FlatGlobal
Definition: SIDefines.h:94
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::ISD::UNDEF
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:211
llvm::ISD::FEXP
@ FEXP
Definition: ISDOpcodes.h:877
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::ISD::FEXP2
@ FEXP2
Definition: ISDOpcodes.h:878
llvm::CodeGenOpt::Level
Level
Definition: CodeGen.h:52
llvm::SDVTList
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Definition: SelectionDAGNodes.h:79
llvm::ISD::FMUL
@ FMUL
Definition: ISDOpcodes.h:379
llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition: ISDOpcodes.h:1335
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::ISD::INTRINSIC_WO_CHAIN
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
llvm::LoadSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2282
llvm::AMDGPUAS::CONSTANT_BUFFER_0
@ CONSTANT_BUFFER_0
Definition: AMDGPU.h:404
llvm::ISD::FSQRT
@ FSQRT
Definition: ISDOpcodes.h:868
llvm::MCID::Select
@ Select
Definition: MCInstrDesc.h:162
llvm::isInt< 16 >
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:370
llvm::AMDGPUISD::RCP
@ RCP
Definition: AMDGPUISelLowering.h:408
llvm::ISD::FMAXNUM
@ FMAXNUM
Definition: ISDOpcodes.h:899
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:588
llvm::AMDGPU::isInlinableLiteral64
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
Definition: AMDGPUBaseInfo.cpp:1708
llvm::AMDGPUISD::LOAD_D16_HI_U8
@ LOAD_D16_HI_U8
Definition: AMDGPUISelLowering.h:481
llvm::AMDGPUISD::MAD_U64_U32
@ MAD_U64_U32
Definition: AMDGPUISelLowering.h:432
llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS
@ VOLCANIC_ISLANDS
Definition: AMDGPUSubtarget.h:39
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:314
llvm::MemSDNode::getAddressSpace
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Definition: SelectionDAGNodes.h:1339
llvm::ilist_iterator
Iterator for intrusive lists based on ilist_node.
Definition: ilist_iterator.h:57
llvm::AMDGPUISD::ATOMIC_LOAD_FMIN
@ ATOMIC_LOAD_FMIN
Definition: AMDGPUISelLowering.h:495
llvm::SISrcMods::OP_SEL_1
@ OP_SEL_1
Definition: SIDefines.h:204
llvm::ISD::FCOS
@ FCOS
Definition: ISDOpcodes.h:871
llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:879
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:385
llvm::ISD::FSIN
@ FSIN
Definition: ISDOpcodes.h:870
llvm::ISD::BUILD_VECTOR
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:491
llvm::SITargetLowering
Definition: SIISelLowering.h:30
llvm::ISD::SUBC
@ SUBC
Definition: ISDOpcodes.h:270
llvm::MVT::i32
@ i32
Definition: MachineValueType.h:46
llvm::GCNSubtarget::ldsRequiresM0Init
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that require m0 to be iniitalized.
Definition: GCNSubtarget.h:616
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition: SelectionDAGNodes.h:138
llvm::R600InstrInfo
Definition: R600InstrInfo.h:39
gwsIntrinToOpcode
static unsigned gwsIntrinToOpcode(unsigned IntrID)
Definition: AMDGPUISelDAGToDAG.cpp:2541
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:183
llvm::AMDGPUAS::FLAT_ADDRESS
@ FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:381
llvm::AMDGPUPerfHintAnalysis
Definition: AMDGPUPerfHintAnalysis.h:23
llvm::ISD::STORE
@ STORE
Definition: ISDOpcodes.h:922
llvm::AMDGPUISD::CVT_PKRTZ_F16_F32
@ CVT_PKRTZ_F16_F32
Definition: AMDGPUISelLowering.h:453
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:403
llvm::ISD::UINT_TO_FP
@ UINT_TO_FP
Definition: ISDOpcodes.h:740
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
llvm::SDValue::isUndef
bool isUndef() const
Definition: SelectionDAGNodes.h:1149
llvm::codeview::ModifierOptions::Const
@ Const
llvm::GCNSubtarget::getGeneration
Generation getGeneration() const
Definition: GCNSubtarget.h:271
llvm::SIInstrInfo
Definition: SIInstrInfo.h:38
llvm::AMDGPU::isInlinableLiteral32
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
Definition: AMDGPUBaseInfo.cpp:1725
llvm::EVT::getVectorElementType
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:301
llvm::ISD::FP_EXTEND
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:833
llvm::AMDGPUISD::FRACT
@ FRACT
Definition: AMDGPUISelLowering.h:364
llvm::ISD::FSUB
@ FSUB
Definition: ISDOpcodes.h:378
llvm::ISD::SHL
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:657
LegacyDivergenceAnalysis.h
llvm::SelectionDAGISel
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
Definition: SelectionDAGISel.h:39
llvm::AMDGPUISD::FMAX3
@ FMAX3
Definition: AMDGPUISelLowering.h:388
llvm::GCNSubtarget::getConstantBusLimit
unsigned getConstantBusLimit(unsigned Opcode) const
Definition: AMDGPUSubtarget.cpp:339
llvm::ISD::FREM
@ FREM
Definition: ISDOpcodes.h:381
llvm::AMDGPUISD::MAD_I64_I32
@ MAD_I64_I32
Definition: AMDGPUISelLowering.h:433
Dominators.h
N
#define N
llvm::ISD::SUBE
@ SUBE
Definition: ISDOpcodes.h:280
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:335
llvm::ISD::SRL
@ SRL
Definition: ISDOpcodes.h:659
llvm::GCNSubtarget::d16PreservesUnusedBits
bool d16PreservesUnusedBits() const
Definition: GCNSubtarget.h:606
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel", "AMDGPU DAG->DAG Pattern Instruction Selection", false, false) INITIALIZE_PASS_END(AMDGPUDAGToDAGISel
llvm::AMDGPUISD::COS_HW
@ COS_HW
Definition: AMDGPUISelLowering.h:383
llvm::SDValue::getOpcode
unsigned getOpcode() const
Definition: SelectionDAGNodes.h:1109
llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:637
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::MVT::i16
@ i16
Definition: MachineValueType.h:45
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::ISD::INTRINSIC_W_CHAIN
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
llvm::ISD::FNEG
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:866
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::ISD::BUILD_PAIR
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::AMDGPUISD::DWORDADDR
@ DWORDADDR
Definition: AMDGPUISelLowering.h:363
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::ISD::TargetFrameIndex
@ TargetFrameIndex
Definition: ISDOpcodes.h:166
f64
QP Compare Ordered outs ins xscmpudp No builtin are required Or llvm fcmp order unorder compare DP QP Compare builtin are required DP xscmp *dp write to VSX register Use int_ppc_vsx_xscmpeqdp f64
Definition: README_P9.txt:314
llvm::SelectionDAGISel::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
Definition: SelectionDAGISel.cpp:415
llvm::createR600ISelDag
FunctionPass * createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel)
This pass converts a legalized DAG into a R600-specific.
Definition: AMDGPUISelDAGToDAG.cpp:391
llvm::ISD::FTRUNC
@ FTRUNC
Definition: ISDOpcodes.h:880
llvm::AMDGPUISD::RCP_IFLAG
@ RCP_IFLAG
Definition: AMDGPUISelLowering.h:411
llvm::AMDGPUISD::ATOMIC_INC
@ ATOMIC_INC
Definition: AMDGPUISelLowering.h:493
llvm::MVT::v2i16
@ v2i16
Definition: MachineValueType.h:89
llvm::ISD::FCANONICALIZE
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:483
llvm::ISD::FLOG
@ FLOG
Definition: ISDOpcodes.h:874
InitializePasses.h
llvm::GCNSubtarget::hasUsableDSOffset
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition: GCNSubtarget.h:408
llvm::ISD::ADDE
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:279
llvm::ISD::FP_ROUND
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:814
llvm::MCInstrDesc::getNumOperands
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:228
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::AMDGPUTargetMachine::EnableLateStructurizeCFG
static bool EnableLateStructurizeCFG
Definition: AMDGPUTargetMachine.h:35
llvm::EVT::bitsEq
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:229
AMDGPUTargetMachine.h
SubReg
unsigned SubReg
Definition: AArch64AdvSIMDScalarPass.cpp:104
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:382
llvm::AMDGPUISD::BFE_U32
@ BFE_U32
Definition: AMDGPUISelLowering.h:419
llvm::ISD::SCALAR_TO_VECTOR
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:583
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1172
llvm::AMDGPUISD::LOAD_D16_LO
@ LOAD_D16_LO
Definition: AMDGPUISelLowering.h:479
IsCopyFromSGPR
static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val)
Definition: AMDGPUISelDAGToDAG.cpp:1645
llvm::SIRegisterInfo::getSGPRClassForBitWidth
static const LLVM_READONLY TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
Definition: SIRegisterInfo.cpp:2047
llvm::ISD::FDIV
@ FDIV
Definition: ISDOpcodes.h:380