//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUISelDAGToDAG.h"
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/R600MCTargetDesc.h"
#include "R600RegisterInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"

#ifdef EXPENSIVE_CHECKS
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#endif

#define DEBUG_TYPE "isel"

using namespace llvm;

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

static SDValue stripBitcast(SDValue Val) {
  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}

// Figure out if this is really an extract of the high 16-bits of a dword.
static bool isExtractHiElt(SDValue In, SDValue &Out) {
  In = stripBitcast(In);

  if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
      if (!Idx->isOne())
        return false;
      Out = In.getOperand(0);
      return true;
    }
  }

  if (In.getOpcode() != ISD::TRUNCATE)
    return false;

  SDValue Srl = In.getOperand(0);
  if (Srl.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
      if (ShiftAmt->getZExtValue() == 16) {
        Out = stripBitcast(Srl.getOperand(0));
        return true;
      }
    }
  }

  return false;
}
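
// For reference, the two shapes accepted above are, e.g.:
//   (extract_vector_elt (v2i16 V), 1)  -> Out = V
//   (trunc (srl (bitcast V), 16))      -> Out = V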

// Look through operations that obscure just looking at the low 16-bits of the
// same register.
static SDValue stripExtractLoElt(SDValue In) {
  if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
      if (Idx->isZero() && In.getValueSizeInBits() <= 32)
        return In.getOperand(0);
    }
  }

  if (In.getOpcode() == ISD::TRUNCATE) {
    SDValue Src = In.getOperand(0);
    if (Src.getValueType().getSizeInBits() == 32)
      return stripBitcast(Src);
  }

  return In;
}

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
#ifdef EXPENSIVE_CHECKS
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
#endif
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

/// This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine *TM /*= nullptr*/,
                                       CodeGenOpt::Level OptLevel /*= CodeGenOpt::Default*/)
    : SelectionDAGISel(*TM, OptLevel) {
  EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
#ifdef EXPENSIVE_CHECKS
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  for (auto &L : LI->getLoopsInPreorder()) {
    assert(L->isLCSSAForm(DT));
  }
#endif
  Subtarget = &MF.getSubtarget<GCNSubtarget>();
  Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction());
  return SelectionDAGISel::runOnMachineFunction(MF);
}

bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
  // XXX - only need to list legal operations.
  switch (Opc) {
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FMUL:
  case ISD::FDIV:
  case ISD::FREM:
  case ISD::FCANONICALIZE:
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP:
  case ISD::FABS:
    // Fabs is lowered to a bit operation, but it's an and which will clear the
    // high bits anyway.
  case ISD::FSQRT:
  case ISD::FSIN:
  case ISD::FCOS:
  case ISD::FPOWI:
  case ISD::FPOW:
  case ISD::FLOG:
  case ISD::FLOG2:
  case ISD::FLOG10:
  case ISD::FEXP:
  case ISD::FEXP2:
  case ISD::FCEIL:
  case ISD::FTRUNC:
  case ISD::FRINT:
  case ISD::FNEARBYINT:
  case ISD::FROUND:
  case ISD::FFLOOR:
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
  case AMDGPUISD::FRACT:
  case AMDGPUISD::CLAMP:
  case AMDGPUISD::COS_HW:
  case AMDGPUISD::SIN_HW:
  case AMDGPUISD::FMIN3:
  case AMDGPUISD::FMAX3:
  case AMDGPUISD::FMED3:
  case AMDGPUISD::FMAD_FTZ:
  case AMDGPUISD::RCP:
  case AMDGPUISD::RSQ:
  case AMDGPUISD::RCP_LEGACY:
  case AMDGPUISD::LDEXP:
    // On gfx10, all 16-bit instructions preserve the high bits.
    return Subtarget->getGeneration() <= AMDGPUSubtarget::GFX9;
  case ISD::FP_ROUND:
    // We may select fptrunc (fma/mad) to mad_mixlo, which does not zero the
    // high bits on gfx9.
    // TODO: If we had the source node we could see if the source was fma/mad
    return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
  case ISD::FMA:
  case ISD::FMAD:
  case AMDGPUISD::DIV_FIXUP:
    return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
  default:
    // fcopysign, select and others may be lowered to 32-bit bit operations
    // which don't zero the high bits.
    return false;
  }
}

void AMDGPUDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<AMDGPUArgumentUsageInfo>();
  AU.addRequired<LegacyDivergenceAnalysis>();
#ifdef EXPENSIVE_CHECKS
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<LoopInfoWrapperPass>();
#endif
  SelectionDAGISel::getAnalysisUsage(AU);
}

bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
  assert(Subtarget->d16PreservesUnusedBits());
  MVT VT = N->getValueType(0).getSimpleVT();
  if (VT != MVT::v2i16 && VT != MVT::v2f16)
    return false;

  SDValue Lo = N->getOperand(0);
  SDValue Hi = N->getOperand(1);

  LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));

  // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
  // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
  // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo

  // Need to check for possible indirect dependencies on the other half of the
  // vector to avoid introducing a cycle.
  if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);

    SDValue TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, Lo);
    SDValue Ops[] = {
      LdHi->getChain(), LdHi->getBasePtr(), TiedIn
    };

    unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
    if (LdHi->getMemoryVT() == MVT::i8) {
      LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
        AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
    } else {
      assert(LdHi->getMemoryVT() == MVT::i16);
    }

    SDValue NewLoadHi =
      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
                                  Ops, LdHi->getMemoryVT(),
                                  LdHi->getMemOperand());

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
    return true;
  }

  // build_vector (load ptr), hi -> load_d16_lo ptr, hi
  // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
  // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
  LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
  if (LdLo && Lo.hasOneUse()) {
    SDValue TiedIn = getHi16Elt(Hi);
    if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
      return false;

    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
    unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
    if (LdLo->getMemoryVT() == MVT::i8) {
      LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
        AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
    } else {
      assert(LdLo->getMemoryVT() == MVT::i16);
    }

    TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);

    SDValue Ops[] = {
      LdLo->getChain(), LdLo->getBasePtr(), TiedIn
    };

    SDValue NewLoadLo =
      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
                                  Ops, LdLo->getMemoryVT(),
                                  LdLo->getMemOperand());

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
    return true;
  }

  return false;
}

void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->d16PreservesUnusedBits())
    return;

  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty())
      continue;

    switch (N->getOpcode()) {
    case ISD::BUILD_VECTOR:
      MadeChange |= matchLoadD16FromBuildVector(N);
      break;
    default:
      break;
    }
  }

  if (MadeChange) {
    CurDAG->RemoveDeadNodes();
    LLVM_DEBUG(dbgs() << "After PreProcess:\n";
               CurDAG->dump(););
  }
}

bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (N->getFlags().hasNoNaNs())
    return true;

  return CurDAG->isKnownNeverNaN(N);
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
                                           bool Negated) const {
  if (N->isUndef())
    return true;

  const SIInstrInfo *TII = Subtarget->getInstrInfo();
  if (Negated) {
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
      return TII->isInlineConstant(-C->getAPIntValue());

    if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
      return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());

  } else {
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
      return TII->isInlineConstant(C->getAPIntValue());

    if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
      return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
  }

  return false;
}
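
// For context: AMDGPU inline immediates are the integers -16..64 plus a small
// set of floats (0.0, +-0.5, +-1.0, +-2.0, +-4.0, and 1/(2*pi) on newer
// subtargets), so such constants need no extra materializing move.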

/// Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::CopyToReg) {
      Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (Reg.isVirtual()) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      const SIRegisterInfo *TRI
        = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToOp(SDNode *N, SDValue NewChain,
                                         SDValue Glue) const {
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(NewChain); // Replace the chain.
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
    Ops.push_back(N->getOperand(i));

  Ops.push_back(Glue);
  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");

  SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), Val);
  return glueCopyToOp(N, M0, M0.getValue(1));
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
  unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
  if (AS == AMDGPUAS::LOCAL_ADDRESS) {
    if (Subtarget->ldsRequiresM0Init())
      return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
  } else if (AS == AMDGPUAS::REGION_ADDRESS) {
    MachineFunction &MF = CurDAG->getMachineFunction();
    unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
    return
        glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
  }
  return N;
}

MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
                                                  EVT VT) const {
  SDNode *Lo = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32,
      CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
  SDNode *Hi =
      CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                             CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32));
  const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};

  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
}

void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  unsigned NumVectorElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDLoc DL(N);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

  if (NumVectorElts == 1) {
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         RegClass);
    return;
  }

  assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
                                "supported yet");
  // 32 = Max Num Vector Elements
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
  // 1 = Vector Register Class
  SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

  bool IsGCN = CurDAG->getSubtarget().getTargetTriple().getArch() ==
               Triple::amdgcn;
  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // XXX: Why is this here?
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      IsRegSeq = false;
      break;
    }
    unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
                         : R600RegisterInfo::getSubRegFromChannel(i);
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // Fill in the missing undef elements if this was a scalar_to_vector.
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   DL, EltVT);
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
                           : R600RegisterInfo::getSubRegFromChannel(i);
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(Sub, DL, MVT::i32);
    }
  }

  if (!IsRegSeq)
    SelectCode(N);
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}

void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  // isa<MemSDNode> almost works but is slightly too permissive for some DS
  // intrinsics.
  if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
       Opc == ISD::ATOMIC_LOAD_FADD ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) {
    N = glueCopyToM0LDSInit(N);
    SelectCode(N);
    return;
  }

  switch (Opc) {
  default:
    break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::ADDCARRY:
  case ISD::SUBCARRY:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectAddcSubb(N);
    return;
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    if (VT.getScalarSizeInBits() == 16) {
      if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
        if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
          ReplaceNode(N, Packed);
          return;
        }
      }

      break;
    }

    assert(VT.getVectorElementType().bitsEq(MVT::i32));
    unsigned RegClassID =
        SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID();
    SelectBuildVector(N, RegClassID);
    return;
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
    return;
  }
  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getBFE32(Signed, SDLoc(N), N->getOperand(0), OffsetVal,
                            WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case AMDGPUISD::MAD_I64_I32:
  case AMDGPUISD::MAD_U64_U32: {
    SelectMAD_64_32(N);
    return;
  }
  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
    return SelectMUL_LOHI(N);
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  case ISD::FMAD:
  case ISD::FMA:
    SelectFMAD_FMA(N);
    return;
  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  case AMDGPUISD::CVT_PKRTZ_F16_F32:
  case AMDGPUISD::CVT_PKNORM_I16_F32:
  case AMDGPUISD::CVT_PKNORM_U16_F32:
  case AMDGPUISD::CVT_PK_U16_U32:
  case AMDGPUISD::CVT_PK_I16_I32: {
    // Hack around using a legal type if f16 is illegal.
    if (N->getValueType(0) == MVT::i32) {
      MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
      N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
                              { N->getOperand(0), N->getOperand(1) });
      SelectCode(N);
      return;
    }

    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    SelectINTRINSIC_W_CHAIN(N);
    return;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    SelectINTRINSIC_WO_CHAIN(N);
    return;
  }
  case ISD::INTRINSIC_VOID: {
    SelectINTRINSIC_VOID(N);
    return;
  }
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
                                             unsigned ShAmtBits) const {
  assert(N->getOpcode() == ISD::AND);

  const APInt &RHS = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
  if (RHS.countTrailingOnes() >= ShAmtBits)
    return true;

  const APInt &LHSKnownZeros = CurDAG->computeKnownBits(N->getOperand(0)).Zero;
  return (LHSKnownZeros | RHS).countTrailingOnes() >= ShAmtBits;
}
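
// For instance, with ShAmtBits == 5 (a 32-bit shift), an (and x, 31) mask is
// unneeded because hardware only reads the low 5 bits of the shift amount;
// the same holds when known-zero bits of x make a wider mask a no-op.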

static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr,
                                          SDValue &N0, SDValue &N1) {
  if (Addr.getValueType() == MVT::i64 && Addr.getOpcode() == ISD::BITCAST &&
      Addr.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
    // As we split 64-bit `or` earlier, it's a complicated pattern to match,
    // i.e.
    // (i64 (bitcast (v2i32 (build_vector
    //                        (or (extract_vector_elt V, 0), OFFSET),
    //                        (extract_vector_elt V, 1)))))
    SDValue Lo = Addr.getOperand(0).getOperand(0);
    if (Lo.getOpcode() == ISD::OR && DAG.isBaseWithConstantOffset(Lo)) {
      SDValue BaseLo = Lo.getOperand(0);
      SDValue BaseHi = Addr.getOperand(0).getOperand(1);
      // Check that split base (Lo and Hi) are extracted from the same one.
      if (BaseLo.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          BaseHi.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          BaseLo.getOperand(0) == BaseHi.getOperand(0) &&
          // Lo is statically extracted from index 0.
          isa<ConstantSDNode>(BaseLo.getOperand(1)) &&
          BaseLo.getConstantOperandVal(1) == 0 &&
          // Hi is statically extracted from index 1.
          isa<ConstantSDNode>(BaseHi.getOperand(1)) &&
          BaseHi.getConstantOperandVal(1) == 1) {
        N0 = BaseLo.getOperand(0).getOperand(0);
        N1 = Lo.getOperand(1);
        return true;
      }
    }
  }
  return false;
}

bool AMDGPUDAGToDAGISel::isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
                                                    SDValue &RHS) const {
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    LHS = Addr.getOperand(0);
    RHS = Addr.getOperand(1);
    return true;
  }

  if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, LHS, RHS)) {
    assert(LHS && RHS && isa<ConstantSDNode>(RHS));
    return true;
  }

  return false;
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
                                                       const SDLoc &DL) const {
  SDNode *Mov = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32,
      CurDAG->getTargetConstant(Val, DL, MVT::i32));
  return SDValue(Mov, 0);
}

// FIXME: Should only handle addcarry/subcarry
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  static const unsigned OpcMap[2][2][2] = {
      {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
       {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
      {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
       {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
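
  // The table reads as OpcMap[HasCarryIn][IsDivergent][IsAdd]: plane 0 holds
  // the carry-out-only opcodes and plane 1 the carry-consuming ones; within a
  // plane, row 0 is the SALU form and row 1 the VALU form.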

  unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
  unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  ReplaceNode(N, RegSequence);
}

void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue CI = N->getOperand(2);

  if (N->isDivergent()) {
    unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
                                                   : AMDGPU::V_SUBB_U32_e64;
    CurDAG->SelectNodeTo(
        N, Opc, N->getVTList(),
        {LHS, RHS, CI,
         CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
  } else {
    unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::S_ADD_CO_PSEUDO
                                                   : AMDGPU::S_SUB_CO_PSEUDO;
    CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
  }
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
  // carry out despite the _i32 name. These were renamed in VI to _U32.
  // FIXME: We should probably rename the opcodes here.
  bool IsAdd = N->getOpcode() == ISD::UADDO;
  bool IsVALU = N->isDivergent();

  for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E;
       ++UI)
    if (UI.getUse().getResNo() == 1) {
      if ((IsAdd && (UI->getOpcode() != ISD::ADDCARRY)) ||
          (!IsAdd && (UI->getOpcode() != ISD::SUBCARRY))) {
        IsVALU = true;
        break;
      }
    }

  if (IsVALU) {
    unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;

    CurDAG->SelectNodeTo(
        N, Opc, N->getVTList(),
        {N->getOperand(0), N->getOperand(1),
         CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
  } else {
    unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
                                                : AMDGPU::S_USUBO_PSEUDO;

    CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                         {N->getOperand(0), N->getOperand(1)});
  }
}

void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  // If there are no source modifiers, prefer fmac over fma because it can use
  // the smaller VOP2 encoding.
  bool UseFMAC = Subtarget->hasDLInsts() &&
                 cast<ConstantSDNode>(Ops[0])->isZero() &&
                 cast<ConstantSDNode>(Ops[2])->isZero() &&
                 cast<ConstantSDNode>(Ops[4])->isZero();
  unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;

  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
  // omod
  SDValue Ops[8];
  SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  SDLoc SL(N);
  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;

  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                    Clamp };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {
  SDLoc SL(N);
  bool Signed = N->getOpcode() == ISD::SMUL_LOHI;
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;

  SDValue Zero = CurDAG->getTargetConstant(0, SL, MVT::i64);
  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
  SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp};
  SDNode *Mad = CurDAG->getMachineNode(Opc, SL, N->getVTList(), Ops);
  if (!SDValue(N, 0).use_empty()) {
    SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32);
    SDNode *Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
                                        MVT::i32, SDValue(Mad, 0), Sub0);
    ReplaceUses(SDValue(N, 0), SDValue(Lo, 0));
  }
  if (!SDValue(N, 1).use_empty()) {
    SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32);
    SDNode *Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
                                        MVT::i32, SDValue(Mad, 0), Sub1);
    ReplaceUses(SDValue(N, 1), SDValue(Hi, 0));
  }
  CurDAG->RemoveDeadNode(N);
}

bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset) const {
  if (!isUInt<16>(Offset))
    return false;

  if (!Base || Subtarget->hasUsableDSOffset() ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}
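
// For instance, a ds_read_b32 offset of 65535 still fits the unsigned 16-bit
// immediate and is accepted, while 65536 forces the offset to be folded into
// the base address instead.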

bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue())) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isDSOffsetLegal(SDValue(), ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));

          // FIXME: Select to VOP3 version for with-carry.
          unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          MachineSDNode *MachineSub =
              CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
                                          unsigned Offset1,
                                          unsigned Size) const {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)
    return false;
  if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
    return false;

  if (!Base || Subtarget->hasUsableDSOffset() ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}
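
// For instance, with Size == 4 (ds_read2_b32), byte offsets 8 and 12 encode
// as offset0 = 2 and offset1 = 3; a byte offset that is not a multiple of 4,
// or whose scaled value exceeds 8 bits, cannot use the two-offset form.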

// TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
}

bool AMDGPUDAGToDAGISel::SelectDS128Bit8ByteAligned(SDValue Addr, SDValue &Base,
                                                    SDValue &Offset0,
                                                    SDValue &Offset1) const {
  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);
}

bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
                                            SDValue &Offset0, SDValue &Offset1,
                                            unsigned Size) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned OffsetValue0 = C1->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;

    // (add n0, c0)
    if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C =
            dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned OffsetValue0 = C->getZExtValue();
      unsigned OffsetValue1 = OffsetValue0 + Size;

      if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub =
            CurDAG->getNode(ISD::SUB, DL, MVT::i32, Zero, Addr.getOperand(1));

        if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));
          unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          MachineSDNode *MachineSub = CurDAG->getMachineNode(
              SubOp, DL, MVT::getIntegerVT(Size * 8), Opnds);

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned OffsetValue0 = CAddr->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;

    if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero =
          CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
      return true;
    }
  }

  // default case

  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
                                     SDValue &SOffset, SDValue &Offset,
                                     SDValue &Offen, SDValue &Idxen,
                                     SDValue &Addr64) const {
  // Subtarget prefers to use flat instruction
  // FIXME: This should be a pattern predicate and not reach here
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  ConstantSDNode *C1 = nullptr;
  SDValue N0 = Addr;
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    C1 = cast<ConstantSDNode>(Addr.getOperand(1));
    if (isUInt<32>(C1->getZExtValue()))
      N0 = Addr.getOperand(0);
    else
      C1 = nullptr;
  }

  if (N0.getOpcode() == ISD::ADD) {
    // (add N2, N3) -> addr64, or
    // (add (add N2, N3), C1) -> addr64
    SDValue N2 = N0.getOperand(0);
    SDValue N3 = N0.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);

    if (N2->isDivergent()) {
      if (N3->isDivergent()) {
        // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
        // addr64, and construct the resource from a 0 address.
        Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
        VAddr = N0;
      } else {
        // N2 is divergent, N3 is not.
        Ptr = N3;
        VAddr = N2;
      }
    } else {
      // N2 is not divergent.
      Ptr = N2;
      VAddr = N3;
    }
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  } else if (N0->isDivergent()) {
    // N0 is divergent. Use it as the addr64, and construct the resource from a
    // 0 address.
    Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
    VAddr = N0;
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
  } else {
    // N0 -> offset, or
    // (N0 + C1) -> offset
    VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Ptr = N0;
  }

  if (!C1) {
    // No offset.
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
    // Legal offset for instruction.
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    return true;
  }

  // Illegal offset, store it in soffset.
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  SOffset =
      SDValue(CurDAG->getMachineNode(
                  AMDGPU::S_MOV_B32, DL, MVT::i32,
                  CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
              0);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  // FIXME: This should be a pattern predicate and not reach here
  if (!Subtarget->hasAddr64())
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  SDLoc DL(N);

  auto *FI = dyn_cast<FrameIndexSDNode>(N);
  SDValue TFI =
      FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N;

  // We rebase the base address into an absolute stack address and hence
  // use constant 0 for soffset. This value must be retained until
  // frame elimination and eliminateFrameIndex will choose the appropriate
  // frame register if need be.
  return std::make_pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32));
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    int64_t Imm = CAddr->getSExtValue();
    const int64_t NullPtr =
        AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
    // Don't fold null pointer.
    if (Imm != NullPtr) {
      SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
      MachineSDNode *MovHighBits = CurDAG->getMachineNode(
          AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
      VAddr = SDValue(MovHighBits, 0);

      SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
      ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
      return true;
    }
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive if range checking is enabled.
    //
    // The total computation of vaddr + soffset + offset must not overflow. If
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    //
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    // always perform a range check. If a negative vaddr base index was used,
    // this would fail the range check. The overall address computation would
    // compute a valid address, but this doesn't happen due to the range
    // check. For out-of-bounds MUBUF loads, a 0 is returned.
    //
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    // MUBUF vaddr, but not on older subtargets which can only do this if the
    // sign bit is known 0.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val) {
  if (Val.getOpcode() != ISD::CopyFromReg)
    return false;
  auto RC =
      TRI.getPhysRegClass(cast<RegisterSDNode>(Val.getOperand(1))->getReg());
  return RC && TRI.isSGPRClass(RC);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  const SIRegisterInfo *TRI =
      static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  SDLoc DL(Addr);

  // CopyFromReg <sgpr>
  if (IsCopyFromSGPR(*TRI, Addr)) {
    SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
    SOffset = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  ConstantSDNode *CAddr;
  if (Addr.getOpcode() == ISD::ADD) {
    // Add (CopyFromReg <sgpr>) <constant>
    CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
      return false;
    if (!IsCopyFromSGPR(*TRI, Addr.getOperand(0)))
      return false;

    SOffset = Addr.getOperand(0);
  } else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&
             SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue())) {
    // <constant>
    SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  } else {
    return false;
  }

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset,
                                           SDValue &Offset) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnes(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

// Find a load or store from corresponding pattern root.
// Roots may be build_vector, bitconvert or their combinations.
static MemSDNode* findMemSDNode(SDNode *N) {
  N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode();
  if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
    return MN;
  assert(isa<BuildVectorSDNode>(N));
  for (SDValue V : N->op_values())
    if (MemSDNode *MN =
          dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
      return MN;
  llvm_unreachable("cannot find MemSDNode in the pattern!");
}

bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
                                              SDValue &VAddr, SDValue &Offset,
                                              uint64_t FlatVariant) const {
  int64_t OffsetVal = 0;

  unsigned AS = findMemSDNode(N)->getAddressSpace();

  bool CanHaveFlatSegmentOffsetBug =
      Subtarget->hasFlatSegmentOffsetBug() &&
      FlatVariant == SIInstrFlags::FLAT &&
      (AS == AMDGPUAS::FLAT_ADDRESS || AS == AMDGPUAS::GLOBAL_ADDRESS);

  if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
    SDValue N0, N1;
    if (isBaseWithConstantOffset64(Addr, N0, N1)) {
      int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();

      const SIInstrInfo *TII = Subtarget->getInstrInfo();
      if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
        Addr = N0;
        OffsetVal = COffsetVal;
      } else {
        // If the offset doesn't fit, put the low bits into the offset field and
        // add the rest.
        //
        // For a FLAT instruction the hardware decides whether to access
        // global/scratch/shared memory based on the high bits of vaddr,
        // ignoring the offset field, so we have to ensure that when we add
        // remainder to vaddr it still points into the same underlying object.
        // The easiest way to do that is to make sure that we split the offset
        // into two pieces that are both >= 0 or both <= 0.
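        // For example, a large positive COffsetVal is split so that OffsetVal
        // keeps the low part that fits the immediate field and RemainderOffset
        // (= COffsetVal - OffsetVal) is added to vaddr; both pieces keep the
        // sign of COffsetVal. The exact field width is subtarget- and
        // variant-specific and handled by splitFlatOffset below.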

        SDLoc DL(N);
        uint64_t RemainderOffset;

        std::tie(OffsetVal, RemainderOffset) =
            TII->splitFlatOffset(COffsetVal, AS, FlatVariant);

        SDValue AddOffsetLo =
            getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
        SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);

        if (Addr.getValueType().getSizeInBits() == 32) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(N0);
          Opnds.push_back(AddOffsetLo);
          unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
          if (Subtarget->hasAddNoCarry()) {
            AddOp = AMDGPU::V_ADD_U32_e64;
            Opnds.push_back(Clamp);
          }
          Addr = SDValue(CurDAG->getMachineNode(AddOp, DL, MVT::i32, Opnds), 0);
        } else {
          // TODO: Should this try to use a scalar add pseudo if the base address
          // is uniform and saddr is usable?
          SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
          SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

          SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                DL, MVT::i32, N0, Sub0);
          SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                DL, MVT::i32, N0, Sub1);

          SDValue AddOffsetHi =
              getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);

          SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);

          SDNode *Add =
              CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64, DL, VTs,
                                     {AddOffsetLo, SDValue(N0Lo, 0), Clamp});

          SDNode *Addc = CurDAG->getMachineNode(
              AMDGPU::V_ADDC_U32_e64, DL, VTs,
              {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});

          SDValue RegSequenceArgs[] = {
              CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
              SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1};

          Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                                MVT::i64, RegSequenceArgs),
                         0);
        }
      }
    }
  }

  VAddr = Addr;
  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset) const {
  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FLAT);
}

bool AMDGPUDAGToDAGISel::SelectGlobalOffset(SDNode *N, SDValue Addr,
                                            SDValue &VAddr,
                                            SDValue &Offset) const {
  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FlatGlobal);
}

bool AMDGPUDAGToDAGISel::SelectScratchOffset(SDNode *N, SDValue Addr,
                                             SDValue &VAddr,
                                             SDValue &Offset) const {
  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,
                              SIInstrFlags::FlatScratch);
}

// If this matches zero_extend i32:x, return x
static SDValue matchZExtFromI32(SDValue Op) {
  if (Op.getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();

  SDValue ExtSrc = Op.getOperand(0);
  return (ExtSrc.getValueType() == MVT::i32) ? ExtSrc : SDValue();
}

1643 // Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
1644 bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
1645  SDValue Addr,
1646  SDValue &SAddr,
1647  SDValue &VOffset,
1648  SDValue &Offset) const {
1649  int64_t ImmOffset = 0;
1650 
1651  // Match the immediate offset first, which canonically is moved as low as
1652  // possible.
1653 
1654  SDValue LHS, RHS;
1655  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
1656  int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
1657  const SIInstrInfo *TII = Subtarget->getInstrInfo();
1658 
1659  if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
1661  Addr = LHS;
1662  ImmOffset = COffsetVal;
1663  } else if (!LHS->isDivergent()) {
1664  if (COffsetVal > 0) {
1665  SDLoc SL(N);
1666  // saddr + large_offset -> saddr +
1667  // (voffset = large_offset & ~MaxOffset) +
1668  // (large_offset & MaxOffset);
1669  int64_t SplitImmOffset, RemainderOffset;
1670  std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
1672 
1673  if (isUInt<32>(RemainderOffset)) {
1674  SDNode *VMov = CurDAG->getMachineNode(
1675  AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
1676  CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
1677  VOffset = SDValue(VMov, 0);
1678  SAddr = LHS;
1679  Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
1680  return true;
1681  }
1682  }
1683 
1684  // We are adding a 64 bit SGPR and a constant. If constant bus limit
1685  // is 1 we would need to perform 1 or 2 extra moves for each half of
1686  // the constant and it is better to do a scalar add and then issue a
1687  // single VALU instruction to materialize zero. Otherwise it is less
1688  // instructions to perform VALU adds with immediates or inline literals.
1689  unsigned NumLiterals =
1690  !TII->isInlineConstant(APInt(32, COffsetVal & 0xffffffff)) +
1691  !TII->isInlineConstant(APInt(32, COffsetVal >> 32));
1692  if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
1693  return false;
1694  }
1695  }
1696 
1697  // Match the variable offset.
1698  if (Addr.getOpcode() == ISD::ADD) {
1699  LHS = Addr.getOperand(0);
1700  RHS = Addr.getOperand(1);
1701 
1702  if (!LHS->isDivergent()) {
1703  // add (i64 sgpr), (zero_extend (i32 vgpr))
1704  if (SDValue ZextRHS = matchZExtFromI32(RHS)) {
1705  SAddr = LHS;
1706  VOffset = ZextRHS;
1707  }
1708  }
1709 
1710  if (!SAddr && !RHS->isDivergent()) {
1711  // add (zero_extend (i32 vgpr)), (i64 sgpr)
1712  if (SDValue ZextLHS = matchZExtFromI32(LHS)) {
1713  SAddr = RHS;
1714  VOffset = ZextLHS;
1715  }
1716  }
1717 
1718  if (SAddr) {
1719  Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
1720  return true;
1721  }
1722  }
1723 
1724  if (Addr->isDivergent() || Addr.getOpcode() == ISD::UNDEF ||
1725  isa<ConstantSDNode>(Addr))
1726  return false;
1727 
1728  // It's cheaper to materialize a single 32-bit zero for vaddr than the two
1729  // moves required to copy a 64-bit SGPR to VGPR.
1730  SAddr = Addr;
1731  SDNode *VMov =
1732  CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
1733  CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
1734  VOffset = SDValue(VMov, 0);
1735  Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
1736  return true;
1737 }
1738 
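// Illustration (not part of this file): a minimal sketch of the
// "saddr + large_offset" split performed by SelectGlobalSAddr above.
// MaxOffsetMask is a hypothetical stand-in for the encodable offset bits;
// the real split is SIInstrInfo::splitFlatOffset and is subtarget-dependent,
// so treat this only as the shape of the computation for positive offsets.
static std::pair<int64_t, uint64_t> splitOffsetSketch(int64_t Off,
                                                      uint64_t MaxOffsetMask) {
  int64_t ImmPart = Off & MaxOffsetMask;              // kept in the offset field
  uint64_t VOffPart = uint64_t(Off) & ~MaxOffsetMask; // materialized in voffset
  return {ImmPart, VOffPart};
}
// For a 12-bit field (mask 0xfff), 0x12345 splits into ImmPart = 0x345 and
// VOffPart = 0x12000, which add back up to the original offset.
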
1739 static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr) {
1740  if (auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
1741  SAddr = CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
1742  } else if (SAddr.getOpcode() == ISD::ADD &&
1743  isa<FrameIndexSDNode>(SAddr.getOperand(0))) {
1744  // Materialize this into a scalar move for scalar address to avoid
1745  // readfirstlane.
1746  auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
1747  SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
1748  FI->getValueType(0));
1749  SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, SDLoc(SAddr),
1750  MVT::i32, TFI, SAddr.getOperand(1)),
1751  0);
1752  }
1753 
1754  return SAddr;
1755 }
1756 
1757 // Match (32-bit SGPR base) + sext(imm offset)
1758 bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
1759  SDValue &SAddr,
1760  SDValue &Offset) const {
1761  if (Addr->isDivergent())
1762  return false;
1763 
1764  SDLoc DL(Addr);
1765 
1766  int64_t COffsetVal = 0;
1767 
1769  COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
1770  SAddr = Addr.getOperand(0);
1771  } else {
1772  SAddr = Addr;
1773  }
1774 
1775  SAddr = SelectSAddrFI(CurDAG, SAddr);
1776 
1777  const SIInstrInfo *TII = Subtarget->getInstrInfo();
1778 
1779  if (!TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS,
1780  SIInstrFlags::FlatScratch)) {
1781  int64_t SplitImmOffset, RemainderOffset;
1782  std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
1783  COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, SIInstrFlags::FlatScratch);
1784 
1785  COffsetVal = SplitImmOffset;
1786 
1787  SDValue AddOffset =
1788  SAddr.getOpcode() == ISD::TargetFrameIndex
1789  ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
1790  : CurDAG->getTargetConstant(RemainderOffset, DL, MVT::i32);
1791  SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, MVT::i32,
1792  SAddr, AddOffset),
1793  0);
1794  }
1795 
1796  Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i16);
1797 
1798  return true;
1799 }
1800 
1801 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1802  SDValue &Offset, bool &Imm) const {
1803  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1804  if (!C) {
1805  if (ByteOffsetNode.getValueType().isScalarInteger() &&
1806  ByteOffsetNode.getValueType().getSizeInBits() == 32) {
1807  Offset = ByteOffsetNode;
1808  Imm = false;
1809  return true;
1810  }
1811  if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
1812  if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) {
1813  Offset = ByteOffsetNode.getOperand(0);
1814  Imm = false;
1815  return true;
1816  }
1817  }
1818  return false;
1819  }
1820 
1821  SDLoc SL(ByteOffsetNode);
1822  // GFX9 and GFX10 have signed byte immediate offsets.
1823  int64_t ByteOffset = C->getSExtValue();
1824  Optional<int64_t> EncodedOffset =
1825  AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, false);
1826  if (EncodedOffset) {
1827  Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
1828  Imm = true;
1829  return true;
1830  }
1831 
1832  // SGPR and literal offsets are unsigned.
1833  if (ByteOffset < 0)
1834  return false;
1835 
1836  EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset);
1837  if (EncodedOffset) {
1838  Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
1839  return true;
1840  }
1841 
1842  if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
1843  return false;
1844 
1845  SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1846  Offset = SDValue(
1847  CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
1848 
1849  return true;
1850 }
1851 
1852 SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
1853  if (Addr.getValueType() != MVT::i32)
1854  return Addr;
1855 
1856  // Zero-extend a 32-bit address.
1857  SDLoc SL(Addr);
1858 
1859  const MachineFunction &MF = CurDAG->getMachineFunction();
1860  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1861  unsigned AddrHiVal = Info->get32BitAddressHighBits();
1862  SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
1863 
1864  const SDValue Ops[] = {
1865  CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
1866  Addr,
1867  CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
1868  SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
1869  0),
1870  CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
1871  };
1872 
1873  return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
1874  Ops), 0);
1875 }
1876 
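// Illustration (not part of this file): the REG_SEQUENCE built in
// Expand32BitAddress amounts to the following plain arithmetic, where the
// high half comes from get32BitAddressHighBits(). This sketch ignores
// register classes and exists only to show the value being formed.
static uint64_t expand32BitAddressSketch(uint32_t AddrLo, uint32_t AddrHi) {
  return (uint64_t(AddrHi) << 32) | AddrLo; // sub1:sub0 = AddrHi:AddrLo
}
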
1877 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1878  SDValue &Offset, bool &Imm) const {
1879  SDLoc SL(Addr);
1880 
1881  // A 32-bit (address + offset) should not cause unsigned 32-bit integer
1882  // wraparound, because s_load instructions perform the addition in 64 bits.
1883  if ((Addr.getValueType() != MVT::i32 ||
1884  Addr->getFlags().hasNoUnsignedWrap())) {
1885  SDValue N0, N1;
1886  // Extract the base and offset if possible.
1887  if (CurDAG->isBaseWithConstantOffset(Addr) ||
1888  Addr.getOpcode() == ISD::ADD) {
1889  N0 = Addr.getOperand(0);
1890  N1 = Addr.getOperand(1);
1891  } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
1892  assert(N0 && N1 && isa<ConstantSDNode>(N1));
1893  }
1894  if (N0 && N1) {
1895  if (SelectSMRDOffset(N1, Offset, Imm)) {
1896  SBase = Expand32BitAddress(N0);
1897  return true;
1898  }
1899  }
1900  }
1901  SBase = Expand32BitAddress(Addr);
1902  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
1903  Imm = true;
1904  return true;
1905 }
1906 
1907 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1908  SDValue &Offset) const {
1909  bool Imm = false;
1910  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1911 }
1912 
1913 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1914  SDValue &Offset) const {
1915 
1916  assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
1917 
1918  bool Imm = false;
1919  if (!SelectSMRD(Addr, SBase, Offset, Imm))
1920  return false;
1921 
1922  return !Imm && isa<ConstantSDNode>(Offset);
1923 }
1924 
1925 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
1926  SDValue &Offset) const {
1927  bool Imm = false;
1928  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
1929  !isa<ConstantSDNode>(Offset);
1930 }
1931 
1932 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
1933  SDValue &Offset) const {
1934  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
1935  // The immediate offset for S_BUFFER instructions is unsigned.
1936  if (auto Imm =
1937  AMDGPU::getSMRDEncodedOffset(*Subtarget, C->getZExtValue(), true)) {
1938  Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32);
1939  return true;
1940  }
1941  }
1942 
1943  return false;
1944 }
1945 
1946 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
1947  SDValue &Offset) const {
1948  assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
1949 
1950  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
1951  if (auto Imm = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget,
1952  C->getZExtValue())) {
1953  Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32);
1954  return true;
1955  }
1956  }
1957 
1958  return false;
1959 }
1960 
1961 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
1962  SDValue &Base,
1963  SDValue &Offset) const {
1964  SDLoc DL(Index);
1965 
1966  if (CurDAG->isBaseWithConstantOffset(Index)) {
1967  SDValue N0 = Index.getOperand(0);
1968  SDValue N1 = Index.getOperand(1);
1969  ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1970 
1971  // (add n0, c0)
1972  // Don't peel off the offset (c0) if doing so could possibly lead
1973  // the base (n0) to be negative.
1974  // (or n0, |c0|) can never change a sign given isBaseWithConstantOffset.
1975  if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0) ||
1976  (Index->getOpcode() == ISD::OR && C1->getSExtValue() >= 0)) {
1977  Base = N0;
1978  Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
1979  return true;
1980  }
1981  }
1982 
1983  if (isa<ConstantSDNode>(Index))
1984  return false;
1985 
1986  Base = Index;
1987  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1988  return true;
1989 }
1990 
1991 SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,
1992  SDValue Val, uint32_t Offset,
1993  uint32_t Width) {
1994  if (Val->isDivergent()) {
1995  unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
1996  SDValue Off = CurDAG->getTargetConstant(Offset, DL, MVT::i32);
1997  SDValue W = CurDAG->getTargetConstant(Width, DL, MVT::i32);
1998 
1999  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, Off, W);
2000  }
2001  unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2002  // Transformation function, pack the offset and width of a BFE into
2003  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
2004  // source, bits [5:0] contain the offset and bits [22:16] the width.
2005  uint32_t PackedVal = Offset | (Width << 16);
2006  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
2007 
2008  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
2009 }
2010 
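// Illustration (not part of this file): the packed S_BFE control operand
// built above, as a checkable helper. The field masks follow the comment
// about bits [5:0] (offset) and [22:16] (width) and are otherwise an
// assumption of this sketch.
static uint32_t packSBFEControl(uint32_t Offset, uint32_t Width) {
  return (Offset & 0x3f) | ((Width & 0x7f) << 16);
}
// e.g. packSBFEControl(16, 8) == 0x00080010: extract 8 bits starting at bit 16.
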
2011 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
2012  // "((a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)"
2013  // "((a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)"
2014  // Predicate: 0 < b <= c < 32
2015 
2016  const SDValue &Shl = N->getOperand(0);
2017  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
2018  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2019 
2020  if (B && C) {
2021  uint32_t BVal = B->getZExtValue();
2022  uint32_t CVal = C->getZExtValue();
2023 
2024  if (0 < BVal && BVal <= CVal && CVal < 32) {
2025  bool Signed = N->getOpcode() == ISD::SRA;
2026  ReplaceNode(N, getBFE32(Signed, SDLoc(N), Shl.getOperand(0), CVal - BVal,
2027  32 - CVal));
2028  return;
2029  }
2030  }
2031  SelectCode(N);
2032 }
2033 
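// Illustration (not part of this file): reference semantics for the rewrite
// above. For a logical "(x << b) >> c" with 0 < b <= c < 32, the selected
// BFE reads the field at offset c-b with width 32-c; bfeU32Ref is a
// hypothetical model of V_BFE_U32 used only to check that claim.
static uint32_t bfeU32Ref(uint32_t X, uint32_t Off, uint32_t W) {
  return (X >> Off) & ((1u << W) - 1u); // W < 32 under the predicate above
}
// e.g. with b = 8, c = 24: ((x << 8) >> 24) == bfeU32Ref(x, 16, 8) for all x.
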
2034 void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
2035  switch (N->getOpcode()) {
2036  case ISD::AND:
2037  if (N->getOperand(0).getOpcode() == ISD::SRL) {
2038  // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
2039  // Predicate: isMask(mask)
2040  const SDValue &Srl = N->getOperand(0);
2041  ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
2042  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
2043 
2044  if (Shift && Mask) {
2045  uint32_t ShiftVal = Shift->getZExtValue();
2046  uint32_t MaskVal = Mask->getZExtValue();
2047 
2048  if (isMask_32(MaskVal)) {
2049  uint32_t WidthVal = countPopulation(MaskVal);
2050  ReplaceNode(N, getBFE32(false, SDLoc(N), Srl.getOperand(0), ShiftVal,
2051  WidthVal));
2052  return;
2053  }
2054  }
2055  }
2056  break;
2057  case ISD::SRL:
2058  if (N->getOperand(0).getOpcode() == ISD::AND) {
2059  // "((a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
2060  // Predicate: isMask(mask >> b)
2061  const SDValue &And = N->getOperand(0);
2062  ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
2063  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
2064 
2065  if (Shift && Mask) {
2066  uint32_t ShiftVal = Shift->getZExtValue();
2067  uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
2068 
2069  if (isMask_32(MaskVal)) {
2070  uint32_t WidthVal = countPopulation(MaskVal);
2071  ReplaceNode(N, getBFE32(false, SDLoc(N), And.getOperand(0), ShiftVal,
2072  WidthVal));
2073  return;
2074  }
2075  }
2076  } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
2077  SelectS_BFEFromShifts(N);
2078  return;
2079  }
2080  break;
2081  case ISD::SRA:
2082  if (N->getOperand(0).getOpcode() == ISD::SHL) {
2083  SelectS_BFEFromShifts(N);
2084  return;
2085  }
2086  break;
2087 
2088  case ISD::SIGN_EXTEND_INREG: {
2089  // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
2090  SDValue Src = N->getOperand(0);
2091  if (Src.getOpcode() != ISD::SRL)
2092  break;
2093 
2094  const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
2095  if (!Amt)
2096  break;
2097 
2098  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2099  ReplaceNode(N, getBFE32(true, SDLoc(N), Src.getOperand(0),
2100  Amt->getZExtValue(), Width));
2101  return;
2102  }
2103  }
2104 
2105  SelectCode(N);
2106 }
2107 
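// Illustration (not part of this file): for the AND case above,
// "(x >> 3) & 0x1f" becomes a BFE with offset 3 and width popcount(0x1f) = 5.
// A sketch of the width computation for a mask that passes isMask_32:
static uint32_t bfeWidthFromMask(uint32_t MaskVal) {
  uint32_t W = 0;
  while (MaskVal & 1u) { // a low-bit run of ones, so this counts every set bit
    ++W;
    MaskVal >>= 1;
  }
  return W; // equals countPopulation of the original mask
}
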
2108 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
2109  assert(N->getOpcode() == ISD::BRCOND);
2110  if (!N->hasOneUse())
2111  return false;
2112 
2113  SDValue Cond = N->getOperand(1);
2114  if (Cond.getOpcode() == ISD::CopyToReg)
2115  Cond = Cond.getOperand(2);
2116 
2117  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
2118  return false;
2119 
2120  MVT VT = Cond.getOperand(0).getSimpleValueType();
2121  if (VT == MVT::i32)
2122  return true;
2123 
2124  if (VT == MVT::i64) {
2125  auto ST = static_cast<const GCNSubtarget *>(Subtarget);
2126 
2127  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
2128  return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
2129  }
2130 
2131  return false;
2132 }
2133 
2134 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
2135  SDValue Cond = N->getOperand(1);
2136 
2137  if (Cond.isUndef()) {
2138  CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
2139  N->getOperand(2), N->getOperand(0));
2140  return;
2141  }
2142 
2143  const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
2144  const SIRegisterInfo *TRI = ST->getRegisterInfo();
2145 
2146  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
2147  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
2148  Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
2149  SDLoc SL(N);
2150 
2151  if (!UseSCCBr) {
2152  // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
2153  // analyzed what generates the vcc value, so we do not know whether vcc
2154  // bits for disabled lanes are 0. Thus we need to mask out bits for
2155  // disabled lanes.
2156  //
2157  // For the case that we select S_CBRANCH_SCC1 and it gets
2158  // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
2159  // SIInstrInfo::moveToVALU, which inserts the S_AND.
2160  //
2161  // We could add an analysis of what generates the vcc value here and omit
2162  // the S_AND when it is unnecessary. But it would be better to add a separate
2163  // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
2164  // catches both cases.
2165  Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32
2166  : AMDGPU::S_AND_B64,
2167  SL, MVT::i1,
2168  CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO
2169  : AMDGPU::EXEC,
2170  MVT::i1),
2171  Cond),
2172  0);
2173  }
2174 
2175  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
2176  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
2177  N->getOperand(2), // Basic Block
2178  VCC.getValue(0));
2179 }
2180 
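// Illustration (not part of this file): in scalar terms, the S_AND inserted
// above computes the following over wave-sized lane masks (32 or 64 bits
// depending on the subtarget); the helper is purely a sketch.
static uint64_t maskDisabledLanes(uint64_t CondMask, uint64_t ExecMask) {
  return CondMask & ExecMask; // clear condition bits of disabled lanes
}
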
2181 void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
2182  MVT VT = N->getSimpleValueType(0);
2183  bool IsFMA = N->getOpcode() == ISD::FMA;
2184  if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
2185  !Subtarget->hasFmaMixInsts()) ||
2186  ((IsFMA && Subtarget->hasMadMixInsts()) ||
2187  (!IsFMA && Subtarget->hasFmaMixInsts()))) {
2188  SelectCode(N);
2189  return;
2190  }
2191 
2192  SDValue Src0 = N->getOperand(0);
2193  SDValue Src1 = N->getOperand(1);
2194  SDValue Src2 = N->getOperand(2);
2195  unsigned Src0Mods, Src1Mods, Src2Mods;
2196 
2197  // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
2198  // using the conversion from f16.
2199  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
2200  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
2201  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
2202 
2203  assert((IsFMA || !Mode.allFP32Denormals()) &&
2204  "fmad selected with denormals enabled");
2205  // TODO: We can select this with f32 denormals enabled if all the sources are
2206  // converted from f16 (in which case fmad isn't legal).
2207 
2208  if (Sel0 || Sel1 || Sel2) {
2209  // For dummy operands.
2210  SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2211  SDValue Ops[] = {
2212  CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
2213  CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
2214  CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
2215  CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
2216  Zero, Zero
2217  };
2218 
2219  CurDAG->SelectNodeTo(N,
2220  IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
2221  MVT::f32, Ops);
2222  } else {
2223  SelectCode(N);
2224  }
2225 }
2226 
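// Illustration (not part of this file): the mix instructions selected above
// evaluate a*b+c in f32 while individual sources may be f16 values promoted
// in place. A rough scalar model, with the f16 inputs assumed already
// converted to float:
static float madMixRef(float A, float B, float C) {
  return A * B + C; // MAD form; the V_FMA_MIX_F32 path is the fused a*b+c
}
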
2227 // This is here because there isn't a way to use the generated sub0_sub1 as the
2228 // subreg index to EXTRACT_SUBREG in tablegen.
2229 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
2230  MemSDNode *Mem = cast<MemSDNode>(N);
2231  unsigned AS = Mem->getAddressSpace();
2232  if (AS == AMDGPUAS::FLAT_ADDRESS) {
2233  SelectCode(N);
2234  return;
2235  }
2236 
2237  MVT VT = N->getSimpleValueType(0);
2238  bool Is32 = (VT == MVT::i32);
2239  SDLoc SL(N);
2240 
2241  MachineSDNode *CmpSwap = nullptr;
2242  if (Subtarget->hasAddr64()) {
2243  SDValue SRsrc, VAddr, SOffset, Offset;
2244 
2245  if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset)) {
2246  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
2247  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
2248  SDValue CmpVal = Mem->getOperand(2);
2249  SDValue CPol = CurDAG->getTargetConstant(AMDGPU::CPol::GLC, SL, MVT::i32);
2250 
2251  // XXX - Do we care about glue operands?
2252 
2253  SDValue Ops[] = {CmpVal, VAddr, SRsrc, SOffset, Offset, CPol,
2254  Mem->getChain()};
2255 
2256  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
2257  }
2258  }
2259 
2260  if (!CmpSwap) {
2261  SDValue SRsrc, SOffset, Offset;
2262  if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset)) {
2263  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
2264  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
2265 
2266  SDValue CmpVal = Mem->getOperand(2);
2267  SDValue CPol = CurDAG->getTargetConstant(AMDGPU::CPol::GLC, SL, MVT::i32);
2268  SDValue Ops[] = {CmpVal, SRsrc, SOffset, Offset, CPol, Mem->getChain()};
2269 
2270  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
2271  }
2272  }
2273 
2274  if (!CmpSwap) {
2275  SelectCode(N);
2276  return;
2277  }
2278 
2279  MachineMemOperand *MMO = Mem->getMemOperand();
2280  CurDAG->setNodeMemRefs(CmpSwap, {MMO});
2281 
2282  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
2283  SDValue Extract
2284  = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
2285 
2286  ReplaceUses(SDValue(N, 0), Extract);
2287  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
2288  CurDAG->RemoveDeadNode(N);
2289 }
2290 
2291 void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
2292  // The address is assumed to be uniform, so if it ends up in a VGPR, it will
2293  // be copied to an SGPR with readfirstlane.
2294  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2295  AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2296 
2297  SDValue Chain = N->getOperand(0);
2298  SDValue Ptr = N->getOperand(2);
2299  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2300  MachineMemOperand *MMO = M->getMemOperand();
2301  bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
2302 
2303  SDValue Offset;
2304  if (CurDAG->isBaseWithConstantOffset(Ptr)) {
2305  SDValue PtrBase = Ptr.getOperand(0);
2306  SDValue PtrOffset = Ptr.getOperand(1);
2307 
2308  const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
2309  if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
2310  N = glueCopyToM0(N, PtrBase);
2311  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
2312  }
2313  }
2314 
2315  if (!Offset) {
2316  N = glueCopyToM0(N, Ptr);
2317  Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2318  }
2319 
2320  SDValue Ops[] = {
2321  Offset,
2322  CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
2323  Chain,
2324  N->getOperand(N->getNumOperands() - 1) // New glue
2325  };
2326 
2327  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2328  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2329 }
2330 
2331 static unsigned gwsIntrinToOpcode(unsigned IntrID) {
2332  switch (IntrID) {
2333  case Intrinsic::amdgcn_ds_gws_init:
2334  return AMDGPU::DS_GWS_INIT;
2335  case Intrinsic::amdgcn_ds_gws_barrier:
2336  return AMDGPU::DS_GWS_BARRIER;
2337  case Intrinsic::amdgcn_ds_gws_sema_v:
2338  return AMDGPU::DS_GWS_SEMA_V;
2339  case Intrinsic::amdgcn_ds_gws_sema_br:
2340  return AMDGPU::DS_GWS_SEMA_BR;
2341  case Intrinsic::amdgcn_ds_gws_sema_p:
2342  return AMDGPU::DS_GWS_SEMA_P;
2343  case Intrinsic::amdgcn_ds_gws_sema_release_all:
2344  return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
2345  default:
2346  llvm_unreachable("not a gws intrinsic");
2347  }
2348 }
2349 
2350 void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
2351  if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
2352  !Subtarget->hasGWSSemaReleaseAll()) {
2353  // Let this error.
2354  SelectCode(N);
2355  return;
2356  }
2357 
2358  // Chain, intrinsic ID, vsrc, offset
2359  const bool HasVSrc = N->getNumOperands() == 4;
2360  assert(HasVSrc || N->getNumOperands() == 3);
2361 
2362  SDLoc SL(N);
2363  SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
2364  int ImmOffset = 0;
2365  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2366  MachineMemOperand *MMO = M->getMemOperand();
2367 
2368  // Don't worry if the offset ends up in a VGPR. Only one lane will have
2369  // effect, so SIFixSGPRCopies will validly insert readfirstlane.
2370 
2371  // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
2372  // offset field) % 64. Some versions of the programming guide omit the m0
2373  // part, or claim it's from offset 0.
2374  if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
2375  // If we have a constant offset, try to use the 0 in m0 as the base.
2376  // TODO: Look into changing the default m0 initialization value. If the
2377  // default -1 set only the low 16 bits, we could leave it as-is and add 1 to
2378  // the immediate offset.
2379  glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
2380  ImmOffset = ConstOffset->getZExtValue();
2381  } else {
2382  if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
2383  ImmOffset = BaseOffset.getConstantOperandVal(1);
2384  BaseOffset = BaseOffset.getOperand(0);
2385  }
2386 
2387  // Prefer to do the shift in an SGPR since it should be possible to use m0
2388  // as the result directly. If it's already an SGPR, it will be eliminated
2389  // later.
2390  SDNode *SGPROffset
2391  = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
2392  BaseOffset);
2393  // Shift to offset in m0
2394  SDNode *M0Base
2395  = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
2396  SDValue(SGPROffset, 0),
2397  CurDAG->getTargetConstant(16, SL, MVT::i32));
2398  glueCopyToM0(N, SDValue(M0Base, 0));
2399  }
2400 
2401  SDValue Chain = N->getOperand(0);
2402  SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
2403 
2404  const unsigned Opc = gwsIntrinToOpcode(IntrID);
2405  SmallVector<SDValue, 5> Ops;
2406  if (HasVSrc)
2407  Ops.push_back(N->getOperand(2));
2408  Ops.push_back(OffsetField);
2409  Ops.push_back(Chain);
2410 
2411  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2412  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2413 }
2414 
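// Illustration (not part of this file): for a non-constant base, the
// S_LSHL_B32 by 16 above places the base in m0[21:16], matching the
// "(<isa opaque base> + M0[21:16] + offset field) % 64" formula quoted
// earlier. A sketch of that packing:
static uint32_t packGWSBaseInM0(uint32_t BaseOffset) {
  return BaseOffset << 16; // base lands in m0[21:16]; low bits stay zero here
}
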
2415 void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
2416  if (Subtarget->getLDSBankCount() != 16) {
2417  // This is a single instruction with a pattern.
2418  SelectCode(N);
2419  return;
2420  }
2421 
2422  SDLoc DL(N);
2423 
2424  // This requires 2 instructions. It is possible to write a pattern to support
2425  // this, but the generated isel emitter doesn't correctly deal with multiple
2426  // output instructions using the same physical register input. The copy to m0
2427  // is incorrectly placed before the second instruction.
2428  //
2429  // TODO: Match source modifiers.
2430  //
2431  // def : Pat <
2432  // (int_amdgcn_interp_p1_f16
2433  // (VOP3Mods f32:$src0, i32:$src0_modifiers),
2434  // (i32 timm:$attrchan), (i32 timm:$attr),
2435  // (i1 timm:$high), M0),
2436  // (V_INTERP_P1LV_F16 $src0_modifiers, VGPR_32:$src0, timm:$attr,
2437  // timm:$attrchan, 0,
2438  // (V_INTERP_MOV_F32 2, timm:$attr, timm:$attrchan), timm:$high)> {
2439  // let Predicates = [has16BankLDS];
2440  // }
2441 
2442  // 16 bank LDS
2443  SDValue ToM0 = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, AMDGPU::M0,
2444  N->getOperand(5), SDValue());
2445 
2446  SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::Other);
2447 
2448  SDNode *InterpMov =
2449  CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs, {
2450  CurDAG->getTargetConstant(2, DL, MVT::i32), // P0
2451  N->getOperand(3), // Attr
2452  N->getOperand(2), // Attrchan
2453  ToM0.getValue(1) // In glue
2454  });
2455 
2456  SDNode *InterpP1LV =
2457  CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32, {
2458  CurDAG->getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
2459  N->getOperand(1), // Src0
2460  N->getOperand(3), // Attr
2461  N->getOperand(2), // Attrchan
2462  CurDAG->getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
2463  SDValue(InterpMov, 0), // Src2 - holds two f16 values selected by high
2464  N->getOperand(4), // high
2465  CurDAG->getTargetConstant(0, DL, MVT::i1), // $clamp
2466  CurDAG->getTargetConstant(0, DL, MVT::i32), // $omod
2467  SDValue(InterpMov, 1)
2468  });
2469 
2470  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), SDValue(InterpP1LV, 0));
2471 }
2472 
2473 void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
2474  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2475  switch (IntrID) {
2476  case Intrinsic::amdgcn_ds_append:
2477  case Intrinsic::amdgcn_ds_consume: {
2478  if (N->getValueType(0) != MVT::i32)
2479  break;
2480  SelectDSAppendConsume(N, IntrID);
2481  return;
2482  }
2483  }
2484 
2485  SelectCode(N);
2486 }
2487 
2488 void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
2489  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
2490  unsigned Opcode;
2491  switch (IntrID) {
2492  case Intrinsic::amdgcn_wqm:
2493  Opcode = AMDGPU::WQM;
2494  break;
2495  case Intrinsic::amdgcn_softwqm:
2496  Opcode = AMDGPU::SOFT_WQM;
2497  break;
2498  case Intrinsic::amdgcn_wwm:
2499  case Intrinsic::amdgcn_strict_wwm:
2500  Opcode = AMDGPU::STRICT_WWM;
2501  break;
2502  case Intrinsic::amdgcn_strict_wqm:
2503  Opcode = AMDGPU::STRICT_WQM;
2504  break;
2505  case Intrinsic::amdgcn_interp_p1_f16:
2506  SelectInterpP1F16(N);
2507  return;
2508  default:
2509  SelectCode(N);
2510  return;
2511  }
2512 
2513  SDValue Src = N->getOperand(1);
2514  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
2515 }
2516 
2517 void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
2518  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2519  switch (IntrID) {
2520  case Intrinsic::amdgcn_ds_gws_init:
2521  case Intrinsic::amdgcn_ds_gws_barrier:
2522  case Intrinsic::amdgcn_ds_gws_sema_v:
2523  case Intrinsic::amdgcn_ds_gws_sema_br:
2524  case Intrinsic::amdgcn_ds_gws_sema_p:
2525  case Intrinsic::amdgcn_ds_gws_sema_release_all:
2526  SelectDS_GWS(N, IntrID);
2527  return;
2528  default:
2529  break;
2530  }
2531 
2532  SelectCode(N);
2533 }
2534 
2535 bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
2536  unsigned &Mods,
2537  bool AllowAbs) const {
2538  Mods = 0;
2539  Src = In;
2540 
2541  if (Src.getOpcode() == ISD::FNEG) {
2542  Mods |= SISrcMods::NEG;
2543  Src = Src.getOperand(0);
2544  }
2545 
2546  if (AllowAbs && Src.getOpcode() == ISD::FABS) {
2547  Mods |= SISrcMods::ABS;
2548  Src = Src.getOperand(0);
2549  }
2550 
2551  return true;
2552 }
2553 
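// Illustration (not part of this file): reference semantics of the NEG/ABS
// modifiers matched above. ABS is applied before NEG, which is why the
// selector peels an outer fneg first and then a fabs. The helper is a
// sketch only (it ignores the sign of zero).
static float applySrcModsRef(float X, bool Abs, bool Neg) {
  if (Abs)
    X = X < 0.0f ? -X : X;
  if (Neg)
    X = -X;
  return X;
}
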
2554 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
2555  SDValue &SrcMods) const {
2556  unsigned Mods;
2557  if (SelectVOP3ModsImpl(In, Src, Mods)) {
2558  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2559  return true;
2560  }
2561 
2562  return false;
2563 }
2564 
2565 bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
2566  SDValue &SrcMods) const {
2567  unsigned Mods;
2568  if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {
2569  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2570  return true;
2571  }
2572 
2573  return false;
2574 }
2575 
2576 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
2577  SDValue &SrcMods) const {
2578  SelectVOP3Mods(In, Src, SrcMods);
2579  return isNoNanSrc(Src);
2580 }
2581 
2582 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
2583  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
2584  return false;
2585 
2586  Src = In;
2587  return true;
2588 }
2589 
2590 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
2591  SDValue &SrcMods, SDValue &Clamp,
2592  SDValue &Omod) const {
2593  SDLoc DL(In);
2594  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2595  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2596 
2597  return SelectVOP3Mods(In, Src, SrcMods);
2598 }
2599 
2600 bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
2601  SDValue &SrcMods, SDValue &Clamp,
2602  SDValue &Omod) const {
2603  SDLoc DL(In);
2604  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2605  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2606 
2607  return SelectVOP3BMods(In, Src, SrcMods);
2608 }
2609 
2610 bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
2611  SDValue &Clamp, SDValue &Omod) const {
2612  Src = In;
2613 
2614  SDLoc DL(In);
2615  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2616  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2617 
2618  return true;
2619 }
2620 
2621 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
2622  SDValue &SrcMods) const {
2623  unsigned Mods = 0;
2624  Src = In;
2625 
2626  if (Src.getOpcode() == ISD::FNEG) {
2627  Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
2628  Src = Src.getOperand(0);
2629  }
2630 
2631  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
2632  unsigned VecMods = Mods;
2633 
2634  SDValue Lo = stripBitcast(Src.getOperand(0));
2635  SDValue Hi = stripBitcast(Src.getOperand(1));
2636 
2637  if (Lo.getOpcode() == ISD::FNEG) {
2638  Lo = stripBitcast(Lo.getOperand(0));
2639  Mods ^= SISrcMods::NEG;
2640  }
2641 
2642  if (Hi.getOpcode() == ISD::FNEG) {
2643  Hi = stripBitcast(Hi.getOperand(0));
2644  Mods ^= SISrcMods::NEG_HI;
2645  }
2646 
2647  if (isExtractHiElt(Lo, Lo))
2648  Mods |= SISrcMods::OP_SEL_0;
2649 
2650  if (isExtractHiElt(Hi, Hi))
2651  Mods |= SISrcMods::OP_SEL_1;
2652 
2653  unsigned VecSize = Src.getValueSizeInBits();
2654  Lo = stripExtractLoElt(Lo);
2655  Hi = stripExtractLoElt(Hi);
2656 
2657  if (Lo.getValueSizeInBits() > VecSize) {
2658  Lo = CurDAG->getTargetExtractSubreg(
2659  (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
2660  MVT::getIntegerVT(VecSize), Lo);
2661  }
2662 
2663  if (Hi.getValueSizeInBits() > VecSize) {
2664  Hi = CurDAG->getTargetExtractSubreg(
2665  (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
2666  MVT::getIntegerVT(VecSize), Hi);
2667  }
2668 
2669  assert(Lo.getValueSizeInBits() <= VecSize &&
2670  Hi.getValueSizeInBits() <= VecSize);
2671 
2672  if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
2673  // Really a scalar input. Just select from the low half of the register to
2674  // avoid packing.
2675 
2676  if (VecSize == 32 || VecSize == Lo.getValueSizeInBits()) {
2677  Src = Lo;
2678  } else {
2679  assert(Lo.getValueSizeInBits() == 32 && VecSize == 64);
2680 
2681  SDLoc SL(In);
2682  SDValue Undef = SDValue(
2683  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL,
2684  Lo.getValueType()), 0);
2685  auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
2686  : AMDGPU::SReg_64RegClassID;
2687  const SDValue Ops[] = {
2688  CurDAG->getTargetConstant(RC, SL, MVT::i32),
2689  Lo, CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
2690  Undef, CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) };
2691 
2692  Src = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL,
2693  Src.getValueType(), Ops), 0);
2694  }
2695  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2696  return true;
2697  }
2698 
2699  if (VecSize == 64 && Lo == Hi && isa<ConstantFPSDNode>(Lo)) {
2700  uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF()
2701  .bitcastToAPInt().getZExtValue();
2702  if (AMDGPU::isInlinableLiteral32(Lit, Subtarget->hasInv2PiInlineImm())) {
2703  Src = CurDAG->getTargetConstant(Lit, SDLoc(In), MVT::i64);
2704  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2705  return true;
2706  }
2707  }
2708 
2709  Mods = VecMods;
2710  }
2711 
2712  // Packed instructions do not have abs modifiers.
2713  Mods |= SISrcMods::OP_SEL_1;
2714 
2715  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2716  return true;
2717 }
2718 
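// Illustration (not part of this file): in terms of raw register bits, the
// OP_SEL_0/OP_SEL_1 bits set above choose which f16 half of each 32-bit
// source feeds the corresponding lane of the packed operation. A sketch:
static uint16_t selectHalfSketch(uint32_t RegBits, bool OpSelHi) {
  return OpSelHi ? uint16_t(RegBits >> 16) : uint16_t(RegBits & 0xffffu);
}
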
2719 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
2720  SDValue &SrcMods) const {
2721  Src = In;
2722  // FIXME: Handle op_sel
2723  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2724  return true;
2725 }
2726 
2727 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
2728  SDValue &SrcMods) const {
2729  // FIXME: Handle op_sel
2730  return SelectVOP3Mods(In, Src, SrcMods);
2731 }
2732 
2733 // The return value is not whether the match is possible (which it always is),
2734 // but whether or not a conversion is really used.
2735 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
2736  unsigned &Mods) const {
2737  Mods = 0;
2738  SelectVOP3ModsImpl(In, Src, Mods);
2739 
2740  if (Src.getOpcode() == ISD::FP_EXTEND) {
2741  Src = Src.getOperand(0);
2742  assert(Src.getValueType() == MVT::f16);
2743  Src = stripBitcast(Src);
2744 
2745  // Be careful about folding modifiers if we already have an abs. fneg is
2746  // applied last, so we don't want to apply an earlier fneg.
2747  if ((Mods & SISrcMods::ABS) == 0) {
2748  unsigned ModsTmp;
2749  SelectVOP3ModsImpl(Src, Src, ModsTmp);
2750 
2751  if ((ModsTmp & SISrcMods::NEG) != 0)
2752  Mods ^= SISrcMods::NEG;
2753 
2754  if ((ModsTmp & SISrcMods::ABS) != 0)
2755  Mods |= SISrcMods::ABS;
2756  }
2757 
2758  // op_sel/op_sel_hi decide the source type and source.
2759  // If the source's op_sel_hi is set, it indicates a conversion from fp16.
2760  // If the source's op_sel is set, it picks the high half of the source
2761  // register.
2762 
2763  Mods |= SISrcMods::OP_SEL_1;
2764  if (isExtractHiElt(Src, Src)) {
2765  Mods |= SISrcMods::OP_SEL_0;
2766 
2767  // TODO: Should we try to look for neg/abs here?
2768  }
2769 
2770  return true;
2771  }
2772 
2773  return false;
2774 }
2775 
2776 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2777  SDValue &SrcMods) const {
2778  unsigned Mods = 0;
2779  SelectVOP3PMadMixModsImpl(In, Src, Mods);
2780  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2781  return true;
2782 }
2783 
2784 SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
2785  if (In.isUndef())
2786  return CurDAG->getUNDEF(MVT::i32);
2787 
2788  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2789  SDLoc SL(In);
2790  return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
2791  }
2792 
2793  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2794  SDLoc SL(In);
2795  return CurDAG->getConstant(
2796  C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2797  }
2798 
2799  SDValue Src;
2800  if (isExtractHiElt(In, Src))
2801  return Src;
2802 
2803  return SDValue();
2804 }
2805 
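// Illustration (not part of this file): the constant paths of getHi16Elt
// place an immediate into the high half of a 32-bit value, i.e.:
static uint32_t hi16FromImmSketch(uint16_t Imm) {
  return uint32_t(Imm) << 16; // later read back via the high-half extract
}
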
2806 bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
2807  assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);
2808 
2809  const SIRegisterInfo *SIRI =
2810  static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
2811  const SIInstrInfo * SII =
2812  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2813 
2814  unsigned Limit = 0;
2815  bool AllUsesAcceptSReg = true;
2816  for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
2817  Limit < 10 && U != E; ++U, ++Limit) {
2818  const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
2819 
2820  // If the register class is unknown, it could be an unknown
2821  // register class that needs to be an SGPR, e.g. an inline asm
2822  // constraint
2823  if (!RC || SIRI->isSGPRClass(RC))
2824  return false;
2825 
2826  if (RC != &AMDGPU::VS_32RegClass) {
2827  AllUsesAcceptSReg = false;
2828  SDNode * User = *U;
2829  if (User->isMachineOpcode()) {
2830  unsigned Opc = User->getMachineOpcode();
2831  MCInstrDesc Desc = SII->get(Opc);
2832  if (Desc.isCommutable()) {
2833  unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
2834  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
2835  if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
2836  unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
2837  const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
2838  if (CommutedRC == &AMDGPU::VS_32RegClass)
2839  AllUsesAcceptSReg = true;
2840  }
2841  }
2842  }
2843  // If "AllUsesAcceptSReg == false" so far, we haven't succeeded in
2844  // commuting the current user. This means we have at least one use
2845  // that strictly requires a VGPR. Thus, we will not attempt to commute
2846  // other user instructions.
2847  if (!AllUsesAcceptSReg)
2848  break;
2849  }
2850  }
2851  return !AllUsesAcceptSReg && (Limit < 10);
2852 }
2853 
2854 bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
2855  auto Ld = cast<LoadSDNode>(N);
2856 
2857  return Ld->getAlignment() >= 4 &&
2858  (
2859  (
2860  (
2861  Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2862  Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT
2863  )
2864  &&
2865  !N->isDivergent()
2866  )
2867  ||
2868  (
2869  Subtarget->getScalarizeGlobalBehavior() &&
2870  Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
2871  Ld->isSimple() &&
2872  !N->isDivergent() &&
2873  static_cast<const SITargetLowering *>(
2874  getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)
2875  )
2876  );
2877 }
2878 
2879 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
2880  const AMDGPUTargetLowering& Lowering =
2881  *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
2882  bool IsModified = false;
2883  do {
2884  IsModified = false;
2885 
2886  // Go over all selected nodes and try to fold them a bit more
2887  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
2888  while (Position != CurDAG->allnodes_end()) {
2889  SDNode *Node = &*Position++;
2890  MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
2891  if (!MachineNode)
2892  continue;
2893 
2894  SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
2895  if (ResNode != Node) {
2896  if (ResNode)
2897  ReplaceUses(Node, ResNode);
2898  IsModified = true;
2899  }
2900  }
2901  CurDAG->RemoveDeadNodes();
2902  } while (IsModified);
2903 }
llvm::ISD::SUB
@ SUB
Definition: ISDOpcodes.h:240
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
llvm::AMDGPUISD::CLAMP
@ CLAMP
CLAMP value between 0.0 and 1.0.
Definition: AMDGPUISelLowering.h:378
llvm::ISD::FPOWI
@ FPOWI
Definition: ISDOpcodes.h:876
i
i
Definition: README.txt:29
llvm::MCInstrDesc::getNumDefs
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:243
llvm::ARMII::VecSize
@ VecSize
Definition: ARMBaseInfo.h:421
CmpMode::FP
@ FP
llvm::ConstantSDNode
Definition: SelectionDAGNodes.h:1558
Lowering
Shadow Stack GC Lowering
Definition: ShadowStackGCLowering.cpp:99
llvm::SelectionDAGISel::getTargetLowering
const TargetLowering * getTargetLowering() const
Definition: SelectionDAGISel.h:67
Signed
@ Signed
Definition: NVPTXISelLowering.cpp:4645
llvm::ISD::INTRINSIC_VOID
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
llvm::AMDGPUSubtarget::hasInv2PiInlineImm
bool hasInv2PiInlineImm() const
Definition: AMDGPUSubtarget.h:180
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:22
llvm::SelectionDAGISel::TM
TargetMachine & TM
Definition: SelectionDAGISel.h:41
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector
bool matchLoadD16FromBuildVector(SDNode *N) const
Definition: AMDGPUISelDAGToDAG.cpp:209
llvm::EVT::isScalarInteger
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:150
AMDGPUISelDAGToDAG.h
llvm::AMDGPUISD::DIV_SCALE
@ DIV_SCALE
Definition: AMDGPUISelLowering.h:409
v2i32
gets compiled into this on rsp movaps rsp movaps rsp movaps rsp movaps rsp movaps rsp movaps rsp movaps rsp movaps rsp movq rsp movq rsp movq rsp movq rsp movq rsp rax movq rsp rax movq rsp rsp rsp eax eax jbe LBB1_3 rcx rax movq rsp eax rsp ret ecx eax rcx movl rsp jmp LBB1_2 gcc rsp rax movq rsp rsp movq rsp rax movq rsp eax eax jb L6 rdx eax rsp ret p2align edx rdx eax movl rsp eax rsp ret and it gets compiled into this on ebp esp eax movl ebp eax movl ebp eax esp popl ebp ret gcc ebp eax popl ebp ret Teach tblgen not to check bitconvert source type in some cases This allows us to consolidate the following patterns in X86InstrMMX v2i32(MMX_MOVDQ2Qrr VR128:$src))>
llvm::SISrcMods::NEG_HI
@ NEG_HI
Definition: SIDefines.h:213
llvm::SDLoc
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Definition: SelectionDAGNodes.h:1088
llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:633
llvm::ISD::BITCAST
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:852
llvm::ISD::SETNE
@ SETNE
Definition: ISDOpcodes.h:1395
llvm::SelectionDAG::SignBitIsZero
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
Definition: SelectionDAG.cpp:2484
llvm::AMDGPUTargetLowering
Definition: AMDGPUISelLowering.h:27
llvm::SIRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(unsigned RCID) const
Definition: SIRegisterInfo.cpp:2710
llvm::ISD::FMINNUM
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:902
SIMachineFunctionInfo.h
llvm::SelectionDAG::getCopyToReg
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:735
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::SDValue::getNode
SDNode * getNode() const
get the SDNode which holds the desired result
Definition: SelectionDAGNodes.h:151
llvm::ISD::ConstantFP
@ ConstantFP
Definition: ISDOpcodes.h:77
llvm::SelectionDAG::allnodes_end
allnodes_const_iterator allnodes_end() const
Definition: SelectionDAG.h:494
llvm::AMDGPU::getSMRDEncodedOffset
Optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer)
Definition: AMDGPUBaseInfo.cpp:1900
llvm::ISD::UADDO
@ UADDO
Definition: ISDOpcodes.h:311
llvm::AMDGPUSubtarget::hasMadMixInsts
bool hasMadMixInsts() const
Definition: AMDGPUSubtarget.h:148
llvm::ISD::ADDC
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:269
llvm::KnownBits::Zero
APInt Zero
Definition: KnownBits.h:24
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1176
llvm::ISD::SETEQ
@ SETEQ
Definition: ISDOpcodes.h:1390
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:209
llvm::SelectionDAG::getVTList
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
Definition: SelectionDAG.cpp:8561
llvm::AMDGPUISD::CVT_PKNORM_I16_F32
@ CVT_PKNORM_I16_F32
Definition: AMDGPUISelLowering.h:464
llvm::MipsISD::Lo
@ Lo
Definition: MipsISelLowering.h:79
llvm::MachineSDNode
An SDNode that represents everything that will be needed to construct a MachineInstr.
Definition: SelectionDAGNodes.h:2765
llvm::AMDGPUISD::FMUL_W_CHAIN
@ FMUL_W_CHAIN
Definition: AMDGPUISelLowering.h:389
llvm::SelectionDAG::allnodes_begin
allnodes_const_iterator allnodes_begin() const
Definition: SelectionDAG.h:493
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::AMDGPUISD::DIV_FIXUP
@ DIV_FIXUP
Definition: AMDGPUISelLowering.h:411
llvm::AMDGPUISD::LOAD_D16_HI_I8
@ LOAD_D16_HI_I8
Definition: AMDGPUISelLowering.h:490
llvm::GCNSubtarget::hasFlatSegmentOffsetBug
bool hasFlatSegmentOffsetBug() const
Definition: GCNSubtarget.h:581
llvm::MemSDNode::getMemoryVT
EVT getMemoryVT() const
Return the type of the in-memory value.
Definition: SelectionDAGNodes.h:1338
ValueTracking.h
llvm::ISD::FLOG2
@ FLOG2
Definition: ISDOpcodes.h:879
llvm::MemSDNode::getChain
const SDValue & getChain() const
Definition: SelectionDAGNodes.h:1361
llvm::SDNode
Represents one node in the SelectionDAG.
Definition: SelectionDAGNodes.h:454
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:466
llvm::AMDGPU::getSMRDEncodedLiteralOffset32
Optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
Definition: AMDGPUBaseInfo.cpp:1917
llvm::LoadSDNode
This class is used to represent ISD::LOAD nodes.
Definition: SelectionDAGNodes.h:2300
llvm::SDNode::use_iterator
This class provides iterator support for SDUse operands that use a specific SDNode.
Definition: SelectionDAGNodes.h:729
Shift
bool Shift
Definition: README.txt:468
AMDGPUDAGToDAGISel
AMDGPU specific code to select AMDGPU machine instructions for SelectionDAG operations.
Definition: AMDGPUISelDAGToDAG.h:79
llvm::MemSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:1363
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:128
i8
Clang compiles this i8
Definition: README.txt:504
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1271
llvm::SelectionDAG::isBaseWithConstantOffset
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
Definition: SelectionDAG.cpp:4408
llvm::GCNSubtarget::hasDLInsts
bool hasDLInsts() const
Definition: GCNSubtarget.h:662
llvm::ISD::SETCC
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:702
llvm::Optional< int64_t >
llvm::SDNode::isPredecessorOf
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
Definition: SelectionDAGNodes.h:815
i1
Decimal Convert From to National Zoned Signed int_ppc_altivec_bcdcfno i1
Definition: README_P9.txt:147
llvm::createAMDGPUISelDag
FunctionPass * createAMDGPUISelDag(TargetMachine *TM=nullptr, CodeGenOpt::Level OptLevel=CodeGenOpt::Default)
This pass converts a legalized DAG into a AMDGPU-specific.
Definition: AMDGPUISelDAGToDAG.cpp:112
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:80
llvm::MemSDNode
This is an abstract virtual class for memory operations.
Definition: SelectionDAGNodes.h:1256
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
matchZExtFromI32
static SDValue matchZExtFromI32(SDValue Op)
Definition: AMDGPUISelDAGToDAG.cpp:1635
llvm::RegState::Undef
@ Undef
Value of the register doesn't matter.
Definition: MachineInstrBuilder.h:52
llvm::SelectionDAG::RemoveDeadNodes
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
Definition: SelectionDAG.cpp:870
RHS
Value * RHS
Definition: X86PartialReduction.cpp:74
llvm::SDNode::isDivergent
bool isDivergent() const
Definition: SelectionDAGNodes.h:696
llvm::Lo_32
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition: MathExtras.h:353
AMDGPUDAGToDAGISel::PreprocessISelDAG
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
Definition: AMDGPUISelDAGToDAG.cpp:289
SelectionDAG.h
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::SelectionDAG::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:442
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1564
llvm::GCNSubtarget::getRegisterInfo
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:219
AMDGPUDAGToDAGISel::SelectBuildVector
void SelectBuildVector(SDNode *N, unsigned RegClassID)
Definition: AMDGPUISelDAGToDAG.cpp:451
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::ISD::FABS
@ FABS
Definition: ISDOpcodes.h:871
llvm::AMDGPUISD::FMIN3
@ FMIN3
Definition: AMDGPUISelLowering.h:401
llvm::ISD::ADDCARRY
@ ADDCARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:290
AMDGPUDAGToDAGISel::PostprocessISelDAG
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
Definition: AMDGPUISelDAGToDAG.cpp:2879
llvm::ISD::BRCOND
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:960
llvm::SDNode::getVTList
SDVTList getVTList() const
Definition: SelectionDAGNodes.h:934
llvm::GCNSubtarget::getInstrInfo
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:207
llvm::GCNSubtarget::unsafeDSOffsetFoldingEnabled
bool unsafeDSOffsetFoldingEnabled() const
Definition: GCNSubtarget.h:399
llvm::SelectionDAG::getRegister
SDValue getRegister(unsigned Reg, EVT VT)
Definition: SelectionDAG.cpp:2025
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::MipsISD::Hi
@ Hi
Definition: MipsISelLowering.h:75
llvm::GCNSubtarget::hasGWSSemaReleaseAll
bool hasGWSSemaReleaseAll() const
Definition: GCNSubtarget.h:617
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::ISD::FFLOOR
@ FFLOOR
Definition: ISDOpcodes.h:889
SelectSAddrFI
static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr)
Definition: AMDGPUISelDAGToDAG.cpp:1739
LHS
Value * LHS
Definition: X86PartialReduction.cpp:73
llvm::AMDGPU::CPol::CPol
CPol
Definition: SIDefines.h:292
i64
Clang compiles this i64
Definition: README.txt:504
AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel
AMDGPUDAGToDAGISel(TargetMachine *TM=nullptr, CodeGenOpt::Level OptLevel=CodeGenOpt::Default)
Definition: AMDGPUISelDAGToDAG.cpp:117
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:651
llvm::AMDGPUAS::CONSTANT_ADDRESS_32BIT
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
Definition: AMDGPU.h:366
AMDGPUDAGToDAGISel::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: AMDGPUISelDAGToDAG.cpp:199
llvm::SelectionDAG::getTargetFrameIndex
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:688
llvm::RecurKind::And
@ And
Bitwise or logical AND of integers.
llvm::SDValue::getValueType
EVT getValueType() const
Return the ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:1123
llvm::MemIntrinsicSDNode
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
Definition: SelectionDAGNodes.h:1474
llvm::AMDGPUSubtarget::get
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Definition: AMDGPUSubtarget.cpp:1113
llvm::SelectionDAG
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:216
SelectionDAGNodes.h
llvm::SIInstrFlags::WQM
@ WQM
Definition: SIDefines.h:71
llvm::ISD::Constant
@ Constant
Definition: ISDOpcodes.h:76
llvm::ISD::ZERO_EXTEND
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:729
llvm::GCNSubtarget::hasAddr64
bool hasAddr64() const
Definition: GCNSubtarget.h:310
llvm::ISD::ABS
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:640
FunctionLoweringInfo.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::GCNSubtarget::useFlatForGlobal
bool useFlatForGlobal() const
Definition: GCNSubtarget.h:458
llvm::GCNSubtarget::hasFmaMixInsts
bool hasFmaMixInsts() const
Definition: GCNSubtarget.h:360
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:739
llvm::User
Definition: User.h:44
llvm::AMDGPUISD::CVT_PKNORM_U16_F32
@ CVT_PKNORM_U16_F32
Definition: AMDGPUISelLowering.h:465
llvm::SelectionDAG::getUNDEF
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:951
llvm::ISD::SIGN_EXTEND_INREG
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:747
llvm::ISD::CopyToReg
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
llvm::AMDGPUISD::FMED3
@ FMED3
Definition: AMDGPUISelLowering.h:404
GFX9
@ GFX9
Definition: SIInstrInfo.cpp:7684
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:35
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::SelectionDAG::getConstant
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1413
llvm::EVT::getVectorNumElements
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:309
llvm::MCSubtargetInfo::getTargetTriple
const Triple & getTargetTriple() const
Definition: MCSubtargetInfo.h:107
f32
So we should use XX3Form_Rcr to implement instrinsic Convert DP outs ins xscvdpsp No builtin are required Round &Convert QP DP(dword[1] is set to zero) No builtin are required Round to Quad Precision because you need to assign rounding mode in instruction Provide builtin(set f128:$vT,(int_ppc_vsx_xsrqpi f128:$vB))(set f128 yields< n x< ty > >< result > yields< ty >< result > No builtin are required Load Store load store see def memrix16 in PPCInstrInfo td Load Store Vector load store outs ins lxsdx set load store with conversion from to outs ins lxsspx set f32
Definition: README_P9.txt:522
llvm::AMDGPUISD::LOAD_D16_LO_I8
@ LOAD_D16_LO_I8
Definition: AMDGPUISelLowering.h:492
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::ISD::FROUND
@ FROUND
Definition: ISDOpcodes.h:887
llvm::ISD::TRUNCATE
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:735
llvm::AMDGPUISD::ATOMIC_LOAD_FMAX
@ ATOMIC_LOAD_FMAX
Definition: AMDGPUISelLowering.h:506
llvm::ISD::SRA
@ SRA
Definition: ISDOpcodes.h:658
llvm::SelectionDAGISel::ReplaceNode
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
Definition: SelectionDAGISel.h:227
llvm::SISrcMods::OP_SEL_0
@ OP_SEL_0
Definition: SIDefines.h:214
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:195
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MCInstrDesc::isCommutable
bool isCommutable() const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z,...
Definition: MCInstrDesc.h:472
llvm::M0
unsigned M0(unsigned Val)
Definition: VE.h:370
llvm::Instruction
Definition: Instruction.h:45
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:287
llvm::ISD::SINT_TO_FP
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:739
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1467
llvm::ISD::FNEARBYINT
@ FNEARBYINT
Definition: ISDOpcodes.h:886
llvm::ISD::FRINT
@ FRINT
Definition: ISDOpcodes.h:885
llvm::SIInstrInfo::findCommutedOpIndices
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override
Definition: SIInstrInfo.cpp:2251
llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPU.h:360
llvm::SelectionDAG::getMemIntrinsicNode
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
Definition: SelectionDAG.cpp:7283
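A hedged sketch, assuming Opc, DL, Chain, and Ptr are in scope: a memory intrinsic yielding an i32 result plus an output chain from a 4-byte-aligned location:
  SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::Other);
  SDValue Ops[] = {Chain, Ptr};
  SDValue Res = CurDAG->getMemIntrinsicNode(Opc, DL, VTs, Ops, MVT::i32,
                                            MachinePointerInfo(), Align(4));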
llvm::LegacyDivergenceAnalysis
Definition: LegacyDivergenceAnalysis.h:31
llvm::SelectionDAG::dump
void dump() const
Definition: SelectionDAGDumper.cpp:913
llvm::AMDGPUISD::CVT_PK_U16_U32
@ CVT_PK_U16_U32
Definition: AMDGPUISelLowering.h:467
llvm::SIRegisterInfo::isSGPRClass
static bool isSGPRClass(const TargetRegisterClass *RC)
Definition: SIRegisterInfo.h:174
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:30
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:632
llvm::MCOperandInfo::RegClass
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Definition: MCInstrDesc.h:89
llvm::SelectionDAG::isKnownNeverNaN
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue is known to never be NaN.
Definition: SelectionDAG.cpp:4420
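A sketch of the kind of guard this enables (Op0 and Op1 assumed in scope); NaN-sensitive min/max rewrites are only safe once both inputs are proven NaN-free:
  if (CurDAG->isKnownNeverNaN(Op0) && CurDAG->isKnownNeverNaN(Op1)) {
    // e.g. folding fminnum/fmaxnum chains into a med3 is now legitimate
  }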
llvm::Triple::getArch
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition: Triple.h:311
llvm::SDValue::getConstantOperandVal
uint64_t getConstantOperandVal(unsigned i) const
Definition: SelectionDAGNodes.h:1135
llvm::ISD::CopyFromReg
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:208
llvm::MCID::RegSequence
@ RegSequence
Definition: MCInstrDesc.h:179
llvm::AMDGPUISD::FMA_W_CHAIN
@ FMA_W_CHAIN
Definition: AMDGPUISelLowering.h:388
llvm::GCNSubtarget::getScalarizeGlobalBehavior
bool getScalarizeGlobalBehavior() const
Definition: GCNSubtarget.h:763
llvm::ISD::USUBO
@ USUBO
Definition: ISDOpcodes.h:315
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
findMemSDNode
static MemSDNode * findMemSDNode(SDNode *N)
Definition: AMDGPUISelDAGToDAG.cpp:1510
llvm::Hi_32
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition: MathExtras.h:348
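Together with its counterpart Lo_32, this is the standard way to split a 64-bit immediate; a small self-contained check:
  #include "llvm/Support/MathExtras.h"
  #include <cassert>
  int main() {
    assert(llvm::Hi_32(0x123456789ABCDEF0ULL) == 0x12345678u);
    assert(llvm::Lo_32(0x123456789ABCDEF0ULL) == 0x9ABCDEF0u);
  }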
LoopInfo.h
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:634
llvm::ISD::ATOMIC_LOAD_FADD
@ ATOMIC_LOAD_FADD
Definition: ISDOpcodes.h:1155
i32
Definition: README.txt:122
llvm::ISD::FPOW
@ FPOW
Definition: ISDOpcodes.h:877
llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:377
R600MCTargetDesc.h
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:641
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:364
llvm::SelectionDAG::RemoveDeadNode
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
Definition: SelectionDAG.cpp:924
llvm::countPopulation
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:567
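For instance, a value is a power of two exactly when its population count is 1:
  unsigned SetBits = llvm::countPopulation(0xF0u);        // 4
  bool IsPowerOfTwo = llvm::countPopulation(0x40u) == 1;  // true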
llvm::AMDGPUISD::LDEXP
@ LDEXP
Definition: AMDGPUISelLowering.h:424
llvm::isInt< 32 >
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:373
llvm::ISD::FLOG10
@ FLOG10
Definition: ISDOpcodes.h:880
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:341
llvm::ISD::FMAD
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:470
llvm::isUInt< 16 >
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:408
llvm::AMDGPU::CPol::GLC
@ GLC
Definition: SIDefines.h:293
Index
uint32_t Index
Definition: ELFObjHandler.cpp:83
uint64_t
llvm::ConstantFPSDNode
Definition: SelectionDAGNodes.h:1607
llvm::SelectionDAGISel::TII
const TargetInstrInfo * TII
Definition: SelectionDAGISel.h:52
llvm::MemSDNode::getMemOperand
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by the operation.
Definition: SelectionDAGNodes.h:1342
llvm::ISD::LOAD
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:925
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:79
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::AMDGPUISD::LOAD_D16_HI
@ LOAD_D16_HI
Definition: AMDGPUISelLowering.h:488
getBaseWithOffsetUsingSplitOR
static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr, SDValue &N0, SDValue &N1)
Definition: AMDGPUISelDAGToDAG.cpp:737
llvm::SelectionDAGISel::FuncInfo
std::unique_ptr< FunctionLoweringInfo > FuncInfo
Definition: SelectionDAGISel.h:43
llvm::GCNSubtarget::hasAddNoCarry
bool hasAddNoCarry() const
Definition: GCNSubtarget.h:625
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::GCNSubtarget::privateMemoryResourceIsRangeChecked
bool privateMemoryResourceIsRangeChecked() const
Definition: GCNSubtarget.h:480
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:511
llvm::AMDGPUISD::FMAD_FTZ
@ FMAD_FTZ
Definition: AMDGPUISelLowering.h:414
llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition: SelectionDAGNodes.h:906
llvm::AMDGPUISD::ATOMIC_DEC
@ ATOMIC_DEC
Definition: AMDGPUISelLowering.h:504
llvm::SelectionDAG::getNode
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
Definition: SelectionDAG.cpp:8326
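A sketch (LHS, RHS, and DL assumed in scope): because the DAG is CSE'd, asking for a node identical to an existing one returns that node instead of a duplicate:
  SDValue Sum = CurDAG->getNode(ISD::ADD, DL, MVT::i32, LHS, RHS);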
llvm::AMDGPUISD::CVT_PK_I16_I32
@ CVT_PK_I16_I32
Definition: AMDGPUISelLowering.h:466
llvm::AMDGPUISD::BFE_I32
@ BFE_I32
Definition: AMDGPUISelLowering.h:430
llvm::ISD::ATOMIC_CMP_SWAP
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1129
AMDGPUDAGToDAGISel::getPassName
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
Definition: AMDGPUISelDAGToDAG.cpp:784
llvm::LoadSDNode::getExtensionType
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Definition: SelectionDAGNodes.h:2315
llvm::AMDGPUArgumentUsageInfo
Definition: AMDGPUArgumentUsageInfo.h:158
llvm::SelectionDAG::MorphNodeTo
SDNode * MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs, ArrayRef< SDValue > Ops)
This mutates the specified node to have the specified return type, opcode, and operands.
Definition: SelectionDAG.cpp:8898
llvm::isUInt< 32 >
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:411
llvm::SDValue::getValue
SDValue getValue(unsigned R) const
Definition: SelectionDAGNodes.h:171
llvm::TargetMachine::Options
TargetOptions Options
Definition: TargetMachine.h:121
assert
assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef())
llvm::SISrcMods::NEG
@ NEG
Definition: SIDefines.h:210
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:80
llvm::AMDGPUISD::SIN_HW
@ SIN_HW
Definition: AMDGPUISelLowering.h:394
llvm::isUInt< 8 >
constexpr bool isUInt< 8 >(uint64_t x)
Definition: MathExtras.h:405
llvm::ConstantSDNode::getZExtValue
uint64_t getZExtValue() const
Definition: SelectionDAGNodes.h:1573
llvm::SIInstrFlags::FlatGlobal
@ FlatGlobal
Definition: SIDefines.h:102
llvm::AMDGPUISD::RSQ
@ RSQ
Definition: AMDGPUISelLowering.h:419
i16
Definition: README.txt:1493
llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1370
llvm::MCInstrDesc::OpInfo
const MCOperandInfo * OpInfo
Definition: MCInstrDesc.h:206
llvm::SelectionDAGISel::CurDAG
SelectionDAG * CurDAG
Definition: SelectionDAGISel.h:47
llvm::SelectionDAG::getMachineNode
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
Definition: SelectionDAG.cpp:8999
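A sketch of a common AMDGPU use (Lo and Hi assumed to be i32 SDValues, DL an SDLoc): packing two 32-bit halves into an i64 with a REG_SEQUENCE machine node:
  const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      Lo, CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      Hi, CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};
  SDNode *Pair =
      CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::i64, Ops);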
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::ISD::SUBCARRY
@ SUBCARRY
Definition: ISDOpcodes.h:291
llvm::SelectionDAG::setNodeMemRefs
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
Definition: SelectionDAG.cpp:8767
llvm::SIInstrFlags::FLAT
@ FLAT
Definition: SIDefines.h:59
llvm::GCNSubtarget::getLDSBankCount
int getLDSBankCount() const
Definition: GCNSubtarget.h:267
AMDGPUDAGToDAGISel::Select
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
Definition: AMDGPUISelDAGToDAG.cpp:506
v4i32
Definition: README_P9.txt:112
llvm::SelectionDAG::SelectNodeTo
SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
Definition: SelectionDAG.cpp:8791
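Unlike ReplaceNode, this mutates N in place instead of creating a replacement; a sketch with S_MOV_B32 as an assumed example opcode and Src in scope:
  CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, MVT::i32, Src);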
llvm::AMDGPU::IsaInfo::TargetIDSetting::Off
@ Off
R600RegisterInfo.h
llvm::isMask_32
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:467
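A sketch of the predicate's behavior; only runs of ones anchored at bit 0 qualify:
  static_assert(llvm::isMask_32(0x0000FFFFu), "low 16 bits form a mask");
  // llvm::isMask_32(0x00FF0000u) is false: the run of ones starts above bit 0.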
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::MachineFunction
Definition: MachineFunction.h:241
llvm::AMDGPU::CPol::SCC
@ SCC
Definition: SIDefines.h:296
SelectionDAGISel.h
llvm::LoopInfo
Definition: LoopInfo.h:1086
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:359
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:179
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::GCNSubtarget::hasFlatInstOffsets
bool hasFlatInstOffsets() const
Definition: GCNSubtarget.h:546
llvm::EVT::getScalarSizeInBits
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:353
AMDGPU.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::ISD::UMUL_LOHI
@ UMUL_LOHI
Definition: ISDOpcodes.h:251
llvm::AMDGPUISD::LOAD_D16_LO_U8
@ LOAD_D16_LO_U8
Definition: AMDGPUISelLowering.h:493
llvm::SelectionDAG::ReplaceAllUsesOfValueWith
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
Definition: SelectionDAG.cpp:9587
llvm::SISrcMods::OP_SEL_1
@ OP_SEL_1
Definition: SIDefines.h:215
uint32_t
llvm::SDValue::getOperand
const SDValue & getOperand(unsigned i) const
Definition: SelectionDAGNodes.h:1131
DL
Definition: AArch64SLSHardening.cpp:76
llvm::ISD::UNDEF
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:211
llvm::ISD::FEXP
@ FEXP
Definition: ISDOpcodes.h:881
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::ISD::SMUL_LOHI
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
llvm::ISD::FEXP2
@ FEXP2
Definition: ISDOpcodes.h:882
llvm::AMDGPUAS::FLAT_ADDRESS
@ FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:358
llvm::CodeGenOpt::Level
Level
Definition: CodeGen.h:52
llvm::SDVTList
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Definition: SelectionDAGNodes.h:78
llvm::ISD::FMUL
@ FMUL
Definition: ISDOpcodes.h:379
llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition: ISDOpcodes.h:1350
MRI
const MachineRegisterInfo *MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::ISD::INTRINSIC_WO_CHAIN
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
llvm::LoadSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2319
llvm::LoopInfoBase::getLoopsInPreorder
SmallVector< LoopT *, 4 > getLoopsInPreorder() const
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
Definition: LoopInfoImpl.h:578
llvm::ISD::FSQRT
@ FSQRT
Definition: ISDOpcodes.h:872
llvm::SelectionDAGISel::MF
MachineFunction * MF
Definition: SelectionDAGISel.h:45
llvm::AMDGPUISD::RCP
@ RCP
Definition: AMDGPUISelLowering.h:418
llvm::ISD::FMAXNUM
@ FMAXNUM
Definition: ISDOpcodes.h:903
llvm::SelectionDAG::computeKnownBits
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
Definition: SelectionDAG.cpp:2809
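A sketch (Addr assumed in scope): proving the high half of a 64-bit value is zero, e.g. before splitting an address into 32-bit pieces:
  KnownBits Known = CurDAG->computeKnownBits(Addr);
  if (Known.countMinLeadingZeros() >= 32) {
    // the top 32 bits are provably zero
  }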
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:607
llvm::AMDGPUISD::LOAD_D16_HI_U8
@ LOAD_D16_HI_U8
Definition: AMDGPUISelLowering.h:491
llvm::AMDGPUISD::MAD_U64_U32
@ MAD_U64_U32
Definition: AMDGPUISelLowering.h:442
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:325
llvm::MemSDNode::getAddressSpace
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Definition: SelectionDAGNodes.h:1349
llvm::ilist_iterator
Iterator for intrusive lists based on ilist_node.
Definition: ilist_iterator.h:57
llvm::AMDGPUISD::ATOMIC_LOAD_FMIN
@ ATOMIC_LOAD_FMIN
Definition: AMDGPUISelLowering.h:505
llvm::ISD::FCOS
@ FCOS
Definition: ISDOpcodes.h:875
llvm::SelectionDAG::getEntryNode
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:516
llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:883
llvm::ISD::FSIN
@ FSIN
Definition: ISDOpcodes.h:874
llvm::ISD::BUILD_VECTOR
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:491
llvm::SITargetLowering
Definition: SIISelLowering.h:31
llvm::SIInstrFlags::FlatScratch
@ FlatScratch
Definition: SIDefines.h:117
llvm::SelectionDAG::getTargetExtractSubreg
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
Definition: SelectionDAG.cpp:9117
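A sketch (Val assumed to be a 64-bit SDValue, DL an SDLoc): extracting the low 32-bit half through AMDGPU's sub0 subregister index:
  SDValue Lo = CurDAG->getTargetExtractSubreg(AMDGPU::sub0, DL, MVT::i32, Val);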
llvm::SelectionDAGISel::ReplaceUses
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
Definition: SelectionDAGISel.h:206
llvm::ISD::SUBC
@ SUBC
Definition: ISDOpcodes.h:270
llvm::GCNSubtarget::ldsRequiresM0Init
bool ldsRequiresM0Init() const
Return true if most LDS instructions have an m0 use that requires m0 to be initialized.
Definition: GCNSubtarget.h:603
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition: SelectionDAGNodes.h:137
gwsIntrinToOpcode
static unsigned gwsIntrinToOpcode(unsigned IntrID)
Definition: AMDGPUISelDAGToDAG.cpp:2331
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:183
llvm::AMDGPUPerfHintAnalysis
Definition: AMDGPUPerfHintAnalysis.h:23
llvm::ISD::STORE
@ STORE
Definition: ISDOpcodes.h:926
llvm::AMDGPUISD::CVT_PKRTZ_F16_F32
@ CVT_PKRTZ_F16_F32
Definition: AMDGPUISelLowering.h:463
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:416
llvm::ISD::UINT_TO_FP
@ UINT_TO_FP
Definition: ISDOpcodes.h:740
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
llvm::GCNSubtarget::getGeneration
Generation getGeneration() const
Definition: GCNSubtarget.h:258
llvm::SIInstrInfo
Definition: SIInstrInfo.h:38
llvm::AMDGPU::isInlinableLiteral32
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
Definition: AMDGPUBaseInfo.cpp:1754
llvm::EVT::getVectorElementType
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:301
llvm::ISD::FP_EXTEND
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:837
llvm::TargetOptions::NoNaNsFPMath
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
Definition: TargetOptions.h:173
llvm::AMDGPUISD::FRACT
@ FRACT
Definition: AMDGPUISelLowering.h:374
llvm::ISD::FSUB
@ FSUB
Definition: ISDOpcodes.h:378
llvm::ISD::SHL
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:657
LegacyDivergenceAnalysis.h
llvm::SelectionDAGISel
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
Definition: SelectionDAGISel.h:39
llvm::AMDGPUISD::FMAX3
@ FMAX3
Definition: AMDGPUISelLowering.h:398
llvm::GCNSubtarget::getConstantBusLimit
unsigned getConstantBusLimit(unsigned Opcode) const
Definition: AMDGPUSubtarget.cpp:323
llvm::ISD::FREM
@ FREM
Definition: ISDOpcodes.h:381
llvm::AMDGPUISD::MAD_I64_I32
@ MAD_I64_I32
Definition: AMDGPUISelLowering.h:443
Dominators.h
N
#define N
llvm::APInt::countTrailingOnes
unsigned countTrailingOnes() const
Count the number of trailing one bits.
Definition: APInt.h:1555
llvm::ISD::SUBE
@ SUBE
Definition: ISDOpcodes.h:280
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:335
llvm::ISD::SRL
@ SRL
Definition: ISDOpcodes.h:659
llvm::TargetMachine::getTargetTriple
const Triple & getTargetTriple() const
Definition: TargetMachine.h:129
llvm::GCNSubtarget::d16PreservesUnusedBits
bool d16PreservesUnusedBits() const
Definition: GCNSubtarget.h:593
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel", "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
llvm::AMDGPUISD::COS_HW
@ COS_HW
Definition: AMDGPUISelLowering.h:393
llvm::SDValue::getOpcode
unsigned getOpcode() const
Definition: SelectionDAGNodes.h:1119
llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:637
TM
LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::ISD::INTRINSIC_W_CHAIN
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
llvm::ISD::FNEG
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:870
llvm::SelectionDAG::getMachineFunction
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:437
BB
Definition: README.txt:39
llvm::ISD::BUILD_PAIR
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::AMDGPUISD::DWORDADDR
@ DWORDADDR
Definition: AMDGPUISelLowering.h:373
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::ISD::TargetFrameIndex
@ TargetFrameIndex
Definition: ISDOpcodes.h:166
f64
Definition: README_P9.txt:314
llvm::ISD::FTRUNC
@ FTRUNC
Definition: ISDOpcodes.h:884
llvm::AMDGPUISD::RCP_IFLAG
@ RCP_IFLAG
Definition: AMDGPUISelLowering.h:421
llvm::AMDGPUISD::ATOMIC_INC
@ ATOMIC_INC
Definition: AMDGPUISelLowering.h:503
llvm::ISD::FCANONICALIZE
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:483
llvm::M68kBeads::Term
@ Term
Definition: M68kBaseInfo.h:71
llvm::ISD::FLOG
@ FLOG
Definition: ISDOpcodes.h:878
InitializePasses.h
llvm::GCNSubtarget::hasUsableDSOffset
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition: GCNSubtarget.h:395
llvm::ISD::ADDE
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:279
llvm::ISD::FP_ROUND
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:818
llvm::MCInstrDesc::getNumOperands
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:228
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:363
llvm::SelectionDAG::getTarget
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:441
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:362
llvm::EVT::bitsEq
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:229
AMDGPUTargetMachine.h
SubReg
unsigned SubReg
Definition: AArch64AdvSIMDScalarPass.cpp:104
llvm::AMDGPUISD::BFE_U32
@ BFE_U32
Definition: AMDGPUISelLowering.h:429
llvm::ISD::SCALAR_TO_VECTOR
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:583
AMDGPUDAGToDAGISel::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
Definition: AMDGPUISelDAGToDAG.cpp:124
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1198
llvm::AMDGPUISD::LOAD_D16_LO
@ LOAD_D16_LO
Definition: AMDGPUISelLowering.h:489
IsCopyFromSGPR
static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val)
Definition: AMDGPUISelDAGToDAG.cpp:1431
llvm::ISD::FDIV
@ FDIV
Definition: ISDOpcodes.h:380