//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUISelDAGToDAG.h"
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/R600MCTargetDesc.h"
#include "R600RegisterInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"

#ifdef EXPENSIVE_CHECKS
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#endif

#define DEBUG_TYPE "isel"

using namespace llvm;

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

static SDValue stripBitcast(SDValue Val) {
  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}

// Figure out if this is really an extract of the high 16-bits of a dword.
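// Illustrative sketch of the two forms matched below (value names are made
// up): once bitcasts are stripped, both
//   (extract_vector_elt (v2i16 %v), 1)
// and
//   (trunc (srl (i32 %v), 16))
// name the high 16 bits of the same 32-bit value %v.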
static bool isExtractHiElt(SDValue In, SDValue &Out) {
  In = stripBitcast(In);

  if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
      if (!Idx->isOne())
        return false;
      Out = In.getOperand(0);
      return true;
    }
  }

  if (In.getOpcode() != ISD::TRUNCATE)
    return false;

  SDValue Srl = In.getOperand(0);
  if (Srl.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
      if (ShiftAmt->getZExtValue() == 16) {
        Out = stripBitcast(Srl.getOperand(0));
        return true;
      }
    }
  }

  return false;
}

// Look through operations that obscure just looking at the low 16-bits of the
// same register.
static SDValue stripExtractLoElt(SDValue In) {
  if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
      if (Idx->isZero() && In.getValueSizeInBits() <= 32)
        return In.getOperand(0);
    }
  }

  if (In.getOpcode() == ISD::TRUNCATE) {
    SDValue Src = In.getOperand(0);
    if (Src.getValueType().getSizeInBits() == 32)
      return stripBitcast(Src);
  }

  return In;
}

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
#ifdef EXPENSIVE_CHECKS
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
#endif
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

/// This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(
    TargetMachine *TM /*= nullptr*/,
    CodeGenOpt::Level OptLevel /*= CodeGenOpt::Default*/)
    : SelectionDAGISel(*TM, OptLevel) {
  EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
#ifdef EXPENSIVE_CHECKS
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  for (auto &L : LI->getLoopsInPreorder()) {
    assert(L->isLCSSAForm(DT));
  }
#endif
  Subtarget = &MF.getSubtarget<GCNSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
  // XXX - only need to list legal operations.
  switch (Opc) {
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FMUL:
  case ISD::FDIV:
  case ISD::FREM:
  case ISD::FCANONICALIZE:
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP:
  case ISD::FABS:
    // Fabs is lowered to a bit operation, but it's an and which will clear the
    // high bits anyway.
  case ISD::FSQRT:
  case ISD::FSIN:
  case ISD::FCOS:
  case ISD::FPOWI:
  case ISD::FPOW:
  case ISD::FLOG:
  case ISD::FLOG2:
  case ISD::FLOG10:
  case ISD::FEXP:
  case ISD::FEXP2:
  case ISD::FCEIL:
  case ISD::FTRUNC:
  case ISD::FRINT:
  case ISD::FNEARBYINT:
  case ISD::FROUND:
  case ISD::FFLOOR:
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
  case AMDGPUISD::FRACT:
  case AMDGPUISD::CLAMP:
  case AMDGPUISD::COS_HW:
  case AMDGPUISD::SIN_HW:
  case AMDGPUISD::FMIN3:
  case AMDGPUISD::FMAX3:
  case AMDGPUISD::FMED3:
  case AMDGPUISD::FMAD_FTZ:
  case AMDGPUISD::RCP:
  case AMDGPUISD::RSQ:
  case AMDGPUISD::LDEXP:
    // On gfx10, all 16-bit instructions preserve the high bits.
    return Subtarget->getGeneration() <= AMDGPUSubtarget::GFX9;
  case ISD::FP_ROUND:
    // We may select fptrunc (fma/mad) to mad_mixlo, which does not zero the
    // high bits on gfx9.
    // TODO: If we had the source node we could see if the source was fma/mad
    return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
  case ISD::FMA:
  case ISD::FMAD:
    return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
  default:
    // fcopysign, select and others may be lowered to 32-bit bit operations
    // which don't zero the high bits.
    return false;
  }
}

void AMDGPUDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<AMDGPUArgumentUsageInfo>();
  AU.addRequired<LegacyDivergenceAnalysis>();
#ifdef EXPENSIVE_CHECKS
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<LoopInfoWrapperPass>();
#endif
  SelectionDAGISel::getAnalysisUsage(AU);
}

bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
  assert(Subtarget->d16PreservesUnusedBits());
  MVT VT = N->getValueType(0).getSimpleVT();
  if (VT != MVT::v2i16 && VT != MVT::v2f16)
    return false;

  SDValue Lo = N->getOperand(0);
  SDValue Hi = N->getOperand(1);

  LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));

  // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
  // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
  // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo

  // Need to check for possible indirect dependencies on the other half of the
  // vector to avoid introducing a cycle.
  if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);

    SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
    SDValue Ops[] = {
      LdHi->getChain(), LdHi->getBasePtr(), TiedIn
    };

    unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
    if (LdHi->getMemoryVT() == MVT::i8) {
      LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
        AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
    } else {
      assert(LdHi->getMemoryVT() == MVT::i16);
    }

    SDValue NewLoadHi =
      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
                                  Ops, LdHi->getMemoryVT(),
                                  LdHi->getMemOperand());

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
    return true;
  }

  // build_vector (load ptr), hi -> load_d16_lo ptr, hi
  // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
  // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
  LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
  if (LdLo && Lo.hasOneUse()) {
    SDValue TiedIn = getHi16Elt(Hi);
    if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
      return false;

    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
    unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
    if (LdLo->getMemoryVT() == MVT::i8) {
      LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
        AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
    } else {
      assert(LdLo->getMemoryVT() == MVT::i16);
    }

    TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);

    SDValue Ops[] = {
      LdLo->getChain(), LdLo->getBasePtr(), TiedIn
    };

    SDValue NewLoadLo =
      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
                                  Ops, LdLo->getMemoryVT(),
                                  LdLo->getMemOperand());

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
    return true;
  }

  return false;
}
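
// Illustrative sketch of the high-half rewrite above (value names are made
// up): with d16 loads available,
//   (v2f16 build_vector %lo, (f16 load %ptr))
// is replaced by
//   (v2f16 load_d16_hi %ptr, %lo)
// which selects to a d16 load that writes only the high half of the
// destination VGPR while the low half stays tied to %lo.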

void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->d16PreservesUnusedBits())
    return;

  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty())
      continue;

    switch (N->getOpcode()) {
    case ISD::BUILD_VECTOR:
      MadeChange |= matchLoadD16FromBuildVector(N);
      break;
    default:
      break;
    }
  }

  if (MadeChange) {
    CurDAG->RemoveDeadNodes();
    LLVM_DEBUG(dbgs() << "After PreProcess:\n";
               CurDAG->dump(););
  }
}

bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (N->getFlags().hasNoNaNs())
    return true;

  return CurDAG->isKnownNeverNaN(N);
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
                                           bool Negated) const {
  if (N->isUndef())
    return true;

  const SIInstrInfo *TII = Subtarget->getInstrInfo();
  if (Negated) {
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
      return TII->isInlineConstant(-C->getAPIntValue());

    if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
      return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());

  } else {
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
      return TII->isInlineConstant(C->getAPIntValue());

    if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
      return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
  }

  return false;
}

/// Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or nullptr if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::CopyToReg) {
      Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (Reg.isVirtual()) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      const SIRegisterInfo *TRI
        = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToOp(SDNode *N, SDValue NewChain,
                                         SDValue Glue) const {
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(NewChain); // Replace the chain.
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
    Ops.push_back(N->getOperand(i));

  Ops.push_back(Glue);
  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");

  SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), Val);
  return glueCopyToOp(N, M0, M0.getValue(1));
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
  unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
  if (AS == AMDGPUAS::LOCAL_ADDRESS) {
    if (Subtarget->ldsRequiresM0Init())
      return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
  } else if (AS == AMDGPUAS::REGION_ADDRESS) {
    MachineFunction &MF = CurDAG->getMachineFunction();
    unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
    return
        glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
  }
  return N;
}

MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
                                                  EVT VT) const {
  SDNode *Lo = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32,
      CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
  SDNode *Hi =
      CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                             CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32));
  const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};

  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
}
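
// Illustrative expansion (register names are made up): buildSMovImm64 for
// the immediate 0x0000000100000002 produces roughly
//   %lo = S_MOV_B32 2
//   %hi = S_MOV_B32 1
//   %r  = REG_SEQUENCE SReg_64, %lo, sub0, %hi, sub1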

void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  unsigned NumVectorElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDLoc DL(N);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

  if (NumVectorElts == 1) {
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         RegClass);
    return;
  }

  assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
                                "supported yet");
  // 32 = Max Num Vector Elements
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
  // 1 = Vector Register Class
  SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

  bool IsGCN = CurDAG->getSubtarget().getTargetTriple().getArch() ==
               Triple::amdgcn;
  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // XXX: Why is this here?
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      IsRegSeq = false;
      break;
    }
    unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
                         : R600RegisterInfo::getSubRegFromChannel(i);
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // Fill in the missing undef elements if this was a scalar_to_vector.
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   DL, EltVT);
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
                           : R600RegisterInfo::getSubRegFromChannel(i);
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(Sub, DL, MVT::i32);
    }
  }

  if (!IsRegSeq)
    SelectCode(N);
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}
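
// Layout sketch of RegSeqArgs for a 2-element vector (illustrative):
//   { RCID, elt0, subreg-index-0, elt1, subreg-index-1 }
// i.e. one register-class ID followed by a (value, subregister index) pair
// per element, matching what REG_SEQUENCE expects.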

void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  // isa<MemSDNode> almost works but is slightly too permissive for some DS
  // intrinsics.
  if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
       Opc == ISD::ATOMIC_LOAD_FADD ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) {
    N = glueCopyToM0LDSInit(N);
    SelectCode(N);
    return;
  }

  switch (Opc) {
  default:
    break;
  // We are selecting i64 ADD here instead of custom-lowering it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::ADDCARRY:
  case ISD::SUBCARRY:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectAddcSubb(N);
    return;
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    if (VT.getScalarSizeInBits() == 16) {
      if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
        if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
          ReplaceNode(N, Packed);
          return;
        }
      }

      break;
    }

    unsigned RegClassID =
        SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID();
    SelectBuildVector(N, RegClassID);
    return;
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
    return;
  }
  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case AMDGPUISD::MAD_I64_I32:
  case AMDGPUISD::MAD_U64_U32: {
    SelectMAD_64_32(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  case ISD::FMAD:
  case ISD::FMA:
    SelectFMAD_FMA(N);
    return;
  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  case AMDGPUISD::CVT_PKRTZ_F16_F32:
  case AMDGPUISD::CVT_PKNORM_I16_F32:
  case AMDGPUISD::CVT_PKNORM_U16_F32:
  case AMDGPUISD::CVT_PK_U16_U32:
  case AMDGPUISD::CVT_PK_I16_I32: {
    // Hack around using a legal type if f16 is illegal.
    if (N->getValueType(0) == MVT::i32) {
      MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
      N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
                              { N->getOperand(0), N->getOperand(1) });
      SelectCode(N);
      return;
    }

    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    SelectINTRINSIC_W_CHAIN(N);
    return;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    SelectINTRINSIC_WO_CHAIN(N);
    return;
  }
  case ISD::INTRINSIC_VOID: {
    SelectINTRINSIC_VOID(N);
    return;
  }
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr,
                                          SDValue &N0, SDValue &N1) {
  if (Addr.getValueType() == MVT::i64 && Addr.getOpcode() == ISD::BITCAST &&
      Addr.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
    // As we split 64-bit `or` earlier, it's a more involved pattern to match,
    // i.e.
    // (i64 (bitcast (v2i32 (build_vector
    //                        (or (extract_vector_elt V, 0), OFFSET),
    //                        (extract_vector_elt V, 1)))))
    SDValue Lo = Addr.getOperand(0).getOperand(0);
    if (Lo.getOpcode() == ISD::OR && DAG.isBaseWithConstantOffset(Lo)) {
      SDValue BaseLo = Lo.getOperand(0);
      SDValue BaseHi = Addr.getOperand(0).getOperand(1);
      // Check that the split base (Lo and Hi) is extracted from the same one.
      if (BaseLo.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          BaseHi.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          BaseLo.getOperand(0) == BaseHi.getOperand(0) &&
          // Lo is statically extracted from index 0.
          isa<ConstantSDNode>(BaseLo.getOperand(1)) &&
          BaseLo.getConstantOperandVal(1) == 0 &&
          // Hi is statically extracted from index 1.
          isa<ConstantSDNode>(BaseHi.getOperand(1)) &&
          BaseHi.getConstantOperandVal(1) == 1) {
        N0 = BaseLo.getOperand(0).getOperand(0);
        N1 = Lo.getOperand(1);
        return true;
      }
    }
  }
  return false;
}

bool AMDGPUDAGToDAGISel::isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
                                                    SDValue &RHS) const {
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    LHS = Addr.getOperand(0);
    RHS = Addr.getOperand(1);
    return true;
  }

  if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, LHS, RHS)) {
    assert(LHS && RHS && isa<ConstantSDNode>(RHS));
    return true;
  }

  return false;
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
                                                       const SDLoc &DL) const {
  SDNode *Mov = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32,
      CurDAG->getTargetConstant(Val, DL, MVT::i32));
  return SDValue(Mov, 0);
}

// FIXME: Should only handle addcarry/subcarry
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  static const unsigned OpcMap[2][2][2] = {
      {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
       {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
      {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
       {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};

  unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
  unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  ReplaceNode(N, RegSequence);
}
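
// Rough shape of the expansion for a divergent i64 add (illustrative, names
// made up):
//   %lo = V_ADD_CO_U32_e32 %lhs.sub0, %rhs.sub0   ; carry-out via glue
//   %hi = V_ADDC_U32_e32  %lhs.sub1, %rhs.sub1    ; carry-in from %lo
//   %r  = REG_SEQUENCE %lo, sub0, %hi, sub1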

void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue CI = N->getOperand(2);

  if (N->isDivergent()) {
    unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
                                                   : AMDGPU::V_SUBB_U32_e64;
    CurDAG->SelectNodeTo(
        N, Opc, N->getVTList(),
        {LHS, RHS, CI,
         CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
  } else {
    unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::S_ADD_CO_PSEUDO
                                                   : AMDGPU::S_SUB_CO_PSEUDO;
    CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
  }
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The names of the opcodes are misleading. v_add_i32/v_sub_i32 have an
  // unsigned carry out despite the _i32 name. These were renamed in VI to
  // _U32.
  // FIXME: We should probably rename the opcodes here.
  bool IsAdd = N->getOpcode() == ISD::UADDO;
  bool IsVALU = N->isDivergent();

  for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E;
       ++UI)
    if (UI.getUse().getResNo() == 1) {
      if ((IsAdd && (UI->getOpcode() != ISD::ADDCARRY)) ||
          (!IsAdd && (UI->getOpcode() != ISD::SUBCARRY))) {
        IsVALU = true;
        break;
      }
    }

  if (IsVALU) {
    unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;

    CurDAG->SelectNodeTo(
        N, Opc, N->getVTList(),
        {N->getOperand(0), N->getOperand(1),
         CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
  } else {
    unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
                                                : AMDGPU::S_USUBO_PSEUDO;

    CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                         {N->getOperand(0), N->getOperand(1)});
  }
}

void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  // If there are no source modifiers, prefer fmac over fma because it can use
  // the smaller VOP2 encoding.
  bool UseFMAC = Subtarget->hasDLInsts() &&
                 cast<ConstantSDNode>(Ops[0])->isZero() &&
                 cast<ConstantSDNode>(Ops[2])->isZero() &&
                 cast<ConstantSDNode>(Ops[4])->isZero();
  unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;

  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
  // omod
  SDValue Ops[8];
  SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  SDLoc SL(N);
  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;

  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                    Clamp };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset) const {
  if (!isUInt<16>(Offset))
    return false;

  if (!Base || Subtarget->hasUsableDSOffset() ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an offset
  // don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}
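
// For example (illustrative): (add %base, 65532) can fold the constant into
// the 16-bit DS offset field, while (add %base, 65540) cannot, because 65540
// does not fit in 16 bits and must stay in the computed address.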

bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue())) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isDSOffsetLegal(SDValue(), ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));

          // FIXME: Select to VOP3 version for with-carry.
          unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          MachineSDNode *MachineSub =
              CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}
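
// Illustrative case for the "sub C, x" trick above: a ds_read of (sub 16, %x)
// becomes a read at base (v_sub_u32 0, %x) with offset:16, matching the
// rewrite "sub C, x -> add (sub 0, x), C".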

bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
                                          unsigned Offset1,
                                          unsigned Size) const {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)
    return false;
  if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
    return false;

  if (!Base || Subtarget->hasUsableDSOffset() ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an offset
  // don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

// TODO: If the offset is too big, put the low 16 bits into the offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
}

bool AMDGPUDAGToDAGISel::SelectDS128Bit8ByteAligned(SDValue Addr, SDValue &Base,
                                                    SDValue &Offset0,
                                                    SDValue &Offset1) const {
  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);
}

bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
                                            SDValue &Offset0, SDValue &Offset1,
                                            unsigned Size) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned OffsetValue0 = C1->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;

    // (add n0, c0)
    if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C =
            dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned OffsetValue0 = C->getZExtValue();
      unsigned OffsetValue1 = OffsetValue0 + Size;

      if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub =
            CurDAG->getNode(ISD::SUB, DL, MVT::i32, Zero, Addr.getOperand(1));

        if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));
          unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          MachineSDNode *MachineSub = CurDAG->getMachineNode(
              SubOp, DL, MVT::getIntegerVT(Size * 8), Opnds);

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned OffsetValue0 = CAddr->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;

    if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero =
          CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
      return true;
    }
  }

  // default case

  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}
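
// Worked example (illustrative): for a read2 pair with Size = 4 accessing
// base+40 and base+44, the encodings are offset0 = 40/4 = 10 and
// offset1 = 44/4 = 11; both quotients fit in 8 bits, so the pair is legal.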

bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
                                     SDValue &SOffset, SDValue &Offset,
                                     SDValue &Offen, SDValue &Idxen,
                                     SDValue &Addr64) const {
  // Subtarget prefers to use flat instructions.
  // FIXME: This should be a pattern predicate and not reach here
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  ConstantSDNode *C1 = nullptr;
  SDValue N0 = Addr;
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    C1 = cast<ConstantSDNode>(Addr.getOperand(1));
    if (isUInt<32>(C1->getZExtValue()))
      N0 = Addr.getOperand(0);
    else
      C1 = nullptr;
  }

  if (N0.getOpcode() == ISD::ADD) {
    // (add N2, N3) -> addr64, or
    // (add (add N2, N3), C1) -> addr64
    SDValue N2 = N0.getOperand(0);
    SDValue N3 = N0.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);

    if (N2->isDivergent()) {
      if (N3->isDivergent()) {
        // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
        // addr64, and construct the resource from a 0 address.
        Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
        VAddr = N0;
      } else {
        // N2 is divergent, N3 is not.
        Ptr = N3;
        VAddr = N2;
      }
    } else {
      // N2 is not divergent.
      Ptr = N2;
      VAddr = N3;
    }
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  } else if (N0->isDivergent()) {
    // N0 is divergent. Use it as the addr64, and construct the resource from a
    // 0 address.
    Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
    VAddr = N0;
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
  } else {
    // N0 -> offset, or
    // (N0 + C1) -> offset
    VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Ptr = N0;
  }

  if (!C1) {
    // No offset.
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
    // Legal offset for instruction.
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    return true;
  }

  // Illegal offset, store it in soffset.
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  SOffset =
      SDValue(CurDAG->getMachineNode(
                  AMDGPU::S_MOV_B32, DL, MVT::i32,
                  CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
              0);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // The addr64 bit was removed for Volcanic Islands.
  // FIXME: This should be a pattern predicate and not reach here
  if (!Subtarget->hasAddr64())
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  SDLoc DL(N);

  auto *FI = dyn_cast<FrameIndexSDNode>(N);
  SDValue TFI =
      FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N;

  // We rebase the base address into an absolute stack address and hence
  // use constant 0 for soffset. This value must be retained until
  // frame elimination, where eliminateFrameIndex will choose the appropriate
  // frame register if need be.
  return std::make_pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32));
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    int64_t Imm = CAddr->getSExtValue();
    const int64_t NullPtr =
        AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
    // Don't fold null pointer.
    if (Imm != NullPtr) {
      SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
      MachineSDNode *MovHighBits = CurDAG->getMachineNode(
          AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
      VAddr = SDValue(MovHighBits, 0);

      SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
      ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
      return true;
    }
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive if range checking is enabled.
    //
    // The total computation of vaddr + soffset + offset must not overflow. If
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    //
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    // always perform a range check. If a negative vaddr base index was used,
    // this would fail the range check. The overall address computation would
    // still produce a valid address, but that never happens because of the
    // range check. For out-of-bounds MUBUF loads, a 0 is returned.
    //
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    // MUBUF vaddr, but not on older subtargets which can only do this if the
    // sign bit is known 0.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}
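
// Worked example (illustrative): a constant private address 0x11234 splits
// into HighBits = 0x11234 & ~4095 = 0x11000, materialized into vaddr with
// v_mov_b32, plus an immediate offset of 0x11234 & 4095 = 0x234.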

static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val) {
  if (Val.getOpcode() != ISD::CopyFromReg)
    return false;
  auto RC =
      TRI.getPhysRegClass(cast<RegisterSDNode>(Val.getOperand(1))->getReg());
  return RC && TRI.isSGPRClass(RC);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  const SIRegisterInfo *TRI =
      static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  SDLoc DL(Addr);

  // CopyFromReg <sgpr>
  if (IsCopyFromSGPR(*TRI, Addr)) {
    SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
    SOffset = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  ConstantSDNode *CAddr;
  if (Addr.getOpcode() == ISD::ADD) {
    // Add (CopyFromReg <sgpr>) <constant>
    CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
      return false;
    if (!IsCopyFromSGPR(*TRI, Addr.getOperand(0)))
      return false;

    SOffset = Addr.getOperand(0);
  } else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&
             SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue())) {
    // <constant>
    SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  } else {
    return false;
  }

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset,
                                           SDValue &Offset) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnes(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

// Find a load or store from the corresponding pattern root.
// Roots may be build_vector, bitconvert or their combinations.
static MemSDNode* findMemSDNode(SDNode *N) {
  N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode();
  if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
    return MN;
  assert(isa<BuildVectorSDNode>(N));
  for (SDValue V : N->op_values())
    if (MemSDNode *MN =
            dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
      return MN;
  llvm_unreachable("cannot find MemSDNode in the pattern!");
}

bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
                                              SDValue &VAddr, SDValue &Offset,
                                              uint64_t FlatVariant) const {
  int64_t OffsetVal = 0;

  unsigned AS = findMemSDNode(N)->getAddressSpace();

  bool CanHaveFlatSegmentOffsetBug =
      Subtarget->hasFlatSegmentOffsetBug() &&
      FlatVariant == SIInstrFlags::FLAT &&
      (AS == AMDGPUAS::FLAT_ADDRESS || AS == AMDGPUAS::GLOBAL_ADDRESS);

  if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
    SDValue N0, N1;
    if (isBaseWithConstantOffset64(Addr, N0, N1)) {
      int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();

      const SIInstrInfo *TII = Subtarget->getInstrInfo();
      if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
        Addr = N0;
        OffsetVal = COffsetVal;
      } else {
        // If the offset doesn't fit, put the low bits into the offset field
        // and add the rest.
        //
        // For a FLAT instruction the hardware decides whether to access
        // global/scratch/shared memory based on the high bits of vaddr,
        // ignoring the offset field, so we have to ensure that when we add
        // remainder to vaddr it still points into the same underlying object.
        // The easiest way to do that is to make sure that we split the offset
        // into two pieces that are both >= 0 or both <= 0.

        SDLoc DL(N);
        uint64_t RemainderOffset;

        std::tie(OffsetVal, RemainderOffset) =
            TII->splitFlatOffset(COffsetVal, AS, FlatVariant);

        SDValue AddOffsetLo =
            getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
        SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);

        if (Addr.getValueType().getSizeInBits() == 32) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(N0);
          Opnds.push_back(AddOffsetLo);
          unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
          if (Subtarget->hasAddNoCarry()) {
            AddOp = AMDGPU::V_ADD_U32_e64;
            Opnds.push_back(Clamp);
          }
          Addr = SDValue(CurDAG->getMachineNode(AddOp, DL, MVT::i32, Opnds), 0);
        } else {
          // TODO: Should this try to use a scalar add pseudo if the base
          // address is uniform and saddr is usable?
          SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
          SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

          SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                DL, MVT::i32, N0, Sub0);
          SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                DL, MVT::i32, N0, Sub1);

          SDValue AddOffsetHi =
              getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);

          SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);

          SDNode *Add =
              CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64, DL, VTs,
                                     {AddOffsetLo, SDValue(N0Lo, 0), Clamp});

          SDNode *Addc = CurDAG->getMachineNode(
              AMDGPU::V_ADDC_U32_e64, DL, VTs,
              {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});

          SDValue RegSequenceArgs[] = {
              CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
              SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1};

          Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                                MVT::i64, RegSequenceArgs),
                         0);
        }
      }
    }
  }

  VAddr = Addr;
  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
  return true;
}
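
// Illustrative split (sketch): when the requested offset does not fit in the
// instruction's immediate field, splitFlatOffset returns an encodable
// immediate plus a remainder with the same sign; the remainder is added to
// vaddr with V_ADD/V_ADDC so the final address still points into the same
// underlying object.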

bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset) const {
  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FLAT);
}

bool AMDGPUDAGToDAGISel::SelectGlobalOffset(SDNode *N, SDValue Addr,
                                            SDValue &VAddr,
                                            SDValue &Offset) const {
  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FlatGlobal);
}

bool AMDGPUDAGToDAGISel::SelectScratchOffset(SDNode *N, SDValue Addr,
                                             SDValue &VAddr,
                                             SDValue &Offset) const {
  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,
                              SIInstrFlags::FlatScratch);
}

// If this matches zero_extend i32:x, return x
static SDValue matchZExtFromI32(SDValue Op) {
  if (Op.getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();

  SDValue ExtSrc = Op.getOperand(0);
  return (ExtSrc.getValueType() == MVT::i32) ? ExtSrc : SDValue();
}

// Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
                                           SDValue Addr,
                                           SDValue &SAddr,
                                           SDValue &VOffset,
                                           SDValue &Offset) const {
  int64_t ImmOffset = 0;

  // Match the immediate offset first, which canonically is moved as low as
  // possible.

  SDValue LHS, RHS;
  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
    int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
    const SIInstrInfo *TII = Subtarget->getInstrInfo();

    if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
                               SIInstrFlags::FlatGlobal)) {
      Addr = LHS;
      ImmOffset = COffsetVal;
    } else if (!LHS->isDivergent()) {
      if (COffsetVal > 0) {
        SDLoc SL(N);
        // saddr + large_offset -> saddr +
        //                         (voffset = large_offset & ~MaxOffset) +
        //                         (large_offset & MaxOffset);
        int64_t SplitImmOffset, RemainderOffset;
        std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
            COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);

        if (isUInt<32>(RemainderOffset)) {
          SDNode *VMov = CurDAG->getMachineNode(
              AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
              CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
          VOffset = SDValue(VMov, 0);
          SAddr = LHS;
          Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
          return true;
        }
      }

      // We are adding a 64 bit SGPR and a constant. If constant bus limit
      // is 1 we would need to perform 1 or 2 extra moves for each half of
      // the constant and it is better to do a scalar add and then issue a
      // single VALU instruction to materialize zero. Otherwise it is less
      // instructions to perform VALU adds with immediates or inline literals.
      unsigned NumLiterals =
          !TII->isInlineConstant(APInt(32, COffsetVal & 0xffffffff)) +
          !TII->isInlineConstant(APInt(32, COffsetVal >> 32));
      if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
        return false;
    }
  }

  // Match the variable offset.
  if (Addr.getOpcode() == ISD::ADD) {
    LHS = Addr.getOperand(0);
    RHS = Addr.getOperand(1);

    if (!LHS->isDivergent()) {
      // add (i64 sgpr), (zero_extend (i32 vgpr))
      if (SDValue ZextRHS = matchZExtFromI32(RHS)) {
        SAddr = LHS;
        VOffset = ZextRHS;
      }
    }

    if (!SAddr && !RHS->isDivergent()) {
      // add (zero_extend (i32 vgpr)), (i64 sgpr)
      if (SDValue ZextLHS = matchZExtFromI32(LHS)) {
        SAddr = RHS;
        VOffset = ZextLHS;
      }
    }

    if (SAddr) {
      Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
      return true;
    }
  }

  if (Addr->isDivergent() || Addr.getOpcode() == ISD::UNDEF ||
      isa<ConstantSDNode>(Addr))
    return false;

  // It's cheaper to materialize a single 32-bit zero for vaddr than the two
  // moves required to copy a 64-bit SGPR to VGPR.
  SAddr = Addr;
  SDNode *VMov =
      CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
                             CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
  VOffset = SDValue(VMov, 0);
  Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
  return true;
}
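
// For example (sketch): "saddr + 0x12345" with an offset field too small to
// hold 0x12345 is emitted as saddr plus a v_mov_b32 of the remainder in
// voffset, keeping only the low, encodable part as the immediate offset.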

static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr) {
  if (auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
    SAddr = CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
  } else if (SAddr.getOpcode() == ISD::ADD &&
             isa<FrameIndexSDNode>(SAddr.getOperand(0))) {
    // Materialize this into a scalar move for scalar address to avoid
    // readfirstlane.
    auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                              FI->getValueType(0));
    SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, SDLoc(SAddr),
                                           MVT::i32, TFI, SAddr.getOperand(1)),
                    0);
  }

  return SAddr;
}
1715 
1716 // Match (32-bit SGPR base) + sext(imm offset)
1717 bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
1718  SDValue &SAddr,
1719  SDValue &Offset) const {
1720  if (Addr->isDivergent())
1721  return false;
1722 
1723  SDLoc DL(Addr);
1724 
1725  int64_t COffsetVal = 0;
1726 
1727  if (CurDAG->isBaseWithConstantOffset(Addr)) {
1728  COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
1729  SAddr = Addr.getOperand(0);
1730  } else {
1731  SAddr = Addr;
1732  }
1733 
1734  SAddr = SelectSAddrFI(CurDAG, SAddr);
1735 
1736  const SIInstrInfo *TII = Subtarget->getInstrInfo();
1737 
1738  if (!TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS,
1739  SIInstrFlags::FlatScratch)) {
1740  int64_t SplitImmOffset, RemainderOffset;
1741  std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
1742  COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, SIInstrFlags::FlatScratch);
1743 
1744  COffsetVal = SplitImmOffset;
1745 
1746  SDValue AddOffset =
1747  SAddr.getOpcode() == ISD::TargetFrameIndex
1748  ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
1749  : CurDAG->getTargetConstant(RemainderOffset, DL, MVT::i32);
1750  SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, MVT::i32,
1751  SAddr, AddOffset),
1752  0);
1753  }
1754 
1755  Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i16);
1756 
1757  return true;
1758 }
1759 
1760 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1761  SDValue &Offset, bool &Imm) const {
1762  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1763  if (!C) {
1764  if (ByteOffsetNode.getValueType().isScalarInteger() &&
1765  ByteOffsetNode.getValueType().getSizeInBits() == 32) {
1766  Offset = ByteOffsetNode;
1767  Imm = false;
1768  return true;
1769  }
1770  if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
1771  if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) {
1772  Offset = ByteOffsetNode.getOperand(0);
1773  Imm = false;
1774  return true;
1775  }
1776  }
1777  return false;
1778  }
1779 
1780  SDLoc SL(ByteOffsetNode);
1781  // GFX9 and GFX10 have signed byte immediate offsets.
1782  int64_t ByteOffset = C->getSExtValue();
1783  Optional<int64_t> EncodedOffset =
1784  AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, false);
1785  if (EncodedOffset) {
1786  Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
1787  Imm = true;
1788  return true;
1789  }
1790 
1791  // SGPR and literal offsets are unsigned.
1792  if (ByteOffset < 0)
1793  return false;
1794 
1795  EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset);
1796  if (EncodedOffset) {
1797  Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
1798  return true;
1799  }
1800 
1801  if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
1802  return false;
1803 
1804  SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1805  Offset = SDValue(
1806  CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
1807 
1808  return true;
1809 }
1810 
1811 SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
1812  if (Addr.getValueType() != MVT::i32)
1813  return Addr;
1814 
1815  // Zero-extend a 32-bit address.
1816  SDLoc SL(Addr);
1817 
1818  const MachineFunction &MF = CurDAG->getMachineFunction();
1819  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1820  unsigned AddrHiVal = Info->get32BitAddressHighBits();
1821  SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
1822 
1823  const SDValue Ops[] = {
1824  CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
1825  Addr,
1826  CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
1827  SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
1828  0),
1829  CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
1830  };
1831 
1832  return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
1833  Ops), 0);
1834 }
1835 
1836 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1837  SDValue &Offset, bool &Imm) const {
1838  SDLoc SL(Addr);
1839 
1840  // A 32-bit (address + offset) should not cause unsigned 32-bit integer
1841  // wraparound, because s_load instructions perform the addition in 64 bits.
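      // For example, a 32-bit base 0xfffffffc plus an offset of 8 wraps to 4
      // in 32 bits, while s_load computes 0x100000004 in 64 bits, so the
      // fold is only safe when the add is known not to wrap (nuw).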
1842  if ((Addr.getValueType() != MVT::i32 ||
1843  Addr->getFlags().hasNoUnsignedWrap())) {
1844  SDValue N0, N1;
1845  // Extract the base and offset if possible.
1846  if (CurDAG->isBaseWithConstantOffset(Addr) ||
1847  Addr.getOpcode() == ISD::ADD) {
1848  N0 = Addr.getOperand(0);
1849  N1 = Addr.getOperand(1);
1850  } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
1851  assert(N0 && N1 && isa<ConstantSDNode>(N1));
1852  }
1853  if (N0 && N1) {
1854  if (SelectSMRDOffset(N1, Offset, Imm)) {
1855  SBase = Expand32BitAddress(N0);
1856  return true;
1857  }
1858  }
1859  }
1860  SBase = Expand32BitAddress(Addr);
1861  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
1862  Imm = true;
1863  return true;
1864 }
1865 
1866 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1867  SDValue &Offset) const {
1868  bool Imm = false;
1869  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1870 }
1871 
1872 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1873  SDValue &Offset) const {
1874 
1875  assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
1876 
1877  bool Imm = false;
1878  if (!SelectSMRD(Addr, SBase, Offset, Imm))
1879  return false;
1880 
1881  return !Imm && isa<ConstantSDNode>(Offset);
1882 }
1883 
1884 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
1885  SDValue &Offset) const {
1886  bool Imm = false;
1887  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
1888  !isa<ConstantSDNode>(Offset);
1889 }
1890 
1891 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
1892  SDValue &Offset) const {
1893  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
1894  // The immediate offset for S_BUFFER instructions is unsigned.
1895  if (auto Imm =
1896  AMDGPU::getSMRDEncodedOffset(*Subtarget, C->getZExtValue(), true)) {
1897  Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32);
1898  return true;
1899  }
1900  }
1901 
1902  return false;
1903 }
1904 
1905 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
1906  SDValue &Offset) const {
1907  assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
1908 
1909  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
1910  if (auto Imm = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget,
1911  C->getZExtValue())) {
1912  Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32);
1913  return true;
1914  }
1915  }
1916 
1917  return false;
1918 }
1919 
1920 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
1921  SDValue &Base,
1922  SDValue &Offset) const {
1923  SDLoc DL(Index);
1924 
1925  if (CurDAG->isBaseWithConstantOffset(Index)) {
1926  SDValue N0 = Index.getOperand(0);
1927  SDValue N1 = Index.getOperand(1);
1928  ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1929 
1930  // (add n0, c0)
1931  // Don't peel off the offset (c0) if doing so could possibly lead
1932  // the base (n0) to be negative.
1933  // (or n0, |c0|) can never change a sign given isBaseWithConstantOffset.
1934  if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0) ||
1935  (Index->getOpcode() == ISD::OR && C1->getSExtValue() >= 0)) {
1936  Base = N0;
1937  Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
1938  return true;
1939  }
1940  }
1941 
1942  if (isa<ConstantSDNode>(Index))
1943  return false;
1944 
1945  Base = Index;
1946  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1947  return true;
1948 }
1949 
1950 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
1951  SDValue Val, uint32_t Offset,
1952  uint32_t Width) {
1953  // Transformation function, pack the offset and width of a BFE into
1954  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1955  // source, bits [5:0] contain the offset and bits [22:16] the width.
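      // For example, Offset = 8 and Width = 4 pack to (8 | (4 << 16)), i.e.
      // 0x40008.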
1956  uint32_t PackedVal = Offset | (Width << 16);
1957  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1958 
1959  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1960 }
1961 
1962 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
1963  // "((a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)"
1964  // "((a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)"
1965  // Predicate: 0 < b <= c < 32
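      // For example, ((x << 8) srl 24) with b = 8 and c = 24 becomes
      // "BFE_U32 x, 16, 8", i.e. extract bits [23:16] of x.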
1966 
1967  const SDValue &Shl = N->getOperand(0);
1968  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
1969  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
1970 
1971  if (B && C) {
1972  uint32_t BVal = B->getZExtValue();
1973  uint32_t CVal = C->getZExtValue();
1974 
1975  if (0 < BVal && BVal <= CVal && CVal < 32) {
1976  bool Signed = N->getOpcode() == ISD::SRA;
1977  unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
1978 
1979  ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
1980  32 - CVal));
1981  return;
1982  }
1983  }
1984  SelectCode(N);
1985 }
1986 
1987 void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
1988  switch (N->getOpcode()) {
1989  case ISD::AND:
1990  if (N->getOperand(0).getOpcode() == ISD::SRL) {
1991  // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
1992  // Predicate: isMask(mask)
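      // For example, ((x srl 4) & 0xff) becomes "BFE_U32 x, 4, 8", since
      // popcount(0xff) == 8.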
1993  const SDValue &Srl = N->getOperand(0);
1994  ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
1995  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
1996 
1997  if (Shift && Mask) {
1998  uint32_t ShiftVal = Shift->getZExtValue();
1999  uint32_t MaskVal = Mask->getZExtValue();
2000 
2001  if (isMask_32(MaskVal)) {
2002  uint32_t WidthVal = countPopulation(MaskVal);
2003 
2004  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
2005  Srl.getOperand(0), ShiftVal, WidthVal));
2006  return;
2007  }
2008  }
2009  }
2010  break;
2011  case ISD::SRL:
2012  if (N->getOperand(0).getOpcode() == ISD::AND) {
2013  // "((a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
2014  // Predicate: isMask(mask >> b)
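      // For example, ((x & 0xff0) srl 4) becomes "BFE_U32 x, 4, 8", since
      // 0xff0 >> 4 == 0xff is a mask with popcount 8.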
2015  const SDValue &And = N->getOperand(0);
2016  ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
2017  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
2018 
2019  if (Shift && Mask) {
2020  uint32_t ShiftVal = Shift->getZExtValue();
2021  uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
2022 
2023  if (isMask_32(MaskVal)) {
2024  uint32_t WidthVal = countPopulation(MaskVal);
2025 
2026  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
2027  And.getOperand(0), ShiftVal, WidthVal));
2028  return;
2029  }
2030  }
2031  } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
2032  SelectS_BFEFromShifts(N);
2033  return;
2034  }
2035  break;
2036  case ISD::SRA:
2037  if (N->getOperand(0).getOpcode() == ISD::SHL) {
2038  SelectS_BFEFromShifts(N);
2039  return;
2040  }
2041  break;
2042 
2043  case ISD::SIGN_EXTEND_INREG: {
2044  // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
2045  SDValue Src = N->getOperand(0);
2046  if (Src.getOpcode() != ISD::SRL)
2047  break;
2048 
2049  const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
2050  if (!Amt)
2051  break;
2052 
2053  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2054  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
2055  Amt->getZExtValue(), Width));
2056  return;
2057  }
2058  }
2059 
2060  SelectCode(N);
2061 }
2062 
2063 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
2064  assert(N->getOpcode() == ISD::BRCOND);
2065  if (!N->hasOneUse())
2066  return false;
2067 
2068  SDValue Cond = N->getOperand(1);
2069  if (Cond.getOpcode() == ISD::CopyToReg)
2070  Cond = Cond.getOperand(2);
2071 
2072  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
2073  return false;
2074 
2075  MVT VT = Cond.getOperand(0).getSimpleValueType();
2076  if (VT == MVT::i32)
2077  return true;
2078 
2079  if (VT == MVT::i64) {
2080  auto ST = static_cast<const GCNSubtarget *>(Subtarget);
2081 
2082  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
2083  return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
2084  }
2085 
2086  return false;
2087 }
2088 
2089 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
2090  SDValue Cond = N->getOperand(1);
2091 
2092  if (Cond.isUndef()) {
2093  CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
2094  N->getOperand(2), N->getOperand(0));
2095  return;
2096  }
2097 
2098  const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
2099  const SIRegisterInfo *TRI = ST->getRegisterInfo();
2100 
2101  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
2102  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
2103  Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
2104  SDLoc SL(N);
2105 
2106  if (!UseSCCBr) {
2107  // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
2108  // analyzed what generates the vcc value, so we do not know whether vcc
2109  // bits for disabled lanes are 0. Thus we need to mask out bits for
2110  // disabled lanes.
2111  //
2112  // For the case that we select S_CBRANCH_SCC1 and it gets
2113  // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
2114  // SIInstrInfo::moveToVALU which inserts the S_AND.
2115  //
2116  // We could add an analysis of what generates the vcc value here and omit
2117  // the S_AND when it is unnecessary. But it would be better to add a separate
2118  // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
2119  // catches both cases.
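      // Concretely (illustrative), on wave32 the node built below corresponds
      // to: s_and_b32 <masked_cond>, exec_lo, <cond>, clearing the bits for
      // inactive lanes before the branch.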
2120  Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32
2121  : AMDGPU::S_AND_B64,
2122  SL, MVT::i1,
2123  CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO
2124  : AMDGPU::EXEC,
2125  MVT::i1),
2126  Cond),
2127  0);
2128  }
2129 
2130  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
2131  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
2132  N->getOperand(2), // Basic Block
2133  VCC.getValue(0));
2134 }
2135 
2136 void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
2137  MVT VT = N->getSimpleValueType(0);
2138  bool IsFMA = N->getOpcode() == ISD::FMA;
2139  if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
2140  !Subtarget->hasFmaMixInsts()) ||
2141  ((IsFMA && Subtarget->hasMadMixInsts()) ||
2142  (!IsFMA && Subtarget->hasFmaMixInsts()))) {
2143  SelectCode(N);
2144  return;
2145  }
2146 
2147  SDValue Src0 = N->getOperand(0);
2148  SDValue Src1 = N->getOperand(1);
2149  SDValue Src2 = N->getOperand(2);
2150  unsigned Src0Mods, Src1Mods, Src2Mods;
2151 
2152  // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
2153  // using the conversion from f16.
2154  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
2155  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
2156  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
2157 
2158  assert((IsFMA || !Mode.allFP32Denormals()) &&
2159  "fmad selected with denormals enabled");
2160  // TODO: We can select this with f32 denormals enabled if all the sources are
2161  // converted from f16 (in which case fmad isn't legal).
2162 
2163  if (Sel0 || Sel1 || Sel2) {
2164  // For dummy operands.
2165  const SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2166  SDValue Ops[] = {
2167  CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
2168  CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
2169  CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
2170  CurDAG->getTargetConstant(0, SDLoc(), MVT::i32),
2171  Zero, Zero
2172  };
2173 
2174  CurDAG->SelectNodeTo(N,
2175  IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
2176  MVT::f32, Ops);
2177  } else {
2178  SelectCode(N);
2179  }
2180 }
2181 
2182 // This is here because there isn't a way to use the generated sub0_sub1 as the
2183 // subreg index to EXTRACT_SUBREG in tablegen.
2184 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
2185  MemSDNode *Mem = cast<MemSDNode>(N);
2186  unsigned AS = Mem->getAddressSpace();
2187  if (AS == AMDGPUAS::FLAT_ADDRESS) {
2188  SelectCode(N);
2189  return;
2190  }
2191 
2192  MVT VT = N->getSimpleValueType(0);
2193  bool Is32 = (VT == MVT::i32);
2194  SDLoc SL(N);
2195 
2196  MachineSDNode *CmpSwap = nullptr;
2197  if (Subtarget->hasAddr64()) {
2198  SDValue SRsrc, VAddr, SOffset, Offset;
2199 
2200  if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset)) {
2201  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
2202  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
2203  SDValue CmpVal = Mem->getOperand(2);
2204  SDValue CPol = CurDAG->getTargetConstant(AMDGPU::CPol::GLC, SL, MVT::i32);
2205 
2206  // XXX - Do we care about glue operands?
2207 
2208  SDValue Ops[] = {CmpVal, VAddr, SRsrc, SOffset, Offset, CPol,
2209  Mem->getChain()};
2210 
2211  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
2212  }
2213  }
2214 
2215  if (!CmpSwap) {
2216  SDValue SRsrc, SOffset, Offset;
2217  if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset)) {
2218  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
2219  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
2220 
2221  SDValue CmpVal = Mem->getOperand(2);
2222  SDValue CPol = CurDAG->getTargetConstant(AMDGPU::CPol::GLC, SL, MVT::i32);
2223  SDValue Ops[] = {CmpVal, SRsrc, SOffset, Offset, CPol, Mem->getChain()};
2224 
2225  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
2226  }
2227  }
2228 
2229  if (!CmpSwap) {
2230  SelectCode(N);
2231  return;
2232  }
2233 
2234  MachineMemOperand *MMO = Mem->getMemOperand();
2235  CurDAG->setNodeMemRefs(CmpSwap, {MMO});
2236 
2237  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
2238  SDValue Extract
2239  = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
2240 
2241  ReplaceUses(SDValue(N, 0), Extract);
2242  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
2243  CurDAG->RemoveDeadNode(N);
2244 }
2245 
2246 void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
2247  // The address is assumed to be uniform, so if it ends up in a VGPR, it will
2248  // be copied to an SGPR with readfirstlane.
2249  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2250  AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2251 
2252  SDValue Chain = N->getOperand(0);
2253  SDValue Ptr = N->getOperand(2);
2254  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2255  MachineMemOperand *MMO = M->getMemOperand();
2256  bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
2257 
2258  SDValue Offset;
2259  if (CurDAG->isBaseWithConstantOffset(Ptr)) {
2260  SDValue PtrBase = Ptr.getOperand(0);
2261  SDValue PtrOffset = Ptr.getOperand(1);
2262 
2263  const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
2264  if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
2265  N = glueCopyToM0(N, PtrBase);
2266  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
2267  }
2268  }
2269 
2270  if (!Offset) {
2271  N = glueCopyToM0(N, Ptr);
2272  Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2273  }
2274 
2275  SDValue Ops[] = {
2276  Offset,
2277  CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
2278  Chain,
2279  N->getOperand(N->getNumOperands() - 1) // New glue
2280  };
2281 
2282  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2283  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2284 }
2285 
2286 static unsigned gwsIntrinToOpcode(unsigned IntrID) {
2287  switch (IntrID) {
2288  case Intrinsic::amdgcn_ds_gws_init:
2289  return AMDGPU::DS_GWS_INIT;
2290  case Intrinsic::amdgcn_ds_gws_barrier:
2291  return AMDGPU::DS_GWS_BARRIER;
2292  case Intrinsic::amdgcn_ds_gws_sema_v:
2293  return AMDGPU::DS_GWS_SEMA_V;
2294  case Intrinsic::amdgcn_ds_gws_sema_br:
2295  return AMDGPU::DS_GWS_SEMA_BR;
2296  case Intrinsic::amdgcn_ds_gws_sema_p:
2297  return AMDGPU::DS_GWS_SEMA_P;
2298  case Intrinsic::amdgcn_ds_gws_sema_release_all:
2299  return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
2300  default:
2301  llvm_unreachable("not a gws intrinsic");
2302  }
2303 }
2304 
2305 void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
2306  if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
2307  !Subtarget->hasGWSSemaReleaseAll()) {
2308  // Let this error.
2309  SelectCode(N);
2310  return;
2311  }
2312 
2313  // Chain, intrinsic ID, vsrc, offset
2314  const bool HasVSrc = N->getNumOperands() == 4;
2315  assert(HasVSrc || N->getNumOperands() == 3);
2316 
2317  SDLoc SL(N);
2318  SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
2319  int ImmOffset = 0;
2320  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2321  MachineMemOperand *MMO = M->getMemOperand();
2322 
2323  // Don't worry if the offset ends up in a VGPR. Only one lane will have an
2324  // effect, so SIFixSGPRCopies will validly insert readfirstlane.
2325 
2326  // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
2327  // offset field) % 64. Some versions of the programming guide omit the m0
2328  // part, or claim it's from offset 0.
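      // For a variable offset, the code below roughly corresponds to
      // (illustrative registers): v_readfirstlane_b32 s0, v_off followed by
      // s_lshl_b32 m0, s0, 16, placing the offset in M0[21:16].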
2329  if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
2330  // If we have a constant offset, try to use the 0 in m0 as the base.
2331  // TODO: Look into changing the default m0 initialization value. If the
2332  // default -1 only set the low 16-bits, we could leave it as-is and add 1 to
2333  // the immediate offset.
2334  glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
2335  ImmOffset = ConstOffset->getZExtValue();
2336  } else {
2337  if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
2338  ImmOffset = BaseOffset.getConstantOperandVal(1);
2339  BaseOffset = BaseOffset.getOperand(0);
2340  }
2341 
2342  // Prefer to do the shift in an SGPR since it should be possible to use m0
2343  // as the result directly. If it's already an SGPR, it will be eliminated
2344  // later.
2345  SDNode *SGPROffset
2346  = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
2347  BaseOffset);
2348  // Shift to offset in m0
2349  SDNode *M0Base
2350  = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
2351  SDValue(SGPROffset, 0),
2352  CurDAG->getTargetConstant(16, SL, MVT::i32));
2353  glueCopyToM0(N, SDValue(M0Base, 0));
2354  }
2355 
2356  SDValue Chain = N->getOperand(0);
2357  SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
2358 
2359  const unsigned Opc = gwsIntrinToOpcode(IntrID);
2360  SmallVector<SDValue, 5> Ops;
2361  if (HasVSrc)
2362  Ops.push_back(N->getOperand(2));
2363  Ops.push_back(OffsetField);
2364  Ops.push_back(Chain);
2365 
2366  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2367  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2368 }
2369 
2370 void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
2371  if (Subtarget->getLDSBankCount() != 16) {
2372  // This is a single instruction with a pattern.
2373  SelectCode(N);
2374  return;
2375  }
2376 
2377  SDLoc DL(N);
2378 
2379  // This requires 2 instructions. It is possible to write a pattern to support
2380  // this, but the generated isel emitter doesn't correctly deal with multiple
2381  // output instructions using the same physical register input. The copy to m0
2382  // is incorrectly placed before the second instruction.
2383  //
2384  // TODO: Match source modifiers.
2385  //
2386  // def : Pat <
2387  // (int_amdgcn_interp_p1_f16
2388  // (VOP3Mods f32:$src0, i32:$src0_modifiers),
2389  // (i32 timm:$attrchan), (i32 timm:$attr),
2390  // (i1 timm:$high), M0),
2391  // (V_INTERP_P1LV_F16 $src0_modifiers, VGPR_32:$src0, timm:$attr,
2392  // timm:$attrchan, 0,
2393  // (V_INTERP_MOV_F32 2, timm:$attr, timm:$attrchan), timm:$high)> {
2394  // let Predicates = [has16BankLDS];
2395  // }
2396 
2397  // 16 bank LDS
2398  SDValue ToM0 = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, AMDGPU::M0,
2399  N->getOperand(5), SDValue());
2400 
2401  SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::Other);
2402 
2403  SDNode *InterpMov =
2404  CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs, {
2405  CurDAG->getTargetConstant(2, DL, MVT::i32), // P0
2406  N->getOperand(3), // Attr
2407  N->getOperand(2), // Attrchan
2408  ToM0.getValue(1) // In glue
2409  });
2410 
2411  SDNode *InterpP1LV =
2412  CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32, {
2413  CurDAG->getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
2414  N->getOperand(1), // Src0
2415  N->getOperand(3), // Attr
2416  N->getOperand(2), // Attrchan
2417  CurDAG->getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
2418  SDValue(InterpMov, 0), // Src2 - holds two f16 values selected by high
2419  N->getOperand(4), // high
2420  CurDAG->getTargetConstant(0, DL, MVT::i1), // $clamp
2421  CurDAG->getTargetConstant(0, DL, MVT::i32), // $omod
2422  SDValue(InterpMov, 1)
2423  });
2424 
2425  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), SDValue(InterpP1LV, 0));
2426 }
2427 
2428 void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
2429  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2430  switch (IntrID) {
2431  case Intrinsic::amdgcn_ds_append:
2432  case Intrinsic::amdgcn_ds_consume: {
2433  if (N->getValueType(0) != MVT::i32)
2434  break;
2435  SelectDSAppendConsume(N, IntrID);
2436  return;
2437  }
2438  }
2439 
2440  SelectCode(N);
2441 }
2442 
2443 void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
2444  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
2445  unsigned Opcode;
2446  switch (IntrID) {
2447  case Intrinsic::amdgcn_wqm:
2448  Opcode = AMDGPU::WQM;
2449  break;
2450  case Intrinsic::amdgcn_softwqm:
2451  Opcode = AMDGPU::SOFT_WQM;
2452  break;
2453  case Intrinsic::amdgcn_wwm:
2454  case Intrinsic::amdgcn_strict_wwm:
2455  Opcode = AMDGPU::STRICT_WWM;
2456  break;
2457  case Intrinsic::amdgcn_strict_wqm:
2458  Opcode = AMDGPU::STRICT_WQM;
2459  break;
2460  case Intrinsic::amdgcn_interp_p1_f16:
2461  SelectInterpP1F16(N);
2462  return;
2463  default:
2464  SelectCode(N);
2465  return;
2466  }
2467 
2468  SDValue Src = N->getOperand(1);
2469  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
2470 }
2471 
2472 void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
2473  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2474  switch (IntrID) {
2475  case Intrinsic::amdgcn_ds_gws_init:
2476  case Intrinsic::amdgcn_ds_gws_barrier:
2477  case Intrinsic::amdgcn_ds_gws_sema_v:
2478  case Intrinsic::amdgcn_ds_gws_sema_br:
2479  case Intrinsic::amdgcn_ds_gws_sema_p:
2480  case Intrinsic::amdgcn_ds_gws_sema_release_all:
2481  SelectDS_GWS(N, IntrID);
2482  return;
2483  default:
2484  break;
2485  }
2486 
2487  SelectCode(N);
2488 }
2489 
2490 bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
2491  unsigned &Mods,
2492  bool AllowAbs) const {
2493  Mods = 0;
2494  Src = In;
2495 
2496  if (Src.getOpcode() == ISD::FNEG) {
2497  Mods |= SISrcMods::NEG;
2498  Src = Src.getOperand(0);
2499  }
2500 
2501  if (AllowAbs && Src.getOpcode() == ISD::FABS) {
2502  Mods |= SISrcMods::ABS;
2503  Src = Src.getOperand(0);
2504  }
2505 
2506  return true;
2507 }
2508 
2509 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
2510  SDValue &SrcMods) const {
2511  unsigned Mods;
2512  if (SelectVOP3ModsImpl(In, Src, Mods)) {
2513  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2514  return true;
2515  }
2516 
2517  return false;
2518 }
2519 
2520 bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
2521  SDValue &SrcMods) const {
2522  unsigned Mods;
2523  if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {
2524  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2525  return true;
2526  }
2527 
2528  return false;
2529 }
2530 
2531 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
2532  SDValue &SrcMods) const {
2533  SelectVOP3Mods(In, Src, SrcMods);
2534  return isNoNanSrc(Src);
2535 }
2536 
2537 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
2538  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
2539  return false;
2540 
2541  Src = In;
2542  return true;
2543 }
2544 
2545 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
2546  SDValue &SrcMods, SDValue &Clamp,
2547  SDValue &Omod) const {
2548  SDLoc DL(In);
2549  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2550  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2551 
2552  return SelectVOP3Mods(In, Src, SrcMods);
2553 }
2554 
2555 bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
2556  SDValue &SrcMods, SDValue &Clamp,
2557  SDValue &Omod) const {
2558  SDLoc DL(In);
2559  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2560  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2561 
2562  return SelectVOP3BMods(In, Src, SrcMods);
2563 }
2564 
2565 bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
2566  SDValue &Clamp, SDValue &Omod) const {
2567  Src = In;
2568 
2569  SDLoc DL(In);
2570  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2571  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2572 
2573  return true;
2574 }
2575 
2576 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
2577  SDValue &SrcMods) const {
2578  unsigned Mods = 0;
2579  Src = In;
2580 
2581  if (Src.getOpcode() == ISD::FNEG) {
2582  Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
2583  Src = Src.getOperand(0);
2584  }
2585 
2586  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
2587  unsigned VecMods = Mods;
2588 
2589  SDValue Lo = stripBitcast(Src.getOperand(0));
2590  SDValue Hi = stripBitcast(Src.getOperand(1));
2591 
2592  if (Lo.getOpcode() == ISD::FNEG) {
2593  Lo = stripBitcast(Lo.getOperand(0));
2594  Mods ^= SISrcMods::NEG;
2595  }
2596 
2597  if (Hi.getOpcode() == ISD::FNEG) {
2598  Hi = stripBitcast(Hi.getOperand(0));
2599  Mods ^= SISrcMods::NEG_HI;
2600  }
2601 
2602  if (isExtractHiElt(Lo, Lo))
2603  Mods |= SISrcMods::OP_SEL_0;
2604 
2605  if (isExtractHiElt(Hi, Hi))
2606  Mods |= SISrcMods::OP_SEL_1;
2607 
2608  unsigned VecSize = Src.getValueSizeInBits();
2609  Lo = stripExtractLoElt(Lo);
2610  Hi = stripExtractLoElt(Hi);
2611 
2612  if (Lo.getValueSizeInBits() > VecSize) {
2613  Lo = CurDAG->getTargetExtractSubreg(
2614  (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
2615  MVT::getIntegerVT(VecSize), Lo);
2616  }
2617 
2618  if (Hi.getValueSizeInBits() > VecSize) {
2619  Hi = CurDAG->getTargetExtractSubreg(
2620  (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
2621  MVT::getIntegerVT(VecSize), Hi);
2622  }
2623 
2624  assert(Lo.getValueSizeInBits() <= VecSize &&
2625  Hi.getValueSizeInBits() <= VecSize);
2626 
2627  if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
2628  // Really a scalar input. Just select from the low half of the register to
2629  // avoid packing.
2630 
2631  if (VecSize == 32 || VecSize == Lo.getValueSizeInBits()) {
2632  Src = Lo;
2633  } else {
2634  assert(Lo.getValueSizeInBits() == 32 && VecSize == 64);
2635 
2636  SDLoc SL(In);
2637  SDValue Undef = SDValue(
2638  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL,
2639  Lo.getValueType()), 0);
2640  auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
2641  : AMDGPU::SReg_64RegClassID;
2642  const SDValue Ops[] = {
2643  CurDAG->getTargetConstant(RC, SL, MVT::i32),
2644  Lo, CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
2645  Undef, CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) };
2646 
2647  Src = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL,
2648  Src.getValueType(), Ops), 0);
2649  }
2650  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2651  return true;
2652  }
2653 
2654  if (VecSize == 64 && Lo == Hi && isa<ConstantFPSDNode>(Lo)) {
2655  uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF()
2656  .bitcastToAPInt().getZExtValue();
2657  if (AMDGPU::isInlinableLiteral32(Lit, Subtarget->hasInv2PiInlineImm())) {
2658  Src = CurDAG->getTargetConstant(Lit, SDLoc(In), MVT::i64);
2659  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2660  return true;
2661  }
2662  }
2663 
2664  Mods = VecMods;
2665  }
2666 
2667  // Packed instructions do not have abs modifiers.
2668  Mods |= SISrcMods::OP_SEL_1;
2669 
2670  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2671  return true;
2672 }
2673 
2674 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
2675  SDValue &SrcMods) const {
2676  Src = In;
2677  // FIXME: Handle op_sel
2678  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2679  return true;
2680 }
2681 
2682 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
2683  SDValue &SrcMods) const {
2684  // FIXME: Handle op_sel
2685  return SelectVOP3Mods(In, Src, SrcMods);
2686 }
2687 
2688 // The return value is not whether the match is possible (which it always is),
2689 // but whether or not a conversion is really used.
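// For example, (f32 (fp_extend (f16 v))) matches with a conversion and
// returns true, while a plain f32 source matches trivially but returns false.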
2690 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
2691  unsigned &Mods) const {
2692  Mods = 0;
2693  SelectVOP3ModsImpl(In, Src, Mods);
2694 
2695  if (Src.getOpcode() == ISD::FP_EXTEND) {
2696  Src = Src.getOperand(0);
2697  assert(Src.getValueType() == MVT::f16);
2698  Src = stripBitcast(Src);
2699 
2700  // Be careful about folding modifiers if we already have an abs. fneg is
2701  // applied last, so we don't want to apply an earlier fneg.
2702  if ((Mods & SISrcMods::ABS) == 0) {
2703  unsigned ModsTmp;
2704  SelectVOP3ModsImpl(Src, Src, ModsTmp);
2705 
2706  if ((ModsTmp & SISrcMods::NEG) != 0)
2707  Mods ^= SISrcMods::NEG;
2708 
2709  if ((ModsTmp & SISrcMods::ABS) != 0)
2710  Mods |= SISrcMods::ABS;
2711  }
2712 
2713  // op_sel/op_sel_hi decide the source type and source.
2714  // If the source's op_sel_hi is set, it indicates to do a conversion from fp16.
2715  // If the source's op_sel is set, it picks the high half of the source
2716  // register.
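      // For example (illustrative): an f16 operand stored in the high 16 bits
      // of a 32-bit register is selected with OP_SEL_0 set (take the high
      // half) and OP_SEL_1 set (treat the source as f16).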
2717 
2718  Mods |= SISrcMods::OP_SEL_1;
2719  if (isExtractHiElt(Src, Src)) {
2720  Mods |= SISrcMods::OP_SEL_0;
2721 
2722  // TODO: Should we try to look for neg/abs here?
2723  }
2724 
2725  return true;
2726  }
2727 
2728  return false;
2729 }
2730 
2731 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2732  SDValue &SrcMods) const {
2733  unsigned Mods = 0;
2734  SelectVOP3PMadMixModsImpl(In, Src, Mods);
2735  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2736  return true;
2737 }
2738 
2739 SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
2740  if (In.isUndef())
2741  return CurDAG->getUNDEF(MVT::i32);
2742 
2743  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2744  SDLoc SL(In);
2745  return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
2746  }
2747 
2748  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2749  SDLoc SL(In);
2750  return CurDAG->getConstant(
2751  C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2752  }
2753 
2754  SDValue Src;
2755  if (isExtractHiElt(In, Src))
2756  return Src;
2757 
2758  return SDValue();
2759 }
2760 
2761 bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
2762  assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);
2763 
2764  const SIRegisterInfo *SIRI =
2765  static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
2766  const SIInstrInfo * SII =
2767  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2768 
2769  unsigned Limit = 0;
2770  bool AllUsesAcceptSReg = true;
2771  for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
2772  Limit < 10 && U != E; ++U, ++Limit) {
2773  const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
2774 
2775  // If the register class is unknown, it could be an unknown
2776  // register class that needs to be an SGPR, e.g. an inline asm
2777  // constraint
2778  if (!RC || SIRI->isSGPRClass(RC))
2779  return false;
2780 
2781  if (RC != &AMDGPU::VS_32RegClass) {
2782  AllUsesAcceptSReg = false;
2783  SDNode * User = *U;
2784  if (User->isMachineOpcode()) {
2785  unsigned Opc = User->getMachineOpcode();
2786  MCInstrDesc Desc = SII->get(Opc);
2787  if (Desc.isCommutable()) {
2788  unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
2789  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
2790  if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
2791  unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
2792  const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
2793  if (CommutedRC == &AMDGPU::VS_32RegClass)
2794  AllUsesAcceptSReg = true;
2795  }
2796  }
2797  }
2798  // If AllUsesAcceptSReg is still false, we have not succeeded in
2799  // commuting the current user. This means we have at least one use
2800  // that strictly requires a VGPR, so we will not attempt to commute
2801  // other user instructions.
2802  if (!AllUsesAcceptSReg)
2803  break;
2804  }
2805  }
2806  return !AllUsesAcceptSReg && (Limit < 10);
2807 }
2808 
2809 bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
2810  auto Ld = cast<LoadSDNode>(N);
2811 
2812  return Ld->getAlignment() >= 4 &&
2813  (
2814  (
2815  (
2816  Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2817  Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT
2818  )
2819  &&
2820  !N->isDivergent()
2821  )
2822  ||
2823  (
2824  Subtarget->getScalarizeGlobalBehavior() &&
2825  Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
2826  Ld->isSimple() &&
2827  !N->isDivergent() &&
2828  static_cast<const SITargetLowering *>(
2829  getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)
2830  )
2831  );
2832 }
2833 
2834 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
2835  const AMDGPUTargetLowering& Lowering =
2836  *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
2837  bool IsModified = false;
2838  do {
2839  IsModified = false;
2840 
2841  // Go over all selected nodes and try to fold them a bit more
2842  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
2843  while (Position != CurDAG->allnodes_end()) {
2844  SDNode *Node = &*Position++;
2845  MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
2846  if (!MachineNode)
2847  continue;
2848 
2849  SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
2850  if (ResNode != Node) {
2851  if (ResNode)
2852  ReplaceUses(Node, ResNode);
2853  IsModified = true;
2854  }
2855  }
2856  CurDAG->RemoveDeadNodes();
2857  } while (IsModified);
2858 }
llvm::ISD::SUB
@ SUB
Definition: ISDOpcodes.h:240
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
llvm::AMDGPUISD::CLAMP
@ CLAMP
CLAMP value between 0.0 and 1.0.
Definition: AMDGPUISelLowering.h:368
llvm::ISD::FPOWI
@ FPOWI
Definition: ISDOpcodes.h:872
i
i
Definition: README.txt:29
llvm::MCInstrDesc::getNumDefs
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:243
CmpMode::FP
@ FP
llvm::ConstantSDNode
Definition: SelectionDAGNodes.h:1556
Lowering
Shadow Stack GC Lowering
Definition: ShadowStackGCLowering.cpp:99
llvm::SelectionDAGISel::getTargetLowering
const TargetLowering * getTargetLowering() const
Definition: SelectionDAGISel.h:67
Signed
@ Signed
Definition: NVPTXISelLowering.cpp:4636
llvm::ISD::INTRINSIC_VOID
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
llvm::AMDGPUSubtarget::hasInv2PiInlineImm
bool hasInv2PiInlineImm() const
Definition: AMDGPUSubtarget.h:180
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::SelectionDAGISel::TM
TargetMachine & TM
Definition: SelectionDAGISel.h:41
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector
bool matchLoadD16FromBuildVector(SDNode *N) const
Definition: AMDGPUISelDAGToDAG.cpp:209
llvm::EVT::isScalarInteger
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:150
AMDGPUISelDAGToDAG.h
llvm::AMDGPUISD::DIV_SCALE
@ DIV_SCALE
Definition: AMDGPUISelLowering.h:399
v2i32
gets compiled into this on rsp movaps rsp movaps rsp movaps rsp movaps rsp movaps rsp movaps rsp movaps rsp movaps rsp movq rsp movq rsp movq rsp movq rsp movq rsp rax movq rsp rax movq rsp rsp rsp eax eax jbe LBB1_3 rcx rax movq rsp eax rsp ret ecx eax rcx movl rsp jmp LBB1_2 gcc rsp rax movq rsp rsp movq rsp rax movq rsp eax eax jb L6 rdx eax rsp ret p2align edx rdx eax movl rsp eax rsp ret and it gets compiled into this on ebp esp eax movl ebp eax movl ebp eax esp popl ebp ret gcc ebp eax popl ebp ret Teach tblgen not to check bitconvert source type in some cases This allows us to consolidate the following patterns in X86InstrMMX v2i32(MMX_MOVDQ2Qrr VR128:$src))>
llvm::SDLoc
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Definition: SelectionDAGNodes.h:1086
llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:633
llvm::ARMII::VecSize
@ VecSize
Definition: ARMBaseInfo.h:417
llvm::ISD::BITCAST
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:848
llvm::ISD::SETNE
@ SETNE
Definition: ISDOpcodes.h:1386
llvm::SelectionDAG::SignBitIsZero
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
Definition: SelectionDAG.cpp:2477
llvm::AMDGPUTargetLowering
Definition: AMDGPUISelLowering.h:27
llvm::SIRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(unsigned RCID) const
Definition: SIRegisterInfo.cpp:2435
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:364
llvm::ISD::FMINNUM
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:898
SIMachineFunctionInfo.h
llvm::SelectionDAG::getCopyToReg
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:735
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::SDValue::getNode
SDNode * getNode() const
get the SDNode which holds the desired result
Definition: SelectionDAGNodes.h:152
llvm::ISD::ConstantFP
@ ConstantFP
Definition: ISDOpcodes.h:77
llvm::SelectionDAG::allnodes_end
allnodes_const_iterator allnodes_end() const
Definition: SelectionDAG.h:494
llvm::AMDGPU::getSMRDEncodedOffset
Optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer)
Definition: AMDGPUBaseInfo.cpp:1875
llvm::ISD::UADDO
@ UADDO
Definition: ISDOpcodes.h:311
llvm::AMDGPUSubtarget::hasMadMixInsts
bool hasMadMixInsts() const
Definition: AMDGPUSubtarget.h:148
llvm::ISD::ADDC
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:269
llvm::SIInstrFlags::FlatGlobal
@ FlatGlobal
Definition: SIDefines.h:101
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::ISD::SETEQ
@ SETEQ
Definition: ISDOpcodes.h:1381
llvm::SelectionDAG::getVTList
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
Definition: SelectionDAG.cpp:8580
llvm::AMDGPUISD::CVT_PKNORM_I16_F32
@ CVT_PKNORM_I16_F32
Definition: AMDGPUISelLowering.h:454
llvm::MipsISD::Lo
@ Lo
Definition: MipsISelLowering.h:79
llvm::MachineSDNode
An SDNode that represents everything that will be needed to construct a MachineInstr.
Definition: SelectionDAGNodes.h:2746
llvm::AMDGPUISD::FMUL_W_CHAIN
@ FMUL_W_CHAIN
Definition: AMDGPUISelLowering.h:379
llvm::SelectionDAG::allnodes_begin
allnodes_const_iterator allnodes_begin() const
Definition: SelectionDAG.h:493
llvm::AMDGPUISD::DIV_FIXUP
@ DIV_FIXUP
Definition: AMDGPUISelLowering.h:401
llvm::AMDGPUISD::LOAD_D16_HI_I8
@ LOAD_D16_HI_I8
Definition: AMDGPUISelLowering.h:480
llvm::GCNSubtarget::hasFlatSegmentOffsetBug
bool hasFlatSegmentOffsetBug() const
Definition: GCNSubtarget.h:581
llvm::MemSDNode::getMemoryVT
EVT getMemoryVT() const
Return the type of the in-memory value.
Definition: SelectionDAGNodes.h:1336
ValueTracking.h
llvm::ISD::FLOG2
@ FLOG2
Definition: ISDOpcodes.h:875
llvm::MemSDNode::getChain
const SDValue & getChain() const
Definition: SelectionDAGNodes.h:1359
llvm::SDNode
Represents one node in the SelectionDAG.
Definition: SelectionDAGNodes.h:455
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:466
llvm::AMDGPU::getSMRDEncodedLiteralOffset32
Optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
Definition: AMDGPUBaseInfo.cpp:1892
llvm::LoadSDNode
This class is used to represent ISD::LOAD nodes.
Definition: SelectionDAGNodes.h:2281
llvm::SDNode::use_iterator
This class provides iterator support for SDUse operands that use a specific SDNode.
Definition: SelectionDAGNodes.h:727
Shift
bool Shift
Definition: README.txt:468
AMDGPUDAGToDAGISel
AMDGPU specific code to select AMDGPU machine instructions for SelectionDAG operations.
Definition: AMDGPUISelDAGToDAG.h:79
llvm::MemSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:1361
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:128
i8
Clang compiles this i8
Definition: README.txt:504
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1268
llvm::SelectionDAG::isBaseWithConstantOffset
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
Definition: SelectionDAG.cpp:4364
llvm::GCNSubtarget::hasDLInsts
bool hasDLInsts() const
Definition: GCNSubtarget.h:662
llvm::ISD::SETCC
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:702
llvm::Optional< int64_t >
llvm::SDNode::isPredecessorOf
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
Definition: SelectionDAGNodes.h:813
llvm::AMDGPU::SIModeRegisterDefaults
Definition: AMDGPUBaseInfo.h:915
i1
Decimal Convert From to National Zoned Signed int_ppc_altivec_bcdcfno i1
Definition: README_P9.txt:147
llvm::createAMDGPUISelDag
FunctionPass * createAMDGPUISelDag(TargetMachine *TM=nullptr, CodeGenOpt::Level OptLevel=CodeGenOpt::Default)
This pass converts a legalized DAG into a AMDGPU-specific.
Definition: AMDGPUISelDAGToDAG.cpp:112
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::MemSDNode
This is an abstract virtual class for memory operations.
Definition: SelectionDAGNodes.h:1254
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
matchZExtFromI32
static SDValue matchZExtFromI32(SDValue Op)
Definition: AMDGPUISelDAGToDAG.cpp:1594
llvm::SelectionDAG::RemoveDeadNodes
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
Definition: SelectionDAG.cpp:852
llvm::SDNode::isDivergent
bool isDivergent() const
Definition: SelectionDAGNodes.h:694
llvm::Lo_32
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition: MathExtras.h:353
AMDGPUDAGToDAGISel::PreprocessISelDAG
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
Definition: AMDGPUISelDAGToDAG.cpp:289
SelectionDAG.h
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::SelectionDAG::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:442
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::GCNSubtarget::getRegisterInfo
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:219
AMDGPUDAGToDAGISel::SelectBuildVector
void SelectBuildVector(SDNode *N, unsigned RegClassID)
Definition: AMDGPUISelDAGToDAG.cpp:451
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::ISD::FABS
@ FABS
Definition: ISDOpcodes.h:867
llvm::AMDGPUISD::FMIN3
@ FMIN3
Definition: AMDGPUISelLowering.h:391
llvm::ISD::ADDCARRY
@ ADDCARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:290
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:362
AMDGPUDAGToDAGISel::PostprocessISelDAG
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
Definition: AMDGPUISelDAGToDAG.cpp:2834
llvm::ISD::BRCOND
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:956
llvm::SDNode::getVTList
SDVTList getVTList() const
Definition: SelectionDAGNodes.h:932
llvm::GCNSubtarget::getInstrInfo
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:207
llvm::GCNSubtarget::unsafeDSOffsetFoldingEnabled
bool unsafeDSOffsetFoldingEnabled() const
Definition: GCNSubtarget.h:399
llvm::SelectionDAG::getRegister
SDValue getRegister(unsigned Reg, EVT VT)
Definition: SelectionDAG.cpp:2007
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::MipsISD::Hi
@ Hi
Definition: MipsISelLowering.h:75
llvm::GCNSubtarget::hasGWSSemaReleaseAll
bool hasGWSSemaReleaseAll() const
Definition: GCNSubtarget.h:617
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::SIInstrFlags::FLAT
@ FLAT
Definition: SIDefines.h:58
llvm::ISD::FFLOOR
@ FFLOOR
Definition: ISDOpcodes.h:885
SelectSAddrFI
static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr)
Definition: AMDGPUISelDAGToDAG.cpp:1698
llvm::SIInstrFlags::FlatScratch
@ FlatScratch
Definition: SIDefines.h:116
llvm::AMDGPU::CPol::CPol
CPol
Definition: SIDefines.h:291
i64
Clang compiles this i64
Definition: README.txt:504
AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel
AMDGPUDAGToDAGISel(TargetMachine *TM=nullptr, CodeGenOpt::Level OptLevel=CodeGenOpt::Default)
Definition: AMDGPUISelDAGToDAG.cpp:117
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:640
AMDGPUDAGToDAGISel::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: AMDGPUISelDAGToDAG.cpp:199
llvm::SelectionDAG::getTargetFrameIndex
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:688
llvm::RecurKind::And
@ And
Bitwise or logical AND of integers.
llvm::SDValue::getValueType
EVT getValueType() const
Return the ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:1121
llvm::MemIntrinsicSDNode
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
Definition: SelectionDAGNodes.h:1472
llvm::AMDGPUSubtarget::get
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Definition: AMDGPUSubtarget.cpp:1108
llvm::SelectionDAG
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:216
SelectionDAGNodes.h
llvm::ISD::Constant
@ Constant
Definition: ISDOpcodes.h:76
llvm::ISD::ZERO_EXTEND
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:729
llvm::RegState::Undef
@ Undef
Value of the register doesn't matter.
Definition: MachineInstrBuilder.h:52
llvm::GCNSubtarget::hasAddr64
bool hasAddr64() const
Definition: GCNSubtarget.h:310
llvm::ISD::ABS
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:640
FunctionLoweringInfo.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::GCNSubtarget::useFlatForGlobal
bool useFlatForGlobal() const
Definition: GCNSubtarget.h:458
llvm::GCNSubtarget::hasFmaMixInsts
bool hasFmaMixInsts() const
Definition: GCNSubtarget.h:360
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:728
llvm::User
Definition: User.h:44
llvm::AMDGPUISD::CVT_PKNORM_U16_F32
@ CVT_PKNORM_U16_F32
Definition: AMDGPUISelLowering.h:455
llvm::SelectionDAG::getUNDEF
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:951
llvm::ISD::SIGN_EXTEND_INREG
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:747
llvm::ISD::CopyToReg
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
llvm::AMDGPUISD::FMED3
@ FMED3
Definition: AMDGPUISelLowering.h:394
GFX9
@ GFX9
Definition: SIInstrInfo.cpp:7689
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:35
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::SelectionDAG::getConstant
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1395
llvm::EVT::getVectorNumElements
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:309
llvm::MCSubtargetInfo::getTargetTriple
const Triple & getTargetTriple() const
Definition: MCSubtargetInfo.h:107
f32
So we should use XX3Form_Rcr to implement instrinsic Convert DP outs ins xscvdpsp No builtin are required Round &Convert QP DP(dword[1] is set to zero) No builtin are required Round to Quad Precision because you need to assign rounding mode in instruction Provide builtin(set f128:$vT,(int_ppc_vsx_xsrqpi f128:$vB))(set f128 yields< n x< ty > >< result > yields< ty >< result > No builtin are required Load Store load store see def memrix16 in PPCInstrInfo td Load Store Vector load store outs ins lxsdx set load store with conversion from to outs ins lxsspx set f32
Definition: README_P9.txt:522
llvm::AMDGPUISD::LOAD_D16_LO_I8
@ LOAD_D16_LO_I8
Definition: AMDGPUISelLowering.h:482
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:363
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::ISD::FROUND
@ FROUND
Definition: ISDOpcodes.h:883
llvm::ISD::TRUNCATE
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:735
llvm::AMDGPUISD::ATOMIC_LOAD_FMAX
@ ATOMIC_LOAD_FMAX
Definition: AMDGPUISelLowering.h:496
llvm::ISD::SRA
@ SRA
Definition: ISDOpcodes.h:658
llvm::SISrcMods::NEG_HI
@ NEG_HI
Definition: SIDefines.h:212
llvm::SISrcMods::OP_SEL_1
@ OP_SEL_1
Definition: SIDefines.h:214
llvm::SelectionDAGISel::ReplaceNode
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
Definition: SelectionDAGISel.h:227
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:359
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:195
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MCInstrDesc::isCommutable
bool isCommutable() const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z,...
Definition: MCInstrDesc.h:472
llvm::M0
unsigned M0(unsigned Val)
Definition: VE.h:371
llvm::Instruction
Definition: Instruction.h:45
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:287
llvm::ISD::SINT_TO_FP
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:739
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1460
llvm::SISrcMods::NEG
@ NEG
Definition: SIDefines.h:209
llvm::ISD::FNEARBYINT
@ FNEARBYINT
Definition: ISDOpcodes.h:882
llvm::ISD::FRINT
@ FRINT
Definition: ISDOpcodes.h:881
llvm::SIInstrInfo::findCommutedOpIndices
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override
Definition: SIInstrInfo.cpp:2172
llvm::SelectionDAG::getMemIntrinsicNode
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
Definition: SelectionDAG.cpp:7268
llvm::LegacyDivergenceAnalysis
Definition: LegacyDivergenceAnalysis.h:31
llvm::SelectionDAG::dump
void dump() const
Definition: SelectionDAGDumper.cpp:913
llvm::AMDGPUISD::CVT_PK_U16_U32
@ CVT_PK_U16_U32
Definition: AMDGPUISelLowering.h:457
llvm::SIRegisterInfo::isSGPRClass
static bool isSGPRClass(const TargetRegisterClass *RC)
Definition: SIRegisterInfo.h:162
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:30
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:632
llvm::MCOperandInfo::RegClass
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Definition: MCInstrDesc.h:89
llvm::SelectionDAG::isKnownNeverNaN
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue is known to never be NaN.
Definition: SelectionDAG.cpp:4376
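For example, a NaN-sensitive fold can bail out early (a sketch; Op is an assumed SDValue):

// Only safe to apply the transformation when Op can never be a NaN.
if (!CurDAG->isKnownNeverNaN(Op))
  return;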
llvm::Triple::getArch
ArchType getArch() const
getArch - Get the parsed architecture type of this triple.
Definition: Triple.h:310
llvm::SDValue::getConstantOperandVal
uint64_t getConstantOperandVal(unsigned i) const
Definition: SelectionDAGNodes.h:1133
llvm::ISD::CopyFromReg
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:208
llvm::MCID::RegSequence
@ RegSequence
Definition: MCInstrDesc.h:179
llvm::AMDGPUISD::FMA_W_CHAIN
@ FMA_W_CHAIN
Definition: AMDGPUISelLowering.h:378
llvm::GCNSubtarget::getScalarizeGlobalBehavior
bool getScalarizeGlobalBehavior() const
Definition: GCNSubtarget.h:763
llvm::ISD::USUBO
@ USUBO
Definition: ISDOpcodes.h:315
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
findMemSDNode
static MemSDNode * findMemSDNode(SDNode *N)
Definition: AMDGPUISelDAGToDAG.cpp:1469
llvm::Hi_32
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition: MathExtras.h:348
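A self-contained example:

#include "llvm/Support/MathExtras.h"

uint64_t Wide = 0x123456789ABCDEF0ULL;
uint32_t Hi = llvm::Hi_32(Wide); // 0x12345678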
LoopInfo.h
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:634
llvm::ISD::ATOMIC_LOAD_FADD
@ ATOMIC_LOAD_FADD
Definition: ISDOpcodes.h:1151
llvm::ISD::FPOW
@ FPOW
Definition: ISDOpcodes.h:873
llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:377
R600MCTargetDesc.h
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:630
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::SelectionDAG::RemoveDeadNode
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
Definition: SelectionDAG.cpp:906
llvm::countPopulation
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:567
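For example:

#include "llvm/Support/MathExtras.h"

unsigned Ones = llvm::countPopulation(0xF0F0u); // 8 bits are set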
llvm::AMDGPUISD::LDEXP
@ LDEXP
Definition: AMDGPUISelLowering.h:414
llvm::isInt< 32 >
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:373
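For example, checking whether a 64-bit immediate fits in a signed 32-bit field:

#include "llvm/Support/MathExtras.h"

bool Fits = llvm::isInt<32>(2147483647);    // true: INT32_MAX
bool TooWide = llvm::isInt<32>(2147483648); // false: one past INT32_MAX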
llvm::ISD::FLOG10
@ FLOG10
Definition: ISDOpcodes.h:876
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:341
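An illustration tying together the EVT queries that appear in this index:

#include "llvm/CodeGen/ValueTypes.h"

llvm::EVT VT = llvm::MVT::v2f16;
unsigned Bits = VT.getSizeInBits();             // 32 for <2 x half>
unsigned ScalarBits = VT.getScalarSizeInBits(); // 16
llvm::EVT EltVT = VT.getVectorElementType();    // f16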
llvm::ISD::FMAD
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:470
llvm::isUInt< 16 >
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:408
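For example, the kind of unsigned-immediate legality test used for 16-bit offset fields (the field width here is illustrative):

#include "llvm/Support/MathExtras.h"

uint64_t ByteOffset = 65535;
bool Encodable = llvm::isUInt<16>(ByteOffset); // true; 65536 would not be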
llvm::AMDGPU::CPol::GLC
@ GLC
Definition: SIDefines.h:292
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
uint64_t
llvm::ConstantFPSDNode
Definition: SelectionDAGNodes.h:1605
llvm::SelectionDAGISel::TII
const TargetInstrInfo * TII
Definition: SelectionDAGISel.h:52
llvm::MemSDNode::getMemOperand
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by the operation.
Definition: SelectionDAGNodes.h:1340
llvm::ISD::LOAD
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:921
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:80
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::AMDGPUISD::LOAD_D16_HI
@ LOAD_D16_HI
Definition: AMDGPUISelLowering.h:478
getBaseWithOffsetUsingSplitOR
static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr, SDValue &N0, SDValue &N1)
Definition: AMDGPUISelDAGToDAG.cpp:722
llvm::SelectionDAGISel::FuncInfo
std::unique_ptr< FunctionLoweringInfo > FuncInfo
Definition: SelectionDAGISel.h:43
llvm::GCNSubtarget::hasAddNoCarry
bool hasAddNoCarry() const
Definition: GCNSubtarget.h:625
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::GCNSubtarget::privateMemoryResourceIsRangeChecked
bool privateMemoryResourceIsRangeChecked() const
Definition: GCNSubtarget.h:480
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:511
llvm::AMDGPUISD::FMAD_FTZ
@ FMAD_FTZ
Definition: AMDGPUISelLowering.h:404
llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition: SelectionDAGNodes.h:904
llvm::AMDGPUISD::ATOMIC_DEC
@ ATOMIC_DEC
Definition: AMDGPUISelLowering.h:494
llvm::SelectionDAG::getNode
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
Definition: SelectionDAG.cpp:8345
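A minimal sketch, assuming the selector context of this file (LHS and RHS as existing i32 SDValues):

// Returns an existing identical node if one exists (nodes are CSE'd),
// otherwise creates a new one.
SDValue Sum = CurDAG->getNode(ISD::ADD, DL, MVT::i32, LHS, RHS);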
llvm::AMDGPUISD::CVT_PK_I16_I32
@ CVT_PK_I16_I32
Definition: AMDGPUISelLowering.h:456
llvm::AMDGPUISD::BFE_I32
@ BFE_I32
Definition: AMDGPUISelLowering.h:420
llvm::ISD::ATOMIC_CMP_SWAP
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1125
AMDGPUDAGToDAGISel::getPassName
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
Definition: AMDGPUISelDAGToDAG.cpp:769
llvm::LoadSDNode::getExtensionType
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Definition: SelectionDAGNodes.h:2296
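A sketch of the usual dispatch on the extension kind (Ld is an assumed LoadSDNode*):

switch (Ld->getExtensionType()) {
case ISD::SEXTLOAD: /* select a sign-extending machine load */ break;
case ISD::ZEXTLOAD: /* select a zero-extending machine load */ break;
default:            /* plain or any-extending load */          break;
}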
llvm::AMDGPUArgumentUsageInfo
Definition: AMDGPUArgumentUsageInfo.h:158
llvm::SelectionDAG::MorphNodeTo
SDNode * MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs, ArrayRef< SDValue > Ops)
This mutates the specified node to have the specified return type, opcode, and operands.
Definition: SelectionDAG.cpp:8917
llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPU.h:360
llvm::isUInt< 32 >
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:411
llvm::SDValue::getValue
SDValue getValue(unsigned R) const
Definition: SelectionDAGNodes.h:172
llvm::TargetMachine::Options
TargetOptions Options
Definition: TargetMachine.h:120
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:79
llvm::AMDGPUISD::SIN_HW
@ SIN_HW
Definition: AMDGPUISelLowering.h:384
llvm::isUInt< 8 >
constexpr bool isUInt< 8 >(uint64_t x)
Definition: MathExtras.h:405
llvm::ConstantSDNode::getZExtValue
uint64_t getZExtValue() const
Definition: SelectionDAGNodes.h:1571
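This accessor appears in the ubiquitous immediate-matching pattern (a sketch; N is an assumed SDNode*):

if (auto *C = llvm::dyn_cast<llvm::ConstantSDNode>(N->getOperand(1))) {
  uint64_t Imm = C->getZExtValue();
  // ... try to encode Imm directly in the selected instruction ...
}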
llvm::AMDGPUISD::RSQ
@ RSQ
Definition: AMDGPUISelLowering.h:409
llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1361
llvm::MCInstrDesc::OpInfo
const MCOperandInfo * OpInfo
Definition: MCInstrDesc.h:206
llvm::SelectionDAGISel::CurDAG
SelectionDAG * CurDAG
Definition: SelectionDAGISel.h:47
llvm::SelectionDAG::getMachineNode
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
Definition: SelectionDAG.cpp:9018
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::ISD::SUBCARRY
@ SUBCARRY
Definition: ISDOpcodes.h:291
llvm::SelectionDAG::setNodeMemRefs
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
Definition: SelectionDAG.cpp:8786
llvm::AMDGPUAS::FLAT_ADDRESS
@ FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:358
llvm::GCNSubtarget::getLDSBankCount
int getLDSBankCount() const
Definition: GCNSubtarget.h:267
AMDGPUDAGToDAGISel::Select
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
Definition: AMDGPUISelDAGToDAG.cpp:506
llvm::SelectionDAG::SelectNodeTo
SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
Definition: SelectionDAG.cpp:8810
R600RegisterInfo.h
llvm::isMask_32
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:467
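For example:

#include "llvm/Support/MathExtras.h"

bool LowMask = llvm::isMask_32(0x0000FFFF); // true: ones start at bit 0
bool NotMask = llvm::isMask_32(0x0000FF00); // false: gap below the ones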
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::MachineFunction
Definition: MachineFunction.h:234
llvm::AMDGPU::CPol::SCC
@ SCC
Definition: SIDefines.h:295
SelectionDAGISel.h
llvm::LoopInfo
Definition: LoopInfo.h:1083
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:179
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::GCNSubtarget::hasFlatInstOffsets
bool hasFlatInstOffsets() const
Definition: GCNSubtarget.h:546
llvm::EVT::getScalarSizeInBits
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:353
AMDGPU.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::AMDGPUISD::LOAD_D16_LO_U8
@ LOAD_D16_LO_U8
Definition: AMDGPUISelLowering.h:483
llvm::SelectionDAG::ReplaceAllUsesOfValueWith
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
Definition: SelectionDAG.cpp:9606
uint32_t
llvm::SDValue::getOperand
const SDValue & getOperand(unsigned i) const
Definition: SelectionDAGNodes.h:1129
DL
DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::ISD::UNDEF
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:211
llvm::ISD::FEXP
@ FEXP
Definition: ISDOpcodes.h:877
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::ISD::FEXP2
@ FEXP2
Definition: ISDOpcodes.h:878
llvm::CodeGenOpt::Level
Level
Definition: CodeGen.h:52
llvm::SDVTList
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Definition: SelectionDAGNodes.h:79
llvm::ISD::FMUL
@ FMUL
Definition: ISDOpcodes.h:379
llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition: ISDOpcodes.h:1341
MRI
const MachineRegisterInfo *MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::ISD::INTRINSIC_WO_CHAIN
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
llvm::LoadSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2300
llvm::LoopInfoBase::getLoopsInPreorder
SmallVector< LoopT *, 4 > getLoopsInPreorder() const
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
Definition: LoopInfoImpl.h:578
llvm::ISD::FSQRT
@ FSQRT
Definition: ISDOpcodes.h:868
llvm::SISrcMods::OP_SEL_0
@ OP_SEL_0
Definition: SIDefines.h:213
llvm::SelectionDAGISel::MF
MachineFunction * MF
Definition: SelectionDAGISel.h:45
llvm::AMDGPUISD::RCP
@ RCP
Definition: AMDGPUISelLowering.h:408
llvm::ISD::FMAXNUM
@ FMAXNUM
Definition: ISDOpcodes.h:899
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:596
llvm::AMDGPUISD::LOAD_D16_HI_U8
@ LOAD_D16_HI_U8
Definition: AMDGPUISelLowering.h:481
llvm::AMDGPUISD::MAD_U64_U32
@ MAD_U64_U32
Definition: AMDGPUISelLowering.h:432
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:324
llvm::MemSDNode::getAddressSpace
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Definition: SelectionDAGNodes.h:1347
llvm::ilist_iterator
Iterator for intrusive lists based on ilist_node.
Definition: ilist_iterator.h:57
llvm::AMDGPUISD::ATOMIC_LOAD_FMIN
@ ATOMIC_LOAD_FMIN
Definition: AMDGPUISelLowering.h:495
llvm::ISD::FCOS
@ FCOS
Definition: ISDOpcodes.h:871
llvm::SelectionDAG::getEntryNode
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:516
llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:879
llvm::ISD::FSIN
@ FSIN
Definition: ISDOpcodes.h:870
llvm::ISD::BUILD_VECTOR
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:491
llvm::SITargetLowering
Definition: SIISelLowering.h:31
llvm::SelectionDAG::getTargetExtractSubreg
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
Definition: SelectionDAG.cpp:9136
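A hedged sketch, assuming the selector context of this file (Val64 as an existing i64 SDValue); AMDGPU::sub0 is the conventional low-half subregister index:

SDValue Lo = CurDAG->getTargetExtractSubreg(AMDGPU::sub0, DL, MVT::i32, Val64);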
llvm::SelectionDAGISel::ReplaceUses
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
Definition: SelectionDAGISel.h:206
llvm::ISD::SUBC
@ SUBC
Definition: ISDOpcodes.h:270
llvm::GCNSubtarget::ldsRequiresM0Init
bool ldsRequiresM0Init() const
Return true if most LDS instructions have an m0 use that requires m0 to be initialized.
Definition: GCNSubtarget.h:603
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition: SelectionDAGNodes.h:138
gwsIntrinToOpcode
static unsigned gwsIntrinToOpcode(unsigned IntrID)
Definition: AMDGPUISelDAGToDAG.cpp:2286
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:183
llvm::AMDGPUPerfHintAnalysis
Definition: AMDGPUPerfHintAnalysis.h:23
llvm::ISD::STORE
@ STORE
Definition: ISDOpcodes.h:922
llvm::AMDGPUISD::CVT_PKRTZ_F16_F32
@ CVT_PKRTZ_F16_F32
Definition: AMDGPUISelLowering.h:453
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:413
llvm::ISD::UINT_TO_FP
@ UINT_TO_FP
Definition: ISDOpcodes.h:740
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
llvm::GCNSubtarget::getGeneration
Generation getGeneration() const
Definition: GCNSubtarget.h:258
llvm::SIInstrInfo
Definition: SIInstrInfo.h:38
llvm::AMDGPU::isInlinableLiteral32
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
Definition: AMDGPUBaseInfo.cpp:1729
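For example (the HasInv2Pi flag additionally admits the 1/(2*pi) literal on newer subtargets):

int32_t OneF32 = 0x3F800000; // bit pattern of 1.0f, an inline constant
bool Inline = llvm::AMDGPU::isInlinableLiteral32(OneF32, /*HasInv2Pi=*/true); // true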
llvm::EVT::getVectorElementType
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:301
llvm::ISD::FP_EXTEND
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:833
llvm::TargetOptions::NoNaNsFPMath
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
Definition: TargetOptions.h:173
llvm::AMDGPUISD::FRACT
@ FRACT
Definition: AMDGPUISelLowering.h:364
llvm::ISD::FSUB
@ FSUB
Definition: ISDOpcodes.h:378
llvm::ISD::SHL
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:657
llvm::M68kBeads::Term
@ Term
Definition: M68kBaseInfo.h:71
LegacyDivergenceAnalysis.h
llvm::SelectionDAGISel
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
Definition: SelectionDAGISel.h:39
llvm::AMDGPUISD::FMAX3
@ FMAX3
Definition: AMDGPUISelLowering.h:388
llvm::GCNSubtarget::getConstantBusLimit
unsigned getConstantBusLimit(unsigned Opcode) const
Definition: AMDGPUSubtarget.cpp:323
llvm::ISD::FREM
@ FREM
Definition: ISDOpcodes.h:381
llvm::AMDGPUISD::MAD_I64_I32
@ MAD_I64_I32
Definition: AMDGPUISelLowering.h:433
Dominators.h
N
#define N
llvm::ISD::SUBE
@ SUBE
Definition: ISDOpcodes.h:280
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:335
llvm::ISD::SRL
@ SRL
Definition: ISDOpcodes.h:659
llvm::TargetMachine::getTargetTriple
const Triple & getTargetTriple() const
Definition: TargetMachine.h:128
llvm::GCNSubtarget::d16PreservesUnusedBits
bool d16PreservesUnusedBits() const
Definition: GCNSubtarget.h:593
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel", "AMDGPU DAG->DAG Pattern Instruction Selection", false, false) INITIALIZE_PASS_END(AMDGPUDAGToDAGISel
llvm::AMDGPUISD::COS_HW
@ COS_HW
Definition: AMDGPUISelLowering.h:383
llvm::SDValue::getOpcode
unsigned getOpcode() const
Definition: SelectionDAGNodes.h:1117
llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:637
llvm::SIInstrFlags::WQM
@ WQM
Definition: SIDefines.h:70
TM
LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::ISD::INTRINSIC_W_CHAIN
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
llvm::ISD::FNEG
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:866
llvm::SelectionDAG::getMachineFunction
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:437
llvm::AMDGPUAS::CONSTANT_ADDRESS_32BIT
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
Definition: AMDGPU.h:366
llvm::ISD::BUILD_PAIR
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::AMDGPUISD::DWORDADDR
@ DWORDADDR
Definition: AMDGPUISelLowering.h:363
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::ISD::TargetFrameIndex
@ TargetFrameIndex
Definition: ISDOpcodes.h:166
llvm::ISD::FTRUNC
@ FTRUNC
Definition: ISDOpcodes.h:880
llvm::AMDGPUISD::RCP_IFLAG
@ RCP_IFLAG
Definition: AMDGPUISelLowering.h:411
llvm::AMDGPUISD::ATOMIC_INC
@ ATOMIC_INC
Definition: AMDGPUISelLowering.h:493
llvm::ISD::FCANONICALIZE
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:483
llvm::ISD::FLOG
@ FLOG
Definition: ISDOpcodes.h:874
InitializePasses.h
llvm::GCNSubtarget::hasUsableDSOffset
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition: GCNSubtarget.h:395
llvm::ISD::ADDE
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:279
llvm::ISD::FP_ROUND
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:814
llvm::AMDGPU::SIModeRegisterDefaults::allFP32Denormals
bool allFP32Denormals() const
Definition: AMDGPUBaseInfo.h:960
llvm::MCInstrDesc::getNumOperands
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:228
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::SelectionDAG::getTarget
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:441
llvm::EVT::bitsEq
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:229
AMDGPUTargetMachine.h
SubReg
unsigned SubReg
Definition: AArch64AdvSIMDScalarPass.cpp:104
llvm::AMDGPUISD::BFE_U32
@ BFE_U32
Definition: AMDGPUISelLowering.h:419
llvm::ISD::SCALAR_TO_VECTOR
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:583
AMDGPUDAGToDAGISel::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
Definition: AMDGPUISelDAGToDAG.cpp:124
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1184
llvm::AMDGPUISD::LOAD_D16_LO
@ LOAD_D16_LO
Definition: AMDGPUISelLowering.h:479
IsCopyFromSGPR
static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val)
Definition: AMDGPUISelDAGToDAG.cpp:1390
llvm::ISD::FDIV
@ FDIV
Definition: ISDOpcodes.h:380