//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUISelDAGToDAG.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "R600RegisterInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"

#ifdef EXPENSIVE_CHECKS
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#endif

#define DEBUG_TYPE "isel"

using namespace llvm;

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

static SDValue stripBitcast(SDValue Val) {
  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}

// Figure out if this is really an extract of the high 16 bits of a dword.
static bool isExtractHiElt(SDValue In, SDValue &Out) {
  In = stripBitcast(In);

  if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
      if (!Idx->isOne())
        return false;
      Out = In.getOperand(0);
      return true;
    }
  }

  if (In.getOpcode() != ISD::TRUNCATE)
    return false;

  SDValue Srl = In.getOperand(0);
  if (Srl.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
      if (ShiftAmt->getZExtValue() == 16) {
        Out = stripBitcast(Srl.getOperand(0));
        return true;
      }
    }
  }

  return false;
}

// Look through operations that obscure just looking at the low 16 bits of the
// same register.
static SDValue stripExtractLoElt(SDValue In) {
  if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
      if (Idx->isZero() && In.getValueSizeInBits() <= 32)
        return In.getOperand(0);
    }
  }

  if (In.getOpcode() == ISD::TRUNCATE) {
    SDValue Src = In.getOperand(0);
    if (Src.getValueType().getSizeInBits() == 32)
      return stripBitcast(Src);
  }

  return In;
}

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
#ifdef EXPENSIVE_CHECKS
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
#endif
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false,
                    false)

/// This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(
    TargetMachine *TM /*= nullptr*/,
    CodeGenOpt::Level OptLevel /*= CodeGenOpt::Default*/)
    : SelectionDAGISel(*TM, OptLevel) {
  EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
#ifdef EXPENSIVE_CHECKS
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  for (auto &L : LI->getLoopsInPreorder()) {
    assert(L->isLCSSAForm(DT));
  }
#endif
  Subtarget = &MF.getSubtarget<GCNSubtarget>();
  Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction());
  return SelectionDAGISel::runOnMachineFunction(MF);
}

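// A sketch of the intended contract, inferred from the opcode list below:
// returns true if the f16 result of \p Opc is known to write zeros to the
// unused high 16 bits of the 32-bit register it is selected into.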
bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
  // XXX - only need to list legal operations.
  switch (Opc) {
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FMUL:
  case ISD::FDIV:
  case ISD::FREM:
  case ISD::FCANONICALIZE:
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP:
  case ISD::FABS:
    // Fabs is lowered to a bit operation, but it's an and which will clear the
    // high bits anyway.
  case ISD::FSQRT:
  case ISD::FSIN:
  case ISD::FCOS:
  case ISD::FPOWI:
  case ISD::FPOW:
  case ISD::FLOG:
  case ISD::FLOG2:
  case ISD::FLOG10:
  case ISD::FEXP:
  case ISD::FEXP2:
  case ISD::FCEIL:
  case ISD::FTRUNC:
  case ISD::FRINT:
  case ISD::FNEARBYINT:
  case ISD::FROUND:
  case ISD::FFLOOR:
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
  case AMDGPUISD::FRACT:
  case AMDGPUISD::CLAMP:
  case AMDGPUISD::COS_HW:
  case AMDGPUISD::SIN_HW:
  case AMDGPUISD::FMIN3:
  case AMDGPUISD::FMAX3:
  case AMDGPUISD::FMED3:
  case AMDGPUISD::FMAD_FTZ:
  case AMDGPUISD::RCP:
  case AMDGPUISD::RSQ:
  case AMDGPUISD::RCP_LEGACY:
  case AMDGPUISD::LDEXP:
    // On gfx10, all 16-bit instructions preserve the high bits.
    return Subtarget->getGeneration() <= AMDGPUSubtarget::GFX9;
  case ISD::FP_ROUND:
    // We may select fptrunc (fma/mad) to mad_mixlo, which does not zero the
    // high bits on gfx9.
    // TODO: If we had the source node we could see if the source was fma/mad
    return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
  case ISD::FMA:
  case ISD::FMAD:
  case AMDGPUISD::DIV_FIXUP:
    return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
  default:
    // fcopysign, select and others may be lowered to 32-bit bit operations
    // which don't zero the high bits.
    return false;
  }
}

void AMDGPUDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<AMDGPUArgumentUsageInfo>();
  AU.addRequired<LegacyDivergenceAnalysis>();
#ifdef EXPENSIVE_CHECKS
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<LoopInfoWrapperPass>();
#endif
  SelectionDAGISel::getAnalysisUsage(AU);
}

bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
  assert(Subtarget->d16PreservesUnusedBits());
  MVT VT = N->getValueType(0).getSimpleVT();
  if (VT != MVT::v2i16 && VT != MVT::v2f16)
    return false;

  SDValue Lo = N->getOperand(0);
  SDValue Hi = N->getOperand(1);

  LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));

  // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
  // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
  // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo

  // Need to check for possible indirect dependencies on the other half of the
  // vector to avoid introducing a cycle.
  if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
    SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
    SDValue Ops[] = {
      LdHi->getChain(), LdHi->getBasePtr(), TiedIn
    };

    unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
    if (LdHi->getMemoryVT() == MVT::i8) {
      LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
        AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
    } else {
      assert(LdHi->getMemoryVT() == MVT::i16);
    }

    SDValue NewLoadHi =
      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
                                  Ops, LdHi->getMemoryVT(),
                                  LdHi->getMemOperand());

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
    return true;
  }

  // build_vector (load ptr), hi -> load_d16_lo ptr, hi
  // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
  // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
  LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
  if (LdLo && Lo.hasOneUse()) {
    SDValue TiedIn = getHi16Elt(Hi);
    if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
      return false;

    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
    unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
    if (LdLo->getMemoryVT() == MVT::i8) {
      LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
        AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
    } else {
      assert(LdLo->getMemoryVT() == MVT::i16);
    }

    TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);

    SDValue Ops[] = {
      LdLo->getChain(), LdLo->getBasePtr(), TiedIn
    };

    SDValue NewLoadLo =
      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
                                  Ops, LdLo->getMemoryVT(),
                                  LdLo->getMemOperand());

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
    return true;
  }

  return false;
}

void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->d16PreservesUnusedBits())
    return;

  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty())
      continue;

    switch (N->getOpcode()) {
    case ISD::BUILD_VECTOR:
      MadeChange |= matchLoadD16FromBuildVector(N);
      break;
    default:
      break;
    }
  }

  if (MadeChange) {
    CurDAG->RemoveDeadNodes();
    LLVM_DEBUG(dbgs() << "After PreProcess:\n";
               CurDAG->dump(););
  }
}

bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (N->getFlags().hasNoNaNs())
    return true;

  return CurDAG->isKnownNeverNaN(N);
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
                                           bool Negated) const {
  if (N->isUndef())
    return true;

  const SIInstrInfo *TII = Subtarget->getInstrInfo();
  if (Negated) {
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
      return TII->isInlineConstant(-C->getAPIntValue());

    if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
      return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());

  } else {
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
      return TII->isInlineConstant(C->getAPIntValue());

    if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
      return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
  }

  return false;
}

/// Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::CopyToReg) {
      Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (Reg.isVirtual()) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      const SIRegisterInfo *TRI
        = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToOp(SDNode *N, SDValue NewChain,
                                         SDValue Glue) const {
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(NewChain); // Replace the chain.
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
    Ops.push_back(N->getOperand(i));

  Ops.push_back(Glue);
  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");

  SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), Val);
  return glueCopyToOp(N, M0, M0.getValue(1));
}

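// Where the subtarget requires it, initialize M0 before DS/GDS memory
// operations: LDS accesses get an all-ones bound (-1), and region (GDS)
// accesses get the GDS size recorded in the machine function info.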
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
  unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
  if (AS == AMDGPUAS::LOCAL_ADDRESS) {
    if (Subtarget->ldsRequiresM0Init())
      return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
  } else if (AS == AMDGPUAS::REGION_ADDRESS) {
    MachineFunction &MF = CurDAG->getMachineFunction();
    unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
    return
        glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
  }
  return N;
}

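// Materialize a 64-bit immediate in scalar registers: one S_MOV_B32 per
// 32-bit half, combined into the requested 64-bit register class with a
// REG_SEQUENCE.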
MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
                                                  EVT VT) const {
  SDNode *Lo = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32,
      CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
  SDNode *Hi =
      CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                             CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32));
  const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};

  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
}

void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  unsigned NumVectorElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDLoc DL(N);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

  if (NumVectorElts == 1) {
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         RegClass);
    return;
  }

  assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
                                "supported yet");
  // 32 = Max Num Vector Elements
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
  // 1 = Vector Register Class
  SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

  bool IsGCN = CurDAG->getSubtarget().getTargetTriple().getArch() ==
               Triple::amdgcn;
  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // XXX: Why is this here?
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      IsRegSeq = false;
      break;
    }
    unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
                         : R600RegisterInfo::getSubRegFromChannel(i);
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // Fill in the missing undef elements if this was a scalar_to_vector.
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   DL, EltVT);
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
                           : R600RegisterInfo::getSubRegFromChannel(i);
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(Sub, DL, MVT::i32);
    }
  }

  if (!IsRegSeq)
    SelectCode(N);
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}

void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  // isa<MemSDNode> almost works but is slightly too permissive for some DS
  // intrinsics.
  if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
       Opc == ISD::ATOMIC_LOAD_FADD ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) {
    N = glueCopyToM0LDSInit(N);
    SelectCode(N);
    return;
  }

  switch (Opc) {
  default:
    break;
  // We are selecting i64 ADD here instead of custom lowering it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::ADDCARRY:
  case ISD::SUBCARRY:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectAddcSubb(N);
    return;
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    if (VT.getScalarSizeInBits() == 16) {
      if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
        if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
          ReplaceNode(N, Packed);
          return;
        }
      }

      break;
    }

    unsigned RegClassID =
        SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID();
    SelectBuildVector(N, RegClassID);
    return;
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
    return;
  }
  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

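    // A sketch of the encoding getBFE32 (defined elsewhere in this file) is
    // expected to produce: the scalar S_BFE forms take a single packed source
    // operand of the form (WidthVal << 16) | OffsetVal, while the VALU V_BFE
    // forms keep offset and width as separate operands.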
    ReplaceNode(N, getBFE32(Signed, SDLoc(N), N->getOperand(0), OffsetVal,
                            WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case AMDGPUISD::MAD_I64_I32:
  case AMDGPUISD::MAD_U64_U32: {
    SelectMAD_64_32(N);
    return;
  }
  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
    return SelectMUL_LOHI(N);
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  case ISD::FMAD:
  case ISD::FMA:
    SelectFMAD_FMA(N);
    return;
  case AMDGPUISD::CVT_PKRTZ_F16_F32:
  case AMDGPUISD::CVT_PKNORM_I16_F32:
  case AMDGPUISD::CVT_PKNORM_U16_F32:
  case AMDGPUISD::CVT_PK_U16_U32:
  case AMDGPUISD::CVT_PK_I16_I32: {
    // Hack around using a legal type if f16 is illegal.
    if (N->getValueType(0) == MVT::i32) {
      MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
      N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
                              { N->getOperand(0), N->getOperand(1) });
      SelectCode(N);
      return;
    }

    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    SelectINTRINSIC_W_CHAIN(N);
    return;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    SelectINTRINSIC_WO_CHAIN(N);
    return;
  }
  case ISD::INTRINSIC_VOID: {
    SelectINTRINSIC_VOID(N);
    return;
  }
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
                                             unsigned ShAmtBits) const {
  assert(N->getOpcode() == ISD::AND);

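  // The AND is redundant if its mask keeps at least the low ShAmtBits bits of
  // the shift amount: either the constant mask already has that many trailing
  // ones, or it does once the known-zero bits of the other operand are folded
  // in.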
  const APInt &RHS = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
  if (RHS.countTrailingOnes() >= ShAmtBits)
    return true;

  const APInt &LHSKnownZeros = CurDAG->computeKnownBits(N->getOperand(0)).Zero;
  return (LHSKnownZeros | RHS).countTrailingOnes() >= ShAmtBits;
}

static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr,
                                          SDValue &N0, SDValue &N1) {
  if (Addr.getValueType() == MVT::i64 && Addr.getOpcode() == ISD::BITCAST &&
      Addr.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
    // As we split 64-bit `or` earlier, it's a complicated pattern to match,
    // i.e.
    // (i64 (bitcast (v2i32 (build_vector
    //                        (or (extract_vector_elt V, 0), OFFSET),
    //                        (extract_vector_elt V, 1)))))
    SDValue Lo = Addr.getOperand(0).getOperand(0);
    if (Lo.getOpcode() == ISD::OR && DAG.isBaseWithConstantOffset(Lo)) {
      SDValue BaseLo = Lo.getOperand(0);
      SDValue BaseHi = Addr.getOperand(0).getOperand(1);
      // Check that split base (Lo and Hi) are extracted from the same one.
      if (BaseLo.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          BaseHi.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          BaseLo.getOperand(0) == BaseHi.getOperand(0) &&
          // Lo is statically extracted from index 0.
          isa<ConstantSDNode>(BaseLo.getOperand(1)) &&
          BaseLo.getConstantOperandVal(1) == 0 &&
          // Hi is statically extracted from index 1.
          isa<ConstantSDNode>(BaseHi.getOperand(1)) &&
          BaseHi.getConstantOperandVal(1) == 1) {
        N0 = BaseLo.getOperand(0).getOperand(0);
        N1 = Lo.getOperand(1);
        return true;
      }
    }
  }
  return false;
}

bool AMDGPUDAGToDAGISel::isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
                                                    SDValue &RHS) const {
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    LHS = Addr.getOperand(0);
    RHS = Addr.getOperand(1);
    return true;
  }

  if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, LHS, RHS)) {
    assert(LHS && RHS && isa<ConstantSDNode>(RHS));
    return true;
  }

  return false;
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
                                                       const SDLoc &DL) const {
  SDNode *Mov = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32,
      CurDAG->getTargetConstant(Val, DL, MVT::i32));
  return SDValue(Mov, 0);
}

// FIXME: Should only handle addcarry/subcarry
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

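  // Opcode table layout: the first index selects the carry-consuming variant
  // (0 = plain add/sub for the low half, 1 = addc/subb for the high half and
  // for ADDE/SUBE), the second selects SALU vs. VALU based on divergence, and
  // the third selects sub vs. add.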
  static const unsigned OpcMap[2][2][2] = {
      {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
       {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
      {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
       {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};

  unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
  unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo, 0),
    Sub0,
    SDValue(AddHi, 0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  ReplaceNode(N, RegSequence);
}

void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue CI = N->getOperand(2);

  if (N->isDivergent()) {
    unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
                                                   : AMDGPU::V_SUBB_U32_e64;
    CurDAG->SelectNodeTo(
        N, Opc, N->getVTList(),
        {LHS, RHS, CI,
         CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
  } else {
    unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::S_ADD_CO_PSEUDO
                                                   : AMDGPU::S_SUB_CO_PSEUDO;
    CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
  }
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The names of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
  // carry out despite the _i32 name. These were renamed in VI to _U32.
  // FIXME: We should probably rename the opcodes here.
  bool IsAdd = N->getOpcode() == ISD::UADDO;
  bool IsVALU = N->isDivergent();

  for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E;
       ++UI)
    if (UI.getUse().getResNo() == 1) {
      if ((IsAdd && (UI->getOpcode() != ISD::ADDCARRY)) ||
          (!IsAdd && (UI->getOpcode() != ISD::SUBCARRY))) {
        IsVALU = true;
        break;
      }
    }

  if (IsVALU) {
    unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;

    CurDAG->SelectNodeTo(
        N, Opc, N->getVTList(),
        {N->getOperand(0), N->getOperand(1),
         CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
  } else {
    unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
                                                : AMDGPU::S_USUBO_PSEUDO;

    CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                         {N->getOperand(0), N->getOperand(1)});
  }
}

void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  // If there are no source modifiers, prefer fmac over fma because it can use
  // the smaller VOP2 encoding.
  bool UseFMAC = Subtarget->hasDLInsts() &&
                 cast<ConstantSDNode>(Ops[0])->isZero() &&
                 cast<ConstantSDNode>(Ops[2])->isZero() &&
                 cast<ConstantSDNode>(Ops[4])->isZero();
  unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;

  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
  // omod
  SDValue Ops[8];
  SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  SDLoc SL(N);
  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
  unsigned Opc;
  if (Subtarget->getGeneration() == AMDGPUSubtarget::GFX11)
    Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
                 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
  else
    Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;

  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                    Clamp };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {
  SDLoc SL(N);
  bool Signed = N->getOpcode() == ISD::SMUL_LOHI;
  unsigned Opc;
  if (Subtarget->getGeneration() == AMDGPUSubtarget::GFX11)
    Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
                 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
  else
    Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;

  SDValue Zero = CurDAG->getTargetConstant(0, SL, MVT::i64);
  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
  SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp};
  SDNode *Mad = CurDAG->getMachineNode(Opc, SL, N->getVTList(), Ops);
  if (!SDValue(N, 0).use_empty()) {
    SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32);
    SDNode *Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
                                        MVT::i32, SDValue(Mad, 0), Sub0);
    ReplaceUses(SDValue(N, 0), SDValue(Lo, 0));
  }
  if (!SDValue(N, 1).use_empty()) {
    SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32);
    SDNode *Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
                                        MVT::i32, SDValue(Mad, 0), Sub1);
    ReplaceUses(SDValue(N, 1), SDValue(Hi, 0));
  }
  CurDAG->RemoveDeadNode(N);
}

bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset) const {
  if (!isUInt<16>(Offset))
    return false;

  if (!Base || Subtarget->hasUsableDSOffset() ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue())) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isDSOffsetLegal(SDValue(), ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));

          // FIXME: Select to VOP3 version for with-carry.
          unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          MachineSDNode *MachineSub =
              CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
                                          unsigned Offset1,
                                          unsigned Size) const {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)
    return false;
  if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
    return false;

  if (!Base || Subtarget->hasUsableDSOffset() ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

// TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
}

bool AMDGPUDAGToDAGISel::SelectDS128Bit8ByteAligned(SDValue Addr, SDValue &Base,
                                                    SDValue &Offset0,
                                                    SDValue &Offset1) const {
  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);
}

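// The two offsets of a ds_read2/ds_write2 pair are 8-bit fields counted in
// units of the element size (4 bytes for the b32 forms, 8 for the b64 forms),
// which is why the byte offsets below are divided by Size and validated with
// isDSOffset2Legal before being encoded.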
bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
                                            SDValue &Offset0, SDValue &Offset1,
                                            unsigned Size) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned OffsetValue0 = C1->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;

    // (add n0, c0)
    if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C =
            dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned OffsetValue0 = C->getZExtValue();
      unsigned OffsetValue1 = OffsetValue0 + Size;

      if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub =
            CurDAG->getNode(ISD::SUB, DL, MVT::i32, Zero, Addr.getOperand(1));

        if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));
          unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          MachineSDNode *MachineSub = CurDAG->getMachineNode(
              SubOp, DL, MVT::getIntegerVT(Size * 8), Opnds);

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned OffsetValue0 = CAddr->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;

    if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero =
          CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
      return true;
    }
  }

  // default case

  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
                                     SDValue &SOffset, SDValue &Offset,
                                     SDValue &Offen, SDValue &Idxen,
                                     SDValue &Addr64) const {
  // Subtarget prefers to use flat instructions.
  // FIXME: This should be a pattern predicate and not reach here
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  ConstantSDNode *C1 = nullptr;
  SDValue N0 = Addr;
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    C1 = cast<ConstantSDNode>(Addr.getOperand(1));
    if (isUInt<32>(C1->getZExtValue()))
      N0 = Addr.getOperand(0);
    else
      C1 = nullptr;
  }

  if (N0.getOpcode() == ISD::ADD) {
    // (add N2, N3) -> addr64, or
    // (add (add N2, N3), C1) -> addr64
    SDValue N2 = N0.getOperand(0);
    SDValue N3 = N0.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);

    if (N2->isDivergent()) {
      if (N3->isDivergent()) {
        // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
        // addr64, and construct the resource from a 0 address.
        Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
        VAddr = N0;
      } else {
        // N2 is divergent, N3 is not.
        Ptr = N3;
        VAddr = N2;
      }
    } else {
      // N2 is not divergent.
      Ptr = N2;
      VAddr = N3;
    }
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  } else if (N0->isDivergent()) {
    // N0 is divergent. Use it as the addr64, and construct the resource from a
    // 0 address.
    Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
    VAddr = N0;
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
  } else {
    // N0 -> offset, or
    // (N0 + C1) -> offset
    VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Ptr = N0;
  }

  if (!C1) {
    // No offset.
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
    // Legal offset for instruction.
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    return true;
  }

  // Illegal offset, store it in soffset.
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  SOffset =
      SDValue(CurDAG->getMachineNode(
                  AMDGPU::S_MOV_B32, DL, MVT::i32,
                  CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
              0);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  // FIXME: This should be a pattern predicate and not reach here
  if (!Subtarget->hasAddr64())
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  SDLoc DL(N);

  auto *FI = dyn_cast<FrameIndexSDNode>(N);
  SDValue TFI =
      FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N;

  // We rebase the base address into an absolute stack address and hence
  // use constant 0 for soffset. This value must be retained until
  // frame elimination and eliminateFrameIndex will choose the appropriate
  // frame register if need be.
  return std::make_pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32));
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    int64_t Imm = CAddr->getSExtValue();
    const int64_t NullPtr =
        AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
    // Don't fold null pointer.
    if (Imm != NullPtr) {
      SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
      MachineSDNode *MovHighBits = CurDAG->getMachineNode(
          AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
      VAddr = SDValue(MovHighBits, 0);

      SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
      ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
      return true;
    }
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive if range checking is enabled.
    //
    // The total computation of vaddr + soffset + offset must not overflow. If
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    //
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    // always perform a range check. If a negative vaddr base index was used,
    // this would fail the range check. The overall address computation would
    // compute a valid address, but this doesn't happen due to the range
    // check. For out-of-bounds MUBUF loads, a 0 is returned.
    //
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    // MUBUF vaddr, but not on older subtargets which can only do this if the
    // sign bit is known 0.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val) {
  if (Val.getOpcode() != ISD::CopyFromReg)
    return false;
  auto RC =
      TRI.getPhysRegClass(cast<RegisterSDNode>(Val.getOperand(1))->getReg());
  return RC && TRI.isSGPRClass(RC);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  const SIRegisterInfo *TRI =
      static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  SDLoc DL(Addr);

  // CopyFromReg <sgpr>
  if (IsCopyFromSGPR(*TRI, Addr)) {
    SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
    SOffset = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  ConstantSDNode *CAddr;
  if (Addr.getOpcode() == ISD::ADD) {
    // Add (CopyFromReg <sgpr>) <constant>
    CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
      return false;
    if (!IsCopyFromSGPR(*TRI, Addr.getOperand(0)))
      return false;

    SOffset = Addr.getOperand(0);
  } else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&
             SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue())) {
    // <constant>
    SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  } else {
    return false;
  }

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset
                                           ) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnes(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

// Find a load or store from corresponding pattern root.
// Roots may be build_vector, bitconvert or their combinations.
static MemSDNode* findMemSDNode(SDNode *N) {
  N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode();
  if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
    return MN;
  assert(isa<BuildVectorSDNode>(N));
  for (SDValue V : N->op_values())
    if (MemSDNode *MN =
            dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
      return MN;
  llvm_unreachable("cannot find MemSDNode in the pattern!");
}

bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
                                              SDValue &VAddr, SDValue &Offset,
                                              uint64_t FlatVariant) const {
  int64_t OffsetVal = 0;

  unsigned AS = findMemSDNode(N)->getAddressSpace();

  bool CanHaveFlatSegmentOffsetBug =
      Subtarget->hasFlatSegmentOffsetBug() &&
      FlatVariant == SIInstrFlags::FLAT &&
      (AS == AMDGPUAS::FLAT_ADDRESS || AS == AMDGPUAS::GLOBAL_ADDRESS);

  if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
    SDValue N0, N1;
    if (isBaseWithConstantOffset64(Addr, N0, N1)) {
      int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();

      const SIInstrInfo *TII = Subtarget->getInstrInfo();
      if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
        Addr = N0;
        OffsetVal = COffsetVal;
      } else {
        // If the offset doesn't fit, put the low bits into the offset field and
        // add the rest.
        //
        // For a FLAT instruction the hardware decides whether to access
        // global/scratch/shared memory based on the high bits of vaddr,
        // ignoring the offset field, so we have to ensure that when we add
        // remainder to vaddr it still points into the same underlying object.
        // The easiest way to do that is to make sure that we split the offset
        // into two pieces that are both >= 0 or both <= 0.
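        // For example, a negative total offset must not be split into a
        // negative remainder added to vaddr plus a positive immediate: the
        // adjusted vaddr could then point below the object even though the
        // hardware ignores the immediate when classifying the address.
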
        SDLoc DL(N);
        uint64_t RemainderOffset;

        std::tie(OffsetVal, RemainderOffset) =
            TII->splitFlatOffset(COffsetVal, AS, FlatVariant);

        SDValue AddOffsetLo =
            getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
        SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);

        if (Addr.getValueType().getSizeInBits() == 32) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(N0);
          Opnds.push_back(AddOffsetLo);
          unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
          if (Subtarget->hasAddNoCarry()) {
            AddOp = AMDGPU::V_ADD_U32_e64;
            Opnds.push_back(Clamp);
          }
          Addr = SDValue(CurDAG->getMachineNode(AddOp, DL, MVT::i32, Opnds), 0);
        } else {
          // TODO: Should this try to use a scalar add pseudo if the base address
          // is uniform and saddr is usable?
          SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
          SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

          SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                DL, MVT::i32, N0, Sub0);
          SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                DL, MVT::i32, N0, Sub1);

          SDValue AddOffsetHi =
              getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);

          SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);

          SDNode *Add =
              CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64, DL, VTs,
                                     {AddOffsetLo, SDValue(N0Lo, 0), Clamp});

          SDNode *Addc = CurDAG->getMachineNode(
              AMDGPU::V_ADDC_U32_e64, DL, VTs,
              {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});

          SDValue RegSequenceArgs[] = {
              CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
              SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1};

          Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                                MVT::i64, RegSequenceArgs),
                         0);
        }
      }
    }
  }

  VAddr = Addr;
  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset) const {
  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FLAT);
}

bool AMDGPUDAGToDAGISel::SelectGlobalOffset(SDNode *N, SDValue Addr,
                                            SDValue &VAddr,
                                            SDValue &Offset) const {
  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FlatGlobal);
}

bool AMDGPUDAGToDAGISel::SelectScratchOffset(SDNode *N, SDValue Addr,
                                             SDValue &VAddr,
                                             SDValue &Offset) const {
  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,
                              SIInstrFlags::FlatScratch);
}

// If this matches zero_extend i32:x, return x
static SDValue matchZExtFromI32(SDValue Op) {
  if (Op.getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();

  SDValue ExtSrc = Op.getOperand(0);
  return (ExtSrc.getValueType() == MVT::i32) ? ExtSrc : SDValue();
}

1652 // Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
1653 bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
1654  SDValue Addr,
1655  SDValue &SAddr,
1656  SDValue &VOffset,
1657  SDValue &Offset) const {
1658  int64_t ImmOffset = 0;
1659 
1660  // Match the immediate offset first, which canonically is moved as low as
1661  // possible.
1662 
1663  SDValue LHS, RHS;
1664  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
1665  int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
1666  const SIInstrInfo *TII = Subtarget->getInstrInfo();
1667 
1668  if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
1670  Addr = LHS;
1671  ImmOffset = COffsetVal;
1672  } else if (!LHS->isDivergent()) {
1673  if (COffsetVal > 0) {
1674  SDLoc SL(N);
1675  // saddr + large_offset -> saddr +
1676  // (voffset = large_offset & ~MaxOffset) +
1677  // (large_offset & MaxOffset);
1678  int64_t SplitImmOffset, RemainderOffset;
1679  std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
1681 
1682  if (isUInt<32>(RemainderOffset)) {
1683  SDNode *VMov = CurDAG->getMachineNode(
1684  AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
1685  CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
1686  VOffset = SDValue(VMov, 0);
1687  SAddr = LHS;
1688  Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
1689  return true;
1690  }
1691  }
1692 
1693  // We are adding a 64 bit SGPR and a constant. If constant bus limit
1694  // is 1 we would need to perform 1 or 2 extra moves for each half of
1695  // the constant and it is better to do a scalar add and then issue a
1696  // single VALU instruction to materialize zero. Otherwise it is less
1697  // instructions to perform VALU adds with immediates or inline literals.
1698  unsigned NumLiterals =
1699  !TII->isInlineConstant(APInt(32, COffsetVal & 0xffffffff)) +
1700  !TII->isInlineConstant(APInt(32, COffsetVal >> 32));
1701  if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
1702  return false;
1703  }
1704  }
1705 
1706  // Match the variable offset.
1707  if (Addr.getOpcode() == ISD::ADD) {
1708  LHS = Addr.getOperand(0);
1709  RHS = Addr.getOperand(1);
1710 
1711  if (!LHS->isDivergent()) {
1712  // add (i64 sgpr), (zero_extend (i32 vgpr))
1713  if (SDValue ZextRHS = matchZExtFromI32(RHS)) {
1714  SAddr = LHS;
1715  VOffset = ZextRHS;
1716  }
1717  }
1718 
1719  if (!SAddr && !RHS->isDivergent()) {
1720  // add (zero_extend (i32 vgpr)), (i64 sgpr)
1721  if (SDValue ZextLHS = matchZExtFromI32(LHS)) {
1722  SAddr = RHS;
1723  VOffset = ZextLHS;
1724  }
1725  }
1726 
1727  if (SAddr) {
1728  Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
1729  return true;
1730  }
1731  }
1732 
1733  if (Addr->isDivergent() || Addr.getOpcode() == ISD::UNDEF ||
1734  isa<ConstantSDNode>(Addr))
1735  return false;
1736 
1737  // It's cheaper to materialize a single 32-bit zero for vaddr than the two
1738  // moves required to copy a 64-bit SGPR to VGPR.
1739  SAddr = Addr;
1740  SDNode *VMov =
1741  CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
1742  CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
1743  VOffset = SDValue(VMov, 0);
1744  Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
1745  return true;
1746 }
1747 
1748 static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr) {
1749  if (auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
1750  SAddr = CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
1751  } else if (SAddr.getOpcode() == ISD::ADD &&
1752  isa<FrameIndexSDNode>(SAddr.getOperand(0))) {
1753  // Materialize this into a scalar move so the address stays scalar and
1754  // no readfirstlane is needed.
1755  auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
1756  SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
1757  FI->getValueType(0));
1758  SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, SDLoc(SAddr),
1759  MVT::i32, TFI, SAddr.getOperand(1)),
1760  0);
1761  }
1762 
1763  return SAddr;
1764 }
1765 
1766 // Match (32-bit SGPR base) + sext(imm offset)
1767 bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
1768  SDValue &SAddr,
1769  SDValue &Offset) const {
1770  if (Addr->isDivergent())
1771  return false;
1772 
1773  SDLoc DL(Addr);
1774 
1775  int64_t COffsetVal = 0;
1776 
1777  if (CurDAG->isBaseWithConstantOffset(Addr)) {
1778  COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
1779  SAddr = Addr.getOperand(0);
1780  } else {
1781  SAddr = Addr;
1782  }
1783 
1784  SAddr = SelectSAddrFI(CurDAG, SAddr);
1785 
1786  const SIInstrInfo *TII = Subtarget->getInstrInfo();
1787 
1788  if (!TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS,
1789  SIInstrFlags::FlatScratch)) {
1790  int64_t SplitImmOffset, RemainderOffset;
1791  std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
1792  COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, SIInstrFlags::FlatScratch);
1793 
1794  COffsetVal = SplitImmOffset;
1795 
1796  SDValue AddOffset =
1797  SAddr.getOpcode() == ISD::TargetFrameIndex
1798  ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
1799  : CurDAG->getTargetConstant(RemainderOffset, DL, MVT::i32);
1800  SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, MVT::i32,
1801  SAddr, AddOffset),
1802  0);
1803  }
1804 
1805  Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i16);
1806 
1807  return true;
1808 }
1809 
1810 // Check whether the flat scratch SVS swizzle bug affects this access.
1811 bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
1812  SDValue VAddr, SDValue SAddr, uint64_t ImmOffset) const {
1813  if (!Subtarget->hasFlatScratchSVSSwizzleBug())
1814  return false;
1815 
1816  // The bug affects the swizzling of SVS accesses if there is any carry out
1817  // from the two low order bits (i.e. from bit 1 into bit 2) when adding
1818  // voffset to (soffset + inst_offset).
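  // For example, if voffset may be 3 (mod 4) and soffset + inst_offset may
  // be 1 (mod 4), the low two bits can sum past 3 and carry into bit 2, so
  // the access is conservatively treated as affected.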
1819  KnownBits VKnown = CurDAG->computeKnownBits(VAddr);
1820  KnownBits SKnown = KnownBits::computeForAddSub(
1821  true, false, CurDAG->computeKnownBits(SAddr),
1822  KnownBits::makeConstant(APInt(32, ImmOffset)));
1823  uint64_t VMax = VKnown.getMaxValue().getZExtValue();
1824  uint64_t SMax = SKnown.getMaxValue().getZExtValue();
1825  return (VMax & 3) + (SMax & 3) >= 4;
1826 }
1827 
1828 bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
1829  SDValue &VAddr, SDValue &SAddr,
1830  SDValue &Offset) const {
1831  int64_t ImmOffset = 0;
1832 
1833  SDValue LHS, RHS;
1834  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
1835  int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
1836  const SIInstrInfo *TII = Subtarget->getInstrInfo();
1837 
1838  if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true)) {
1839  Addr = LHS;
1840  ImmOffset = COffsetVal;
1841  } else if (!LHS->isDivergent() && COffsetVal > 0) {
1842  SDLoc SL(N);
1843  // saddr + large_offset -> saddr + (vaddr = large_offset & ~MaxOffset) +
1844  // (large_offset & MaxOffset);
1845  int64_t SplitImmOffset, RemainderOffset;
1846  std::tie(SplitImmOffset, RemainderOffset)
1847  = TII->splitFlatOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true);
1848 
1849  if (isUInt<32>(RemainderOffset)) {
1850  SDNode *VMov = CurDAG->getMachineNode(
1851  AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
1852  CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
1853  VAddr = SDValue(VMov, 0);
1854  SAddr = LHS;
1855  if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
1856  return false;
1857  Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
1858  return true;
1859  }
1860  }
1861  }
1862 
1863  if (Addr.getOpcode() != ISD::ADD)
1864  return false;
1865 
1866  LHS = Addr.getOperand(0);
1867  RHS = Addr.getOperand(1);
1868 
1869  if (!LHS->isDivergent() && RHS->isDivergent()) {
1870  SAddr = LHS;
1871  VAddr = RHS;
1872  } else if (!RHS->isDivergent() && LHS->isDivergent()) {
1873  SAddr = RHS;
1874  VAddr = LHS;
1875  } else {
1876  return false;
1877  }
1878 
1879  if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
1880  return false;
1881  SAddr = SelectSAddrFI(CurDAG, SAddr);
1882  Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
1883  return true;
1884 }
1885 
1886 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1887  SDValue &Offset, bool &Imm) const {
1888  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1889  if (!C) {
1890  if (ByteOffsetNode.getValueType().isScalarInteger() &&
1891  ByteOffsetNode.getValueType().getSizeInBits() == 32) {
1892  Offset = ByteOffsetNode;
1893  Imm = false;
1894  return true;
1895  }
1896  if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
1897  if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) {
1898  Offset = ByteOffsetNode.getOperand(0);
1899  Imm = false;
1900  return true;
1901  }
1902  }
1903  return false;
1904  }
1905 
1906  SDLoc SL(ByteOffsetNode);
1907  // GFX9 and GFX10 have signed byte immediate offsets.
1908  int64_t ByteOffset = C->getSExtValue();
1909  Optional<int64_t> EncodedOffset =
1910  AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, false);
1911  if (EncodedOffset) {
1912  Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
1913  Imm = true;
1914  return true;
1915  }
1916 
1917  // SGPR and literal offsets are unsigned.
1918  if (ByteOffset < 0)
1919  return false;
1920 
1921  EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset);
1922  if (EncodedOffset) {
1923  Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
1924  return true;
1925  }
1926 
1927  if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
1928  return false;
1929 
1930  SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1931  Offset = SDValue(
1932  CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
1933 
1934  return true;
1935 }
1936 
1937 SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
1938  if (Addr.getValueType() != MVT::i32)
1939  return Addr;
1940 
1941  // Zero-extend a 32-bit address.
1942  SDLoc SL(Addr);
1943 
1944  const MachineFunction &MF = CurDAG->getMachineFunction();
1945  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1946  unsigned AddrHiVal = Info->get32BitAddressHighBits();
1947  SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
1948 
1949  const SDValue Ops[] = {
1950  CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
1951  Addr,
1952  CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
1953  SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
1954  0),
1955  CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
1956  };
1957 
1958  return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
1959  Ops), 0);
1960 }
1961 
1962 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1963  SDValue &Offset, bool &Imm) const {
1964  SDLoc SL(Addr);
1965 
1966  // A 32-bit (address + offset) should not cause unsigned 32-bit integer
1967  // wraparound, because s_load instructions perform the addition in 64 bits.
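  // E.g. a 32-bit base of 0xfffffff0 plus an offset of 0x20 wraps to 0x10
  // in 32 bits but yields 0x100000010 in the 64-bit hardware add, so the
  // base/offset split is only safe when the add has the nuw flag.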
1968  if ((Addr.getValueType() != MVT::i32 ||
1969  Addr->getFlags().hasNoUnsignedWrap())) {
1970  SDValue N0, N1;
1971  // Extract the base and offset if possible.
1972  if (CurDAG->isBaseWithConstantOffset(Addr) ||
1973  Addr.getOpcode() == ISD::ADD) {
1974  N0 = Addr.getOperand(0);
1975  N1 = Addr.getOperand(1);
1976  } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
1977  assert(N0 && N1 && isa<ConstantSDNode>(N1));
1978  }
1979  if (N0 && N1) {
1980  if (SelectSMRDOffset(N1, Offset, Imm)) {
1981  SBase = Expand32BitAddress(N0);
1982  return true;
1983  }
1984  }
1985  }
1986  SBase = Expand32BitAddress(Addr);
1987  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
1988  Imm = true;
1989  return true;
1990 }
1991 
1992 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1993  SDValue &Offset) const {
1994  bool Imm = false;
1995  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1996 }
1997 
1998 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1999  SDValue &Offset) const {
2000 
2001  assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
2002 
2003  bool Imm = false;
2004  if (!SelectSMRD(Addr, SBase, Offset, Imm))
2005  return false;
2006 
2007  return !Imm && isa<ConstantSDNode>(Offset);
2008 }
2009 
2010 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
2011  SDValue &Offset) const {
2012  bool Imm = false;
2013  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
2014  !isa<ConstantSDNode>(Offset);
2015 }
2016 
2017 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
2018  SDValue &Offset) const {
2019  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
2020  // The immediate offset for S_BUFFER instructions is unsigned.
2021  if (auto Imm =
2022  AMDGPU::getSMRDEncodedOffset(*Subtarget, C->getZExtValue(), true)) {
2023  Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32);
2024  return true;
2025  }
2026  }
2027 
2028  return false;
2029 }
2030 
2031 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
2032  SDValue &Offset) const {
2033  assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
2034 
2035  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
2036  if (auto Imm = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget,
2037  C->getZExtValue())) {
2038  Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32);
2039  return true;
2040  }
2041  }
2042 
2043  return false;
2044 }
2045 
2046 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
2047  SDValue &Base,
2048  SDValue &Offset) const {
2049  SDLoc DL(Index);
2050 
2051  if (CurDAG->isBaseWithConstantOffset(Index)) {
2052  SDValue N0 = Index.getOperand(0);
2053  SDValue N1 = Index.getOperand(1);
2054  ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
2055 
2056  // (add n0, c0)
2057  // Don't peel off the offset (c0) if doing so could possibly lead
2058  // the base (n0) to be negative.
2059  // (or n0, |c0|) can never change a sign given isBaseWithConstantOffset.
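  // E.g. peeling 16 from (add n0, 16) is only done when the sign bit of n0
  // is known zero, while (or n0, 16) may always be split because the
  // operands share no set bits.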
2060  if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0) ||
2061  (Index->getOpcode() == ISD::OR && C1->getSExtValue() >= 0)) {
2062  Base = N0;
2063  Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
2064  return true;
2065  }
2066  }
2067 
2068  if (isa<ConstantSDNode>(Index))
2069  return false;
2070 
2071  Base = Index;
2072  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2073  return true;
2074 }
2075 
2076 SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,
2077  SDValue Val, uint32_t Offset,
2078  uint32_t Width) {
2079  if (Val->isDivergent()) {
2080  unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
2081  SDValue Off = CurDAG->getTargetConstant(Offset, DL, MVT::i32);
2082  SDValue W = CurDAG->getTargetConstant(Width, DL, MVT::i32);
2083 
2084  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, Off, W);
2085  }
2086  unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2087  // Transformation function: pack the offset and width of a BFE into
2088  // the format expected by S_BFE_I32 / S_BFE_U32. In the second
2089  // source, bits [5:0] contain the offset and bits [22:16] the width.
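  // For example, Offset = 8 and Width = 5 pack to 0x00050008.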
2090  uint32_t PackedVal = Offset | (Width << 16);
2091  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
2092 
2093  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
2094 }
2095 
2096 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
2097  // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
2098  // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
2099  // Predicate: 0 < b <= c < 32
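  // For example, with b = 8 and c = 16, "((a << 8) srl 16)" becomes
  // "BFE_U32 a, 8, 16", i.e. it extracts bits [23:8] of a.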
2100 
2101  const SDValue &Shl = N->getOperand(0);
2102  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
2103  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2104 
2105  if (B && C) {
2106  uint32_t BVal = B->getZExtValue();
2107  uint32_t CVal = C->getZExtValue();
2108 
2109  if (0 < BVal && BVal <= CVal && CVal < 32) {
2110  bool Signed = N->getOpcode() == ISD::SRA;
2111  ReplaceNode(N, getBFE32(Signed, SDLoc(N), Shl.getOperand(0), CVal - BVal,
2112  32 - CVal));
2113  return;
2114  }
2115  }
2116  SelectCode(N);
2117 }
2118 
2119 void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
2120  switch (N->getOpcode()) {
2121  case ISD::AND:
2122  if (N->getOperand(0).getOpcode() == ISD::SRL) {
2123  // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
2124  // Predicate: isMask(mask)
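  // For example, "(a srl 4) & 0xff" becomes "BFE_U32 a, 4, 8".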
2125  const SDValue &Srl = N->getOperand(0);
2126  ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
2127  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
2128 
2129  if (Shift && Mask) {
2130  uint32_t ShiftVal = Shift->getZExtValue();
2131  uint32_t MaskVal = Mask->getZExtValue();
2132 
2133  if (isMask_32(MaskVal)) {
2134  uint32_t WidthVal = countPopulation(MaskVal);
2135  ReplaceNode(N, getBFE32(false, SDLoc(N), Srl.getOperand(0), ShiftVal,
2136  WidthVal));
2137  return;
2138  }
2139  }
2140  }
2141  break;
2142  case ISD::SRL:
2143  if (N->getOperand(0).getOpcode() == ISD::AND) {
2144  // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
2145  // Predicate: isMask(mask >> b)
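  // For example, "((a & 0xff0) srl 4)" becomes "BFE_U32 a, 4, 8".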
2146  const SDValue &And = N->getOperand(0);
2147  ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
2148  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
2149 
2150  if (Shift && Mask) {
2151  uint32_t ShiftVal = Shift->getZExtValue();
2152  uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
2153 
2154  if (isMask_32(MaskVal)) {
2155  uint32_t WidthVal = countPopulation(MaskVal);
2156  ReplaceNode(N, getBFE32(false, SDLoc(N), And.getOperand(0), ShiftVal,
2157  WidthVal));
2158  return;
2159  }
2160  }
2161  } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
2162  SelectS_BFEFromShifts(N);
2163  return;
2164  }
2165  break;
2166  case ISD::SRA:
2167  if (N->getOperand(0).getOpcode() == ISD::SHL) {
2168  SelectS_BFEFromShifts(N);
2169  return;
2170  }
2171  break;
2172 
2173  case ISD::SIGN_EXTEND_INREG: {
2174  // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
2175  SDValue Src = N->getOperand(0);
2176  if (Src.getOpcode() != ISD::SRL)
2177  break;
2178 
2179  const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
2180  if (!Amt)
2181  break;
2182 
2183  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2184  ReplaceNode(N, getBFE32(true, SDLoc(N), Src.getOperand(0),
2185  Amt->getZExtValue(), Width));
2186  return;
2187  }
2188  }
2189 
2190  SelectCode(N);
2191 }
2192 
2193 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
2194  assert(N->getOpcode() == ISD::BRCOND);
2195  if (!N->hasOneUse())
2196  return false;
2197 
2198  SDValue Cond = N->getOperand(1);
2199  if (Cond.getOpcode() == ISD::CopyToReg)
2200  Cond = Cond.getOperand(2);
2201 
2202  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
2203  return false;
2204 
2205  MVT VT = Cond.getOperand(0).getSimpleValueType();
2206  if (VT == MVT::i32)
2207  return true;
2208 
2209  if (VT == MVT::i64) {
2210  auto ST = static_cast<const GCNSubtarget *>(Subtarget);
2211 
2212  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
2213  return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
2214  }
2215 
2216  return false;
2217 }
2218 
2219 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
2220  SDValue Cond = N->getOperand(1);
2221 
2222  if (Cond.isUndef()) {
2223  CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
2224  N->getOperand(2), N->getOperand(0));
2225  return;
2226  }
2227 
2228  const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
2229  const SIRegisterInfo *TRI = ST->getRegisterInfo();
2230 
2231  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
2232  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
2233  Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
2234  SDLoc SL(N);
2235 
2236  if (!UseSCCBr) {
2237  // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
2238  // analyzed what generates the vcc value, so we do not know whether vcc
2239  // bits for disabled lanes are 0. Thus we need to mask out bits for
2240  // disabled lanes.
2241  //
2242  // For the case that we select S_CBRANCH_SCC1 and it gets
2243  // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
2244  // SIInstrInfo::moveToVALU, which inserts the S_AND.
2245  //
2246  // We could add an analysis of what generates the vcc value here and omit
2247  // the S_AND when it is unnecessary. But it would be better to add a
2248  // separate pass after SIFixSGPRCopies that removes the unnecessary S_AND,
2249  // so it catches both cases.
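  // The net effect below is vcc = S_AND_B64 exec, cond (S_AND_B32 on
  // wave32), followed by S_CBRANCH_VCCNZ.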
2250  Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32
2251  : AMDGPU::S_AND_B64,
2252  SL, MVT::i1,
2253  CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO
2254  : AMDGPU::EXEC,
2255  MVT::i1),
2256  Cond),
2257  0);
2258  }
2259 
2260  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
2261  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
2262  N->getOperand(2), // Basic Block
2263  VCC.getValue(0));
2264 }
2265 
2266 void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
2267  MVT VT = N->getSimpleValueType(0);
2268  bool IsFMA = N->getOpcode() == ISD::FMA;
2269  if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
2270  !Subtarget->hasFmaMixInsts()) ||
2271  ((IsFMA && Subtarget->hasMadMixInsts()) ||
2272  (!IsFMA && Subtarget->hasFmaMixInsts()))) {
2273  SelectCode(N);
2274  return;
2275  }
2276 
2277  SDValue Src0 = N->getOperand(0);
2278  SDValue Src1 = N->getOperand(1);
2279  SDValue Src2 = N->getOperand(2);
2280  unsigned Src0Mods, Src1Mods, Src2Mods;
2281 
2282  // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
2283  // using the conversion from f16.
2284  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
2285  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
2286  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
2287 
2288  assert((IsFMA || !Mode.allFP32Denormals()) &&
2289  "fmad selected with denormals enabled");
2290  // TODO: We can select this with f32 denormals enabled if all the sources are
2291  // converted from f16 (in which case fmad isn't legal).
2292 
2293  if (Sel0 || Sel1 || Sel2) {
2294  // For dummy operands.
2295  SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2296  SDValue Ops[] = {
2297  CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
2298  CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
2299  CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
2300  CurDAG->getTargetConstant(0, SDLoc(), MVT::i1), // Clamp
2301  Zero, Zero
2302  };
2303 
2304  CurDAG->SelectNodeTo(N,
2305  IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
2306  MVT::f32, Ops);
2307  } else {
2308  SelectCode(N);
2309  }
2310 }
2311 
2312 void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
2313  // The address is assumed to be uniform, so if it ends up in a VGPR, it will
2314  // be copied to an SGPR with readfirstlane.
2315  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2316  AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2317 
2318  SDValue Chain = N->getOperand(0);
2319  SDValue Ptr = N->getOperand(2);
2320  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2321  MachineMemOperand *MMO = M->getMemOperand();
2322  bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
2323 
2324  SDValue Offset;
2325  if (CurDAG->isBaseWithConstantOffset(Ptr)) {
2326  SDValue PtrBase = Ptr.getOperand(0);
2327  SDValue PtrOffset = Ptr.getOperand(1);
2328 
2329  const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
2330  if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
2331  N = glueCopyToM0(N, PtrBase);
2332  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
2333  }
2334  }
2335 
2336  if (!Offset) {
2337  N = glueCopyToM0(N, Ptr);
2338  Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2339  }
2340 
2341  SDValue Ops[] = {
2342  Offset,
2343  CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
2344  Chain,
2345  N->getOperand(N->getNumOperands() - 1) // New glue
2346  };
2347 
2348  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2349  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2350 }
2351 
2352 static unsigned gwsIntrinToOpcode(unsigned IntrID) {
2353  switch (IntrID) {
2354  case Intrinsic::amdgcn_ds_gws_init:
2355  return AMDGPU::DS_GWS_INIT;
2356  case Intrinsic::amdgcn_ds_gws_barrier:
2357  return AMDGPU::DS_GWS_BARRIER;
2358  case Intrinsic::amdgcn_ds_gws_sema_v:
2359  return AMDGPU::DS_GWS_SEMA_V;
2360  case Intrinsic::amdgcn_ds_gws_sema_br:
2361  return AMDGPU::DS_GWS_SEMA_BR;
2362  case Intrinsic::amdgcn_ds_gws_sema_p:
2363  return AMDGPU::DS_GWS_SEMA_P;
2364  case Intrinsic::amdgcn_ds_gws_sema_release_all:
2365  return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
2366  default:
2367  llvm_unreachable("not a gws intrinsic");
2368  }
2369 }
2370 
2371 void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
2372  if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
2373  !Subtarget->hasGWSSemaReleaseAll()) {
2374  // Let this error.
2375  SelectCode(N);
2376  return;
2377  }
2378 
2379  // Chain, intrinsic ID, vsrc, offset
2380  const bool HasVSrc = N->getNumOperands() == 4;
2381  assert(HasVSrc || N->getNumOperands() == 3);
2382 
2383  SDLoc SL(N);
2384  SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
2385  int ImmOffset = 0;
2386  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2387  MachineMemOperand *MMO = M->getMemOperand();
2388 
2389  // Don't worry if the offset ends up in a VGPR. Only one lane's value
2390  // takes effect, so SIFixSGPRCopies will validly insert a readfirstlane.
2391 
2392  // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
2393  // offset field) % 64. Some versions of the programming guide omit the m0
2394  // part, or claim it's from offset 0.
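  // E.g. with the M0 bits zeroed as below, an offset field of 5 selects
  // resource id (<isa opaque base> + 5) % 64.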
2395  if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
2396  // If we have a constant offset, try to use the 0 in m0 as the base.
2397  // TODO: Look into changing the default m0 initialization value. If the
2398  // default -1 set only the low 16 bits, we could leave it as-is and add 1 to
2399  // the immediate offset.
2400  glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
2401  ImmOffset = ConstOffset->getZExtValue();
2402  } else {
2403  if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
2404  ImmOffset = BaseOffset.getConstantOperandVal(1);
2405  BaseOffset = BaseOffset.getOperand(0);
2406  }
2407 
2408  // Prefer to do the shift in an SGPR since it should be possible to use m0
2409  // as the result directly. If it's already an SGPR, it will be eliminated
2410  // later.
2411  SDNode *SGPROffset
2412  = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
2413  BaseOffset);
2414  // Shift to offset in m0
2415  SDNode *M0Base
2416  = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
2417  SDValue(SGPROffset, 0),
2418  CurDAG->getTargetConstant(16, SL, MVT::i32));
2419  glueCopyToM0(N, SDValue(M0Base, 0));
2420  }
2421 
2422  SDValue Chain = N->getOperand(0);
2423  SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
2424 
2425  const unsigned Opc = gwsIntrinToOpcode(IntrID);
2426  SmallVector<SDValue, 5> Ops;
2427  if (HasVSrc)
2428  Ops.push_back(N->getOperand(2));
2429  Ops.push_back(OffsetField);
2430  Ops.push_back(Chain);
2431 
2432  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2433  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2434 }
2435 
2436 void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
2437  if (Subtarget->getLDSBankCount() != 16) {
2438  // This is a single instruction with a pattern.
2439  SelectCode(N);
2440  return;
2441  }
2442 
2443  SDLoc DL(N);
2444 
2445  // This requires 2 instructions. It is possible to write a pattern to support
2446  // this, but the generated isel emitter doesn't correctly deal with multiple
2447  // output instructions using the same physical register input. The copy to m0
2448  // is incorrectly placed before the second instruction.
2449  //
2450  // TODO: Match source modifiers.
2451  //
2452  // def : Pat <
2453  // (int_amdgcn_interp_p1_f16
2454  // (VOP3Mods f32:$src0, i32:$src0_modifiers),
2455  // (i32 timm:$attrchan), (i32 timm:$attr),
2456  // (i1 timm:$high), M0),
2457  // (V_INTERP_P1LV_F16 $src0_modifiers, VGPR_32:$src0, timm:$attr,
2458  // timm:$attrchan, 0,
2459  // (V_INTERP_MOV_F32 2, timm:$attr, timm:$attrchan), timm:$high)> {
2460  // let Predicates = [has16BankLDS];
2461  // }
2462 
2463  // 16 bank LDS
2464  SDValue ToM0 = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, AMDGPU::M0,
2465  N->getOperand(5), SDValue());
2466 
2467  SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::Other);
2468 
2469  SDNode *InterpMov =
2470  CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs, {
2471  CurDAG->getTargetConstant(2, DL, MVT::i32), // P0
2472  N->getOperand(3), // Attr
2473  N->getOperand(2), // Attrchan
2474  ToM0.getValue(1) // In glue
2475  });
2476 
2477  SDNode *InterpP1LV =
2478  CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32, {
2479  CurDAG->getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
2480  N->getOperand(1), // Src0
2481  N->getOperand(3), // Attr
2482  N->getOperand(2), // Attrchan
2483  CurDAG->getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
2484  SDValue(InterpMov, 0), // Src2 - holds two f16 values selected by high
2485  N->getOperand(4), // high
2486  CurDAG->getTargetConstant(0, DL, MVT::i1), // $clamp
2487  CurDAG->getTargetConstant(0, DL, MVT::i32), // $omod
2488  SDValue(InterpMov, 1)
2489  });
2490 
2491  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), SDValue(InterpP1LV, 0));
2492 }
2493 
2494 void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
2495  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2496  switch (IntrID) {
2497  case Intrinsic::amdgcn_ds_append:
2498  case Intrinsic::amdgcn_ds_consume: {
2499  if (N->getValueType(0) != MVT::i32)
2500  break;
2501  SelectDSAppendConsume(N, IntrID);
2502  return;
2503  }
2504  }
2505 
2506  SelectCode(N);
2507 }
2508 
2509 void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
2510  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
2511  unsigned Opcode;
2512  switch (IntrID) {
2513  case Intrinsic::amdgcn_wqm:
2514  Opcode = AMDGPU::WQM;
2515  break;
2516  case Intrinsic::amdgcn_softwqm:
2517  Opcode = AMDGPU::SOFT_WQM;
2518  break;
2519  case Intrinsic::amdgcn_wwm:
2520  case Intrinsic::amdgcn_strict_wwm:
2521  Opcode = AMDGPU::STRICT_WWM;
2522  break;
2523  case Intrinsic::amdgcn_strict_wqm:
2524  Opcode = AMDGPU::STRICT_WQM;
2525  break;
2526  case Intrinsic::amdgcn_interp_p1_f16:
2527  SelectInterpP1F16(N);
2528  return;
2529  default:
2530  SelectCode(N);
2531  return;
2532  }
2533 
2534  SDValue Src = N->getOperand(1);
2535  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
2536 }
2537 
2538 void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
2539  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2540  switch (IntrID) {
2541  case Intrinsic::amdgcn_ds_gws_init:
2542  case Intrinsic::amdgcn_ds_gws_barrier:
2543  case Intrinsic::amdgcn_ds_gws_sema_v:
2544  case Intrinsic::amdgcn_ds_gws_sema_br:
2545  case Intrinsic::amdgcn_ds_gws_sema_p:
2546  case Intrinsic::amdgcn_ds_gws_sema_release_all:
2547  SelectDS_GWS(N, IntrID);
2548  return;
2549  default:
2550  break;
2551  }
2552 
2553  SelectCode(N);
2554 }
2555 
2556 bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
2557  unsigned &Mods,
2558  bool AllowAbs) const {
2559  Mods = 0;
2560  Src = In;
2561 
2562  if (Src.getOpcode() == ISD::FNEG) {
2563  Mods |= SISrcMods::NEG;
2564  Src = Src.getOperand(0);
2565  }
2566 
2567  if (AllowAbs && Src.getOpcode() == ISD::FABS) {
2568  Mods |= SISrcMods::ABS;
2569  Src = Src.getOperand(0);
2570  }
2571 
2572  return true;
2573 }
2574 
2575 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
2576  SDValue &SrcMods) const {
2577  unsigned Mods;
2578  if (SelectVOP3ModsImpl(In, Src, Mods)) {
2579  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2580  return true;
2581  }
2582 
2583  return false;
2584 }
2585 
2586 bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
2587  SDValue &SrcMods) const {
2588  unsigned Mods;
2589  if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {
2590  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2591  return true;
2592  }
2593 
2594  return false;
2595 }
2596 
2597 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
2598  SDValue &SrcMods) const {
2599  SelectVOP3Mods(In, Src, SrcMods);
2600  return isNoNanSrc(Src);
2601 }
2602 
2603 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
2604  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
2605  return false;
2606 
2607  Src = In;
2608  return true;
2609 }
2610 
2611 bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(SDValue In, SDValue &Src,
2612  SDValue &SrcMods,
2613  bool OpSel) const {
2614  unsigned Mods;
2615  if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {
2616  if (OpSel)
2617  Mods |= SISrcMods::OP_SEL_0;
2618  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2619  return true;
2620  }
2621 
2622  return false;
2623 }
2624 
2625 bool AMDGPUDAGToDAGISel::SelectVINTERPMods(SDValue In, SDValue &Src,
2626  SDValue &SrcMods) const {
2627  return SelectVINTERPModsImpl(In, Src, SrcMods, /* OpSel */ false);
2628 }
2629 
2630 bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(SDValue In, SDValue &Src,
2631  SDValue &SrcMods) const {
2632  return SelectVINTERPModsImpl(In, Src, SrcMods, /* OpSel */ true);
2633 }
2634 
2635 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
2636  SDValue &SrcMods, SDValue &Clamp,
2637  SDValue &Omod) const {
2638  SDLoc DL(In);
2639  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2640  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2641 
2642  return SelectVOP3Mods(In, Src, SrcMods);
2643 }
2644 
2645 bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
2646  SDValue &SrcMods, SDValue &Clamp,
2647  SDValue &Omod) const {
2648  SDLoc DL(In);
2649  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2650  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2651 
2652  return SelectVOP3BMods(In, Src, SrcMods);
2653 }
2654 
2655 bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
2656  SDValue &Clamp, SDValue &Omod) const {
2657  Src = In;
2658 
2659  SDLoc DL(In);
2660  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2661  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2662 
2663  return true;
2664 }
2665 
2666 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
2667  SDValue &SrcMods, bool IsDOT) const {
2668  unsigned Mods = 0;
2669  Src = In;
2670 
2671  if (Src.getOpcode() == ISD::FNEG) {
2672  Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
2673  Src = Src.getOperand(0);
2674  }
2675 
2676  if (Src.getOpcode() == ISD::BUILD_VECTOR &&
2677  (!IsDOT || !Subtarget->hasDOTOpSelHazard())) {
2678  unsigned VecMods = Mods;
2679 
2680  SDValue Lo = stripBitcast(Src.getOperand(0));
2681  SDValue Hi = stripBitcast(Src.getOperand(1));
2682 
2683  if (Lo.getOpcode() == ISD::FNEG) {
2684  Lo = stripBitcast(Lo.getOperand(0));
2685  Mods ^= SISrcMods::NEG;
2686  }
2687 
2688  if (Hi.getOpcode() == ISD::FNEG) {
2689  Hi = stripBitcast(Hi.getOperand(0));
2690  Mods ^= SISrcMods::NEG_HI;
2691  }
2692 
2693  if (isExtractHiElt(Lo, Lo))
2694  Mods |= SISrcMods::OP_SEL_0;
2695 
2696  if (isExtractHiElt(Hi, Hi))
2697  Mods |= SISrcMods::OP_SEL_1;
2698 
2699  unsigned VecSize = Src.getValueSizeInBits();
2700  Lo = stripExtractLoElt(Lo);
2701  Hi = stripExtractLoElt(Hi);
2702 
2703  if (Lo.getValueSizeInBits() > VecSize) {
2704  Lo = CurDAG->getTargetExtractSubreg(
2705  (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
2706  MVT::getIntegerVT(VecSize), Lo);
2707  }
2708 
2709  if (Hi.getValueSizeInBits() > VecSize) {
2710  Hi = CurDAG->getTargetExtractSubreg(
2711  (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
2712  MVT::getIntegerVT(VecSize), Hi);
2713  }
2714 
2715  assert(Lo.getValueSizeInBits() <= VecSize &&
2716  Hi.getValueSizeInBits() <= VecSize);
2717 
2718  if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
2719  // Really a scalar input. Just select from the low half of the register to
2720  // avoid packing.
2721 
2722  if (VecSize == 32 || VecSize == Lo.getValueSizeInBits()) {
2723  Src = Lo;
2724  } else {
2725  assert(Lo.getValueSizeInBits() == 32 && VecSize == 64);
2726 
2727  SDLoc SL(In);
2728  SDValue Undef = SDValue(
2729  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL,
2730  Lo.getValueType()), 0);
2731  auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
2732  : AMDGPU::SReg_64RegClassID;
2733  const SDValue Ops[] = {
2734  CurDAG->getTargetConstant(RC, SL, MVT::i32),
2735  Lo, CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
2736  Undef, CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) };
2737 
2738  Src = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL,
2739  Src.getValueType(), Ops), 0);
2740  }
2741  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2742  return true;
2743  }
2744 
2745  if (VecSize == 64 && Lo == Hi && isa<ConstantFPSDNode>(Lo)) {
2746  uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF()
2747  .bitcastToAPInt().getZExtValue();
2748  if (AMDGPU::isInlinableLiteral32(Lit, Subtarget->hasInv2PiInlineImm())) {
2749  Src = CurDAG->getTargetConstant(Lit, SDLoc(In), MVT::i64);
2750  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2751  return true;
2752  }
2753  }
2754 
2755  Mods = VecMods;
2756  }
2757 
2758  // Packed instructions do not have abs modifiers.
2759  Mods |= SISrcMods::OP_SEL_1;
2760 
2761  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2762  return true;
2763 }
2764 
2765 bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
2766  SDValue &SrcMods) const {
2767  return SelectVOP3PMods(In, Src, SrcMods, true);
2768 }
2769 
2770 bool AMDGPUDAGToDAGISel::SelectDotIUVOP3PMods(SDValue In, SDValue &Src) const {
2771  const ConstantSDNode *C = cast<ConstantSDNode>(In);
2772  // Literal i1 value set in intrinsic, represents SrcMods for the next operand.
2773  // 1 promotes packed values to signed, 0 treats them as unsigned.
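  // E.g. for the mixed-signedness dot intrinsics (amdgcn_sudot4/8), a 1
  // here sets the NEG bit to mark that packed operand as signed.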
2774  assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
2775 
2776  unsigned Mods = SISrcMods::OP_SEL_1;
2777  unsigned SrcSign = C->getAPIntValue().getZExtValue();
2778  if (SrcSign == 1)
2779  Mods ^= SISrcMods::NEG;
2780 
2781  Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2782  return true;
2783 }
2784 
2785 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
2786  SDValue &SrcMods) const {
2787  Src = In;
2788  // FIXME: Handle op_sel
2789  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2790  return true;
2791 }
2792 
2793 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
2794  SDValue &SrcMods) const {
2795  // FIXME: Handle op_sel
2796  return SelectVOP3Mods(In, Src, SrcMods);
2797 }
2798 
2799 // The return value is not whether the match is possible (which it always is),
2800 // but whether or not a conversion is really used.
2801 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
2802  unsigned &Mods) const {
2803  Mods = 0;
2804  SelectVOP3ModsImpl(In, Src, Mods);
2805 
2806  if (Src.getOpcode() == ISD::FP_EXTEND) {
2807  Src = Src.getOperand(0);
2808  assert(Src.getValueType() == MVT::f16);
2809  Src = stripBitcast(Src);
2810 
2811  // Be careful about folding modifiers if we already have an abs. fneg is
2812  // applied last, so we don't want to apply an earlier fneg.
2813  if ((Mods & SISrcMods::ABS) == 0) {
2814  unsigned ModsTmp;
2815  SelectVOP3ModsImpl(Src, Src, ModsTmp);
2816 
2817  if ((ModsTmp & SISrcMods::NEG) != 0)
2818  Mods ^= SISrcMods::NEG;
2819 
2820  if ((ModsTmp & SISrcMods::ABS) != 0)
2821  Mods |= SISrcMods::ABS;
2822  }
2823 
2824  // op_sel/op_sel_hi decide the source type and source.
2825  // If the source's op_sel_hi is set, it indicates to do a conversion from fp16.
2826  // If the source's op_sel is set, it picks the high half of the source
2827  // register.
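  // E.g. folding "fpext (high half of v)" sets both bits: OP_SEL_1 to
  // convert from f16 and OP_SEL_0 to read the high half of the register.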
2828 
2829  Mods |= SISrcMods::OP_SEL_1;
2830  if (isExtractHiElt(Src, Src)) {
2831  Mods |= SISrcMods::OP_SEL_0;
2832 
2833  // TODO: Should we try to look for neg/abs here?
2834  }
2835 
2836  return true;
2837  }
2838 
2839  return false;
2840 }
2841 
2842 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2843  SDValue &SrcMods) const {
2844  unsigned Mods = 0;
2845  SelectVOP3PMadMixModsImpl(In, Src, Mods);
2846  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2847  return true;
2848 }
2849 
2850 SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
2851  if (In.isUndef())
2852  return CurDAG->getUNDEF(MVT::i32);
2853 
2854  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2855  SDLoc SL(In);
2856  return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
2857  }
2858 
2859  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2860  SDLoc SL(In);
2861  return CurDAG->getConstant(
2862  C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2863  }
2864 
2865  SDValue Src;
2866  if (isExtractHiElt(In, Src))
2867  return Src;
2868 
2869  return SDValue();
2870 }
2871 
2872 bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
2873  assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);
2874 
2875  const SIRegisterInfo *SIRI =
2876  static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
2877  const SIInstrInfo * SII =
2878  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2879 
2880  unsigned Limit = 0;
2881  bool AllUsesAcceptSReg = true;
2882  for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
2883  Limit < 10 && U != E; ++U, ++Limit) {
2884  const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
2885 
2886  // If the register class is unknown, it could be a register class that
2887  // needs to be an SGPR, e.g. because of an inline asm
2888  // constraint.
2889  if (!RC || SIRI->isSGPRClass(RC))
2890  return false;
2891 
2892  if (RC != &AMDGPU::VS_32RegClass) {
2893  AllUsesAcceptSReg = false;
2894  SDNode * User = *U;
2895  if (User->isMachineOpcode()) {
2896  unsigned Opc = User->getMachineOpcode();
2897  MCInstrDesc Desc = SII->get(Opc);
2898  if (Desc.isCommutable()) {
2899  unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
2900  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
2901  if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
2902  unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
2903  const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
2904  if (CommutedRC == &AMDGPU::VS_32RegClass)
2905  AllUsesAcceptSReg = true;
2906  }
2907  }
2908  }
2909  // If "AllUsesAcceptSReg == false" so far we haven't succeeded
2910  // commuting current user. This means have at least one use
2911  // that strictly require VGPR. Thus, we will not attempt to commute
2912  // other user instructions.
2913  if (!AllUsesAcceptSReg)
2914  break;
2915  }
2916  }
2917  return !AllUsesAcceptSReg && (Limit < 10);
2918 }
2919 
2920 bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
2921  auto Ld = cast<LoadSDNode>(N);
2922 
2923  return Ld->getAlign() >= Align(4) &&
2924  (((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2925  Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
2926  !N->isDivergent()) ||
2927  (Subtarget->getScalarizeGlobalBehavior() &&
2928  Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
2929  Ld->isSimple() && !N->isDivergent() &&
2930  static_cast<const SITargetLowering *>(getTargetLowering())
2931  ->isMemOpHasNoClobberedMemOperand(N)));
2932 }
2933 
2934 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
2935  const AMDGPUTargetLowering &Lowering =
2936  *static_cast<const AMDGPUTargetLowering *>(getTargetLowering());
2937  bool IsModified = false;
2938  do {
2939  IsModified = false;
2940 
2941  // Go over all selected nodes and try to fold them a bit more
2942  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
2943  while (Position != CurDAG->allnodes_end()) {
2944  SDNode *Node = &*Position++;
2945  MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
2946  if (!MachineNode)
2947  continue;
2948 
2949  SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
2950  if (ResNode != Node) {
2951  if (ResNode)
2952  ReplaceUses(Node, ResNode);
2953  IsModified = true;
2954  }
2955  }
2956  CurDAG->RemoveDeadNodes();
2957  } while (IsModified);
2958 }
llvm::ISD::SUB
@ SUB
Definition: ISDOpcodes.h:240
llvm::Check::Size
@ Size
Definition: FileCheck.h:77
llvm::AMDGPUISD::CLAMP
@ CLAMP
CLAMP value between 0.0 and 1.0.
Definition: AMDGPUISelLowering.h:376
llvm::ISD::FPOWI
@ FPOWI
Definition: ISDOpcodes.h:917
i
i
Definition: README.txt:29
llvm::MCInstrDesc::getNumDefs
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:245
CmpMode::FP
@ FP
llvm::ConstantSDNode
Definition: SelectionDAGNodes.h:1564
Lowering
Shadow Stack GC Lowering
Definition: ShadowStackGCLowering.cpp:98
llvm::SelectionDAGISel::getTargetLowering
const TargetLowering * getTargetLowering() const
Definition: SelectionDAGISel.h:69
Signed
@ Signed
Definition: NVPTXISelLowering.cpp:4637
llvm::ISD::INTRINSIC_VOID
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
llvm::AMDGPUSubtarget::hasInv2PiInlineImm
bool hasInv2PiInlineImm() const
Definition: AMDGPUSubtarget.h:184
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::SIInstrFlags::WQM
@ WQM
Definition: SIDefines.h:77
llvm::SelectionDAGISel::TM
TargetMachine & TM
Definition: SelectionDAGISel.h:42
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector
bool matchLoadD16FromBuildVector(SDNode *N) const
Definition: AMDGPUISelDAGToDAG.cpp:211
llvm::EVT::isScalarInteger
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:149
AMDGPUISelDAGToDAG.h
llvm::AMDGPUISD::DIV_SCALE
@ DIV_SCALE
Definition: AMDGPUISelLowering.h:407
v2i32
gets compiled into this on rsp movaps rsp movaps rsp movaps rsp movaps rsp movaps rsp movaps rsp movaps rsp movaps rsp movq rsp movq rsp movq rsp movq rsp movq rsp rax movq rsp rax movq rsp rsp rsp eax eax jbe LBB1_3 rcx rax movq rsp eax rsp ret ecx eax rcx movl rsp jmp LBB1_2 gcc rsp rax movq rsp rsp movq rsp rax movq rsp eax eax jb L6 rdx eax rsp ret p2align edx rdx eax movl rsp eax rsp ret and it gets compiled into this on ebp esp eax movl ebp eax movl ebp eax esp popl ebp ret gcc ebp eax popl ebp ret Teach tblgen not to check bitconvert source type in some cases This allows us to consolidate the following patterns in X86InstrMMX v2i32(MMX_MOVDQ2Qrr VR128:$src))>
llvm::SDLoc
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Definition: SelectionDAGNodes.h:1090
llvm::ARMII::VecSize
@ VecSize
Definition: ARMBaseInfo.h:421
llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:667
llvm::ISD::BITCAST
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:886
llvm::ISD::SETNE
@ SETNE
Definition: ISDOpcodes.h:1436
llvm::SelectionDAG::SignBitIsZero
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
Definition: SelectionDAG.cpp:2506
llvm::AMDGPUTargetLowering
Definition: AMDGPUISelLowering.h:27
llvm::SIRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(unsigned RCID) const
Definition: SIRegisterInfo.cpp:2943
llvm::ISD::FMINNUM
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:943
SIMachineFunctionInfo.h
llvm::SelectionDAG::getCopyToReg
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:750
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:50
llvm::SDValue::getNode
SDNode * getNode() const
get the SDNode which holds the desired result
Definition: SelectionDAGNodes.h:151
llvm::ISD::ConstantFP
@ ConstantFP
Definition: ISDOpcodes.h:77
llvm::SelectionDAG::allnodes_end
allnodes_const_iterator allnodes_end() const
Definition: SelectionDAG.h:509
llvm::AMDGPU::getSMRDEncodedOffset
Optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer)
Definition: AMDGPUBaseInfo.cpp:2211
llvm::ISD::UADDO
@ UADDO
Definition: ISDOpcodes.h:324
llvm::AMDGPUSubtarget::hasMadMixInsts
bool hasMadMixInsts() const
Definition: AMDGPUSubtarget.h:152
llvm::ISD::ADDC
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:269
llvm::KnownBits::Zero
APInt Zero
Definition: KnownBits.h:24
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::ISD::SETEQ
@ SETEQ
Definition: ISDOpcodes.h:1431
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:242
llvm::SIInstrFlags::FlatScratch
@ FlatScratch
Definition: SIDefines.h:123
llvm::SelectionDAG::getVTList
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
Definition: SelectionDAG.cpp:9112
llvm::AMDGPUISD::CVT_PKNORM_I16_F32
@ CVT_PKNORM_I16_F32
Definition: AMDGPUISelLowering.h:462
llvm::MipsISD::Lo
@ Lo
Definition: MipsISelLowering.h:79
llvm::MachineSDNode
An SDNode that represents everything that will be needed to construct a MachineInstr.
Definition: SelectionDAGNodes.h:2867
llvm::AMDGPUISD::FMUL_W_CHAIN
@ FMUL_W_CHAIN
Definition: AMDGPUISelLowering.h:387
llvm::SelectionDAG::allnodes_begin
allnodes_const_iterator allnodes_begin() const
Definition: SelectionDAG.h:508
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::AMDGPUISD::DIV_FIXUP
@ DIV_FIXUP
Definition: AMDGPUISelLowering.h:409
llvm::AMDGPUISD::LOAD_D16_HI_I8
@ LOAD_D16_HI_I8
Definition: AMDGPUISelLowering.h:491
llvm::GCNSubtarget::hasFlatSegmentOffsetBug
bool hasFlatSegmentOffsetBug() const
Definition: GCNSubtarget.h:603
llvm::MemSDNode::getMemoryVT
EVT getMemoryVT() const
Return the type of the in-memory value.
Definition: SelectionDAGNodes.h:1341
ValueTracking.h
llvm::ISD::FLOG2
@ FLOG2
Definition: ISDOpcodes.h:920
llvm::MemSDNode::getChain
const SDValue & getChain() const
Definition: SelectionDAGNodes.h:1364
llvm::SDNode
Represents one node in the SelectionDAG.
Definition: SelectionDAGNodes.h:454
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:482
llvm::AMDGPU::getSMRDEncodedLiteralOffset32
Optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
Definition: AMDGPUBaseInfo.cpp:2228
llvm::LoadSDNode
This class is used to represent ISD::LOAD nodes.
Definition: SelectionDAGNodes.h:2314
llvm::SISrcMods::OP_SEL_0
@ OP_SEL_0
Definition: SIDefines.h:220
llvm::SDNode::use_iterator
This class provides iterator support for SDUse operands that use a specific SDNode.
Definition: SelectionDAGNodes.h:733
llvm::AMDGPU::VOP3PEncoding::OpSel
OpSel
Definition: SIDefines.h:885
Shift
bool Shift
Definition: README.txt:468
AMDGPUDAGToDAGISel
AMDGPU specific code to select AMDGPU machine instructions for SelectionDAG operations.
Definition: AMDGPUISelDAGToDAG.h:79
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:127
i8
Clang compiles this i8
Definition: README.txt:504
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1287
llvm::SelectionDAG::isBaseWithConstantOffset
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
Definition: SelectionDAG.cpp:4524
llvm::GCNSubtarget::hasDLInsts
bool hasDLInsts() const
Definition: GCNSubtarget.h:684
llvm::ISD::SETCC
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:736
llvm::Optional< int64_t >
llvm::SDNode::isPredecessorOf
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
Definition: SelectionDAGNodes.h:817
f32
So we should use XX3Form_Rcr to implement intrinsic Convert DP outs ins xscvdpsp No builtin are required Round &Convert QP DP(dword[1] is set to zero) No builtin are required Round to Quad Precision because you need to assign rounding mode in instruction Provide builtin(set f128:$vT,(int_ppc_vsx_xsrqpi f128:$vB))(set f128 yields< n x< ty > >< result > yields< ty >< result > No builtin are required Load Store load store see def memrix16 in PPCInstrInfo td Load Store Vector load store outs ins lxsdx set load store with conversion from to outs ins lxsspx set f32
Definition: README_P9.txt:522
i1
Decimal Convert From to National Zoned Signed int_ppc_altivec_bcdcfno i1
Definition: README_P9.txt:147
llvm::createAMDGPUISelDag
FunctionPass * createAMDGPUISelDag(TargetMachine *TM=nullptr, CodeGenOpt::Level OptLevel=CodeGenOpt::Default)
This pass converts a legalized DAG into a AMDGPU-specific.
Definition: AMDGPUISelDAGToDAG.cpp:114
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:79
llvm::MemSDNode
This is an abstract virtual class for memory operations.
Definition: SelectionDAGNodes.h:1259
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
matchZExtFromI32
static SDValue matchZExtFromI32(SDValue Op)
Definition: AMDGPUISelDAGToDAG.cpp:1644
llvm::SelectionDAG::RemoveDeadNodes
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
Definition: SelectionDAG.cpp:900
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::SISrcMods::OP_SEL_1
@ OP_SEL_1
Definition: SIDefines.h:221
llvm::SDNode::isDivergent
bool isDivergent() const
Definition: SelectionDAGNodes.h:700
llvm::Lo_32
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition: MathExtras.h:353
AMDGPUDAGToDAGISel::PreprocessISelDAG
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
Definition: AMDGPUISelDAGToDAG.cpp:291
SelectionDAG.h
llvm::SelectionDAG::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:454
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1628
llvm::SISrcMods::NEG
@ NEG
Definition: SIDefines.h:216
llvm::GCNSubtarget::getRegisterInfo
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:224
AMDGPUDAGToDAGISel::SelectBuildVector
void SelectBuildVector(SDNode *N, unsigned RegClassID)
Definition: AMDGPUISelDAGToDAG.cpp:453
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::ISD::FABS
@ FABS
Definition: ISDOpcodes.h:912
llvm::AMDGPUISD::FMIN3
@ FMIN3
Definition: AMDGPUISelLowering.h:399
llvm::ISD::ADDCARRY
@ ADDCARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:303
AMDGPUDAGToDAGISel::PostprocessISelDAG
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
Definition: AMDGPUISelDAGToDAG.cpp:2934
llvm::ISD::BRCOND
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1001
llvm::AMDGPUAS::CONSTANT_ADDRESS_32BIT
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
Definition: AMDGPU.h:369
llvm::GCNSubtarget::getInstrInfo
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:212
llvm::GCNSubtarget::unsafeDSOffsetFoldingEnabled
bool unsafeDSOffsetFoldingEnabled() const
Definition: GCNSubtarget.h:414
llvm::SelectionDAG::getRegister
SDValue getRegister(unsigned Reg, EVT VT)
Definition: SelectionDAG.cpp:2061
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::MipsISD::Hi
@ Hi
Definition: MipsISelLowering.h:75
llvm::GCNSubtarget::hasGWSSemaReleaseAll
bool hasGWSSemaReleaseAll() const
Definition: GCNSubtarget.h:639
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::ISD::FFLOOR
@ FFLOOR
Definition: ISDOpcodes.h:930
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
SelectSAddrFI
static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr)
Definition: AMDGPUISelDAGToDAG.cpp:1748
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
i64
Clang compiles this i64
Definition: README.txt:504
AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel
AMDGPUDAGToDAGISel(TargetMachine *TM=nullptr, CodeGenOpt::Level OptLevel=CodeGenOpt::Default)
Definition: AMDGPUISelDAGToDAG.cpp:119
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:666
AMDGPUDAGToDAGISel::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: AMDGPUISelDAGToDAG.cpp:201
llvm::SelectionDAG::getTargetFrameIndex
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:703
llvm::RecurKind::And
@ And
Bitwise or logical AND of integers.
llvm::SDValue::getValueType
EVT getValueType() const
Return the ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:1125
llvm::MemIntrinsicSDNode
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
Definition: SelectionDAGNodes.h:1480
llvm::AMDGPUSubtarget::get
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Definition: AMDGPUSubtarget.cpp:972
llvm::SelectionDAG
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:220
SelectionDAGNodes.h
llvm::ISD::Constant
@ Constant
Definition: ISDOpcodes.h:76
llvm::ISD::ZERO_EXTEND
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:763
llvm::SPIRV::ImageOperand::ConstOffset
@ ConstOffset
llvm::GCNSubtarget::hasAddr64
bool hasAddr64() const
Definition: GCNSubtarget.h:325
llvm::ISD::ABS
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:674
FunctionLoweringInfo.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::GCNSubtarget::useFlatForGlobal
bool useFlatForGlobal() const
Definition: GCNSubtarget.h:473
llvm::GCNSubtarget::hasFmaMixInsts
bool hasFmaMixInsts() const
Definition: GCNSubtarget.h:375
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:754
llvm::User
Definition: User.h:44
llvm::AMDGPUISD::CVT_PKNORM_U16_F32
@ CVT_PKNORM_U16_F32
Definition: AMDGPUISelLowering.h:463
llvm::SelectionDAG::getUNDEF
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:971
llvm::ISD::SIGN_EXTEND_INREG
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:781
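The SHL/SRA pair can be modelled on plain integers. A sketch of the semantics for sign-extending the low 8 bits of an i32 (hypothetical helper name, not LLVM code):

#include <cstdint>

int32_t signExtendInReg8(int32_t X) {
  // Shift the small value up to the sign-bit position, then arithmetic
  // shift back down; equivalent to int32_t(int8_t(X)). Well-defined from
  // C++20, two's-complement in practice before that.
  return int32_t(uint32_t(X) << 24) >> 24;
}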
llvm::ISD::CopyToReg
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
llvm::AMDGPUISD::FMED3
@ FMED3
Definition: AMDGPUISelLowering.h:402
GFX9
@ GFX9
Definition: SIInstrInfo.cpp:7835
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:34
C
(Identifier cross-referenced from example text in README_ALTIVEC.txt.)
Definition: README_ALTIVEC.txt:86
llvm::SelectionDAG::getConstant
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1449
llvm::EVT::getVectorNumElements
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:308
llvm::MCSubtargetInfo::getTargetTriple
const Triple & getTargetTriple() const
Definition: MCSubtargetInfo.h:108
llvm::AMDGPUISD::LOAD_D16_LO_I8
@ LOAD_D16_LO_I8
Definition: AMDGPUISelLowering.h:493
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:45
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::ISD::FROUND
@ FROUND
Definition: ISDOpcodes.h:928
llvm::ISD::TRUNCATE
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:769
AMDGPUSubtarget.h
llvm::AMDGPUISD::ATOMIC_LOAD_FMAX
@ ATOMIC_LOAD_FMAX
Definition: AMDGPUISelLowering.h:507
llvm::ISD::SRA
@ SRA
Definition: ISDOpcodes.h:692
llvm::SelectionDAGISel::ReplaceNode
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
Definition: SelectionDAGISel.h:229
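A hedged sketch of how a Select() override typically ends: build a machine node, then splice it in for the ISD node. N and the opcode choice are illustrative, assuming CurDAG is in scope:

SDLoc DL(N);
SDValue Imm = CurDAG->getTargetConstant(0, DL, MVT::i32);
MachineSDNode *MN =
    CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Imm);
ReplaceNode(N, MN); // all uses of N now refer to MN; N leaves the DAG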
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:197
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MCInstrDesc::isCommutable
bool isCommutable() const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z,...
Definition: MCInstrDesc.h:478
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:366
llvm::M0
unsigned M0(unsigned Val)
Definition: VE.h:369
llvm::Instruction
Definition: Instruction.h:42
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:302
llvm::ISD::SINT_TO_FP
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:773
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1466
llvm::ISD::FNEARBYINT
@ FNEARBYINT
Definition: ISDOpcodes.h:927
llvm::ISD::FRINT
@ FRINT
Definition: ISDOpcodes.h:926
llvm::SIInstrInfo::findCommutedOpIndices
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override
Definition: SIInstrInfo.cpp:2328
llvm::SelectionDAG::getMemIntrinsicNode
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
Definition: SelectionDAG.cpp:7533
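A sketch of a call matching the signature above, for a node that both returns a value and carries a chain (Chain, Ptr and N are assumed to be in scope; the opcode is illustrative):

SDVTList VTs = CurDAG->getVTList(MVT::v2i16, MVT::Other); // value + chain
SDValue Ops[] = {Chain, Ptr};
SDValue Hi = CurDAG->getMemIntrinsicNode(
    AMDGPUISD::LOAD_D16_HI, SDLoc(N), VTs, Ops, MVT::i16,
    MachinePointerInfo(), Align(2));

Attaching the memory type and MachinePointerInfo here is what lets later passes reason about the resulting MachineMemOperand.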
llvm::SIInstrFlags::FlatGlobal
@ FlatGlobal
Definition: SIDefines.h:108
llvm::LegacyDivergenceAnalysis
Definition: LegacyDivergenceAnalysis.h:31
llvm::SelectionDAG::dump
void dump() const
Definition: SelectionDAGDumper.cpp:921
llvm::AMDGPUISD::CVT_PK_U16_U32
@ CVT_PK_U16_U32
Definition: AMDGPUISelLowering.h:465
llvm::SIRegisterInfo::isSGPRClass
static bool isSGPRClass(const TargetRegisterClass *RC)
Definition: SIRegisterInfo.h:187
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:30
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:666
Align
uint64_t Align
Definition: ELFObjHandler.cpp:81
llvm::MCOperandInfo::RegClass
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Definition: MCInstrDesc.h:90
llvm::SelectionDAG::isKnownNeverNaN
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue is known to never be NaN.
Definition: SelectionDAG.cpp:4536
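A sketch of the kind of guard this enables, with Op0 and Op1 assumed in scope: folding a min/max tree into a med3-style node is only sound when NaNs cannot occur.

const TargetOptions &Opts = CurDAG->getTarget().Options;
if (Opts.NoNaNsFPMath ||
    (CurDAG->isKnownNeverNaN(Op0) && CurDAG->isKnownNeverNaN(Op1))) {
  // ... safe to form AMDGPUISD::FMED3 here ...
}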
llvm::Triple::getArch
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition: Triple.h:345
llvm::GCNSubtarget::hasFlatScratchSVSSwizzleBug
bool hasFlatScratchSVSSwizzleBug() const
Definition: GCNSubtarget.h:1052
llvm::SDValue::getConstantOperandVal
uint64_t getConstantOperandVal(unsigned i) const
Definition: SelectionDAGNodes.h:1137
llvm::ISD::CopyFromReg
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:208
llvm::MCID::RegSequence
@ RegSequence
Definition: MCInstrDesc.h:181
llvm::AMDGPUISD::FMA_W_CHAIN
@ FMA_W_CHAIN
Definition: AMDGPUISelLowering.h:386
llvm::GCNSubtarget::getScalarizeGlobalBehavior
bool getScalarizeGlobalBehavior() const
Definition: GCNSubtarget.h:791
llvm::ISD::USUBO
@ USUBO
Definition: ISDOpcodes.h:328
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
findMemSDNode
static MemSDNode * findMemSDNode(SDNode *N)
Definition: AMDGPUISelDAGToDAG.cpp:1519
llvm::Hi_32
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition: MathExtras.h:348
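A self-contained restatement of the documented contract (hypothetical name, not the MathExtras.h source):

#include <cstdint>

constexpr uint32_t hi32(uint64_t Value) {
  return static_cast<uint32_t>(Value >> 32);
}
static_assert(hi32(0xDEADBEEF00000000ULL) == 0xDEADBEEFu, "high half");

Instruction selectors use this kind of split when a 64-bit immediate has to be materialized as two 32-bit moves.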
LoopInfo.h
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:642
llvm::ISD::ATOMIC_LOAD_FADD
@ ATOMIC_LOAD_FADD
Definition: ISDOpcodes.h:1196
i32
(Identifier cross-referenced from example text in README.txt.)
Definition: README.txt:122
llvm::ISD::FPOW
@ FPOW
Definition: ISDOpcodes.h:918
llvm::sys::unicode::SBase
constexpr const char32_t SBase
Definition: UnicodeNameToCodepoint.cpp:256
llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
R600MCTargetDesc.h
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:656
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::SelectionDAG::RemoveDeadNode
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
Definition: SelectionDAG.cpp:954
llvm::countPopulation
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:567
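A portable sketch of the same operation via the standard library (hypothetical wrapper name):

#include <bitset>
#include <cstdint>

unsigned popCount32(uint32_t Value) {
  return static_cast<unsigned>(std::bitset<32>(Value).count());
}
// popCount32(0xF0F0u) == 8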
llvm::AMDGPUISD::LDEXP
@ LDEXP
Definition: AMDGPUISelLowering.h:422
llvm::isInt< 32 >
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:373
AMDGPUMCTargetDesc.h
llvm::ISD::FLOG10
@ FLOG10
Definition: ISDOpcodes.h:921
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:340
llvm::ISD::FMAD
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:486
llvm::isUInt< 16 >
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:408
Index
uint32_t Index
Definition: ELFObjHandler.cpp:82
uint64_t
llvm::ConstantFPSDNode
Definition: SelectionDAGNodes.h:1613
llvm::SelectionDAGISel::TII
const TargetInstrInfo * TII
Definition: SelectionDAGISel.h:53
llvm::MemSDNode::getMemOperand
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Definition: SelectionDAGNodes.h:1345
llvm::ISD::LOAD
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:966
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:78
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::AMDGPUISD::LOAD_D16_HI
@ LOAD_D16_HI
Definition: AMDGPUISelLowering.h:489
getBaseWithOffsetUsingSplitOR
static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr, SDValue &N0, SDValue &N1)
Definition: AMDGPUISelDAGToDAG.cpp:736
llvm::SelectionDAGISel::FuncInfo
std::unique_ptr< FunctionLoweringInfo > FuncInfo
Definition: SelectionDAGISel.h:44
llvm::GCNSubtarget::hasAddNoCarry
bool hasAddNoCarry() const
Definition: GCNSubtarget.h:647
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::GCNSubtarget::privateMemoryResourceIsRangeChecked
bool privateMemoryResourceIsRangeChecked() const
Definition: GCNSubtarget.h:495
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:534
llvm::AMDGPUISD::FMAD_FTZ
@ FMAD_FTZ
Definition: AMDGPUISelLowering.h:412
llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition: SelectionDAGNodes.h:908
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:365
llvm::AMDGPUISD::ATOMIC_DEC
@ ATOMIC_DEC
Definition: AMDGPUISelLowering.h:505
llvm::SelectionDAG::getNode
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
Definition: SelectionDAG.cpp:8842
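Note the "gets or creates": SelectionDAG nodes are uniqued, so two identical requests return the same node. A sketch building (a << 16) | b, with a, b and DL assumed in scope:

SDValue Sixteen = CurDAG->getConstant(16, DL, MVT::i32);
SDValue Shl = CurDAG->getNode(ISD::SHL, DL, MVT::i32, a, Sixteen);
SDValue Or  = CurDAG->getNode(ISD::OR,  DL, MVT::i32, Shl, b);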
llvm::AMDGPUISD::CVT_PK_I16_I32
@ CVT_PK_I16_I32
Definition: AMDGPUISelLowering.h:464
llvm::M68kBeads::Term
@ Term
Definition: M68kBaseInfo.h:71
llvm::AMDGPUISD::BFE_I32
@ BFE_I32
Definition: AMDGPUISelLowering.h:428
AMDGPUDAGToDAGISel::getPassName
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
Definition: AMDGPUISelDAGToDAG.cpp:783
llvm::LoadSDNode::getExtensionType
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Definition: SelectionDAGNodes.h:2329
llvm::AMDGPUArgumentUsageInfo
Definition: AMDGPUArgumentUsageInfo.h:158
llvm::KnownBits::getMaxValue
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:136
llvm::SelectionDAG::MorphNodeTo
SDNode * MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs, ArrayRef< SDValue > Ops)
This mutates the specified node to have the specified return type, opcode, and operands.
Definition: SelectionDAG.cpp:9449
llvm::isUInt< 32 >
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:411
llvm::SDValue::getValue
SDValue getValue(unsigned R) const
Definition: SelectionDAGNodes.h:171
llvm::TargetMachine::Options
TargetOptions Options
Definition: TargetMachine.h:118
assert
assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef())
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
llvm::AMDGPUISD::SIN_HW
@ SIN_HW
Definition: AMDGPUISelLowering.h:392
llvm::isUInt< 8 >
constexpr bool isUInt< 8 >(uint64_t x)
Definition: MathExtras.h:405
llvm::ConstantSDNode::getZExtValue
uint64_t getZExtValue() const
Definition: SelectionDAGNodes.h:1579
llvm::AMDGPUISD::RSQ
@ RSQ
Definition: AMDGPUISelLowering.h:417
i16
(Identifier cross-referenced from example text in README.txt.)
Definition: README.txt:1493
llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1411
llvm::MCInstrDesc::OpInfo
const MCOperandInfo * OpInfo
Definition: MCInstrDesc.h:208
llvm::SelectionDAGISel::CurDAG
SelectionDAG * CurDAG
Definition: SelectionDAGISel.h:48
llvm::SelectionDAG::getMachineNode
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
Definition: SelectionDAG.cpp:9550
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::ISD::SUBCARRY
@ SUBCARRY
Definition: ISDOpcodes.h:304
llvm::SelectionDAG::setNodeMemRefs
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
Definition: SelectionDAG.cpp:9318
llvm::GCNSubtarget::getLDSBankCount
int getLDSBankCount() const
Definition: GCNSubtarget.h:282
llvm::GCNSubtarget::hasDOTOpSelHazard
bool hasDOTOpSelHazard() const
Definition: GCNSubtarget.h:1009
AMDGPUDAGToDAGISel::Select
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
Definition: AMDGPUISelDAGToDAG.cpp:508
v4i32
(Identifier cross-referenced from example text in README_P9.txt.)
Definition: README_P9.txt:112
llvm::SelectionDAG::SelectNodeTo
SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
Definition: SelectionDAG.cpp:9342
llvm::AMDGPU::IsaInfo::TargetIDSetting::Off
@ Off
R600RegisterInfo.h
llvm::isMask_32
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:467
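An equivalent predicate written out in full (hypothetical name, not the header implementation):

#include <cstdint>

constexpr bool isLowBitMask32(uint32_t Value) {
  // Adding 1 to 0b0...01...1 clears all the low ones; any leftover
  // overlap means the ones were not contiguous from bit 0.
  return Value != 0 && ((Value + 1) & Value) == 0;
}
static_assert(isLowBitMask32(0x0000FFFFu), "contiguous low ones");
static_assert(!isLowBitMask32(0x0000FF00u), "must start at bit 0");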
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:367
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::AMDGPU::CPol::SCC
@ SCC
Definition: SIDefines.h:304
llvm::SISrcMods::NEG_HI
@ NEG_HI
Definition: SIDefines.h:219
SelectionDAGISel.h
llvm::LoopInfo
Definition: LoopInfo.h:1102
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:137
llvm::SPIRV::SamplerAddressingMode::Clamp
@ Clamp
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:362
llvm::GCNSubtarget::hasFlatInstOffsets
bool hasFlatInstOffsets() const
Definition: GCNSubtarget.h:561
llvm::EVT::getScalarSizeInBits
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:352
AMDGPU.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::ISD::UMUL_LOHI
@ UMUL_LOHI
Definition: ISDOpcodes.h:251
llvm::AMDGPUISD::LOAD_D16_LO_U8
@ LOAD_D16_LO_U8
Definition: AMDGPUISelLowering.h:494
llvm::SelectionDAG::ReplaceAllUsesOfValueWith
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
Definition: SelectionDAG.cpp:10138
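This is the per-result counterpart of ReplaceNode: useful when a node produces, say, a value and a chain and only one of them is being rewritten. A sketch, with NewVal assumed in scope:

CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewVal); // value users only
// Chain users of SDValue(N, 1) are untouched; N may be removed via
// RemoveDeadNode only once no result has remaining uses.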
uint32_t
llvm::SDValue::getOperand
const SDValue & getOperand(unsigned i) const
Definition: SelectionDAGNodes.h:1133
DL
DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::SIInstrFlags::FLAT
@ FLAT
Definition: SIDefines.h:59
llvm::ISD::UNDEF
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:211
llvm::ISD::FEXP
@ FEXP
Definition: ISDOpcodes.h:922
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::ISD::SMUL_LOHI
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
llvm::ISD::FEXP2
@ FEXP2
Definition: ISDOpcodes.h:923
llvm::CodeGenOpt::Level
Level
Definition: CodeGen.h:52
llvm::SDVTList
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Definition: SelectionDAGNodes.h:78
llvm::ISD::FMUL
@ FMUL
Definition: ISDOpcodes.h:392
llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition: ISDOpcodes.h:1391
MRI
const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::ISD::INTRINSIC_WO_CHAIN
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
llvm::LoadSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2333
llvm::LoopInfoBase::getLoopsInPreorder
SmallVector< LoopT *, 4 > getLoopsInPreorder() const
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
Definition: LoopInfoImpl.h:575
llvm::ISD::FSQRT
@ FSQRT
Definition: ISDOpcodes.h:913
llvm::SelectionDAGISel::MF
MachineFunction * MF
Definition: SelectionDAGISel.h:46
llvm::AMDGPUISD::RCP
@ RCP
Definition: AMDGPUISelLowering.h:416
llvm::ISD::FMAXNUM
@ FMAXNUM
Definition: ISDOpcodes.h:944
llvm::SelectionDAG::computeKnownBits
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
Definition: SelectionDAG.cpp:2889
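A sketch of a typical selector query, with Offset assumed in scope: prove that an offset fits a 16-bit instruction field before folding it.

KnownBits Known = CurDAG->computeKnownBits(Offset);
if (isUInt<16>(Known.getMaxValue().getZExtValue())) {
  // ... the offset is provably in range for the encoding ...
}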
llvm::KnownBits
Definition: KnownBits.h:23
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:622
llvm::AMDGPUISD::LOAD_D16_HI_U8
@ LOAD_D16_HI_U8
Definition: AMDGPUISelLowering.h:492
llvm::AMDGPUISD::MAD_U64_U32
@ MAD_U64_U32
Definition: AMDGPUISelLowering.h:440
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:345
llvm::MemSDNode::getAddressSpace
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Definition: SelectionDAGNodes.h:1352
llvm::ilist_iterator
Iterator for intrusive lists based on ilist_node.
Definition: ilist_iterator.h:57
llvm::AMDGPUISD::ATOMIC_LOAD_FMIN
@ ATOMIC_LOAD_FMIN
Definition: AMDGPUISelLowering.h:506
llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPU.h:363
llvm::ISD::FCOS
@ FCOS
Definition: ISDOpcodes.h:916
llvm::SelectionDAG::getEntryNode
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:531
llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:924
llvm::ISD::FSIN
@ FSIN
Definition: ISDOpcodes.h:915
llvm::ISD::BUILD_VECTOR
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:514
llvm::SITargetLowering
Definition: SIISelLowering.h:31
llvm::SelectionDAG::getTargetExtractSubreg
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
Definition: SelectionDAG.cpp:9668
llvm::SelectionDAGISel::ReplaceUses
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - Replace all uses of the old node F with the new node T.
Definition: SelectionDAGISel.h:208
llvm::ISD::SUBC
@ SUBC
Definition: ISDOpcodes.h:270
llvm::GCNSubtarget::ldsRequiresM0Init
bool ldsRequiresM0Init() const
Return true if most LDS instructions have an m0 use that requires m0 to be initialized.
Definition: GCNSubtarget.h:625
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition: SelectionDAGNodes.h:137
gwsIntrinToOpcode
static unsigned gwsIntrinToOpcode(unsigned IntrID)
Definition: AMDGPUISelDAGToDAG.cpp:2352
GFX11
@ GFX11
Definition: SIInstrInfo.cpp:7840
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
llvm::AMDGPUPerfHintAnalysis
Definition: AMDGPUPerfHintAnalysis.h:23
llvm::ISD::STORE
@ STORE
Definition: ISDOpcodes.h:967
llvm::AMDGPUISD::CVT_PKRTZ_F16_F32
@ CVT_PKRTZ_F16_F32
Definition: AMDGPUISelLowering.h:461
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:436
llvm::ISD::UINT_TO_FP
@ UINT_TO_FP
Definition: ISDOpcodes.h:774
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
llvm::GCNSubtarget::getGeneration
Generation getGeneration() const
Definition: GCNSubtarget.h:263
llvm::RISCVMatInt::Imm
@ Imm
Definition: RISCVMatInt.h:23
llvm::SIInstrInfo
Definition: SIInstrInfo.h:44
llvm::AMDGPU::isInlinableLiteral32
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
Definition: AMDGPUBaseInfo.cpp:2065
llvm::EVT::getVectorElementType
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:300
llvm::ISD::FP_EXTEND
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:871
llvm::TargetOptions::NoNaNsFPMath
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
Definition: TargetOptions.h:175
llvm::AMDGPUISD::FRACT
@ FRACT
Definition: AMDGPUISelLowering.h:372
llvm::ISD::FSUB
@ FSUB
Definition: ISDOpcodes.h:391
llvm::ISD::SHL
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:691
LegacyDivergenceAnalysis.h
llvm::SelectionDAGISel
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
Definition: SelectionDAGISel.h:40
llvm::AMDGPUISD::FMAX3
@ FMAX3
Definition: AMDGPUISelLowering.h:396
llvm::GCNSubtarget::getConstantBusLimit
unsigned getConstantBusLimit(unsigned Opcode) const
Definition: AMDGPUSubtarget.cpp:180
llvm::ISD::FREM
@ FREM
Definition: ISDOpcodes.h:394
llvm::AMDGPUISD::MAD_I64_I32
@ MAD_I64_I32
Definition: AMDGPUISelLowering.h:441
Dominators.h
N
#define N
llvm::APInt::countTrailingOnes
unsigned countTrailingOnes() const
Count the number of trailing one bits.
Definition: APInt.h:1559
llvm::AMDGPUAS::FLAT_ADDRESS
@ FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:361
llvm::ISD::SUBE
@ SUBE
Definition: ISDOpcodes.h:280
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:349
llvm::ISD::SRL
@ SRL
Definition: ISDOpcodes.h:693
llvm::TargetMachine::getTargetTriple
const Triple & getTargetTriple() const
Definition: TargetMachine.h:126
llvm::GCNSubtarget::d16PreservesUnusedBits
bool d16PreservesUnusedBits() const
Definition: GCNSubtarget.h:615
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel", "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
llvm::AMDGPUISD::COS_HW
@ COS_HW
Definition: AMDGPUISelLowering.h:391
llvm::SIInstrFlags::IsDOT
@ IsDOT
Definition: SIDefines.h:120
llvm::SDValue::getOpcode
unsigned getOpcode() const
Definition: SelectionDAGNodes.h:1121
llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:652
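The distinction from getConstant matters during selection: a target constant is treated as an already-selected immediate operand and will not be pattern-matched again, whereas a plain constant is an ordinary node the selector may still visit. A sketch, with DL assumed in scope:

SDValue C  = CurDAG->getConstant(42, DL, MVT::i32);       // selectable node
SDValue TC = CurDAG->getTargetConstant(42, DL, MVT::i32); // raw imm operand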
TM
LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::ISD::INTRINSIC_W_CHAIN
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
llvm::ISD::FNEG
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:911
llvm::SelectionDAG::getMachineFunction
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:449
BB
(Identifier cross-referenced from example text in README.txt.)
Definition: README.txt:39
llvm::ISD::BUILD_PAIR
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::AMDGPUISD::DWORDADDR
@ DWORDADDR
Definition: AMDGPUISelLowering.h:371
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
llvm::ISD::TargetFrameIndex
@ TargetFrameIndex
Definition: ISDOpcodes.h:166
f64
(Identifier cross-referenced from example text in README_P9.txt.)
Definition: README_P9.txt:314
llvm::ISD::FTRUNC
@ FTRUNC
Definition: ISDOpcodes.h:925
llvm::AMDGPUISD::RCP_IFLAG
@ RCP_IFLAG
Definition: AMDGPUISelLowering.h:419
llvm::AMDGPUISD::ATOMIC_INC
@ ATOMIC_INC
Definition: AMDGPUISelLowering.h:504
llvm::ISD::FCANONICALIZE
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:499
llvm::ISD::FLOG
@ FLOG
Definition: ISDOpcodes.h:919
InitializePasses.h
llvm::GCNSubtarget::hasUsableDSOffset
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition: GCNSubtarget.h:410
llvm::ISD::ADDE
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:279
llvm::ISD::FP_ROUND
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:852
llvm::MCInstrDesc::getNumOperands
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:230
llvm::RecurKind::SMax
@ SMax
Signed integer max implemented in terms of select(cmp()).
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::SelectionDAG::getTarget
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:453
llvm::EVT::bitsEq
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:228
AMDGPUTargetMachine.h
llvm::AMDGPUISD::BFE_U32
@ BFE_U32
Definition: AMDGPUISelLowering.h:427
llvm::ISD::SCALAR_TO_VECTOR
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:606
AMDGPUDAGToDAGISel::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
Definition: AMDGPUISelDAGToDAG.cpp:126
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1237
llvm::AMDGPUISD::LOAD_D16_LO
@ LOAD_D16_LO
Definition: AMDGPUISelLowering.h:490
IsCopyFromSGPR
static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val)
Definition: AMDGPUISelDAGToDAG.cpp:1440
llvm::ISD::FDIV
@ FDIV
Definition: ISDOpcodes.h:393
llvm::RegState::Undef
@ Undef
Value of the register doesn't matter.
Definition: MachineInstrBuilder.h:52