1 //===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Custom DAG lowering for R600
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "R600ISelLowering.h"
15 #include "AMDGPU.h"
16 #include "MCTargetDesc/R600MCTargetDesc.h"
17 #include "R600Defines.h"
18 #include "R600InstrInfo.h"
19 #include "R600MachineFunctionInfo.h"
20 #include "R600Subtarget.h"
21 #include "R600TargetMachine.h"
22 #include "llvm/IR/IntrinsicsAMDGPU.h"
23 #include "llvm/IR/IntrinsicsR600.h"
24 
25 using namespace llvm;
26 
27 #include "R600GenCallingConv.inc"
28 
29 R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
30  const R600Subtarget &STI)
31  : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) {
32  addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
33  addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
34  addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
35  addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
36  addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
37  addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);
38 
41 
43 
44  // Legalize loads and stores to the private address space.
46 
47  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
48  // spaces, so it is custom lowered to handle those where it isn't.
50  for (MVT VT : MVT::integer_valuetypes()) {
54  }
55 
56  // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
58  MVT::v2i1, Expand);
59 
61  MVT::v4i1, Expand);
62 
64  Custom);
65 
68  // We need to include these since trunc STORES to PRIVATE need
69  // special handling to accommodate RMW
80 
81  // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
84 
85  // Set condition code actions
89  MVT::f32, Expand);
90 
92  MVT::i32, Expand);
93 
95 
97 
100 
102 
104  MVT::f64, Custom);
105 
107 
110  Custom);
111 
113  Expand);
114 
115  // ADD, SUB overflow.
116  // TODO: turn these into Legal?
117  if (Subtarget->hasCARRY())
119 
120  if (Subtarget->hasBORROW())
122 
123  // Expand sign extension of vectors
124  if (!Subtarget->hasBFE())
126 
128 
129  if (!Subtarget->hasBFE())
132 
133  if (!Subtarget->hasBFE())
136 
139 
141 
143 
146 
149 
150  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
151  // to be Legal/Custom in order to avoid library calls.
153  Custom);
154 
155  if (!Subtarget->hasFMA())
157 
158  // FIXME: May need no denormals check
160 
161  if (!Subtarget->hasBFI())
162  // fcopysign can be done in a single instruction with BFI.
164 
165  if (!Subtarget->hasBCNT(32))
167 
168  if (!Subtarget->hasBCNT(64))
170 
171  if (Subtarget->hasFFBH())
173 
174  if (Subtarget->hasFFBL())
176 
177  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
178  // need it for R600.
179  if (Subtarget->hasBFE())
180  setHasExtractBitsInsn(true);
181 
183 
184  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
185  for (MVT VT : ScalarIntVTs)
187  Expand);
188 
189  // LLVM will expand these to atomic_cmp_swap(0)
190  // and atomic_swap, respectively.
192 
193  // We need to custom lower some of the intrinsics
195  Custom);
196 
198 
201 }
202 
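// Returns true when the instruction following I is the block's RETURN, i.e.
// the current instruction is the last one before the program ends and may set
// the End Of Program (EOP) bit on exports and RAT writes.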
203 static inline bool isEOP(MachineBasicBlock::iterator I) {
204  if (std::next(I) == I->getParent()->end())
205  return false;
206  return std::next(I)->getOpcode() == R600::RETURN;
207 }
208 
210 MachineBasicBlock *R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
211  MachineBasicBlock *BB) const {
212  MachineFunction *MF = BB->getParent();
213  MachineRegisterInfo &MRI = MF->getRegInfo();
214  MachineBasicBlock::iterator I = MI;
215  const R600InstrInfo *TII = Subtarget->getInstrInfo();
216 
217  switch (MI.getOpcode()) {
218  default:
219  // Replace LDS_*_RET instructions that don't have any uses with the
220  // equivalent LDS_*_NORET instruction.
221  if (TII->isLDSRetInstr(MI.getOpcode())) {
222  int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
223  assert(DstIdx != -1);
224  MachineInstrBuilder NewMI;
225  // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
226  // LDS_1A2D support and remove this special case.
227  if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
228  MI.getOpcode() == R600::LDS_CMPST_RET)
229  return BB;
230 
231  NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
232  TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
233  for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
234  NewMI.add(MO);
235  } else {
236  return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
237  }
238  break;
239 
240  case R600::FABS_R600: {
241  MachineInstr *NewMI = TII->buildDefaultInstruction(
242  *BB, I, R600::MOV, MI.getOperand(0).getReg(),
243  MI.getOperand(1).getReg());
244  TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
245  break;
246  }
247 
248  case R600::FNEG_R600: {
249  MachineInstr *NewMI = TII->buildDefaultInstruction(
250  *BB, I, R600::MOV, MI.getOperand(0).getReg(),
251  MI.getOperand(1).getReg());
252  TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
253  break;
254  }
255 
256  case R600::MASK_WRITE: {
257  Register maskedRegister = MI.getOperand(0).getReg();
258  assert(maskedRegister.isVirtual());
259  MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
260  TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
261  break;
262  }
263 
264  case R600::MOV_IMM_F32:
265  TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
266  .getFPImm()
267  ->getValueAPF()
268  .bitcastToAPInt()
269  .getZExtValue());
270  break;
271 
272  case R600::MOV_IMM_I32:
273  TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
274  MI.getOperand(1).getImm());
275  break;
276 
277  case R600::MOV_IMM_GLOBAL_ADDR: {
278  //TODO: Perhaps combine this instruction with the next if possible
279  auto MIB = TII->buildDefaultInstruction(
280  *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
281  int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
282  //TODO: Ugh this is rather ugly
283  const MachineOperand &MO = MI.getOperand(1);
284  MIB->getOperand(Idx).ChangeToGA(MO.getGlobal(), MO.getOffset(),
285  MO.getTargetFlags());
286  break;
287  }
288 
289  case R600::CONST_COPY: {
290  MachineInstr *NewMI = TII->buildDefaultInstruction(
291  *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
292  TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
293  MI.getOperand(1).getImm());
294  break;
295  }
296 
297  case R600::RAT_WRITE_CACHELESS_32_eg:
298  case R600::RAT_WRITE_CACHELESS_64_eg:
299  case R600::RAT_WRITE_CACHELESS_128_eg:
300  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
301  .add(MI.getOperand(0))
302  .add(MI.getOperand(1))
303  .addImm(isEOP(I)); // Set End of program bit
304  break;
305 
306  case R600::RAT_STORE_TYPED_eg:
307  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
308  .add(MI.getOperand(0))
309  .add(MI.getOperand(1))
310  .add(MI.getOperand(2))
311  .addImm(isEOP(I)); // Set End of program bit
312  break;
313 
314  case R600::BRANCH:
315  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
316  .add(MI.getOperand(0));
317  break;
318 
319  case R600::BRANCH_COND_f32: {
320  MachineInstr *NewMI =
321  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
322  R600::PREDICATE_BIT)
323  .add(MI.getOperand(1))
324  .addImm(R600::PRED_SETNE)
325  .addImm(0); // Flags
326  TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
327  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
328  .add(MI.getOperand(0))
329  .addReg(R600::PREDICATE_BIT, RegState::Kill);
330  break;
331  }
332 
333  case R600::BRANCH_COND_i32: {
334  MachineInstr *NewMI =
335  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
336  R600::PREDICATE_BIT)
337  .add(MI.getOperand(1))
338  .addImm(R600::PRED_SETNE_INT)
339  .addImm(0); // Flags
340  TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
341  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
342  .add(MI.getOperand(0))
343  .addReg(R600::PREDICATE_BIT, RegState::Kill);
344  break;
345  }
346 
347  case R600::EG_ExportSwz:
348  case R600::R600_ExportSwz: {
349  // Instruction is left unmodified if it's not the last one of its type
350  bool isLastInstructionOfItsType = true;
351  unsigned InstExportType = MI.getOperand(1).getImm();
352  for (MachineBasicBlock::iterator NextExportInst = std::next(I),
353  EndBlock = BB->end(); NextExportInst != EndBlock;
354  NextExportInst = std::next(NextExportInst)) {
355  if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
356  NextExportInst->getOpcode() == R600::R600_ExportSwz) {
357  unsigned CurrentInstExportType = NextExportInst->getOperand(1)
358  .getImm();
359  if (CurrentInstExportType == InstExportType) {
360  isLastInstructionOfItsType = false;
361  break;
362  }
363  }
364  }
365  bool EOP = isEOP(I);
366  if (!EOP && !isLastInstructionOfItsType)
367  return BB;
368  unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
369  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
370  .add(MI.getOperand(0))
371  .add(MI.getOperand(1))
372  .add(MI.getOperand(2))
373  .add(MI.getOperand(3))
374  .add(MI.getOperand(4))
375  .add(MI.getOperand(5))
376  .add(MI.getOperand(6))
377  .addImm(CfInst)
378  .addImm(EOP);
379  break;
380  }
381  case R600::RETURN: {
382  return BB;
383  }
384  }
385 
386  MI.eraseFromParent();
387  return BB;
388 }
389 
390 //===----------------------------------------------------------------------===//
391 // Custom DAG Lowering Operations
392 //===----------------------------------------------------------------------===//
393 
394 SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
395  MachineFunction &MF = DAG.getMachineFunction();
396  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
397  switch (Op.getOpcode()) {
398  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
399  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
400  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
401  case ISD::SHL_PARTS:
402  case ISD::SRA_PARTS:
403  case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
404  case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
405  case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
406  case ISD::FCOS:
407  case ISD::FSIN: return LowerTrig(Op, DAG);
408  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
409  case ISD::STORE: return LowerSTORE(Op, DAG);
410  case ISD::LOAD: {
411  SDValue Result = LowerLOAD(Op, DAG);
412  assert((!Result.getNode() ||
413  Result.getNode()->getNumValues() == 2) &&
414  "Load should return a value and a chain");
415  return Result;
416  }
417 
418  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
419  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
420  case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
421  case ISD::INTRINSIC_VOID: {
422  SDValue Chain = Op.getOperand(0);
423  unsigned IntrinsicID =
424  cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
425  switch (IntrinsicID) {
426  case Intrinsic::r600_store_swizzle: {
427  SDLoc DL(Op);
428  const SDValue Args[8] = {
429  Chain,
430  Op.getOperand(2), // Export Value
431  Op.getOperand(3), // ArrayBase
432  Op.getOperand(4), // Type
433  DAG.getConstant(0, DL, MVT::i32), // SWZ_X
434  DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
435  DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
436  DAG.getConstant(3, DL, MVT::i32) // SWZ_W
437  };
438  return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
439  }
440 
441  // default for switch(IntrinsicID)
442  default: break;
443  }
444  // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
445  break;
446  }
447  case ISD::INTRINSIC_WO_CHAIN: {
448  unsigned IntrinsicID =
449  cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
450  EVT VT = Op.getValueType();
451  SDLoc DL(Op);
452  switch (IntrinsicID) {
453  case Intrinsic::r600_tex:
454  case Intrinsic::r600_texc: {
455  unsigned TextureOp;
456  switch (IntrinsicID) {
457  case Intrinsic::r600_tex:
458  TextureOp = 0;
459  break;
460  case Intrinsic::r600_texc:
461  TextureOp = 1;
462  break;
463  default:
464  llvm_unreachable("unhandled texture operation");
465  }
466 
467  SDValue TexArgs[19] = {
468  DAG.getConstant(TextureOp, DL, MVT::i32),
469  Op.getOperand(1),
470  DAG.getConstant(0, DL, MVT::i32),
471  DAG.getConstant(1, DL, MVT::i32),
472  DAG.getConstant(2, DL, MVT::i32),
473  DAG.getConstant(3, DL, MVT::i32),
474  Op.getOperand(2),
475  Op.getOperand(3),
476  Op.getOperand(4),
477  DAG.getConstant(0, DL, MVT::i32),
478  DAG.getConstant(1, DL, MVT::i32),
479  DAG.getConstant(2, DL, MVT::i32),
480  DAG.getConstant(3, DL, MVT::i32),
481  Op.getOperand(5),
482  Op.getOperand(6),
483  Op.getOperand(7),
484  Op.getOperand(8),
485  Op.getOperand(9),
486  Op.getOperand(10)
487  };
488  return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
489  }
490  case Intrinsic::r600_dot4: {
491  SDValue Args[8] = {
492  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
493  DAG.getConstant(0, DL, MVT::i32)),
494  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
495  DAG.getConstant(0, DL, MVT::i32)),
496  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
497  DAG.getConstant(1, DL, MVT::i32)),
498  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
499  DAG.getConstant(1, DL, MVT::i32)),
500  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
501  DAG.getConstant(2, DL, MVT::i32)),
502  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
503  DAG.getConstant(2, DL, MVT::i32)),
504  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
505  DAG.getConstant(3, DL, MVT::i32)),
506  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
507  DAG.getConstant(3, DL, MVT::i32))
508  };
509  return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
510  }
511 
512  case Intrinsic::r600_implicitarg_ptr: {
515  return DAG.getConstant(ByteOffset, DL, PtrVT);
516  }
517  case Intrinsic::r600_read_ngroups_x:
518  return LowerImplicitParameter(DAG, VT, DL, 0);
519  case Intrinsic::r600_read_ngroups_y:
520  return LowerImplicitParameter(DAG, VT, DL, 1);
521  case Intrinsic::r600_read_ngroups_z:
522  return LowerImplicitParameter(DAG, VT, DL, 2);
523  case Intrinsic::r600_read_global_size_x:
524  return LowerImplicitParameter(DAG, VT, DL, 3);
525  case Intrinsic::r600_read_global_size_y:
526  return LowerImplicitParameter(DAG, VT, DL, 4);
527  case Intrinsic::r600_read_global_size_z:
528  return LowerImplicitParameter(DAG, VT, DL, 5);
529  case Intrinsic::r600_read_local_size_x:
530  return LowerImplicitParameter(DAG, VT, DL, 6);
531  case Intrinsic::r600_read_local_size_y:
532  return LowerImplicitParameter(DAG, VT, DL, 7);
533  case Intrinsic::r600_read_local_size_z:
534  return LowerImplicitParameter(DAG, VT, DL, 8);
535 
536  case Intrinsic::r600_read_tgid_x:
537  case Intrinsic::amdgcn_workgroup_id_x:
538  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
539  R600::T1_X, VT);
540  case Intrinsic::r600_read_tgid_y:
541  case Intrinsic::amdgcn_workgroup_id_y:
542  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
543  R600::T1_Y, VT);
544  case Intrinsic::r600_read_tgid_z:
545  case Intrinsic::amdgcn_workgroup_id_z:
546  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
547  R600::T1_Z, VT);
548  case Intrinsic::r600_read_tidig_x:
549  case Intrinsic::amdgcn_workitem_id_x:
550  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
551  R600::T0_X, VT);
552  case Intrinsic::r600_read_tidig_y:
553  case Intrinsic::amdgcn_workitem_id_y:
554  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
555  R600::T0_Y, VT);
556  case Intrinsic::r600_read_tidig_z:
557  case Intrinsic::amdgcn_workitem_id_z:
558  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
559  R600::T0_Z, VT);
560 
561  case Intrinsic::r600_recipsqrt_ieee:
562  return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
563 
564  case Intrinsic::r600_recipsqrt_clamped:
565  return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
566  default:
567  return Op;
568  }
569 
570  // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
571  break;
572  }
573  } // end switch(Op.getOpcode())
574  return SDValue();
575 }
576 
577 void R600TargetLowering::ReplaceNodeResults(SDNode *N,
578  SmallVectorImpl<SDValue> &Results,
579  SelectionDAG &DAG) const {
580  switch (N->getOpcode()) {
581  default:
582  AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
583  return;
584  case ISD::FP_TO_UINT:
585  if (N->getValueType(0) == MVT::i1) {
586  Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
587  return;
588  }
589  // Since we don't care about out of bounds values we can use FP_TO_SINT for
590  // uints too. The DAGLegalizer code for uint considers some extra cases
591  // which are not necessary here.
592  [[fallthrough]];
593  case ISD::FP_TO_SINT: {
594  if (N->getValueType(0) == MVT::i1) {
595  Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
596  return;
597  }
598 
599  SDValue Result;
600  if (expandFP_TO_SINT(N, Result, DAG))
601  Results.push_back(Result);
602  return;
603  }
604  case ISD::SDIVREM: {
605  SDValue Op = SDValue(N, 1);
606  SDValue RES = LowerSDIVREM(Op, DAG);
607  Results.push_back(RES);
608  Results.push_back(RES.getValue(1));
609  break;
610  }
611  case ISD::UDIVREM: {
612  SDValue Op = SDValue(N, 0);
613  LowerUDIVREM64(Op, DAG, Results);
614  break;
615  }
616  }
617 }
618 
619 SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
620  SDValue Vector) const {
621  SDLoc DL(Vector);
622  EVT VecVT = Vector.getValueType();
623  EVT EltVT = VecVT.getVectorElementType();
625 
626  for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
627  Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
628  DAG.getVectorIdxConstant(i, DL)));
629  }
630 
631  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
632 }
633 
634 SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
635  SelectionDAG &DAG) const {
636  SDLoc DL(Op);
637  SDValue Vector = Op.getOperand(0);
638  SDValue Index = Op.getOperand(1);
639 
640  if (isa<ConstantSDNode>(Index) ||
641  Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
642  return Op;
643 
644  Vector = vectorToVerticalVector(DAG, Vector);
645  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
646  Vector, Index);
647 }
648 
649 SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
650  SelectionDAG &DAG) const {
651  SDLoc DL(Op);
652  SDValue Vector = Op.getOperand(0);
653  SDValue Value = Op.getOperand(1);
654  SDValue Index = Op.getOperand(2);
655 
656  if (isa<ConstantSDNode>(Index) ||
657  Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
658  return Op;
659 
660  Vector = vectorToVerticalVector(DAG, Vector);
661  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
662  Vector, Value, Index);
663  return vectorToVerticalVector(DAG, Insert);
664 }
665 
666 SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
667  SDValue Op,
668  SelectionDAG &DAG) const {
669  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
670  if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
671  return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
672 
673  const DataLayout &DL = DAG.getDataLayout();
674  const GlobalValue *GV = GSD->getGlobal();
675  EVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
676 
677  SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
678  return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
679 }
680 
681 SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
682  // On hw >= R700, COS/SIN input must be between -1. and 1.
683  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
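// (0.15915494309 below is 1/(2*Pi); FRACT maps the scaled input into [0, 1),
// and the final -0.5 recenters it onto [-0.5, 0.5) for the HW trig units.)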
684  EVT VT = Op.getValueType();
685  SDValue Arg = Op.getOperand(0);
686  SDLoc DL(Op);
687 
688  // TODO: Should this propagate fast-math-flags?
689  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
690  DAG.getNode(ISD::FADD, DL, VT,
691  DAG.getNode(ISD::FMUL, DL, VT, Arg,
692  DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
693  DAG.getConstantFP(0.5, DL, MVT::f32)));
694  unsigned TrigNode;
695  switch (Op.getOpcode()) {
696  case ISD::FCOS:
697  TrigNode = AMDGPUISD::COS_HW;
698  break;
699  case ISD::FSIN:
700  TrigNode = AMDGPUISD::SIN_HW;
701  break;
702  default:
703  llvm_unreachable("Wrong trig opcode");
704  }
705  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
706  DAG.getNode(ISD::FADD, DL, VT, FractPart,
707  DAG.getConstantFP(-0.5, DL, MVT::f32)));
708  if (Gen >= AMDGPUSubtarget::R700)
709  return TrigVal;
710  // On R600 hw, COS/SIN input must be between -Pi and Pi.
711  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
713 }
714 
715 SDValue R600TargetLowering::LowerShiftParts(SDValue Op,
716  SelectionDAG &DAG) const {
717  SDValue Lo, Hi;
718  expandShiftParts(Op.getNode(), Lo, Hi, DAG);
719  return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
720 }
721 
722 SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
723  unsigned mainop, unsigned ovf) const {
724  SDLoc DL(Op);
725  EVT VT = Op.getValueType();
726 
727  SDValue Lo = Op.getOperand(0);
728  SDValue Hi = Op.getOperand(1);
729 
730  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
731  // Extend sign.
732  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
733  DAG.getValueType(MVT::i1));
734 
735  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
736 
737  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
738 }
739 
740 SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
741  SDLoc DL(Op);
742  return DAG.getNode(
743  ISD::SETCC,
744  DL,
745  MVT::i1,
746  Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
747  DAG.getCondCode(ISD::SETEQ));
748 }
749 
750 SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
751  SDLoc DL(Op);
752  return DAG.getNode(
753  ISD::SETCC,
754  DL,
755  MVT::i1,
756  Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
757  DAG.getCondCode(ISD::SETEQ));
758 }
759 
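// Implicit kernel parameters (ngroups, global and local sizes) live at fixed
// dword offsets in the implicit parameter buffer; LowerImplicitParameter
// simply loads the requested dword from that buffer.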
760 SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
761  const SDLoc &DL,
762  unsigned DwordOffset) const {
763  unsigned ByteOffset = DwordOffset * 4;
764  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
765  AMDGPUAS::PARAM_I_ADDRESS);
766 
767  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
768  assert(isInt<16>(ByteOffset));
769 
770  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
771  DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
772  MachinePointerInfo(ConstantPointerNull::get(PtrType)));
773 }
774 
775 bool R600TargetLowering::isZero(SDValue Op) const {
776  if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
777  return Cst->isZero();
778  } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
779  return CstFP->isZero();
780  } else {
781  return false;
782  }
783 }
784 
785 bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
786  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
787  return CFP->isExactlyValue(1.0);
788  }
789  return isAllOnesConstant(Op);
790 }
791 
792 bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
793  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
794  return CFP->getValueAPF().isZero();
795  }
796  return isNullConstant(Op);
797 }
798 
799 SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
800  SDLoc DL(Op);
801  EVT VT = Op.getValueType();
802 
803  SDValue LHS = Op.getOperand(0);
804  SDValue RHS = Op.getOperand(1);
805  SDValue True = Op.getOperand(2);
806  SDValue False = Op.getOperand(3);
807  SDValue CC = Op.getOperand(4);
808  SDValue Temp;
809 
810  if (VT == MVT::f32) {
811  DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
812  SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
813  if (MinMax)
814  return MinMax;
815  }
816 
817  // LHS and RHS are guaranteed to be the same value type
818  EVT CompareVT = LHS.getValueType();
819 
820  // Check if we can lower this to a native operation.
821 
822  // Try to lower to a SET* instruction:
823  //
824  // SET* can match the following patterns:
825  //
826  // select_cc f32, f32, -1, 0, cc_supported
827  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
828  // select_cc i32, i32, -1, 0, cc_supported
829  //
830 
831  // Move hardware True/False values to the correct operand.
832  if (isHWTrueValue(False) && isHWFalseValue(True)) {
833  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
834  ISD::CondCode InverseCC = ISD::getSetCCInverse(CCOpcode, CompareVT);
835  if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
836  std::swap(False, True);
837  CC = DAG.getCondCode(InverseCC);
838  } else {
839  ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
840  if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
841  std::swap(False, True);
842  std::swap(LHS, RHS);
843  CC = DAG.getCondCode(SwapInvCC);
844  }
845  }
846  }
847 
848  if (isHWTrueValue(True) && isHWFalseValue(False) &&
849  (CompareVT == VT || VT == MVT::i32)) {
850  // This can be matched by a SET* instruction.
851  return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
852  }
853 
854  // Try to lower to a CND* instruction:
855  //
856  // CND* can match the following patterns:
857  //
858  // select_cc f32, 0.0, f32, f32, cc_supported
859  // select_cc f32, 0.0, i32, i32, cc_supported
860  // select_cc i32, 0, f32, f32, cc_supported
861  // select_cc i32, 0, i32, i32, cc_supported
862  //
863 
864  // Try to move the zero value to the RHS
865  if (isZero(LHS)) {
866  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
867  // Try swapping the operands
868  ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
869  if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
870  std::swap(LHS, RHS);
871  CC = DAG.getCondCode(CCSwapped);
872  } else {
873  // Try inverting the condition and then swapping the operands
874  ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT);
875  CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
876  if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
877  std::swap(True, False);
878  std::swap(LHS, RHS);
879  CC = DAG.getCondCode(CCSwapped);
880  }
881  }
882  }
883  if (isZero(RHS)) {
884  SDValue Cond = LHS;
885  SDValue Zero = RHS;
886  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
887  if (CompareVT != VT) {
888  // Bitcast True / False to the correct types. This will end up being
889  // a nop, but it allows us to define only a single pattern in the
890  // .TD files for each CND* instruction rather than having to have
891  // one pattern for integer True/False and one for fp True/False
892  True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
893  False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
894  }
895 
896  switch (CCOpcode) {
897  case ISD::SETONE:
898  case ISD::SETUNE:
899  case ISD::SETNE:
900  CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT);
901  Temp = True;
902  True = False;
903  False = Temp;
904  break;
905  default:
906  break;
907  }
908  SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
909  Cond, Zero,
910  True, False,
911  DAG.getCondCode(CCOpcode));
912  return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
913  }
914 
915  // If we make it this far it means we have no native instructions to handle
916  // this SELECT_CC, so we must lower it.
917  SDValue HWTrue, HWFalse;
918 
919  if (CompareVT == MVT::f32) {
920  HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
921  HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
922  } else if (CompareVT == MVT::i32) {
923  HWTrue = DAG.getConstant(-1, DL, CompareVT);
924  HWFalse = DAG.getConstant(0, DL, CompareVT);
925  }
926  else {
927  llvm_unreachable("Unhandled value type in LowerSELECT_CC");
928  }
929 
930  // Lower this unsupported SELECT_CC into a combination of two supported
931  // SELECT_CC operations.
932  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
933 
934  return DAG.getNode(ISD::SELECT_CC, DL, VT,
935  Cond, HWFalse,
936  True, False,
937  DAG.getCondCode(ISD::SETNE));
938 }
939 
940 /// LLVM generates byte-addressed pointers. For indirect addressing, we need to
941 /// convert these pointers to a register index. Each register holds
942 /// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
943 /// \p StackWidth, which tells us how many of the 4 sub-registers will be used
944 /// for indirect addressing.
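/// (A StackWidth of 1 divides the byte address by 4, 2 divides it by 8, and 4
/// divides it by 16, matching the switch below.)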
945 SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
946  unsigned StackWidth,
947  SelectionDAG &DAG) const {
948  unsigned SRLPad;
949  switch(StackWidth) {
950  case 1:
951  SRLPad = 2;
952  break;
953  case 2:
954  SRLPad = 3;
955  break;
956  case 4:
957  SRLPad = 4;
958  break;
959  default: llvm_unreachable("Invalid stack width");
960  }
961 
962  SDLoc DL(Ptr);
963  return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
964  DAG.getConstant(SRLPad, DL, MVT::i32));
965 }
966 
967 void R600TargetLowering::getStackAddress(unsigned StackWidth,
968  unsigned ElemIdx,
969  unsigned &Channel,
970  unsigned &PtrIncr) const {
971  switch (StackWidth) {
972  default:
973  case 1:
974  Channel = 0;
975  if (ElemIdx > 0) {
976  PtrIncr = 1;
977  } else {
978  PtrIncr = 0;
979  }
980  break;
981  case 2:
982  Channel = ElemIdx % 2;
983  if (ElemIdx == 2) {
984  PtrIncr = 1;
985  } else {
986  PtrIncr = 0;
987  }
988  break;
989  case 4:
990  Channel = ElemIdx;
991  PtrIncr = 0;
992  break;
993  }
994 }
995 
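// Sub-dword private stores are expanded into a read-modify-write of the
// containing dword: load the dword, clear the target byte/halfword with a
// shifted mask, OR in the shifted value, and store the dword back.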
996 SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
997  SelectionDAG &DAG) const {
998  SDLoc DL(Store);
999  //TODO: Who creates the i8 stores?
1000  assert(Store->isTruncatingStore()
1001  || Store->getValue().getValueType() == MVT::i8);
1002  assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
1003 
1004  SDValue Mask;
1005  if (Store->getMemoryVT() == MVT::i8) {
1006  assert(Store->getAlign() >= 1);
1007  Mask = DAG.getConstant(0xff, DL, MVT::i32);
1008  } else if (Store->getMemoryVT() == MVT::i16) {
1009  assert(Store->getAlign() >= 2);
1010  Mask = DAG.getConstant(0xffff, DL, MVT::i32);
1011  } else {
1012  llvm_unreachable("Unsupported private trunc store");
1013  }
1014 
1015  SDValue OldChain = Store->getChain();
1016  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
1017  // Skip dummy
1018  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
1019  SDValue BasePtr = Store->getBasePtr();
1020  SDValue Offset = Store->getOffset();
1021  EVT MemVT = Store->getMemoryVT();
1022 
1023  SDValue LoadPtr = BasePtr;
1024  if (!Offset.isUndef()) {
1025  LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1026  }
1027 
1028  // Get dword location
1029  // TODO: this should be eliminated by the future SHR ptr, 2
1030  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1031  DAG.getConstant(0xfffffffc, DL, MVT::i32));
1032 
1033  // Load dword
1034  // TODO: can we be smarter about machine pointer info?
1035  MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
1036  SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
1037 
1038  Chain = Dst.getValue(1);
1039 
1040  // Get offset in dword
1041  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1042  DAG.getConstant(0x3, DL, MVT::i32));
1043 
1044  // Convert byte offset to bit shift
1045  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1046  DAG.getConstant(3, DL, MVT::i32));
1047 
1048  // TODO: Contrary to the name of the function,
1049  // it also handles sub i32 non-truncating stores (like i1)
1050  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
1051  Store->getValue());
1052 
1053  // Mask the value to the right type
1054  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
1055 
1056  // Shift the value in place
1057  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
1058  MaskedValue, ShiftAmt);
1059 
1060  // Shift the mask in place
1061  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);
1062 
1063  // Invert the mask. NOTE: if we had native ROL instructions we could
1064  // use inverted mask
1065  DstMask = DAG.getNOT(DL, DstMask, MVT::i32);
1066 
1067  // Cleanup the target bits
1068  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
1069 
1070  // Add the new bits
1071  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
1072 
1073  // Store dword
1074  // TODO: Can we be smarter about MachinePointerInfo?
1075  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);
1076 
1077  // If we are part of expanded vector, make our neighbors depend on this store
1078  if (VectorTrunc) {
1079  // Make all other vector elements depend on this store
1080  Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
1081  DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
1082  }
1083  return NewStore;
1084 }
1085 
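// Store lowering overview: vector stores to LOCAL/PRIVATE (and truncating
// vector stores) are scalarized, truncating global stores become STORE_MSKOR,
// sub-dword private stores go through lowerPrivateTruncStore, and dword or
// wider private stores are tagged with DWORDADDR so the patterns can match.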
1086 SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1087  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1088  unsigned AS = StoreNode->getAddressSpace();
1089 
1090  SDValue Chain = StoreNode->getChain();
1091  SDValue Ptr = StoreNode->getBasePtr();
1092  SDValue Value = StoreNode->getValue();
1093 
1094  EVT VT = Value.getValueType();
1095  EVT MemVT = StoreNode->getMemoryVT();
1096  EVT PtrVT = Ptr.getValueType();
1097 
1098  SDLoc DL(Op);
1099 
1100  const bool TruncatingStore = StoreNode->isTruncatingStore();
1101 
1102  // Neither LOCAL nor PRIVATE can do vectors at the moment
1103  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS ||
1104  TruncatingStore) &&
1105  VT.isVector()) {
1106  if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && TruncatingStore) {
1107  // Add an extra level of chain to isolate this vector
1108  SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
1109  // TODO: can the chain be replaced without creating a new store?
1110  SDValue NewStore = DAG.getTruncStore(
1111  NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(), MemVT,
1112  StoreNode->getAlign(), StoreNode->getMemOperand()->getFlags(),
1113  StoreNode->getAAInfo());
1114  StoreNode = cast<StoreSDNode>(NewStore);
1115  }
1116 
1117  return scalarizeVectorStore(StoreNode, DAG);
1118  }
1119 
1120  Align Alignment = StoreNode->getAlign();
1121  if (Alignment < MemVT.getStoreSize() &&
1122  !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
1123  StoreNode->getMemOperand()->getFlags(),
1124  nullptr)) {
1125  return expandUnalignedStore(StoreNode, DAG);
1126  }
1127 
1128  SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
1129  DAG.getConstant(2, DL, PtrVT));
1130 
1131  if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
1132  // It is beneficial to create MSKOR here instead of in the combiner to avoid
1133  // artificial dependencies introduced by RMW
1134  if (TruncatingStore) {
1135  assert(VT.bitsLE(MVT::i32));
1136  SDValue MaskConstant;
1137  if (MemVT == MVT::i8) {
1138  MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
1139  } else {
1140  assert(MemVT == MVT::i16);
1141  assert(StoreNode->getAlign() >= 2);
1142  MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
1143  }
1144 
1145  SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
1146  DAG.getConstant(0x00000003, DL, PtrVT));
1147  SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1148  DAG.getConstant(3, DL, VT));
1149 
1150  // Put the mask in correct place
1151  SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);
1152 
1153  // Put the value bits in correct place
1154  SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1155  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
1156 
1157  // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1158  // vector instead.
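// STORE_MSKOR takes {value, 0, 0, mask}; the RAT then performs roughly
// dst = (dst & ~mask) | value, which is why both the value and the mask are
// pre-shifted into the addressed byte lanes above.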
1159  SDValue Src[4] = {
1160  ShiftedValue,
1161  DAG.getConstant(0, DL, MVT::i32),
1162  DAG.getConstant(0, DL, MVT::i32),
1163  Mask
1164  };
1165  SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
1166  SDValue Args[3] = { Chain, Input, DWordAddr };
1167  return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
1168  Op->getVTList(), Args, MemVT,
1169  StoreNode->getMemOperand());
1170  } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
1171  // Convert pointer from byte address to dword address.
1172  Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1173 
1174  if (StoreNode->isIndexed()) {
1175  llvm_unreachable("Indexed stores not supported yet");
1176  } else {
1177  Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1178  }
1179  return Chain;
1180  }
1181  }
1182 
1183  // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
1184  if (AS != AMDGPUAS::PRIVATE_ADDRESS)
1185  return SDValue();
1186 
1187  if (MemVT.bitsLT(MVT::i32))
1188  return lowerPrivateTruncStore(StoreNode, DAG);
1189 
1190  // Standard i32+ store, tag it with DWORDADDR to note that the address
1191  // has been shifted
1192  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1193  Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1194  return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1195  }
1196 
1197  // Tagged i32+ stores will be matched by patterns
1198  return SDValue();
1199 }
1200 
1201 // Returns 512 + (kc_bank << 12) for a constant buffer address space, or -1.
1202 static int
1203 ConstantAddressBlock(unsigned AddressSpace) {
1204  switch (AddressSpace) {
1205  case AMDGPUAS::CONSTANT_BUFFER_0:
1206  return 512;
1207  case AMDGPUAS::CONSTANT_BUFFER_1:
1208  return 512 + 4096;
1209  case AMDGPUAS::CONSTANT_BUFFER_2:
1210  return 512 + 4096 * 2;
1211  case AMDGPUAS::CONSTANT_BUFFER_3:
1212  return 512 + 4096 * 3;
1213  case AMDGPUAS::CONSTANT_BUFFER_4:
1214  return 512 + 4096 * 4;
1215  case AMDGPUAS::CONSTANT_BUFFER_5:
1216  return 512 + 4096 * 5;
1217  case AMDGPUAS::CONSTANT_BUFFER_6:
1218  return 512 + 4096 * 6;
1219  case AMDGPUAS::CONSTANT_BUFFER_7:
1220  return 512 + 4096 * 7;
1221  case AMDGPUAS::CONSTANT_BUFFER_8:
1222  return 512 + 4096 * 8;
1223  case AMDGPUAS::CONSTANT_BUFFER_9:
1224  return 512 + 4096 * 9;
1225  case AMDGPUAS::CONSTANT_BUFFER_10:
1226  return 512 + 4096 * 10;
1227  case AMDGPUAS::CONSTANT_BUFFER_11:
1228  return 512 + 4096 * 11;
1229  case AMDGPUAS::CONSTANT_BUFFER_12:
1230  return 512 + 4096 * 12;
1231  case AMDGPUAS::CONSTANT_BUFFER_13:
1232  return 512 + 4096 * 13;
1233  case AMDGPUAS::CONSTANT_BUFFER_14:
1234  return 512 + 4096 * 14;
1235  case AMDGPUAS::CONSTANT_BUFFER_15:
1236  return 512 + 4096 * 15;
1237  default:
1238  return -1;
1239  }
1240 }
1241 
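// Sub-dword private extending loads mirror lowerPrivateTruncStore: load the
// containing dword, shift the addressed byte/halfword down to bit 0, then
// sign- or zero-extend it according to the extension type.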
1242 SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
1243  SelectionDAG &DAG) const {
1244  SDLoc DL(Op);
1245  LoadSDNode *Load = cast<LoadSDNode>(Op);
1246  ISD::LoadExtType ExtType = Load->getExtensionType();
1247  EVT MemVT = Load->getMemoryVT();
1248  assert(Load->getAlign() >= MemVT.getStoreSize());
1249 
1250  SDValue BasePtr = Load->getBasePtr();
1251  SDValue Chain = Load->getChain();
1252  SDValue Offset = Load->getOffset();
1253 
1254  SDValue LoadPtr = BasePtr;
1255  if (!Offset.isUndef()) {
1256  LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1257  }
1258 
1259  // Get dword location
1260  // NOTE: this should be eliminated by the future SHR ptr, 2
1261  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1262  DAG.getConstant(0xfffffffc, DL, MVT::i32));
1263 
1264  // Load dword
1265  // TODO: can we be smarter about machine pointer info?
1266  MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
1267  SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
1268 
1269  // Get offset within the register.
1270  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1271  LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));
1272 
1273  // Bit offset of target byte (byteIdx * 8).
1274  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1275  DAG.getConstant(3, DL, MVT::i32));
1276 
1277  // Shift to the right.
1278  SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);
1279 
1280  // Eliminate the upper bits by setting them to ...
1281  EVT MemEltVT = MemVT.getScalarType();
1282 
1283  if (ExtType == ISD::SEXTLOAD) { // ... ones.
1284  SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1285  Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
1286  } else { // ... or zeros.
1287  Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
1288  }
1289 
1290  SDValue Ops[] = {
1291  Ret,
1292  Read.getValue(1) // This should be our output chain
1293  };
1294 
1295  return DAG.getMergeValues(Ops, DL);
1296 }
1297 
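// Load lowering overview: vector loads from LOCAL/PRIVATE are scalarized,
// constant-buffer loads become CONST_ADDRESS nodes (see constBufferLoad),
// sub-dword private extloads go through lowerPrivateExtLoad, and remaining
// private dword loads are tagged with DWORDADDR.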
1298 SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1299  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1300  unsigned AS = LoadNode->getAddressSpace();
1301  EVT MemVT = LoadNode->getMemoryVT();
1302  ISD::LoadExtType ExtType = LoadNode->getExtensionType();
1303 
1304  if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
1305  ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
1306  return lowerPrivateExtLoad(Op, DAG);
1307  }
1308 
1309  SDLoc DL(Op);
1310  EVT VT = Op.getValueType();
1311  SDValue Chain = LoadNode->getChain();
1312  SDValue Ptr = LoadNode->getBasePtr();
1313 
1314  if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
1315  LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
1316  VT.isVector()) {
1317  SDValue Ops[2];
1318  std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(LoadNode, DAG);
1319  return DAG.getMergeValues(Ops, DL);
1320  }
1321 
1322  // This is still used for explicit load from addrspace(8)
1323  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
1324  if (ConstantBlock > -1 &&
1325  ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
1326  (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
1327  SDValue Result;
1328  if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
1329  isa<ConstantSDNode>(Ptr)) {
1330  return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
1331  } else {
1332  //TODO: Does this even work?
1333  // non-constant ptr can't be folded, keeps it as a v4f32 load
1334  Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
1335  DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
1336  DAG.getConstant(4, DL, MVT::i32)),
1337  DAG.getConstant(LoadNode->getAddressSpace() -
1338  AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
1339  );
1340  }
1341 
1342  if (!VT.isVector()) {
1343  Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1344  DAG.getConstant(0, DL, MVT::i32));
1345  }
1346 
1347  SDValue MergedValues[2] = {
1348  Result,
1349  Chain
1350  };
1351  return DAG.getMergeValues(MergedValues, DL);
1352  }
1353 
1354  // For most operations returning SDValue() will result in the node being
1355  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
1356  // need to manually expand loads that may be legal in some address spaces and
1357  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
1358  // compute shaders, since the data is sign extended when it is uploaded to the
1359  // buffer. However SEXT loads from other address spaces are not supported, so
1360  // we need to expand them here.
1361  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1362  assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
1363  SDValue NewLoad = DAG.getExtLoad(
1364  ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
1365  LoadNode->getAlign(), LoadNode->getMemOperand()->getFlags());
1366  SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
1367  DAG.getValueType(MemVT));
1368 
1369  SDValue MergedValues[2] = { Res, Chain };
1370  return DAG.getMergeValues(MergedValues, DL);
1371  }
1372 
1373  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1374  return SDValue();
1375  }
1376 
1377  // DWORDADDR ISD marks already shifted address
1378  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1379  assert(VT == MVT::i32);
1380  Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
1381  Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
1382  return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
1383  }
1384  return SDValue();
1385 }
1386 
1387 SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1388  SDValue Chain = Op.getOperand(0);
1389  SDValue Cond = Op.getOperand(1);
1390  SDValue Jump = Op.getOperand(2);
1391 
1392  return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1393  Chain, Jump, Cond);
1394 }
1395 
1396 SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
1397  SelectionDAG &DAG) const {
1398  MachineFunction &MF = DAG.getMachineFunction();
1399  const R600FrameLowering *TFL = Subtarget->getFrameLowering();
1400 
1401  FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
1402 
1403  unsigned FrameIndex = FIN->getIndex();
1404  Register IgnoredFrameReg;
1405  StackOffset Offset =
1406  TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
1407  return DAG.getConstant(Offset.getFixed() * 4 * TFL->getStackWidth(MF),
1408  SDLoc(Op), Op.getValueType());
1409 }
1410 
1411 CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
1412  bool IsVarArg) const {
1413  switch (CC) {
1414  case CallingConv::AMDGPU_KERNEL:
1415  case CallingConv::SPIR_KERNEL:
1416  case CallingConv::C:
1417  case CallingConv::Fast:
1418  case CallingConv::Cold:
1419  llvm_unreachable("kernels should not be handled here");
1420  case CallingConv::AMDGPU_VS:
1421  case CallingConv::AMDGPU_GS:
1422  case CallingConv::AMDGPU_PS:
1423  case CallingConv::AMDGPU_CS:
1424  case CallingConv::AMDGPU_HS:
1425  case CallingConv::AMDGPU_ES:
1426  case CallingConv::AMDGPU_LS:
1427  return CC_R600;
1428  default:
1429  report_fatal_error("Unsupported calling convention.");
1430  }
1431 }
1432 
1433 /// XXX Only kernel functions are supported, so we can assume for now that
1434 /// every function is a kernel function, but in the future we should use
1435 /// separate calling conventions for kernel and non-kernel functions.
1436 SDValue R600TargetLowering::LowerFormalArguments(
1437  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1438  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1439  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1440  SmallVector<CCValAssign, 16> ArgLocs;
1441  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1442  *DAG.getContext());
1443  MachineFunction &MF = DAG.getMachineFunction();
1445 
1446  if (AMDGPU::isShader(CallConv)) {
1447  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
1448  } else {
1449  analyzeFormalArgumentsCompute(CCInfo, Ins);
1450  }
1451 
1452  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
1453  CCValAssign &VA = ArgLocs[i];
1454  const ISD::InputArg &In = Ins[i];
1455  EVT VT = In.VT;
1456  EVT MemVT = VA.getLocVT();
1457  if (!VT.isVector() && MemVT.isVector()) {
1458  // Get load source type if scalarized.
1459  MemVT = MemVT.getVectorElementType();
1460  }
1461 
1462  if (AMDGPU::isShader(CallConv)) {
1463  Register Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
1464  SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1465  InVals.push_back(Register);
1466  continue;
1467  }
1468 
1469  // i64 isn't a legal type, so the register type used ends up as i32, which
1470  // isn't expected here. It attempts to create this sextload, but it ends up
1471  // being invalid. Somehow this seems to work with i64 arguments, but breaks
1472  // for <1 x i64>.
1473 
1474  // The first 36 bytes of the input buffer contains information about
1475  // thread group and global sizes.
1476  ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
1477  if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
1478  // FIXME: This should really check the extload type, but the handling of
1479  // extload vector parameters seems to be broken.
1480 
1481  // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1482  Ext = ISD::SEXTLOAD;
1483  }
1484 
1485  // Compute the offset from the value.
1486  // XXX - I think PartOffset should give you this, but it seems to give the
1487  // size of the register which isn't useful.
1488 
1489  unsigned PartOffset = VA.getLocMemOffset();
1490  unsigned Alignment = MinAlign(VT.getStoreSize(), PartOffset);
1491 
1492  MachinePointerInfo PtrInfo(AMDGPUAS::PARAM_I_ADDRESS);
1493  SDValue Arg = DAG.getLoad(
1494  ISD::UNINDEXED, Ext, VT, DL, Chain,
1495  DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
1496  PtrInfo,
1497  MemVT, Alignment, MachineMemOperand::MONonTemporal |
1498  MachineMemOperand::MODereferenceable |
1499  MachineMemOperand::MOInvariant);
1500 
1501  InVals.push_back(Arg);
1502  }
1503  return Chain;
1504 }
1505 
1506 EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
1507  EVT VT) const {
1508  if (!VT.isVector())
1509  return MVT::i32;
1510  return VT.changeVectorElementTypeToInteger();
1511 }
1512 
1513 bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
1514  const MachineFunction &MF) const {
1515  // Local and Private addresses do not handle vectors. Limit to i32
1516  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS)) {
1517  return (MemVT.getSizeInBits() <= 32);
1518  }
1519  return true;
1520 }
1521 
1522 bool R600TargetLowering::allowsMisalignedMemoryAccesses(
1523  EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1524  unsigned *IsFast) const {
1525  if (IsFast)
1526  *IsFast = 0;
1527 
1528  if (!VT.isSimple() || VT == MVT::Other)
1529  return false;
1530 
1531  if (VT.bitsLT(MVT::i32))
1532  return false;
1533 
1534  // TODO: This is a rough estimate.
1535  if (IsFast)
1536  *IsFast = 1;
1537 
1538  return VT.bitsGT(MVT::i32) && Alignment >= Align(4);
1539 }
1540 
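// Swizzle helpers: CompactSwizzlableVector folds constant 0.0/1.0 and repeated
// elements into swizzle selects (SEL_0, SEL_1, or an earlier channel), and
// ReorganizeVector then tries to move each element back to its source channel.
// OptimizeSwizzle applies both and rewrites the swizzle operands to match.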
1541 static SDValue CompactSwizzlableVector(
1542  SelectionDAG &DAG, SDValue VectorEntry,
1543  DenseMap<unsigned, unsigned> &RemapSwizzle) {
1544  assert(RemapSwizzle.empty());
1545 
1546  SDLoc DL(VectorEntry);
1547  EVT EltTy = VectorEntry.getValueType().getVectorElementType();
1548 
1549  SDValue NewBldVec[4];
1550  for (unsigned i = 0; i < 4; i++)
1551  NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
1552  DAG.getIntPtrConstant(i, DL));
1553 
1554  for (unsigned i = 0; i < 4; i++) {
1555  if (NewBldVec[i].isUndef())
1556  // We mask the write here to teach later passes that the ith element of this
1557  // vector is undef. Thus we can use it to reduce 128-bit register usage,
1558  // break false dependencies and additionally make assembly easier to read.
1559  RemapSwizzle[i] = 7; // SEL_MASK_WRITE
1560  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1561  if (C->isZero()) {
1562  RemapSwizzle[i] = 4; // SEL_0
1563  NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1564  } else if (C->isExactlyValue(1.0)) {
1565  RemapSwizzle[i] = 5; // SEL_1
1566  NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1567  }
1568  }
1569 
1570  if (NewBldVec[i].isUndef())
1571  continue;
1572 
1573  for (unsigned j = 0; j < i; j++) {
1574  if (NewBldVec[i] == NewBldVec[j]) {
1575  NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1576  RemapSwizzle[i] = j;
1577  break;
1578  }
1579  }
1580  }
1581 
1582  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1583  NewBldVec);
1584 }
1585 
1586 static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1587  DenseMap<unsigned, unsigned> &RemapSwizzle) {
1588  assert(RemapSwizzle.empty());
1589 
1590  SDLoc DL(VectorEntry);
1591  EVT EltTy = VectorEntry.getValueType().getVectorElementType();
1592 
1593  SDValue NewBldVec[4];
1594  bool isUnmovable[4] = {false, false, false, false};
1595  for (unsigned i = 0; i < 4; i++)
1596  NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
1597  DAG.getIntPtrConstant(i, DL));
1598 
1599  for (unsigned i = 0; i < 4; i++) {
1600  RemapSwizzle[i] = i;
1601  if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1602  unsigned Idx = cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1603  ->getZExtValue();
1604  if (i == Idx)
1605  isUnmovable[Idx] = true;
1606  }
1607  }
1608 
1609  for (unsigned i = 0; i < 4; i++) {
1610  if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1611  unsigned Idx = cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1612  ->getZExtValue();
1613  if (isUnmovable[Idx])
1614  continue;
1615  // Swap i and Idx
1616  std::swap(NewBldVec[Idx], NewBldVec[i]);
1617  std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1618  break;
1619  }
1620  }
1621 
1622  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1623  NewBldVec);
1624 }
1625 
1626 SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[],
1627  SelectionDAG &DAG,
1628  const SDLoc &DL) const {
1629  // Old -> New swizzle values
1630  DenseMap<unsigned, unsigned> SwizzleRemap;
1631 
1632  BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1633  for (unsigned i = 0; i < 4; i++) {
1634  unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
1635  if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1636  Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
1637  }
1638 
1639  SwizzleRemap.clear();
1640  BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1641  for (unsigned i = 0; i < 4; i++) {
1642  unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
1643  if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1644  Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
1645  }
1646 
1647  return BuildVector;
1648 }
1649 
1650 SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
1651  SelectionDAG &DAG) const {
1652  SDLoc DL(LoadNode);
1653  EVT VT = LoadNode->getValueType(0);
1654  SDValue Chain = LoadNode->getChain();
1655  SDValue Ptr = LoadNode->getBasePtr();
1656  assert (isa<ConstantSDNode>(Ptr));
1657 
1658  //TODO: Support smaller loads
1659  if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode))
1660  return SDValue();
1661 
1662  if (LoadNode->getAlign() < Align(4))
1663  return SDValue();
1664 
1665  int ConstantBlock = ConstantAddressBlock(Block);
1666 
1667  SDValue Slots[4];
1668  for (unsigned i = 0; i < 4; i++) {
1669  // We want Const position encoded with the following formula :
1670  // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1671  // const_index is Ptr computed by llvm using an alignment of 16.
1672  // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
1673  // then div by 4 at the ISel step
1674  SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
1675  DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
1676  Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1677  }
1678  EVT NewVT = MVT::v4i32;
1679  unsigned NumElements = 4;
1680  if (VT.isVector()) {
1681  NewVT = VT;
1682  NumElements = VT.getVectorNumElements();
1683  }
1684  SDValue Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
1685  if (!VT.isVector()) {
1687  DAG.getConstant(0, DL, MVT::i32));
1688  }
1689  SDValue MergedValues[2] = {
1690  Result,
1691  Chain
1692  };
1693  return DAG.getMergeValues(MergedValues, DL);
1694 }
1695 
1696 //===----------------------------------------------------------------------===//
1697 // Custom DAG Optimizations
1698 //===----------------------------------------------------------------------===//
1699 
1700 SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1701  DAGCombinerInfo &DCI) const {
1702  SelectionDAG &DAG = DCI.DAG;
1703  SDLoc DL(N);
1704 
1705  switch (N->getOpcode()) {
1706  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1707  case ISD::FP_ROUND: {
1708  SDValue Arg = N->getOperand(0);
1709  if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
1710  return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
1711  Arg.getOperand(0));
1712  }
1713  break;
1714  }
1715 
1716  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1717  // (i32 select_cc f32, f32, -1, 0 cc)
1718  //
1719  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1720  // this to one of the SET*_DX10 instructions.
1721  case ISD::FP_TO_SINT: {
1722  SDValue FNeg = N->getOperand(0);
1723  if (FNeg.getOpcode() != ISD::FNEG) {
1724  return SDValue();
1725  }
1726  SDValue SelectCC = FNeg.getOperand(0);
1727  if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1728  SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1729  SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1730  !isHWTrueValue(SelectCC.getOperand(2)) ||
1731  !isHWFalseValue(SelectCC.getOperand(3))) {
1732  return SDValue();
1733  }
1734 
1735  return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
1736  SelectCC.getOperand(0), // LHS
1737  SelectCC.getOperand(1), // RHS
1738  DAG.getConstant(-1, DL, MVT::i32), // True
1739  DAG.getConstant(0, DL, MVT::i32), // False
1740  SelectCC.getOperand(4)); // CC
1741  }
1742 
1743  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1744  // => build_vector elt0, ... , NewEltIdx, ... , eltN
1745  case ISD::INSERT_VECTOR_ELT: {
1746  SDValue InVec = N->getOperand(0);
1747  SDValue InVal = N->getOperand(1);
1748  SDValue EltNo = N->getOperand(2);
1749 
1750  // If the inserted element is an UNDEF, just use the input vector.
1751  if (InVal.isUndef())
1752  return InVec;
1753 
1754  EVT VT = InVec.getValueType();
1755 
1756  // If we can't generate a legal BUILD_VECTOR, exit
1757  if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1758  return SDValue();
1759 
1760  // Check that we know which element is being inserted
1761  if (!isa<ConstantSDNode>(EltNo))
1762  return SDValue();
1763  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1764 
1765  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1766  // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1767  // vector elements.
1768  SmallVector<SDValue, 8> Ops;
1769  if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1770  Ops.append(InVec.getNode()->op_begin(),
1771  InVec.getNode()->op_end());
1772  } else if (InVec.isUndef()) {
1773  unsigned NElts = VT.getVectorNumElements();
1774  Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1775  } else {
1776  return SDValue();
1777  }
1778 
1779  // Insert the element
1780  if (Elt < Ops.size()) {
1781  // All the operands of BUILD_VECTOR must have the same type;
1782  // we enforce that here.
1783  EVT OpVT = Ops[0].getValueType();
1784  if (InVal.getValueType() != OpVT)
1785  InVal = OpVT.bitsGT(InVal.getValueType()) ?
1786  DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
1787  DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
1788  Ops[Elt] = InVal;
1789  }
1790 
1791  // Return the new vector
1792  return DAG.getBuildVector(VT, DL, Ops);
1793  }
1794 
1795  // Extract_vec (Build_vector) generated by custom lowering
1796  // also needs to be custom combined
1797  case ISD::EXTRACT_VECTOR_ELT: {
1798  SDValue Arg = N->getOperand(0);
1799  if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1800  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1801  unsigned Element = Const->getZExtValue();
1802  return Arg->getOperand(Element);
1803  }
1804  }
1805  if (Arg.getOpcode() == ISD::BITCAST &&
1806  Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
1807  (Arg.getOperand(0).getValueType().getVectorNumElements() ==
1808  Arg.getValueType().getVectorNumElements())) {
1809  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1810  unsigned Element = Const->getZExtValue();
1811  return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
1812  Arg->getOperand(0).getOperand(Element));
1813  }
1814  }
1815  break;
1816  }
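 // Illustrative sketch (not from the upstream source): with a constant index
 // this folds, for example,
 //   extract_vector_elt (build_vector a, b, c, d), 2           -> c
 // and, when the build_vector is only visible through a same-width bitcast,
 //   extract_vector_elt (bitcast (build_vector a, b, c, d)), 2 -> bitcast c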
1817 
1818  case ISD::SELECT_CC: {
1819  // Try common optimizations
1820  if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
1821  return Ret;
1822 
1823  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1824  // selectcc x, y, a, b, inv(cc)
1825  //
1826  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1827  // selectcc x, y, a, b, cc
1828  SDValue LHS = N->getOperand(0);
1829  if (LHS.getOpcode() != ISD::SELECT_CC) {
1830  return SDValue();
1831  }
1832 
1833  SDValue RHS = N->getOperand(1);
1834  SDValue True = N->getOperand(2);
1835  SDValue False = N->getOperand(3);
1836  ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
1837 
1838  if (LHS.getOperand(2).getNode() != True.getNode() ||
1839  LHS.getOperand(3).getNode() != False.getNode() ||
1840  RHS.getNode() != False.getNode()) {
1841  return SDValue();
1842  }
1843 
1844  switch (NCC) {
1845  default: return SDValue();
1846  case ISD::SETNE: return LHS;
1847  case ISD::SETEQ: {
1848  ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1849  LHSCC = ISD::getSetCCInverse(LHSCC, LHS.getOperand(0).getValueType());
1850  if (DCI.isBeforeLegalizeOps() ||
1851  isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1852  return DAG.getSelectCC(DL,
1853  LHS.getOperand(0),
1854  LHS.getOperand(1),
1855  LHS.getOperand(2),
1856  LHS.getOperand(3),
1857  LHSCC);
1858  break;
1859  }
1860  }
1861  return SDValue();
1862  }
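 // Illustrative sketch (not from the upstream source): for example
 //   selectcc (selectcc x, y, a, b, setolt), b, a, b, setne
 // folds to the inner node
 //   selectcc x, y, a, b, setolt
 // while the seteq form folds to the inner node with the inverted condition,
 //   selectcc x, y, a, b, setuge
 // as long as the inverted condition code is still legal at this point.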
1863 
1864  case AMDGPUISD::R600_EXPORT: {
1865  SDValue Arg = N->getOperand(1);
1866  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1867  break;
1868 
1869  SDValue NewArgs[8] = {
1870  N->getOperand(0), // Chain
1871  SDValue(),
1872  N->getOperand(2), // ArrayBase
1873  N->getOperand(3), // Type
1874  N->getOperand(4), // SWZ_X
1875  N->getOperand(5), // SWZ_Y
1876  N->getOperand(6), // SWZ_Z
1877  N->getOperand(7) // SWZ_W
1878  };
1879  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
1880  return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
1881  }
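 // Illustrative note (not from the upstream source): when the exported value
 // is a build_vector, OptimizeSwizzle canonicalizes that vector and rewrites
 // the four SWZ_* selector operands (NewArgs[4..7]) so the export still reads
 // the same lanes from the compacted vector.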
1882  case AMDGPUISD::TEXTURE_FETCH: {
1883  SDValue Arg = N->getOperand(1);
1884  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1885  break;
1886 
1887  SDValue NewArgs[19] = {
1888  N->getOperand(0),
1889  N->getOperand(1),
1890  N->getOperand(2),
1891  N->getOperand(3),
1892  N->getOperand(4),
1893  N->getOperand(5),
1894  N->getOperand(6),
1895  N->getOperand(7),
1896  N->getOperand(8),
1897  N->getOperand(9),
1898  N->getOperand(10),
1899  N->getOperand(11),
1900  N->getOperand(12),
1901  N->getOperand(13),
1902  N->getOperand(14),
1903  N->getOperand(15),
1904  N->getOperand(16),
1905  N->getOperand(17),
1906  N->getOperand(18),
1907  };
1908  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
1909  return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
1910  }
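 // Illustrative note (not from the upstream source; assumes OptimizeSwizzle
 // behaves as in the R600_EXPORT case above): the coordinate build_vector in
 // operand 1 is canonicalized and the four selector operands starting at
 // NewArgs[2] are remapped to match.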
1911 
1912  case ISD::LOAD: {
1913  LoadSDNode *LoadNode = cast<LoadSDNode>(N);
1914  SDValue Ptr = LoadNode->getBasePtr();
1915  if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
1916  isa<ConstantSDNode>(Ptr))
1917  return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
1918  break;
1919  }
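 // Illustrative sketch (not from the upstream source): a load from the
 // indirect parameter space whose address is a compile-time constant, roughly
 //   t1: i32,ch = load t0, Constant:i32<36>   ; PARAM_I_ADDRESS
 // is re-emitted by constBufferLoad as an equivalent read from
 // CONSTANT_BUFFER_0.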
1920 
1921  default: break;
1922  }
1923 
1924  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
1925 }
1926 
1927 bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
1928  SDValue &Src, SDValue &Neg, SDValue &Abs,
1929  SDValue &Sel, SDValue &Imm,
1930  SelectionDAG &DAG) const {
1931  const R600InstrInfo *TII = Subtarget->getInstrInfo();
1932  if (!Src.isMachineOpcode())
1933  return false;
1934 
1935  switch (Src.getMachineOpcode()) {
1936  case R600::FNEG_R600:
1937  if (!Neg.getNode())
1938  return false;
1939  Src = Src.getOperand(0);
1940  Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
1941  return true;
1942  case R600::FABS_R600:
1943  if (!Abs.getNode())
1944  return false;
1945  Src = Src.getOperand(0);
1946  Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
1947  return true;
1948  case R600::CONST_COPY: {
1949  unsigned Opcode = ParentNode->getMachineOpcode();
1950  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
1951 
1952  if (!Sel.getNode())
1953  return false;
1954 
1955  SDValue CstOffset = Src.getOperand(0);
1956  if (ParentNode->getValueType(0).isVector())
1957  return false;
1958 
1959  // Gather constant values
1960  int SrcIndices[] = {
1961  TII->getOperandIdx(Opcode, R600::OpName::src0),
1962  TII->getOperandIdx(Opcode, R600::OpName::src1),
1963  TII->getOperandIdx(Opcode, R600::OpName::src2),
1964  TII->getOperandIdx(Opcode, R600::OpName::src0_X),
1965  TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
1966  TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
1967  TII->getOperandIdx(Opcode, R600::OpName::src0_W),
1968  TII->getOperandIdx(Opcode, R600::OpName::src1_X),
1969  TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
1970  TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
1971  TII->getOperandIdx(Opcode, R600::OpName::src1_W)
1972  };
1973  std::vector<unsigned> Consts;
1974  for (int OtherSrcIdx : SrcIndices) {
1975  int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1976  if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1977  continue;
1978  if (HasDst) {
1979  OtherSrcIdx--;
1980  OtherSelIdx--;
1981  }
1982  if (RegisterSDNode *Reg =
1983  dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
1984  if (Reg->getReg() == R600::ALU_CONST) {
1985  ConstantSDNode *Cst
1986  = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
1987  Consts.push_back(Cst->getZExtValue());
1988  }
1989  }
1990  }
1991 
1992  ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
1993  Consts.push_back(Cst->getZExtValue());
1994  if (!TII->fitsConstReadLimitations(Consts)) {
1995  return false;
1996  }
1997 
1998  Sel = CstOffset;
1999  Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
2000  return true;
2001  }
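 // Illustrative note (not from the upstream source): the walk over SrcIndices
 // above collects every constant-buffer selector the parent ALU instruction
 // already reads; fitsConstReadLimitations then rejects the fold when the
 // combined set of constants would exceed the hardware's constant-read
 // limitations.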
2002  case R600::MOV_IMM_GLOBAL_ADDR:
2003  // Check if the Imm slot is already in use (same check as the MOV_IMM_* case below).
2004  if (cast<ConstantSDNode>(Imm)->getZExtValue())
2005  return false;
2006  Imm = Src.getOperand(0);
2007  Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
2008  return true;
2009  case R600::MOV_IMM_I32:
2010  case R600::MOV_IMM_F32: {
2011  unsigned ImmReg = R600::ALU_LITERAL_X;
2012  uint64_t ImmValue = 0;
2013 
2014  if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
2015  ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0));
2016  float FloatValue = FPC->getValueAPF().convertToFloat();
2017  if (FloatValue == 0.0) {
2018  ImmReg = R600::ZERO;
2019  } else if (FloatValue == 0.5) {
2020  ImmReg = R600::HALF;
2021  } else if (FloatValue == 1.0) {
2022  ImmReg = R600::ONE;
2023  } else {
2024  ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2025  }
2026  } else {
2027  ConstantSDNode *C = cast<ConstantSDNode>(Src.getOperand(0));
2028  uint64_t Value = C->getZExtValue();
2029  if (Value == 0) {
2030  ImmReg = R600::ZERO;
2031  } else if (Value == 1) {
2032  ImmReg = R600::ONE_INT;
2033  } else {
2034  ImmValue = Value;
2035  }
2036  }
2037 
2038  // Check that we aren't already using an immediate.
2039  // XXX: It's possible for an instruction to have more than one
2040  // immediate operand, but this is not supported yet.
2041  if (ImmReg == R600::ALU_LITERAL_X) {
2042  if (!Imm.getNode())
2043  return false;
2044  ConstantSDNode *C = cast<ConstantSDNode>(Imm);
2045  if (C->getZExtValue())
2046  return false;
2047  Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
2048  }
2049  Src = DAG.getRegister(ImmReg, MVT::i32);
2050  return true;
2051  }
2052  default:
2053  return false;
2054  }
2055 }
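 // Illustrative sketch (not from the upstream source): FoldOperand lets
 // PostISelFolding below absorb a separate modifier node, e.g.
 //   t2 = FNEG_R600 t1;   t3 = MUL_IEEE t2, t0
 // becomes a single MUL_IEEE with its src0_neg operand set to 1. It likewise
 // folds FABS_R600 into the *_abs operands, CONST_COPY reads into ALU_CONST
 // plus a sel index, and immediates into ALU_LITERAL_X or the inline ZERO,
 // HALF, ONE and ONE_INT registers.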
2056 
2057 /// Fold the instructions after selecting them
2058 SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2059  SelectionDAG &DAG) const {
2060  const R600InstrInfo *TII = Subtarget->getInstrInfo();
2061  if (!Node->isMachineOpcode())
2062  return Node;
2063 
2064  unsigned Opcode = Node->getMachineOpcode();
2065  SDValue FakeOp;
2066 
2067  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
2068 
2069  if (Opcode == R600::DOT_4) {
2070  int OperandIdx[] = {
2071  TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2072  TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2073  TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2074  TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2075  TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2076  TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2077  TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2078  TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2079  };
2080  int NegIdx[] = {
2081  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
2082  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
2083  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
2084  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
2085  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
2086  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
2087  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
2088  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
2089  };
2090  int AbsIdx[] = {
2091  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
2092  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
2093  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
2094  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
2095  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
2096  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
2097  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
2098  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
2099  };
2100  for (unsigned i = 0; i < 8; i++) {
2101  if (OperandIdx[i] < 0)
2102  return Node;
2103  SDValue &Src = Ops[OperandIdx[i] - 1];
2104  SDValue &Neg = Ops[NegIdx[i] - 1];
2105  SDValue &Abs = Ops[AbsIdx[i] - 1];
2106  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2107  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2108  if (HasDst)
2109  SelIdx--;
2110  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2111  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2112  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2113  }
2114  } else if (Opcode == R600::REG_SEQUENCE) {
2115  for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2116  SDValue &Src = Ops[i];
2117  if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
2118  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2119  }
2120  } else {
2121  if (!TII->hasInstrModifiers(Opcode))
2122  return Node;
2123  int OperandIdx[] = {
2124  TII->getOperandIdx(Opcode, R600::OpName::src0),
2125  TII->getOperandIdx(Opcode, R600::OpName::src1),
2126  TII->getOperandIdx(Opcode, R600::OpName::src2)
2127  };
2128  int NegIdx[] = {
2129  TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
2130  TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
2131  TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
2132  };
2133  int AbsIdx[] = {
2134  TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
2135  TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
2136  -1
2137  };
2138  for (unsigned i = 0; i < 3; i++) {
2139  if (OperandIdx[i] < 0)
2140  return Node;
2141  SDValue &Src = Ops[OperandIdx[i] - 1];
2142  SDValue &Neg = Ops[NegIdx[i] - 1];
2143  SDValue FakeAbs;
2144  SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2145  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2146  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2147  int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
2148  if (HasDst) {
2149  SelIdx--;
2150  ImmIdx--;
2151  }
2152  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2153  SDValue &Imm = Ops[ImmIdx];
2154  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
2155  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2156  }
2157  }
2158 
2159  return Node;
2160 }
llvm::ISD::SUB
@ SUB
Definition: ISDOpcodes.h:240
llvm::CCValAssign::getLocVT
MVT getLocVT() const
Definition: CallingConvLower.h:151
i
i
Definition: README.txt:29
llvm::ISD::SETUGE
@ SETUGE
Definition: ISDOpcodes.h:1437
llvm::AMDGPUISD::STORE_MSKOR
@ STORE_MSKOR
Definition: AMDGPUISelLowering.h:499
llvm::TargetLowering::scalarizeVectorLoad
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
Definition: TargetLowering.cpp:8574
llvm::ConstantSDNode
Definition: SelectionDAGNodes.h:1582
llvm::RegisterSDNode
Definition: SelectionDAGNodes.h:2161
llvm::APFloat::convertToFloat
float convertToFloat() const
Converts this APFloat to host float value.
Definition: APFloat.cpp:5155
llvm::StoreSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2394
llvm::TargetLoweringBase::setSchedulingPreference
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
Definition: TargetLowering.h:2302
llvm::ISD::SETLE
@ SETLE
Definition: ISDOpcodes.h:1448
llvm::ISD::INTRINSIC_VOID
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
llvm::ISD::SETO
@ SETO
Definition: ISDOpcodes.h:1433
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:109
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::SDNode::getValueType
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Definition: SelectionDAGNodes.h:986
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:387
llvm::SDLoc
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Definition: SelectionDAGNodes.h:1106
llvm::TargetLoweringBase::Legal
@ Legal
Definition: TargetLowering.h:196
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:667
llvm::TargetLowering::EmitInstrWithCustomInserter
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Definition: SelectionDAGISel.cpp:293
llvm::ISD::BITCAST
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:886
llvm::ISD::SETNE
@ SETNE
Definition: ISDOpcodes.h:1449
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
llvm::AMDGPUTargetLowering
Definition: AMDGPUISelLowering.h:27
llvm::ISD::NON_EXTLOAD
@ NON_EXTLOAD
Definition: ISDOpcodes.h:1404
llvm::CCState
CCState - This class holds information needed while lowering arguments and return values.
Definition: CallingConvLower.h:189
llvm::MachineOperand::getGlobal
const GlobalValue * getGlobal() const
Definition: MachineOperand.h:572
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:50
llvm::AMDGPUAS::CONSTANT_BUFFER_1
@ CONSTANT_BUFFER_1
Definition: AMDGPU.h:395
llvm::SDValue::getNode
SDNode * getNode() const
get the SDNode which holds the desired result
Definition: SelectionDAGNodes.h:159
llvm::MachineInstrBuilder::add
const MachineInstrBuilder & add(const MachineOperand &MO) const
Definition: MachineInstrBuilder.h:224
llvm::ISD::UADDO
@ UADDO
Definition: ISDOpcodes.h:324
llvm::ISD::CTTZ_ZERO_UNDEF
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:708
llvm::SelectionDAG::getValueType
SDValue getValueType(EVT)
Definition: SelectionDAG.cpp:1859
llvm::AMDGPUAS::CONSTANT_BUFFER_14
@ CONSTANT_BUFFER_14
Definition: AMDGPU.h:408
llvm::PointerType::get
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Definition: Type.cpp:727
llvm::ISD::ADDC
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:269
llvm::R600FrameLowering
Definition: R600FrameLowering.h:16
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:376
llvm::AMDGPUISD::CONST_ADDRESS
@ CONST_ADDRESS
Definition: AMDGPUISelLowering.h:448
llvm::ISD::SETEQ
@ SETEQ
Definition: ISDOpcodes.h:1444
llvm::SelectionDAG::getVTList
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
Definition: SelectionDAG.cpp:9439
llvm::CallingConv::AMDGPU_CS
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:198
llvm::MVT::v2i1
@ v2i1
Definition: MachineValueType.h:67
llvm::R600Subtarget::getInstrInfo
const R600InstrInfo * getInstrInfo() const override
Definition: R600Subtarget.h:50
llvm::MipsISD::Lo
@ Lo
Definition: MipsISelLowering.h:79
llvm::MachineSDNode
An SDNode that represents everything that will be needed to construct a MachineInstr.
Definition: SelectionDAGNodes.h:2897
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::MachineMemOperand::MOInvariant
@ MOInvariant
The memory access always returns the same value (or traps).
Definition: MachineMemOperand.h:144
llvm::SelectionDAG::getVectorIdxConstant
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
Definition: SelectionDAG.cpp:1647
llvm::CallingConv::Cold
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition: CallingConv.h:47
llvm::MemSDNode::getMemoryVT
EVT getMemoryVT() const
Return the type of the in-memory value.
Definition: SelectionDAGNodes.h:1355
llvm::MemSDNode::getChain
const SDValue & getChain() const
Definition: SelectionDAGNodes.h:1378
llvm::ISD::ANY_EXTEND
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:766
llvm::SDNode
Represents one node in the SelectionDAG.
Definition: SelectionDAGNodes.h:463
llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:482
llvm::ISD::FP_TO_SINT
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:819
llvm::TargetLowering::DAGCombinerInfo::DAG
SelectionDAG & DAG
Definition: TargetLowering.h:3929
llvm::LoadSDNode
This class is used to represent ISD::LOAD nodes.
Definition: SelectionDAGNodes.h:2344
llvm::AMDGPUAS::CONSTANT_BUFFER_3
@ CONSTANT_BUFFER_3
Definition: AMDGPU.h:397
llvm::ISD::SETULE
@ SETULE
Definition: ISDOpcodes.h:1439
llvm::CCState::AnalyzeFormalArguments
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
Definition: CallingConvLower.cpp:82
isEOP
static bool isEOP(MachineBasicBlock::iterator I)
Definition: R600ISelLowering.cpp:203
llvm::NVPTXISD::RETURN
@ RETURN
Definition: NVPTXISelLowering.h:49
llvm::ISD::SHL_PARTS
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:749
llvm::SelectionDAG::getZeroExtendInReg
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
Definition: SelectionDAG.cpp:1436
llvm::SelectionDAG::getStore
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
Definition: SelectionDAG.cpp:8099
llvm::MachineMemOperand::MODereferenceable
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
Definition: MachineMemOperand.h:142
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:377
llvm::AMDGPUISD::CARRY
@ CARRY
Definition: AMDGPUISelLowering.h:428
llvm::R600Subtarget::hasBCNT
bool hasBCNT(unsigned Size) const
Definition: R600Subtarget.h:92
llvm::ISD::SETCC
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:736
llvm::AMDGPUAS::CONSTANT_BUFFER_12
@ CONSTANT_BUFFER_12
Definition: AMDGPU.h:406
Vector
So we should use XX3Form_Rcr to implement intrinsic Convert DP outs ins xscvdpsp No builtin are required Round &Convert QP DP(dword[1] is set to zero) No builtin are required Round to Quad Precision because you need to assign rounding mode in instruction Provide builtin(set f128:$vT,(int_ppc_vsx_xsrqpi f128:$vB))(set f128 yields< n x< ty > >< result > yields< ty >< result > No builtin are required Load Store Vector
Definition: README_P9.txt:497
llvm::R600Subtarget::hasCARRY
bool hasCARRY() const
Definition: R600Subtarget.h:103
Results
Function Alias Analysis Results
Definition: AliasAnalysis.cpp:772
llvm::ISD::MERGE_VALUES
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
llvm::AMDGPUTargetLowering::ReplaceNodeResults
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
Definition: AMDGPUISelLowering.cpp:1275
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:119
llvm::ISD::EXTLOAD
@ EXTLOAD
Definition: ISDOpcodes.h:1404
llvm::LSBaseSDNode::isIndexed
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
Definition: SelectionDAGNodes.h:2332
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::AMDGPUAS::CONSTANT_BUFFER_4
@ CONSTANT_BUFFER_4
Definition: AMDGPU.h:398
llvm::R600Subtarget::hasFFBL
bool hasFFBL() const
Definition: R600Subtarget.h:111
llvm::EVT::bitsLE
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:280
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::MachineOperand::getOffset
int64_t getOffset() const
Return the offset from the symbol in this operand.
Definition: MachineOperand.h:609
R600ISelLowering.h
llvm::ISD::SETUEQ
@ SETUEQ
Definition: ISDOpcodes.h:1435
llvm::AMDGPUAS::CONSTANT_BUFFER_2
@ CONSTANT_BUFFER_2
Definition: AMDGPU.h:396
llvm::TargetLowering::expandUnalignedStore
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
Definition: TargetLowering.cpp:8893
llvm::SelectionDAG::getContext
LLVMContext * getContext() const
Definition: SelectionDAG.h:479
llvm::R600MachineFunctionInfo
Definition: R600MachineFunctionInfo.h:19
llvm::ISD::BRCOND
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1001
llvm::SelectionDAG::getRegister
SDValue getRegister(unsigned Reg, EVT VT)
Definition: SelectionDAG.cpp:2122
llvm::MipsISD::Hi
@ Hi
Definition: MipsISelLowering.h:75
isUndef
static bool isUndef(ArrayRef< int > Mask)
Definition: HexagonISelDAGToDAGHVX.cpp:1032
llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:129
llvm::SDNode::op_end
op_iterator op_end() const
Definition: SelectionDAGNodes.h:929
llvm::TargetLoweringBase::setTargetDAGCombine
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
Definition: TargetLowering.h:2504
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:187
llvm::MVT::integer_valuetypes
static auto integer_valuetypes()
Definition: MachineValueType.h:1519
llvm::ISD::FFLOOR
@ FFLOOR
Definition: ISDOpcodes.h:930
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::ISD::LoadExtType
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1404
llvm::R600TargetLowering::LowerFormalArguments
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
XXX Only kernel functions are supported, so we can assume for now that every function is a kernel fun...
Definition: R600ISelLowering.cpp:1436
llvm::R600Subtarget::hasBFE
bool hasBFE() const
Definition: R600Subtarget.h:84
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::ISD::BR_CC
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1008
x3
In x86 we generate this spiffy xmm0 xmm0 ret in x86 we generate this which could be xmm1 movss xmm1 xmm0 ret In sse4 we could use insertps to make both better Here s another testcase that could use x3
Definition: README-SSE.txt:547
llvm::SelectionDAG::getLoad
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Definition: SelectionDAG.cpp:8049
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:43
llvm::EVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:362
llvm::R600Subtarget::hasFMA
bool hasFMA() const
Definition: R600Subtarget.h:119
llvm::MinAlign
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:600
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:667
llvm::ISD::GlobalAddress
@ GlobalAddress
Definition: ISDOpcodes.h:78
llvm::ISD::SELECT_CC
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:728
llvm::SDValue::getValueType
EVT getValueType() const
Return the ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:1141
llvm::CCValAssign
CCValAssign - Represent assignment of one arg/retval to a location.
Definition: CallingConvLower.h:31
llvm::AMDGPU::isShader
bool isShader(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1831
llvm::SelectionDAG
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:220
llvm::ISD::SELECT
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:713
llvm::TargetLowering::DAGCombinerInfo::isBeforeLegalizeOps
bool isBeforeLegalizeOps() const
Definition: TargetLowering.h:3935
f
Itanium Name Demangler i e convert the string _Z1fv into f()". You can also use the CRTP base ManglingParser to perform some simple analysis on the mangled name
llvm::SelectionDAG::getTruncStore
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition: SelectionDAG.cpp:8151
llvm::CCValAssign::getLocReg
Register getLocReg() const
Definition: CallingConvLower.h:148
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:755
MO_FLAG_PUSH
#define MO_FLAG_PUSH
Definition: R600Defines.h:18
llvm::SelectionDAG::getUNDEF
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:1023
llvm::ISD::SIGN_EXTEND_INREG
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:781
llvm::AMDGPUISD::R600_EXPORT
@ R600_EXPORT
Definition: AMDGPUISelLowering.h:447
llvm::MachineMemOperand::getValue
const Value * getValue() const
Return the base address of the memory access.
Definition: MachineMemOperand.h:210
llvm::CallingConv::AMDGPU_HS
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:207
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:34
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::TargetLoweringBase::isCondCodeLegal
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
Definition: TargetLowering.h:1452
llvm::MVT::f64
@ f64
Definition: MachineValueType.h:58
llvm::SelectionDAG::getConstant
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1515
llvm::EVT::getVectorNumElements
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:308
llvm::AMDGPUAS::CONSTANT_BUFFER_15
@ CONSTANT_BUFFER_15
Definition: AMDGPU.h:409
llvm::TargetLowering::DAGCombinerInfo
Definition: TargetLowering.h:3923
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::ISD::TRUNCATE
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:769
llvm::TargetLoweringBase::ZeroOrNegativeOneBooleanContent
@ ZeroOrNegativeOneBooleanContent
Definition: TargetLowering.h:233
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::AMDGPUISD::DOT4
@ DOT4
Definition: AMDGPUISelLowering.h:427
llvm::ISD::UDIVREM
@ UDIVREM
Definition: ISDOpcodes.h:256
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
llvm::dwarf::Index
Index
Definition: Dwarf.h:490
llvm::TargetLoweringBase::addRegisterClass
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
Definition: TargetLowering.h:2349
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
MO_FLAG_MASK
#define MO_FLAG_MASK
Definition: R600Defines.h:17
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1486
llvm::ISD::FRINT
@ FRINT
Definition: ISDOpcodes.h:926
llvm::codeview::EncodedFramePtrReg::BasePtr
@ BasePtr
llvm::ISD::ATOMIC_STORE
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1163
llvm::FrameIndexSDNode
Definition: SelectionDAGNodes.h:1785
llvm::R600TargetLowering::CCAssignFnForCall
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const
Definition: R600ISelLowering.cpp:1411
llvm::SelectionDAG::getMemIntrinsicNode
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
Definition: SelectionDAG.cpp:7824
R600InstrInfo.h
llvm::R600TargetLowering::canMergeStoresTo
bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const override
Returns if it's reasonable to merge stores to MemVT size.
Definition: R600ISelLowering.cpp:1513
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:666
llvm::SmallVectorImpl::append
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:687
llvm::ISD::SETOLT
@ SETOLT
Definition: ISDOpcodes.h:1430
Align
uint64_t Align
Definition: ELFObjHandler.cpp:82
llvm::GlobalAddressSDNode::getGlobal
const GlobalValue * getGlobal() const
Definition: SelectionDAGNodes.h:1771
llvm::APFloat::bitcastToAPInt
APInt bitcastToAPInt() const
Definition: APFloat.h:1145
llvm::CallingConv::AMDGPU_LS
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:214
llvm::CCValAssign::getLocMemOffset
unsigned getLocMemOffset() const
Definition: CallingConvLower.h:149
llvm::ISD::UNINDEXED
@ UNINDEXED
Definition: ISDOpcodes.h:1373
llvm::TargetLoweringBase::isOperationLegal
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
Definition: TargetLowering.h:1292
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::TargetLoweringBase::setBooleanVectorContents
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
Definition: TargetLowering.h:2297
llvm::EVT::changeVectorElementTypeToInteger
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:396
llvm::EVT::bitsLT
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:272
llvm::AMDGPUTargetLowering::LowerGlobalAddress
virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const
Definition: AMDGPUISelLowering.cpp:1292
llvm::MVT::v4i16
@ v4i16
Definition: MachineValueType.h:100
llvm::EVT::getTypeForEVT
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:182
llvm::MachineRegisterInfo::use_empty
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
Definition: MachineRegisterInfo.h:514
llvm::ISD::USUBO
@ USUBO
Definition: ISDOpcodes.h:328
llvm::MVT::v4i8
@ v4i8
Definition: MachineValueType.h:87
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
ConstantAddressBlock
static int ConstantAddressBlock(unsigned AddressSpace)
Definition: R600ISelLowering.cpp:1203
llvm::StoreSDNode::isTruncatingStore
bool isTruncatingStore() const
Return true if the op does a truncation before store.
Definition: SelectionDAGNodes.h:2388
llvm::ISD::SETOLE
@ SETOLE
Definition: ISDOpcodes.h:1431
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
llvm::ISD::SETUGT
@ SETUGT
Definition: ISDOpcodes.h:1436
llvm::SelectionDAG::getTargetGlobalAddress
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:715
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:372
R600MachineFunctionInfo.h
llvm::CCAssignFn
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
Definition: CallingConvLower.h:175
llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
llvm::MVT::v2i8
@ v2i8
Definition: MachineValueType.h:86
llvm::AMDGPUISD::BUILD_VERTICAL_VECTOR
@ BUILD_VERTICAL_VECTOR
This node is for VLIW targets and it is used to represent a vector that is stored in consecutive regi...
Definition: AMDGPUISelLowering.h:482
llvm::ISD::SETUNE
@ SETUNE
Definition: ISDOpcodes.h:1440
llvm::Register::isVirtual
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
R600MCTargetDesc.h
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::TargetLoweringBase::setOperationAction
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
Definition: TargetLowering.h:2366
llvm::MVT::v32i16
@ v32i16
Definition: MachineValueType.h:103
llvm::pdb::OMFSegDescFlags::Read
@ Read
llvm::AMDGPUTargetLowering::LowerUDIVREM64
void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results) const
Definition: AMDGPUISelLowering.cpp:1777
llvm::R600Subtarget
Definition: R600Subtarget.h:29
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::MipsISD::Ext
@ Ext
Definition: MipsISelLowering.h:159
llvm::AMDGPUTargetLowering::LowerSDIVREM
SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const
Definition: AMDGPUISelLowering.cpp:2048
llvm::AMDGPUAS::CONSTANT_BUFFER_6
@ CONSTANT_BUFFER_6
Definition: AMDGPU.h:400
llvm::MVT::v16i8
@ v16i8
Definition: MachineValueType.h:89
llvm::R600Subtarget::hasBORROW
bool hasBORROW() const
Definition: R600Subtarget.h:99
llvm::AMDGPUISD::BORROW
@ BORROW
Definition: AMDGPUISelLowering.h:429
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:340
llvm::AMDGPU::Hwreg::Offset
Offset
Definition: SIDefines.h:419
llvm::ISD::FMAD
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:486
llvm::MachineOperand::getTargetFlags
unsigned getTargetFlags() const
Definition: MachineOperand.h:220
llvm::DenseMapBase::clear
void clear()
Definition: DenseMap.h:110
llvm::TargetLowering::expandFP_TO_SINT
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
Definition: TargetLowering.cpp:7576
llvm::MVT::v16i16
@ v16i16
Definition: MachineValueType.h:102
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
llvm::TargetLoweringBase::setHasExtractBitsInsn
void setHasExtractBitsInsn(bool hasExtractInsn=true)
Tells the code generator that the target has BitExtract instructions.
Definition: TargetLowering.h:2332
uint64_t
llvm::ISD::FP_TO_UINT
@ FP_TO_UINT
Definition: ISDOpcodes.h:820
llvm::ConstantFPSDNode
Definition: SelectionDAGNodes.h:1631
llvm::R600FrameLowering::getFrameIndexReference
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
Definition: R600FrameLowering.cpp:19
llvm::MemSDNode::getMemOperand
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Definition: SelectionDAGNodes.h:1359
llvm::ISD::LOAD
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:966
llvm::SelectionDAG::getIntPtrConstant
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
Definition: SelectionDAG.cpp:1635
llvm::TargetLoweringBase::Promote
@ Promote
Definition: TargetLowering.h:197
llvm::ConstantPointerNull::get
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1699
llvm::MachinePointerInfo
This class contains a discriminated union of information about pointers in memory operands,...
Definition: MachineMemOperand.h:39
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::CallingConv::AMDGPU_VS
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
Definition: CallingConv.h:189
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:53
llvm::DenseMap< unsigned, unsigned >
llvm::SelectionDAG::getCopyFromReg
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:793
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:534
llvm::R600Subtarget::hasBFI
bool hasBFI() const
Definition: R600Subtarget.h:88
llvm::ISD::CTLZ_ZERO_UNDEF
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:709
llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition: SelectionDAGNodes.h:921
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::FrameIndexSDNode::getIndex
int getIndex() const
Definition: SelectionDAGNodes.h:1796
llvm::SelectionDAG::getNode
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
Definition: SelectionDAG.cpp:9133
llvm::SelectionDAG::getNOT
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
Definition: SelectionDAG.cpp:1471
llvm::R600::getLDSNoRetOp
int getLDSNoRetOp(uint16_t Opcode)
llvm::ISD::FCOPYSIGN
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:492
llvm::LoadSDNode::getExtensionType
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Definition: SelectionDAGNodes.h:2359
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:632
llvm::TargetLowering::scalarizeVectorStore
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
Definition: TargetLowering.cpp:8664
llvm::AMDGPUAS::CONSTANT_BUFFER_11
@ CONSTANT_BUFFER_11
Definition: AMDGPU.h:405
llvm::MVT::v4f32
@ v4f32
Definition: MachineValueType.h:170
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::MachineMemOperand::Flags
Flags
Flags values. These may be or'd together.
Definition: MachineMemOperand.h:130
llvm::ISD::InputArg
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
Definition: TargetCallingConv.h:195
llvm::StoreSDNode
This class is used to represent ISD::STORE nodes.
Definition: SelectionDAGNodes.h:2372
llvm::ISD::ZEXTLOAD
@ ZEXTLOAD
Definition: ISDOpcodes.h:1404
llvm::SDValue::getValue
SDValue getValue(unsigned R) const
Definition: SelectionDAGNodes.h:179
llvm::MVT::i8
@ i8
Definition: MachineValueType.h:46
llvm::R600Subtarget::hasFFBH
bool hasFFBH() const
Definition: R600Subtarget.h:115
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
llvm::AMDGPUAS::PARAM_I_ADDRESS
@ PARAM_I_ADDRESS
Address space for indirect addressable parameter memory (VTX1).
Definition: AMDGPU.h:386
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
llvm::AMDGPUISD::SIN_HW
@ SIN_HW
Definition: AMDGPUISelLowering.h:395
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
llvm::ISD::SETULT
@ SETULT
Definition: ISDOpcodes.h:1438
llvm::ConstantSDNode::getZExtValue
uint64_t getZExtValue() const
Definition: SelectionDAGNodes.h:1597
llvm::TargetLowering::expandShiftParts
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
Definition: TargetLowering.cpp:7524
llvm::AMDGPUISD::TEXTURE_FETCH
@ TEXTURE_FETCH
Definition: AMDGPUISelLowering.h:446
llvm::AMDGPUISD::RSQ
@ RSQ
Definition: AMDGPUISelLowering.h:420
llvm::CallingConv::SPIR_KERNEL
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:141
llvm::MachineMemOperand::MONonTemporal
@ MONonTemporal
The memory access is non-temporal.
Definition: MachineMemOperand.h:140
llvm::SelectionDAG::getSelectCC
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition: SelectionDAG.h:1172
Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:60
llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1424
MO_FLAG_NEG
#define MO_FLAG_NEG
Definition: R600Defines.h:15
MO_FLAG_ABS
#define MO_FLAG_ABS
Definition: R600Defines.h:16
llvm::AMDGPUSubtarget::R700
@ R700
Definition: AMDGPUSubtarget.h:34
llvm::SelectionDAG::getMachineNode
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
Definition: SelectionDAG.cpp:9877
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::ISD::isNON_EXTLoad
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
Definition: SelectionDAGNodes.h:3052
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::DenseMapBase::empty
bool empty() const
Definition: DenseMap.h:98
llvm::StoreSDNode::getValue
const SDValue & getValue() const
Definition: SelectionDAGNodes.h:2393
llvm::AfterLegalizeVectorOps
@ AfterLegalizeVectorOps
Definition: DAGCombine.h:18
llvm::ISD::SRA_PARTS
@ SRA_PARTS
Definition: ISDOpcodes.h:750
getOpcode
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
llvm::AMDGPUTargetLowering::combineFMinMaxLegacy
SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, SDValue CC, DAGCombinerInfo &DCI) const
Generate Min/Max node.
Definition: AMDGPUISelLowering.cpp:1387
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::R600Subtarget::getFrameLowering
const R600FrameLowering * getFrameLowering() const override
Definition: R600Subtarget.h:52
llvm::AMDGPUAS::CONSTANT_BUFFER_7
@ CONSTANT_BUFFER_7
Definition: AMDGPU.h:401
llvm::MachineFunction::addLiveIn
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
Definition: MachineFunction.cpp:679
llvm::AMDGPUAS::CONSTANT_BUFFER_0
@ CONSTANT_BUFFER_0
Definition: AMDGPU.h:394
llvm::MemSDNode::getAAInfo
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Definition: SelectionDAGNodes.h:1323
llvm::AMDGPUTargetLowering::getImplicitParameterOffset
uint32_t getImplicitParameterOffset(const MachineFunction &MF, const ImplicitParameter Param) const
Helper function that returns the byte offset of the given type of implicit parameter.
Definition: AMDGPUISelLowering.cpp:4345
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:99
llvm::EVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:154
llvm::AMDGPUAS::CONSTANT_BUFFER_10
@ CONSTANT_BUFFER_10
Definition: AMDGPU.h:404
llvm::MVT::i64
@ i64
Definition: MachineValueType.h:49
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:138
llvm::MVT::v2i32
@ v2i32
Definition: MachineValueType.h:110
llvm::EVT::getScalarSizeInBits
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:352
AMDGPU.h
llvm::logicalview::LVAttributeKind::Zero
@ Zero
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::AMDGPUAS::CONSTANT_BUFFER_8
@ CONSTANT_BUFFER_8
Definition: AMDGPU.h:402
llvm::SelectionDAG::ReplaceAllUsesOfValueWith
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
Definition: SelectionDAG.cpp:10472
llvm::MVT::v2f32
@ v2f32
Definition: MachineValueType.h:168
uint32_t
llvm::StackOffset
StackOffset is a class to represent an offset with 2 dimensions, named fixed and scalable,...
Definition: TypeSize.h:134
llvm::AMDGPUTargetLowering::LowerOperation
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
Definition: AMDGPUISelLowering.cpp:1233
llvm::MinMax
Definition: AssumeBundleQueries.h:71
llvm::SDValue::getOperand
const SDValue & getOperand(unsigned i) const
Definition: SelectionDAGNodes.h:1149
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::AMDGPUAS::CONSTANT_BUFFER_9
@ CONSTANT_BUFFER_9
Definition: AMDGPU.h:403
CC
auto CC
Definition: RISCVRedundantCopyElimination.cpp:79
llvm::TargetLoweringBase::setTruncStoreAction
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
Definition: TargetLowering.h:2405
llvm::ISD::getSetCCInverse
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
Definition: SelectionDAG.cpp:558
llvm::MVT::v4i32
@ v4i32
Definition: MachineValueType.h:112
llvm::ISD::FMUL
@ FMUL
Definition: ISDOpcodes.h:392
llvm::R600TargetLowering::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *IsFast=nullptr) const override
Determine if the target supports unaligned memory accesses.
Definition: R600ISelLowering.cpp:1522
llvm::MVT::v32i32
@ v32i32
Definition: MachineValueType.h:122
llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition: ISDOpcodes.h:1404
llvm::SelectionDAG::getBuildVector
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:822
llvm::SelectionDAG::getConstantFP
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1689
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::ISD::INTRINSIC_WO_CHAIN
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
llvm::LoadSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2363
llvm::MVT::v16i32
@ v16i32
Definition: MachineValueType.h:121
llvm::ISD::FrameIndex
@ FrameIndex
Definition: ISDOpcodes.h:80
llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition: CallingConv.h:201
llvm::AMDGPUTargetLowering::CreateLiveInRegisterRaw
SDValue CreateLiveInRegisterRaw(SelectionDAG &DAG, const TargetRegisterClass *RC, Register Reg, EVT VT) const
Definition: AMDGPUISelLowering.h:300
j
return j(j<< 16)
llvm::ISD::SETLT
@ SETLT
Definition: ISDOpcodes.h:1447
llvm::isAllOnesConstant
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
Definition: SelectionDAG.cpp:10858
llvm::AMDGPUTargetLowering::analyzeFormalArgumentsCompute
void analyzeFormalArgumentsCompute(CCState &State, const SmallVectorImpl< ISD::InputArg > &Ins) const
The SelectionDAGBuilder will automatically promote function arguments with illegal types.
Definition: AMDGPUISelLowering.cpp:1011
CompactSwizzlableVector
static SDValue CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap< unsigned, unsigned > &RemapSwizzle)
Definition: R600ISelLowering.cpp:1541
llvm::R600TargetLowering::LowerOperation
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
Definition: R600ISelLowering.cpp:394
llvm::TargetLoweringBase::setLoadExtAction
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
Definition: TargetLowering.h:2383
llvm::GlobalAddressSDNode
Definition: SelectionDAGNodes.h:1759
llvm::AMDGPUISD::RSQ_CLAMP
@ RSQ_CLAMP
Definition: AMDGPUISelLowering.h:424
llvm::isNullConstant
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
Definition: SelectionDAG.cpp:10848
llvm::EVT::getScalarType
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:295
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:84
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:351
llvm::MemSDNode::getAddressSpace
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Definition: SelectionDAGNodes.h:1366
llvm::ISD::FCOS
@ FCOS
Definition: ISDOpcodes.h:916
llvm::SelectionDAG::getEntryNode
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:548
llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:924
llvm::ISD::getSetCCSwappedOperands
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
Definition: SelectionDAG.cpp:535
llvm::R600TargetLowering::PerformDAGCombine
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
Definition: R600ISelLowering.cpp:1700
llvm::ISD::FSIN
@ FSIN
Definition: ISDOpcodes.h:915
llvm::SelectionDAG::getDataLayout
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:469
llvm::MVT::v8i16
@ v8i16
Definition: MachineValueType.h:101
llvm::ISD::BUILD_VECTOR
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:514
llvm::RegState::Kill
@ Kill
The last use of a register.
Definition: MachineInstrBuilder.h:48
llvm::TargetLoweringBase::Custom
@ Custom
Definition: TargetLowering.h:200
llvm::R600Subtarget::getRegisterInfo
const R600RegisterInfo * getRegisterInfo() const override
Definition: R600Subtarget.h:60
llvm::ISD::SUBC
@ SUBC
Definition: ISDOpcodes.h:270
R600Subtarget.h
llvm::MVT::i32
@ i32
Definition: MachineValueType.h:48
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::ISD::SETUO
@ SETUO
Definition: ISDOpcodes.h:1434
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition: SelectionDAGNodes.h:145
llvm::R600InstrInfo
Definition: R600InstrInfo.h:38
llvm::ISD::STORE
@ STORE
Definition: ISDOpcodes.h:967
llvm::MVT::v8i32
@ v8i32
Definition: MachineValueType.h:116
llvm::ISD::SRL_PARTS
@ SRL_PARTS
Definition: ISDOpcodes.h:751
llvm::R600TargetLowering::getSetCCResultType
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const override
Return the ValueType of the result of SETCC operations.
Definition: R600ISelLowering.cpp:1506
llvm::AMDGPUISD::BRANCH_COND
@ BRANCH_COND
Definition: AMDGPUISelLowering.h:352
llvm::ISD::UINT_TO_FP
@ UINT_TO_FP
Definition: ISDOpcodes.h:774
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
llvm::SDValue::isUndef
bool isUndef() const
Definition: SelectionDAGNodes.h:1177
llvm::SDNode::getMachineOpcode
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
Definition: SelectionDAGNodes.h:704
llvm::CallingConv::AMDGPU_ES
@ AMDGPU_ES
Used for the AMDPAL shader stage before the geometry shader if geometry is in use.
Definition: CallingConv.h:219
llvm::makeArrayRef
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:475
llvm::RISCVMatInt::Imm
@ Imm
Definition: RISCVMatInt.h:23
llvm::AMDGPUTargetLowering::PerformDAGCombine
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
Definition: AMDGPUISelLowering.cpp:4042
llvm::EVT::getVectorElementType
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:300
llvm::MemSDNode::getAlign
Align getAlign() const
Definition: SelectionDAGNodes.h:1293
llvm::TargetLoweringBase::setBooleanContents
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
Definition: TargetLowering.h:2283
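A sketch of a typical call from a target's lowering constructor; choosing ZeroOrNegativeOneBooleanContent tells the rest of codegen that a "true" SETCC result is all ones (-1) rather than 1:
  setBooleanContents(ZeroOrNegativeOneBooleanContent);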
llvm::SDNode::op_begin
op_iterator op_begin() const
Definition: SelectionDAGNodes.h:928
llvm::AMDGPUISD::FRACT
@ FRACT
Definition: AMDGPUISelLowering.h:375
llvm::ISD::FSUB
@ FSUB
Definition: ISDOpcodes.h:391
llvm::ISD::SHL
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:691
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:357
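A hedged sketch of the builder pattern, assuming BB, I, MI, TII, DstReg and SrcReg are already in scope (e.g. inside a custom inserter); the opcode is only illustrative:
  // Insert "DstReg = MOV SrcReg" before iterator I, reusing MI's debug location.
  BuildMI(*BB, I, MI.getDebugLoc(), TII->get(R600::MOV), DstReg)
      .addReg(SrcReg);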
R600Defines.h
llvm::CallingConv::AMDGPU_GS
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:192
llvm::TargetLoweringBase::Expand
@ Expand
Definition: TargetLowering.h:198
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:375
llvm::CallingConv::Fast
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
N
#define N
llvm::TargetLoweringBase::computeRegisterProperties
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
Definition: TargetLoweringBase.cpp:1286
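This is conventionally the last step of a lowering constructor, after every addRegisterClass() call; a sketch assuming a subtarget reference STI is available in that constructor:
  computeRegisterProperties(STI.getRegisterInfo());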
llvm::ISD::SUBE
@ SUBE
Definition: ISDOpcodes.h:280
llvm::ISD::SRL
@ SRL
Definition: ISDOpcodes.h:693
llvm::TargetLoweringBase::setCondCodeAction
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
Definition: TargetLowering.h:2466
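A sketch of the ArrayRef form, marking two f32 condition codes for expansion so the legalizer rewrites them in terms of compares the target does support (the particular codes are illustrative):
  setCondCodeAction({ISD::SETONE, ISD::SETUO}, MVT::f32, Expand);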
llvm::GlobalAddressSDNode::getAddressSpace
unsigned getAddressSpace() const
Definition: SelectionDAG.cpp:11679
llvm::R600TargetLowering::ReplaceNodeResults
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
Definition: R600ISelLowering.cpp:577
llvm::AMDGPUISD::COS_HW
@ COS_HW
Definition: AMDGPUISelLowering.h:394
llvm::MipsISD::Ins
@ Ins
Definition: MipsISelLowering.h:160
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::SDValue::getOpcode
unsigned getOpcode() const
Definition: SelectionDAGNodes.h:1137
llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:669
llvm::numbers::pif
constexpr float pif
Definition: MathExtras.h:74
llvm::ISD::SETONE
@ SETONE
Definition: ISDOpcodes.h:1432
llvm::MVT::v32i8
@ v32i8
Definition: MachineValueType.h:90
llvm::EVT::bitsGT
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:256
llvm::MVT::i16
@ i16
Definition: MachineValueType.h:47
llvm::ISD::FNEG
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:911
llvm::SelectionDAG::getMachineFunction
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:466
llvm::R600TargetLowering::EmitInstrWithCustomInserter
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Definition: R600ISelLowering.cpp:210
llvm::ISD::SDIVREM
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
llvm::AMDGPUISD::DWORDADDR
@ DWORDADDR
Definition: AMDGPUISelLowering.h:374
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
llvm::EVT::bitsGE
bool bitsGE(EVT VT) const
Return true if this has no fewer bits than VT.
Definition: ValueTypes.h:264
llvm::AMDGPUAS::CONSTANT_BUFFER_13
@ CONSTANT_BUFFER_13
Definition: AMDGPU.h:407
llvm::ISD::SIGN_EXTEND
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:760
llvm::MVT::v8i8
@ v8i8
Definition: MachineValueType.h:88
llvm::AMDGPUFrameLowering::getStackWidth
unsigned getStackWidth(const MachineFunction &MF) const
Definition: AMDGPUFrameLowering.cpp:22
MachineFunction.h
llvm::ISD::FTRUNC
@ FTRUNC
Definition: ISDOpcodes.h:925
llvm::CallingConv::AMDGPU_PS
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:195
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::MVT::v2i16
@ v2i16
Definition: MachineValueType.h:98
llvm::AMDGPUMachineFunction
Definition: AMDGPUMachineFunction.h:22
llvm::ConstantFPSDNode::getValueAPF
const APFloat & getValueAPF() const
Definition: SelectionDAGNodes.h:1642
ReorganizeVector
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap< unsigned, unsigned > &RemapSwizzle)
Definition: R600ISelLowering.cpp:1586
llvm::ISD::INSERT_VECTOR_ELT
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:523
llvm::SelectionDAG::getExtLoad
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition: SelectionDAG.cpp:8066
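A minimal sketch of an extending load, assuming Chain, Ptr, PtrInfo and DL come from an existing load node being rewritten:
  // i32 result produced by zero-extending an 8-bit memory value.
  SDValue Ld = DAG.getExtLoad(ISD::ZEXTLOAD, DL, MVT::i32, Chain, Ptr,
                              PtrInfo, MVT::i8);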
llvm::ISD::ADDE
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:279
llvm::ISD::FP_ROUND
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:852
llvm::MVT::f32
@ f32
Definition: MachineValueType.h:57
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::MachineMemOperand::getFlags
Flags getFlags() const
Return the raw flags of the source value.
Definition: MachineMemOperand.h:219
llvm::ISD::ATOMIC_LOAD
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to the "load atomic" instruction.
Definition: ISDOpcodes.h:1159
llvm::MemSDNode::getPointerInfo
const MachinePointerInfo & getPointerInfo() const
Definition: SelectionDAGNodes.h:1361
llvm::AMDGPUISD::CONST_DATA_PTR
@ CONST_DATA_PTR
Pointer to the start of the shader's constant data.
Definition: AMDGPUISelLowering.h:484
llvm::TargetLoweringBase::getPointerTy
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Definition: TargetLowering.h:356
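A sketch from inside a TargetLowering member, querying the pointer MVT for one address space (the address space chosen here is only an example):
  MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::CONSTANT_ADDRESS);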
llvm::ISD::CTPOP
@ CTPOP
Definition: ISDOpcodes.h:703
llvm::AMDGPUISD::DUMMY_CHAIN
@ DUMMY_CHAIN
Definition: AMDGPUISelLowering.h:490
llvm::R600TargetLowering::R600TargetLowering
R600TargetLowering(const TargetMachine &TM, const R600Subtarget &STI)
Definition: R600ISelLowering.cpp:29
llvm::SelectionDAG::getMergeValues
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
Definition: SelectionDAG.cpp:7813
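A sketch of the common pattern of returning a lowered value together with its output chain so the caller sees the same number of results as the original node (LoweredVal and Chain are assumed to exist):
  return DAG.getMergeValues({LoweredVal, Chain}, DL);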
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:288
llvm::MVT::v4i1
@ v4i1
Definition: MachineValueType.h:68
llvm::AMDGPUAS::CONSTANT_BUFFER_5
@ CONSTANT_BUFFER_5
Definition: AMDGPU.h:399
llvm::SelectionDAG::getCondCode
SDValue getCondCode(ISD::CondCode Cond)
Definition: SelectionDAG.cpp:1900
llvm::AMDGPUTargetLowering::FIRST_IMPLICIT
@ FIRST_IMPLICIT
Definition: AMDGPUISelLowering.h:325