1 //===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Custom DAG lowering for R600
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "R600ISelLowering.h"
15 #include "AMDGPU.h"
17 #include "R600Defines.h"
18 #include "R600InstrInfo.h"
20 #include "R600Subtarget.h"
21 #include "llvm/IR/IntrinsicsAMDGPU.h"
22 #include "llvm/IR/IntrinsicsR600.h"
23 
24 using namespace llvm;
25 
26 #include "R600GenCallingConv.inc"
27 
28 R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
29  const R600Subtarget &STI)
30  : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) {
31  addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
32  addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
33  addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
34  addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
35  addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
36  addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);
37 
40 
42 
43  // Legalize loads and stores to the private address space.
47 
48  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
49  // spaces, so it is custom lowered to handle those where it isn't.
50  for (MVT VT : MVT::integer_valuetypes()) {
54 
58 
62  }
63 
64  // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
68 
72 
77 
80  // We need to include these since trunc STORES to PRIVATE need
81  // special handling to accommodate RMW
92 
93  // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
96 
97  // Set condition code actions
110 
115 
118 
121 
125 
127 
132 
135 
142 
147 
148  // ADD, SUB overflow.
149  // TODO: turn these into Legal?
150  if (Subtarget->hasCARRY())
152 
153  if (Subtarget->hasBORROW())
155 
156  // Expand sign extension of vectors
157  if (!Subtarget->hasBFE())
159 
162 
163  if (!Subtarget->hasBFE())
167 
168  if (!Subtarget->hasBFE())
172 
176 
178 
180 
185 
190 
191  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
192  // to be Legal/Custom in order to avoid library calls.
196 
197  if (!Subtarget->hasFMA()) {
200  }
201 
202  // FIXME: May need no denormals check
204 
205  if (!Subtarget->hasBFI()) {
206  // fcopysign can be done in a single instruction with BFI.
209  }
210 
211  if (!Subtarget->hasBCNT(32))
213 
214  if (!Subtarget->hasBCNT(64))
216 
217  if (Subtarget->hasFFBH())
219 
220  if (Subtarget->hasFFBL())
222 
223  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
224  // need it for R600.
225  if (Subtarget->hasBFE())
226  setHasExtractBitsInsn(true);
227 
229 
230  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
231  for (MVT VT : ScalarIntVTs) {
236  }
237 
238  // LLVM will expand these to atomic_cmp_swap(0)
239  // and atomic_swap, respectively.
242 
243  // We need to custom lower some of the intrinsics
246 
248 
255 }
256 
257 static inline bool isEOP(MachineBasicBlock::iterator I) {
258  if (std::next(I) == I->getParent()->end())
259  return false;
260  return std::next(I)->getOpcode() == R600::RETURN;
261 }
262 
263 MachineBasicBlock *
264 R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
265  MachineBasicBlock *BB) const {
266  MachineFunction *MF = BB->getParent();
267  MachineRegisterInfo &MRI = MF->getRegInfo();
268  MachineBasicBlock::iterator I = MI;
269  const R600InstrInfo *TII = Subtarget->getInstrInfo();
270 
271  switch (MI.getOpcode()) {
272  default:
273  // Replace LDS_*_RET instructions that don't have any uses with the
274  // equivalent LDS_*_NORET instruction.
275  if (TII->isLDSRetInstr(MI.getOpcode())) {
276  int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
277  assert(DstIdx != -1);
278  MachineInstrBuilder NewMI;
279  // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
280  // LDS_1A2D support and remove this special case.
281  if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
282  MI.getOpcode() == R600::LDS_CMPST_RET)
283  return BB;
284 
285  NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
286  TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
287  for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
288  NewMI.add(MI.getOperand(i));
289  }
290  } else {
291  return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
292  }
293  break;
294 
295  case R600::FABS_R600: {
296  MachineInstr *NewMI = TII->buildDefaultInstruction(
297  *BB, I, R600::MOV, MI.getOperand(0).getReg(),
298  MI.getOperand(1).getReg());
299  TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
300  break;
301  }
302 
303  case R600::FNEG_R600: {
304  MachineInstr *NewMI = TII->buildDefaultInstruction(
305  *BB, I, R600::MOV, MI.getOperand(0).getReg(),
306  MI.getOperand(1).getReg());
307  TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
308  break;
309  }
310 
311  case R600::MASK_WRITE: {
312  Register maskedRegister = MI.getOperand(0).getReg();
313  assert(maskedRegister.isVirtual());
314  MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
315  TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
316  break;
317  }
318 
319  case R600::MOV_IMM_F32:
320  TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
321  .getFPImm()
322  ->getValueAPF()
323  .bitcastToAPInt()
324  .getZExtValue());
325  break;
326 
327  case R600::MOV_IMM_I32:
328  TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
329  MI.getOperand(1).getImm());
330  break;
331 
332  case R600::MOV_IMM_GLOBAL_ADDR: {
333  //TODO: Perhaps combine this instruction with the next if possible
334  auto MIB = TII->buildDefaultInstruction(
335  *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
336  int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
337  //TODO: Ugh this is rather ugly
338  MIB->getOperand(Idx) = MI.getOperand(1);
339  break;
340  }
341 
342  case R600::CONST_COPY: {
343  MachineInstr *NewMI = TII->buildDefaultInstruction(
344  *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
345  TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
346  MI.getOperand(1).getImm());
347  break;
348  }
349 
350  case R600::RAT_WRITE_CACHELESS_32_eg:
351  case R600::RAT_WRITE_CACHELESS_64_eg:
352  case R600::RAT_WRITE_CACHELESS_128_eg:
353  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
354  .add(MI.getOperand(0))
355  .add(MI.getOperand(1))
356  .addImm(isEOP(I)); // Set End of program bit
357  break;
358 
359  case R600::RAT_STORE_TYPED_eg:
360  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
361  .add(MI.getOperand(0))
362  .add(MI.getOperand(1))
363  .add(MI.getOperand(2))
364  .addImm(isEOP(I)); // Set End of program bit
365  break;
366 
367  case R600::BRANCH:
368  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
369  .add(MI.getOperand(0));
370  break;
371 
372  case R600::BRANCH_COND_f32: {
373  MachineInstr *NewMI =
374  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
375  R600::PREDICATE_BIT)
376  .add(MI.getOperand(1))
377  .addImm(R600::PRED_SETNE)
378  .addImm(0); // Flags
379  TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
380  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
381  .add(MI.getOperand(0))
382  .addReg(R600::PREDICATE_BIT, RegState::Kill);
383  break;
384  }
385 
386  case R600::BRANCH_COND_i32: {
387  MachineInstr *NewMI =
388  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
389  R600::PREDICATE_BIT)
390  .add(MI.getOperand(1))
391  .addImm(R600::PRED_SETNE_INT)
392  .addImm(0); // Flags
393  TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
394  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
395  .add(MI.getOperand(0))
396  .addReg(R600::PREDICATE_BIT, RegState::Kill);
397  break;
398  }
399 
400  case R600::EG_ExportSwz:
401  case R600::R600_ExportSwz: {
402  // Instruction is left unmodified if it's not the last one of its type.
403  bool isLastInstructionOfItsType = true;
404  unsigned InstExportType = MI.getOperand(1).getImm();
405  for (MachineBasicBlock::iterator NextExportInst = std::next(I),
406  EndBlock = BB->end(); NextExportInst != EndBlock;
407  NextExportInst = std::next(NextExportInst)) {
408  if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
409  NextExportInst->getOpcode() == R600::R600_ExportSwz) {
410  unsigned CurrentInstExportType = NextExportInst->getOperand(1)
411  .getImm();
412  if (CurrentInstExportType == InstExportType) {
413  isLastInstructionOfItsType = false;
414  break;
415  }
416  }
417  }
418  bool EOP = isEOP(I);
419  if (!EOP && !isLastInstructionOfItsType)
420  return BB;
421  unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
422  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
423  .add(MI.getOperand(0))
424  .add(MI.getOperand(1))
425  .add(MI.getOperand(2))
426  .add(MI.getOperand(3))
427  .add(MI.getOperand(4))
428  .add(MI.getOperand(5))
429  .add(MI.getOperand(6))
430  .addImm(CfInst)
431  .addImm(EOP);
432  break;
433  }
434  case R600::RETURN: {
435  return BB;
436  }
437  }
438 
439  MI.eraseFromParent();
440  return BB;
441 }
442 
443 //===----------------------------------------------------------------------===//
444 // Custom DAG Lowering Operations
445 //===----------------------------------------------------------------------===//
446 
447 SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
448  MachineFunction &MF = DAG.getMachineFunction();
449  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
450  switch (Op.getOpcode()) {
451  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
452  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
453  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
454  case ISD::SHL_PARTS:
455  case ISD::SRA_PARTS:
456  case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
457  case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
458  case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
459  case ISD::FCOS:
460  case ISD::FSIN: return LowerTrig(Op, DAG);
461  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
462  case ISD::STORE: return LowerSTORE(Op, DAG);
463  case ISD::LOAD: {
464  SDValue Result = LowerLOAD(Op, DAG);
465  assert((!Result.getNode() ||
466  Result.getNode()->getNumValues() == 2) &&
467  "Load should return a value and a chain");
468  return Result;
469  }
470 
471  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
472  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
473  case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
474  case ISD::INTRINSIC_VOID: {
475  SDValue Chain = Op.getOperand(0);
476  unsigned IntrinsicID =
477  cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
478  switch (IntrinsicID) {
479  case Intrinsic::r600_store_swizzle: {
480  SDLoc DL(Op);
481  const SDValue Args[8] = {
482  Chain,
483  Op.getOperand(2), // Export Value
484  Op.getOperand(3), // ArrayBase
485  Op.getOperand(4), // Type
486  DAG.getConstant(0, DL, MVT::i32), // SWZ_X
487  DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
488  DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
489  DAG.getConstant(3, DL, MVT::i32) // SWZ_W
490  };
491  return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
492  }
493 
494  // default for switch(IntrinsicID)
495  default: break;
496  }
497  // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
498  break;
499  }
500  case ISD::INTRINSIC_WO_CHAIN: {
501  unsigned IntrinsicID =
502  cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
503  EVT VT = Op.getValueType();
504  SDLoc DL(Op);
505  switch (IntrinsicID) {
506  case Intrinsic::r600_tex:
507  case Intrinsic::r600_texc: {
508  unsigned TextureOp;
509  switch (IntrinsicID) {
510  case Intrinsic::r600_tex:
511  TextureOp = 0;
512  break;
513  case Intrinsic::r600_texc:
514  TextureOp = 1;
515  break;
516  default:
517  llvm_unreachable("unhandled texture operation");
518  }
519 
520  SDValue TexArgs[19] = {
521  DAG.getConstant(TextureOp, DL, MVT::i32),
522  Op.getOperand(1),
523  DAG.getConstant(0, DL, MVT::i32),
524  DAG.getConstant(1, DL, MVT::i32),
525  DAG.getConstant(2, DL, MVT::i32),
526  DAG.getConstant(3, DL, MVT::i32),
527  Op.getOperand(2),
528  Op.getOperand(3),
529  Op.getOperand(4),
530  DAG.getConstant(0, DL, MVT::i32),
531  DAG.getConstant(1, DL, MVT::i32),
532  DAG.getConstant(2, DL, MVT::i32),
533  DAG.getConstant(3, DL, MVT::i32),
534  Op.getOperand(5),
535  Op.getOperand(6),
536  Op.getOperand(7),
537  Op.getOperand(8),
538  Op.getOperand(9),
539  Op.getOperand(10)
540  };
541  return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
542  }
543  case Intrinsic::r600_dot4: {
544  SDValue Args[8] = {
545  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
546  DAG.getConstant(0, DL, MVT::i32)),
547  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
548  DAG.getConstant(0, DL, MVT::i32)),
549  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
550  DAG.getConstant(1, DL, MVT::i32)),
551  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
552  DAG.getConstant(1, DL, MVT::i32)),
553  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
554  DAG.getConstant(2, DL, MVT::i32)),
555  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
556  DAG.getConstant(2, DL, MVT::i32)),
557  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
558  DAG.getConstant(3, DL, MVT::i32)),
559  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
560  DAG.getConstant(3, DL, MVT::i32))
561  };
562  return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
563  }
564 
565  case Intrinsic::r600_implicitarg_ptr: {
566  MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
567  uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT);
568  return DAG.getConstant(ByteOffset, DL, PtrVT);
569  }
570  case Intrinsic::r600_read_ngroups_x:
571  return LowerImplicitParameter(DAG, VT, DL, 0);
572  case Intrinsic::r600_read_ngroups_y:
573  return LowerImplicitParameter(DAG, VT, DL, 1);
574  case Intrinsic::r600_read_ngroups_z:
575  return LowerImplicitParameter(DAG, VT, DL, 2);
576  case Intrinsic::r600_read_global_size_x:
577  return LowerImplicitParameter(DAG, VT, DL, 3);
578  case Intrinsic::r600_read_global_size_y:
579  return LowerImplicitParameter(DAG, VT, DL, 4);
580  case Intrinsic::r600_read_global_size_z:
581  return LowerImplicitParameter(DAG, VT, DL, 5);
582  case Intrinsic::r600_read_local_size_x:
583  return LowerImplicitParameter(DAG, VT, DL, 6);
584  case Intrinsic::r600_read_local_size_y:
585  return LowerImplicitParameter(DAG, VT, DL, 7);
586  case Intrinsic::r600_read_local_size_z:
587  return LowerImplicitParameter(DAG, VT, DL, 8);
588 
589  case Intrinsic::r600_read_tgid_x:
590  case Intrinsic::amdgcn_workgroup_id_x:
591  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
592  R600::T1_X, VT);
593  case Intrinsic::r600_read_tgid_y:
594  case Intrinsic::amdgcn_workgroup_id_y:
595  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
596  R600::T1_Y, VT);
597  case Intrinsic::r600_read_tgid_z:
598  case Intrinsic::amdgcn_workgroup_id_z:
599  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
600  R600::T1_Z, VT);
601  case Intrinsic::r600_read_tidig_x:
602  case Intrinsic::amdgcn_workitem_id_x:
603  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
604  R600::T0_X, VT);
605  case Intrinsic::r600_read_tidig_y:
606  case Intrinsic::amdgcn_workitem_id_y:
607  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
608  R600::T0_Y, VT);
609  case Intrinsic::r600_read_tidig_z:
610  case Intrinsic::amdgcn_workitem_id_z:
611  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
612  R600::T0_Z, VT);
613 
614  case Intrinsic::r600_recipsqrt_ieee:
615  return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
616 
617  case Intrinsic::r600_recipsqrt_clamped:
618  return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
619  default:
620  return Op;
621  }
622 
623  // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
624  break;
625  }
626  } // end switch(Op.getOpcode())
627  return SDValue();
628 }
629 
630 void R600TargetLowering::ReplaceNodeResults(SDNode *N,
631  SmallVectorImpl<SDValue> &Results,
632  SelectionDAG &DAG) const {
633  switch (N->getOpcode()) {
634  default:
635  AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
636  return;
637  case ISD::FP_TO_UINT:
638  if (N->getValueType(0) == MVT::i1) {
639  Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
640  return;
641  }
642  // Since we don't care about out of bounds values we can use FP_TO_SINT for
643  // uints too. The DAGLegalizer code for uint considers some extra cases
644  // which are not necessary here.
645  LLVM_FALLTHROUGH;
646  case ISD::FP_TO_SINT: {
647  if (N->getValueType(0) == MVT::i1) {
648  Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
649  return;
650  }
651 
652  SDValue Result;
653  if (expandFP_TO_SINT(N, Result, DAG))
654  Results.push_back(Result);
655  return;
656  }
657  case ISD::SDIVREM: {
658  SDValue Op = SDValue(N, 1);
659  SDValue RES = LowerSDIVREM(Op, DAG);
660  Results.push_back(RES);
661  Results.push_back(RES.getValue(1));
662  break;
663  }
664  case ISD::UDIVREM: {
665  SDValue Op = SDValue(N, 0);
666  LowerUDIVREM64(Op, DAG, Results);
667  break;
668  }
669  }
670 }
671 
672 SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
673  SDValue Vector) const {
674  SDLoc DL(Vector);
675  EVT VecVT = Vector.getValueType();
676  EVT EltVT = VecVT.getVectorElementType();
677  SmallVector<SDValue, 8> Args;
678 
679  for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
680  Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
681  DAG.getVectorIdxConstant(i, DL)));
682  }
683 
684  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
685 }
686 
687 SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
688  SelectionDAG &DAG) const {
689  SDLoc DL(Op);
690  SDValue Vector = Op.getOperand(0);
691  SDValue Index = Op.getOperand(1);
692 
693  if (isa<ConstantSDNode>(Index) ||
694  Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
695  return Op;
696 
697  Vector = vectorToVerticalVector(DAG, Vector);
698  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
699  Vector, Index);
700 }
701 
702 SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
703  SelectionDAG &DAG) const {
704  SDLoc DL(Op);
705  SDValue Vector = Op.getOperand(0);
706  SDValue Value = Op.getOperand(1);
707  SDValue Index = Op.getOperand(2);
708 
709  if (isa<ConstantSDNode>(Index) ||
710  Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
711  return Op;
712 
713  Vector = vectorToVerticalVector(DAG, Vector);
714  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
715  Vector, Value, Index);
716  return vectorToVerticalVector(DAG, Insert);
717 }
718 
719 SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
720  SDValue Op,
721  SelectionDAG &DAG) const {
722  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
723  if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
724  return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
725 
726  const DataLayout &DL = DAG.getDataLayout();
727  const GlobalValue *GV = GSD->getGlobal();
728  MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
729 
730  SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
731  return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
732 }
733 
734 SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
735  // On hw >= R700, COS/SIN input must be between -1. and 1.
736  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
737  EVT VT = Op.getValueType();
738  SDValue Arg = Op.getOperand(0);
739  SDLoc DL(Op);
740 
741  // TODO: Should this propagate fast-math-flags?
742  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
743  DAG.getNode(ISD::FADD, DL, VT,
744  DAG.getNode(ISD::FMUL, DL, VT, Arg,
745  DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
746  DAG.getConstantFP(0.5, DL, MVT::f32)));
747  unsigned TrigNode;
748  switch (Op.getOpcode()) {
749  case ISD::FCOS:
750  TrigNode = AMDGPUISD::COS_HW;
751  break;
752  case ISD::FSIN:
753  TrigNode = AMDGPUISD::SIN_HW;
754  break;
755  default:
756  llvm_unreachable("Wrong trig opcode");
757  }
758  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
759  DAG.getNode(ISD::FADD, DL, VT, FractPart,
760  DAG.getConstantFP(-0.5, DL, MVT::f32)));
761  if (Gen >= AMDGPUSubtarget::R700)
762  return TrigVal;
763  // On R600 hw, COS/SIN input must be between -Pi and Pi.
764  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
765  DAG.getConstantFP(numbers::pif * 2.0f, DL, MVT::f32));
766 }
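// Worked example of the range reduction above, e.g. for x = 7*Pi/2:
//   x * 0.15915494309 = x / (2*Pi) = 1.75
//   FRACT(1.75 + 0.5) - 0.5 = FRACT(2.25) - 0.5 = -0.25
// i.e. the argument is folded into [-0.5, 0.5] revolutions before it is fed
// to COS_HW/SIN_HW.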
767 
768 SDValue R600TargetLowering::LowerShiftParts(SDValue Op,
769  SelectionDAG &DAG) const {
770  SDValue Lo, Hi;
771  expandShiftParts(Op.getNode(), Lo, Hi, DAG);
772  return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
773 }
774 
775 SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
776  unsigned mainop, unsigned ovf) const {
777  SDLoc DL(Op);
778  EVT VT = Op.getValueType();
779 
780  SDValue Lo = Op.getOperand(0);
781  SDValue Hi = Op.getOperand(1);
782 
783  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
784  // Extend sign.
785  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
786  DAG.getValueType(MVT::i1));
787 
788  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
789 
790  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
791 }
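// For example, (uaddo a, b) is rewritten above as
//   Res = ADD a, b
//   OVF = sign_extend_inreg(CARRY(a, b), i1)
// and the pair (Res, OVF) is merged back as the results of the original node,
// so the overflow flag is all-ones rather than 1 when set.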
792 
793 SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
794  SDLoc DL(Op);
795  return DAG.getNode(
796  ISD::SETCC,
797  DL,
798  MVT::i1,
799  Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
800  DAG.getCondCode(ISD::SETEQ));
801 }
802 
803 SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
804  SDLoc DL(Op);
805  return DAG.getNode(
806  ISD::SETCC,
807  DL,
808  MVT::i1,
809  Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
810  DAG.getCondCode(ISD::SETEQ));
811 }
812 
813 SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
814  const SDLoc &DL,
815  unsigned DwordOffset) const {
816  unsigned ByteOffset = DwordOffset * 4;
817  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
818  AMDGPUAS::PARAM_I_ADDRESS);
819 
820  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
821  assert(isInt<16>(ByteOffset));
822 
823  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
824  DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
825  MachinePointerInfo(ConstantPointerNull::get(PtrType)));
826 }
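// Example: r600_read_ngroups_y is dispatched above with DwordOffset 1, so it
// becomes a 4-byte load from byte offset 4 of the implicit kernel parameter
// area.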
827 
828 bool R600TargetLowering::isZero(SDValue Op) const {
829  if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
830  return Cst->isNullValue();
831  } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
832  return CstFP->isZero();
833  } else {
834  return false;
835  }
836 }
837 
838 bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
839  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
840  return CFP->isExactlyValue(1.0);
841  }
842  return isAllOnesConstant(Op);
843 }
844 
845 bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
846  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
847  return CFP->getValueAPF().isZero();
848  }
849  return isNullConstant(Op);
850 }
851 
852 SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
853  SDLoc DL(Op);
854  EVT VT = Op.getValueType();
855 
856  SDValue LHS = Op.getOperand(0);
857  SDValue RHS = Op.getOperand(1);
858  SDValue True = Op.getOperand(2);
859  SDValue False = Op.getOperand(3);
860  SDValue CC = Op.getOperand(4);
861  SDValue Temp;
862 
863  if (VT == MVT::f32) {
864  DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
865  SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
866  if (MinMax)
867  return MinMax;
868  }
869 
870  // LHS and RHS are guaranteed to be the same value type
871  EVT CompareVT = LHS.getValueType();
872 
873  // Check if we can lower this to a native operation.
874 
875  // Try to lower to a SET* instruction:
876  //
877  // SET* can match the following patterns:
878  //
879  // select_cc f32, f32, -1, 0, cc_supported
880  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
881  // select_cc i32, i32, -1, 0, cc_supported
882  //
883 
884  // Move hardware True/False values to the correct operand.
885  if (isHWTrueValue(False) && isHWFalseValue(True)) {
886  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
887  ISD::CondCode InverseCC = ISD::getSetCCInverse(CCOpcode, CompareVT);
888  if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
889  std::swap(False, True);
890  CC = DAG.getCondCode(InverseCC);
891  } else {
892  ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
893  if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
894  std::swap(False, True);
895  std::swap(LHS, RHS);
896  CC = DAG.getCondCode(SwapInvCC);
897  }
898  }
899  }
900 
901  if (isHWTrueValue(True) && isHWFalseValue(False) &&
902  (CompareVT == VT || VT == MVT::i32)) {
903  // This can be matched by a SET* instruction.
904  return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
905  }
906 
907  // Try to lower to a CND* instruction:
908  //
909  // CND* can match the following patterns:
910  //
911  // select_cc f32, 0.0, f32, f32, cc_supported
912  // select_cc f32, 0.0, i32, i32, cc_supported
913  // select_cc i32, 0, f32, f32, cc_supported
914  // select_cc i32, 0, i32, i32, cc_supported
915  //
916 
917  // Try to move the zero value to the RHS
918  if (isZero(LHS)) {
919  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
920  // Try swapping the operands
921  ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
922  if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
923  std::swap(LHS, RHS);
924  CC = DAG.getCondCode(CCSwapped);
925  } else {
926  // Try inverting the condition and then swapping the operands
927  ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT);
928  CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
929  if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
930  std::swap(True, False);
931  std::swap(LHS, RHS);
932  CC = DAG.getCondCode(CCSwapped);
933  }
934  }
935  }
936  if (isZero(RHS)) {
937  SDValue Cond = LHS;
938  SDValue Zero = RHS;
939  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
940  if (CompareVT != VT) {
941  // Bitcast True / False to the correct types. This will end up being
942  // a nop, but it allows us to define only a single pattern in the
943  // .TD files for each CND* instruction rather than having to have
944  // one pattern for integer True/False and one for fp True/False
945  True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
946  False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
947  }
948 
949  switch (CCOpcode) {
950  case ISD::SETONE:
951  case ISD::SETUNE:
952  case ISD::SETNE:
953  CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT);
954  Temp = True;
955  True = False;
956  False = Temp;
957  break;
958  default:
959  break;
960  }
961  SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
962  Cond, Zero,
963  True, False,
964  DAG.getCondCode(CCOpcode));
965  return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
966  }
967 
968  // If we make it this far, it means we have no native instructions to handle
969  // this SELECT_CC, so we must lower it.
970  SDValue HWTrue, HWFalse;
971 
972  if (CompareVT == MVT::f32) {
973  HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
974  HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
975  } else if (CompareVT == MVT::i32) {
976  HWTrue = DAG.getConstant(-1, DL, CompareVT);
977  HWFalse = DAG.getConstant(0, DL, CompareVT);
978  }
979  else {
980  llvm_unreachable("Unhandled value type in LowerSELECT_CC");
981  }
982 
983  // Lower this unsupported SELECT_CC into a combination of two supported
984  // SELECT_CC operations.
985  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
986 
987  return DAG.getNode(ISD::SELECT_CC, DL, VT,
988  Cond, HWFalse,
989  True, False,
990  DAG.getCondCode(ISD::SETNE));
991 }
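// Example of the final fallback above: (select_cc f32 a, b, i32 x, i32 y, setlt)
// matches neither the SET* nor the CND* patterns, so it is split into two
// supported operations:
//   Cond = select_cc a, b, 1.0f, 0.0f, setlt   // SET* form
//   Res  = select_cc Cond, 0.0f, x, y, setne   // CND* form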
992 
993 /// LLVM generates byte-addressed pointers. For indirect addressing, we need to
994 /// convert these pointers to a register index. Each register holds
995 /// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
996 /// \p StackWidth, which tells us how many of the 4 sub-registers will be used
997 /// for indirect addressing.
998 SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
999  unsigned StackWidth,
1000  SelectionDAG &DAG) const {
1001  unsigned SRLPad;
1002  switch(StackWidth) {
1003  case 1:
1004  SRLPad = 2;
1005  break;
1006  case 2:
1007  SRLPad = 3;
1008  break;
1009  case 4:
1010  SRLPad = 4;
1011  break;
1012  default: llvm_unreachable("Invalid stack width");
1013  }
1014 
1015  SDLoc DL(Ptr);
1016  return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
1017  DAG.getConstant(SRLPad, DL, MVT::i32));
1018 }
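// Example: with StackWidth == 1 each stack element occupies one 32-bit
// channel, so the byte pointer is shifted right by 2 (divide by 4); with
// StackWidth == 4 all four channels of a 128-bit register are used, so the
// pointer is shifted right by 4 (divide by 16) instead.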
1019 
1020 void R600TargetLowering::getStackAddress(unsigned StackWidth,
1021  unsigned ElemIdx,
1022  unsigned &Channel,
1023  unsigned &PtrIncr) const {
1024  switch (StackWidth) {
1025  default:
1026  case 1:
1027  Channel = 0;
1028  if (ElemIdx > 0) {
1029  PtrIncr = 1;
1030  } else {
1031  PtrIncr = 0;
1032  }
1033  break;
1034  case 2:
1035  Channel = ElemIdx % 2;
1036  if (ElemIdx == 2) {
1037  PtrIncr = 1;
1038  } else {
1039  PtrIncr = 0;
1040  }
1041  break;
1042  case 4:
1043  Channel = ElemIdx;
1044  PtrIncr = 0;
1045  break;
1046  }
1047 }
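// Example: with StackWidth == 2, element 0 maps to (Channel 0, PtrIncr 0),
// element 1 to (Channel 1, PtrIncr 0) and element 2 to (Channel 0, PtrIncr 1),
// i.e. every pair of elements spills over into the next stack register.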
1048 
1049 SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
1050  SelectionDAG &DAG) const {
1051  SDLoc DL(Store);
1052  //TODO: Who creates the i8 stores?
1053  assert(Store->isTruncatingStore()
1054  || Store->getValue().getValueType() == MVT::i8);
1055  assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
1056 
1057  SDValue Mask;
1058  if (Store->getMemoryVT() == MVT::i8) {
1059  assert(Store->getAlignment() >= 1);
1060  Mask = DAG.getConstant(0xff, DL, MVT::i32);
1061  } else if (Store->getMemoryVT() == MVT::i16) {
1062  assert(Store->getAlignment() >= 2);
1063  Mask = DAG.getConstant(0xffff, DL, MVT::i32);
1064  } else {
1065  llvm_unreachable("Unsupported private trunc store");
1066  }
1067 
1068  SDValue OldChain = Store->getChain();
1069  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
1070  // Skip dummy
1071  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
1072  SDValue BasePtr = Store->getBasePtr();
1073  SDValue Offset = Store->getOffset();
1074  EVT MemVT = Store->getMemoryVT();
1075 
1076  SDValue LoadPtr = BasePtr;
1077  if (!Offset.isUndef()) {
1078  LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1079  }
1080 
1081  // Get dword location
1082  // TODO: this should be eliminated by the future SHR ptr, 2
1083  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1084  DAG.getConstant(0xfffffffc, DL, MVT::i32));
1085 
1086  // Load dword
1087  // TODO: can we be smarter about machine pointer info?
1088  MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
1089  SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
1090 
1091  Chain = Dst.getValue(1);
1092 
1093  // Get offset in dword
1094  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1095  DAG.getConstant(0x3, DL, MVT::i32));
1096 
1097  // Convert byte offset to bit shift
1098  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1099  DAG.getConstant(3, DL, MVT::i32));
1100 
1101  // TODO: Contrary to the name of the function,
1102  // it also handles sub-i32 non-truncating stores (like i1).
1103  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
1104  Store->getValue());
1105 
1106  // Mask the value to the right type
1107  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
1108 
1109  // Shift the value in place
1110  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
1111  MaskedValue, ShiftAmt);
1112 
1113  // Shift the mask in place
1114  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);
1115 
1116  // Invert the mask. NOTE: if we had native ROL instructions we could
1117  // use an inverted mask.
1118  DstMask = DAG.getNOT(DL, DstMask, MVT::i32);
1119 
1120  // Cleanup the target bits
1121  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
1122 
1123  // Add the new bits
1124  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
1125 
1126  // Store dword
1127  // TODO: Can we be smarter about MachinePointerInfo?
1128  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);
1129 
1130  // If we are part of expanded vector, make our neighbors depend on this store
1131  if (VectorTrunc) {
1132  // Make all other vector elements depend on this store
1133  Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
1134  DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
1135  }
1136  return NewStore;
1137 }
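// Worked example of the read-modify-write sequence above: a truncating i8
// store of V to private byte address 6 loads the dword at address 4, computes
// ShiftAmt = (6 & 3) * 8 = 16, clears bits [23:16] of the loaded dword with
// the inverted mask, ORs in (sext(V) & 0xff) << 16, and stores the dword back.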
1138 
1139 SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1140  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1141  unsigned AS = StoreNode->getAddressSpace();
1142 
1143  SDValue Chain = StoreNode->getChain();
1144  SDValue Ptr = StoreNode->getBasePtr();
1145  SDValue Value = StoreNode->getValue();
1146 
1147  EVT VT = Value.getValueType();
1148  EVT MemVT = StoreNode->getMemoryVT();
1149  EVT PtrVT = Ptr.getValueType();
1150 
1151  SDLoc DL(Op);
1152 
1153  const bool TruncatingStore = StoreNode->isTruncatingStore();
1154 
1155  // Neither LOCAL nor PRIVATE can do vectors at the moment
1156  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS ||
1157  TruncatingStore) &&
1158  VT.isVector()) {
1159  if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && TruncatingStore) {
1160  // Add an extra level of chain to isolate this vector
1161  SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
1162  // TODO: can the chain be replaced without creating a new store?
1163  SDValue NewStore = DAG.getTruncStore(
1164  NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
1165  MemVT, StoreNode->getAlignment(),
1166  StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
1167  StoreNode = cast<StoreSDNode>(NewStore);
1168  }
1169 
1170  return scalarizeVectorStore(StoreNode, DAG);
1171  }
1172 
1173  Align Alignment = StoreNode->getAlign();
1174  if (Alignment < MemVT.getStoreSize() &&
1175  !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
1176  StoreNode->getMemOperand()->getFlags(),
1177  nullptr)) {
1178  return expandUnalignedStore(StoreNode, DAG);
1179  }
1180 
1181  SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
1182  DAG.getConstant(2, DL, PtrVT));
1183 
1184  if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
1185  // It is beneficial to create MSKOR here instead of in the combiner to avoid
1186  // artificial dependencies introduced by RMW.
1187  if (TruncatingStore) {
1188  assert(VT.bitsLE(MVT::i32));
1189  SDValue MaskConstant;
1190  if (MemVT == MVT::i8) {
1191  MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
1192  } else {
1193  assert(MemVT == MVT::i16);
1194  assert(StoreNode->getAlignment() >= 2);
1195  MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
1196  }
1197 
1198  SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
1199  DAG.getConstant(0x00000003, DL, PtrVT));
1200  SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1201  DAG.getConstant(3, DL, VT));
1202 
1203  // Put the mask in correct place
1204  SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);
1205 
1206  // Put the value bits in correct place
1207  SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1208  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
1209 
1210  // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1211  // vector instead.
1212  SDValue Src[4] = {
1213  ShiftedValue,
1214  DAG.getConstant(0, DL, MVT::i32),
1215  DAG.getConstant(0, DL, MVT::i32),
1216  Mask
1217  };
1218  SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
1219  SDValue Args[3] = { Chain, Input, DWordAddr };
1220  return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
1221  Op->getVTList(), Args, MemVT,
1222  StoreNode->getMemOperand());
1223  } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
1224  // Convert pointer from byte address to dword address.
1225  Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1226 
1227  if (StoreNode->isIndexed()) {
1228  llvm_unreachable("Indexed stores not supported yet");
1229  } else {
1230  Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1231  }
1232  return Chain;
1233  }
1234  }
1235 
1236  // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
1237  if (AS != AMDGPUAS::PRIVATE_ADDRESS)
1238  return SDValue();
1239 
1240  if (MemVT.bitsLT(MVT::i32))
1241  return lowerPrivateTruncStore(StoreNode, DAG);
1242 
1243  // Standard i32+ store, tag it with DWORDADDR to note that the address
1244  // has been shifted
1245  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1246  Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1247  return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1248  }
1249 
1250  // Tagged i32+ stores will be matched by patterns
1251  return SDValue();
1252 }
1253 
1254 // Returns 512 + (kc_bank << 12)
1255 static int
1256 ConstantAddressBlock(unsigned AddressSpace) {
1257  switch (AddressSpace) {
1258  case AMDGPUAS::CONSTANT_BUFFER_0:
1259  return 512;
1260  case AMDGPUAS::CONSTANT_BUFFER_1:
1261  return 512 + 4096;
1262  case AMDGPUAS::CONSTANT_BUFFER_2:
1263  return 512 + 4096 * 2;
1264  case AMDGPUAS::CONSTANT_BUFFER_3:
1265  return 512 + 4096 * 3;
1266  case AMDGPUAS::CONSTANT_BUFFER_4:
1267  return 512 + 4096 * 4;
1268  case AMDGPUAS::CONSTANT_BUFFER_5:
1269  return 512 + 4096 * 5;
1270  case AMDGPUAS::CONSTANT_BUFFER_6:
1271  return 512 + 4096 * 6;
1272  case AMDGPUAS::CONSTANT_BUFFER_7:
1273  return 512 + 4096 * 7;
1274  case AMDGPUAS::CONSTANT_BUFFER_8:
1275  return 512 + 4096 * 8;
1276  case AMDGPUAS::CONSTANT_BUFFER_9:
1277  return 512 + 4096 * 9;
1278  case AMDGPUAS::CONSTANT_BUFFER_10:
1279  return 512 + 4096 * 10;
1280  case AMDGPUAS::CONSTANT_BUFFER_11:
1281  return 512 + 4096 * 11;
1282  case AMDGPUAS::CONSTANT_BUFFER_12:
1283  return 512 + 4096 * 12;
1284  case AMDGPUAS::CONSTANT_BUFFER_13:
1285  return 512 + 4096 * 13;
1286  case AMDGPUAS::CONSTANT_BUFFER_14:
1287  return 512 + 4096 * 14;
1288  case AMDGPUAS::CONSTANT_BUFFER_15:
1289  return 512 + 4096 * 15;
1290  default:
1291  return -1;
1292  }
1293 }
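// Example: an address space of CONSTANT_BUFFER_2 (kc_bank 2) yields
// 512 + (2 << 12) = 8704, matching the formula in the comment above.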
1294 
1295 SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
1296  SelectionDAG &DAG) const {
1297  SDLoc DL(Op);
1298  LoadSDNode *Load = cast<LoadSDNode>(Op);
1299  ISD::LoadExtType ExtType = Load->getExtensionType();
1300  EVT MemVT = Load->getMemoryVT();
1301  assert(Load->getAlignment() >= MemVT.getStoreSize());
1302 
1303  SDValue BasePtr = Load->getBasePtr();
1304  SDValue Chain = Load->getChain();
1305  SDValue Offset = Load->getOffset();
1306 
1307  SDValue LoadPtr = BasePtr;
1308  if (!Offset.isUndef()) {
1309  LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1310  }
1311 
1312  // Get dword location
1313  // NOTE: this should be eliminated by the future SHR ptr, 2
1314  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1315  DAG.getConstant(0xfffffffc, DL, MVT::i32));
1316 
1317  // Load dword
1318  // TODO: can we be smarter about machine pointer info?
1319  MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
1320  SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
1321 
1322  // Get offset within the register.
1323  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1324  LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));
1325 
1326  // Bit offset of target byte (byteIdx * 8).
1327  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1328  DAG.getConstant(3, DL, MVT::i32));
1329 
1330  // Shift to the right.
1331  SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);
1332 
1333  // Eliminate the upper bits by setting them to ...
1334  EVT MemEltVT = MemVT.getScalarType();
1335 
1336  if (ExtType == ISD::SEXTLOAD) { // ... ones.
1337  SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1338  Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
1339  } else { // ... or zeros.
1340  Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
1341  }
1342 
1343  SDValue Ops[] = {
1344  Ret,
1345  Read.getValue(1) // This should be our output chain
1346  };
1347 
1348  return DAG.getMergeValues(Ops, DL);
1349 }
1350 
1351 SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1352  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1353  unsigned AS = LoadNode->getAddressSpace();
1354  EVT MemVT = LoadNode->getMemoryVT();
1355  ISD::LoadExtType ExtType = LoadNode->getExtensionType();
1356 
1357  if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
1358  ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
1359  return lowerPrivateExtLoad(Op, DAG);
1360  }
1361 
1362  SDLoc DL(Op);
1363  EVT VT = Op.getValueType();
1364  SDValue Chain = LoadNode->getChain();
1365  SDValue Ptr = LoadNode->getBasePtr();
1366 
1367  if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
1368  LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
1369  VT.isVector()) {
1370  SDValue Ops[2];
1371  std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(LoadNode, DAG);
1372  return DAG.getMergeValues(Ops, DL);
1373  }
1374 
1375  // This is still used for explicit load from addrspace(8)
1376  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
1377  if (ConstantBlock > -1 &&
1378  ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
1379  (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
1380  SDValue Result;
1381  if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
1382  isa<ConstantSDNode>(Ptr)) {
1383  return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
1384  } else {
1385  //TODO: Does this even work?
1386  // A non-constant pointer can't be folded, so keep it as a v4f32 load.
1387  Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
1388  DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
1389  DAG.getConstant(4, DL, MVT::i32)),
1390  DAG.getConstant(LoadNode->getAddressSpace() -
1391  AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
1392  );
1393  }
1394 
1395  if (!VT.isVector()) {
1396  Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1397  DAG.getConstant(0, DL, MVT::i32));
1398  }
1399 
1400  SDValue MergedValues[2] = {
1401  Result,
1402  Chain
1403  };
1404  return DAG.getMergeValues(MergedValues, DL);
1405  }
1406 
1407  // For most operations returning SDValue() will result in the node being
1408  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
1409  // need to manually expand loads that may be legal in some address spaces and
1410  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
1411  // compute shaders, since the data is sign extended when it is uploaded to the
1412  // buffer. However SEXT loads from other address spaces are not supported, so
1413  // we need to expand them here.
1414  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1415  assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
1416  SDValue NewLoad = DAG.getExtLoad(
1417  ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
1418  LoadNode->getAlignment(), LoadNode->getMemOperand()->getFlags());
1419  SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
1420  DAG.getValueType(MemVT));
1421 
1422  SDValue MergedValues[2] = { Res, Chain };
1423  return DAG.getMergeValues(MergedValues, DL);
1424  }
1425 
1426  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1427  return SDValue();
1428  }
1429 
1430  // DWORDADDR ISD marks already shifted address
1431  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1432  assert(VT == MVT::i32);
1433  Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
1434  Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
1435  return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
1436  }
1437  return SDValue();
1438 }
1439 
1440 SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1441  SDValue Chain = Op.getOperand(0);
1442  SDValue Cond = Op.getOperand(1);
1443  SDValue Jump = Op.getOperand(2);
1444 
1445  return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1446  Chain, Jump, Cond);
1447 }
1448 
1449 SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
1450  SelectionDAG &DAG) const {
1451  MachineFunction &MF = DAG.getMachineFunction();
1452  const R600FrameLowering *TFL = Subtarget->getFrameLowering();
1453 
1454  FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
1455 
1456  unsigned FrameIndex = FIN->getIndex();
1457  Register IgnoredFrameReg;
1458  StackOffset Offset =
1459  TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
1460  return DAG.getConstant(Offset.getFixed() * 4 * TFL->getStackWidth(MF),
1461  SDLoc(Op), Op.getValueType());
1462 }
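// Example: a frame index whose fixed offset is 2, on a machine with stack
// width 1, is lowered to the constant 2 * 4 * 1 = 8; there is no frame
// register on R600, so frame indices become plain constant offsets.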
1463 
1464 CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
1465  bool IsVarArg) const {
1466  switch (CC) {
1467  case CallingConv::AMDGPU_KERNEL:
1468  case CallingConv::SPIR_KERNEL:
1469  case CallingConv::C:
1470  case CallingConv::Fast:
1471  case CallingConv::Cold:
1472  llvm_unreachable("kernels should not be handled here");
1473  case CallingConv::AMDGPU_VS:
1474  case CallingConv::AMDGPU_GS:
1475  case CallingConv::AMDGPU_PS:
1476  case CallingConv::AMDGPU_CS:
1477  case CallingConv::AMDGPU_HS:
1478  case CallingConv::AMDGPU_ES:
1479  case CallingConv::AMDGPU_LS:
1480  return CC_R600;
1481  default:
1482  report_fatal_error("Unsupported calling convention.");
1483  }
1484 }
1485 
1486 /// XXX Only kernel functions are supported, so we can assume for now that
1487 /// every function is a kernel function, but in the future we should use
1488 /// separate calling conventions for kernel and non-kernel functions.
1489 SDValue R600TargetLowering::LowerFormalArguments(
1490  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1491  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1492  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1493  SmallVector<CCValAssign, 16> ArgLocs;
1494  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1495  *DAG.getContext());
1496  MachineFunction &MF = DAG.getMachineFunction();
1498 
1499  if (AMDGPU::isShader(CallConv)) {
1500  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
1501  } else {
1502  analyzeFormalArgumentsCompute(CCInfo, Ins);
1503  }
1504 
1505  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
1506  CCValAssign &VA = ArgLocs[i];
1507  const ISD::InputArg &In = Ins[i];
1508  EVT VT = In.VT;
1509  EVT MemVT = VA.getLocVT();
1510  if (!VT.isVector() && MemVT.isVector()) {
1511  // Get load source type if scalarized.
1512  MemVT = MemVT.getVectorElementType();
1513  }
1514 
1515  if (AMDGPU::isShader(CallConv)) {
1516  Register Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
1517  SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1518  InVals.push_back(Register);
1519  continue;
1520  }
1521 
1522  // i64 isn't a legal type, so the register type used ends up as i32, which
1523  // isn't expected here. It attempts to create this sextload, but it ends up
1524  // being invalid. Somehow this seems to work with i64 arguments, but breaks
1525  // for <1 x i64>.
1526 
1527  // The first 36 bytes of the input buffer contain information about
1528  // thread group and global sizes.
1529  ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
1530  if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
1531  // FIXME: This should really check the extload type, but the handling of
1532  // extload vector parameters seems to be broken.
1533 
1534  // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1535  Ext = ISD::SEXTLOAD;
1536  }
1537 
1538  // Compute the offset from the value.
1539  // XXX - I think PartOffset should give you this, but it seems to give the
1540  // size of the register which isn't useful.
1541 
1542  unsigned PartOffset = VA.getLocMemOffset();
1543  unsigned Alignment = MinAlign(VT.getStoreSize(), PartOffset);
1544 
1545  MachinePointerInfo PtrInfo(AMDGPUAS::PARAM_I_ADDRESS);
1546  SDValue Arg = DAG.getLoad(
1547  ISD::UNINDEXED, Ext, VT, DL, Chain,
1548  DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
1549  PtrInfo,
1550  MemVT, Alignment, MachineMemOperand::MONonTemporal |
1551  MachineMemOperand::MODereferenceable |
1552  MachineMemOperand::MOInvariant);
1553 
1554  InVals.push_back(Arg);
1555  }
1556  return Chain;
1557 }
1558 
1559 EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
1560  EVT VT) const {
1561  if (!VT.isVector())
1562  return MVT::i32;
1563  return VT.changeVectorElementTypeToInteger();
1564 }
1565 
1566 bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
1567  const SelectionDAG &DAG) const {
1568  // Local and Private addresses do not handle vectors. Limit to i32
1569  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS)) {
1570  return (MemVT.getSizeInBits() <= 32);
1571  }
1572  return true;
1573 }
1574 
1575 bool R600TargetLowering::allowsMisalignedMemoryAccesses(
1576  EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1577  bool *IsFast) const {
1578  if (IsFast)
1579  *IsFast = false;
1580 
1581  if (!VT.isSimple() || VT == MVT::Other)
1582  return false;
1583 
1584  if (VT.bitsLT(MVT::i32))
1585  return false;
1586 
1587  // TODO: This is a rough estimate.
1588  if (IsFast)
1589  *IsFast = true;
1590 
1591  return VT.bitsGT(MVT::i32) && Alignment >= Align(4);
1592 }
1593 
1594 static SDValue CompactSwizzlableVector(
1595  SelectionDAG &DAG, SDValue VectorEntry,
1596  DenseMap<unsigned, unsigned> &RemapSwizzle) {
1597  assert(RemapSwizzle.empty());
1598 
1599  SDLoc DL(VectorEntry);
1600  EVT EltTy = VectorEntry.getValueType().getVectorElementType();
1601 
1602  SDValue NewBldVec[4];
1603  for (unsigned i = 0; i < 4; i++)
1604  NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
1605  DAG.getIntPtrConstant(i, DL));
1606 
1607  for (unsigned i = 0; i < 4; i++) {
1608  if (NewBldVec[i].isUndef())
1609  // We mask the write here to teach later passes that the ith element of this
1610  // vector is undef. Thus we can use it to reduce 128-bit register usage,
1611  // break false dependencies and additionally make assembly easier to read.
1612  RemapSwizzle[i] = 7; // SEL_MASK_WRITE
1613  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1614  if (C->isZero()) {
1615  RemapSwizzle[i] = 4; // SEL_0
1616  NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1617  } else if (C->isExactlyValue(1.0)) {
1618  RemapSwizzle[i] = 5; // SEL_1
1619  NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1620  }
1621  }
1622 
1623  if (NewBldVec[i].isUndef())
1624  continue;
1625 
1626  for (unsigned j = 0; j < i; j++) {
1627  if (NewBldVec[i] == NewBldVec[j]) {
1628  NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1629  RemapSwizzle[i] = j;
1630  break;
1631  }
1632  }
1633  }
1634 
1635  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1636  NewBldVec);
1637 }
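// Example: build_vector (a, 0.0, a, 1.0) is compacted above into
// build_vector (a, undef, undef, undef) with RemapSwizzle = {1 -> 4 (SEL_0),
// 2 -> 0 (reuse element 0), 3 -> 5 (SEL_1)}.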
1638 
1639 static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1640  DenseMap<unsigned, unsigned> &RemapSwizzle) {
1641  assert(RemapSwizzle.empty());
1642 
1643  SDLoc DL(VectorEntry);
1644  EVT EltTy = VectorEntry.getValueType().getVectorElementType();
1645 
1646  SDValue NewBldVec[4];
1647  bool isUnmovable[4] = {false, false, false, false};
1648  for (unsigned i = 0; i < 4; i++)
1649  NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
1650  DAG.getIntPtrConstant(i, DL));
1651 
1652  for (unsigned i = 0; i < 4; i++) {
1653  RemapSwizzle[i] = i;
1654  if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1655  unsigned Idx = cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1656  ->getZExtValue();
1657  if (i == Idx)
1658  isUnmovable[Idx] = true;
1659  }
1660  }
1661 
1662  for (unsigned i = 0; i < 4; i++) {
1663  if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1664  unsigned Idx = cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1665  ->getZExtValue();
1666  if (isUnmovable[Idx])
1667  continue;
1668  // Swap i and Idx
1669  std::swap(NewBldVec[Idx], NewBldVec[i]);
1670  std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1671  break;
1672  }
1673  }
1674 
1675  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1676  NewBldVec);
1677 }
1678 
1679 SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
1680  SelectionDAG &DAG,
1681  const SDLoc &DL) const {
1682  // Old -> New swizzle values
1683  DenseMap<unsigned, unsigned> SwizzleRemap;
1684 
1685  BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1686  for (unsigned i = 0; i < 4; i++) {
1687  unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
1688  if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1689  Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
1690  }
1691 
1692  SwizzleRemap.clear();
1693  BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1694  for (unsigned i = 0; i < 4; i++) {
1695  unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
1696  if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1697  Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
1698  }
1699 
1700  return BuildVector;
1701 }
1702 
1703 SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
1704  SelectionDAG &DAG) const {
1705  SDLoc DL(LoadNode);
1706  EVT VT = LoadNode->getValueType(0);
1707  SDValue Chain = LoadNode->getChain();
1708  SDValue Ptr = LoadNode->getBasePtr();
1709  assert (isa<ConstantSDNode>(Ptr));
1710 
1711  //TODO: Support smaller loads
1712  if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode))
1713  return SDValue();
1714 
1715  if (LoadNode->getAlignment() < 4)
1716  return SDValue();
1717 
1718  int ConstantBlock = ConstantAddressBlock(Block);
1719 
1720  SDValue Slots[4];
1721  for (unsigned i = 0; i < 4; i++) {
1722  // We want Const position encoded with the following formula :
1723  // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1724  // const_index is Ptr computed by llvm using an alignment of 16.
1725  // Thus we add (512 + (kc_bank << 12)) * 16 + chan * 4 here and
1726  // then div by 4 at the ISel step.
1727  SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
1728  DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
1729  Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1730  }
1731  EVT NewVT = MVT::v4i32;
1732  unsigned NumElements = 4;
1733  if (VT.isVector()) {
1734  NewVT = VT;
1735  NumElements = VT.getVectorNumElements();
1736  }
1737  SDValue Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
1738  if (!VT.isVector()) {
1739  Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1740  DAG.getConstant(0, DL, MVT::i32));
1741  }
1742  SDValue MergedValues[2] = {
1743  Result,
1744  Chain
1745  };
1746  return DAG.getMergeValues(MergedValues, DL);
1747 }
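// Example of the encoding above: for CONSTANT_BUFFER_0, ConstantBlock is 512,
// so the loop builds channel addresses Ptr + 8192 + 4 * chan; after the
// divide-by-4 at ISel this becomes (512 + Ptr / 16) * 4 + chan, i.e. the
// ((512 + (kc_bank << 12) + const_index) << 2) + chan position with
// const_index = Ptr / 16.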
1748 
1749 //===----------------------------------------------------------------------===//
1750 // Custom DAG Optimizations
1751 //===----------------------------------------------------------------------===//
1752 
1753 SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1754  DAGCombinerInfo &DCI) const {
1755  SelectionDAG &DAG = DCI.DAG;
1756  SDLoc DL(N);
1757 
1758  switch (N->getOpcode()) {
1759  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1760  case ISD::FP_ROUND: {
1761  SDValue Arg = N->getOperand(0);
1762  if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
1763  return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
1764  Arg.getOperand(0));
1765  }
1766  break;
1767  }
1768 
1769  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1770  // (i32 select_cc f32, f32, -1, 0 cc)
1771  //
1772  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1773  // this to one of the SET*_DX10 instructions.
1774  case ISD::FP_TO_SINT: {
1775  SDValue FNeg = N->getOperand(0);
1776  if (FNeg.getOpcode() != ISD::FNEG) {
1777  return SDValue();
1778  }
1779  SDValue SelectCC = FNeg.getOperand(0);
1780  if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1781  SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1782  SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1783  !isHWTrueValue(SelectCC.getOperand(2)) ||
1784  !isHWFalseValue(SelectCC.getOperand(3))) {
1785  return SDValue();
1786  }
1787 
1788  return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
1789  SelectCC.getOperand(0), // LHS
1790  SelectCC.getOperand(1), // RHS
1791  DAG.getConstant(-1, DL, MVT::i32), // True
1792  DAG.getConstant(0, DL, MVT::i32), // False
1793  SelectCC.getOperand(4)); // CC
1794  }
1795 
1796  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1797  // => build_vector elt0, ... , NewEltIdx, ... , eltN
1798  case ISD::INSERT_VECTOR_ELT: {
1799  SDValue InVec = N->getOperand(0);
1800  SDValue InVal = N->getOperand(1);
1801  SDValue EltNo = N->getOperand(2);
1802 
1803  // If the inserted element is an UNDEF, just use the input vector.
1804  if (InVal.isUndef())
1805  return InVec;
1806 
1807  EVT VT = InVec.getValueType();
1808 
1809  // If we can't generate a legal BUILD_VECTOR, exit
1810  if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1811  return SDValue();
1812 
1813  // Check that we know which element is being inserted
1814  if (!isa<ConstantSDNode>(EltNo))
1815  return SDValue();
1816  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1817 
1818  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1819  // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1820  // vector elements.
1821  SmallVector<SDValue, 8> Ops;
1822  if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1823  Ops.append(InVec.getNode()->op_begin(),
1824  InVec.getNode()->op_end());
1825  } else if (InVec.isUndef()) {
1826  unsigned NElts = VT.getVectorNumElements();
1827  Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1828  } else {
1829  return SDValue();
1830  }
1831 
1832  // Insert the element
1833  if (Elt < Ops.size()) {
1834  // All the operands of BUILD_VECTOR must have the same type;
1835  // we enforce that here.
1836  EVT OpVT = Ops[0].getValueType();
1837  if (InVal.getValueType() != OpVT)
1838  InVal = OpVT.bitsGT(InVal.getValueType()) ?
1839  DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
1840  DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
1841  Ops[Elt] = InVal;
1842  }
1843 
1844  // Return the new vector
1845  return DAG.getBuildVector(VT, DL, Ops);
1846  }
1847 
1848  // Extract_vec (Build_vector) generated by custom lowering
1849  // also needs to be custom combined here.
1850  case ISD::EXTRACT_VECTOR_ELT: {
1851  SDValue Arg = N->getOperand(0);
1852  if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1853  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1854  unsigned Element = Const->getZExtValue();
1855  return Arg->getOperand(Element);
1856  }
1857  }
1858  if (Arg.getOpcode() == ISD::BITCAST &&
1859  Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
1860  (Arg.getOperand(0).getValueType().getVectorNumElements() ==
1861  Arg.getValueType().getVectorNumElements())) {
1862  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1863  unsigned Element = Const->getZExtValue();
1864  return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
1865  Arg->getOperand(0).getOperand(Element));
1866  }
1867  }
1868  break;
1869  }
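  // Illustrative example (not part of the original source), with hypothetical
  // elements a, b, c, d:
  //
  //   extract_vector_elt (build_vector a, b, c, d), 2            --> c
  //   extract_vector_elt (bitcast (build_vector a, b, c, d)), 2  --> (bitcast c)
  //
  // The second form only fires when the bitcast preserves the element count,
  // so index 2 still names the same lane.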
1870 
1871  case ISD::SELECT_CC: {
1872  // Try common optimizations
1873  if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
1874  return Ret;
1875 
1876  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1877  // selectcc x, y, a, b, inv(cc)
1878  //
1879  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1880  // selectcc x, y, a, b, cc
1881  SDValue LHS = N->getOperand(0);
1882  if (LHS.getOpcode() != ISD::SELECT_CC) {
1883  return SDValue();
1884  }
1885 
1886  SDValue RHS = N->getOperand(1);
1887  SDValue True = N->getOperand(2);
1888  SDValue False = N->getOperand(3);
1889  ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
1890 
1891  if (LHS.getOperand(2).getNode() != True.getNode() ||
1892  LHS.getOperand(3).getNode() != False.getNode() ||
1893  RHS.getNode() != False.getNode()) {
1894  return SDValue();
1895  }
1896 
1897  switch (NCC) {
1898  default: return SDValue();
1899  case ISD::SETNE: return LHS;
1900  case ISD::SETEQ: {
1901  ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1902  LHSCC = ISD::getSetCCInverse(LHSCC, LHS.getOperand(0).getValueType());
1903  if (DCI.isBeforeLegalizeOps() ||
1904  isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1905  return DAG.getSelectCC(DL,
1906  LHS.getOperand(0),
1907  LHS.getOperand(1),
1908  LHS.getOperand(2),
1909  LHS.getOperand(3),
1910  LHSCC);
1911  break;
1912  }
1913  }
1914  return SDValue();
1915  }
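  // Illustrative example (not part of the original source), with hypothetical
  // operands x, y, a, b:
  //
  //   selectcc (selectcc x, y, a, b, setlt), b, a, b, seteq
  //     --> selectcc x, y, a, b, setge      (inverted inner condition)
  //
  //   selectcc (selectcc x, y, a, b, setlt), b, a, b, setne
  //     --> selectcc x, y, a, b, setlt      (inner node reused as-is)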
1916 
1917  case AMDGPUISD::R600_EXPORT: {
1918  SDValue Arg = N->getOperand(1);
1919  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1920  break;
1921 
1922  SDValue NewArgs[8] = {
1923  N->getOperand(0), // Chain
1924  SDValue(),
1925  N->getOperand(2), // ArrayBase
1926  N->getOperand(3), // Type
1927  N->getOperand(4), // SWZ_X
1928  N->getOperand(5), // SWZ_Y
1929  N->getOperand(6), // SWZ_Z
1930  N->getOperand(7) // SWZ_W
1931  };
1932  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
1933  return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
1934  }
1935  case AMDGPUISD::TEXTURE_FETCH: {
1936  SDValue Arg = N->getOperand(1);
1937  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1938  break;
1939 
1940  SDValue NewArgs[19] = {
1941  N->getOperand(0),
1942  N->getOperand(1),
1943  N->getOperand(2),
1944  N->getOperand(3),
1945  N->getOperand(4),
1946  N->getOperand(5),
1947  N->getOperand(6),
1948  N->getOperand(7),
1949  N->getOperand(8),
1950  N->getOperand(9),
1951  N->getOperand(10),
1952  N->getOperand(11),
1953  N->getOperand(12),
1954  N->getOperand(13),
1955  N->getOperand(14),
1956  N->getOperand(15),
1957  N->getOperand(16),
1958  N->getOperand(17),
1959  N->getOperand(18),
1960  };
1961  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
1962  return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
1963  }
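  // Illustrative note (not part of the original source): in both cases above
  // operand 1 is a BUILD_VECTOR, and OptimizeSwizzle returns a (possibly
  // compacted) replacement vector while rewriting the swizzle operands that
  // follow it in NewArgs -- SWZ_X..SWZ_W at indices 4-7 for R600_EXPORT, and
  // the selectors starting at index 2 for TEXTURE_FETCH -- so that duplicate
  // or undef lanes can be expressed through the swizzle rather than as
  // distinct vector elements.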
1964 
1965  case ISD::LOAD: {
1966  LoadSDNode *LoadNode = cast<LoadSDNode>(N);
1967  SDValue Ptr = LoadNode->getBasePtr();
1968  if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
1969  isa<ConstantSDNode>(Ptr))
1970  return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
1971  break;
1972  }
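  // Illustrative note (not part of the original source): kernel arguments on
  // R600 are lowered as loads from the PARAM_I_ADDRESS space, so a load whose
  // pointer is a compile-time constant offset in that space is rewritten by
  // constBufferLoad into an equivalent read from CONSTANT_BUFFER_0, where it
  // can be selected as a constant-buffer fetch.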
1973 
1974  default: break;
1975  }
1976 
1977  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
1978 }
1979 
1980 bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
1981  SDValue &Src, SDValue &Neg, SDValue &Abs,
1982  SDValue &Sel, SDValue &Imm,
1983  SelectionDAG &DAG) const {
1984  const R600InstrInfo *TII = Subtarget->getInstrInfo();
1985  if (!Src.isMachineOpcode())
1986  return false;
1987 
1988  switch (Src.getMachineOpcode()) {
1989  case R600::FNEG_R600:
1990  if (!Neg.getNode())
1991  return false;
1992  Src = Src.getOperand(0);
1993  Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
1994  return true;
1995  case R600::FABS_R600:
1996  if (!Abs.getNode())
1997  return false;
1998  Src = Src.getOperand(0);
1999  Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
2000  return true;
2001  case R600::CONST_COPY: {
2002  unsigned Opcode = ParentNode->getMachineOpcode();
2003  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2004 
2005  if (!Sel.getNode())
2006  return false;
2007 
2008  SDValue CstOffset = Src.getOperand(0);
2009  if (ParentNode->getValueType(0).isVector())
2010  return false;
2011 
2012  // Gather constant values
2013  int SrcIndices[] = {
2014  TII->getOperandIdx(Opcode, R600::OpName::src0),
2015  TII->getOperandIdx(Opcode, R600::OpName::src1),
2016  TII->getOperandIdx(Opcode, R600::OpName::src2),
2017  TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2018  TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2019  TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2020  TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2021  TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2022  TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2023  TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2024  TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2025  };
2026  std::vector<unsigned> Consts;
2027  for (int OtherSrcIdx : SrcIndices) {
2028  int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2029  if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2030  continue;
2031  if (HasDst) {
2032  OtherSrcIdx--;
2033  OtherSelIdx--;
2034  }
2035  if (RegisterSDNode *Reg =
2036  dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2037  if (Reg->getReg() == R600::ALU_CONST) {
2038  ConstantSDNode *Cst
2039  = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
2040  Consts.push_back(Cst->getZExtValue());
2041  }
2042  }
2043  }
2044 
2045  ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
2046  Consts.push_back(Cst->getZExtValue());
2047  if (!TII->fitsConstReadLimitations(Consts)) {
2048  return false;
2049  }
2050 
2051  Sel = CstOffset;
2052  Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
2053  return true;
2054  }
2055  case R600::MOV_IMM_GLOBAL_ADDR:
2056  // Check if the Imm slot is used. Taken from below.
2057  if (cast<ConstantSDNode>(Imm)->getZExtValue())
2058  return false;
2059  Imm = Src.getOperand(0);
2060  Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
2061  return true;
2062  case R600::MOV_IMM_I32:
2063  case R600::MOV_IMM_F32: {
2064  unsigned ImmReg = R600::ALU_LITERAL_X;
2065  uint64_t ImmValue = 0;
2066 
2067  if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
2068  ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0));
2069  float FloatValue = FPC->getValueAPF().convertToFloat();
2070  if (FloatValue == 0.0) {
2071  ImmReg = R600::ZERO;
2072  } else if (FloatValue == 0.5) {
2073  ImmReg = R600::HALF;
2074  } else if (FloatValue == 1.0) {
2075  ImmReg = R600::ONE;
2076  } else {
2077  ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2078  }
2079  } else {
2080  ConstantSDNode *C = cast<ConstantSDNode>(Src.getOperand(0));
2081  uint64_t Value = C->getZExtValue();
2082  if (Value == 0) {
2083  ImmReg = R600::ZERO;
2084  } else if (Value == 1) {
2085  ImmReg = R600::ONE_INT;
2086  } else {
2087  ImmValue = Value;
2088  }
2089  }
2090 
2091  // Check that we aren't already using an immediate.
2092  // XXX: It's possible for an instruction to have more than one
2093  // immediate operand, but this is not supported yet.
2094  if (ImmReg == R600::ALU_LITERAL_X) {
2095  if (!Imm.getNode())
2096  return false;
2097  ConstantSDNode *C = cast<ConstantSDNode>(Imm);
2098  if (C->getZExtValue())
2099  return false;
2100  Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
2101  }
2102  Src = DAG.getRegister(ImmReg, MVT::i32);
2103  return true;
2104  }
2105  default:
2106  return false;
2107  }
2108 }
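// Illustrative sketch (not part of the original source), using hypothetical
// registers %a and %b: given a selected ALU node such as
//
//   (MUL_IEEE (FNEG_R600 %a), %b)
//
// FoldOperand replaces the FNEG_R600 source with %a and sets the matching
// src0_neg operand to 1, encoding the negation as a source modifier instead
// of a separate instruction. FABS_R600 folds into the abs modifier the same
// way, CONST_COPY becomes an ALU_CONST read (subject to
// fitsConstReadLimitations), and MOV_IMM_* becomes ALU_LITERAL_X or an
// inline constant register (ZERO, ONE, HALF, ONE_INT) when the value allows.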
2109 
2110 /// Fold the instructions after selecting them
2111 SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2112  SelectionDAG &DAG) const {
2113  const R600InstrInfo *TII = Subtarget->getInstrInfo();
2114  if (!Node->isMachineOpcode())
2115  return Node;
2116 
2117  unsigned Opcode = Node->getMachineOpcode();
2118  SDValue FakeOp;
2119 
2120  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
2121 
2122  if (Opcode == R600::DOT_4) {
2123  int OperandIdx[] = {
2124  TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2125  TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2126  TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2127  TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2128  TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2129  TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2130  TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2131  TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2132  };
2133  int NegIdx[] = {
2134  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
2135  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
2136  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
2137  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
2138  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
2139  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
2140  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
2141  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
2142  };
2143  int AbsIdx[] = {
2144  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
2145  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
2146  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
2147  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
2148  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
2149  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
2150  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
2151  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
2152  };
2153  for (unsigned i = 0; i < 8; i++) {
2154  if (OperandIdx[i] < 0)
2155  return Node;
2156  SDValue &Src = Ops[OperandIdx[i] - 1];
2157  SDValue &Neg = Ops[NegIdx[i] - 1];
2158  SDValue &Abs = Ops[AbsIdx[i] - 1];
2159  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2160  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2161  if (HasDst)
2162  SelIdx--;
2163  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2164  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2165  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2166  }
2167  } else if (Opcode == R600::REG_SEQUENCE) {
2168  for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2169  SDValue &Src = Ops[i];
2170  if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
2171  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2172  }
2173  } else {
2174  if (!TII->hasInstrModifiers(Opcode))
2175  return Node;
2176  int OperandIdx[] = {
2177  TII->getOperandIdx(Opcode, R600::OpName::src0),
2178  TII->getOperandIdx(Opcode, R600::OpName::src1),
2179  TII->getOperandIdx(Opcode, R600::OpName::src2)
2180  };
2181  int NegIdx[] = {
2182  TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
2183  TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
2184  TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
2185  };
2186  int AbsIdx[] = {
2187  TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
2188  TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
2189  -1
2190  };
2191  for (unsigned i = 0; i < 3; i++) {
2192  if (OperandIdx[i] < 0)
2193  return Node;
2194  SDValue &Src = Ops[OperandIdx[i] - 1];
2195  SDValue &Neg = Ops[NegIdx[i] - 1];
2196  SDValue FakeAbs;
2197  SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2198  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2199  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2200  int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
2201  if (HasDst) {
2202  SelIdx--;
2203  ImmIdx--;
2204  }
2205  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2206  SDValue &Imm = Ops[ImmIdx];
2207  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
2208  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2209  }
2210  }
2211 
2212  return Node;
2213 }
llvm::AMDGPUAS::CONSTANT_BUFFER_2
@ CONSTANT_BUFFER_2
Definition: AMDGPU.h:406
llvm::ISD::SUB
@ SUB
Definition: ISDOpcodes.h:240
llvm::CCValAssign::getLocVT
MVT getLocVT() const
Definition: CallingConvLower.h:153
llvm::AMDGPUAS::CONSTANT_BUFFER_8
@ CONSTANT_BUFFER_8
Definition: AMDGPU.h:412
i
i
Definition: README.txt:29
llvm::ISD::SETUGE
@ SETUGE
Definition: ISDOpcodes.h:1368
llvm::AMDGPUISD::STORE_MSKOR
@ STORE_MSKOR
Definition: AMDGPUISelLowering.h:485
llvm::TargetLowering::scalarizeVectorLoad
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
Definition: TargetLowering.cpp:7321
llvm::ConstantSDNode
Definition: SelectionDAGNodes.h:1542
llvm::RegisterSDNode
Definition: SelectionDAGNodes.h:2080
llvm::APFloat::convertToFloat
float convertToFloat() const
Converts this APFloat to host float value.
Definition: APFloat.cpp:4898
llvm::StoreSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2313
llvm::TargetLoweringBase::setSchedulingPreference
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
Definition: TargetLowering.h:2121
llvm::AMDGPUAS::CONSTANT_BUFFER_3
@ CONSTANT_BUFFER_3
Definition: AMDGPU.h:407
llvm::ISD::SETLE
@ SETLE
Definition: ISDOpcodes.h:1379
llvm::ISD::INTRINSIC_VOID
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
llvm::ISD::SETO
@ SETO
Definition: ISDOpcodes.h:1364
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:102
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::SDNode::getValueType
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Definition: SelectionDAGNodes.h:958
llvm::SDLoc
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Definition: SelectionDAGNodes.h:1078
llvm::TargetLoweringBase::Legal
@ Legal
Definition: TargetLowering.h:196
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:633
llvm::TargetLowering::EmitInstrWithCustomInserter
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Definition: SelectionDAGISel.cpp:294
llvm::ISD::BITCAST
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:848
llvm::ISD::SETNE
@ SETNE
Definition: ISDOpcodes.h:1380
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
llvm::AMDGPUTargetLowering
Definition: AMDGPUISelLowering.h:27
llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:216
llvm::ISD::NON_EXTLOAD
@ NON_EXTLOAD
Definition: ISDOpcodes.h:1335
llvm::CCState
CCState - This class holds information needed while lowering arguments and return values.
Definition: CallingConvLower.h:191
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::SDValue::getNode
SDNode * getNode() const
get the SDNode which holds the desired result
Definition: SelectionDAGNodes.h:152
llvm::MachineInstrBuilder::add
const MachineInstrBuilder & add(const MachineOperand &MO) const
Definition: MachineInstrBuilder.h:224
llvm::CallingConv::AMDGPU_HS
@ AMDGPU_HS
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:223
llvm::ISD::UADDO
@ UADDO
Definition: ISDOpcodes.h:311
llvm::ISD::CTTZ_ZERO_UNDEF
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:674
llvm::SelectionDAG::getValueType
SDValue getValueType(EVT)
Definition: SelectionDAG.cpp:1697
llvm::PointerType::get
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Definition: Type.cpp:693
llvm::ISD::ADDC
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:269
llvm::R600FrameLowering
Definition: R600FrameLowering.h:16
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::AMDGPUISD::CONST_ADDRESS
@ CONST_ADDRESS
Definition: AMDGPUISelLowering.h:437
llvm::ISD::SETEQ
@ SETEQ
Definition: ISDOpcodes.h:1375
llvm::CallingConv::AMDGPU_GS
@ AMDGPU_GS
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:207
llvm::SelectionDAG::getVTList
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
Definition: SelectionDAG.cpp:8157
llvm::MVT::v2i1
@ v2i1
Definition: MachineValueType.h:65
llvm::R600Subtarget::getInstrInfo
const R600InstrInfo * getInstrInfo() const override
Definition: R600Subtarget.h:57
llvm::MipsISD::Lo
@ Lo
Definition: MipsISelLowering.h:79
llvm::MachineSDNode
An SDNode that represents everything that will be needed to construct a MachineInstr.
Definition: SelectionDAGNodes.h:2530
llvm::MachineMemOperand::MOInvariant
@ MOInvariant
The memory access always returns the same value (or traps).
Definition: MachineMemOperand.h:145
llvm::SelectionDAG::getVectorIdxConstant
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
Definition: SelectionDAG.cpp:1483
llvm::RegState::Kill
@ Kill
The last use of a register.
Definition: MachineInstrBuilder.h:48
llvm::MemSDNode::getMemoryVT
EVT getMemoryVT() const
Return the type of the in-memory value.
Definition: SelectionDAGNodes.h:1328
llvm::MemSDNode::getChain
const SDValue & getChain() const
Definition: SelectionDAGNodes.h:1351
llvm::ISD::ANY_EXTEND
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:732
llvm::SDNode
Represents one node in the SelectionDAG.
Definition: SelectionDAGNodes.h:455
llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:466
llvm::ISD::FP_TO_SINT
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:785
llvm::TargetLowering::DAGCombinerInfo::DAG
SelectionDAG & DAG
Definition: TargetLowering.h:3531
llvm::LoadSDNode
This class is used to represent ISD::LOAD nodes.
Definition: SelectionDAGNodes.h:2263
llvm::ISD::SETULE
@ SETULE
Definition: ISDOpcodes.h:1370
llvm::CCState::AnalyzeFormalArguments
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
Definition: CallingConvLower.cpp:90
isEOP
static bool isEOP(MachineBasicBlock::iterator I)
Definition: R600ISelLowering.cpp:257
llvm::NVPTXISD::RETURN
@ RETURN
Definition: NVPTXISelLowering.h:49
llvm::ISD::SHL_PARTS
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:715
llvm::SelectionDAG::getZeroExtendInReg
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
Definition: SelectionDAG.cpp:1290
llvm::SelectionDAG::getStore
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
Definition: SelectionDAG.cpp:7486
llvm::MachineMemOperand::MODereferenceable
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
Definition: MachineMemOperand.h:143
llvm::AMDGPUISD::CARRY
@ CARRY
Definition: AMDGPUISelLowering.h:417
llvm::R600Subtarget::hasBCNT
bool hasBCNT(unsigned Size) const
Definition: R600Subtarget.h:99
llvm::ISD::SETCC
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:702
llvm::AMDGPUAS::CONSTANT_BUFFER_7
@ CONSTANT_BUFFER_7
Definition: AMDGPU.h:411
llvm::R600Subtarget::hasCARRY
bool hasCARRY() const
Definition: R600Subtarget.h:110
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
Results
Function Alias Analysis Results
Definition: AliasAnalysis.cpp:853
llvm::ISD::MERGE_VALUES
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
llvm::AMDGPUTargetLowering::ReplaceNodeResults
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
Definition: AMDGPUISelLowering.cpp:1320
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:116
llvm::ISD::EXTLOAD
@ EXTLOAD
Definition: ISDOpcodes.h:1335
llvm::AMDGPUAS::CONSTANT_BUFFER_11
@ CONSTANT_BUFFER_11
Definition: AMDGPU.h:415
llvm::LSBaseSDNode::isIndexed
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
Definition: SelectionDAGNodes.h:2251
llvm::R600Subtarget::hasFFBL
bool hasFFBL() const
Definition: R600Subtarget.h:118
llvm::EVT::bitsLE
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:281
llvm::AMDGPUAS::CONSTANT_BUFFER_4
@ CONSTANT_BUFFER_4
Definition: AMDGPU.h:408
llvm::AMDGPUAS::CONSTANT_BUFFER_9
@ CONSTANT_BUFFER_9
Definition: AMDGPU.h:413
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
R600ISelLowering.h
llvm::ISD::SETUEQ
@ SETUEQ
Definition: ISDOpcodes.h:1366
llvm::TargetLowering::expandUnalignedStore
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
Definition: TargetLowering.cpp:7640
llvm::SelectionDAG::getContext
LLVMContext * getContext() const
Definition: SelectionDAG.h:447
llvm::R600MachineFunctionInfo
Definition: R600MachineFunctionInfo.h:19
llvm::ISD::BRCOND
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:956
llvm::SelectionDAG::getRegister
SDValue getRegister(unsigned Reg, EVT VT)
Definition: SelectionDAG.cpp:1960
llvm::MipsISD::Hi
@ Hi
Definition: MipsISelLowering.h:75
isUndef
static bool isUndef(ArrayRef< int > Mask)
Definition: HexagonISelDAGToDAGHVX.cpp:912
llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:130
llvm::SDNode::op_end
op_iterator op_end() const
Definition: SelectionDAGNodes.h:904
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:206
llvm::MVT::integer_valuetypes
static auto integer_valuetypes()
Definition: MachineValueType.h:1411
llvm::ISD::FFLOOR
@ FFLOOR
Definition: ISDOpcodes.h:885
llvm::ISD::LoadExtType
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1335
llvm::R600TargetLowering::LowerFormalArguments
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
XXX Only kernel functions are supported, so we can assume for now that every function is a kernel fun...
Definition: R600ISelLowering.cpp:1489
llvm::R600Subtarget::hasBFE
bool hasBFE() const
Definition: R600Subtarget.h:91
llvm::ISD::BR_CC
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:963
x3
In x86 we generate this spiffy xmm0 xmm0 ret in x86 we generate this which could be xmm1 movss xmm1 xmm0 ret In sse4 we could use insertps to make both better Here s another testcase that could use x3
Definition: README-SSE.txt:547
llvm::SelectionDAG::getLoad
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Definition: SelectionDAG.cpp:7436
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:43
llvm::SDNode::getOpcode
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
Definition: SelectionDAGNodes.h:621
llvm::EVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:363
llvm::R600Subtarget::hasFMA
bool hasFMA() const
Definition: R600Subtarget.h:126
llvm::MinAlign
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:672
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:632
llvm::AMDGPUAS::CONSTANT_BUFFER_1
@ CONSTANT_BUFFER_1
Definition: AMDGPU.h:405
llvm::ISD::GlobalAddress
@ GlobalAddress
Definition: ISDOpcodes.h:78
llvm::ISD::SELECT_CC
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:694
llvm::SDValue::getValueType
EVT getValueType() const
Return the ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:1113
llvm::CCValAssign
CCValAssign - Represent assignment of one arg/retval to a location.
Definition: CallingConvLower.h:33
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::AMDGPU::isShader
bool isShader(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1358
llvm::SelectionDAG
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:216
llvm::ISD::SELECT
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:679
llvm::TargetLowering::DAGCombinerInfo::isBeforeLegalizeOps
bool isBeforeLegalizeOps() const
Definition: TargetLowering.h:3537
f
Itanium Name Demangler i e convert the string _Z1fv into f()". You can also use the CRTP base ManglingParser to perform some simple analysis on the mangled name
llvm::SelectionDAG::getTruncStore
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition: SelectionDAG.cpp:7537
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:648
llvm::CCValAssign::getLocReg
Register getLocReg() const
Definition: CallingConvLower.h:150
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:720
MO_FLAG_PUSH
#define MO_FLAG_PUSH
Definition: R600Defines.h:18
llvm::SelectionDAG::getUNDEF
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:951
llvm::ISD::SIGN_EXTEND_INREG
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:747
llvm::AMDGPUISD::R600_EXPORT
@ R600_EXPORT
Definition: AMDGPUISelLowering.h:436
llvm::MachineMemOperand::getValue
const Value * getValue() const
Return the base address of the memory access.
Definition: MachineMemOperand.h:211
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:35
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::TargetLoweringBase::isCondCodeLegal
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
Definition: TargetLowering.h:1363
llvm::R600TargetLowering::canMergeStoresTo
bool canMergeStoresTo(unsigned AS, EVT MemVT, const SelectionDAG &DAG) const override
Returns if it's reasonable to merge stores to MemVT size.
Definition: R600ISelLowering.cpp:1566
llvm::MVT::f64
@ f64
Definition: MachineValueType.h:56
llvm::SelectionDAG::getConstant
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1348
llvm::EVT::getVectorNumElements
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:309
llvm::TargetLowering::DAGCombinerInfo
Definition: TargetLowering.h:3525
getOpcode
static Optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:199
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::ISD::TRUNCATE
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:735
llvm::CallingConv::C
@ C
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
llvm::TargetLoweringBase::ZeroOrNegativeOneBooleanContent
@ ZeroOrNegativeOneBooleanContent
Definition: TargetLowering.h:233
llvm::AMDGPUAS::CONSTANT_BUFFER_10
@ CONSTANT_BUFFER_10
Definition: AMDGPU.h:414
llvm::AMDGPUISD::DOT4
@ DOT4
Definition: AMDGPUISelLowering.h:416
llvm::ISD::UDIVREM
@ UDIVREM
Definition: ISDOpcodes.h:256
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::TargetLoweringBase::addRegisterClass
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
Definition: TargetLowering.h:2168
llvm::AMDGPUAS::CONSTANT_BUFFER_13
@ CONSTANT_BUFFER_13
Definition: AMDGPU.h:417
MO_FLAG_MASK
#define MO_FLAG_MASK
Definition: R600Defines.h:17
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1631
llvm::ISD::FRINT
@ FRINT
Definition: ISDOpcodes.h:881
llvm::codeview::EncodedFramePtrReg::BasePtr
@ BasePtr
llvm::ISD::ATOMIC_STORE
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1118
llvm::FrameIndexSDNode
Definition: SelectionDAGNodes.h:1726
llvm::CallingConv::Fast
@ Fast
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
llvm::R600TargetLowering::CCAssignFnForCall
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const
Definition: R600ISelLowering.cpp:1464
llvm::SelectionDAG::getMemIntrinsicNode
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
Definition: SelectionDAG.cpp:7213
R600InstrInfo.h
llvm::R600TargetLowering::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, bool *IsFast=nullptr) const override
Determine if the target supports unaligned memory accesses.
Definition: R600ISelLowering.cpp:1575
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:632
llvm::ISD::SETOLT
@ SETOLT
Definition: ISDOpcodes.h:1361
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::GlobalAddressSDNode::getGlobal
const GlobalValue * getGlobal() const
Definition: SelectionDAGNodes.h:1712
llvm::APFloat::bitcastToAPInt
APInt bitcastToAPInt() const
Definition: APFloat.h:1132
llvm::CCValAssign::getLocMemOffset
unsigned getLocMemOffset() const
Definition: CallingConvLower.h:151
llvm::ISD::UNINDEXED
@ UNINDEXED
Definition: ISDOpcodes.h:1304
llvm::TargetLoweringBase::isOperationLegal
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
Definition: TargetLowering.h:1217
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::TargetLoweringBase::setBooleanVectorContents
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
Definition: TargetLowering.h:2116
llvm::EVT::changeVectorElementTypeToInteger
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:400
llvm::EVT::bitsLT
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:273
llvm::AMDGPUTargetLowering::LowerGlobalAddress
virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const
Definition: AMDGPUISelLowering.cpp:1345
llvm::MVT::v4i16
@ v4i16
Definition: MachineValueType.h:91
llvm::EVT::getTypeForEVT
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:181
llvm::MachineRegisterInfo::use_empty
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
Definition: MachineRegisterInfo.h:506
llvm::ISD::USUBO
@ USUBO
Definition: ISDOpcodes.h:315
llvm::MVT::v4i8
@ v4i8
Definition: MachineValueType.h:78
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
ConstantAddressBlock
static int ConstantAddressBlock(unsigned AddressSpace)
Definition: R600ISelLowering.cpp:1256
llvm::StoreSDNode::isTruncatingStore
bool isTruncatingStore() const
Return true if the op does a truncation before store.
Definition: SelectionDAGNodes.h:2307
llvm::ISD::SETOLE
@ SETOLE
Definition: ISDOpcodes.h:1362
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
llvm::ISD::SETUGT
@ SETUGT
Definition: ISDOpcodes.h:1367
llvm::SelectionDAG::getTargetGlobalAddress
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:683
R600MachineFunctionInfo.h
llvm::CCAssignFn
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
Definition: CallingConvLower.h:177
llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:377
llvm::MVT::v2i8
@ v2i8
Definition: MachineValueType.h:77
llvm::AMDGPUISD::BUILD_VERTICAL_VECTOR
@ BUILD_VERTICAL_VECTOR
This node is for VLIW targets and it is used to represent a vector that is stored in consecutive regi...
Definition: AMDGPUISelLowering.h:471
llvm::ISD::SETUNE
@ SETUNE
Definition: ISDOpcodes.h:1371
llvm::Register::isVirtual
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::TargetLoweringBase::setOperationAction
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
Definition: TargetLowering.h:2185
llvm::MVT::v32i16
@ v32i16
Definition: MachineValueType.h:94
llvm::pdb::OMFSegDescFlags::Read
@ Read
llvm::AMDGPUTargetLowering::LowerUDIVREM64
void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results) const
Definition: AMDGPUISelLowering.cpp:1822
llvm::R600Subtarget
Definition: R600Subtarget.h:36
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::MipsISD::Ext
@ Ext
Definition: MipsISelLowering.h:156
llvm::AMDGPUTargetLowering::LowerSDIVREM
SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const
Definition: AMDGPUISelLowering.cpp:2092
llvm::MVT::v16i8
@ v16i8
Definition: MachineValueType.h:80
AMDGPUMCTargetDesc.h
llvm::R600Subtarget::hasBORROW
bool hasBORROW() const
Definition: R600Subtarget.h:106
llvm::AMDGPUISD::BORROW
@ BORROW
Definition: AMDGPUISelLowering.h:418
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:341
llvm::ISD::FMAD
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:470
llvm::DenseMapBase::clear
void clear()
Definition: DenseMap.h:111
llvm::TargetLowering::expandFP_TO_SINT
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
Definition: TargetLowering.cpp:6684
llvm::MVT::v16i16
@ v16i16
Definition: MachineValueType.h:93
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
llvm::TargetLoweringBase::setHasExtractBitsInsn
void setHasExtractBitsInsn(bool hasExtractInsn=true)
Tells the code generator that the target has BitExtract instructions.
Definition: TargetLowering.h:2151
llvm::ISD::FP_TO_UINT
@ FP_TO_UINT
Definition: ISDOpcodes.h:786
llvm::ConstantFPSDNode
Definition: SelectionDAGNodes.h:1587
llvm::R600FrameLowering::getFrameIndexReference
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
Definition: R600FrameLowering.cpp:18
llvm::MemSDNode::getMemOperand
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Definition: SelectionDAGNodes.h:1332
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:387
llvm::ISD::LOAD
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:921
llvm::SelectionDAG::getIntPtrConstant
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
Definition: SelectionDAG.cpp:1471
llvm::TargetLoweringBase::Promote
@ Promote
Definition: TargetLowering.h:197
llvm::ConstantPointerNull::get
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1770
llvm::MachinePointerInfo
This class contains a discriminated union of information about pointers in memory operands,...
Definition: MachineMemOperand.h:38
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap< unsigned, unsigned >
llvm::SelectionDAG::getCopyFromReg
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:761
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:511
llvm::R600Subtarget::hasBFI
bool hasBFI() const
Definition: R600Subtarget.h:95
llvm::ISD::CTLZ_ZERO_UNDEF
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:675
llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition: SelectionDAGNodes.h:896
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::FrameIndexSDNode::getIndex
int getIndex() const
Definition: SelectionDAGNodes.h:1737
llvm::SelectionDAG::getNode
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
Definition: SelectionDAG.cpp:7922
llvm::CallingConv::AMDGPU_ES
@ AMDGPU_ES
Calling convention used for AMDPAL shader stage before geometry shader if geometry is in use.
Definition: CallingConv.h:236
llvm::SelectionDAG::getNOT
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
Definition: SelectionDAG.cpp:1321
llvm::R600::getLDSNoRetOp
int getLDSNoRetOp(uint16_t Opcode)
llvm::ISD::FCOPYSIGN
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:476
llvm::LoadSDNode::getExtensionType
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Definition: SelectionDAGNodes.h:2278
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:631
llvm::TargetLowering::scalarizeVectorStore
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
Definition: TargetLowering.cpp:7411
llvm::MVT::v4f32
@ v4f32
Definition: MachineValueType.h:157
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::MachineMemOperand::Flags
Flags
Flags values. These may be or'd together.
Definition: MachineMemOperand.h:131
llvm::ISD::InputArg
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
Definition: TargetCallingConv.h:195
llvm::StoreSDNode
This class is used to represent ISD::STORE nodes.
Definition: SelectionDAGNodes.h:2291
llvm::ISD::ZEXTLOAD
@ ZEXTLOAD
Definition: ISDOpcodes.h:1335
llvm::SDValue::getValue
SDValue getValue(unsigned R) const
Definition: SelectionDAGNodes.h:172
llvm::MVT::i8
@ i8
Definition: MachineValueType.h:44
llvm::R600Subtarget::hasFFBH
bool hasFFBH() const
Definition: R600Subtarget.h:122
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::CallingConv::Cold
@ Cold
Definition: CallingConv.h:48
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
llvm::AMDGPUISD::SIN_HW
@ SIN_HW
Definition: AMDGPUISelLowering.h:384
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::ISD::SETULT
@ SETULT
Definition: ISDOpcodes.h:1369
llvm::ConstantSDNode::getZExtValue
uint64_t getZExtValue() const
Definition: SelectionDAGNodes.h:1557
llvm::TargetLowering::expandShiftParts
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
Definition: TargetLowering.cpp:6632
llvm::AMDGPUISD::TEXTURE_FETCH
@ TEXTURE_FETCH
Definition: AMDGPUISelLowering.h:435
llvm::AMDGPUISD::RSQ
@ RSQ
Definition: AMDGPUISelLowering.h:409
llvm::MachineMemOperand::MONonTemporal
@ MONonTemporal
The memory access is non-temporal.
Definition: MachineMemOperand.h:141
llvm::SelectionDAG::getSelectCC
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition: SelectionDAG.h:1087
llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1355
MO_FLAG_NEG
#define MO_FLAG_NEG
Definition: R600Defines.h:15
MO_FLAG_ABS
#define MO_FLAG_ABS
Definition: R600Defines.h:16
llvm::AMDGPUSubtarget::R700
@ R700
Definition: AMDGPUSubtarget.h:34
llvm::SelectionDAG::getMachineNode
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
Definition: SelectionDAG.cpp:8595
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::ISD::isNON_EXTLoad
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
Definition: SelectionDAGNodes.h:2685
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::StoreSDNode::getValue
const SDValue & getValue() const
Definition: SelectionDAGNodes.h:2312
llvm::AfterLegalizeVectorOps
@ AfterLegalizeVectorOps
Definition: DAGCombine.h:18
llvm::ISD::SRA_PARTS
@ SRA_PARTS
Definition: ISDOpcodes.h:716
llvm::AMDGPUTargetLowering::combineFMinMaxLegacy
SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, SDValue CC, DAGCombinerInfo &DCI) const
Generate Min/Max node.
Definition: AMDGPUISelLowering.cpp:1435
llvm::MachineFunction
Definition: MachineFunction.h:230
llvm::R600Subtarget::getFrameLowering
const R600FrameLowering * getFrameLowering() const override
Definition: R600Subtarget.h:59
llvm::AMDGPUAS::CONSTANT_BUFFER_6
@ CONSTANT_BUFFER_6
Definition: AMDGPU.h:410
llvm::MachineFunction::addLiveIn
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
Definition: MachineFunction.cpp:653
llvm::MemSDNode::getAAInfo
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Definition: SelectionDAGNodes.h:1296
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:386
llvm::AMDGPUTargetLowering::getImplicitParameterOffset
uint32_t getImplicitParameterOffset(const MachineFunction &MF, const ImplicitParameter Param) const
Helper function that returns the byte offset of the given type of implicit parameter.
Definition: AMDGPUISelLowering.cpp:4309
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:99
llvm::EVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:155
llvm::MVT::i64
@ i64
Definition: MachineValueType.h:47
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:179
llvm::MVT::v2i32
@ v2i32
Definition: MachineValueType.h:101
llvm::EVT::getScalarSizeInBits
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:353
AMDGPU.h
llvm::SDValue::getMachineOpcode
unsigned getMachineOpcode() const
Definition: SelectionDAGNodes.h:1145
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::SelectionDAG::ReplaceAllUsesOfValueWith
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
Definition: SelectionDAG.cpp:9183
llvm::MVT::v2f32
@ v2f32
Definition: MachineValueType.h:155
llvm::CallingConv::AMDGPU_PS
@ AMDGPU_PS
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:210
uint32_t
llvm::StackOffset
StackOffset is a class to represent an offset with 2 dimensions, named fixed and scalable,...
Definition: TypeSize.h:134
llvm::AMDGPUTargetLowering::LowerOperation
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
Definition: AMDGPUISelLowering.cpp:1278
llvm::MinMax
Definition: AssumeBundleQueries.h:72
llvm::SDValue::getOperand
const SDValue & getOperand(unsigned i) const
Definition: SelectionDAGNodes.h:1121
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::TargetLoweringBase::setCondCodeAction
void setCondCodeAction(ISD::CondCode CC, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
Definition: TargetLowering.h:2251
llvm::TargetLoweringBase::setTruncStoreAction
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
Definition: TargetLowering.h:2205
llvm::ISD::getSetCCInverse
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
Definition: SelectionDAG.cpp:477
llvm::SDValue::getSimpleValueType
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:183
llvm::MVT::v4i32
@ v4i32
Definition: MachineValueType.h:103
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:273
llvm::ISD::FMUL
@ FMUL
Definition: ISDOpcodes.h:379
llvm::MVT::v32i32
@ v32i32
Definition: MachineValueType.h:109
llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition: ISDOpcodes.h:1335
llvm::SelectionDAG::getBuildVector
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:790
llvm::SelectionDAG::getConstantFP
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1527
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::ISD::INTRINSIC_WO_CHAIN
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
llvm::LoadSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2282
llvm::AMDGPUAS::CONSTANT_BUFFER_0
@ CONSTANT_BUFFER_0
Definition: AMDGPU.h:404
llvm::DenseMapBase::empty
LLVM_NODISCARD bool empty() const
Definition: DenseMap.h:97
llvm::MVT::v16i32
@ v16i32
Definition: MachineValueType.h:108
llvm::ISD::FrameIndex
@ FrameIndex
Definition: ISDOpcodes.h:80
llvm::isInt< 16 >
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:370
llvm::AMDGPUTargetLowering::CreateLiveInRegisterRaw
SDValue CreateLiveInRegisterRaw(SelectionDAG &DAG, const TargetRegisterClass *RC, Register Reg, EVT VT) const
Definition: AMDGPUISelLowering.h:290
j
return j(j<< 16)
llvm::ISD::SETLT
@ SETLT
Definition: ISDOpcodes.h:1378
llvm::isAllOnesConstant
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
Definition: SelectionDAG.cpp:9547
llvm::AMDGPUTargetLowering::analyzeFormalArgumentsCompute
void analyzeFormalArgumentsCompute(CCState &State, const SmallVectorImpl< ISD::InputArg > &Ins) const
The SelectionDAGBuilder will automatically promote function arguments with illegal types.
Definition: AMDGPUISelLowering.cpp:1055
CompactSwizzlableVector
static SDValue CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap< unsigned, unsigned > &RemapSwizzle)
Definition: R600ISelLowering.cpp:1594
llvm::R600TargetLowering::LowerOperation
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
Definition: R600ISelLowering.cpp:447
llvm::TargetLoweringBase::setLoadExtAction
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
Definition: TargetLowering.h:2193
llvm::GlobalAddressSDNode
Definition: SelectionDAGNodes.h:1700
llvm::AMDGPUISD::RSQ_CLAMP
@ RSQ_CLAMP
Definition: AMDGPUISelLowering.h:413
llvm::isNullConstant
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
Definition: SelectionDAG.cpp:9537
llvm::EVT::getScalarType
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:296
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:83
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:314
llvm::MemSDNode::getAddressSpace
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Definition: SelectionDAGNodes.h:1339
llvm::MemSDNode::getAlignment
unsigned getAlignment() const
Definition: SelectionDAGNodes.h:1266
llvm::ISD::FCOS
@ FCOS
Definition: ISDOpcodes.h:871
llvm::SelectionDAG::getEntryNode
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:516
llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:879
llvm::ISD::getSetCCSwappedOperands
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
Definition: SelectionDAG.cpp:454
llvm::R600TargetLowering::PerformDAGCombine
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
Definition: R600ISelLowering.cpp:1753
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:385
llvm::ISD::FSIN
@ FSIN
Definition: ISDOpcodes.h:870
llvm::SelectionDAG::getDataLayout
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:440
llvm::MVT::v8i16
@ v8i16
Definition: MachineValueType.h:92
llvm::ISD::BUILD_VECTOR
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:491
llvm::TargetLoweringBase::Custom
@ Custom
Definition: TargetLowering.h:200
llvm::R600Subtarget::getRegisterInfo
const R600RegisterInfo * getRegisterInfo() const override
Definition: R600Subtarget.h:67
llvm::ISD::SUBC
@ SUBC
Definition: ISDOpcodes.h:270
R600Subtarget.h
llvm::AMDGPUAS::CONSTANT_BUFFER_5
@ CONSTANT_BUFFER_5
Definition: AMDGPU.h:409
llvm::MVT::i32
@ i32
Definition: MachineValueType.h:46
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::ISD::SETUO
@ SETUO
Definition: ISDOpcodes.h:1365
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition: SelectionDAGNodes.h:138
llvm::R600InstrInfo
Definition: R600InstrInfo.h:39
llvm::ISD::STORE
@ STORE
Definition: ISDOpcodes.h:922
llvm::MVT::v8i32
@ v8i32
Definition: MachineValueType.h:107
llvm::ISD::SRL_PARTS
@ SRL_PARTS
Definition: ISDOpcodes.h:717
llvm::R600TargetLowering::getSetCCResultType
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const override
Return the ValueType of the result of SETCC operations.
Definition: R600ISelLowering.cpp:1559
llvm::AMDGPUISD::BRANCH_COND
@ BRANCH_COND
Definition: AMDGPUISelLowering.h:341
llvm::ISD::UINT_TO_FP
@ UINT_TO_FP
Definition: ISDOpcodes.h:740
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
llvm::SDValue::isUndef
bool isUndef() const
Definition: SelectionDAGNodes.h:1149
llvm::SDNode::getMachineOpcode
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
Definition: SelectionDAGNodes.h:678
llvm::makeArrayRef
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:476
llvm::AMDGPUTargetLowering::PerformDAGCombine
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
Definition: AMDGPUISelLowering.cpp:4009
llvm::AMDGPUAS::CONSTANT_BUFFER_12
@ CONSTANT_BUFFER_12
Definition: AMDGPU.h:416
llvm::EVT::getVectorElementType
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:301
llvm::MemSDNode::getAlign
Align getAlign() const
Definition: SelectionDAGNodes.h:1264
llvm::TargetLoweringBase::setBooleanContents
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
Definition: TargetLowering.h:2102
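A minimal sketch of how a target might call this from its TargetLowering constructor (illustrative only; the chosen content value is an assumption, not necessarily what R600 uses):

  // On this hypothetical target, booleans produced by setcc are all-zeros or all-ones.
  setBooleanContents(ZeroOrNegativeOneBooleanContent);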
llvm::SDNode::op_begin
op_iterator op_begin() const
Definition: SelectionDAGNodes.h:903
llvm::AMDGPUISD::FRACT
@ FRACT
Definition: AMDGPUISelLowering.h:364
llvm::ISD::FSUB
@ FSUB
Definition: ISDOpcodes.h:378
llvm::ISD::SHL
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:657
R600Defines.h
llvm::TargetLoweringBase::Expand
@ Expand
Definition: TargetLowering.h:198
llvm::TargetLoweringBase::setTargetDAGCombine
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
Definition: TargetLowering.h:2282
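A minimal sketch, assuming it is called from a TargetLowering subclass constructor; the particular opcodes are illustrative:

  // Ask the DAG combiner to hand these target-independent nodes to PerformDAGCombine.
  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::SELECT_CC);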
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
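For illustration only (DL, TII, DestReg and SrcReg are assumed to be in scope in a custom inserter), machine instructions are typically emitted through this builder:

  // Emit a MOV before iterator I in block *BB, copying SrcReg into DestReg.
  BuildMI(*BB, I, DL, TII->get(R600::MOV), DestReg).addReg(SrcReg);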
N
#define N
llvm::TargetLoweringBase::computeRegisterProperties
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
Definition: TargetLoweringBase.cpp:1284
llvm::ISD::SUBE
@ SUBE
Definition: ISDOpcodes.h:280
llvm::ISD::SRL
@ SRL
Definition: ISDOpcodes.h:659
llvm::AMDGPUAS::CONSTANT_BUFFER_15
@ CONSTANT_BUFFER_15
Definition: AMDGPU.h:419
llvm::GlobalAddressSDNode::getAddressSpace
unsigned getAddressSpace() const
Definition: SelectionDAG.cpp:10288
llvm::R600TargetLowering::ReplaceNodeResults
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
Definition: R600ISelLowering.cpp:630
llvm::AMDGPUISD::COS_HW
@ COS_HW
Definition: AMDGPUISelLowering.h:383
llvm::CallingConv::AMDGPU_LS
@ AMDGPU_LS
Calling convention used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:231
llvm::MipsISD::Ins
@ Ins
Definition: MipsISelLowering.h:157
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
llvm::SDValue::getOpcode
unsigned getOpcode() const
Definition: SelectionDAGNodes.h:1109
llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:637
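A one-line sketch (illustrative; DL is an SDLoc assumed in scope): unlike getConstant, a target constant is treated as an already-selected immediate operand and is not legalized further:

  SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32);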
llvm::numbers::pif
constexpr float pif
Definition: MathExtras.h:78
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::ISD::SETONE
@ SETONE
Definition: ISDOpcodes.h:1363
llvm::MVT::v32i8
@ v32i8
Definition: MachineValueType.h:81
llvm::EVT::bitsGT
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:257
llvm::MVT::i16
@ i16
Definition: MachineValueType.h:45
llvm::ISD::FNEG
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:866
llvm::SelectionDAG::getMachineFunction
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:437
llvm::R600TargetLowering::EmitInstrWithCustomInserter
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Definition: R600ISelLowering.cpp:264
llvm::ISD::SDIVREM
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
llvm::AMDGPUISD::DWORDADDR
@ DWORDADDR
Definition: AMDGPUISelLowering.h:363
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::EVT::bitsGE
bool bitsGE(EVT VT) const
Return true if this has no fewer bits than VT.
Definition: ValueTypes.h:265
llvm::ISD::SIGN_EXTEND
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:726
llvm::MVT::v8i8
@ v8i8
Definition: MachineValueType.h:79
llvm::AMDGPUFrameLowering::getStackWidth
unsigned getStackWidth(const MachineFunction &MF) const
Definition: AMDGPUFrameLowering.cpp:22
llvm::ISD::FTRUNC
@ FTRUNC
Definition: ISDOpcodes.h:880
llvm::SDValue::isMachineOpcode
bool isMachineOpcode() const
Definition: SelectionDAGNodes.h:1141
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::MVT::v2i16
@ v2i16
Definition: MachineValueType.h:89
llvm::AMDGPUMachineFunction
Definition: AMDGPUMachineFunction.h:20
llvm::ConstantFPSDNode::getValueAPF
const APFloat & getValueAPF() const
Definition: SelectionDAGNodes.h:1598
llvm::CallingConv::SPIR_KERNEL
@ SPIR_KERNEL
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:152
ReorganizeVector
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap< unsigned, unsigned > &RemapSwizzle)
Definition: R600ISelLowering.cpp:1639
llvm::ISD::INSERT_VECTOR_ELT
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:500
llvm::AMDGPUAS::CONSTANT_BUFFER_14
@ CONSTANT_BUFFER_14
Definition: AMDGPU.h:418
llvm::AMDGPUAS::PARAM_I_ADDRESS
@ PARAM_I_ADDRESS
Address space for indirect addressable parameter memory (VTX1).
Definition: AMDGPU.h:396
llvm::SelectionDAG::getExtLoad
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition: SelectionDAG.cpp:7453
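A minimal sketch of creating an extending load with this overload (Chain, Ptr and DL are assumed to be in scope; the value and memory types are illustrative):

  // Load an i8 from memory and zero-extend it to an i32 result.
  SDValue Ld = DAG.getExtLoad(ISD::ZEXTLOAD, DL, MVT::i32, Chain, Ptr,
                              MachinePointerInfo(), MVT::i8);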
llvm::ISD::ADDE
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:279
llvm::ISD::FP_ROUND
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:814
llvm::MVT::f32
@ f32
Definition: MachineValueType.h:55
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::MachineMemOperand::getFlags
Flags getFlags() const
Return the raw flags of the source value.
Definition: MachineMemOperand.h:220
llvm::CallingConv::AMDGPU_CS
@ AMDGPU_CS
Calling convention used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:213
llvm::ISD::ATOMIC_LOAD
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1114
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:382
llvm::MemSDNode::getPointerInfo
const MachinePointerInfo & getPointerInfo() const
Definition: SelectionDAGNodes.h:1334
llvm::AMDGPUISD::CONST_DATA_PTR
@ CONST_DATA_PTR
Pointer to the start of the shader's constant data.
Definition: AMDGPUISelLowering.h:473
llvm::TargetLoweringBase::getPointerTy
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Definition: TargetLowering.h:345
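A short sketch, assuming it is called from inside a TargetLowering member function; the address space constant is illustrative:

  // Pointer MVT for the constant address space on this target.
  MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::CONSTANT_ADDRESS);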
llvm::ISD::CTPOP
@ CTPOP
Definition: ISDOpcodes.h:669
llvm::CallingConv::AMDGPU_VS
@ AMDGPU_VS
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
Definition: CallingConv.h:204
llvm::AMDGPUISD::DUMMY_CHAIN
@ DUMMY_CHAIN
Definition: AMDGPUISelLowering.h:476
llvm::R600TargetLowering::R600TargetLowering
R600TargetLowering(const TargetMachine &TM, const R600Subtarget &STI)
Definition: R600ISelLowering.cpp:28
llvm::SelectionDAG::getMergeValues
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
Definition: SelectionDAG.cpp:7202
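A minimal sketch (Lo, Hi and DL are assumed to be an SDValue pair and SDLoc already in scope) of packaging two results into one multi-valued node:

  // Return both halves of a lowered operation as a single MERGE_VALUES node.
  return DAG.getMergeValues({Lo, Hi}, DL);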
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:289
llvm::MVT::v4i1
@ v4i1
Definition: MachineValueType.h:66
llvm::SelectionDAG::getCondCode
SDValue getCondCode(ISD::CondCode Cond)
Definition: SelectionDAG.cpp:1738
llvm::AMDGPUTargetLowering::FIRST_IMPLICIT
@ FIRST_IMPLICIT
Definition: AMDGPUISelLowering.h:315