//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600FrameLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
#include <vector>

using namespace llvm;

#include "R600GenCallingConv.inc"
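// R600GenCallingConv.inc is produced by TableGen from the target's
// calling-convention definitions; it provides CC_R600, which is used by
// CCAssignFnForCall() further down in this file.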

R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
                                       const R600Subtarget &STI)
    : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) {
  addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
  addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
  addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
  addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);

  computeRegisterProperties(Subtarget->getRegisterInfo());

  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
  }

  // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
  setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);

  setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);

  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);

  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);
  // We need to include these since trunc STORES to PRIVATE need
  // special handling to accommodate RMW
  setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom);
  setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);

  // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
  setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
  setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);

  // Set condition code actions
  setCondCodeAction(ISD::SETO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);

  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);

  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::FCEIL, MVT::f64, Custom);
  setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
  setOperationAction(ISD::FRINT, MVT::f64, Custom);
  setOperationAction(ISD::FFLOOR, MVT::f64, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);

  // ADD, SUB overflow.
  // TODO: turn these into Legal?
  if (Subtarget->hasCARRY())
    setOperationAction(ISD::UADDO, MVT::i32, Custom);

  if (Subtarget->hasBORROW())
    setOperationAction(ISD::USUBO, MVT::i32, Custom);

  // Expand sign extension of vectors
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);

  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);

  if (!Subtarget->hasFMA()) {
    setOperationAction(ISD::FMA, MVT::f32, Expand);
    setOperationAction(ISD::FMA, MVT::f64, Expand);
  }

  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
  // need it for R600.
  if (!Subtarget->hasFP32Denormals())
    setOperationAction(ISD::FMAD, MVT::f32, Legal);

  if (!Subtarget->hasBFI()) {
    // fcopysign can be done in a single instruction with BFI.
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  }

  if (!Subtarget->hasBCNT(32))
    setOperationAction(ISD::CTPOP, MVT::i32, Expand);

  if (!Subtarget->hasBCNT(64))
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);

  if (Subtarget->hasFFBH())
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);

  if (Subtarget->hasFFBL())
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);

  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
  // need it for R600.
  if (Subtarget->hasBFE())
    setHasExtractBitsInsn(true);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::SUBE, VT, Expand);
  }

  // LLVM will expand these to atomic_cmp_swap(0)
  // and atomic_swap, respectively.
  setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);

  // We need to custom lower some of the intrinsics
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setSchedulingPreference(Sched::Source);

  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
  setTargetDAGCombine(ISD::LOAD);
}

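// isEOP returns true when the instruction following I is the block's RETURN;
// exports and cacheless RAT writes use it below to decide whether to set their
// End Of Program bit.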
static inline bool isEOP(MachineBasicBlock::iterator I) {
  if (std::next(I) == I->getParent()->end())
    return false;
  return std::next(I)->getOpcode() == R600::RETURN;
}

MachineBasicBlock *
R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                MachineBasicBlock *BB) const {
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = MI;
  const R600InstrInfo *TII = Subtarget->getInstrInfo();

  switch (MI.getOpcode()) {
  default:
    // Replace LDS_*_RET instructions that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI.getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      // LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
          MI.getOpcode() == R600::LDS_CMPST_RET)
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
      for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
        NewMI.add(MI.getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;

  case R600::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, R600::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case R600::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, R600::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case R600::MASK_WRITE: {
    Register maskedRegister = MI.getOperand(0).getReg();
    assert(Register::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case R600::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
        .getFPImm()
        ->getValueAPF()
        .bitcastToAPInt()
        .getZExtValue());
    break;

  case R600::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
                     MI.getOperand(1).getImm());
    break;

  case R600::MOV_IMM_GLOBAL_ADDR: {
    //TODO: Perhaps combine this instruction with the next if possible
    auto MIB = TII->buildDefaultInstruction(
        *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
    int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
    //TODO: Ugh this is rather ugly
    MIB->getOperand(Idx) = MI.getOperand(1);
    break;
  }

  case R600::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
    TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
                       MI.getOperand(1).getImm());
    break;
  }

  case R600::RAT_WRITE_CACHELESS_32_eg:
  case R600::RAT_WRITE_CACHELESS_64_eg:
  case R600::RAT_WRITE_CACHELESS_128_eg:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(isEOP(I)); // Set End of program bit
    break;

  case R600::RAT_STORE_TYPED_eg:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(isEOP(I)); // Set End of program bit
    break;

  case R600::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
        .add(MI.getOperand(0));
    break;

  case R600::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
                R600::PREDICATE_BIT)
            .add(MI.getOperand(1))
            .addImm(R600::PRED_SETNE)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
        .add(MI.getOperand(0))
        .addReg(R600::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case R600::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
                R600::PREDICATE_BIT)
            .add(MI.getOperand(1))
            .addImm(R600::PRED_SETNE_INT)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
        .add(MI.getOperand(0))
        .addReg(R600::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case R600::EG_ExportSwz:
  case R600::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI.getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
          NextExportInst->getOpcode() == R600::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = isEOP(I);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(5))
        .add(MI.getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case R600::RETURN: {
    return BB;
  }
  }

  MI.eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
  case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
  case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: {
    SDValue Result = LowerLOAD(Op, DAG);
    assert((!Result.getNode() ||
            Result.getNode()->getNumValues() == 2) &&
           "Load should return a value and a chain");
    return Result;
  }

  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case Intrinsic::r600_store_swizzle: {
      SDLoc DL(Op);
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, DL, MVT::i32), // SWZ_X
        DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
        DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
        DAG.getConstant(3, DL, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch (IntrinsicID) {
    case Intrinsic::r600_tex:
    case Intrinsic::r600_texc: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case Intrinsic::r600_tex:
        TextureOp = 0;
        break;
      case Intrinsic::r600_texc:
        TextureOp = 1;
        break;
      default:
        llvm_unreachable("unhandled texture operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, DL, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
    }
    case Intrinsic::r600_dot4: {
      SDValue Args[8] = {
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(3, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(3, DL, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
    }

    case Intrinsic::r600_implicitarg_ptr: {
      MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
      uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT);
      return DAG.getConstant(ByteOffset, DL, PtrVT);
    }
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T0_Z, VT);

    case Intrinsic::r600_recipsqrt_ieee:
      return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));

    case Intrinsic::r600_recipsqrt_clamped:
      return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
    default:
      return Op;
    }

    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
      return;
    }
    // Since we don't care about out of bounds values we can use FP_TO_SINT for
    // uints too. The DAGLegalizer code for uint considers some extra cases
    // which are not necessary here.
    LLVM_FALLTHROUGH;
  case ISD::FP_TO_SINT: {
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
      return;
    }

    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}

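// Indirect addressing on this VLIW target reads whole registers, so a vector
// accessed through a variable index is rebuilt "vertically": every element is
// placed in the same channel of consecutive registers. The
// AMDGPUISD::BUILD_VERTICAL_VECTOR node marks that layout.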
SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
                                                   SDValue Vector) const {
  SDLoc DL(Vector);
  EVT VecVT = Vector.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  SmallVector<SDValue, 8> Args;

  for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
    Args.push_back(DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
        DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
  }

  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
}

SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Index = Op.getOperand(1);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
                     Vector, Index);
}

SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Index = Op.getOperand(2);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
                               Vector, Value, Index);
  return vectorToVerticalVector(DAG, Insert);
}

SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
                                               SDValue Op,
                                               SelectionDAG &DAG) const {
  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
  if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
    return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);

  const DataLayout &DL = DAG.getDataLayout();
  const GlobalValue *GV = GSD->getGlobal();
  MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);

  SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
  return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
}

SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
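  // (The constant 0.15915494309 used below is 1/(2*pi).)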
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDLoc DL(Op);

  // TODO: Should this propagate fast-math-flags?
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT,
          DAG.getNode(ISD::FMUL, DL, VT, Arg,
              DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
          DAG.getConstantFP(0.5, DL, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT, FractPart,
          DAG.getConstantFP(-0.5, DL, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
                     DAG.getConstantFP(numbers::pif, DL, MVT::f32));
}

SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One  = DAG.getConstant(1, DL, VT);

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.
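  // For example, with Shift == 0: CompShift == 31, so
  // (Lo >> 31) >> 1 == 0 as required, whereas a single Lo >> 32 would be an
  // out-of-range shift amount.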

  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);

  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
  SDValue LoBig = Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}

SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One  = DAG.getConstant(1, DL, VT);

  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);

  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}

SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
                                          unsigned mainop, unsigned ovf) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);

  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
  // Extend sign.
  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
                    DAG.getValueType(MVT::i1));

  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
}

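// For an i1 result these conversions reduce to an equality compare: the
// hardware true value is 1.0f for an unsigned boolean and -1 for a signed one,
// and inputs other than that constant are treated as don't-care here.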
SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(
      ISD::SETCC,
      DL,
      MVT::i1,
      Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
      DAG.getCondCode(ISD::SETEQ));
}

SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(
      ISD::SETCC,
      DL,
      MVT::i1,
      Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
      DAG.getCondCode(ISD::SETEQ));
}

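// Implicit kernel parameters (ngroups, global and local sizes) live at fixed
// dword offsets at the start of the implicit parameter buffer; see the
// r600_read_* intrinsic lowering in LowerOperation() above.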
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   const SDLoc &DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)));
}

bool R600TargetLowering::isZero(SDValue Op) const {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CstFP->isZero();
  } else {
    return false;
  }
}

bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->isExactlyValue(1.0);
  }
  return isAllOnesConstant(Op);
}

bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->getValueAPF().isZero();
  }
  return isNullConstant(Op);
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
      ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}

/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  unsigned SRLPad;
  switch (StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  SDLoc DL(Ptr);
  return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, DL, MVT::i32));
}
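
// With StackWidth 1 only one 32-bit channel per register is used, so a byte
// pointer becomes a register index via >> 2; widths 2 and 4 consume 8 and 16
// bytes per register, hence the larger shift amounts above.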
1108 
1109 void R600TargetLowering::getStackAddress(unsigned StackWidth,
1110  unsigned ElemIdx,
1111  unsigned &Channel,
1112  unsigned &PtrIncr) const {
1113  switch (StackWidth) {
1114  default:
1115  case 1:
1116  Channel = 0;
1117  if (ElemIdx > 0) {
1118  PtrIncr = 1;
1119  } else {
1120  PtrIncr = 0;
1121  }
1122  break;
1123  case 2:
1124  Channel = ElemIdx % 2;
1125  if (ElemIdx == 2) {
1126  PtrIncr = 1;
1127  } else {
1128  PtrIncr = 0;
1129  }
1130  break;
1131  case 4:
1132  Channel = ElemIdx;
1133  PtrIncr = 0;
1134  break;
1135  }
1136 }
1137 
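// A truncating store to PRIVATE is emulated as a read-modify-write on the
// 32-bit dword containing the destination: load the dword, clear the target
// bits with an inverted mask, OR in the shifted value, then store it back.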
SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Store);
  //TODO: Who creates the i8 stores?
  assert(Store->isTruncatingStore()
         || Store->getValue().getValueType() == MVT::i8);
  assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);

  SDValue Mask;
  if (Store->getMemoryVT() == MVT::i8) {
    assert(Store->getAlignment() >= 1);
    Mask = DAG.getConstant(0xff, DL, MVT::i32);
  } else if (Store->getMemoryVT() == MVT::i16) {
    assert(Store->getAlignment() >= 2);
    Mask = DAG.getConstant(0xffff, DL, MVT::i32);
  } else {
    llvm_unreachable("Unsupported private trunc store");
  }

  SDValue OldChain = Store->getChain();
  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
  // Skip dummy
  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
  SDValue BasePtr = Store->getBasePtr();
  SDValue Offset = Store->getOffset();
  EVT MemVT = Store->getMemoryVT();

  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location
  // TODO: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(UndefValue::get(
      Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)));
  SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);

  Chain = Dst.getValue(1);

  // Get offset in dword
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                                DAG.getConstant(0x3, DL, MVT::i32));

  // Convert byte offset to bit shift
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // TODO: Contrary to the name of the function,
  // it also handles sub i32 non-truncating stores (like i1)
  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
                                  Store->getValue());

  // Mask the value to the right type
  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);

  // Shift the value in place
  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                     MaskedValue, ShiftAmt);

  // Shift the mask in place
  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);

  // Invert the mask. NOTE: if we had native ROL instructions we could
  // use inverted mask
  DstMask = DAG.getNOT(DL, DstMask, MVT::i32);

  // Cleanup the target bits
  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);

  // Add the new bits
  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);

  // Store dword
  // TODO: Can we be smarter about MachinePointerInfo?
  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);

  // If we are part of expanded vector, make our neighbors depend on this store
  if (VectorTrunc) {
    // Make all other vector elements depend on this store
    Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
    DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
  }
  return NewStore;
}

SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  unsigned AS = StoreNode->getAddressSpace();

  SDValue Chain = StoreNode->getChain();
  SDValue Ptr = StoreNode->getBasePtr();
  SDValue Value = StoreNode->getValue();

  EVT VT = Value.getValueType();
  EVT MemVT = StoreNode->getMemoryVT();
  EVT PtrVT = Ptr.getValueType();

  SDLoc DL(Op);

  const bool TruncatingStore = StoreNode->isTruncatingStore();

  // Neither LOCAL nor PRIVATE can do vectors at the moment
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS ||
       TruncatingStore) &&
      VT.isVector()) {
    if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && TruncatingStore) {
      // Add an extra level of chain to isolate this vector
      SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
      // TODO: can the chain be replaced without creating a new store?
      SDValue NewStore = DAG.getTruncStore(
          NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
          MemVT, StoreNode->getAlignment(),
          StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
      StoreNode = cast<StoreSDNode>(NewStore);
    }

    return scalarizeVectorStore(StoreNode, DAG);
  }

  unsigned Align = StoreNode->getAlignment();
  if (Align < MemVT.getStoreSize() &&
      !allowsMisalignedMemoryAccesses(
          MemVT, AS, Align, StoreNode->getMemOperand()->getFlags(), nullptr)) {
    return expandUnalignedStore(StoreNode, DAG);
  }

  SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
                                  DAG.getConstant(2, DL, PtrVT));

  if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
    // It is beneficial to create MSKOR here instead of combiner to avoid
    // artificial dependencies introduced by RMW
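    // (STORE_MSKOR is expected to update the destination dword as
    // (dst & ~mask) | value in a single RAT op, so no separate load is
    // emitted here; the shifted value and mask travel together in the
    // v4i32 input built below.)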
    if (TruncatingStore) {
      assert(VT.bitsLE(MVT::i32));
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        assert(StoreNode->getAlignment() >= 2);
        MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
      }

      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
                                      DAG.getConstant(0x00000003, DL, PtrVT));
      SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                     DAG.getConstant(3, DL, VT));

      // Put the mask in correct place
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);

      // Put the value bits in correct place
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);

      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(0, DL, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);

      if (StoreNode->isIndexed()) {
        llvm_unreachable("Indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
  if (AS != AMDGPUAS::PRIVATE_ADDRESS)
    return SDValue();

  if (MemVT.bitsLT(MVT::i32))
    return lowerPrivateTruncStore(StoreNode, DAG);

  // Standard i32+ store, tag it with DWORDADDR to note that the address
  // has been shifted
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
    return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
  }

  // Tagged i32+ stores will be matched by patterns
  return SDValue();
}

// Returns 512 + (kc_bank << 12).
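// For example, CONSTANT_BUFFER_2 (kc_bank 2) maps to 512 + 4096 * 2 = 8704.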
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}

SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  LoadSDNode *Load = cast<LoadSDNode>(Op);
  ISD::LoadExtType ExtType = Load->getExtensionType();
  EVT MemVT = Load->getMemoryVT();
  assert(Load->getAlignment() >= MemVT.getStoreSize());

  SDValue BasePtr = Load->getBasePtr();
  SDValue Chain = Load->getChain();
  SDValue Offset = Load->getOffset();

  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location
  // NOTE: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(UndefValue::get(
      Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)));
  SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);

  // Get offset within the register.
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
                                LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));

  // Bit offset of target byte (byteIdx * 8).
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // Shift to the right.
  SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);

  // Eliminate the upper bits by setting them to ...
  EVT MemEltVT = MemVT.getScalarType();

  if (ExtType == ISD::SEXTLOAD) { // ... ones.
    SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
    Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
  } else { // ... or zeros.
    Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
  }

  SDValue Ops[] = {
    Ret,
    Read.getValue(1) // This should be our output chain
  };

  return DAG.getMergeValues(Ops, DL);
}

SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  unsigned AS = LoadNode->getAddressSpace();
  EVT MemVT = LoadNode->getMemoryVT();
  ISD::LoadExtType ExtType = LoadNode->getExtensionType();

  if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
      ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
    return lowerPrivateExtLoad(Op, DAG);
  }

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();

  if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
       LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
      VT.isVector()) {
    return scalarizeVectorLoad(LoadNode, DAG);
  }

  // This is still used for explicit load from addrspace(8)
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
    } else {
      //TODO: Does this even work?
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(4, DL, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue NewLoad = DAG.getExtLoad(
        ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
        LoadNode->getAlignment(), LoadNode->getMemOperand()->getFlags());
    SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
                              DAG.getValueType(MemVT));

    SDValue MergedValues[2] = { Res, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // DWORDADDR ISD marks already shifted address
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    assert(VT == MVT::i32);
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
    return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
  }
  return SDValue();
}

SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Cond  = Op.getOperand(1);
  SDValue Jump  = Op.getOperand(2);

  return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
                     Chain, Jump, Cond);
}

SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
                                            SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const R600FrameLowering *TFL = Subtarget->getFrameLowering();

  FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);

  unsigned FrameIndex = FIN->getIndex();
  unsigned IgnoredFrameReg;
  unsigned Offset =
      TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
  return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
                         Op.getValueType());
}

CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
                                                  bool IsVarArg) const {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::Cold:
    llvm_unreachable("kernels should not be handled here");
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_LS:
    return CC_R600;
  default:
    report_fatal_error("Unsupported calling convention.");
  }
}

/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();

  if (AMDGPU::isShader(CallConv)) {
    CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
  } else {
    analyzeFormalArgumentsCompute(CCInfo, Ins);
  }

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;
    EVT MemVT = VA.getLocVT();
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    if (AMDGPU::isShader(CallConv)) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::PARAM_I_ADDRESS);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      // FIXME: This should really check the extload type, but the handling of
      // extload vector parameters seems to be broken.

      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      Ext = ISD::SEXTLOAD;
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
    unsigned PartOffset = VA.getLocMemOffset();
    unsigned Alignment = MinAlign(VT.getStoreSize(), PartOffset);

    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    SDValue Arg = DAG.getLoad(
        ISD::UNINDEXED, Ext, VT, DL, Chain,
        DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
        PtrInfo,
        MemVT, Alignment, MachineMemOperand::MONonTemporal |
                              MachineMemOperand::MODereferenceable |
                              MachineMemOperand::MOInvariant);

    InVals.push_back(Arg);
  }
  return Chain;
}

EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
                                           EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
                                          const SelectionDAG &DAG) const {
  // Local and Private addresses do not handle vectors. Limit to i32
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS)) {
    return (MemVT.getSizeInBits() <= 32);
  }
  return true;
}

bool R600TargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
    bool *IsFast) const {
  if (IsFast)
    *IsFast = false;

  if (!VT.isSimple() || VT == MVT::Other)
    return false;

  if (VT.bitsLT(MVT::i32))
    return false;

  // TODO: This is a rough estimate.
  if (IsFast)
    *IsFast = true;

  return VT.bitsGT(MVT::i32) && Align % 4 == 0;
}

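// CompactSwizzlableVector folds constant 0.0/1.0 elements and duplicated
// elements of an export source into swizzle selects (SEL_0, SEL_1, or a
// previously used channel), recording the old -> new mapping in RemapSwizzle.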
static SDValue CompactSwizzlableVector(
    SelectionDAG &DAG, SDValue VectorEntry,
    DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(RemapSwizzle.empty());

  SDLoc DL(VectorEntry);
  EVT EltTy = VectorEntry.getValueType().getVectorElementType();

  SDValue NewBldVec[4];
  for (unsigned i = 0; i < 4; i++)
    NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
                               DAG.getIntPtrConstant(i, DL));

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].isUndef())
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].isUndef())
      continue;
    // Fix spurious warning with gcc 7.3 -O3
    // warning: array subscript is above array bounds [-Warray-bounds]
    // if (NewBldVec[i] == NewBldVec[j]) {
    // ~~~~~~~~~~~^
    if (i >= 4)
      continue;
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
                            NewBldVec);
}

static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(RemapSwizzle.empty());

  SDLoc DL(VectorEntry);
  EVT EltTy = VectorEntry.getValueType().getVectorElementType();

  SDValue NewBldVec[4];
  bool isUnmovable[4] = {false, false, false, false};
  for (unsigned i = 0; i < 4; i++)
    NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
                               DAG.getIntPtrConstant(i, DL));

  for (unsigned i = 0; i < 4; i++) {
    RemapSwizzle[i] = i;
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (i == Idx)
        isUnmovable[Idx] = true;
    }
  }

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (isUnmovable[Idx])
        continue;
      // Swap i and Idx
      std::swap(NewBldVec[Idx], NewBldVec[i]);
      std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
      break;
    }
  }

  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
                            NewBldVec);
}

SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
                                            SelectionDAG &DAG,
                                            const SDLoc &DL) const {
  // Old -> New swizzle values
  DenseMap<unsigned, unsigned> SwizzleRemap;

  BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
  }

  SwizzleRemap.clear();
  BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
  }

  return BuildVector;
}

SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
                                            SelectionDAG &DAG) const {
  SDLoc DL(LoadNode);
  EVT VT = LoadNode->getValueType(0);
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();
  assert(isa<ConstantSDNode>(Ptr));

  //TODO: Support smaller loads
  if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode))
    return SDValue();

  if (LoadNode->getAlignment() < 4)
    return SDValue();

  int ConstantBlock = ConstantAddressBlock(Block);

  SDValue Slots[4];
  for (unsigned i = 0; i < 4; i++) {
    // We want Const position encoded with the following formula:
    // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
    // const_index is Ptr computed by llvm using an alignment of 16.
    // Thus we add (((512 + (kc_bank << 12)) + chan) * 4 here and
    // then div by 4 at the ISel step
    SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
        DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
    Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
  }
  EVT NewVT = MVT::v4i32;
  unsigned NumElements = 4;
  if (VT.isVector()) {
    NewVT = VT;
    NumElements = VT.getVectorNumElements();
  }
  SDValue Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
  if (!VT.isVector()) {
    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                         DAG.getConstant(0, DL, MVT::i32));
  }
  SDValue MergedValues[2] = {
    Result,
    Chain
  };
  return DAG.getMergeValues(MergedValues, DL);
}

//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//

1852  DAGCombinerInfo &DCI) const {
1853  SelectionDAG &DAG = DCI.DAG;
1854  SDLoc DL(N);
1855 
1856  switch (N->getOpcode()) {
1857  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1858  case ISD::FP_ROUND: {
1859  SDValue Arg = N->getOperand(0);
1860  if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
1861  return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
1862  Arg.getOperand(0));
1863  }
1864  break;
1865  }
1866 
1867  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0, cc))) ->
1868  // (i32 select_cc f32, f32, -1, 0, cc)
1869  //
1870  // Mesa's GLSL frontend generates the above pattern a lot, and we can lower
1871  // it to one of the SET*_DX10 instructions.
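  // The SET*_DX10 instructions return an all-ones integer (-1) when the
  // condition holds and 0 otherwise, which is exactly the select_cc built
  // below.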
1872  case ISD::FP_TO_SINT: {
1873  SDValue FNeg = N->getOperand(0);
1874  if (FNeg.getOpcode() != ISD::FNEG) {
1875  return SDValue();
1876  }
1877  SDValue SelectCC = FNeg.getOperand(0);
1878  if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1879  SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1880  SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1881  !isHWTrueValue(SelectCC.getOperand(2)) ||
1882  !isHWFalseValue(SelectCC.getOperand(3))) {
1883  return SDValue();
1884  }
1885 
1886  return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
1887  SelectCC.getOperand(0), // LHS
1888  SelectCC.getOperand(1), // RHS
1889  DAG.getConstant(-1, DL, MVT::i32), // True
1890  DAG.getConstant(0, DL, MVT::i32), // False
1891  SelectCC.getOperand(4)); // CC
1892 
1893  break;
1894  }
1895 
1896  // insert_vector_elt (build_vector elt0, ..., eltN), NewElt, idx
1897  // => build_vector elt0, ..., NewElt, ..., eltN
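  // e.g. insert_vector_elt (build_vector a, b, c, d), x, 2
  //        => build_vector a, b, x, d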
1898  case ISD::INSERT_VECTOR_ELT: {
1899  SDValue InVec = N->getOperand(0);
1900  SDValue InVal = N->getOperand(1);
1901  SDValue EltNo = N->getOperand(2);
1902 
1903  // If the inserted element is an UNDEF, just use the input vector.
1904  if (InVal.isUndef())
1905  return InVec;
1906 
1907  EVT VT = InVec.getValueType();
1908 
1909  // If we can't generate a legal BUILD_VECTOR, exit
1910  if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1911  return SDValue();
1912 
1913  // Check that we know which element is being inserted
1914  if (!isa<ConstantSDNode>(EltNo))
1915  return SDValue();
1916  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1917 
1918  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1919  // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1920  // vector elements.
1921  SmallVector<SDValue, 8> Ops;
1922  if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1923  Ops.append(InVec.getNode()->op_begin(),
1924  InVec.getNode()->op_end());
1925  } else if (InVec.isUndef()) {
1926  unsigned NElts = VT.getVectorNumElements();
1927  Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1928  } else {
1929  return SDValue();
1930  }
1931 
1932  // Insert the element
1933  if (Elt < Ops.size()) {
1934  // All the operands of BUILD_VECTOR must have the same type;
1935  // we enforce that here.
1936  EVT OpVT = Ops[0].getValueType();
1937  if (InVal.getValueType() != OpVT)
1938  InVal = OpVT.bitsGT(InVal.getValueType()) ?
1939  DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
1940  DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
1941  Ops[Elt] = InVal;
1942  }
1943 
1944  // Return the new vector
1945  return DAG.getBuildVector(VT, DL, Ops);
1946  }
1947 
1948  // Extract_vec (Build_vector) generated by custom lowering
1949  // also needs to be custom combined.
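  // e.g. extract_vector_elt (build_vector a, b, c, d), 2 => c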
1950  case ISD::EXTRACT_VECTOR_ELT: {
1951  SDValue Arg = N->getOperand(0);
1952  if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1953  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1954  unsigned Element = Const->getZExtValue();
1955  return Arg->getOperand(Element);
1956  }
1957  }
1958  if (Arg.getOpcode() == ISD::BITCAST &&
1959  Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
1960  (Arg.getOperand(0).getValueType().getVectorNumElements() ==
1961  Arg.getValueType().getVectorNumElements())) {
1962  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1963  unsigned Element = Const->getZExtValue();
1964  return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
1965  Arg->getOperand(0).getOperand(Element));
1966  }
1967  }
1968  break;
1969  }
1970 
1971  case ISD::SELECT_CC: {
1972  // Try common optimizations
1973  if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
1974  return Ret;
1975 
1976  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1977  // selectcc x, y, a, b, inv(cc)
1978  //
1979  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1980  // selectcc x, y, a, b, cc
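  // Rationale: the outer node compares the inner result against b. Under
  // seteq it picks a exactly when the inner condition was false (the inner
  // produced b), so the pair collapses to the inner selectcc with its
  // condition inverted; under setne the result is the inner node itself.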
1981  SDValue LHS = N->getOperand(0);
1982  if (LHS.getOpcode() != ISD::SELECT_CC) {
1983  return SDValue();
1984  }
1985 
1986  SDValue RHS = N->getOperand(1);
1987  SDValue True = N->getOperand(2);
1988  SDValue False = N->getOperand(3);
1989  ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
1990 
1991  if (LHS.getOperand(2).getNode() != True.getNode() ||
1992  LHS.getOperand(3).getNode() != False.getNode() ||
1993  RHS.getNode() != False.getNode()) {
1994  return SDValue();
1995  }
1996 
1997  switch (NCC) {
1998  default: return SDValue();
1999  case ISD::SETNE: return LHS;
2000  case ISD::SETEQ: {
2001  ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
2002  LHSCC = ISD::getSetCCInverse(LHSCC,
2003  LHS.getOperand(0).getValueType().isInteger());
2004  if (DCI.isBeforeLegalizeOps() ||
2005  isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
2006  return DAG.getSelectCC(DL,
2007  LHS.getOperand(0),
2008  LHS.getOperand(1),
2009  LHS.getOperand(2),
2010  LHS.getOperand(3),
2011  LHSCC);
2012  break;
2013  }
2014  }
2015  return SDValue();
2016  }
2017 
2018  case AMDGPUISD::R600_EXPORT: {
2019  SDValue Arg = N->getOperand(1);
2020  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2021  break;
2022 
2023  SDValue NewArgs[8] = {
2024  N->getOperand(0), // Chain
2025  SDValue(),
2026  N->getOperand(2), // ArrayBase
2027  N->getOperand(3), // Type
2028  N->getOperand(4), // SWZ_X
2029  N->getOperand(5), // SWZ_Y
2030  N->getOperand(6), // SWZ_Z
2031  N->getOperand(7) // SWZ_W
2032  };
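  // OptimizeSwizzle may compact or reorder the exported build_vector; it
  // returns the new vector for NewArgs[1] and rewrites the four SWZ_*
  // selects in NewArgs[4..7] in place to match.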
2033  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
2034  return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
2035  }
2036  case AMDGPUISD::TEXTURE_FETCH: {
2037  SDValue Arg = N->getOperand(1);
2038  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2039  break;
2040 
2041  SDValue NewArgs[19] = {
2042  N->getOperand(0),
2043  N->getOperand(1),
2044  N->getOperand(2),
2045  N->getOperand(3),
2046  N->getOperand(4),
2047  N->getOperand(5),
2048  N->getOperand(6),
2049  N->getOperand(7),
2050  N->getOperand(8),
2051  N->getOperand(9),
2052  N->getOperand(10),
2053  N->getOperand(11),
2054  N->getOperand(12),
2055  N->getOperand(13),
2056  N->getOperand(14),
2057  N->getOperand(15),
2058  N->getOperand(16),
2059  N->getOperand(17),
2060  N->getOperand(18),
2061  };
2062  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
2063  return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
2064  }
2065 
2066  case ISD::LOAD: {
2067  LoadSDNode *LoadNode = cast<LoadSDNode>(N);
2068  SDValue Ptr = LoadNode->getBasePtr();
2069  if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
2070  isa<ConstantSDNode>(Ptr))
2071  return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
2072  break;
2073  }
2074 
2075  default: break;
2076  }
2077 
2078  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
2079 }
2080 
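/// Try to fold the node producing operand SrcIdx of ParentNode into the
/// operand encoding itself: FNEG/FABS become the neg/abs modifier bits,
/// CONST_COPY becomes an ALU_CONST register plus a sel index, and MOV_IMM_*
/// becomes an inline constant register or the ALU_LITERAL_X literal slot.
/// Returns true and updates Src/Neg/Abs/Sel/Imm on success.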
2081 bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
2082  SDValue &Src, SDValue &Neg, SDValue &Abs,
2083  SDValue &Sel, SDValue &Imm,
2084  SelectionDAG &DAG) const {
2085  const R600InstrInfo *TII = Subtarget->getInstrInfo();
2086  if (!Src.isMachineOpcode())
2087  return false;
2088 
2089  switch (Src.getMachineOpcode()) {
2090  case R600::FNEG_R600:
2091  if (!Neg.getNode())
2092  return false;
2093  Src = Src.getOperand(0);
2094  Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
2095  return true;
2096  case R600::FABS_R600:
2097  if (!Abs.getNode())
2098  return false;
2099  Src = Src.getOperand(0);
2100  Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
2101  return true;
2102  case R600::CONST_COPY: {
2103  unsigned Opcode = ParentNode->getMachineOpcode();
2104  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2105 
2106  if (!Sel.getNode())
2107  return false;
2108 
2109  SDValue CstOffset = Src.getOperand(0);
2110  if (ParentNode->getValueType(0).isVector())
2111  return false;
2112 
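  // An ALU clause can only read a limited number of kcache constants per
  // instruction group (see fitsConstReadLimitations), so gather every
  // constant already read by ParentNode's other operands before committing
  // to fold one more.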
2113  // Gather constant values.
2114  int SrcIndices[] = {
2115  TII->getOperandIdx(Opcode, R600::OpName::src0),
2116  TII->getOperandIdx(Opcode, R600::OpName::src1),
2117  TII->getOperandIdx(Opcode, R600::OpName::src2),
2118  TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2119  TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2120  TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2121  TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2122  TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2123  TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2124  TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2125  TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2126  };
2127  std::vector<unsigned> Consts;
2128  for (int OtherSrcIdx : SrcIndices) {
2129  int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2130  if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2131  continue;
2132  if (HasDst) {
2133  OtherSrcIdx--;
2134  OtherSelIdx--;
2135  }
2136  if (RegisterSDNode *Reg =
2137  dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2138  if (Reg->getReg() == R600::ALU_CONST) {
2139  ConstantSDNode *Cst
2140  = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
2141  Consts.push_back(Cst->getZExtValue());
2142  }
2143  }
2144  }
2145 
2146  ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
2147  Consts.push_back(Cst->getZExtValue());
2148  if (!TII->fitsConstReadLimitations(Consts)) {
2149  return false;
2150  }
2151 
2152  Sel = CstOffset;
2153  Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
2154  return true;
2155  }
2156  case R600::MOV_IMM_GLOBAL_ADDR:
2157  // Bail out if the Imm slot is already in use; same check as below.
2158  if (cast<ConstantSDNode>(Imm)->getZExtValue())
2159  return false;
2160  Imm = Src.getOperand(0);
2161  Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
2162  return true;
2163  case R600::MOV_IMM_I32:
2164  case R600::MOV_IMM_F32: {
2165  unsigned ImmReg = R600::ALU_LITERAL_X;
2166  uint64_t ImmValue = 0;
2167 
2168  if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
2169  ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0));
2170  float FloatValue = FPC->getValueAPF().convertToFloat();
2171  if (FloatValue == 0.0) {
2172  ImmReg = R600::ZERO;
2173  } else if (FloatValue == 0.5) {
2174  ImmReg = R600::HALF;
2175  } else if (FloatValue == 1.0) {
2176  ImmReg = R600::ONE;
2177  } else {
2178  ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2179  }
2180  } else {
2181  ConstantSDNode *C = cast<ConstantSDNode>(Src.getOperand(0));
2182  uint64_t Value = C->getZExtValue();
2183  if (Value == 0) {
2184  ImmReg = R600::ZERO;
2185  } else if (Value == 1) {
2186  ImmReg = R600::ONE_INT;
2187  } else {
2188  ImmValue = Value;
2189  }
2190  }
2191 
2192  // Check that we aren't already using an immediate.
2193  // XXX: It's possible for an instruction to have more than one
2194  // immediate operand, but this is not supported yet.
2195  if (ImmReg == R600::ALU_LITERAL_X) {
2196  if (!Imm.getNode())
2197  return false;
2198  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2199  assert(C);
2200  if (C->getZExtValue())
2201  return false;
2202  Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
2203  }
2204  Src = DAG.getRegister(ImmReg, MVT::i32);
2205  return true;
2206  }
2207  default:
2208  return false;
2209  }
2210 }
2211 
2212 /// Fold the instructions after selecting them
2213 SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2214  SelectionDAG &DAG) const {
2215  const R600InstrInfo *TII = Subtarget->getInstrInfo();
2216  if (!Node->isMachineOpcode())
2217  return Node;
2218 
2219  unsigned Opcode = Node->getMachineOpcode();
2220  SDValue FakeOp;
2221 
2222  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
2223 
2224  if (Opcode == R600::DOT_4) {
2225  int OperandIdx[] = {
2226  TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2227  TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2228  TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2229  TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2230  TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2231  TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2232  TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2233  TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2234  };
2235  int NegIdx[] = {
2236  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
2237  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
2238  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
2239  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
2240  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
2241  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
2242  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
2243  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
2244  };
2245  int AbsIdx[] = {
2246  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
2247  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
2248  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
2249  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
2250  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
2251  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
2252  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
2253  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
2254  };
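  // getOperandIdx counts MachineInstr operands, where the destination comes
  // first; the Ops vector holds only the SDNode operands (no destination),
  // so every index is shifted down by one: hence the "- 1" adjustments and
  // the SelIdx decrement below.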
2255  for (unsigned i = 0; i < 8; i++) {
2256  if (OperandIdx[i] < 0)
2257  return Node;
2258  SDValue &Src = Ops[OperandIdx[i] - 1];
2259  SDValue &Neg = Ops[NegIdx[i] - 1];
2260  SDValue &Abs = Ops[AbsIdx[i] - 1];
2261  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2262  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2263  if (HasDst)
2264  SelIdx--;
2265  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2266  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2267  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2268  }
2269  } else if (Opcode == R600::REG_SEQUENCE) {
2270  for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2271  SDValue &Src = Ops[i];
2272  if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
2273  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2274  }
2275  } else {
2276  if (!TII->hasInstrModifiers(Opcode))
2277  return Node;
2278  int OperandIdx[] = {
2279  TII->getOperandIdx(Opcode, R600::OpName::src0),
2280  TII->getOperandIdx(Opcode, R600::OpName::src1),
2281  TII->getOperandIdx(Opcode, R600::OpName::src2)
2282  };
2283  int NegIdx[] = {
2284  TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
2285  TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
2286  TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
2287  };
2288  int AbsIdx[] = {
2289  TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
2290  TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
2291  -1
2292  };
2293  for (unsigned i = 0; i < 3; i++) {
2294  if (OperandIdx[i] < 0)
2295  return Node;
2296  SDValue &Src = Ops[OperandIdx[i] - 1];
2297  SDValue &Neg = Ops[NegIdx[i] - 1];
2298  SDValue FakeAbs;
2299  SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2300  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2301  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2302  int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
2303  if (HasDst) {
2304  SelIdx--;
2305  ImmIdx--;
2306  }
2307  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2308  SDValue &Imm = Ops[ImmIdx];
2309  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
2310  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2311  }
2312  }
2313 
2314  return Node;
2315 }