LLVM  14.0.0git
R600ISelLowering.cpp
Go to the documentation of this file.
1 //===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Custom DAG lowering for R600
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "R600ISelLowering.h"
15 #include "AMDGPU.h"
17 #include "R600Defines.h"
18 #include "R600InstrInfo.h"
20 #include "R600Subtarget.h"
22 #include "llvm/IR/IntrinsicsAMDGPU.h"
23 #include "llvm/IR/IntrinsicsR600.h"
24 
25 using namespace llvm;
26 
27 #include "R600GenCallingConv.inc"
28 
30  const R600Subtarget &STI)
31  : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) {
32  addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
33  addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
34  addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
35  addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
36  addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
37  addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);
38 
41 
43 
44  // Legalize loads and stores to the private address space.
48 
49  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
50  // spaces, so it is custom lowered to handle those where it isn't.
51  for (MVT VT : MVT::integer_valuetypes()) {
55 
59 
63  }
64 
65  // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
69 
73 
78 
81  // We need to include these since trunc STORES to PRIVATE need
82  // special handling to accommodate RMW
93 
94  // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
97 
98  // Set condition code actions
111 
116 
119 
122 
126 
128 
133 
136 
143 
148 
149  // ADD, SUB overflow.
150  // TODO: turn these into Legal?
151  if (Subtarget->hasCARRY())
153 
154  if (Subtarget->hasBORROW())
156 
157  // Expand sign extension of vectors
158  if (!Subtarget->hasBFE())
160 
163 
164  if (!Subtarget->hasBFE())
168 
169  if (!Subtarget->hasBFE())
173 
177 
179 
181 
186 
191 
192  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
193  // to be Legal/Custom in order to avoid library calls.
197 
198  if (!Subtarget->hasFMA()) {
201  }
202 
203  // FIXME: May need no denormals check
205 
206  if (!Subtarget->hasBFI()) {
207  // fcopysign can be done in a single instruction with BFI.
210  }
211 
212  if (!Subtarget->hasBCNT(32))
214 
215  if (!Subtarget->hasBCNT(64))
217 
218  if (Subtarget->hasFFBH())
220 
221  if (Subtarget->hasFFBL())
223 
224  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
225  // need it for R600.
226  if (Subtarget->hasBFE())
227  setHasExtractBitsInsn(true);
228 
230 
231  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
232  for (MVT VT : ScalarIntVTs) {
237  }
238 
239  // LLVM will expand these to atomic_cmp_swap(0)
240  // and atomic_swap, respectively.
243 
244  // We need to custom lower some of the intrinsics
247 
249 
256 }
257 
258 static inline bool isEOP(MachineBasicBlock::iterator I) {
259  if (std::next(I) == I->getParent()->end())
260  return false;
261  return std::next(I)->getOpcode() == R600::RETURN;
262 }
263 
266  MachineBasicBlock *BB) const {
267  MachineFunction *MF = BB->getParent();
270  const R600InstrInfo *TII = Subtarget->getInstrInfo();
271 
272  switch (MI.getOpcode()) {
273  default:
274  // Replace LDS_*_RET instruction that don't have any uses with the
275  // equivalent LDS_*_NORET instruction.
276  if (TII->isLDSRetInstr(MI.getOpcode())) {
277  int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
278  assert(DstIdx != -1);
279  MachineInstrBuilder NewMI;
280  // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
281  // LDS_1A2D support and remove this special case.
282  if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
283  MI.getOpcode() == R600::LDS_CMPST_RET)
284  return BB;
285 
286  NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
287  TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
288  for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
289  NewMI.add(MI.getOperand(i));
290  }
291  } else {
293  }
294  break;
295 
296  case R600::FABS_R600: {
297  MachineInstr *NewMI = TII->buildDefaultInstruction(
298  *BB, I, R600::MOV, MI.getOperand(0).getReg(),
299  MI.getOperand(1).getReg());
300  TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
301  break;
302  }
303 
304  case R600::FNEG_R600: {
305  MachineInstr *NewMI = TII->buildDefaultInstruction(
306  *BB, I, R600::MOV, MI.getOperand(0).getReg(),
307  MI.getOperand(1).getReg());
308  TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
309  break;
310  }
311 
312  case R600::MASK_WRITE: {
313  Register maskedRegister = MI.getOperand(0).getReg();
314  assert(maskedRegister.isVirtual());
315  MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
316  TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
317  break;
318  }
319 
320  case R600::MOV_IMM_F32:
321  TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
322  .getFPImm()
323  ->getValueAPF()
324  .bitcastToAPInt()
325  .getZExtValue());
326  break;
327 
328  case R600::MOV_IMM_I32:
329  TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
330  MI.getOperand(1).getImm());
331  break;
332 
333  case R600::MOV_IMM_GLOBAL_ADDR: {
334  //TODO: Perhaps combine this instruction with the next if possible
335  auto MIB = TII->buildDefaultInstruction(
336  *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
337  int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
338  //TODO: Ugh this is rather ugly
339  const MachineOperand &MO = MI.getOperand(1);
340  MIB->getOperand(Idx).ChangeToGA(MO.getGlobal(), MO.getOffset(),
341  MO.getTargetFlags());
342  break;
343  }
344 
345  case R600::CONST_COPY: {
346  MachineInstr *NewMI = TII->buildDefaultInstruction(
347  *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
348  TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
349  MI.getOperand(1).getImm());
350  break;
351  }
352 
353  case R600::RAT_WRITE_CACHELESS_32_eg:
354  case R600::RAT_WRITE_CACHELESS_64_eg:
355  case R600::RAT_WRITE_CACHELESS_128_eg:
356  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
357  .add(MI.getOperand(0))
358  .add(MI.getOperand(1))
359  .addImm(isEOP(I)); // Set End of program bit
360  break;
361 
362  case R600::RAT_STORE_TYPED_eg:
363  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
364  .add(MI.getOperand(0))
365  .add(MI.getOperand(1))
366  .add(MI.getOperand(2))
367  .addImm(isEOP(I)); // Set End of program bit
368  break;
369 
370  case R600::BRANCH:
371  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
372  .add(MI.getOperand(0));
373  break;
374 
375  case R600::BRANCH_COND_f32: {
376  MachineInstr *NewMI =
377  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
378  R600::PREDICATE_BIT)
379  .add(MI.getOperand(1))
380  .addImm(R600::PRED_SETNE)
381  .addImm(0); // Flags
382  TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
383  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
384  .add(MI.getOperand(0))
385  .addReg(R600::PREDICATE_BIT, RegState::Kill);
386  break;
387  }
388 
389  case R600::BRANCH_COND_i32: {
390  MachineInstr *NewMI =
391  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
392  R600::PREDICATE_BIT)
393  .add(MI.getOperand(1))
394  .addImm(R600::PRED_SETNE_INT)
395  .addImm(0); // Flags
396  TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
397  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
398  .add(MI.getOperand(0))
399  .addReg(R600::PREDICATE_BIT, RegState::Kill);
400  break;
401  }
402 
403  case R600::EG_ExportSwz:
404  case R600::R600_ExportSwz: {
405  // Instruction is left unmodified if its not the last one of its type
406  bool isLastInstructionOfItsType = true;
407  unsigned InstExportType = MI.getOperand(1).getImm();
408  for (MachineBasicBlock::iterator NextExportInst = std::next(I),
409  EndBlock = BB->end(); NextExportInst != EndBlock;
410  NextExportInst = std::next(NextExportInst)) {
411  if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
412  NextExportInst->getOpcode() == R600::R600_ExportSwz) {
413  unsigned CurrentInstExportType = NextExportInst->getOperand(1)
414  .getImm();
415  if (CurrentInstExportType == InstExportType) {
416  isLastInstructionOfItsType = false;
417  break;
418  }
419  }
420  }
421  bool EOP = isEOP(I);
422  if (!EOP && !isLastInstructionOfItsType)
423  return BB;
424  unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
425  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
426  .add(MI.getOperand(0))
427  .add(MI.getOperand(1))
428  .add(MI.getOperand(2))
429  .add(MI.getOperand(3))
430  .add(MI.getOperand(4))
431  .add(MI.getOperand(5))
432  .add(MI.getOperand(6))
433  .addImm(CfInst)
434  .addImm(EOP);
435  break;
436  }
437  case R600::RETURN: {
438  return BB;
439  }
440  }
441 
442  MI.eraseFromParent();
443  return BB;
444 }
445 
446 //===----------------------------------------------------------------------===//
447 // Custom DAG Lowering Operations
448 //===----------------------------------------------------------------------===//
449 
453  switch (Op.getOpcode()) {
454  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
455  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
456  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
457  case ISD::SHL_PARTS:
458  case ISD::SRA_PARTS:
459  case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
460  case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
461  case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
462  case ISD::FCOS:
463  case ISD::FSIN: return LowerTrig(Op, DAG);
464  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
465  case ISD::STORE: return LowerSTORE(Op, DAG);
466  case ISD::LOAD: {
467  SDValue Result = LowerLOAD(Op, DAG);
468  assert((!Result.getNode() ||
469  Result.getNode()->getNumValues() == 2) &&
470  "Load should return a value and a chain");
471  return Result;
472  }
473 
474  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
475  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
476  case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
477  case ISD::INTRINSIC_VOID: {
478  SDValue Chain = Op.getOperand(0);
479  unsigned IntrinsicID =
480  cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
481  switch (IntrinsicID) {
482  case Intrinsic::r600_store_swizzle: {
483  SDLoc DL(Op);
484  const SDValue Args[8] = {
485  Chain,
486  Op.getOperand(2), // Export Value
487  Op.getOperand(3), // ArrayBase
488  Op.getOperand(4), // Type
489  DAG.getConstant(0, DL, MVT::i32), // SWZ_X
490  DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
491  DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
492  DAG.getConstant(3, DL, MVT::i32) // SWZ_W
493  };
494  return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
495  }
496 
497  // default for switch(IntrinsicID)
498  default: break;
499  }
500  // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
501  break;
502  }
504  unsigned IntrinsicID =
505  cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
506  EVT VT = Op.getValueType();
507  SDLoc DL(Op);
508  switch (IntrinsicID) {
509  case Intrinsic::r600_tex:
510  case Intrinsic::r600_texc: {
511  unsigned TextureOp;
512  switch (IntrinsicID) {
513  case Intrinsic::r600_tex:
514  TextureOp = 0;
515  break;
516  case Intrinsic::r600_texc:
517  TextureOp = 1;
518  break;
519  default:
520  llvm_unreachable("unhandled texture operation");
521  }
522 
523  SDValue TexArgs[19] = {
524  DAG.getConstant(TextureOp, DL, MVT::i32),
525  Op.getOperand(1),
526  DAG.getConstant(0, DL, MVT::i32),
527  DAG.getConstant(1, DL, MVT::i32),
528  DAG.getConstant(2, DL, MVT::i32),
529  DAG.getConstant(3, DL, MVT::i32),
530  Op.getOperand(2),
531  Op.getOperand(3),
532  Op.getOperand(4),
533  DAG.getConstant(0, DL, MVT::i32),
534  DAG.getConstant(1, DL, MVT::i32),
535  DAG.getConstant(2, DL, MVT::i32),
536  DAG.getConstant(3, DL, MVT::i32),
537  Op.getOperand(5),
538  Op.getOperand(6),
539  Op.getOperand(7),
540  Op.getOperand(8),
541  Op.getOperand(9),
542  Op.getOperand(10)
543  };
544  return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
545  }
546  case Intrinsic::r600_dot4: {
547  SDValue Args[8] = {
548  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
549  DAG.getConstant(0, DL, MVT::i32)),
550  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
551  DAG.getConstant(0, DL, MVT::i32)),
552  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
553  DAG.getConstant(1, DL, MVT::i32)),
554  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
555  DAG.getConstant(1, DL, MVT::i32)),
556  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
557  DAG.getConstant(2, DL, MVT::i32)),
558  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
559  DAG.getConstant(2, DL, MVT::i32)),
560  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
561  DAG.getConstant(3, DL, MVT::i32)),
562  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
563  DAG.getConstant(3, DL, MVT::i32))
564  };
565  return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
566  }
567 
568  case Intrinsic::r600_implicitarg_ptr: {
571  return DAG.getConstant(ByteOffset, DL, PtrVT);
572  }
573  case Intrinsic::r600_read_ngroups_x:
574  return LowerImplicitParameter(DAG, VT, DL, 0);
575  case Intrinsic::r600_read_ngroups_y:
576  return LowerImplicitParameter(DAG, VT, DL, 1);
577  case Intrinsic::r600_read_ngroups_z:
578  return LowerImplicitParameter(DAG, VT, DL, 2);
579  case Intrinsic::r600_read_global_size_x:
580  return LowerImplicitParameter(DAG, VT, DL, 3);
581  case Intrinsic::r600_read_global_size_y:
582  return LowerImplicitParameter(DAG, VT, DL, 4);
583  case Intrinsic::r600_read_global_size_z:
584  return LowerImplicitParameter(DAG, VT, DL, 5);
585  case Intrinsic::r600_read_local_size_x:
586  return LowerImplicitParameter(DAG, VT, DL, 6);
587  case Intrinsic::r600_read_local_size_y:
588  return LowerImplicitParameter(DAG, VT, DL, 7);
589  case Intrinsic::r600_read_local_size_z:
590  return LowerImplicitParameter(DAG, VT, DL, 8);
591 
592  case Intrinsic::r600_read_tgid_x:
593  case Intrinsic::amdgcn_workgroup_id_x:
594  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
595  R600::T1_X, VT);
596  case Intrinsic::r600_read_tgid_y:
597  case Intrinsic::amdgcn_workgroup_id_y:
598  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
599  R600::T1_Y, VT);
600  case Intrinsic::r600_read_tgid_z:
601  case Intrinsic::amdgcn_workgroup_id_z:
602  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
603  R600::T1_Z, VT);
604  case Intrinsic::r600_read_tidig_x:
605  case Intrinsic::amdgcn_workitem_id_x:
606  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
607  R600::T0_X, VT);
608  case Intrinsic::r600_read_tidig_y:
609  case Intrinsic::amdgcn_workitem_id_y:
610  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
611  R600::T0_Y, VT);
612  case Intrinsic::r600_read_tidig_z:
613  case Intrinsic::amdgcn_workitem_id_z:
614  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
615  R600::T0_Z, VT);
616 
617  case Intrinsic::r600_recipsqrt_ieee:
618  return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
619 
620  case Intrinsic::r600_recipsqrt_clamped:
621  return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
622  default:
623  return Op;
624  }
625 
626  // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
627  break;
628  }
629  } // end switch(Op.getOpcode())
630  return SDValue();
631 }
632 
635  SelectionDAG &DAG) const {
636  switch (N->getOpcode()) {
637  default:
639  return;
640  case ISD::FP_TO_UINT:
641  if (N->getValueType(0) == MVT::i1) {
642  Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
643  return;
644  }
645  // Since we don't care about out of bounds values we can use FP_TO_SINT for
646  // uints too. The DAGLegalizer code for uint considers some extra cases
647  // which are not necessary here.
649  case ISD::FP_TO_SINT: {
650  if (N->getValueType(0) == MVT::i1) {
651  Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
652  return;
653  }
654 
655  SDValue Result;
656  if (expandFP_TO_SINT(N, Result, DAG))
657  Results.push_back(Result);
658  return;
659  }
660  case ISD::SDIVREM: {
661  SDValue Op = SDValue(N, 1);
662  SDValue RES = LowerSDIVREM(Op, DAG);
663  Results.push_back(RES);
664  Results.push_back(RES.getValue(1));
665  break;
666  }
667  case ISD::UDIVREM: {
668  SDValue Op = SDValue(N, 0);
669  LowerUDIVREM64(Op, DAG, Results);
670  break;
671  }
672  }
673 }
674 
/// Rebuilds \p Vector as an AMDGPUISD::BUILD_VERTICAL_VECTOR node.
///
/// Every element of \p Vector is pulled out with ISD::EXTRACT_VECTOR_ELT and
/// appended to Args; Args then becomes the operand list of the new node. The
/// result has the same value type as \p Vector.
// NOTE(review): the declaration of Args is not visible in this listing (the
// line numbered 680 is absent) -- confirm its type against the full file.
675 SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
676  SDValue Vector) const {
677  SDLoc DL(Vector);
678  EVT VecVT = Vector.getValueType();
679  EVT EltVT = VecVT.getVectorElementType();
681 
// One scalar extract per lane, in lane order.
682  for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
683  Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
684  DAG.getVectorIdxConstant(i, DL)));
685  }
686 
687  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
688 }
689 
/// Custom lowering for ISD::EXTRACT_VECTOR_ELT.
///
/// Operand 0 is the source vector and operand 1 the element index. The node
/// is returned unchanged when the guard below holds (a constant index is one
/// of the cases that satisfies it). Otherwise the vector is first converted
/// with vectorToVerticalVector() and the extract is re-emitted on the
/// converted vector with the same index and result type.
// NOTE(review): the second half of the guard condition (the line numbered
// 697) is missing from this listing -- check the full file before relying on
// the description above.
690 SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
691  SelectionDAG &DAG) const {
692  SDLoc DL(Op);
693  SDValue Vector = Op.getOperand(0);
694  SDValue Index = Op.getOperand(1);
695 
696  if (isa<ConstantSDNode>(Index) ||
698  return Op;
699 
700  Vector = vectorToVerticalVector(DAG, Vector);
701  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
702  Vector, Index);
703 }
704 
/// Custom lowering for ISD::INSERT_VECTOR_ELT.
///
/// Operand 0 is the destination vector, operand 1 the value to insert and
/// operand 2 the element index. The node is returned unchanged when the guard
/// below holds (a constant index is one of the cases that satisfies it).
/// Otherwise the vector is converted with vectorToVerticalVector(), the
/// insert is re-emitted on the converted vector, and the result of that
/// insert is itself passed through vectorToVerticalVector() again.
// NOTE(review): the second half of the guard condition (the line numbered
// 713) is missing from this listing -- check the full file before relying on
// the description above.
705 SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
706  SelectionDAG &DAG) const {
707  SDLoc DL(Op);
708  SDValue Vector = Op.getOperand(0);
709  SDValue Value = Op.getOperand(1);
710  SDValue Index = Op.getOperand(2);
711 
712  if (isa<ConstantSDNode>(Index) ||
714  return Op;
715 
716  Vector = vectorToVerticalVector(DAG, Vector);
717  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
718  Vector, Value, Index);
719  return vectorToVerticalVector(DAG, Insert);
720 }
721 
/// Custom lowering for ISD::GlobalAddress.
///
/// On the path visible here, the global referenced by \p Op is turned into a
/// target global address of type ConstPtrVT and wrapped in an
/// AMDGPUISD::CONST_DATA_PTR node, which is returned.
// NOTE(review): the lines numbered 726-727 and 731 are missing from this
// listing; presumably they hold an early exit and the definition of
// ConstPtrVT. Neither \p MFI nor DL is used by any visible line -- confirm
// against the full file.
722 SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
723  SDValue Op,
724  SelectionDAG &DAG) const {
725  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
728 
729  const DataLayout &DL = DAG.getDataLayout();
730  const GlobalValue *GV = GSD->getGlobal();
732 
733  SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
734  return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
735 }
736 
/// Custom lowering for ISD::FCOS and ISD::FSIN.
///
/// The argument is range-reduced as FRACT(x * 0.15915494309 + 0.5) - 0.5 and
/// fed to AMDGPUISD::COS_HW or AMDGPUISD::SIN_HW depending on the opcode of
/// \p Op. For generations >= R700 that node is returned directly; for older
/// generations it is additionally wrapped in an ISD::FMUL.
// NOTE(review): the second operand of the final FMUL (the line numbered 768)
// is missing from this listing; the comment preceding it suggests a factor
// related to Pi -- confirm against the full file.
737 SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
738  // On hw >= R700, COS/SIN input must be between -1. and 1.
739  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
740  EVT VT = Op.getValueType();
741  SDValue Arg = Op.getOperand(0);
742  SDLoc DL(Op);
743 
744  // TODO: Should this propagate fast-math-flags?
745  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
746  DAG.getNode(ISD::FADD, DL, VT,
747  DAG.getNode(ISD::FMUL, DL, VT, Arg,
748  DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
749  DAG.getConstantFP(0.5, DL, MVT::f32)));
// Pick the hardware trig node that corresponds to the generic opcode.
750  unsigned TrigNode;
751  switch (Op.getOpcode()) {
752  case ISD::FCOS:
753  TrigNode = AMDGPUISD::COS_HW;
754  break;
755  case ISD::FSIN:
756  TrigNode = AMDGPUISD::SIN_HW;
757  break;
758  default:
759  llvm_unreachable("Wrong trig opcode");
760  }
761  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
762  DAG.getNode(ISD::FADD, DL, VT, FractPart,
763  DAG.getConstantFP(-0.5, DL, MVT::f32)));
764  if (Gen >= AMDGPUSubtarget::R700)
765  return TrigVal;
766  // On R600 hw, COS/SIN input must be between -Pi and Pi.
767  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
769 }
770 
771 SDValue R600TargetLowering::LowerShiftParts(SDValue Op,
772  SelectionDAG &DAG) const {
773  SDValue Lo, Hi;
774  expandShiftParts(Op.getNode(), Lo, Hi, DAG);
775  return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
776 }
777 
778 SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
779  unsigned mainop, unsigned ovf) const {
780  SDLoc DL(Op);
781  EVT VT = Op.getValueType();
782 
783  SDValue Lo = Op.getOperand(0);
784  SDValue Hi = Op.getOperand(1);
785 
786  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
787  // Extend sign.
788  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
789  DAG.getValueType(MVT::i1));
790 
791  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
792 
793  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
794 }
795 
796 SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
797  SDLoc DL(Op);
798  return DAG.getNode(
799  ISD::SETCC,
800  DL,
801  MVT::i1,
802  Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
803  DAG.getCondCode(ISD::SETEQ));
804 }
805 
806 SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
807  SDLoc DL(Op);
808  return DAG.getNode(
809  ISD::SETCC,
810  DL,
811  MVT::i1,
812  Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
813  DAG.getCondCode(ISD::SETEQ));
814 }
815 
/// Builds a load of an implicit parameter.
///
/// The load has type \p VT, is chained to the DAG entry node, and uses the
/// i32 constant DwordOffset * 4 as its address.
///
/// \param DwordOffset  index of the parameter in 32-bit words; the resulting
///                     byte offset is asserted to fit in a signed 16-bit value.
// NOTE(review): the lines numbered 821 and 828 are missing from this listing.
// They presumably complete the PointerType::get(...) call and supply the
// remaining DAG.getLoad(...) arguments -- confirm against the full file.
816 SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
817  const SDLoc &DL,
818  unsigned DwordOffset) const {
819  unsigned ByteOffset = DwordOffset * 4;
820  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
822 
823  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
824  assert(isInt<16>(ByteOffset));
825 
826  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
827  DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
829 }
830 
831 bool R600TargetLowering::isZero(SDValue Op) const {
832  if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
833  return Cst->isZero();
834  } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
835  return CstFP->isZero();
836  } else {
837  return false;
838  }
839 }
840 
841 bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
842  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
843  return CFP->isExactlyValue(1.0);
844  }
845  return isAllOnesConstant(Op);
846 }
847 
848 bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
849  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
850  return CFP->getValueAPF().isZero();
851  }
852  return isNullConstant(Op);
853 }
854 
855 SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
856  SDLoc DL(Op);
857  EVT VT = Op.getValueType();
858 
859  SDValue LHS = Op.getOperand(0);
860  SDValue RHS = Op.getOperand(1);
861  SDValue True = Op.getOperand(2);
862  SDValue False = Op.getOperand(3);
863  SDValue CC = Op.getOperand(4);
864  SDValue Temp;
865 
866  if (VT == MVT::f32) {
867  DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
868  SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
869  if (MinMax)
870  return MinMax;
871  }
872 
873  // LHS and RHS are guaranteed to be the same value type
874  EVT CompareVT = LHS.getValueType();
875 
876  // Check if we can lower this to a native operation.
877 
878  // Try to lower to a SET* instruction:
879  //
880  // SET* can match the following patterns:
881  //
882  // select_cc f32, f32, -1, 0, cc_supported
883  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
884  // select_cc i32, i32, -1, 0, cc_supported
885  //
886 
887  // Move hardware True/False values to the correct operand.
888  if (isHWTrueValue(False) && isHWFalseValue(True)) {
889  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
890  ISD::CondCode InverseCC = ISD::getSetCCInverse(CCOpcode, CompareVT);
891  if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
892  std::swap(False, True);
893  CC = DAG.getCondCode(InverseCC);
894  } else {
895  ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
896  if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
897  std::swap(False, True);
898  std::swap(LHS, RHS);
899  CC = DAG.getCondCode(SwapInvCC);
900  }
901  }
902  }
903 
904  if (isHWTrueValue(True) && isHWFalseValue(False) &&
905  (CompareVT == VT || VT == MVT::i32)) {
906  // This can be matched by a SET* instruction.
907  return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
908  }
909 
910  // Try to lower to a CND* instruction:
911  //
912  // CND* can match the following patterns:
913  //
914  // select_cc f32, 0.0, f32, f32, cc_supported
915  // select_cc f32, 0.0, i32, i32, cc_supported
916  // select_cc i32, 0, f32, f32, cc_supported
917  // select_cc i32, 0, i32, i32, cc_supported
918  //
919 
920  // Try to move the zero value to the RHS
921  if (isZero(LHS)) {
922  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
923  // Try swapping the operands
924  ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
925  if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
926  std::swap(LHS, RHS);
927  CC = DAG.getCondCode(CCSwapped);
928  } else {
929  // Try inverting the conditon and then swapping the operands
930  ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT);
931  CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
932  if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
933  std::swap(True, False);
934  std::swap(LHS, RHS);
935  CC = DAG.getCondCode(CCSwapped);
936  }
937  }
938  }
939  if (isZero(RHS)) {
940  SDValue Cond = LHS;
941  SDValue Zero = RHS;
942  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
943  if (CompareVT != VT) {
944  // Bitcast True / False to the correct types. This will end up being
945  // a nop, but it allows us to define only a single pattern in the
946  // .TD files for each CND* instruction rather than having to have
947  // one pattern for integer True/False and one for fp True/False
948  True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
949  False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
950  }
951 
952  switch (CCOpcode) {
953  case ISD::SETONE:
954  case ISD::SETUNE:
955  case ISD::SETNE:
956  CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT);
957  Temp = True;
958  True = False;
959  False = Temp;
960  break;
961  default:
962  break;
963  }
964  SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
965  Cond, Zero,
966  True, False,
967  DAG.getCondCode(CCOpcode));
968  return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
969  }
970 
971  // If we make it this for it means we have no native instructions to handle
972  // this SELECT_CC, so we must lower it.
973  SDValue HWTrue, HWFalse;
974 
975  if (CompareVT == MVT::f32) {
976  HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
977  HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
978  } else if (CompareVT == MVT::i32) {
979  HWTrue = DAG.getConstant(-1, DL, CompareVT);
980  HWFalse = DAG.getConstant(0, DL, CompareVT);
981  }
982  else {
983  llvm_unreachable("Unhandled value type in LowerSELECT_CC");
984  }
985 
986  // Lower this unsupported SELECT_CC into a combination of two supported
987  // SELECT_CC operations.
988  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
989 
990  return DAG.getNode(ISD::SELECT_CC, DL, VT,
991  Cond, HWFalse,
992  True, False,
993  DAG.getCondCode(ISD::SETNE));
994 }
995 
996 /// LLVM generates byte-addressed pointers. For indirect addressing, we need to
997 /// convert these pointers to a register index. Each register holds
998 /// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
999 /// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1000 /// for indirect addressing.
1001 SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1002  unsigned StackWidth,
1003  SelectionDAG &DAG) const {
1004  unsigned SRLPad;
1005  switch(StackWidth) {
1006  case 1:
1007  SRLPad = 2;
1008  break;
1009  case 2:
1010  SRLPad = 3;
1011  break;
1012  case 4:
1013  SRLPad = 4;
1014  break;
1015  default: llvm_unreachable("Invalid stack width");
1016  }
1017 
1018  SDLoc DL(Ptr);
1019  return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
1020  DAG.getConstant(SRLPad, DL, MVT::i32));
1021 }
1022 
1023 void R600TargetLowering::getStackAddress(unsigned StackWidth,
1024  unsigned ElemIdx,
1025  unsigned &Channel,
1026  unsigned &PtrIncr) const {
1027  switch (StackWidth) {
1028  default:
1029  case 1:
1030  Channel = 0;
1031  if (ElemIdx > 0) {
1032  PtrIncr = 1;
1033  } else {
1034  PtrIncr = 0;
1035  }
1036  break;
1037  case 2:
1038  Channel = ElemIdx % 2;
1039  if (ElemIdx == 2) {
1040  PtrIncr = 1;
1041  } else {
1042  PtrIncr = 0;
1043  }
1044  break;
1045  case 4:
1046  Channel = ElemIdx;
1047  PtrIncr = 0;
1048  break;
1049  }
1050 }
1051 
/// Lowers a sub-dword store to the private address space into a
/// read-modify-write of the 32-bit word that contains it.
///
/// Only i8 and i16 memory types are handled. The dword at (address & ~3) is
/// loaded, the target byte/halfword is cleared with a shifted and inverted
/// mask, the new value is OR-ed in at bit offset (address & 3) * 8, and the
/// dword is stored back. When the incoming chain is an
/// AMDGPUISD::DUMMY_CHAIN node, that node is skipped for the load and, after
/// the new store is built, every use of the old chain is redirected to a
/// fresh DUMMY_CHAIN hanging off the new store.
///
/// \returns the new dword store node.
// NOTE(review): the definition of PtrInfo (the line numbered 1091) is missing
// from this listing -- confirm against the full file.
1052 SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
1053  SelectionDAG &DAG) const {
1054  SDLoc DL(Store);
1055  //TODO: Who creates the i8 stores?
1056  assert(Store->isTruncatingStore()
1057  || Store->getValue().getValueType() == MVT::i8);
1058  assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
1059 
// Unshifted mask covering the bits of the stored memory type.
1060  SDValue Mask;
1061  if (Store->getMemoryVT() == MVT::i8) {
1062  assert(Store->getAlignment() >= 1);
1063  Mask = DAG.getConstant(0xff, DL, MVT::i32);
1064  } else if (Store->getMemoryVT() == MVT::i16) {
1065  assert(Store->getAlignment() >= 2);
1066  Mask = DAG.getConstant(0xffff, DL, MVT::i32);
1067  } else {
1068  llvm_unreachable("Unsupported private trunc store");
1069  }
1070 
1071  SDValue OldChain = Store->getChain();
1072  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
1073  // Skip dummy
1074  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
1075  SDValue BasePtr = Store->getBasePtr();
1076  SDValue Offset = Store->getOffset();
1077  EVT MemVT = Store->getMemoryVT();
1078 
1079  SDValue LoadPtr = BasePtr;
1080  if (!Offset.isUndef()) {
1081  LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1082  }
1083 
1084  // Get dword location
1085  // TODO: this should be eliminated by the future SHR ptr, 2
1086  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1087  DAG.getConstant(0xfffffffc, DL, MVT::i32));
1088 
1089  // Load dword
1090  // TODO: can we be smarter about machine pointer info?
1092  SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
1093 
// Continue on the load's output chain so the store is ordered after it.
1094  Chain = Dst.getValue(1);
1095 
1096  // Get offset in dword
1097  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1098  DAG.getConstant(0x3, DL, MVT::i32));
1099 
1100  // Convert byte offset to bit shift
1101  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1102  DAG.getConstant(3, DL, MVT::i32));
1103 
1104  // TODO: Contrary to the name of the function,
1105  // it also handles sub i32 non-truncating stores (like i1)
1106  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
1107  Store->getValue());
1108 
1109  // Mask the value to the right type
1110  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
1111 
1112  // Shift the value in place
1113  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
1114  MaskedValue, ShiftAmt);
1115 
1116  // Shift the mask in place
1117  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);
1118 
1119  // Invert the mask. NOTE: if we had native ROL instructions we could
1120  // use inverted mask
1121  DstMask = DAG.getNOT(DL, DstMask, MVT::i32);
1122 
1123  // Cleanup the target bits
1124  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
1125 
1126  // Add the new bits
1127  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
1128 
1129  // Store dword
1130  // TODO: Can we be smarter about MachinePointerInfo?
1131  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);
1132 
1133  // If we are part of expanded vector, make our neighbors depend on this store
1134  if (VectorTrunc) {
1135  // Make all other vector elements depend on this store
1136  Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
1137  DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
1138  }
1139  return NewStore;
1140 }
1141 
// Custom lowering for store nodes. Paths taken below, in order:
//  - vector stores to LOCAL/PRIVATE, and any truncating vector store, are
//    scalarized (a PRIVATE truncating store is first rebuilt on top of an
//    AMDGPUISD::DUMMY_CHAIN node);
//  - under-aligned stores rejected by allowsMisalignedMemoryAccesses() are
//    expanded with expandUnalignedStore();
//  - GLOBAL truncating stores build a {value, 0, 0, mask} v4i32 operand;
//    GLOBAL stores whose value type is at least i32 and whose pointer is not
//    yet tagged get a DWORDADDR pointer;
//  - any remaining non-PRIVATE store returns an empty SDValue;
//  - PRIVATE stores narrower than i32 go to lowerPrivateTruncStore(), the
//    others get a DWORDADDR pointer unless already tagged.
1142 SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1143  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1144  unsigned AS = StoreNode->getAddressSpace();
1145 
1146  SDValue Chain = StoreNode->getChain();
1147  SDValue Ptr = StoreNode->getBasePtr();
1148  SDValue Value = StoreNode->getValue();
1149 
1150  EVT VT = Value.getValueType();
1151  EVT MemVT = StoreNode->getMemoryVT();
1152  EVT PtrVT = Ptr.getValueType();
1153 
1154  SDLoc DL(Op);
1155 
1156  const bool TruncatingStore = StoreNode->isTruncatingStore();
1157 
1158  // Neither LOCAL nor PRIVATE can do vectors at the moment
1159  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS ||
1160  TruncatingStore) &&
1161  VT.isVector()) {
1162  if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && TruncatingStore) {
1163  // Add an extra level of chain to isolate this vector
1164  SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
1165  // TODO: can the chain be replaced without creating a new store?
1166  SDValue NewStore = DAG.getTruncStore(
1167  NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
1168  MemVT, StoreNode->getAlignment(),
1169  StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
  // From here on the rebuilt store (chained on DUMMY_CHAIN) is scalarized.
1170  StoreNode = cast<StoreSDNode>(NewStore);
1171  }
1172 
1173  return scalarizeVectorStore(StoreNode, DAG);
1174  }
1175 
  // Expand stores whose alignment is below the store size of the memory type
  // unless the target reports the misaligned access as allowed.
1176  Align Alignment = StoreNode->getAlign();
1177  if (Alignment < MemVT.getStoreSize() &&
1178  !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
1179  StoreNode->getMemOperand()->getFlags(),
1180  nullptr)) {
1181  return expandUnalignedStore(StoreNode, DAG);
1182  }
1183 
  // Byte address shifted right by two, i.e. the dword index of the address.
1184  SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
1185  DAG.getConstant(2, DL, PtrVT));
1186 
1187  if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
1188  // It is beneficial to create MSKOR here instead of combiner to avoid
1189  // artificial dependencies introduced by RMW
1190  if (TruncatingStore) {
1191  assert(VT.bitsLE(MVT::i32));
1192  SDValue MaskConstant;
1193  if (MemVT == MVT::i8) {
1194  MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
1195  } else {
1196  assert(MemVT == MVT::i16);
1197  assert(StoreNode->getAlignment() >= 2);
1198  MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
1199  }
1200 
  // Bit offset of the stored value inside its dword: (Ptr & 3) * 8.
1201  SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
1202  DAG.getConstant(0x00000003, DL, PtrVT));
1203  SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1204  DAG.getConstant(3, DL, VT));
1205 
1206  // Put the mask in correct place
1207  SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);
1208 
1209  // Put the value bits in correct place
1210  SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1211  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
1212 
1213  // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1214  // vector instead.
1215  SDValue Src[4] = {
1216  ShiftedValue,
1217  DAG.getConstant(0, DL, MVT::i32),
1218  DAG.getConstant(0, DL, MVT::i32),
1219  Mask
1220  };
1221  SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
1222  SDValue Args[3] = { Chain, Input, DWordAddr };
  // NOTE(review): listing line 1223 is absent from this extract. The two
  // argument lines below are the tail of the call that started on it,
  // presumably the node-creating return for the MSKOR store -- restore it
  // from the upstream file before building.
1224  Op->getVTList(), Args, MemVT,
1225  StoreNode->getMemOperand());
1226  } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
1227  // Convert pointer from byte address to dword address.
1228  Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1229 
1230  if (StoreNode->isIndexed()) {
1231  llvm_unreachable("Indexed stores not supported yet");
1232  } else {
1233  Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1234  }
1235  return Chain;
1236  }
1237  }
1238 
1239  // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
1240  if (AS != AMDGPUAS::PRIVATE_ADDRESS)
1241  return SDValue();
1242 
  // Only PRIVATE stores reach this point.
1243  if (MemVT.bitsLT(MVT::i32))
1244  return lowerPrivateTruncStore(StoreNode, DAG);
1245 
1246  // Standard i32+ store, tag it with DWORDADDR to note that the address
1247  // has been shifted
1248  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1249  Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1250  return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1251  }
1252 
1253  // Tagged i32+ stores will be matched by patterns
1254  return SDValue();
1255 }
1256 
1257 // Returns 512 + (kc_bank << 12), i.e. 512 + 4096 * bank for banks 0..15 as
// spelled out by the return statements below, or -1 in the default case.
1258 static int
// NOTE(review): the declarator line (listing line 1259) and every `case` label
// (the odd listing lines 1261..1291) are absent from this extract; each
// return below belongs to one of those missing labels.
1260  switch (AddressSpace) {
1262  return 512;
1264  return 512 + 4096;
1266  return 512 + 4096 * 2;
1268  return 512 + 4096 * 3;
1270  return 512 + 4096 * 4;
1272  return 512 + 4096 * 5;
1274  return 512 + 4096 * 6;
1276  return 512 + 4096 * 7;
1278  return 512 + 4096 * 8;
1280  return 512 + 4096 * 9;
1282  return 512 + 4096 * 10;
1284  return 512 + 4096 * 11;
1286  return 512 + 4096 * 12;
1288  return 512 + 4096 * 13;
1290  return 512 + 4096 * 14;
1292  return 512 + 4096 * 15;
1293  default:
  // No block is associated with this address space.
1294  return -1;
1295  }
1296 }
1297 
// Lowers an extending load by reading the whole dword that contains the
// addressed bytes and extracting the requested part:
//   1. mask the (base + offset) pointer down to a 4-byte boundary,
//   2. load an i32 from that address,
//   3. shift right by 8 * (pointer & 3),
//   4. sign-extend (SEXTLOAD) or zero-extend (any other extension type) from
//      the scalar memory type.
// Returns the extracted value merged with the chain of the dword load.
1298 SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
1299  SelectionDAG &DAG) const {
1300  SDLoc DL(Op);
1301  LoadSDNode *Load = cast<LoadSDNode>(Op);
1302  ISD::LoadExtType ExtType = Load->getExtensionType();
1303  EVT MemVT = Load->getMemoryVT();
  // The extraction below relies on the value being naturally aligned.
1304  assert(Load->getAlignment() >= MemVT.getStoreSize());
1305 
1306  SDValue BasePtr = Load->getBasePtr();
1307  SDValue Chain = Load->getChain();
1308  SDValue Offset = Load->getOffset();
1309 
1310  SDValue LoadPtr = BasePtr;
1311  if (!Offset.isUndef()) {
1312  LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1313  }
1314 
1315  // Get dword location
1316  // NOTE: this should be eliminated by the future SHR ptr, 2
1317  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1318  DAG.getConstant(0xfffffffc, DL, MVT::i32));
1319 
1320  // Load dword
1321  // TODO: can we be smarter about machine pointer info?
  // NOTE(review): listing line 1322, which declares PtrInfo, is absent from
  // this extract.
1323  SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
1324 
1325  // Get offset within the register.
1326  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1327  LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));
1328 
1329  // Bit offset of target byte (byteIdx * 8).
1330  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1331  DAG.getConstant(3, DL, MVT::i32));
1332 
1333  // Shift to the right.
1334  SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);
1335 
1336  // Eliminate the upper bits by setting them to ...
1337  EVT MemEltVT = MemVT.getScalarType();
1338 
1339  if (ExtType == ISD::SEXTLOAD) { // ... ones.
1340  SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1341  Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
1342  } else { // ... or zeros.
1343  Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
1344  }
1345 
1346  SDValue Ops[] = {
1347  Ret,
1348  Read.getValue(1) // This should be our output chain
1349  };
1350 
1351  return DAG.getMergeValues(Ops, DL);
1352 }
1353 
// Custom lowering for load nodes. Paths taken below, in order:
//  - PRIVATE extending loads narrower than i32 go to lowerPrivateExtLoad();
//  - vector loads from LOCAL/PRIVATE are scalarized;
//  - non-extending or zero-extending loads from an address space for which
//    ConstantAddressBlock() returns a block are turned into constant-buffer
//    reads;
//  - remaining SEXTLOADs are rewritten as EXTLOAD + SIGN_EXTEND_INREG;
//  - remaining non-PRIVATE loads return an empty SDValue;
//  - PRIVATE loads get a DWORDADDR-tagged, right-shifted pointer unless the
//    pointer is already tagged.
1354 SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1355  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1356  unsigned AS = LoadNode->getAddressSpace();
1357  EVT MemVT = LoadNode->getMemoryVT();
1358  ISD::LoadExtType ExtType = LoadNode->getExtensionType();
1359 
1360  if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
1361  ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
1362  return lowerPrivateExtLoad(Op, DAG);
1363  }
1364 
1365  SDLoc DL(Op);
1366  EVT VT = Op.getValueType();
1367  SDValue Chain = LoadNode->getChain();
1368  SDValue Ptr = LoadNode->getBasePtr();
1369 
1370  if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
1371  LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
1372  VT.isVector()) {
1373  SDValue Ops[2];
1374  std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(LoadNode, DAG);
1375  return DAG.getMergeValues(Ops, DL);
1376  }
1377 
1378  // This is still used for explicit load from addrspace(8)
1379  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
1380  if (ConstantBlock > -1 &&
1381  ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
1382  (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
1383  SDValue Result;
1384  if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
1385  isa<ConstantSDNode>(Ptr)) {
1386  return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
1387  } else {
1388  //TODO: Does this even work?
1389  // non-constant ptr can't be folded, keeps it as a v4f32 load
  // NOTE(review): listing lines 1390 and 1394 are absent from this extract;
  // the operands below are the remains of the expression that presumably
  // assigns Result.
1391  DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
1392  DAG.getConstant(4, DL, MVT::i32)),
1393  DAG.getConstant(LoadNode->getAddressSpace() -
1395  );
1396  }
1397 
1398  if (!VT.isVector()) {
  // NOTE(review): listing line 1399 is absent from this extract; the line
  // below is the trailing operand (index 0) of the call that started on it.
1400  DAG.getConstant(0, DL, MVT::i32));
1401  }
1402 
1403  SDValue MergedValues[2] = {
1404  Result,
1405  Chain
1406  };
1407  return DAG.getMergeValues(MergedValues, DL);
1408  }
1409 
1410  // For most operations returning SDValue() will result in the node being
1411  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
1412  // need to manually expand loads that may be legal in some address spaces and
1413  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
1414  // compute shaders, since the data is sign extended when it is uploaded to the
1415  // buffer. However SEXT loads from other address spaces are not supported, so
1416  // we need to expand them here.
1417  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1418  assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
1419  SDValue NewLoad = DAG.getExtLoad(
1420  ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
1421  LoadNode->getAlignment(), LoadNode->getMemOperand()->getFlags());
1422  SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
1423  DAG.getValueType(MemVT));
1424 
  // NOTE(review): the chain returned here is the incoming Chain rather than
  // NewLoad.getValue(1) -- confirm that this is intended.
1425  SDValue MergedValues[2] = { Res, Chain };
1426  return DAG.getMergeValues(MergedValues, DL);
1427  }
1428 
1429  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1430  return SDValue();
1431  }
1432 
1433  // DWORDADDR ISD marks already shifted address
1434  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1435  assert(VT == MVT::i32);
1436  Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
1437  Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
1438  return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
1439  }
1440  return SDValue();
1441 }
1442 
1443 SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1444  SDValue Chain = Op.getOperand(0);
1445  SDValue Cond = Op.getOperand(1);
1446  SDValue Jump = Op.getOperand(2);
1447 
1448  return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1449  Chain, Jump, Cond);
1450 }
1451 
// Replaces a frame-index node with a constant computed from the frame
// lowering: the fixed part of the frame-index reference offset multiplied by
// 4 and by the stack width reported for this function. The constant has the
// value type of Op.
1452 SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
1453  SelectionDAG &DAG) const {
1454  MachineFunction &MF = DAG.getMachineFunction();
1455  const R600FrameLowering *TFL = Subtarget->getFrameLowering();
1456 
1457  FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
1458 
1459  unsigned FrameIndex = FIN->getIndex();
  // Required by getFrameIndexReference(); its value is not used afterwards.
1460  Register IgnoredFrameReg;
  // NOTE(review): listing line 1461, which opens the declaration of Offset,
  // is absent from this extract; the call below is its initializer.
1462  TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
1463  return DAG.getConstant(Offset.getFixed() * 4 * TFL->getStackWidth(MF),
1464  SDLoc(Op), Op.getValueType());
1465 }
1466 
// Selects the calling-convention assignment function for CC.
// NOTE(review): the first signature line (listing line 1467) and several
// `case` labels (listing lines 1470-1471 and 1476-1482) are absent from this
// extract, so only part of the switch is visible.
1468  bool IsVarArg) const {
1469  switch (CC) {
1472  case CallingConv::C:
1473  case CallingConv::Fast:
1474  case CallingConv::Cold:
  // These conventions (plus any listed on the missing lines above) must never
  // reach this function.
1475  llvm_unreachable("kernels should not be handled here");
  // The labels that select CC_R600 are on the missing lines.
1483  return CC_R600;
1484  default:
1485  report_fatal_error("Unsupported calling convention.");
1486  }
1487 }
1488 
1489 /// XXX Only kernel functions are supported, so we can assume for now that
1490 /// every function is a kernel function, but in the future we should use
1491 /// separate calling conventions for kernel and non-kernel functions.
///
/// For every entry of Ins one value is appended to InVals: a CopyFromReg of a
/// live-in register for shader calling conventions, otherwise a load at the
/// constant address given by the argument's memory offset. The incoming Chain
/// is returned unchanged.
// NOTE(review): this extract lacks several lines of the function (listing
// lines 1492, 1496, 1500, 1505, 1532, 1548 and 1554-1555), including the
// first signature line and the declarations of ArgLocs, Ext and PtrInfo.
1493  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1494  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1495  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1497  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1498  *DAG.getContext());
1499  MachineFunction &MF = DAG.getMachineFunction();
1501 
1502  if (AMDGPU::isShader(CallConv)) {
1503  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
1504  } else {
  // The non-shader analysis call (listing line 1505) is missing here.
1506  }
1507 
1508  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
1509  CCValAssign &VA = ArgLocs[i];
1510  const ISD::InputArg &In = Ins[i];
1511  EVT VT = In.VT;
1512  EVT MemVT = VA.getLocVT();
1513  if (!VT.isVector() && MemVT.isVector()) {
1514  // Get load source type if scalarized.
1515  MemVT = MemVT.getVectorElementType();
1516  }
1517 
  // Shader arguments arrive in registers: mark the location register as a
  // live-in and copy from it.
1518  if (AMDGPU::isShader(CallConv)) {
1519  Register Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
1520  SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1521  InVals.push_back(Register);
1522  continue;
1523  }
1524 
1525  // i64 isn't a legal type, so the register type used ends up as i32, which
1526  // isn't expected here. It attempts to create this sextload, but it ends up
1527  // being invalid. Somehow this seems to work with i64 arguments, but breaks
1528  // for <1 x i64>.
1529 
1530  // The first 36 bytes of the input buffer contains information about
1531  // thread group and global sizes.
  // A differing scalar width between memory and value type forces SEXTLOAD.
1533  if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
1534  // FIXME: This should really check the extload type, but the handling of
1535  // extload vector parameters seems to be broken.
1536 
1537  // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1538  Ext = ISD::SEXTLOAD;
1539  }
1540 
1541  // Compute the offset from the value.
1542  // XXX - I think PartOffset should give you this, but it seems to give the
1543  // size of the register which isn't useful.
1544 
1545  unsigned PartOffset = VA.getLocMemOffset();
1546  unsigned Alignment = MinAlign(VT.getStoreSize(), PartOffset);
1547 
  // Unindexed load whose address is the constant PartOffset.
1549  SDValue Arg = DAG.getLoad(
1550  ISD::UNINDEXED, Ext, VT, DL, Chain,
1551  DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
1552  PtrInfo,
1553  MemVT, Alignment, MachineMemOperand::MONonTemporal |
1556 
1557  InVals.push_back(Arg);
1558  }
1559  return Chain;
1560 }
1561 
// Returns MVT::i32 for every non-vector VT.
// NOTE(review): the first signature line (listing line 1562) and the return
// for vector types (listing line 1566) are absent from this extract.
1563  EVT VT) const {
1564  if (!VT.isVector())
1565  return MVT::i32;
1567 }
1568 
// Returns whether a merged store of type MemVT is acceptable in address space
// AS: at most 32 bits for LOCAL and PRIVATE, always true for any other
// address space.
// NOTE(review): the first signature line (listing line 1569) is absent from
// this extract.
1570  const MachineFunction &MF) const {
1571  // Local and Private addresses do not handle vectors. Limit to i32
1572  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS)) {
1573  return (MemVT.getSizeInBits() <= 32);
1574  }
1575  return true;
1576 }
1577 
// Reports whether a misaligned access of type VT is allowed.
//
// Returns false for non-simple types, MVT::Other and anything narrower than
// i32. Otherwise returns true only when VT is wider than i32 and Alignment is
// at least 4, so an access of exactly i32 is also rejected.
// If IsFast is non-null it is set to false on the early rejections and to
// true on every path that reaches the final return, whatever that returns.
// NOTE(review): the first signature line (listing line 1578) is absent from
// this extract.
1579  EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1580  bool *IsFast) const {
1581  if (IsFast)
1582  *IsFast = false;
1583 
1584  if (!VT.isSimple() || VT == MVT::Other)
1585  return false;
1586 
1587  if (VT.bitsLT(MVT::i32))
1588  return false;
1589 
1590  // TODO: This is a rough estimate.
1591  if (IsFast)
1592  *IsFast = true;
1593 
1594  return VT.bitsGT(MVT::i32) && Alignment >= Align(4);
1595 }
1596 
// Rebuilds the 4-element vector VectorEntry so that elements expressible
// through a swizzle selector become undef, and records the selector to use
// for each such element in RemapSwizzle (which must be empty on entry):
//   undef element          -> 7 (SEL_MASK_WRITE)
//   floating-point zero    -> 4 (SEL_0)
//   floating-point 1.0     -> 5 (SEL_1)
//   duplicate of element j -> j (for the first matching j < i)
// Returns the new build vector, of the same type as VectorEntry.
// NOTE(review): the first signature line (listing line 1597) is absent from
// this extract.
1598  SelectionDAG &DAG, SDValue VectorEntry,
1599  DenseMap<unsigned, unsigned> &RemapSwizzle) {
1600  assert(RemapSwizzle.empty());
1601 
1602  SDLoc DL(VectorEntry);
1603  EVT EltTy = VectorEntry.getValueType().getVectorElementType();
1604 
  // Split the input into its four elements.
1605  SDValue NewBldVec[4];
1606  for (unsigned i = 0; i < 4; i++)
1607  NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
1608  DAG.getIntPtrConstant(i, DL));
1609 
1610  for (unsigned i = 0; i < 4; i++) {
1611  if (NewBldVec[i].isUndef())
1612  // We mask write here to teach later passes that the ith element of this
1613  // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1614  // break false dependencies and additionally make assembly easier to read.
1615  RemapSwizzle[i] = 7; // SEL_MASK_WRITE
1616  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1617  if (C->isZero()) {
1618  RemapSwizzle[i] = 4; // SEL_0
1619  NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1620  } else if (C->isExactlyValue(1.0)) {
1621  RemapSwizzle[i] = 5; // SEL_1
1622  NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1623  }
1624  }
1625 
  // Elements that are (or just became) undef need no duplicate search.
1626  if (NewBldVec[i].isUndef())
1627  continue;
1628 
  // Redirect a repeated value to its first occurrence.
1629  for (unsigned j = 0; j < i; j++) {
1630  if (NewBldVec[i] == NewBldVec[j]) {
1631  NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1632  RemapSwizzle[i] = j;
1633  break;
1634  }
1635  }
1636  }
1637 
1638  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1639  NewBldVec);
1640 }
1641 
// Rebuilds the 4-element vector VectorEntry after swapping at most one pair
// of elements, and records the resulting lane mapping in RemapSwizzle (which
// must be empty on entry and starts out as the identity mapping).
//
// An element that is an EXTRACT_VECTOR_ELT whose constant index equals its
// own position marks that position as unmovable. The first
// EXTRACT_VECTOR_ELT element whose index designates a movable position is
// swapped with the element at that position, after which the search stops.
// NOTE(review): the first signature line (listing line 1642) is absent from
// this extract.
1643  DenseMap<unsigned, unsigned> &RemapSwizzle) {
1644  assert(RemapSwizzle.empty());
1645 
1646  SDLoc DL(VectorEntry);
1647  EVT EltTy = VectorEntry.getValueType().getVectorElementType();
1648 
1649  SDValue NewBldVec[4];
1650  bool isUnmovable[4] = {false, false, false, false};
1651  for (unsigned i = 0; i < 4; i++)
1652  NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
1653  DAG.getIntPtrConstant(i, DL));
1654 
  // First pass: identity remap, and pin elements already in their own lane.
1655  for (unsigned i = 0; i < 4; i++) {
1656  RemapSwizzle[i] = i;
1657  if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1658  unsigned Idx = cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1659  ->getZExtValue();
1660  if (i == Idx)
1661  isUnmovable[Idx] = true;
1662  }
1663  }
1664 
  // Second pass: perform a single swap, then stop.
1665  for (unsigned i = 0; i < 4; i++) {
1666  if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1667  unsigned Idx = cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1668  ->getZExtValue();
1669  if (isUnmovable[Idx])
1670  continue;
1671  // Swap i and Idx
1672  std::swap(NewBldVec[Idx], NewBldVec[i]);
1673  std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1674  break;
1675  }
1676  }
1677 
1678  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1679  NewBldVec);
1680 }
1681 
1682 SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
1683  SelectionDAG &DAG,
1684  const SDLoc &DL) const {
1685  // Old -> New swizzle values
1686  DenseMap<unsigned, unsigned> SwizzleRemap;
1687 
1688  BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1689  for (unsigned i = 0; i < 4; i++) {
1690  unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
1691  if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1692  Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
1693  }
1694 
1695  SwizzleRemap.clear();
1696  BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1697  for (unsigned i = 0; i < 4; i++) {
1698  unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
1699  if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1700  Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
1701  }
1702 
1703  return BuildVector;
1704 }
1705 
// Turns a load from a constant pointer into a build vector of
// AMDGPUISD::CONST_ADDRESS nodes, one per 32-bit channel.
//
// Returns an empty SDValue when the scalar memory type is not i32, when the
// load is an extending load, or when its alignment is below 4. Otherwise
// returns the result merged with the load's incoming chain. Block is passed
// to ConstantAddressBlock() to obtain the base of the constant block.
1706 SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
1707  SelectionDAG &DAG) const {
1708  SDLoc DL(LoadNode);
1709  EVT VT = LoadNode->getValueType(0);
1710  SDValue Chain = LoadNode->getChain();
1711  SDValue Ptr = LoadNode->getBasePtr();
1712  assert (isa<ConstantSDNode>(Ptr));
1713 
1714  //TODO: Support smaller loads
1715  if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode))
1716  return SDValue();
1717 
1718  if (LoadNode->getAlignment() < 4)
1719  return SDValue();
1720 
1721  int ConstantBlock = ConstantAddressBlock(Block);
1722 
1723  SDValue Slots[4];
1724  for (unsigned i = 0; i < 4; i++) {
1725  // We want Const position encoded with the following formula :
1726  // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1727  // const_index is Ptr computed by llvm using an alignment of 16.
1728  // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
1729  // then div by 4 at the ISel step
1730  SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
1731  DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
1732  Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1733  }
  // Scalar loads are built as v4i32; vector loads use their own type and
  // only as many slots as they have elements.
1734  EVT NewVT = MVT::v4i32;
1735  unsigned NumElements = 4;
1736  if (VT.isVector()) {
1737  NewVT = VT;
1738  NumElements = VT.getVectorNumElements();
1739  }
1740  SDValue Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
1741  if (!VT.isVector()) {
  // NOTE(review): listing line 1742 is absent from this extract; the line
  // below is the trailing operand (index 0) of the call that started on it.
1743  DAG.getConstant(0, DL, MVT::i32));
1744  }
1745  SDValue MergedValues[2] = {
1746  Result,
1747  Chain
1748  };
1749  return DAG.getMergeValues(MergedValues, DL);
1750 }
1751 
1752 //===----------------------------------------------------------------------===//
1753 // Custom DAG Optimizations
1754 //===----------------------------------------------------------------------===//
1755 
// Target-specific DAG combines, dispatched on the opcode of N:
//   FP_ROUND            drop an f64 UINT_TO_FP round trip
//   FP_TO_SINT          fold fneg(select_cc ..) into an integer select_cc
//   INSERT_VECTOR_ELT   fold an insertion into a BUILD_VECTOR
//   EXTRACT_VECTOR_ELT  read directly from a (bitcast) BUILD_VECTOR
//   SELECT_CC           collapse a select_cc of a select_cc
//   R600_EXPORT / TEXTURE_FETCH   optimize the swizzle operands
//   LOAD                constant-pointer PARAM_I loads -> constBufferLoad
// NOTE(review): this extract lacks the first signature line (listing line
// 1756) and listing lines 1813, 1824, 1876, 1907 and 1980; each gap is
// flagged where it occurs.
1757  DAGCombinerInfo &DCI) const {
1758  SelectionDAG &DAG = DCI.DAG;
1759  SDLoc DL(N);
1760 
1761  switch (N->getOpcode()) {
1762  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1763  case ISD::FP_ROUND: {
1764  SDValue Arg = N->getOperand(0);
1765  if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
1766  return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
1767  Arg.getOperand(0));
1768  }
1769  break;
1770  }
1771 
1772  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1773  // (i32 select_cc f32, f32, -1, 0 cc)
1774  //
1775  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1776  // this to one of the SET*_DX10 instructions.
1777  case ISD::FP_TO_SINT: {
1778  SDValue FNeg = N->getOperand(0);
1779  if (FNeg.getOpcode() != ISD::FNEG) {
1780  return SDValue();
1781  }
1782  SDValue SelectCC = FNeg.getOperand(0);
1783  if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1784  SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1785  SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1786  !isHWTrueValue(SelectCC.getOperand(2)) ||
1787  !isHWFalseValue(SelectCC.getOperand(3))) {
1788  return SDValue();
1789  }
1790 
1791  return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
1792  SelectCC.getOperand(0), // LHS
1793  SelectCC.getOperand(1), // RHS
1794  DAG.getConstant(-1, DL, MVT::i32), // True
1795  DAG.getConstant(0, DL, MVT::i32), // False
1796  SelectCC.getOperand(4)); // CC
1797  }
1798 
1799  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1800  // => build_vector elt0, ... , NewEltIdx, ... , eltN
1801  case ISD::INSERT_VECTOR_ELT: {
1802  SDValue InVec = N->getOperand(0);
1803  SDValue InVal = N->getOperand(1);
1804  SDValue EltNo = N->getOperand(2);
1805 
1806  // If the inserted element is an UNDEF, just use the input vector.
1807  if (InVal.isUndef())
1808  return InVec;
1809 
1810  EVT VT = InVec.getValueType();
1811 
1812  // If we can't generate a legal BUILD_VECTOR, exit
  // NOTE(review): the condition guarding this return (listing line 1813) is
  // absent from this extract.
1814  return SDValue();
1815 
1816  // Check that we know which element is being inserted
1817  if (!isa<ConstantSDNode>(EltNo))
1818  return SDValue();
1819  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1820 
1821  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1822  // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1823  // vector elements.
  // NOTE(review): the declaration of Ops (listing line 1824) is absent from
  // this extract.
1825  if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1826  Ops.append(InVec.getNode()->op_begin(),
1827  InVec.getNode()->op_end());
1828  } else if (InVec.isUndef()) {
1829  unsigned NElts = VT.getVectorNumElements();
1830  Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1831  } else {
1832  return SDValue();
1833  }
1834 
1835  // Insert the element
1836  if (Elt < Ops.size()) {
1837  // All the operands of BUILD_VECTOR must have the same type;
1838  // we enforce that here.
1839  EVT OpVT = Ops[0].getValueType();
1840  if (InVal.getValueType() != OpVT)
1841  InVal = OpVT.bitsGT(InVal.getValueType()) ?
1842  DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
1843  DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
1844  Ops[Elt] = InVal;
1845  }
1846 
1847  // Return the new vector
1848  return DAG.getBuildVector(VT, DL, Ops);
1849  }
1850 
1851  // Extract_vec (Build_vector) generated by custom lowering
1852  // also needs to be customly combined
1853  case ISD::EXTRACT_VECTOR_ELT: {
1854  SDValue Arg = N->getOperand(0);
1855  if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1856  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1857  unsigned Element = Const->getZExtValue();
1858  return Arg->getOperand(Element);
1859  }
1860  }
  // Same fold through a bitcast, only when the element count is unchanged.
1861  if (Arg.getOpcode() == ISD::BITCAST &&
1862  Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
1863  (Arg.getOperand(0).getValueType().getVectorNumElements() ==
1864  Arg.getValueType().getVectorNumElements())) {
1865  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1866  unsigned Element = Const->getZExtValue();
1867  return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
1868  Arg->getOperand(0).getOperand(Element));
1869  }
1870  }
1871  break;
1872  }
1873 
1874  case ISD::SELECT_CC: {
1875  // Try common optimizations
  // NOTE(review): the statement that computes and tests Ret (listing line
  // 1876) is absent from this extract.
1877  return Ret;
1878 
1879  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1880  // selectcc x, y, a, b, inv(cc)
1881  //
1882  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1883  // selectcc x, y, a, b, cc
1884  SDValue LHS = N->getOperand(0);
1885  if (LHS.getOpcode() != ISD::SELECT_CC) {
1886  return SDValue();
1887  }
1888 
1889  SDValue RHS = N->getOperand(1);
1890  SDValue True = N->getOperand(2);
1891  SDValue False = N->getOperand(3);
1892  ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
1893 
  // The fold needs both selects to share their true/false values and the
  // outer comparison to be against the false value.
1894  if (LHS.getOperand(2).getNode() != True.getNode() ||
1895  LHS.getOperand(3).getNode() != False.getNode() ||
1896  RHS.getNode() != False.getNode()) {
1897  return SDValue();
1898  }
1899 
1900  switch (NCC) {
1901  default: return SDValue();
1902  case ISD::SETNE: return LHS;
1903  case ISD::SETEQ: {
1904  ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1905  LHSCC = ISD::getSetCCInverse(LHSCC, LHS.getOperand(0).getValueType());
  // NOTE(review): the second half of this condition (listing line 1907) is
  // absent from this extract.
1906  if (DCI.isBeforeLegalizeOps() ||
1908  return DAG.getSelectCC(DL,
1909  LHS.getOperand(0),
1910  LHS.getOperand(1),
1911  LHS.getOperand(2),
1912  LHS.getOperand(3),
1913  LHSCC);
1914  break;
1915  }
1916  }
1917  return SDValue();
1918  }
1919 
1920  case AMDGPUISD::R600_EXPORT: {
1921  SDValue Arg = N->getOperand(1);
1922  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1923  break;
1924 
  // Slot 1 is filled in by OptimizeSwizzle below, which also rewrites the
  // four swizzle operands starting at NewArgs[4].
1925  SDValue NewArgs[8] = {
1926  N->getOperand(0), // Chain
1927  SDValue(),
1928  N->getOperand(2), // ArrayBase
1929  N->getOperand(3), // Type
1930  N->getOperand(4), // SWZ_X
1931  N->getOperand(5), // SWZ_Y
1932  N->getOperand(6), // SWZ_Z
1933  N->getOperand(7) // SWZ_W
1934  };
1935  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
1936  return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
1937  }
1938  case AMDGPUISD::TEXTURE_FETCH: {
1939  SDValue Arg = N->getOperand(1);
1940  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1941  break;
1942 
  // All 19 operands are copied; OptimizeSwizzle then replaces operand 1 and
  // may rewrite the four operands starting at NewArgs[2].
1943  SDValue NewArgs[19] = {
1944  N->getOperand(0),
1945  N->getOperand(1),
1946  N->getOperand(2),
1947  N->getOperand(3),
1948  N->getOperand(4),
1949  N->getOperand(5),
1950  N->getOperand(6),
1951  N->getOperand(7),
1952  N->getOperand(8),
1953  N->getOperand(9),
1954  N->getOperand(10),
1955  N->getOperand(11),
1956  N->getOperand(12),
1957  N->getOperand(13),
1958  N->getOperand(14),
1959  N->getOperand(15),
1960  N->getOperand(16),
1961  N->getOperand(17),
1962  N->getOperand(18),
1963  };
1964  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
1965  return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
1966  }
1967 
1968  case ISD::LOAD: {
1969  LoadSDNode *LoadNode = cast<LoadSDNode>(N);
1970  SDValue Ptr = LoadNode->getBasePtr();
1971  if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
1972  isa<ConstantSDNode>(Ptr))
1973  return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
1974  break;
1975  }
1976 
1977  default: break;
1978  }
1979 
  // NOTE(review): the final return statement (listing line 1980), reached by
  // every `break` above, is absent from this extract.
1981 }
1982 
1983 bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
1984  SDValue &Src, SDValue &Neg, SDValue &Abs,
1985  SDValue &Sel, SDValue &Imm,
1986  SelectionDAG &DAG) const {
1987  const R600InstrInfo *TII = Subtarget->getInstrInfo();
1988  if (!Src.isMachineOpcode())
1989  return false;
1990 
1991  switch (Src.getMachineOpcode()) {
1992  case R600::FNEG_R600:
1993  if (!Neg.getNode())
1994  return false;
1995  Src = Src.getOperand(0);
1996  Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
1997  return true;
1998  case R600::FABS_R600:
1999  if (!Abs.getNode())
2000  return false;
2001  Src = Src.getOperand(0);
2002  Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
2003  return true;
2004  case R600::CONST_COPY: {
2005  unsigned Opcode = ParentNode->getMachineOpcode();
2006  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2007 
2008  if (!Sel.getNode())
2009  return false;
2010 
2011  SDValue CstOffset = Src.getOperand(0);
2012  if (ParentNode->getValueType(0).isVector())
2013  return false;
2014 
2015  // Gather constants values
2016  int SrcIndices[] = {
2017  TII->getOperandIdx(Opcode, R600::OpName::src0),
2018  TII->getOperandIdx(Opcode, R600::OpName::src1),
2019  TII->getOperandIdx(Opcode, R600::OpName::src2),
2020  TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2021  TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2022  TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2023  TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2024  TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2025  TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2026  TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2027  TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2028  };
2029  std::vector<unsigned> Consts;
2030  for (int OtherSrcIdx : SrcIndices) {
2031  int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2032  if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2033  continue;
2034  if (HasDst) {
2035  OtherSrcIdx--;
2036  OtherSelIdx--;
2037  }
2038  if (RegisterSDNode *Reg =
2039  dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2040  if (Reg->getReg() == R600::ALU_CONST) {
2041  ConstantSDNode *Cst
2042  = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
2043  Consts.push_back(Cst->getZExtValue());
2044  }
2045  }
2046  }
2047 
2048  ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
2049  Consts.push_back(Cst->getZExtValue());
2050  if (!TII->fitsConstReadLimitations(Consts)) {
2051  return false;
2052  }
2053 
2054  Sel = CstOffset;
2055  Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
2056  return true;
2057  }
2058  case R600::MOV_IMM_GLOBAL_ADDR:
2059  // Check if the Imm slot is used. Taken from below.
2060  if (cast<ConstantSDNode>(Imm)->getZExtValue())
2061  return false;
2062  Imm = Src.getOperand(0);
2063  Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
2064  return true;
2065  case R600::MOV_IMM_I32:
2066  case R600::MOV_IMM_F32: {
2067  unsigned ImmReg = R600::ALU_LITERAL_X;
2068  uint64_t ImmValue = 0;
2069 
2070  if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
2071  ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0));
2072  float FloatValue = FPC->getValueAPF().convertToFloat();
2073  if (FloatValue == 0.0) {
2074  ImmReg = R600::ZERO;
2075  } else if (FloatValue == 0.5) {
2076  ImmReg = R600::HALF;
2077  } else if (FloatValue == 1.0) {
2078  ImmReg = R600::ONE;
2079  } else {
2080  ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2081  }
2082  } else {
2083  ConstantSDNode *C = cast<ConstantSDNode>(Src.getOperand(0));
2084  uint64_t Value = C->getZExtValue();
2085  if (Value == 0) {
2086  ImmReg = R600::ZERO;
2087  } else if (Value == 1) {
2088  ImmReg = R600::ONE_INT;
2089  } else {
2090  ImmValue = Value;
2091  }
2092  }
2093 
2094  // Check that we aren't already using an immediate.
2095  // XXX: It's possible for an instruction to have more than one
2096  // immediate operand, but this is not supported yet.
2097  if (ImmReg == R600::ALU_LITERAL_X) {
2098  if (!Imm.getNode())
2099  return false;
2100  ConstantSDNode *C = cast<ConstantSDNode>(Imm);
2101  if (C->getZExtValue())
2102  return false;
2103  Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
2104  }
2105  Src = DAG.getRegister(ImmReg, MVT::i32);
2106  return true;
2107  }
2108  default:
2109  return false;
2110  }
2111 }
2112 
2113 /// Fold the instructions after selecting them
2114 SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2115  SelectionDAG &DAG) const {
2116  const R600InstrInfo *TII = Subtarget->getInstrInfo();
2117  if (!Node->isMachineOpcode())
2118  return Node;
2119 
2120  unsigned Opcode = Node->getMachineOpcode();
2121  SDValue FakeOp;
2122 
2123  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
2124 
2125  if (Opcode == R600::DOT_4) {
2126  int OperandIdx[] = {
2127  TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2128  TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2129  TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2130  TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2131  TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2132  TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2133  TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2134  TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2135  };
2136  int NegIdx[] = {
2137  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
2138  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
2139  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
2140  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
2141  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
2142  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
2143  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
2144  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
2145  };
2146  int AbsIdx[] = {
2147  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
2148  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
2149  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
2150  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
2151  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
2152  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
2153  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
2154  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
2155  };
2156  for (unsigned i = 0; i < 8; i++) {
2157  if (OperandIdx[i] < 0)
2158  return Node;
2159  SDValue &Src = Ops[OperandIdx[i] - 1];
2160  SDValue &Neg = Ops[NegIdx[i] - 1];
2161  SDValue &Abs = Ops[AbsIdx[i] - 1];
2162  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2163  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2164  if (HasDst)
2165  SelIdx--;
2166  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2167  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2168  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2169  }
2170  } else if (Opcode == R600::REG_SEQUENCE) {
2171  for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2172  SDValue &Src = Ops[i];
2173  if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
2174  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2175  }
2176  } else {
2177  if (!TII->hasInstrModifiers(Opcode))
2178  return Node;
2179  int OperandIdx[] = {
2180  TII->getOperandIdx(Opcode, R600::OpName::src0),
2181  TII->getOperandIdx(Opcode, R600::OpName::src1),
2182  TII->getOperandIdx(Opcode, R600::OpName::src2)
2183  };
2184  int NegIdx[] = {
2185  TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
2186  TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
2187  TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
2188  };
2189  int AbsIdx[] = {
2190  TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
2191  TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
2192  -1
2193  };
2194  for (unsigned i = 0; i < 3; i++) {
2195  if (OperandIdx[i] < 0)
2196  return Node;
2197  SDValue &Src = Ops[OperandIdx[i] - 1];
2198  SDValue &Neg = Ops[NegIdx[i] - 1];
2199  SDValue FakeAbs;
2200  SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2201  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2202  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2203  int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
2204  if (HasDst) {
2205  SelIdx--;
2206  ImmIdx--;
2207  }
2208  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2209  SDValue &Imm = Ops[ImmIdx];
2210  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
2211  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2212  }
2213  }
2214 
2215  return Node;
2216 }
llvm::ISD::SUB
@ SUB
Definition: ISDOpcodes.h:240
llvm::CCValAssign::getLocVT
MVT getLocVT() const
Definition: CallingConvLower.h:153
i
i
Definition: README.txt:29
llvm::ISD::SETUGE
@ SETUGE
Definition: ISDOpcodes.h:1374
llvm::AMDGPUISD::STORE_MSKOR
@ STORE_MSKOR
Definition: AMDGPUISelLowering.h:485
llvm::TargetLowering::scalarizeVectorLoad
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
Definition: TargetLowering.cpp:7321
llvm::ConstantSDNode
Definition: SelectionDAGNodes.h:1556
llvm::RegisterSDNode
Definition: SelectionDAGNodes.h:2098
llvm::APFloat::convertToFloat
float convertToFloat() const
Converts this APFloat to host float value.
Definition: APFloat.cpp:4908
llvm::StoreSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2331
llvm::TargetLoweringBase::setSchedulingPreference
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
Definition: TargetLowering.h:2142
llvm::ISD::SETLE
@ SETLE
Definition: ISDOpcodes.h:1385
llvm::ISD::INTRINSIC_VOID
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
llvm::ISD::SETO
@ SETO
Definition: ISDOpcodes.h:1370
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:105
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::SDNode::getValueType
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Definition: SelectionDAGNodes.h:966
llvm::SDLoc
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Definition: SelectionDAGNodes.h:1086
llvm::TargetLoweringBase::Legal
@ Legal
Definition: TargetLowering.h:197
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:633
llvm::TargetLowering::EmitInstrWithCustomInserter
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Definition: SelectionDAGISel.cpp:295
llvm::ISD::BITCAST
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:848
llvm::AMDGPUAS::CONSTANT_BUFFER_8
@ CONSTANT_BUFFER_8
Definition: AMDGPU.h:389
llvm::ISD::SETNE
@ SETNE
Definition: ISDOpcodes.h:1386
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
llvm::AMDGPUTargetLowering
Definition: AMDGPUISelLowering.h:27
llvm::ISD::NON_EXTLOAD
@ NON_EXTLOAD
Definition: ISDOpcodes.h:1341
llvm::CCState
CCState - This class holds information needed while lowering arguments and return values.
Definition: CallingConvLower.h:191
llvm::MachineOperand::getGlobal
const GlobalValue * getGlobal() const
Definition: MachineOperand.h:563
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::SDValue::getNode
SDNode * getNode() const
get the SDNode which holds the desired result
Definition: SelectionDAGNodes.h:152
llvm::MachineInstrBuilder::add
const MachineInstrBuilder & add(const MachineOperand &MO) const
Definition: MachineInstrBuilder.h:224
llvm::ISD::UADDO
@ UADDO
Definition: ISDOpcodes.h:311
llvm::ISD::CTTZ_ZERO_UNDEF
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:674
llvm::SelectionDAG::getValueType
SDValue getValueType(EVT)
Definition: SelectionDAG.cpp:1744
llvm::PointerType::get
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Definition: Type.cpp:729
llvm::ISD::ADDC
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:269
llvm::R600FrameLowering
Definition: R600FrameLowering.h:16
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::AMDGPUISD::CONST_ADDRESS
@ CONST_ADDRESS
Definition: AMDGPUISelLowering.h:437
llvm::ISD::SETEQ
@ SETEQ
Definition: ISDOpcodes.h:1381
llvm::SelectionDAG::getVTList
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
Definition: SelectionDAG.cpp:8580
llvm::AMDGPUAS::CONSTANT_BUFFER_6
@ CONSTANT_BUFFER_6
Definition: AMDGPU.h:387
llvm::MVT::v2i1
@ v2i1
Definition: MachineValueType.h:65
llvm::R600Subtarget::getInstrInfo
const R600InstrInfo * getInstrInfo() const override
Definition: R600Subtarget.h:56
llvm::MipsISD::Lo
@ Lo
Definition: MipsISelLowering.h:79
llvm::MachineSDNode
An SDNode that represents everything that will be needed to construct a MachineInstr.
Definition: SelectionDAGNodes.h:2746
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::MachineMemOperand::MOInvariant
@ MOInvariant
The memory access always returns the same value (or traps).
Definition: MachineMemOperand.h:145
llvm::SelectionDAG::getVectorIdxConstant
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
Definition: SelectionDAG.cpp:1530
llvm::MemSDNode::getMemoryVT
EVT getMemoryVT() const
Return the type of the in-memory value.
Definition: SelectionDAGNodes.h:1336
llvm::MemSDNode::getChain
const SDValue & getChain() const
Definition: SelectionDAGNodes.h:1359
llvm::ISD::ANY_EXTEND
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:732
llvm::SDNode
Represents one node in the SelectionDAG.
Definition: SelectionDAGNodes.h:455
llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:466
llvm::ISD::FP_TO_SINT
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:785
llvm::TargetLowering::DAGCombinerInfo::DAG
SelectionDAG & DAG
Definition: TargetLowering.h:3556
llvm::LoadSDNode
This class is used to represent ISD::LOAD nodes.
Definition: SelectionDAGNodes.h:2281
llvm::ISD::SETULE
@ SETULE
Definition: ISDOpcodes.h:1376
llvm::CCState::AnalyzeFormalArguments
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
Definition: CallingConvLower.cpp:90
isEOP
static bool isEOP(MachineBasicBlock::iterator I)
Definition: R600ISelLowering.cpp:258
llvm::NVPTXISD::RETURN
@ RETURN
Definition: NVPTXISelLowering.h:49
llvm::ISD::SHL_PARTS
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:715
llvm::SelectionDAG::getZeroExtendInReg
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
Definition: SelectionDAG.cpp:1340
llvm::CallingConv::AMDGPU_PS
@ AMDGPU_PS
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:210
llvm::SelectionDAG::getStore
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
Definition: SelectionDAG.cpp:7541
llvm::MachineMemOperand::MODereferenceable
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
Definition: MachineMemOperand.h:143
llvm::AMDGPUISD::CARRY
@ CARRY
Definition: AMDGPUISelLowering.h:417
llvm::R600Subtarget::hasBCNT
bool hasBCNT(unsigned Size) const
Definition: R600Subtarget.h:98
llvm::ISD::SETCC
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:702
llvm::R600Subtarget::hasCARRY
bool hasCARRY() const
Definition: R600Subtarget.h:109
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
Results
Function Alias Analysis Results
Definition: AliasAnalysis.cpp:847
llvm::ISD::MERGE_VALUES
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
llvm::AMDGPUTargetLowering::ReplaceNodeResults
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
Definition: AMDGPUISelLowering.cpp:1318
llvm::CallingConv::AMDGPU_ES
@ AMDGPU_ES
Calling convention used for AMDPAL shader stage before geometry shader if geometry is in use.
Definition: CallingConv.h:236
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:116
llvm::AMDGPUAS::CONSTANT_BUFFER_2
@ CONSTANT_BUFFER_2
Definition: AMDGPU.h:383
llvm::ISD::EXTLOAD
@ EXTLOAD
Definition: ISDOpcodes.h:1341
llvm::LSBaseSDNode::isIndexed
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
Definition: SelectionDAGNodes.h:2269
llvm::R600Subtarget::hasFFBL
bool hasFFBL() const
Definition: R600Subtarget.h:117
llvm::EVT::bitsLE
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:281
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::MachineOperand::getOffset
int64_t getOffset() const
Return the offset from the symbol in this operand.
Definition: MachineOperand.h:600
R600ISelLowering.h
llvm::ISD::SETUEQ
@ SETUEQ
Definition: ISDOpcodes.h:1372
llvm::TargetLowering::expandUnalignedStore
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
Definition: TargetLowering.cpp:7640
llvm::SelectionDAG::getContext
LLVMContext * getContext() const
Definition: SelectionDAG.h:447
llvm::R600MachineFunctionInfo
Definition: R600MachineFunctionInfo.h:19
llvm::ISD::BRCOND
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:956
llvm::SelectionDAG::getRegister
SDValue getRegister(unsigned Reg, EVT VT)
Definition: SelectionDAG.cpp:2007
llvm::RegState::Kill
@ Kill
The last use of a register.
Definition: MachineInstrBuilder.h:48
llvm::MipsISD::Hi
@ Hi
Definition: MipsISelLowering.h:75
isUndef
static bool isUndef(ArrayRef< int > Mask)
Definition: HexagonISelDAGToDAGHVX.cpp:912
llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:130
llvm::SDNode::op_end
op_iterator op_end() const
Definition: SelectionDAGNodes.h:912
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:206
llvm::MVT::integer_valuetypes
static auto integer_valuetypes()
Definition: MachineValueType.h:1411
llvm::ISD::FFLOOR
@ FFLOOR
Definition: ISDOpcodes.h:885
llvm::ISD::LoadExtType
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1341
llvm::R600TargetLowering::LowerFormalArguments
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
XXX Only kernel functions are supported, so we can assume for now that every function is a kernel fun...
Definition: R600ISelLowering.cpp:1492
llvm::R600Subtarget::hasBFE
bool hasBFE() const
Definition: R600Subtarget.h:90
llvm::ISD::BR_CC
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:963
x3
In x86 we generate this spiffy xmm0 xmm0 ret in x86 we generate this which could be xmm1 movss xmm1 xmm0 ret In sse4 we could use insertps to make both better Here s another testcase that could use x3
Definition: README-SSE.txt:547
llvm::SelectionDAG::getLoad
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Definition: SelectionDAG.cpp:7491
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:43
llvm::SDNode::getOpcode
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
Definition: SelectionDAGNodes.h:629
llvm::EVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:363
llvm::R600Subtarget::hasFMA
bool hasFMA() const
Definition: R600Subtarget.h:125
llvm::MinAlign
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:672
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:644
llvm::ISD::GlobalAddress
@ GlobalAddress
Definition: ISDOpcodes.h:78
llvm::ISD::SELECT_CC
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:694
llvm::SDValue::getValueType
EVT getValueType() const
Return the ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:1121
llvm::AMDGPUAS::CONSTANT_BUFFER_15
@ CONSTANT_BUFFER_15
Definition: AMDGPU.h:396
llvm::CCValAssign
CCValAssign - Represent assignment of one arg/retval to a location.
Definition: CallingConvLower.h:33
llvm::AMDGPU::isShader
bool isShader(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1360
llvm::SelectionDAG
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:216
llvm::ISD::SELECT
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:679
llvm::TargetLowering::DAGCombinerInfo::isBeforeLegalizeOps
bool isBeforeLegalizeOps() const
Definition: TargetLowering.h:3562
llvm::AMDGPUAS::CONSTANT_BUFFER_13
@ CONSTANT_BUFFER_13
Definition: AMDGPU.h:394
f
Itanium Name Demangler i e convert the string _Z1fv into f()". You can also use the CRTP base ManglingParser to perform some simple analysis on the mangled name
llvm::SelectionDAG::getTruncStore
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition: SelectionDAG.cpp:7592
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:648
llvm::CCValAssign::getLocReg
Register getLocReg() const
Definition: CallingConvLower.h:150
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:732
MO_FLAG_PUSH
#define MO_FLAG_PUSH
Definition: R600Defines.h:18
llvm::SelectionDAG::getUNDEF
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:951
llvm::ISD::SIGN_EXTEND_INREG
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:747
llvm::AMDGPUISD::R600_EXPORT
@ R600_EXPORT
Definition: AMDGPUISelLowering.h:436
llvm::MachineMemOperand::getValue
const Value * getValue() const
Return the base address of the memory access.
Definition: MachineMemOperand.h:211
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:35
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::TargetLoweringBase::isCondCodeLegal
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
Definition: TargetLowering.h:1372
llvm::MVT::f64
@ f64
Definition: MachineValueType.h:56
llvm::SelectionDAG::getConstant
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1395
llvm::EVT::getVectorNumElements
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:309
llvm::TargetLowering::DAGCombinerInfo
Definition: TargetLowering.h:3550
getOpcode
static Optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:199
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::ISD::TRUNCATE
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:735
llvm::TargetLoweringBase::ZeroOrNegativeOneBooleanContent
@ ZeroOrNegativeOneBooleanContent
Definition: TargetLowering.h:234
llvm::AMDGPUISD::DOT4
@ DOT4
Definition: AMDGPUISelLowering.h:416
llvm::ISD::UDIVREM
@ UDIVREM
Definition: ISDOpcodes.h:256
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::TargetLoweringBase::addRegisterClass
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
Definition: TargetLowering.h:2189
llvm::AMDGPUAS::CONSTANT_BUFFER_7
@ CONSTANT_BUFFER_7
Definition: AMDGPU.h:388
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
MO_FLAG_MASK
#define MO_FLAG_MASK
Definition: R600Defines.h:17
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1460
llvm::ISD::FRINT
@ FRINT
Definition: ISDOpcodes.h:881
llvm::codeview::EncodedFramePtrReg::BasePtr
@ BasePtr
llvm::ISD::ATOMIC_STORE
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1118
llvm::FrameIndexSDNode
Definition: SelectionDAGNodes.h:1744
llvm::R600TargetLowering::CCAssignFnForCall
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const
Definition: R600ISelLowering.cpp:1467
llvm::SelectionDAG::getMemIntrinsicNode
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
Definition: SelectionDAG.cpp:7268
R600InstrInfo.h
llvm::R600TargetLowering::canMergeStoresTo
bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const override
Returns if it's reasonable to merge stores to MemVT size.
Definition: R600ISelLowering.cpp:1569
llvm::R600TargetLowering::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, bool *IsFast=nullptr) const override
Determine if the target supports unaligned memory accesses.
Definition: R600ISelLowering.cpp:1578
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:632
llvm::ISD::SETOLT
@ SETOLT
Definition: ISDOpcodes.h:1367
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::GlobalAddressSDNode::getGlobal
const GlobalValue * getGlobal() const
Definition: SelectionDAGNodes.h:1730
llvm::APFloat::bitcastToAPInt
APInt bitcastToAPInt() const
Definition: APFloat.h:1130
llvm::CCValAssign::getLocMemOffset
unsigned getLocMemOffset() const
Definition: CallingConvLower.h:151
llvm::ISD::UNINDEXED
@ UNINDEXED
Definition: ISDOpcodes.h:1310
llvm::TargetLoweringBase::isOperationLegal
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
Definition: TargetLowering.h:1226
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::TargetLoweringBase::setBooleanVectorContents
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
Definition: TargetLowering.h:2137
llvm::EVT::changeVectorElementTypeToInteger
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:400
llvm::EVT::bitsLT
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:273
llvm::AMDGPUTargetLowering::LowerGlobalAddress
virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const
Definition: AMDGPUISelLowering.cpp:1343
llvm::MVT::v4i16
@ v4i16
Definition: MachineValueType.h:91
llvm::EVT::getTypeForEVT
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:181
llvm::MachineRegisterInfo::use_empty
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
Definition: MachineRegisterInfo.h:506
llvm::ISD::USUBO
@ USUBO
Definition: ISDOpcodes.h:315
llvm::MVT::v4i8
@ v4i8
Definition: MachineValueType.h:78
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::CallingConv::AMDGPU_GS
@ AMDGPU_GS
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:207
ConstantAddressBlock
static int ConstantAddressBlock(unsigned AddressSpace)
Definition: R600ISelLowering.cpp:1259
llvm::StoreSDNode::isTruncatingStore
bool isTruncatingStore() const
Return true if the op does a truncation before store.
Definition: SelectionDAGNodes.h:2325
llvm::ISD::SETOLE
@ SETOLE
Definition: ISDOpcodes.h:1368
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
llvm::ISD::SETUGT
@ SETUGT
Definition: ISDOpcodes.h:1373
llvm::SelectionDAG::getTargetGlobalAddress
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:683
llvm::CallingConv::AMDGPU_CS
@ AMDGPU_CS
Calling convention used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:213
R600MachineFunctionInfo.h
llvm::CCAssignFn
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
Definition: CallingConvLower.h:177
llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:377
llvm::MVT::v2i8
@ v2i8
Definition: MachineValueType.h:77
llvm::AMDGPUISD::BUILD_VERTICAL_VECTOR
@ BUILD_VERTICAL_VECTOR
This node is for VLIW targets and it is used to represent a vector that is stored in consecutive regi...
Definition: AMDGPUISelLowering.h:471
llvm::ISD::SETUNE
@ SETUNE
Definition: ISDOpcodes.h:1377
llvm::Register::isVirtual
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
R600MCTargetDesc.h
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::TargetLoweringBase::setOperationAction
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
Definition: TargetLowering.h:2206
llvm::MVT::v32i16
@ v32i16
Definition: MachineValueType.h:94
llvm::pdb::OMFSegDescFlags::Read
@ Read
llvm::AMDGPUTargetLowering::LowerUDIVREM64
void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results) const
Definition: AMDGPUISelLowering.cpp:1815
llvm::R600Subtarget
Definition: R600Subtarget.h:35
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::MipsISD::Ext
@ Ext
Definition: MipsISelLowering.h:156
llvm::AMDGPUTargetLowering::LowerSDIVREM
SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const
Definition: AMDGPUISelLowering.cpp:2085
llvm::MVT::v16i8
@ v16i8
Definition: MachineValueType.h:80
llvm::AMDGPUAS::CONSTANT_BUFFER_9
@ CONSTANT_BUFFER_9
Definition: AMDGPU.h:390
llvm::R600Subtarget::hasBORROW
bool hasBORROW() const
Definition: R600Subtarget.h:105
llvm::AMDGPUISD::BORROW
@ BORROW
Definition: AMDGPUISelLowering.h:418
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:341
llvm::ISD::FMAD
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:470
llvm::MachineOperand::getTargetFlags
unsigned getTargetFlags() const
Definition: MachineOperand.h:221
llvm::DenseMapBase::clear
void clear()
Definition: DenseMap.h:111
llvm::TargetLowering::expandFP_TO_SINT
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
Definition: TargetLowering.cpp:6694
llvm::MVT::v16i16
@ v16i16
Definition: MachineValueType.h:93
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
llvm::TargetLoweringBase::setHasExtractBitsInsn
void setHasExtractBitsInsn(bool hasExtractInsn=true)
Tells the code generator that the target has BitExtract instructions.
Definition: TargetLowering.h:2172
uint64_t
llvm::ISD::FP_TO_UINT
@ FP_TO_UINT
Definition: ISDOpcodes.h:786
llvm::ConstantFPSDNode
Definition: SelectionDAGNodes.h:1605
llvm::R600FrameLowering::getFrameIndexReference
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
Definition: R600FrameLowering.cpp:18
llvm::MemSDNode::getMemOperand
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Definition: SelectionDAGNodes.h:1340
llvm::ISD::LOAD
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:921
llvm::SelectionDAG::getIntPtrConstant
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
Definition: SelectionDAG.cpp:1518
llvm::TargetLoweringBase::Promote
@ Promote
Definition: TargetLowering.h:198
llvm::ConstantPointerNull::get
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1782
llvm::MachinePointerInfo
This class contains a discriminated union of information about pointers in memory operands,...
Definition: MachineMemOperand.h:38
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap< unsigned, unsigned >
llvm::SelectionDAG::getCopyFromReg
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:761
llvm::CallingConv::SPIR_KERNEL
@ SPIR_KERNEL
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:152
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:511
llvm::R600Subtarget::hasBFI
bool hasBFI() const
Definition: R600Subtarget.h:94
llvm::ISD::CTLZ_ZERO_UNDEF
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:675
llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition: SelectionDAGNodes.h:904
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::FrameIndexSDNode::getIndex
int getIndex() const
Definition: SelectionDAGNodes.h:1755
llvm::SelectionDAG::getNode
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
Definition: SelectionDAG.cpp:8345
llvm::SelectionDAG::getNOT
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
Definition: SelectionDAG.cpp:1371
llvm::R600::getLDSNoRetOp
int getLDSNoRetOp(uint16_t Opcode)
llvm::ISD::FCOPYSIGN
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:476
llvm::LoadSDNode::getExtensionType
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Definition: SelectionDAGNodes.h:2296
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:632
llvm::TargetLowering::scalarizeVectorStore
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
Definition: TargetLowering.cpp:7411
llvm::MVT::v4f32
@ v4f32
Definition: MachineValueType.h:157
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::MachineMemOperand::Flags
Flags
Flags values. These may be or'd together.
Definition: MachineMemOperand.h:131
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:363
llvm::ISD::InputArg
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
Definition: TargetCallingConv.h:195
llvm::StoreSDNode
This class is used to represent ISD::STORE nodes.
Definition: SelectionDAGNodes.h:2309
llvm::ISD::ZEXTLOAD
@ ZEXTLOAD
Definition: ISDOpcodes.h:1341
llvm::SDValue::getValue
SDValue getValue(unsigned R) const
Definition: SelectionDAGNodes.h:172
llvm::MVT::i8
@ i8
Definition: MachineValueType.h:44
llvm::R600Subtarget::hasFFBH
bool hasFFBH() const
Definition: R600Subtarget.h:121
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:79
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
llvm::AMDGPUISD::SIN_HW
@ SIN_HW
Definition: AMDGPUISelLowering.h:384
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::ISD::SETULT
@ SETULT
Definition: ISDOpcodes.h:1375
llvm::ConstantSDNode::getZExtValue
uint64_t getZExtValue() const
Definition: SelectionDAGNodes.h:1571
llvm::TargetLowering::expandShiftParts
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
Definition: TargetLowering.cpp:6642
llvm::AMDGPUISD::TEXTURE_FETCH
@ TEXTURE_FETCH
Definition: AMDGPUISelLowering.h:435
llvm::AMDGPUISD::RSQ
@ RSQ
Definition: AMDGPUISelLowering.h:409
llvm::AMDGPUAS::CONSTANT_BUFFER_4
@ CONSTANT_BUFFER_4
Definition: AMDGPU.h:385
llvm::MachineMemOperand::MONonTemporal
@ MONonTemporal
The memory access is non-temporal.
Definition: MachineMemOperand.h:141
llvm::SelectionDAG::getSelectCC
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition: SelectionDAG.h:1087
llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1361
MO_FLAG_NEG
#define MO_FLAG_NEG
Definition: R600Defines.h:15
MO_FLAG_ABS
#define MO_FLAG_ABS
Definition: R600Defines.h:16
llvm::AMDGPUSubtarget::R700
@ R700
Definition: AMDGPUSubtarget.h:34
llvm::SelectionDAG::getMachineNode
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
Definition: SelectionDAG.cpp:9018
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::ISD::isNON_EXTLoad
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
Definition: SelectionDAGNodes.h:2901
llvm::CallingConv::Cold
@ Cold
Definition: CallingConv.h:48
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::StoreSDNode::getValue
const SDValue & getValue() const
Definition: SelectionDAGNodes.h:2330
llvm::AfterLegalizeVectorOps
@ AfterLegalizeVectorOps
Definition: DAGCombine.h:18
llvm::ISD::SRA_PARTS
@ SRA_PARTS
Definition: ISDOpcodes.h:716
llvm::AMDGPUTargetLowering::combineFMinMaxLegacy
SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, SDValue CC, DAGCombinerInfo &DCI) const
Generate Min/Max node.
Definition: AMDGPUISelLowering.cpp:1428
llvm::MachineFunction
Definition: MachineFunction.h:234
llvm::R600Subtarget::getFrameLowering
const R600FrameLowering * getFrameLowering() const override
Definition: R600Subtarget.h:58
llvm::MachineFunction::addLiveIn
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
Definition: MachineFunction.cpp:654
llvm::MemSDNode::getAAInfo
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Definition: SelectionDAGNodes.h:1304
llvm::AMDGPUTargetLowering::getImplicitParameterOffset
uint32_t getImplicitParameterOffset(const MachineFunction &MF, const ImplicitParameter Param) const
Helper function that returns the byte offset of the given type of implicit parameter.
Definition: AMDGPUISelLowering.cpp:4312
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:100
llvm::EVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:155
llvm::AMDGPUAS::PARAM_I_ADDRESS
@ PARAM_I_ADDRESS
Address space for indirect addressable parameter memory (VTX1).
Definition: AMDGPU.h:373
llvm::MVT::i64
@ i64
Definition: MachineValueType.h:47
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:179
llvm::MVT::v2i32
@ v2i32
Definition: MachineValueType.h:101
llvm::EVT::getScalarSizeInBits
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:353
AMDGPU.h
llvm::AMDGPUAS::CONSTANT_BUFFER_10
@ CONSTANT_BUFFER_10
Definition: AMDGPU.h:391
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:362
llvm::SelectionDAG::ReplaceAllUsesOfValueWith
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
Definition: SelectionDAG.cpp:9606
llvm::MVT::v2f32
@ v2f32
Definition: MachineValueType.h:155
uint32_t
llvm::StackOffset
StackOffset is a class to represent an offset with 2 dimensions, named fixed and scalable,...
Definition: TypeSize.h:134
llvm::AMDGPUTargetLowering::LowerOperation
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
Definition: AMDGPUISelLowering.cpp:1276
llvm::MinMax
Definition: AssumeBundleQueries.h:71
llvm::SDValue::getOperand
const SDValue & getOperand(unsigned i) const
Definition: SelectionDAGNodes.h:1129
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::AMDGPUAS::CONSTANT_BUFFER_1
@ CONSTANT_BUFFER_1
Definition: AMDGPU.h:382
llvm::TargetLoweringBase::setCondCodeAction
void setCondCodeAction(ISD::CondCode CC, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
Definition: TargetLowering.h:2270
llvm::TargetLoweringBase::setTruncStoreAction
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
Definition: TargetLowering.h:2225
llvm::ISD::getSetCCInverse
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
Definition: SelectionDAG.cpp:499
llvm::SDValue::getSimpleValueType
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:183
llvm::AMDGPUAS::CONSTANT_BUFFER_14
@ CONSTANT_BUFFER_14
Definition: AMDGPU.h:395
llvm::MVT::v4i32
@ v4i32
Definition: MachineValueType.h:103
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:286
llvm::ISD::FMUL
@ FMUL
Definition: ISDOpcodes.h:379
llvm::CallingConv::Fast
@ Fast
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::MVT::v32i32
@ v32i32
Definition: MachineValueType.h:109
llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition: ISDOpcodes.h:1341
llvm::SelectionDAG::getBuildVector
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:790
llvm::SelectionDAG::getConstantFP
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1574
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::ISD::INTRINSIC_WO_CHAIN
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
llvm::LoadSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2300
llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:216
llvm::DenseMapBase::empty
LLVM_NODISCARD bool empty() const
Definition: DenseMap.h:97
llvm::MVT::v16i32
@ v16i32
Definition: MachineValueType.h:108
llvm::ISD::FrameIndex
@ FrameIndex
Definition: ISDOpcodes.h:80
llvm::isInt< 16 >
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:370
llvm::AMDGPUTargetLowering::CreateLiveInRegisterRaw
SDValue CreateLiveInRegisterRaw(SelectionDAG &DAG, const TargetRegisterClass *RC, Register Reg, EVT VT) const
Definition: AMDGPUISelLowering.h:290
llvm::AMDGPUAS::CONSTANT_BUFFER_3
@ CONSTANT_BUFFER_3
Definition: AMDGPU.h:384
j
return j(j<< 16)
llvm::ISD::SETLT
@ SETLT
Definition: ISDOpcodes.h:1384
llvm::isAllOnesConstant
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
Definition: SelectionDAG.cpp:9967
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:364
llvm::AMDGPUTargetLowering::analyzeFormalArgumentsCompute
void analyzeFormalArgumentsCompute(CCState &State, const SmallVectorImpl< ISD::InputArg > &Ins) const
The SelectionDAGBuilder will automatically promote function arguments with illegal types.
Definition: AMDGPUISelLowering.cpp:1053
CompactSwizzlableVector
static SDValue CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap< unsigned, unsigned > &RemapSwizzle)
Definition: R600ISelLowering.cpp:1597
llvm::R600TargetLowering::LowerOperation
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
Definition: R600ISelLowering.cpp:450
llvm::TargetLoweringBase::setLoadExtAction
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
Definition: TargetLowering.h:2213
llvm::GlobalAddressSDNode
Definition: SelectionDAGNodes.h:1718
llvm::AMDGPUISD::RSQ_CLAMP
@ RSQ_CLAMP
Definition: AMDGPUISelLowering.h:413
llvm::isNullConstant
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
Definition: SelectionDAG.cpp:9957
llvm::EVT::getScalarType
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:296
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:83
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:324
llvm::MemSDNode::getAddressSpace
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Definition: SelectionDAGNodes.h:1347
llvm::AMDGPUAS::CONSTANT_BUFFER_11
@ CONSTANT_BUFFER_11
Definition: AMDGPU.h:392
llvm::MemSDNode::getAlignment
unsigned getAlignment() const
Definition: SelectionDAGNodes.h:1274
llvm::ISD::FCOS
@ FCOS
Definition: ISDOpcodes.h:871
llvm::SelectionDAG::getEntryNode
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:516
llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:879
llvm::ISD::getSetCCSwappedOperands
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
Definition: SelectionDAG.cpp:476
llvm::R600TargetLowering::PerformDAGCombine
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
Definition: R600ISelLowering.cpp:1756
llvm::ISD::FSIN
@ FSIN
Definition: ISDOpcodes.h:870
llvm::SelectionDAG::getDataLayout
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:440
llvm::MVT::v8i16
@ v8i16
Definition: MachineValueType.h:92
llvm::ISD::BUILD_VECTOR
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:491
llvm::AMDGPUAS::CONSTANT_BUFFER_0
@ CONSTANT_BUFFER_0
Definition: AMDGPU.h:381
llvm::TargetLoweringBase::Custom
@ Custom
Definition: TargetLowering.h:201
llvm::R600Subtarget::getRegisterInfo
const R600RegisterInfo * getRegisterInfo() const override
Definition: R600Subtarget.h:66
llvm::ISD::SUBC
@ SUBC
Definition: ISDOpcodes.h:270
R600Subtarget.h
llvm::MVT::i32
@ i32
Definition: MachineValueType.h:46
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::ISD::SETUO
@ SETUO
Definition: ISDOpcodes.h:1371
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition: SelectionDAGNodes.h:138
llvm::CallingConv::AMDGPU_VS
@ AMDGPU_VS
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
Definition: CallingConv.h:204
llvm::R600InstrInfo
Definition: R600InstrInfo.h:39
llvm::CallingConv::AMDGPU_HS
@ AMDGPU_HS
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:223
llvm::CallingConv::AMDGPU_LS
@ AMDGPU_LS
Calling convention used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:231
llvm::ISD::STORE
@ STORE
Definition: ISDOpcodes.h:922
llvm::MVT::v8i32
@ v8i32
Definition: MachineValueType.h:107
llvm::ISD::SRL_PARTS
@ SRL_PARTS
Definition: ISDOpcodes.h:717
llvm::R600TargetLowering::getSetCCResultType
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const override
Return the ValueType of the result of SETCC operations.
Definition: R600ISelLowering.cpp:1562
llvm::AMDGPUISD::BRANCH_COND
@ BRANCH_COND
Definition: AMDGPUISelLowering.h:341
llvm::ISD::UINT_TO_FP
@ UINT_TO_FP
Definition: ISDOpcodes.h:740
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
llvm::SDValue::isUndef
bool isUndef() const
Definition: SelectionDAGNodes.h:1157
llvm::SDNode::getMachineOpcode
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
Definition: SelectionDAGNodes.h:686
Vector
So we should use XX3Form_Rcr to implement instrinsic Convert DP outs ins xscvdpsp No builtin are required Round &Convert QP DP(dword[1] is set to zero) No builtin are required Round to Quad Precision because you need to assign rounding mode in instruction Provide builtin(set f128:$vT,(int_ppc_vsx_xsrqpi f128:$vB))(set f128 yields< n x< ty > >< result > yields< ty >< result > No builtin are required Load Store Vector
Definition: README_P9.txt:497
llvm::makeArrayRef
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:476
llvm::AMDGPUTargetLowering::PerformDAGCombine
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
Definition: AMDGPUISelLowering.cpp:4012
llvm::EVT::getVectorElementType
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:301
llvm::MemSDNode::getAlign
Align getAlign() const
Definition: SelectionDAGNodes.h:1272
llvm::TargetLoweringBase::setBooleanContents
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
Definition: TargetLowering.h:2123
llvm::SDNode::op_begin
op_iterator op_begin() const
Definition: SelectionDAGNodes.h:911
llvm::AMDGPUISD::FRACT
@ FRACT
Definition: AMDGPUISelLowering.h:364
llvm::ISD::FSUB
@ FSUB
Definition: ISDOpcodes.h:378
llvm::ISD::SHL
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:657
R600Defines.h
llvm::TargetLoweringBase::Expand
@ Expand
Definition: TargetLowering.h:199
llvm::TargetLoweringBase::setTargetDAGCombine
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
Definition: TargetLowering.h:2301
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
N
#define N
llvm::TargetLoweringBase::computeRegisterProperties
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
Definition: TargetLoweringBase.cpp:1292
llvm::ISD::SUBE
@ SUBE
Definition: ISDOpcodes.h:280
llvm::ISD::SRL
@ SRL
Definition: ISDOpcodes.h:659
llvm::GlobalAddressSDNode::getAddressSpace
unsigned getAddressSpace() const
Definition: SelectionDAG.cpp:10708
llvm::R600TargetLowering::ReplaceNodeResults
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
Definition: R600ISelLowering.cpp:633
llvm::AMDGPUISD::COS_HW
@ COS_HW
Definition: AMDGPUISelLowering.h:383
llvm::MipsISD::Ins
@ Ins
Definition: MipsISelLowering.h:157
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
llvm::SDValue::getOpcode
unsigned getOpcode() const
Definition: SelectionDAGNodes.h:1117
llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:637
llvm::numbers::pif
constexpr float pif
Definition: MathExtras.h:78
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::ISD::SETONE
@ SETONE
Definition: ISDOpcodes.h:1369
llvm::MVT::v32i8
@ v32i8
Definition: MachineValueType.h:81
llvm::EVT::bitsGT
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:257
llvm::MVT::i16
@ i16
Definition: MachineValueType.h:45
llvm::ISD::FNEG
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:866
llvm::SelectionDAG::getMachineFunction
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:437
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::R600TargetLowering::EmitInstrWithCustomInserter
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Definition: R600ISelLowering.cpp:265
llvm::CallingConv::C
@ C
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
llvm::ISD::SDIVREM
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:359
llvm::AMDGPUISD::DWORDADDR
@ DWORDADDR
Definition: AMDGPUISelLowering.h:363
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::EVT::bitsGE
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:265
llvm::ISD::SIGN_EXTEND
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:726
llvm::MVT::v8i8
@ v8i8
Definition: MachineValueType.h:79
llvm::AMDGPUFrameLowering::getStackWidth
unsigned getStackWidth(const MachineFunction &MF) const
Definition: AMDGPUFrameLowering.cpp:22
MachineFunction.h
llvm::ISD::FTRUNC
@ FTRUNC
Definition: ISDOpcodes.h:880
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::MVT::v2i16
@ v2i16
Definition: MachineValueType.h:89
llvm::AMDGPUMachineFunction
Definition: AMDGPUMachineFunction.h:20
llvm::ConstantFPSDNode::getValueAPF
const APFloat & getValueAPF() const
Definition: SelectionDAGNodes.h:1616
ReorganizeVector
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap< unsigned, unsigned > &RemapSwizzle)
Definition: R600ISelLowering.cpp:1642
llvm::ISD::INSERT_VECTOR_ELT
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:500
llvm::SelectionDAG::getExtLoad
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition: SelectionDAG.cpp:7508
llvm::ISD::ADDE
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:279
llvm::ISD::FP_ROUND
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:814
llvm::MVT::f32
@ f32
Definition: MachineValueType.h:55
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::MachineMemOperand::getFlags
Flags getFlags() const
Return the raw flags of the source value,.
Definition: MachineMemOperand.h:220
llvm::ISD::ATOMIC_LOAD
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1114
llvm::MemSDNode::getPointerInfo
const MachinePointerInfo & getPointerInfo() const
Definition: SelectionDAGNodes.h:1342
llvm::AMDGPUISD::CONST_DATA_PTR
@ CONST_DATA_PTR
Pointer to the start of the shader's constant data.
Definition: AMDGPUISelLowering.h:473
llvm::TargetLoweringBase::getPointerTy
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Definition: TargetLowering.h:346
llvm::ISD::CTPOP
@ CTPOP
Definition: ISDOpcodes.h:669
llvm::AMDGPUAS::CONSTANT_BUFFER_5
@ CONSTANT_BUFFER_5
Definition: AMDGPU.h:386
llvm::AMDGPUISD::DUMMY_CHAIN
@ DUMMY_CHAIN
Definition: AMDGPUISelLowering.h:476
llvm::R600TargetLowering::R600TargetLowering
R600TargetLowering(const TargetMachine &TM, const R600Subtarget &STI)
Definition: R600ISelLowering.cpp:29
llvm::SelectionDAG::getMergeValues
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
Definition: SelectionDAG.cpp:7257
llvm::AMDGPUAS::CONSTANT_BUFFER_12
@ CONSTANT_BUFFER_12
Definition: AMDGPU.h:393
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:289
llvm::MVT::v4i1
@ v4i1
Definition: MachineValueType.h:66
llvm::SelectionDAG::getCondCode
SDValue getCondCode(ISD::CondCode Cond)
Definition: SelectionDAG.cpp:1785
llvm::AMDGPUTargetLowering::FIRST_IMPLICIT
@ FIRST_IMPLICIT
Definition: AMDGPUISelLowering.h:315