LLVM  13.0.0git
R600ISelLowering.cpp
Go to the documentation of this file.
1 //===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Custom DAG lowering for R600
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "R600ISelLowering.h"
15 #include "AMDGPU.h"
17 #include "R600Defines.h"
18 #include "R600InstrInfo.h"
20 #include "R600Subtarget.h"
21 #include "llvm/IR/IntrinsicsAMDGPU.h"
22 #include "llvm/IR/IntrinsicsR600.h"
23 
24 using namespace llvm;
25 
26 #include "R600GenCallingConv.inc"
27 
                                       const R600Subtarget &STI)
    : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) {
  // NOTE(review): many setOperationAction/setLoadExtAction/setTruncStoreAction
  // calls from the original source are not visible in this excerpt; comments
  // below cover only the surviving lines.

  // Scalar f32/i32 live in the 32-bit register class; 2- and 4-element
  // vectors map onto the 64- and 128-bit classes respectively.
  addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
  addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
  addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
  addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);

  // Legalize loads and stores to the private address space.

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  for (MVT VT : MVT::integer_valuetypes()) {
  }

  // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.

  // We need to include these since trunc STORES to PRIVATE need
  // special handling to accommodate RMW

  // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.

  // Set condition code actions

  // ADD, SUB overflow.
  // TODO: turn these into Legal?
  if (Subtarget->hasCARRY())

  if (Subtarget->hasBORROW())

  // Expand sign extension of vectors
  if (!Subtarget->hasBFE())

  if (!Subtarget->hasBFE())

  if (!Subtarget->hasBFE())

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.

  if (!Subtarget->hasFMA()) {
  }

  // FIXME: May need no denormals check

  if (!Subtarget->hasBFI()) {
    // fcopysign can be done in a single instruction with BFI.
  }

  if (!Subtarget->hasBCNT(32))

  if (!Subtarget->hasBCNT(64))

  if (Subtarget->hasFFBH())

  if (Subtarget->hasFFBL())

  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
  // need it for R600.
  if (Subtarget->hasBFE())
    setHasExtractBitsInsn(true);

  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
  }

  // LLVM will expand these to atomic_cmp_swap(0)
  // and atomic_swap, respectively.

  // We need to custom lower some of the intrinsics
}
256 
257 static inline bool isEOP(MachineBasicBlock::iterator I) {
258  if (std::next(I) == I->getParent()->end())
259  return false;
260  return std::next(I)->getOpcode() == R600::RETURN;
261 }
262 
                                                  MachineBasicBlock *BB) const {
  MachineFunction *MF = BB->getParent();
  const R600InstrInfo *TII = Subtarget->getInstrInfo();

  // Expand R600 pseudo-instructions into real machine instructions; most
  // cases build the replacement in place and erase the pseudo at the end.
  switch (MI.getOpcode()) {
  default:
    // Replace LDS_*_RET instruction that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI.getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      // LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
          MI.getOpcode() == R600::LDS_CMPST_RET)
        return BB;

      // Rebuild with the NORET opcode, copying every operand except the
      // now-unused destination (operand 0).
      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
      for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
        NewMI.add(MI.getOperand(i));
      }
    } else {
    }
    break;

  // FABS/FNEG become a plain MOV with the abs/neg source-modifier flag set.
  case R600::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, R600::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case R600::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, R600::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
    break;
  }

  // MASK_WRITE emits no code of its own; it tags the instruction that
  // defines the masked register with the MASK flag.
  case R600::MASK_WRITE: {
    Register maskedRegister = MI.getOperand(0).getReg();
    assert(maskedRegister.isVirtual());
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case R600::MOV_IMM_F32:
    // Materialize the FP immediate via its raw 32-bit bit pattern.
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
        .getFPImm()
        ->getValueAPF()
        .bitcastToAPInt()
        .getZExtValue());
    break;

  case R600::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
                     MI.getOperand(1).getImm());
    break;

  case R600::MOV_IMM_GLOBAL_ADDR: {
    //TODO: Perhaps combine this instruction with the next if possible
    auto MIB = TII->buildDefaultInstruction(
        *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
    int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
    //TODO: Ugh this is rather ugly
    // Overwrite the literal slot with the global-address operand directly.
    MIB->getOperand(Idx) = MI.getOperand(1);
    break;
  }

  case R600::CONST_COPY: {
    // Read from the constant file: MOV from ALU_CONST with the constant
    // index placed in src0_sel.
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
    TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
                       MI.getOperand(1).getImm());
    break;
  }

  case R600::RAT_WRITE_CACHELESS_32_eg:
  case R600::RAT_WRITE_CACHELESS_64_eg:
  case R600::RAT_WRITE_CACHELESS_128_eg:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(isEOP(I)); // Set End of program bit
    break;

  case R600::RAT_STORE_TYPED_eg:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(isEOP(I)); // Set End of program bit
    break;

  case R600::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
        .add(MI.getOperand(0));
    break;

  // Conditional branches expand to a predicate-setting PRED_X followed by a
  // predicated JUMP_COND; only the comparison opcode differs for f32 vs i32.
  case R600::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
                R600::PREDICATE_BIT)
            .add(MI.getOperand(1))
            .addImm(R600::PRED_SETNE)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
        .add(MI.getOperand(0))
        .addReg(R600::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case R600::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
                R600::PREDICATE_BIT)
            .add(MI.getOperand(1))
            .addImm(R600::PRED_SETNE_INT)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
        .add(MI.getOperand(0))
        .addReg(R600::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case R600::EG_ExportSwz:
  case R600::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI.getOperand(1).getImm();
    // Scan forward for a later export of the same type in this block.
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
          NextExportInst->getOpcode() == R600::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = isEOP(I);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    // NOTE(review): 84/40 look like the CF instruction encodings for
    // Evergreen vs. R600 exports — confirm against the hardware ISA docs.
    unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(5))
        .add(MI.getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case R600::RETURN: {
    // RETURN is kept as-is; skip the eraseFromParent below.
    return BB;
  }
  }

  MI.eraseFromParent();
  return BB;
}
442 
443 //===----------------------------------------------------------------------===//
444 // Custom DAG Lowering Operations
445 //===----------------------------------------------------------------------===//
446 
  // Dispatch custom lowering by opcode; anything unhandled falls back to the
  // shared AMDGPU lowering.
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
  case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
  case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: {
    SDValue Result = LowerLOAD(Op, DAG);
    assert((!Result.getNode() ||
            Result.getNode()->getNumValues() == 2) &&
           "Load should return a value and a chain");
    return Result;
  }

  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case Intrinsic::r600_store_swizzle: {
      SDLoc DL(Op);
      // Export with the identity swizzle (x, y, z, w).
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, DL, MVT::i32), // SWZ_X
        DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
        DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
        DAG.getConstant(3, DL, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
    // Handling for ISD::INTRINSIC_WO_CHAIN intrinsics (see the matching
    // "break out of case" comment below).
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch (IntrinsicID) {
    case Intrinsic::r600_tex:
    case Intrinsic::r600_texc: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case Intrinsic::r600_tex:
        TextureOp = 0;
        break;
      case Intrinsic::r600_texc:
        TextureOp = 1;
        break;
      default:
        llvm_unreachable("unhandled texture operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, DL, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
    }
    case Intrinsic::r600_dot4: {
      // Extract all four lanes of both operands and feed them pairwise into
      // the hardware DOT4 node.
      SDValue Args[8] = {
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(3, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(3, DL, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
    }

    case Intrinsic::r600_implicitarg_ptr: {
      return DAG.getConstant(ByteOffset, DL, PtrVT);
    }
    // Implicit kernel parameters live at fixed dword offsets; see
    // LowerImplicitParameter.
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    // Work-group ids are pre-loaded into T1.{x,y,z} and work-item ids into
    // T0.{x,y,z}; read them as raw live-in registers.
    case Intrinsic::r600_read_tgid_x:
    case Intrinsic::amdgcn_workgroup_id_x:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
    case Intrinsic::amdgcn_workgroup_id_y:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
    case Intrinsic::amdgcn_workgroup_id_z:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
    case Intrinsic::amdgcn_workitem_id_x:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
    case Intrinsic::amdgcn_workitem_id_y:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
    case Intrinsic::amdgcn_workitem_id_z:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T0_Z, VT);

    case Intrinsic::r600_recipsqrt_ieee:
      return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));

    case Intrinsic::r600_recipsqrt_clamped:
      return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
    default:
      return Op;
    }

    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}
629 
                                           SelectionDAG &DAG) const {
  // Produce replacement values for nodes whose result type is illegal.
  switch (N->getOpcode()) {
  default:
    return;
  case ISD::FP_TO_UINT:
    // i1 results get a dedicated compare-based lowering.
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
      return;
    }
    // Since we don't care about out of bounds values we can use FP_TO_SINT for
    // uints too. The DAGLegalizer code for uint considers some extra cases
    // which are not necessary here.
  case ISD::FP_TO_SINT: {
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
      return;
    }

    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    // Push both results: quotient and remainder.
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    SDValue Op = SDValue(N, 0);
    // LowerUDIVREM64 appends its results directly.
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}
671 
// Rebuild \p Vector as an AMDGPUISD::BUILD_VERTICAL_VECTOR node by
// extracting every element and re-packing the pieces.
SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
                                                   SDValue Vector) const {
  SDLoc DL(Vector);
  EVT VecVT = Vector.getValueType();
  EVT EltVT = VecVT.getVectorElementType();

  // Collect one EXTRACT_VECTOR_ELT per lane; Args (declared in source not
  // visible here) feeds the BUILD_VERTICAL_VECTOR below.
  for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
    Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
                               DAG.getVectorIdxConstant(i, DL)));
  }

  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
}
686 
SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Index = Op.getOperand(1);

  // Constant indices can be matched directly; only the remaining (dynamic)
  // extracts need the vertical-vector form.
  if (isa<ConstantSDNode>(Index) ||
    return Op;

  // Dynamic index: convert to a vertical vector so the extract can be done
  // with indirect addressing.
  Vector = vectorToVerticalVector(DAG, Vector);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
                     Vector, Index);
}
701 
SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Index = Op.getOperand(2);

  // Constant indices can be matched directly; only dynamic inserts need the
  // vertical-vector round trip.
  if (isa<ConstantSDNode>(Index) ||
    return Op;

  // Dynamic index: do the insert on a vertical vector, then convert back.
  Vector = vectorToVerticalVector(DAG, Vector);
  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
                               Vector, Value, Index);
  return vectorToVerticalVector(DAG, Insert);
}
718 
SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
                                               SDValue Op,
                                               SelectionDAG &DAG) const {
  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);

  const DataLayout &DL = DAG.getDataLayout();
  const GlobalValue *GV = GSD->getGlobal();

  // Wrap the target global address in a CONST_DATA_PTR node; ConstPtrVT is
  // computed in source lines not visible in this excerpt.
  SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
  return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
}
733 
SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDLoc DL(Op);

  // TODO: Should this propagate fast-math-flags?
  // 0.15915494309 is 1/(2*pi): convert radians into whole turns, recenter
  // with +0.5, and take the fractional part.
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT,
          DAG.getNode(ISD::FMUL, DL, VT, Arg,
              DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
          DAG.getConstantFP(0.5, DL, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  // Undo the +0.5 recentering before feeding the hardware trig node.
  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT, FractPart,
          DAG.getConstantFP(-0.5, DL, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
}
767 
768 SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
769  SDLoc DL(Op);
770  EVT VT = Op.getValueType();
771 
772  SDValue Lo = Op.getOperand(0);
773  SDValue Hi = Op.getOperand(1);
774  SDValue Shift = Op.getOperand(2);
775  SDValue Zero = DAG.getConstant(0, DL, VT);
776  SDValue One = DAG.getConstant(1, DL, VT);
777 
778  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
779  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
780  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
781  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
782 
783  // The dance around Width1 is necessary for 0 special case.
784  // Without it the CompShift might be 32, producing incorrect results in
785  // Overflow. So we do the shift in two steps, the alternative is to
786  // add a conditional to filter the special case.
787 
788  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
789  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
790 
791  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
792  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
793  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
794 
795  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
796  SDValue LoBig = Zero;
797 
798  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
799  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
800 
801  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
802 }
803 
804 SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
805  SDLoc DL(Op);
806  EVT VT = Op.getValueType();
807 
808  SDValue Lo = Op.getOperand(0);
809  SDValue Hi = Op.getOperand(1);
810  SDValue Shift = Op.getOperand(2);
811  SDValue Zero = DAG.getConstant(0, DL, VT);
812  SDValue One = DAG.getConstant(1, DL, VT);
813 
814  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
815 
816  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
817  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
818  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
819  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
820 
821  // The dance around Width1 is necessary for 0 special case.
822  // Without it the CompShift might be 32, producing incorrect results in
823  // Overflow. So we do the shift in two steps, the alternative is to
824  // add a conditional to filter the special case.
825 
826  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
827  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
828 
829  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
830  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
831  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
832 
833  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
834  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
835 
836  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
837  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
838 
839  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
840 }
841 
842 SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
843  unsigned mainop, unsigned ovf) const {
844  SDLoc DL(Op);
845  EVT VT = Op.getValueType();
846 
847  SDValue Lo = Op.getOperand(0);
848  SDValue Hi = Op.getOperand(1);
849 
850  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
851  // Extend sign.
852  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
853  DAG.getValueType(MVT::i1));
854 
855  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
856 
857  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
858 }
859 
860 SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
861  SDLoc DL(Op);
862  return DAG.getNode(
863  ISD::SETCC,
864  DL,
865  MVT::i1,
866  Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
867  DAG.getCondCode(ISD::SETEQ));
868 }
869 
870 SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
871  SDLoc DL(Op);
872  return DAG.getNode(
873  ISD::SETCC,
874  DL,
875  MVT::i1,
876  Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
877  DAG.getCondCode(ISD::SETEQ));
878 }
879 
// Load one implicit kernel parameter (ngroups, global/local size, ...) from
// its fixed dword slot; see the r600_read_* intrinsic lowering for callers.
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   const SDLoc &DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  // Load directly from the constant byte offset off the entry-node chain.
  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
}
894 
895 bool R600TargetLowering::isZero(SDValue Op) const {
896  if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
897  return Cst->isNullValue();
898  } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
899  return CstFP->isZero();
900  } else {
901  return false;
902  }
903 }
904 
905 bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
906  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
907  return CFP->isExactlyValue(1.0);
908  }
909  return isAllOnesConstant(Op);
910 }
911 
912 bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
913  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
914  return CFP->getValueAPF().isZero();
915  }
916  return isNullConstant(Op);
917 }
918 
// Lower SELECT_CC, preferring the native SET* (compare producing hardware
// true/false) and CND* (conditional move against zero) forms before falling
// back to a two-step expansion.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // For f32, first give the legacy min/max combiner a chance.
  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    ISD::CondCode InverseCC = ISD::getSetCCInverse(CCOpcode, CompareVT);
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      // Inverting the condition lets True/False swap into SET* position.
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      // Otherwise try the inverse condition with swapped compare operands.
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT);
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // "not equal" conditions are rewritten as their inverse with the select
    // arms exchanged so the CND* patterns only need the positive forms.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1059 
1060 /// LLVM generates byte-addressed pointers. For indirect addressing, we need to
1061 /// convert these pointers to a register index. Each register holds
1062 /// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1063 /// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1064 /// for indirect addressing.
1065 SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1066  unsigned StackWidth,
1067  SelectionDAG &DAG) const {
1068  unsigned SRLPad;
1069  switch(StackWidth) {
1070  case 1:
1071  SRLPad = 2;
1072  break;
1073  case 2:
1074  SRLPad = 3;
1075  break;
1076  case 4:
1077  SRLPad = 4;
1078  break;
1079  default: llvm_unreachable("Invalid stack width");
1080  }
1081 
1082  SDLoc DL(Ptr);
1083  return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
1084  DAG.getConstant(SRLPad, DL, MVT::i32));
1085 }
1086 
1087 void R600TargetLowering::getStackAddress(unsigned StackWidth,
1088  unsigned ElemIdx,
1089  unsigned &Channel,
1090  unsigned &PtrIncr) const {
1091  switch (StackWidth) {
1092  default:
1093  case 1:
1094  Channel = 0;
1095  if (ElemIdx > 0) {
1096  PtrIncr = 1;
1097  } else {
1098  PtrIncr = 0;
1099  }
1100  break;
1101  case 2:
1102  Channel = ElemIdx % 2;
1103  if (ElemIdx == 2) {
1104  PtrIncr = 1;
1105  } else {
1106  PtrIncr = 0;
1107  }
1108  break;
1109  case 4:
1110  Channel = ElemIdx;
1111  PtrIncr = 0;
1112  break;
1113  }
1114 }
1115 
/// Lower a sub-dword store to the private address space by rewriting it as a
/// read-modify-write of the containing 32-bit dword: load the dword, clear the
/// target byte/halfword with an inverted shifted mask, OR in the shifted
/// value, and store the dword back.
SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Store);
  //TODO: Who creates the i8 stores?
  assert(Store->isTruncatingStore()
         || Store->getValue().getValueType() == MVT::i8);
  assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);

  // Mask selecting the bits that will actually be written (i8 or i16 wide).
  SDValue Mask;
  if (Store->getMemoryVT() == MVT::i8) {
    assert(Store->getAlignment() >= 1);
    Mask = DAG.getConstant(0xff, DL, MVT::i32);
  } else if (Store->getMemoryVT() == MVT::i16) {
    assert(Store->getAlignment() >= 2);
    Mask = DAG.getConstant(0xffff, DL, MVT::i32);
  } else {
    llvm_unreachable("Unsupported private trunc store");
  }

  SDValue OldChain = Store->getChain();
  // A DUMMY_CHAIN input marks this store as one element of an expanded
  // vector truncstore (see LowerSTORE).
  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
  // Skip dummy
  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
  SDValue BasePtr = Store->getBasePtr();
  SDValue Offset = Store->getOffset();
  EVT MemVT = Store->getMemoryVT();

  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location
  // TODO: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
  SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);

  Chain = Dst.getValue(1);

  // Get offset in dword
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                                DAG.getConstant(0x3, DL, MVT::i32));

  // Convert byte offset to bit shift
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // TODO: Contrary to the name of the function,
  // it also handles sub i32 non-truncating stores (like i1)
  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
                                  Store->getValue());

  // Mask the value to the right type
  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);

  // Shift the value in place
  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                     MaskedValue, ShiftAmt);

  // Shift the mask in place
  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);

  // Invert the mask. NOTE: if we had native ROL instructions we could
  // use inverted mask
  DstMask = DAG.getNOT(DL, DstMask, MVT::i32);

  // Cleanup the target bits
  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);

  // Add the new bits
  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);

  // Store dword
  // TODO: Can we be smarter about MachinePointerInfo?
  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);

  // If we are part of expanded vector, make our neighbors depend on this store
  if (VectorTrunc) {
    // Make all other vector elements depend on this store
    Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
    DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
  }
  return NewStore;
}
1205 
/// Custom lowering for stores. Scalarizes vector stores to LOCAL/PRIVATE,
/// expands unaligned stores, emits STORE_MSKOR for global truncating stores,
/// and tags private dword stores with DWORDADDR to mark the already-shifted
/// address.
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  unsigned AS = StoreNode->getAddressSpace();

  SDValue Chain = StoreNode->getChain();
  SDValue Ptr = StoreNode->getBasePtr();
  SDValue Value = StoreNode->getValue();

  EVT VT = Value.getValueType();
  EVT MemVT = StoreNode->getMemoryVT();
  EVT PtrVT = Ptr.getValueType();

  SDLoc DL(Op);

  const bool TruncatingStore = StoreNode->isTruncatingStore();

  // Neither LOCAL nor PRIVATE can do vectors at the moment
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS ||
       TruncatingStore) &&
      VT.isVector()) {
    if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && TruncatingStore) {
      // Add an extra level of chain to isolate this vector
      SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
      // TODO: can the chain be replaced without creating a new store?
      SDValue NewStore = DAG.getTruncStore(
          NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
          MemVT, StoreNode->getAlignment(),
          StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
      StoreNode = cast<StoreSDNode>(NewStore);
    }

    return scalarizeVectorStore(StoreNode, DAG);
  }

  Align Alignment = StoreNode->getAlign();
  if (Alignment < MemVT.getStoreSize() &&
      !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
                                      StoreNode->getMemOperand()->getFlags(),
                                      nullptr)) {
    return expandUnalignedStore(StoreNode, DAG);
  }

  // Byte address shifted down to a dword address.
  SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
                                  DAG.getConstant(2, DL, PtrVT));

  if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
    // It is beneficial to create MSKOR here instead of combiner to avoid
    // artificial dependencies introduced by RMW
    if (TruncatingStore) {
      assert(VT.bitsLE(MVT::i32));
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        assert(StoreNode->getAlignment() >= 2);
        MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
      }

      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
                                      DAG.getConstant(0x00000003, DL, PtrVT));
      SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                     DAG.getConstant(3, DL, VT));

      // Put the mask in correct place
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);

      // Put the value bits in correct place
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);

      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(0, DL, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);

      if (StoreNode->isIndexed()) {
        llvm_unreachable("Indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
  if (AS != AMDGPUAS::PRIVATE_ADDRESS)
    return SDValue();

  if (MemVT.bitsLT(MVT::i32))
    return lowerPrivateTruncStore(StoreNode, DAG);

  // Standard i32+ store, tag it with DWORDADDR to note that the address
  // has been shifted
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
    return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
  }

  // Tagged i32+ stores will be matched by patterns
  return SDValue();
}
1320 
// Map a constant-buffer address space onto its hardware kc_bank base index:
// return (512 + (kc_bank << 12)), or -1 for non-constant-buffer spaces.
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}
1361 
/// Lower a sub-dword extending load from the private address space: load the
/// containing 32-bit dword, shift the target byte/halfword down, then sign- or
/// zero-extend according to the load's extension type.
SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  LoadSDNode *Load = cast<LoadSDNode>(Op);
  ISD::LoadExtType ExtType = Load->getExtensionType();
  EVT MemVT = Load->getMemoryVT();
  assert(Load->getAlignment() >= MemVT.getStoreSize());

  SDValue BasePtr = Load->getBasePtr();
  SDValue Chain = Load->getChain();
  SDValue Offset = Load->getOffset();

  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location
  // NOTE: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
  SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);

  // Get offset within the register.
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
                                LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));

  // Bit offset of target byte (byteIdx * 8).
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // Shift to the right.
  SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);

  // Eliminate the upper bits by setting them to ...
  EVT MemEltVT = MemVT.getScalarType();

  if (ExtType == ISD::SEXTLOAD) { // ... ones.
    SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
    Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
  } else { // ... or zeros.
    Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
  }

  SDValue Ops[] = {
    Ret,
    Read.getValue(1) // This should be our output chain
  };

  return DAG.getMergeValues(Ops, DL);
}
1417 
/// Custom lowering for loads. Handles private sub-dword extloads, scalarizes
/// LOCAL/PRIVATE vector loads, folds constant-buffer loads into CONST_ADDRESS
/// nodes, manually expands SEXT loads, and tags private dword loads with
/// DWORDADDR.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  unsigned AS = LoadNode->getAddressSpace();
  EVT MemVT = LoadNode->getMemoryVT();
  ISD::LoadExtType ExtType = LoadNode->getExtensionType();

  if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
      ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
    return lowerPrivateExtLoad(Op, DAG);
  }

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();

  // Neither LOCAL nor PRIVATE supports vector loads; split them up.
  if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
      LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
      VT.isVector()) {
    SDValue Ops[2];
    std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(LoadNode, DAG);
    return DAG.getMergeValues(Ops, DL);
  }

  // This is still used for explicit load from addrspace(8)
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
    } else {
      //TODO: Does this even work?
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(4, DL, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      // Scalar load: take channel 0 of the v4i32 result.
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue NewLoad = DAG.getExtLoad(
        ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
        LoadNode->getAlignment(), LoadNode->getMemOperand()->getFlags());
    SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
                              DAG.getValueType(MemVT));

    SDValue MergedValues[2] = { Res, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // DWORDADDR ISD marks already shifted address
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    assert(VT == MVT::i32);
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
    return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
  }
  return SDValue();
}
1506 
1507 SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1508  SDValue Chain = Op.getOperand(0);
1509  SDValue Cond = Op.getOperand(1);
1510  SDValue Jump = Op.getOperand(2);
1511 
1512  return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1513  Chain, Jump, Cond);
1514 }
1515 
/// Lower a frame index to a constant offset, scaled by the register size
/// (4 bytes) times the stack width reported by the frame lowering.
SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
                                            SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const R600FrameLowering *TFL = Subtarget->getFrameLowering();

  FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);

  unsigned FrameIndex = FIN->getIndex();
  Register IgnoredFrameReg;
  StackOffset Offset =
      TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
  return DAG.getConstant(Offset.getFixed() * 4 * TFL->getStackWidth(MF),
                         SDLoc(Op), Op.getValueType());
}
1530 
/// Select the calling-convention assignment function for \p CC. Kernel and CPU
/// conventions are rejected; shader conventions use CC_R600 from
/// R600GenCallingConv.inc.
CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
                                                  bool IsVarArg) const {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::Cold:
    llvm_unreachable("kernels should not be handled here");
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_LS:
    return CC_R600;
  default:
    report_fatal_error("Unsupported calling convention.");
  }
}
1552 
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  SmallVector<ISD::InputArg, 8> LocalIns;

  if (AMDGPU::isShader(CallConv)) {
    CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
  } else {
    analyzeFormalArgumentsCompute(CCInfo, Ins);
  }

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;
    EVT MemVT = VA.getLocVT();
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    // Shader arguments arrive in registers; everything else is loaded from
    // the kernel parameter buffer below.
    if (AMDGPU::isShader(CallConv)) {
      Register Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      // FIXME: This should really check the extload type, but the handling of
      // extload vector parameters seems to be broken.

      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      Ext = ISD::SEXTLOAD;
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    unsigned PartOffset = VA.getLocMemOffset();
    unsigned Alignment = MinAlign(VT.getStoreSize(), PartOffset);

    MachinePointerInfo PtrInfo(AMDGPUAS::PARAM_I_ADDRESS);
    SDValue Arg = DAG.getLoad(
        ISD::UNINDEXED, Ext, VT, DL, Chain,
        DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
        PtrInfo,
        MemVT, Alignment, MachineMemOperand::MONonTemporal |
                              MachineMemOperand::MODereferenceable |
                              MachineMemOperand::MOInvariant);

    InVals.push_back(Arg);
  }
  return Chain;
}
1625 
/// SetCC results are i32 for scalars; vector compares keep their shape with
/// integer elements.
EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                           EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}
1632 
/// Stores to LOCAL/PRIVATE may only be merged up to 32 bits, since those
/// address spaces do not handle vectors; other spaces have no restriction.
bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
                                          const SelectionDAG &DAG) const {
  // Local and Private addresses do not handle vectors. Limit to i32
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS)) {
    return (MemVT.getSizeInBits() <= 32);
  }
  return true;
}
1641 
/// Misaligned accesses are only allowed for simple types wider than 32 bits
/// with at least 4-byte alignment; anything narrower than i32 is rejected
/// outright.
bool R600TargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
    bool *IsFast) const {
  if (IsFast)
    *IsFast = false;

  if (!VT.isSimple() || VT == MVT::Other)
    return false;

  if (VT.bitsLT(MVT::i32))
    return false;

  // TODO: This is a rough estimate.
  if (IsFast)
    *IsFast = true;

  return VT.bitsGT(MVT::i32) && Alignment >= Align(4);
}
1660 
/// Rewrite a 4-lane build_vector so that undef lanes become SEL_MASK_WRITE,
/// constant 0.0/1.0 lanes become SEL_0/SEL_1, and duplicated lanes refer back
/// to their first occurrence. The old-lane -> swizzle-selector mapping is
/// recorded in \p RemapSwizzle.
static SDValue CompactSwizzlableVector(
  SelectionDAG &DAG, SDValue VectorEntry,
  DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(RemapSwizzle.empty());

  SDLoc DL(VectorEntry);
  EVT EltTy = VectorEntry.getValueType().getVectorElementType();

  SDValue NewBldVec[4];
  for (unsigned i = 0; i < 4; i++)
    NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
                               DAG.getIntPtrConstant(i, DL));

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].isUndef())
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].isUndef())
      continue;

    // Deduplicate: a lane equal to an earlier lane reuses its selector.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
                            NewBldVec);
}
1705 
/// Try to move one extract_vector_elt lane of \p VectorEntry into the position
/// matching its source index, so the swizzle becomes an identity for that
/// lane. Lanes already in their source position are pinned. The lane
/// permutation performed is recorded in \p RemapSwizzle.
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(RemapSwizzle.empty());

  SDLoc DL(VectorEntry);
  EVT EltTy = VectorEntry.getValueType().getVectorElementType();

  SDValue NewBldVec[4];
  bool isUnmovable[4] = {false, false, false, false};
  for (unsigned i = 0; i < 4; i++)
    NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
                               DAG.getIntPtrConstant(i, DL));

  // Pin lanes whose extract index already matches their position.
  for (unsigned i = 0; i < 4; i++) {
    RemapSwizzle[i] = i;
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (i == Idx)
        isUnmovable[Idx] = true;
    }
  }

  // Swap at most one movable lane into its source position.
  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (isUnmovable[Idx])
        continue;
      // Swap i and Idx
      std::swap(NewBldVec[Idx], NewBldVec[i]);
      std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
      break;
    }
  }

  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
                            NewBldVec);
}
1745 
1746 SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
1747  SelectionDAG &DAG,
1748  const SDLoc &DL) const {
1749  // Old -> New swizzle values
1750  DenseMap<unsigned, unsigned> SwizzleRemap;
1751 
1752  BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1753  for (unsigned i = 0; i < 4; i++) {
1754  unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
1755  if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1756  Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
1757  }
1758 
1759  SwizzleRemap.clear();
1760  BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1761  for (unsigned i = 0; i < 4; i++) {
1762  unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
1763  if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1764  Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
1765  }
1766 
1767  return BuildVector;
1768 }
1769 
/// Fold a constant-pointer load from a constant buffer into CONST_ADDRESS
/// nodes, one per 32-bit channel. Only dword-aligned, non-extending
/// i32(-vector) loads are handled; otherwise returns SDValue().
SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
                                            SelectionDAG &DAG) const {
  SDLoc DL(LoadNode);
  EVT VT = LoadNode->getValueType(0);
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();
  assert (isa<ConstantSDNode>(Ptr));

  //TODO: Support smaller loads
  if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode))
    return SDValue();

  if (LoadNode->getAlignment() < 4)
    return SDValue();

  int ConstantBlock = ConstantAddressBlock(Block);

  SDValue Slots[4];
  for (unsigned i = 0; i < 4; i++) {
    // We want Const position encoded with the following formula :
    // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
    // const_index is Ptr computed by llvm using an alignment of 16.
    // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
    // then div by 4 at the ISel step
    SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
        DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
    Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
  }
  EVT NewVT = MVT::v4i32;
  unsigned NumElements = 4;
  if (VT.isVector()) {
    NewVT = VT;
    NumElements = VT.getVectorNumElements();
  }
  SDValue Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
  if (!VT.isVector()) {
    // Scalar load: extract channel 0 from the vector result.
    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                         DAG.getConstant(0, DL, MVT::i32));
  }
  SDValue MergedValues[2] = {
    Result,
    Chain
  };
  return DAG.getMergeValues(MergedValues, DL);
}
1815 
1816 //===----------------------------------------------------------------------===//
1817 // Custom DAG Optimizations
1818 //===----------------------------------------------------------------------===//
1819 
/// R600-specific DAG combines: FP_ROUND(UINT_TO_FP f64) folding,
/// FP_TO_SINT(FNEG(SELECT_CC)) -> SET*_DX10-friendly SELECT_CC,
/// BUILD_VECTOR-based INSERT/EXTRACT_VECTOR_ELT folding, nested SELECT_CC
/// folding, swizzle optimization for exports and texture fetches, and
/// constant-buffer load folding. Falls through to the AMDGPU combines.
SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);

  switch (N->getOpcode()) {
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
      return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
                         Arg.getOperand(0));
    }
    break;
  }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
                       SelectCC.getOperand(0), // LHS
                       SelectCC.getOperand(1), // RHS
                       DAG.getConstant(-1, DL, MVT::i32), // True
                       DAG.getConstant(0, DL, MVT::i32), // False
                       SelectCC.getOperand(4)); // CC
  }

  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
  // => build_vector elt0, ... , NewEltIdx, ... , eltN
  case ISD::INSERT_VECTOR_ELT: {
    SDValue InVec = N->getOperand(0);
    SDValue InVal = N->getOperand(1);
    SDValue EltNo = N->getOperand(2);

    // If the inserted element is an UNDEF, just use the input vector.
    if (InVal.isUndef())
      return InVec;

    EVT VT = InVec.getValueType();

    // If we can't generate a legal BUILD_VECTOR, exit
    if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
      return SDValue();

    // Check that we know which element is being inserted
    if (!isa<ConstantSDNode>(EltNo))
      return SDValue();
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
    // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
    // vector elements.
    SmallVector<SDValue, 8> Ops;
    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
      Ops.append(InVec.getNode()->op_begin(),
                 InVec.getNode()->op_end());
    } else if (InVec.isUndef()) {
      unsigned NElts = VT.getVectorNumElements();
      Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
    } else {
      return SDValue();
    }

    // Insert the element
    if (Elt < Ops.size()) {
      // All the operands of BUILD_VECTOR must have the same type;
      // we enforce that here.
      EVT OpVT = Ops[0].getValueType();
      if (InVal.getValueType() != OpVT)
        InVal = OpVT.bitsGT(InVal.getValueType()) ?
          DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
          DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
      Ops[Elt] = InVal;
    }

    // Return the new vector
    return DAG.getBuildVector(VT, DL, Ops);
  }

  // Extract_vec (Build_vector) generated by custom lowering
  // also needs to be customly combined
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    // Also look through a bitcast of a build_vector when the element counts
    // match, bitcasting the extracted element instead.
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
        (Arg.getOperand(0).getValueType().getVectorNumElements() ==
         Arg.getValueType().getVectorNumElements())) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
                           Arg->getOperand(0).getOperand(Element));
      }
    }
    break;
  }

  case ISD::SELECT_CC: {
    // Try common optimizations
    if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
      return Ret;

    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    // selectcc x, y, a, b, inv(cc)
    //
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    // selectcc x, y, a, b, cc
    SDValue LHS = N->getOperand(0);
    if (LHS.getOpcode() != ISD::SELECT_CC) {
      return SDValue();
    }

    SDValue RHS = N->getOperand(1);
    SDValue True = N->getOperand(2);
    SDValue False = N->getOperand(3);
    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    if (LHS.getOperand(2).getNode() != True.getNode() ||
        LHS.getOperand(3).getNode() != False.getNode() ||
        RHS.getNode() != False.getNode()) {
      return SDValue();
    }

    switch (NCC) {
    default: return SDValue();
    case ISD::SETNE: return LHS;
    case ISD::SETEQ: {
      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
      LHSCC = ISD::getSetCCInverse(LHSCC, LHS.getOperand(0).getValueType());
      if (DCI.isBeforeLegalizeOps() ||
          isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
        return DAG.getSelectCC(DL,
                               LHS.getOperand(0),
                               LHS.getOperand(1),
                               LHS.getOperand(2),
                               LHS.getOperand(3),
                               LHSCC);
      break;
    }
    }
    return SDValue();
  }

  case AMDGPUISD::R600_EXPORT: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[8] = {
      N->getOperand(0), // Chain
      SDValue(),
      N->getOperand(2), // ArrayBase
      N->getOperand(3), // Type
      N->getOperand(4), // SWZ_X
      N->getOperand(5), // SWZ_Y
      N->getOperand(6), // SWZ_Z
      N->getOperand(7) // SWZ_W
    };
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
    return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
  }
  case AMDGPUISD::TEXTURE_FETCH: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[19] = {
      N->getOperand(0),
      N->getOperand(1),
      N->getOperand(2),
      N->getOperand(3),
      N->getOperand(4),
      N->getOperand(5),
      N->getOperand(6),
      N->getOperand(7),
      N->getOperand(8),
      N->getOperand(9),
      N->getOperand(10),
      N->getOperand(11),
      N->getOperand(12),
      N->getOperand(13),
      N->getOperand(14),
      N->getOperand(15),
      N->getOperand(16),
      N->getOperand(17),
      N->getOperand(18),
    };
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
  }

  case ISD::LOAD: {
    LoadSDNode *LoadNode = cast<LoadSDNode>(N);
    SDValue Ptr = LoadNode->getBasePtr();
    if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
        isa<ConstantSDNode>(Ptr))
      return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
    break;
  }

  default: break;
  }

  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
2046 
2047 bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
2048  SDValue &Src, SDValue &Neg, SDValue &Abs,
2049  SDValue &Sel, SDValue &Imm,
2050  SelectionDAG &DAG) const {
2051  const R600InstrInfo *TII = Subtarget->getInstrInfo();
2052  if (!Src.isMachineOpcode())
2053  return false;
2054 
2055  switch (Src.getMachineOpcode()) {
2056  case R600::FNEG_R600:
2057  if (!Neg.getNode())
2058  return false;
2059  Src = Src.getOperand(0);
2060  Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
2061  return true;
2062  case R600::FABS_R600:
2063  if (!Abs.getNode())
2064  return false;
2065  Src = Src.getOperand(0);
2066  Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
2067  return true;
2068  case R600::CONST_COPY: {
2069  unsigned Opcode = ParentNode->getMachineOpcode();
2070  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2071 
2072  if (!Sel.getNode())
2073  return false;
2074 
2075  SDValue CstOffset = Src.getOperand(0);
2076  if (ParentNode->getValueType(0).isVector())
2077  return false;
2078 
2079  // Gather constants values
2080  int SrcIndices[] = {
2081  TII->getOperandIdx(Opcode, R600::OpName::src0),
2082  TII->getOperandIdx(Opcode, R600::OpName::src1),
2083  TII->getOperandIdx(Opcode, R600::OpName::src2),
2084  TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2085  TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2086  TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2087  TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2088  TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2089  TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2090  TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2091  TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2092  };
2093  std::vector<unsigned> Consts;
2094  for (int OtherSrcIdx : SrcIndices) {
2095  int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2096  if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2097  continue;
2098  if (HasDst) {
2099  OtherSrcIdx--;
2100  OtherSelIdx--;
2101  }
2102  if (RegisterSDNode *Reg =
2103  dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2104  if (Reg->getReg() == R600::ALU_CONST) {
2105  ConstantSDNode *Cst
2106  = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
2107  Consts.push_back(Cst->getZExtValue());
2108  }
2109  }
2110  }
2111 
2112  ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
2113  Consts.push_back(Cst->getZExtValue());
2114  if (!TII->fitsConstReadLimitations(Consts)) {
2115  return false;
2116  }
2117 
2118  Sel = CstOffset;
2119  Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
2120  return true;
2121  }
2122  case R600::MOV_IMM_GLOBAL_ADDR:
2123  // Check if the Imm slot is used. Taken from below.
2124  if (cast<ConstantSDNode>(Imm)->getZExtValue())
2125  return false;
2126  Imm = Src.getOperand(0);
2127  Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
2128  return true;
2129  case R600::MOV_IMM_I32:
2130  case R600::MOV_IMM_F32: {
2131  unsigned ImmReg = R600::ALU_LITERAL_X;
2132  uint64_t ImmValue = 0;
2133 
2134  if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
2135  ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0));
2136  float FloatValue = FPC->getValueAPF().convertToFloat();
2137  if (FloatValue == 0.0) {
2138  ImmReg = R600::ZERO;
2139  } else if (FloatValue == 0.5) {
2140  ImmReg = R600::HALF;
2141  } else if (FloatValue == 1.0) {
2142  ImmReg = R600::ONE;
2143  } else {
2144  ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2145  }
2146  } else {
2147  ConstantSDNode *C = cast<ConstantSDNode>(Src.getOperand(0));
2148  uint64_t Value = C->getZExtValue();
2149  if (Value == 0) {
2150  ImmReg = R600::ZERO;
2151  } else if (Value == 1) {
2152  ImmReg = R600::ONE_INT;
2153  } else {
2154  ImmValue = Value;
2155  }
2156  }
2157 
2158  // Check that we aren't already using an immediate.
2159  // XXX: It's possible for an instruction to have more than one
2160  // immediate operand, but this is not supported yet.
2161  if (ImmReg == R600::ALU_LITERAL_X) {
2162  if (!Imm.getNode())
2163  return false;
2164  ConstantSDNode *C = cast<ConstantSDNode>(Imm);
2165  if (C->getZExtValue())
2166  return false;
2167  Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
2168  }
2169  Src = DAG.getRegister(ImmReg, MVT::i32);
2170  return true;
2171  }
2172  default:
2173  return false;
2174  }
2175 }
2176 
/// Fold the instructions after selecting them.
///
/// Walks the sources of the just-selected machine node and, via FoldOperand,
/// folds FNEG/FABS into modifier bits, CONST_COPY into constant selectors,
/// and MOV_IMM_* into inline registers or the literal slot. Three node
/// shapes are handled: DOT_4 (eight per-channel sources), REG_SEQUENCE
/// (every other operand is a source), and ordinary ALU instructions with
/// modifiers (up to three sources).
///
/// \param Node the machine node produced by instruction selection.
/// \returns a rebuilt machine node if any operand was folded, otherwise
///          \p Node unchanged.
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII = Subtarget->getInstrInfo();
  if (!Node->isMachineOpcode())
    return Node;

  unsigned Opcode = Node->getMachineOpcode();
  // FakeOp stands in for modifier/selector slots the instruction lacks;
  // FoldOperand detects it via a null getNode() and refuses those folds.
  SDValue FakeOp;

  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());

  if (Opcode == R600::DOT_4) {
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_X),
      TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src0_W),
      TII->getOperandIdx(Opcode, R600::OpName::src1_X),
      TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src1_W)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      // MachineInstr operand indices include the def; the SDNode operand
      // list does not, hence the "- 1" when indexing Ops.
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      // Rebuild the node after the first successful fold; later channels
      // get another chance when this function runs on the new node.
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == R600::REG_SEQUENCE) {
    // REG_SEQUENCE operands alternate (value, subreg-index); only the
    // values at odd positions are candidate sources.
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else {
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0),
      TII->getOperandIdx(Opcode, R600::OpName::src1),
      TII->getOperandIdx(Opcode, R600::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
      TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
    };
    // src2 never has an abs modifier on R600 ALU ops; FakeAbs is used for it.
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
      -1
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
      if (HasDst) {
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      // NOTE(review): unlike Sel, ImmIdx is not range-checked before
      // indexing Ops — presumably every opcode with instruction modifiers
      // carries a literal operand; confirm against the instruction
      // definitions before relying on this.
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  return Node;
}
llvm::ISD::SUB
@ SUB
Definition: ISDOpcodes.h:233
llvm::CCValAssign::getLocVT
MVT getLocVT() const
Definition: CallingConvLower.h:153
i
i
Definition: README.txt:29
llvm::ISD::SETUGE
@ SETUGE
Definition: ISDOpcodes.h:1349
llvm::AMDGPUISD::STORE_MSKOR
@ STORE_MSKOR
Definition: AMDGPUISelLowering.h:486
llvm::TargetLowering::scalarizeVectorLoad
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
Definition: TargetLowering.cpp:7225
llvm::ConstantSDNode
Definition: SelectionDAGNodes.h:1536
llvm::RegisterSDNode
Definition: SelectionDAGNodes.h:2072
llvm::APFloat::convertToFloat
float convertToFloat() const
Definition: APFloat.h:1136
llvm::StoreSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2305
llvm::TargetLoweringBase::setSchedulingPreference
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
Definition: TargetLowering.h:2093
llvm::AMDGPUAS::CONSTANT_BUFFER_13
@ CONSTANT_BUFFER_13
Definition: AMDGPU.h:408
llvm::AMDGPUAS::CONSTANT_BUFFER_6
@ CONSTANT_BUFFER_6
Definition: AMDGPU.h:401
llvm::ISD::SETLE
@ SETLE
Definition: ISDOpcodes.h:1360
llvm::ISD::INTRINSIC_VOID
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:192
llvm::ISD::SETO
@ SETO
Definition: ISDOpcodes.h:1345
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:100
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:132
llvm
Definition: AllocatorList.h:23
llvm::CallingConv::AMDGPU_HS
@ AMDGPU_HS
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:218
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::SDNode::getValueType
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Definition: SelectionDAGNodes.h:958
llvm::SystemZISD::TM
@ TM
Definition: SystemZISelLowering.h:65
llvm::SDLoc
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Definition: SelectionDAGNodes.h:1078
llvm::TargetLoweringBase::Legal
@ Legal
Definition: TargetLowering.h:193
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:618
llvm::TargetLowering::EmitInstrWithCustomInserter
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Definition: SelectionDAGISel.cpp:290
llvm::ISD::BITCAST
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:833
llvm::ISD::SETNE
@ SETNE
Definition: ISDOpcodes.h:1361
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
llvm::AMDGPUTargetLowering
Definition: AMDGPUISelLowering.h:27
llvm::ISD::NON_EXTLOAD
@ NON_EXTLOAD
Definition: ISDOpcodes.h:1316
llvm::CCState
CCState - This class holds information needed while lowering arguments and return values.
Definition: CallingConvLower.h:191
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::SDValue::getNode
SDNode * getNode() const
get the SDNode which holds the desired result
Definition: SelectionDAGNodes.h:152
llvm::MachineInstrBuilder::add
const MachineInstrBuilder & add(const MachineOperand &MO) const
Definition: MachineInstrBuilder.h:225
llvm::ISD::UADDO
@ UADDO
Definition: ISDOpcodes.h:304
llvm::ISD::CTTZ_ZERO_UNDEF
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:659
llvm::SelectionDAG::getValueType
SDValue getValueType(EVT)
Definition: SelectionDAG.cpp:1696
llvm::PointerType::get
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Definition: Type.cpp:693
llvm::ISD::ADDC
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:262
llvm::R600FrameLowering
Definition: R600FrameLowering.h:16
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::AMDGPUISD::CONST_ADDRESS
@ CONST_ADDRESS
Definition: AMDGPUISelLowering.h:435
llvm::ISD::SETEQ
@ SETEQ
Definition: ISDOpcodes.h:1356
llvm::SelectionDAG::getVTList
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
Definition: SelectionDAG.cpp:7967
llvm::MVT::v2i1
@ v2i1
Definition: MachineValueType.h:62
llvm::R600Subtarget::getInstrInfo
const R600InstrInfo * getInstrInfo() const override
Definition: R600Subtarget.h:57
llvm::MipsISD::Lo
@ Lo
Definition: MipsISelLowering.h:79
llvm::MachineSDNode
An SDNode that represents everything that will be needed to construct a MachineInstr.
Definition: SelectionDAGNodes.h:2522
llvm::MachineMemOperand::MOInvariant
@ MOInvariant
The memory access always returns the same value (or traps).
Definition: MachineMemOperand.h:144
llvm::SelectionDAG::getVectorIdxConstant
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
Definition: SelectionDAG.cpp:1482
llvm::MemSDNode::getMemoryVT
EVT getMemoryVT() const
Return the type of the in-memory value.
Definition: SelectionDAGNodes.h:1325
llvm::MemSDNode::getChain
const SDValue & getChain() const
Definition: SelectionDAGNodes.h:1348
llvm::ISD::ANY_EXTEND
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:717
llvm::SDNode
Represents one node in the SelectionDAG.
Definition: SelectionDAGNodes.h:455
llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:460
llvm::ISD::FP_TO_SINT
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:770
llvm::TargetLowering::DAGCombinerInfo::DAG
SelectionDAG & DAG
Definition: TargetLowering.h:3495
llvm::LoadSDNode
This class is used to represent ISD::LOAD nodes.
Definition: SelectionDAGNodes.h:2255
Shift
bool Shift
Definition: README.txt:468
llvm::ISD::SETULE
@ SETULE
Definition: ISDOpcodes.h:1351
llvm::CCState::AnalyzeFormalArguments
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
Definition: CallingConvLower.cpp:90
isEOP
static bool isEOP(MachineBasicBlock::iterator I)
Definition: R600ISelLowering.cpp:257
llvm::NVPTXISD::RETURN
@ RETURN
Definition: NVPTXISelLowering.h:49
llvm::ISD::SHL_PARTS
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:700
llvm::SelectionDAG::getZeroExtendInReg
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
Definition: SelectionDAG.cpp:1288
llvm::SelectionDAG::getStore
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
Definition: SelectionDAG.cpp:7296
llvm::MachineMemOperand::MODereferenceable
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
Definition: MachineMemOperand.h:142
llvm::AMDGPUISD::CARRY
@ CARRY
Definition: AMDGPUISelLowering.h:415
llvm::R600Subtarget::hasBCNT
bool hasBCNT(unsigned Size) const
Definition: R600Subtarget.h:99
llvm::ISD::SETCC
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:687
llvm::MVT::integer_valuetypes
static mvt_range integer_valuetypes()
Definition: MachineValueType.h:1362
llvm::R600Subtarget::hasCARRY
bool hasCARRY() const
Definition: R600Subtarget.h:110
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
Results
Function Alias Analysis Results
Definition: AliasAnalysis.cpp:849
llvm::ISD::MERGE_VALUES
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:229
llvm::AMDGPUTargetLowering::ReplaceNodeResults
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
Definition: AMDGPUISelLowering.cpp:1272
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:116
llvm::AMDGPUAS::CONSTANT_BUFFER_4
@ CONSTANT_BUFFER_4
Definition: AMDGPU.h:399
llvm::ISD::EXTLOAD
@ EXTLOAD
Definition: ISDOpcodes.h:1316
llvm::LSBaseSDNode::isIndexed
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
Definition: SelectionDAGNodes.h:2243
llvm::R600Subtarget::hasFFBL
bool hasFFBL() const
Definition: R600Subtarget.h:118
llvm::EVT::bitsLE
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:273
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
R600ISelLowering.h
llvm::ISD::SETUEQ
@ SETUEQ
Definition: ISDOpcodes.h:1347
llvm::TargetLowering::expandUnalignedStore
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
Definition: TargetLowering.cpp:7544
llvm::SelectionDAG::getContext
LLVMContext * getContext() const
Definition: SelectionDAG.h:447
llvm::R600MachineFunctionInfo
Definition: R600MachineFunctionInfo.h:19
llvm::ISD::BRCOND
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:941
llvm::SelectionDAG::getRegister
SDValue getRegister(unsigned Reg, EVT VT)
Definition: SelectionDAG.cpp:1952
llvm::MipsISD::Hi
@ Hi
Definition: MipsISelLowering.h:75
isUndef
static bool isUndef(ArrayRef< int > Mask)
Definition: HexagonISelDAGToDAGHVX.cpp:904
llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:124
llvm::SDNode::op_end
op_iterator op_end() const
Definition: SelectionDAGNodes.h:904
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:205
llvm::ISD::FFLOOR
@ FFLOOR
Definition: ISDOpcodes.h:870
llvm::ISD::LoadExtType
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1316
llvm::R600TargetLowering::LowerFormalArguments
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
XXX Only kernel functions are supported, so we can assume for now that every function is a kernel fun...
Definition: R600ISelLowering.cpp:1556
llvm::R600Subtarget::hasBFE
bool hasBFE() const
Definition: R600Subtarget.h:91
llvm::ISD::BR_CC
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:948
x3
In x86 we generate this spiffy xmm0 xmm0 ret in x86 we generate this which could be xmm1 movss xmm1 xmm0 ret In sse4 we could use insertps to make both better Here s another testcase that could use x3
Definition: README-SSE.txt:547
llvm::SelectionDAG::getLoad
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Definition: SelectionDAG.cpp:7246
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:40
llvm::SDNode::getOpcode
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
Definition: SelectionDAGNodes.h:621
llvm::EVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:355
llvm::R600Subtarget::hasFMA
bool hasFMA() const
Definition: R600Subtarget.h:126
llvm::MinAlign
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:672
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:565
llvm::ISD::GlobalAddress
@ GlobalAddress
Definition: ISDOpcodes.h:71
llvm::ISD::SELECT_CC
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:679
llvm::AMDGPUAS::CONSTANT_BUFFER_10
@ CONSTANT_BUFFER_10
Definition: AMDGPU.h:405
llvm::SDValue::getValueType
EVT getValueType() const
Return the ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:1113
llvm::CCValAssign
CCValAssign - Represent assignment of one arg/retval to a location.
Definition: CallingConvLower.h:33
llvm::AMDGPU::isShader
bool isShader(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1313
llvm::SelectionDAG
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:216
llvm::ISD::SELECT
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:664
llvm::TargetLowering::DAGCombinerInfo::isBeforeLegalizeOps
bool isBeforeLegalizeOps() const
Definition: TargetLowering.h:3501
f
Itanium Name Demangler i e convert the string _Z1fv into f()". You can also use the CRTP base ManglingParser to perform some simple analysis on the mangled name
llvm::SelectionDAG::getTruncStore
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition: SelectionDAG.cpp:7347
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:648
llvm::CCValAssign::getLocReg
Register getLocReg() const
Definition: CallingConvLower.h:150
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:653
MO_FLAG_PUSH
#define MO_FLAG_PUSH
Definition: R600Defines.h:18
llvm::SelectionDAG::getUNDEF
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:947
llvm::ISD::SIGN_EXTEND_INREG
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:732
llvm::AMDGPUISD::R600_EXPORT
@ R600_EXPORT
Definition: AMDGPUISelLowering.h:434
llvm::MachineMemOperand::getValue
const Value * getValue() const
Return the base address of the memory access.
Definition: MachineMemOperand.h:200
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:35
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::TargetLoweringBase::isCondCodeLegal
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
Definition: TargetLowering.h:1342
llvm::R600TargetLowering::canMergeStoresTo
bool canMergeStoresTo(unsigned AS, EVT MemVT, const SelectionDAG &DAG) const override
Returns if it's reasonable to merge stores to MemVT size.
Definition: R600ISelLowering.cpp:1633
llvm::MVT::f64
@ f64
Definition: MachineValueType.h:53
llvm::SelectionDAG::getConstant
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1346
llvm::EVT::getVectorNumElements
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:301
llvm::TargetLowering::DAGCombinerInfo
Definition: TargetLowering.h:3489
getOpcode
static Optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:199
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::ISD::TRUNCATE
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:720
llvm::ISD::SRA
@ SRA
Definition: ISDOpcodes.h:643
llvm::TargetLoweringBase::ZeroOrNegativeOneBooleanContent
@ ZeroOrNegativeOneBooleanContent
Definition: TargetLowering.h:230
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:373
llvm::AMDGPUISD::DOT4
@ DOT4
Definition: AMDGPUISelLowering.h:414
llvm::ISD::UDIVREM
@ UDIVREM
Definition: ISDOpcodes.h:249
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::TargetLoweringBase::addRegisterClass
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
Definition: TargetLowering.h:2140
MO_FLAG_MASK
#define MO_FLAG_MASK
Definition: R600Defines.h:17
llvm::report_fatal_error
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1632
llvm::ISD::FRINT
@ FRINT
Definition: ISDOpcodes.h:866
llvm::codeview::EncodedFramePtrReg::BasePtr
@ BasePtr
llvm::ISD::ATOMIC_STORE
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1099
llvm::FrameIndexSDNode
Definition: SelectionDAGNodes.h:1718
llvm::R600TargetLowering::CCAssignFnForCall
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const
Definition: R600ISelLowering.cpp:1531
llvm::SelectionDAG::getMemIntrinsicNode
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
Definition: SelectionDAG.cpp:7023
R600InstrInfo.h
llvm::R600TargetLowering::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, bool *IsFast=nullptr) const override
Determine if the target supports unaligned memory accesses.
Definition: R600ISelLowering.cpp:1642
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:617
llvm::ISD::SETOLT
@ SETOLT
Definition: ISDOpcodes.h:1342
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::GlobalAddressSDNode::getGlobal
const GlobalValue * getGlobal() const
Definition: SelectionDAGNodes.h:1704
llvm::APFloat::bitcastToAPInt
APInt bitcastToAPInt() const
Definition: APFloat.h:1132
llvm::CCValAssign::getLocMemOffset
unsigned getLocMemOffset() const
Definition: CallingConvLower.h:151
llvm::ISD::UNINDEXED
@ UNINDEXED
Definition: ISDOpcodes.h:1285
llvm::TargetLoweringBase::isOperationLegal
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
Definition: TargetLowering.h:1204
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::TargetLoweringBase::setBooleanVectorContents
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
Definition: TargetLowering.h:2088
llvm::AMDGPUAS::CONSTANT_BUFFER_0
@ CONSTANT_BUFFER_0
Definition: AMDGPU.h:395
llvm::EVT::changeVectorElementTypeToInteger
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:400
llvm::EVT::bitsLT
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:265
llvm::AMDGPUTargetLowering::LowerGlobalAddress
virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const
Definition: AMDGPUISelLowering.cpp:1297
llvm::MVT::v4i16
@ v4i16
Definition: MachineValueType.h:86
llvm::RegState::Kill
@ Kill
The last use of a register.
Definition: MachineInstrBuilder.h:49
llvm::EVT::getTypeForEVT
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:181
llvm::MachineRegisterInfo::use_empty
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
Definition: MachineRegisterInfo.h:506
llvm::ISD::USUBO
@ USUBO
Definition: ISDOpcodes.h:308
llvm::MVT::v4i8
@ v4i8
Definition: MachineValueType.h:75
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
ConstantAddressBlock
static int ConstantAddressBlock(unsigned AddressSpace)
Definition: R600ISelLowering.cpp:1323
llvm::AMDGPUAS::CONSTANT_BUFFER_15
@ CONSTANT_BUFFER_15
Definition: AMDGPU.h:410
llvm::StoreSDNode::isTruncatingStore
bool isTruncatingStore() const
Return true if the op does a truncation before store.
Definition: SelectionDAGNodes.h:2299
llvm::ISD::SETOLE
@ SETOLE
Definition: ISDOpcodes.h:1343
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
llvm::ISD::SETUGT
@ SETUGT
Definition: ISDOpcodes.h:1348
llvm::SelectionDAG::getTargetGlobalAddress
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:683
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:378
R600MachineFunctionInfo.h
llvm::CCAssignFn
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
Definition: CallingConvLower.h:177
llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:371
llvm::MVT::v2i8
@ v2i8
Definition: MachineValueType.h:74
llvm::AMDGPUISD::BUILD_VERTICAL_VECTOR
@ BUILD_VERTICAL_VECTOR
This node is for VLIW targets and it is used to represent a vector that is stored in consecutive regi...
Definition: AMDGPUISelLowering.h:472
llvm::AMDGPUAS::CONSTANT_BUFFER_5
@ CONSTANT_BUFFER_5
Definition: AMDGPU.h:400
llvm::ISD::SETUNE
@ SETUNE
Definition: ISDOpcodes.h:1352
llvm::Register::isVirtual
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::TargetLoweringBase::setOperationAction
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
Definition: TargetLowering.h:2157
llvm::MVT::v32i16
@ v32i16
Definition: MachineValueType.h:89
llvm::CallingConv::AMDGPU_GS
@ AMDGPU_GS
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:202
llvm::pdb::OMFSegDescFlags::Read
@ Read
llvm::AMDGPUTargetLowering::LowerUDIVREM64
void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results) const
Definition: AMDGPUISelLowering.cpp:1765
llvm::R600Subtarget
Definition: R600Subtarget.h:36
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:211
llvm::MipsISD::Ext
@ Ext
Definition: MipsISelLowering.h:156
llvm::AMDGPUTargetLowering::LowerSDIVREM
SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const
Definition: AMDGPUISelLowering.cpp:2035
llvm::MVT::v16i8
@ v16i8
Definition: MachineValueType.h:77
AMDGPUMCTargetDesc.h
llvm::R600Subtarget::hasBORROW
bool hasBORROW() const
Definition: R600Subtarget.h:106
llvm::AMDGPUISD::BORROW
@ BORROW
Definition: AMDGPUISelLowering.h:416
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:333
llvm::ISD::FMAD
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:464
llvm::DenseMapBase::clear
void clear()
Definition: DenseMap.h:111
llvm::TargetLowering::expandFP_TO_SINT
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
Definition: TargetLowering.cpp:6588
llvm::MVT::v16i16
@ v16i16
Definition: MachineValueType.h:88
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:70
llvm::TargetLoweringBase::setHasExtractBitsInsn
void setHasExtractBitsInsn(bool hasExtractInsn=true)
Tells the code generator that the target has BitExtract instructions.
Definition: TargetLowering.h:2123
llvm::ISD::FP_TO_UINT
@ FP_TO_UINT
Definition: ISDOpcodes.h:771
llvm::ConstantFPSDNode
Definition: SelectionDAGNodes.h:1579
llvm::R600FrameLowering::getFrameIndexReference
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
Definition: R600FrameLowering.cpp:18
llvm::MemSDNode::getMemOperand
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Definition: SelectionDAGNodes.h:1329
llvm::ISD::LOAD
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:906
llvm::SelectionDAG::getIntPtrConstant
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
Definition: SelectionDAG.cpp:1470
llvm::TargetLoweringBase::Promote
@ Promote
Definition: TargetLowering.h:194
llvm::ConstantPointerNull::get
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1756
llvm::CallingConv::Fast
@ Fast
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
llvm::MachinePointerInfo
This class contains a discriminated union of information about pointers in memory operands,...
Definition: MachineMemOperand.h:37
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap< unsigned, unsigned >
llvm::SelectionDAG::getCopyFromReg
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:761
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:505
llvm::R600Subtarget::hasBFI
bool hasBFI() const
Definition: R600Subtarget.h:95
llvm::ISD::CTLZ_ZERO_UNDEF
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:660
llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition: SelectionDAGNodes.h:896
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::FrameIndexSDNode::getIndex
int getIndex() const
Definition: SelectionDAGNodes.h:1729
llvm::SelectionDAG::getNode
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
Definition: SelectionDAG.cpp:7732
llvm::SelectionDAG::getNOT
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
Definition: SelectionDAG.cpp:1319
llvm::R600::getLDSNoRetOp
int getLDSNoRetOp(uint16_t Opcode)
llvm::CallingConv::Cold
@ Cold
Definition: CallingConv.h:48
llvm::ISD::FCOPYSIGN
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:470
llvm::LoadSDNode::getExtensionType
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Definition: SelectionDAGNodes.h:2270
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:634
llvm::TargetLowering::scalarizeVectorStore
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
Definition: TargetLowering.cpp:7315
llvm::MVT::v4f32
@ v4f32
Definition: MachineValueType.h:145
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::MachineMemOperand::Flags
Flags
Flags values. These may be or'd together.
Definition: MachineMemOperand.h:130
llvm::ISD::InputArg
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
Definition: TargetCallingConv.h:190
llvm::StoreSDNode
This class is used to represent ISD::STORE nodes.
Definition: SelectionDAGNodes.h:2283
llvm::ISD::ZEXTLOAD
@ ZEXTLOAD
Definition: ISDOpcodes.h:1316
llvm::SDValue::getValue
SDValue getValue(unsigned R) const
Definition: SelectionDAGNodes.h:172
llvm::MVT::i8
@ i8
Definition: MachineValueType.h:41
llvm::R600Subtarget::hasFFBH
bool hasFFBH() const
Definition: R600Subtarget.h:122
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
llvm::AMDGPUAS::CONSTANT_BUFFER_2
@ CONSTANT_BUFFER_2
Definition: AMDGPU.h:397
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:39
llvm::AMDGPUISD::SIN_HW
@ SIN_HW
Definition: AMDGPUISelLowering.h:382
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::ISD::SETULT
@ SETULT
Definition: ISDOpcodes.h:1350
llvm::ConstantSDNode::getZExtValue
uint64_t getZExtValue() const
Definition: SelectionDAGNodes.h:1551
llvm::AMDGPUISD::TEXTURE_FETCH
@ TEXTURE_FETCH
Definition: AMDGPUISelLowering.h:433
llvm::AMDGPUISD::RSQ
@ RSQ
Definition: AMDGPUISelLowering.h:407
llvm::MachineMemOperand::MONonTemporal
@ MONonTemporal
The memory access is non-temporal.
Definition: MachineMemOperand.h:140
llvm::SelectionDAG::getSelectCC
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition: SelectionDAG.h:1111
llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1336
MO_FLAG_NEG
#define MO_FLAG_NEG
Definition: R600Defines.h:15
MO_FLAG_ABS
#define MO_FLAG_ABS
Definition: R600Defines.h:16
llvm::AMDGPUSubtarget::R700
@ R700
Definition: AMDGPUSubtarget.h:34
llvm::SelectionDAG::getMachineNode
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
Definition: SelectionDAG.cpp:8405
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:30
llvm::ISD::isNON_EXTLoad
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
Definition: SelectionDAGNodes.h:2677
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:98
llvm::StoreSDNode::getValue
const SDValue & getValue() const
Definition: SelectionDAGNodes.h:2304
llvm::AfterLegalizeVectorOps
@ AfterLegalizeVectorOps
Definition: DAGCombine.h:18
llvm::ISD::SRA_PARTS
@ SRA_PARTS
Definition: ISDOpcodes.h:701
llvm::AMDGPUTargetLowering::combineFMinMaxLegacy
SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, SDValue CC, DAGCombinerInfo &DCI) const
Generate Min/Max node.
Definition: AMDGPUISelLowering.cpp:1378
llvm::MachineFunction
Definition: MachineFunction.h:227
llvm::R600Subtarget::getFrameLowering
const R600FrameLowering * getFrameLowering() const override
Definition: R600Subtarget.h:59
llvm::MachineFunction::addLiveIn
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
Definition: MachineFunction.cpp:634
llvm::MemSDNode::getAAInfo
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Definition: SelectionDAGNodes.h:1300
llvm::AMDGPUTargetLowering::getImplicitParameterOffset
uint32_t getImplicitParameterOffset(const MachineFunction &MF, const ImplicitParameter Param) const
Helper function that returns the byte offset of the given type of implicit parameter.
Definition: AMDGPUISelLowering.cpp:4192
llvm::AMDGPUAS::PARAM_I_ADDRESS
@ PARAM_I_ADDRESS
Address space for indirect addressible parameter memory (VTX1).
Definition: AMDGPU.h:387
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:98
llvm::EVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:149
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::MVT::i64
@ i64
Definition: MachineValueType.h:44
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:167
llvm::MVT::v2i32
@ v2i32
Definition: MachineValueType.h:94
llvm::EVT::getScalarSizeInBits
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:345
AMDGPU.h
llvm::AMDGPUAS::CONSTANT_BUFFER_3
@ CONSTANT_BUFFER_3
Definition: AMDGPU.h:398
llvm::AMDGPUAS::CONSTANT_BUFFER_14
@ CONSTANT_BUFFER_14
Definition: AMDGPU.h:409
llvm::SDValue::getMachineOpcode
unsigned getMachineOpcode() const
Definition: SelectionDAGNodes.h:1145
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::SelectionDAG::ReplaceAllUsesOfValueWith
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
Definition: SelectionDAG.cpp:8993
llvm::MVT::v2f32
@ v2f32
Definition: MachineValueType.h:143
uint32_t
llvm::StackOffset
StackOffset is a class to represent an offset with 2 dimensions, named fixed and scalable,...
Definition: TypeSize.h:134
llvm::AMDGPUTargetLowering::LowerOperation
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
Definition: AMDGPUISelLowering.cpp:1231
llvm::MinMax
Definition: AssumeBundleQueries.h:72
llvm::SDValue::getOperand
const SDValue & getOperand(unsigned i) const
Definition: SelectionDAGNodes.h:1121
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::AMDGPUAS::CONSTANT_BUFFER_7
@ CONSTANT_BUFFER_7
Definition: AMDGPU.h:402
llvm::TargetLoweringBase::setCondCodeAction
void setCondCodeAction(ISD::CondCode CC, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
Definition: TargetLowering.h:2223
llvm::TargetLoweringBase::setTruncStoreAction
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
Definition: TargetLowering.h:2177
llvm::ISD::getSetCCInverse
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
Definition: SelectionDAG.cpp:475
llvm::SDValue::getSimpleValueType
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:183
llvm::MVT::v4i32
@ v4i32
Definition: MachineValueType.h:96
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:281
llvm::ISD::FMUL
@ FMUL
Definition: ISDOpcodes.h:373
llvm::MVT::v32i32
@ v32i32
Definition: MachineValueType.h:100
llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition: ISDOpcodes.h:1316
llvm::SelectionDAG::getBuildVector
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:790
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::SelectionDAG::getConstantFP
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1526
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::ISD::INTRINSIC_WO_CHAIN
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:177
llvm::CallingConv::AMDGPU_LS
@ AMDGPU_LS
Calling convention used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:226
llvm::LoadSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2274
llvm::DenseMapBase::empty
LLVM_NODISCARD bool empty() const
Definition: DenseMap.h:97
llvm::MVT::v16i32
@ v16i32
Definition: MachineValueType.h:99
llvm::ISD::FrameIndex
@ FrameIndex
Definition: ISDOpcodes.h:73
llvm::isInt< 16 >
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:370
llvm::AMDGPUTargetLowering::CreateLiveInRegisterRaw
SDValue CreateLiveInRegisterRaw(SelectionDAG &DAG, const TargetRegisterClass *RC, Register Reg, EVT VT) const
Definition: AMDGPUISelLowering.h:291
j
return j(j<< 16)
llvm::ISD::SETLT
@ SETLT
Definition: ISDOpcodes.h:1359
llvm::isAllOnesConstant
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
Definition: SelectionDAG.cpp:9357
llvm::CallingConv::SPIR_KERNEL
@ SPIR_KERNEL
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:147
llvm::AMDGPUTargetLowering::analyzeFormalArgumentsCompute
void analyzeFormalArgumentsCompute(CCState &State, const SmallVectorImpl< ISD::InputArg > &Ins) const
The SelectionDAGBuilder will automatically promote function arguments with illegal types.
Definition: AMDGPUISelLowering.cpp:1008
CompactSwizzlableVector
static SDValue CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap< unsigned, unsigned > &RemapSwizzle)
Definition: R600ISelLowering.cpp:1661
llvm::R600TargetLowering::LowerOperation
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
Definition: R600ISelLowering.cpp:447
llvm::CallingConv::C
@ C
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
llvm::TargetLoweringBase::setLoadExtAction
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
Definition: TargetLowering.h:2165
llvm::GlobalAddressSDNode
Definition: SelectionDAGNodes.h:1692
llvm::AMDGPUISD::RSQ_CLAMP
@ RSQ_CLAMP
Definition: AMDGPUISelLowering.h:411
llvm::AMDGPUAS::CONSTANT_BUFFER_8
@ CONSTANT_BUFFER_8
Definition: AMDGPU.h:403
llvm::isNullConstant
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
Definition: SelectionDAG.cpp:9347
llvm::EVT::getScalarType
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:288
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:83
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:314
llvm::MemSDNode::getAddressSpace
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Definition: SelectionDAGNodes.h:1336
llvm::MemSDNode::getAlignment
unsigned getAlignment() const
Definition: SelectionDAGNodes.h:1270
llvm::ISD::FCOS
@ FCOS
Definition: ISDOpcodes.h:856
llvm::SelectionDAG::getEntryNode
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:516
llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:864
llvm::ISD::getSetCCSwappedOperands
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
Definition: SelectionDAG.cpp:452
llvm::R600TargetLowering::PerformDAGCombine
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
Definition: R600ISelLowering.cpp:1820
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:377
llvm::ISD::FSIN
@ FSIN
Definition: ISDOpcodes.h:855
llvm::SelectionDAG::getDataLayout
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:440
llvm::MVT::v8i16
@ v8i16
Definition: MachineValueType.h:87
llvm::ISD::BUILD_VECTOR
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:485
llvm::TargetLoweringBase::Custom
@ Custom
Definition: TargetLowering.h:197
llvm::R600Subtarget::getRegisterInfo
const R600RegisterInfo * getRegisterInfo() const override
Definition: R600Subtarget.h:67
llvm::ISD::SUBC
@ SUBC
Definition: ISDOpcodes.h:263
R600Subtarget.h
llvm::MVT::i32
@ i32
Definition: MachineValueType.h:43
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::ISD::SETUO
@ SETUO
Definition: ISDOpcodes.h:1346
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition: SelectionDAGNodes.h:138
llvm::R600InstrInfo
Definition: R600InstrInfo.h:39
llvm::ISD::STORE
@ STORE
Definition: ISDOpcodes.h:907
llvm::MVT::v8i32
@ v8i32
Definition: MachineValueType.h:98
llvm::ISD::SRL_PARTS
@ SRL_PARTS
Definition: ISDOpcodes.h:702
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:403
llvm::R600TargetLowering::getSetCCResultType
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const override
Return the ValueType of the result of SETCC operations.
Definition: R600ISelLowering.cpp:1626
llvm::AMDGPUISD::BRANCH_COND
@ BRANCH_COND
Definition: AMDGPUISelLowering.h:339
llvm::ISD::UINT_TO_FP
@ UINT_TO_FP
Definition: ISDOpcodes.h:725
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:232
llvm::SDValue::isUndef
bool isUndef() const
Definition: SelectionDAGNodes.h:1149
llvm::SDNode::getMachineOpcode
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
Definition: SelectionDAGNodes.h:678
Vector
So we should use XX3Form_Rcr to implement instrinsic Convert DP outs ins xscvdpsp No builtin are required Round &Convert QP DP(dword[1] is set to zero) No builtin are required Round to Quad Precision because you need to assign rounding mode in instruction Provide builtin(set f128:$vT,(int_ppc_vsx_xsrqpi f128:$vB))(set f128 yields< n x< ty > >< result > yields< ty >< result > No builtin are required Load Store Vector
Definition: README_P9.txt:497
llvm::makeArrayRef
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:474
llvm::AMDGPUTargetLowering::PerformDAGCombine
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
Definition: AMDGPUISelLowering.cpp:3897
llvm::AMDGPUAS::CONSTANT_BUFFER_9
@ CONSTANT_BUFFER_9
Definition: AMDGPU.h:404
llvm::EVT::getVectorElementType
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:293
llvm::MemSDNode::getAlign
Align getAlign() const
Definition: SelectionDAGNodes.h:1264
llvm::TargetLoweringBase::setBooleanContents
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
Definition: TargetLowering.h:2074
llvm::SDNode::op_begin
op_iterator op_begin() const
Definition: SelectionDAGNodes.h:903
llvm::AMDGPUISD::FRACT
@ FRACT
Definition: AMDGPUISelLowering.h:362
llvm::ISD::FSUB
@ FSUB
Definition: ISDOpcodes.h:372
llvm::ISD::SHL
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:642
R600Defines.h
llvm::TargetLoweringBase::Expand
@ Expand
Definition: TargetLowering.h:195
llvm::TargetLoweringBase::setTargetDAGCombine
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
Definition: TargetLowering.h:2254
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:329
N
#define N
llvm::TargetLoweringBase::computeRegisterProperties
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
Definition: TargetLoweringBase.cpp:1255
llvm::AMDGPUAS::CONSTANT_BUFFER_12
@ CONSTANT_BUFFER_12
Definition: AMDGPU.h:407
llvm::ISD::SUBE
@ SUBE
Definition: ISDOpcodes.h:273
llvm::ISD::SRL
@ SRL
Definition: ISDOpcodes.h:644
llvm::GlobalAddressSDNode::getAddressSpace
unsigned getAddressSpace() const
Definition: SelectionDAG.cpp:10099
llvm::R600TargetLowering::ReplaceNodeResults
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
Definition: R600ISelLowering.cpp:630
llvm::AMDGPUISD::COS_HW
@ COS_HW
Definition: AMDGPUISelLowering.h:381
llvm::MipsISD::Ins
@ Ins
Definition: MipsISelLowering.h:157
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
llvm::SDValue::getOpcode
unsigned getOpcode() const
Definition: SelectionDAGNodes.h:1109
llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:637
llvm::numbers::pif
constexpr float pif
Definition: MathExtras.h:78
llvm::ISD::SETONE
@ SETONE
Definition: ISDOpcodes.h:1344
llvm::MVT::v32i8
@ v32i8
Definition: MachineValueType.h:78
llvm::CallingConv::AMDGPU_ES
@ AMDGPU_ES
Calling convention used for AMDPAL shader stage before geometry shader if geometry is in use.
Definition: CallingConv.h:231
llvm::EVT::bitsGT
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:249
llvm::MVT::i16
@ i16
Definition: MachineValueType.h:42
llvm::ISD::FNEG
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:851
llvm::SelectionDAG::getMachineFunction
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:437
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::R600TargetLowering::EmitInstrWithCustomInserter
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Definition: R600ISelLowering.cpp:264
llvm::ISD::SDIVREM
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:248
llvm::AMDGPUISD::DWORDADDR
@ DWORDADDR
Definition: AMDGPUISelLowering.h:361
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:376
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::EVT::bitsGE
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:257
llvm::CallingConv::AMDGPU_PS
@ AMDGPU_PS
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:205
llvm::CallingConv::AMDGPU_VS
@ AMDGPU_VS
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
Definition: CallingConv.h:199
llvm::ISD::SIGN_EXTEND
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:711
llvm::MVT::v8i8
@ v8i8
Definition: MachineValueType.h:76
llvm::AMDGPUFrameLowering::getStackWidth
unsigned getStackWidth(const MachineFunction &MF) const
Definition: AMDGPUFrameLowering.cpp:22
llvm::ISD::FTRUNC
@ FTRUNC
Definition: ISDOpcodes.h:865
llvm::SDValue::isMachineOpcode
bool isMachineOpcode() const
Definition: SelectionDAGNodes.h:1141
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::MVT::v2i16
@ v2i16
Definition: MachineValueType.h:84
llvm::AMDGPUMachineFunction
Definition: AMDGPUMachineFunction.h:20
llvm::ConstantFPSDNode::getValueAPF
const APFloat & getValueAPF() const
Definition: SelectionDAGNodes.h:1590
ReorganizeVector
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap< unsigned, unsigned > &RemapSwizzle)
Definition: R600ISelLowering.cpp:1706
llvm::ISD::INSERT_VECTOR_ELT
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:494
llvm::SelectionDAG::getExtLoad
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition: SelectionDAG.cpp:7263
llvm::ISD::ADDE
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:272
llvm::ISD::FP_ROUND
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:799
llvm::MVT::f32
@ f32
Definition: MachineValueType.h:52
llvm::CallingConv::AMDGPU_CS
@ AMDGPU_CS
Calling convention used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:208
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::MachineMemOperand::getFlags
Flags getFlags() const
Return the raw flags of the source value,.
Definition: MachineMemOperand.h:209
llvm::AMDGPUAS::CONSTANT_BUFFER_1
@ CONSTANT_BUFFER_1
Definition: AMDGPU.h:396
llvm::ISD::ATOMIC_LOAD
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1095
llvm::MemSDNode::getPointerInfo
const MachinePointerInfo & getPointerInfo() const
Definition: SelectionDAGNodes.h:1331
llvm::AMDGPUISD::CONST_DATA_PTR
@ CONST_DATA_PTR
Pointer to the start of the shader's constant data.
Definition: AMDGPUISelLowering.h:474
llvm::AMDGPUAS::CONSTANT_BUFFER_11
@ CONSTANT_BUFFER_11
Definition: AMDGPU.h:406
llvm::TargetLoweringBase::getPointerTy
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Definition: TargetLowering.h:342
llvm::ISD::CTPOP
@ CTPOP
Definition: ISDOpcodes.h:654
llvm::AMDGPUISD::DUMMY_CHAIN
@ DUMMY_CHAIN
Definition: AMDGPUISelLowering.h:477
llvm::R600TargetLowering::R600TargetLowering
R600TargetLowering(const TargetMachine &TM, const R600Subtarget &STI)
Definition: R600ISelLowering.cpp:28
llvm::SelectionDAG::getMergeValues
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
Definition: SelectionDAG.cpp:7012
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:281
llvm::MVT::v4i1
@ v4i1
Definition: MachineValueType.h:63
llvm::SelectionDAG::getCondCode
SDValue getCondCode(ISD::CondCode Cond)
Definition: SelectionDAG.cpp:1737
llvm::AMDGPUTargetLowering::FIRST_IMPLICIT
@ FIRST_IMPLICIT
Definition: AMDGPUISelLowering.h:316