LLVM  15.0.0git
NVPTXISelDAGToDAG.cpp
Go to the documentation of this file.
1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the NVPTX target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "NVPTXISelDAGToDAG.h"
15 #include "NVPTXUtilities.h"
17 #include "llvm/IR/GlobalValue.h"
18 #include "llvm/IR/Instructions.h"
19 #include "llvm/IR/IntrinsicsNVPTX.h"
22 #include "llvm/Support/Debug.h"
26 
27 using namespace llvm;
28 
29 #define DEBUG_TYPE "nvptx-isel"
30 
31 /// createNVPTXISelDag - This pass converts a legalized DAG into an
32 /// NVPTX-specific DAG, ready for instruction scheduling.
///
/// Returns an NVPTXDAGToDAGISel allocated with `new` and constructed from TM
/// and OptLevel.
/// NOTE(review): the first line of this function's signature is not present
/// in this listing; only the trailing OptLevel parameter is visible.
34  llvm::CodeGenOpt::Level OptLevel) {
35  return new NVPTXDAGToDAGISel(TM, OptLevel);
36 }
37 
// Constructor (tail of the declarator only; NOTE(review): its first line is
// not present in this listing). Forwards tm and OptLevel to the
// SelectionDAGISel base, keeps tm in the TM member, and turns on doMulWide
// whenever OptLevel is greater than 0.
39  CodeGenOpt::Level OptLevel)
40  : SelectionDAGISel(tm, OptLevel), TM(tm) {
41  doMulWide = (OptLevel > 0);
42 }
43 
47 }
48 
// Accessor that yields an int "div f32 level".
// NOTE(review): the statement that produces the return value is not present
// in this listing; confirm the body against the upstream file.
49 int NVPTXDAGToDAGISel::getDivF32Level() const {
51 }
52 
// Boolean query about precise f32 sqrt.
// NOTE(review): the statement that produces the return value is not present
// in this listing; confirm the body against the upstream file.
53 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
55 }
56 
// Boolean query whose result SelectSETP_F16X2 passes as the FTZ argument of
// getPTXCmpMode.
// NOTE(review): the statement that produces the return value is not present
// in this listing; confirm the body against the upstream file.
57 bool NVPTXDAGToDAGISel::useF32FTZ() const {
59 }
60 
// Forwards the FMA query to TL->allowFMA for the current machine function
// (*MF) and OptLevel.
// NOTE(review): the line that declares TL is not present in this listing.
61 bool NVPTXDAGToDAGISel::allowFMA() const {
63  return TL->allowFMA(*MF, OptLevel);
64 }
65 
// Forwards the unsafe-FP-math query to TL->allowUnsafeFPMath for the current
// machine function (*MF).
// NOTE(review): the line that declares TL is not present in this listing.
66 bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
68  return TL->allowUnsafeFPMath(*MF);
69 }
70 
71 bool NVPTXDAGToDAGISel::useShortPointers() const {
72  return TM.useShortPointers();
73 }
74 
75 /// Select - Select instructions not customized! Used for
76 /// expanded, promoted and normal instructions.
///
/// Nodes that already carry a machine opcode get their node id reset to -1
/// and are left alone. For every other node the switch below gives an
/// opcode-specific try*/Select* helper the first attempt: a try* helper that
/// returns true ends selection, SelectSETP_F16X2 and SelectAddrSpaceCast
/// always end it, and anything else falls through to SelectCode(N).
/// NOTE(review): several `case` labels of the switch are not present in this
/// listing, so some helper calls below appear without their label.
77 void NVPTXDAGToDAGISel::Select(SDNode *N) {
78 
79  if (N->isMachineOpcode()) {
80  N->setNodeId(-1);
81  return; // Already selected.
82  }
83 
84  switch (N->getOpcode()) {
85  case ISD::LOAD:
86  case ISD::ATOMIC_LOAD:
87  if (tryLoad(N))
88  return;
89  break;
90  case ISD::STORE:
91  case ISD::ATOMIC_STORE:
92  if (tryStore(N))
93  return;
94  break;
96  if (tryEXTRACT_VECTOR_ELEMENT(N))
97  return;
98  break;
100  SelectSETP_F16X2(N);
101  return;
102 
103  case NVPTXISD::LoadV2:
104  case NVPTXISD::LoadV4:
105  if (tryLoadVector(N))
106  return;
107  break;
108  case NVPTXISD::LDGV2:
109  case NVPTXISD::LDGV4:
110  case NVPTXISD::LDUV2:
111  case NVPTXISD::LDUV4:
112  if (tryLDGLDU(N))
113  return;
114  break;
115  case NVPTXISD::StoreV2:
116  case NVPTXISD::StoreV4:
117  if (tryStoreVector(N))
118  return;
119  break;
120  case NVPTXISD::LoadParam:
123  if (tryLoadParam(N))
124  return;
125  break;
129  if (tryStoreRetval(N))
130  return;
131  break;
137  if (tryStoreParam(N))
138  return;
139  break;
141  if (tryIntrinsicNoChain(N))
142  return;
143  break;
145  if (tryIntrinsicChain(N))
146  return;
147  break;
316  if (tryTextureIntrinsic(N))
317  return;
318  break;
484  if (trySurfaceIntrinsic(N))
485  return;
486  break;
487  case ISD::AND:
488  case ISD::SRA:
489  case ISD::SRL:
490  // Try to select BFE
491  if (tryBFE(N))
492  return;
493  break;
494  case ISD::ADDRSPACECAST:
495  SelectAddrSpaceCast(N);
496  return;
497  case ISD::ConstantFP:
498  if (tryConstantFP16(N))
499  return;
500  break;
501  default:
502  break;
503  }
// No custom helper claimed the node: use the generated matcher.
504  SelectCode(N);
505 }
506 
507 bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
508  unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
509  switch (IID) {
510  default:
511  return false;
512  case Intrinsic::nvvm_ldg_global_f:
513  case Intrinsic::nvvm_ldg_global_i:
514  case Intrinsic::nvvm_ldg_global_p:
515  case Intrinsic::nvvm_ldu_global_f:
516  case Intrinsic::nvvm_ldu_global_i:
517  case Intrinsic::nvvm_ldu_global_p:
518  return tryLDGLDU(N);
519  }
520 }
521 
522 // There's no way to specify FP16 immediates in .f16 ops, so we have to
523 // load them into an .f16 register first.
//
// Returns false without touching N unless N's first result type is f16.
// Otherwise N is replaced by a LOAD_CONST_F16 machine node that takes Val as
// its operand, and true is returned.
// NOTE(review): the line that begins the declaration of Val is not present
// in this listing; only the continuation of its initializer is visible.
524 bool NVPTXDAGToDAGISel::tryConstantFP16(SDNode *N) {
525  if (N->getValueType(0) != MVT::f16)
526  return false;
528  cast<ConstantFPSDNode>(N)->getValueAPF(), SDLoc(N), MVT::f16);
529  SDNode *LoadConstF16 =
530  CurDAG->getMachineNode(NVPTX::LOAD_CONST_F16, SDLoc(N), MVT::f16, Val);
531  ReplaceNode(N, LoadConstF16);
532  return true;
533 }
534 
535 // Map an ISD::CondCode value to the appropriate CmpMode expected by
536 // NVPTXInstPrinter::printCmpMode()
//
// The ordered codes (SETO*) and the codes without an ordered/unordered
// qualifier (SETEQ, SETGT, ...) map to the same plain CmpMode values; the
// unordered codes (SETU*) map to the *U variants. SETO maps to NUM and SETUO
// to NotANumber. Any other condition code hits llvm_unreachable.
// When FTZ is true, NVPTX::PTXCmpMode::FTZ_FLAG is OR-ed into the result.
// NOTE(review): one line between the signature and the lambda is not present
// in this listing; the unqualified CmpMode name below presumably comes from
// it -- confirm against the upstream file.
537 static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) {
539  unsigned PTXCmpMode = [](ISD::CondCode CC) {
540  switch (CC) {
541  default:
542  llvm_unreachable("Unexpected condition code.");
543  case ISD::SETOEQ:
544  return CmpMode::EQ;
545  case ISD::SETOGT:
546  return CmpMode::GT;
547  case ISD::SETOGE:
548  return CmpMode::GE;
549  case ISD::SETOLT:
550  return CmpMode::LT;
551  case ISD::SETOLE:
552  return CmpMode::LE;
553  case ISD::SETONE:
554  return CmpMode::NE;
555  case ISD::SETO:
556  return CmpMode::NUM;
557  case ISD::SETUO:
558  return CmpMode::NotANumber;
559  case ISD::SETUEQ:
560  return CmpMode::EQU;
561  case ISD::SETUGT:
562  return CmpMode::GTU;
563  case ISD::SETUGE:
564  return CmpMode::GEU;
565  case ISD::SETULT:
566  return CmpMode::LTU;
567  case ISD::SETULE:
568  return CmpMode::LEU;
569  case ISD::SETUNE:
570  return CmpMode::NEU;
571  case ISD::SETEQ:
572  return CmpMode::EQ;
573  case ISD::SETGT:
574  return CmpMode::GT;
575  case ISD::SETGE:
576  return CmpMode::GE;
577  case ISD::SETLT:
578  return CmpMode::LT;
579  case ISD::SETLE:
580  return CmpMode::LE;
581  case ISD::SETNE:
582  return CmpMode::NE;
583  }
584  }(CondCode.get());
585 
586  if (FTZ)
587  PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG;
588 
589  return PTXCmpMode;
590 }
591 
592 bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
593  unsigned PTXCmpMode =
594  getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
595  SDLoc DL(N);
596  SDNode *SetP = CurDAG->getMachineNode(
597  NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
598  N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
599  ReplaceNode(N, SetP);
600  return true;
601 }
602 
603 // Find all instances of extract_vector_elt that use this v2f16 vector
604 // and coalesce them into a scattering move instruction.
//
// Returns false (nothing changed) when the source vector is not v2f16, or
// when its users do not extract both element 0 and element 1. Otherwise all
// recorded extracts of element 0 / element 1 are redirected to result 0 /
// result 1 of a single ScatterOp node, and true is returned.
// NOTE(review): the declarations of E0/E1 and Source, and the initializer of
// ScatterOp, are not present in this listing.
605 bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
606  SDValue Vector = N->getOperand(0);
607 
608  // We only care about f16x2 as it's the only real vector type we
609  // need to deal with.
610  if (Vector.getSimpleValueType() != MVT::v2f16)
611  return false;
612 
613  // Find and record all uses of this vector that extract element 0 or 1.
// Users that are not EXTRACT_VECTOR_ELT of exactly this value, or whose
// index is not a constant, are skipped.
615  for (auto U : Vector.getNode()->uses()) {
616  if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
617  continue;
618  if (U->getOperand(0) != Vector)
619  continue;
620  if (const ConstantSDNode *IdxConst =
621  dyn_cast<ConstantSDNode>(U->getOperand(1))) {
622  if (IdxConst->getZExtValue() == 0)
623  E0.push_back(U);
624  else if (IdxConst->getZExtValue() == 1)
625  E1.push_back(U);
626  else
627  llvm_unreachable("Invalid vector index.");
628  }
629  }
630 
631  // There's no point scattering f16x2 if we only ever access one
632  // element of it.
633  if (E0.empty() || E1.empty())
634  return false;
635 
636  unsigned Op = NVPTX::SplitF16x2;
637  // If the vector has been BITCAST'ed from i32, we can use original
638  // value directly and avoid register-to-register move.
640  if (Vector->getOpcode() == ISD::BITCAST) {
641  Op = NVPTX::SplitI32toF16x2;
642  Source = Vector->getOperand(0);
643  }
644  // Merge (f16 extractelt(V, 0), f16 extractelt(V,1))
645  // into f16,f16 SplitF16x2(V)
646  SDNode *ScatterOp =
648  for (auto *Node : E0)
649  ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0));
650  for (auto *Node : E1)
651  ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 1));
652 
653  return true;
654 }
655 
// Derives an address-space code for memory node N from the IR Value attached
// to its memory operand: a missing Value is handled first, and for
// pointer-typed Values the pointer's address space is switched on.
// Callers in this file compare the result against
// NVPTX::PTXLdStInstCode::{GLOBAL, SHARED, GENERIC}.
// NOTE(review): every return statement and all non-default case labels of
// this function are not present in this listing; confirm the exact mapping
// against the upstream file.
656 static unsigned int getCodeAddrSpace(MemSDNode *N) {
657  const Value *Src = N->getMemOperand()->getValue();
658 
659  if (!Src)
661 
662  if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
663  switch (PT->getAddressSpace()) {
670  default: break;
671  }
672  }
674 }
675 
// Decides whether the load described by N may be selected as ldg.
//
// Returns true only when Subtarget.hasLDG() holds, CodeAddrSpace is
// NVPTX::PTXLdStInstCode::GLOBAL, and either N is marked invariant or every
// underlying object of its memory operand is one of:
//   - an Argument that is read-only and noalias, in a kernel function F, or
//   - a GlobalVariable that is constant.
// NOTE(review): the line that declares Objs is not present in this listing.
676 static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
677  unsigned CodeAddrSpace, MachineFunction *F) {
678  // We use ldg (i.e. ld.global.nc) for invariant loads from the global address
679  // space.
680  //
681  // We have two ways of identifying invariant loads: Loads may be explicitly
682  // marked as invariant, or we may infer them to be invariant.
683  //
684  // We currently infer invariance for loads from
685  // - constant global variables, and
686  // - kernel function pointer params that are noalias (i.e. __restrict) and
687  // never written to.
688  //
689  // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
690  // not during the SelectionDAG phase).
691  //
692  // TODO: Infer invariance only at -O2. We still want to use ldg at -O0 for
693  // explicitly invariant loads because these are how clang tells us to use ldg
694  // when the user uses a builtin.
695  if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL)
696  return false;
697 
698  if (N->isInvariant())
699  return true;
700 
701  bool IsKernelFn = isKernelFunction(F->getFunction());
702 
703  // We use getUnderlyingObjects() here instead of getUnderlyingObject() mainly
704  // because the former looks through phi nodes while the latter does not. We
705  // need to look through phi nodes to handle pointer induction variables.
707  getUnderlyingObjects(N->getMemOperand()->getValue(), Objs);
708 
// Any underlying object that is neither an Argument nor a GlobalVariable
// makes the whole load ineligible.
709  return all_of(Objs, [&](const Value *V) {
710  if (auto *A = dyn_cast<const Argument>(V))
711  return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr();
712  if (auto *GV = dyn_cast<const GlobalVariable>(V))
713  return GV->isConstant();
714  return false;
715  });
716 }
717 
718 bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
719  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
720  switch (IID) {
721  default:
722  return false;
723  case Intrinsic::nvvm_texsurf_handle_internal:
724  SelectTexSurfHandle(N);
725  return true;
726  }
727 }
728 
729 void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
730  // Op 0 is the intrinsic ID
731  SDValue Wrapper = N->getOperand(1);
732  SDValue GlobalVal = Wrapper.getOperand(0);
733  ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
734  MVT::i64, GlobalVal));
735 }
736 
// Replaces an ADDRSPACECAST node with the matching cvta / cvta_to machine
// node.
//
// Two directions are handled: a specific address space to generic (first
// branch) and generic to a specific address space (second branch). The opcode
// depends on the non-generic address space, on TM.is64Bit(), and for some
// spaces on useShortPointers(). An unsupported address space, or a cast
// between two non-generic spaces, ends in report_fatal_error.
// NOTE(review): four `case` labels (the ones preceding the global and shared
// opcode assignments in each switch) are not present in this listing.
737 void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
738  SDValue Src = N->getOperand(0);
739  AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
740  unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
741  unsigned DstAddrSpace = CastN->getDestAddressSpace();
742  assert(SrcAddrSpace != DstAddrSpace &&
743  "addrspacecast must be between different address spaces");
744 
745  if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
746  // Specific to generic
747  unsigned Opc;
748  switch (SrcAddrSpace) {
749  default: report_fatal_error("Bad address space in addrspacecast");
751  Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
752  break;
754  Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_shared_yes_6432
755  : NVPTX::cvta_shared_yes_64)
756  : NVPTX::cvta_shared_yes;
757  break;
758  case ADDRESS_SPACE_CONST:
759  Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_const_yes_6432
760  : NVPTX::cvta_const_yes_64)
761  : NVPTX::cvta_const_yes;
762  break;
763  case ADDRESS_SPACE_LOCAL:
764  Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_local_yes_6432
765  : NVPTX::cvta_local_yes_64)
766  : NVPTX::cvta_local_yes;
767  break;
768  }
769  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
770  Src));
771  return;
772  } else {
773  // Generic to specific
// NOTE(review): the literal 0 below plays the role that
// ADDRESS_SPACE_GENERIC plays in the branch above -- presumably the same
// value; confirm.
774  if (SrcAddrSpace != 0)
775  report_fatal_error("Cannot cast between two non-generic address spaces");
776  unsigned Opc;
777  switch (DstAddrSpace) {
778  default: report_fatal_error("Bad address space in addrspacecast");
780  Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
781  : NVPTX::cvta_to_global_yes;
782  break;
784  Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_shared_yes_3264
785  : NVPTX::cvta_to_shared_yes_64)
786  : NVPTX::cvta_to_shared_yes;
787  break;
788  case ADDRESS_SPACE_CONST:
789  Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_const_yes_3264
790  : NVPTX::cvta_to_const_yes_64)
791  : NVPTX::cvta_to_const_yes;
792  break;
793  case ADDRESS_SPACE_LOCAL:
794  Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_local_yes_3264
795  : NVPTX::cvta_to_local_yes_64)
796  : NVPTX::cvta_to_local_yes;
797  break;
798  case ADDRESS_SPACE_PARAM:
799  Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
800  : NVPTX::nvvm_ptr_gen_to_param;
801  break;
802  }
803  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
804  Src));
805  return;
806  }
807 }
808 
809 // Helper function template to reduce amount of boilerplate code for
810 // opcode selection.
//
// Returns the opcode argument that corresponds to VT: i1 shares the i8
// opcode, and i16, i32, i64, f16, v2f16, f32 and f64 each have their own
// argument. Any other VT yields None. Opcode_i64 and Opcode_f64 are Optional,
// so a caller can pass None for them and get None back for those types.
// NOTE(review): the first line of the signature (return type and function
// name) is not present in this listing.
812  MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16,
813  unsigned Opcode_i32, Optional<unsigned> Opcode_i64, unsigned Opcode_f16,
814  unsigned Opcode_f16x2, unsigned Opcode_f32, Optional<unsigned> Opcode_f64) {
815  switch (VT) {
816  case MVT::i1:
817  case MVT::i8:
818  return Opcode_i8;
819  case MVT::i16:
820  return Opcode_i16;
821  case MVT::i32:
822  return Opcode_i32;
823  case MVT::i64:
824  return Opcode_i64;
825  case MVT::f16:
826  return Opcode_f16;
827  case MVT::v2f16:
828  return Opcode_f16x2;
829  case MVT::f32:
830  return Opcode_f32;
831  case MVT::f64:
832  return Opcode_f64;
833  default:
834  return None;
835  }
836 }
837 
// Selects a scalar (or v2f16) load / atomic load into an NVPTX LD_* machine
// node.
//
// Returns false, leaving N untouched, when the load is indexed, its memory
// type is not simple, its ordering is stronger than monotonic, or
// pickOpcodeForVT has no opcode for the result type. Loads accepted by
// canLowerToLDG are handed to tryLDGLDU instead. Otherwise the address is
// matched against the _avar, _asi, _ari and _areg opcode families in that
// order, and N is replaced by the chosen machine node with N's memory operand
// attached.
// NOTE(review): the right-hand sides of the fromType assignments and the
// declaration of Base/Offset are not present in this listing.
838 bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
839  SDLoc dl(N);
840  MemSDNode *LD = cast<MemSDNode>(N);
841  assert(LD->readMem() && "Expected load");
842  LoadSDNode *PlainLoad = dyn_cast<LoadSDNode>(N);
843  EVT LoadedVT = LD->getMemoryVT();
844  SDNode *NVPTXLD = nullptr;
845 
846  // do not support pre/post inc/dec
847  if (PlainLoad && PlainLoad->isIndexed())
848  return false;
849 
850  if (!LoadedVT.isSimple())
851  return false;
852 
853  AtomicOrdering Ordering = LD->getSuccessOrdering();
854  // In order to lower atomic loads with stronger guarantees we would need to
855  // use load.acquire or insert fences. However these features were only added
856  // with PTX ISA 6.0 / sm_70.
857  // TODO: Check if we can actually use the new instructions and implement them.
858  if (isStrongerThanMonotonic(Ordering))
859  return false;
860 
861  // Address Space Setting
862  unsigned int CodeAddrSpace = getCodeAddrSpace(LD);
863  if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) {
864  return tryLDGLDU(N);
865  }
866 
867  unsigned int PointerSize =
868  CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace());
869 
870  // Volatile Setting
871  // - .volatile is only available for .global and .shared
872  // - .volatile has the same memory synchronization semantics as .relaxed.sys
// Monotonic atomic loads are therefore emitted as volatile loads; the flag is
// cleared again for any address space other than global, shared or generic.
873  bool isVolatile = LD->isVolatile() || Ordering == AtomicOrdering::Monotonic;
874  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
875  CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
876  CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
877  isVolatile = false;
878 
879  // Type Setting: fromType + fromTypeWidth
880  //
881  // Sign : ISD::SEXTLOAD
882  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
883  // type is integer
884  // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
885  MVT SimpleVT = LoadedVT.getSimpleVT();
886  MVT ScalarVT = SimpleVT.getScalarType();
887  // Read at least 8 bits (predicates are stored as 8-bit values)
888  unsigned fromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
889  unsigned int fromType;
890 
891  // Vector Setting
892  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
893  if (SimpleVT.isVector()) {
894  assert(LoadedVT == MVT::v2f16 && "Unexpected vector type");
895  // v2f16 is loaded using ld.b32
896  fromTypeWidth = 32;
897  }
898 
899  if (PlainLoad && (PlainLoad->getExtensionType() == ISD::SEXTLOAD))
901  else if (ScalarVT.isFloatingPoint())
902  // f16 uses .b16 as its storage type.
903  fromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
905  else
907 
908  // Create the machine instruction DAG
909  SDValue Chain = N->getOperand(0);
910  SDValue N1 = N->getOperand(1);
911  SDValue Addr;
913  Optional<unsigned> Opcode;
914  MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
915 
// Every branch below builds the same leading operands (volatile flag, address
// space, vector kind, source type, source width) followed by the address
// operand(s) of that addressing form and the chain.
916  if (SelectDirectAddr(N1, Addr)) {
917  Opcode = pickOpcodeForVT(
918  TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar, NVPTX::LD_i32_avar,
919  NVPTX::LD_i64_avar, NVPTX::LD_f16_avar, NVPTX::LD_f16x2_avar,
920  NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
921  if (!Opcode)
922  return false;
923  SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
924  getI32Imm(vecType, dl), getI32Imm(fromType, dl),
925  getI32Imm(fromTypeWidth, dl), Addr, Chain };
926  NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
927  } else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
928  : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
929  Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
930  NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
931  NVPTX::LD_f16_asi, NVPTX::LD_f16x2_asi,
932  NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
933  if (!Opcode)
934  return false;
935  SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
936  getI32Imm(vecType, dl), getI32Imm(fromType, dl),
937  getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
938  NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
939  } else if (PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
940  : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
941  if (PointerSize == 64)
942  Opcode = pickOpcodeForVT(
943  TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
944  NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, NVPTX::LD_f16_ari_64,
945  NVPTX::LD_f16x2_ari_64, NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
946  else
947  Opcode = pickOpcodeForVT(
948  TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, NVPTX::LD_i32_ari,
949  NVPTX::LD_i64_ari, NVPTX::LD_f16_ari, NVPTX::LD_f16x2_ari,
950  NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
951  if (!Opcode)
952  return false;
953  SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
954  getI32Imm(vecType, dl), getI32Imm(fromType, dl),
955  getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
956  NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
957  } else {
958  if (PointerSize == 64)
959  Opcode = pickOpcodeForVT(
960  TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
961  NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, NVPTX::LD_f16_areg_64,
962  NVPTX::LD_f16x2_areg_64, NVPTX::LD_f32_areg_64,
963  NVPTX::LD_f64_areg_64);
964  else
965  Opcode = pickOpcodeForVT(
966  TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg, NVPTX::LD_i32_areg,
967  NVPTX::LD_i64_areg, NVPTX::LD_f16_areg, NVPTX::LD_f16x2_areg,
968  NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
969  if (!Opcode)
970  return false;
971  SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
972  getI32Imm(vecType, dl), getI32Imm(fromType, dl),
973  getI32Imm(fromTypeWidth, dl), N1, Chain };
974  NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
975  }
976 
977  if (!NVPTXLD)
978  return false;
979 
// Carry the original memory operand over to the machine node.
980  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
981  CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXLD), {MemRef});
982 
983  ReplaceNode(N, NVPTXLD);
984  return true;
985 }
986 
// Selects an NVPTXISD::LoadV2 / LoadV4 node into an NVPTX LDV_* machine node.
//
// Returns false, leaving N untouched, when the memory type is not simple, N
// is neither LoadV2 nor LoadV4, or pickOpcodeForVT has no opcode for the
// element type (the LoadV4 calls pass None for the i64 and f64 slots). Loads
// accepted by canLowerToLDG are handed to tryLDGLDU instead. Otherwise the
// address is matched against the _avar, _asi, _ari and _areg opcode families
// in that order, and N is replaced by the chosen machine node, which reuses
// N's value-type list and gets N's memory operand attached.
// NOTE(review): the declaration of Addr/Base/Offset, the initializer of
// PointerSize, and the right-hand sides of the FromType and VecType
// assignments are not present in this listing.
987 bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
988 
989  SDValue Chain = N->getOperand(0);
990  SDValue Op1 = N->getOperand(1);
992  Optional<unsigned> Opcode;
993  SDLoc DL(N);
994  SDNode *LD;
995  MemSDNode *MemSD = cast<MemSDNode>(N);
996  EVT LoadedVT = MemSD->getMemoryVT();
997 
998  if (!LoadedVT.isSimple())
999  return false;
1000 
1001  // Address Space Setting
1002  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
1003  if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
1004  return tryLDGLDU(N);
1005  }
1006 
1007  unsigned int PointerSize =
1009 
1010  // Volatile Setting
1011  // - .volatile is only available for .global and .shared
1012  bool IsVolatile = MemSD->isVolatile();
1013  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1014  CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1015  CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1016  IsVolatile = false;
1017 
1018  // Vector Setting
1019  MVT SimpleVT = LoadedVT.getSimpleVT();
1020 
1021  // Type Setting: fromType + fromTypeWidth
1022  //
1023  // Sign : ISD::SEXTLOAD
1024  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
1025  // type is integer
1026  // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
1027  MVT ScalarVT = SimpleVT.getScalarType();
1028  // Read at least 8 bits (predicates are stored as 8-bit values)
1029  unsigned FromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
1030  unsigned int FromType;
1031  // The last operand holds the original LoadSDNode::getExtensionType() value
1032  unsigned ExtensionType = cast<ConstantSDNode>(
1033  N->getOperand(N->getNumOperands() - 1))->getZExtValue();
1034  if (ExtensionType == ISD::SEXTLOAD)
1036  else if (ScalarVT.isFloatingPoint())
1039  else
1041 
1042  unsigned VecType;
1043 
1044  switch (N->getOpcode()) {
1045  case NVPTXISD::LoadV2:
1047  break;
1048  case NVPTXISD::LoadV4:
1050  break;
1051  default:
1052  return false;
1053  }
1054 
1055  EVT EltVT = N->getValueType(0);
1056 
1057  // v8f16 is a special case. PTX doesn't have ld.v8.f16
1058  // instruction. Instead, we split the vector into v2f16 chunks and
1059  // load them with ld.v4.b32.
1060  if (EltVT == MVT::v2f16) {
1061  assert(N->getOpcode() == NVPTXISD::LoadV4 && "Unexpected load opcode.");
1062  EltVT = MVT::i32;
1064  FromTypeWidth = 32;
1065  }
1066 
// Every branch below builds the same leading operands (volatile flag, address
// space, vector kind, source type, source width) followed by the address
// operand(s) of that addressing form and the chain.
1067  if (SelectDirectAddr(Op1, Addr)) {
1068  switch (N->getOpcode()) {
1069  default:
1070  return false;
1071  case NVPTXISD::LoadV2:
1072  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1073  NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
1074  NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
1075  NVPTX::LDV_f16_v2_avar, NVPTX::LDV_f16x2_v2_avar,
1076  NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
1077  break;
1078  case NVPTXISD::LoadV4:
1079  Opcode =
1080  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_avar,
1081  NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar, None,
1082  NVPTX::LDV_f16_v4_avar, NVPTX::LDV_f16x2_v4_avar,
1083  NVPTX::LDV_f32_v4_avar, None);
1084  break;
1085  }
1086  if (!Opcode)
1087  return false;
1088  SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1089  getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1090  getI32Imm(FromTypeWidth, DL), Addr, Chain };
1091  LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
1092  } else if (PointerSize == 64
1093  ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
1094  : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
1095  switch (N->getOpcode()) {
1096  default:
1097  return false;
1098  case NVPTXISD::LoadV2:
1099  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1100  NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
1101  NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
1102  NVPTX::LDV_f16_v2_asi, NVPTX::LDV_f16x2_v2_asi,
1103  NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
1104  break;
1105  case NVPTXISD::LoadV4:
1106  Opcode =
1107  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi,
1108  NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi, None,
1109  NVPTX::LDV_f16_v4_asi, NVPTX::LDV_f16x2_v4_asi,
1110  NVPTX::LDV_f32_v4_asi, None);
1111  break;
1112  }
1113  if (!Opcode)
1114  return false;
1115  SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1116  getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1117  getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1118  LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
1119  } else if (PointerSize == 64
1120  ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1121  : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1122  if (PointerSize == 64) {
1123  switch (N->getOpcode()) {
1124  default:
1125  return false;
1126  case NVPTXISD::LoadV2:
1127  Opcode = pickOpcodeForVT(
1128  EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_ari_64,
1129  NVPTX::LDV_i16_v2_ari_64, NVPTX::LDV_i32_v2_ari_64,
1130  NVPTX::LDV_i64_v2_ari_64, NVPTX::LDV_f16_v2_ari_64,
1131  NVPTX::LDV_f16x2_v2_ari_64, NVPTX::LDV_f32_v2_ari_64,
1132  NVPTX::LDV_f64_v2_ari_64);
1133  break;
1134  case NVPTXISD::LoadV4:
1135  Opcode = pickOpcodeForVT(
1136  EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64,
1137  NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, None,
1138  NVPTX::LDV_f16_v4_ari_64, NVPTX::LDV_f16x2_v4_ari_64,
1139  NVPTX::LDV_f32_v4_ari_64, None);
1140  break;
1141  }
1142  } else {
1143  switch (N->getOpcode()) {
1144  default:
1145  return false;
1146  case NVPTXISD::LoadV2:
1147  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1148  NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
1149  NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
1150  NVPTX::LDV_f16_v2_ari, NVPTX::LDV_f16x2_v2_ari,
1151  NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
1152  break;
1153  case NVPTXISD::LoadV4:
1154  Opcode =
1155  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari,
1156  NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari, None,
1157  NVPTX::LDV_f16_v4_ari, NVPTX::LDV_f16x2_v4_ari,
1158  NVPTX::LDV_f32_v4_ari, None);
1159  break;
1160  }
1161  }
1162  if (!Opcode)
1163  return false;
1164  SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1165  getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1166  getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1167 
1168  LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
1169  } else {
1170  if (PointerSize == 64) {
1171  switch (N->getOpcode()) {
1172  default:
1173  return false;
1174  case NVPTXISD::LoadV2:
1175  Opcode = pickOpcodeForVT(
1176  EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64,
1177  NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
1178  NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f16_v2_areg_64,
1179  NVPTX::LDV_f16x2_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
1180  NVPTX::LDV_f64_v2_areg_64);
1181  break;
1182  case NVPTXISD::LoadV4:
1183  Opcode = pickOpcodeForVT(
1184  EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64,
1185  NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, None,
1186  NVPTX::LDV_f16_v4_areg_64, NVPTX::LDV_f16x2_v4_areg_64,
1187  NVPTX::LDV_f32_v4_areg_64, None);
1188  break;
1189  }
1190  } else {
1191  switch (N->getOpcode()) {
1192  default:
1193  return false;
1194  case NVPTXISD::LoadV2:
1195  Opcode =
1196  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg,
1197  NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
1198  NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f16_v2_areg,
1199  NVPTX::LDV_f16x2_v2_areg, NVPTX::LDV_f32_v2_areg,
1200  NVPTX::LDV_f64_v2_areg);
1201  break;
1202  case NVPTXISD::LoadV4:
1203  Opcode = pickOpcodeForVT(
1204  EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg,
1205  NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg, None,
1206  NVPTX::LDV_f16_v4_areg, NVPTX::LDV_f16x2_v4_areg,
1207  NVPTX::LDV_f32_v4_areg, None);
1208  break;
1209  }
1210  }
1211  if (!Opcode)
1212  return false;
1213  SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1214  getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1215  getI32Imm(FromTypeWidth, DL), Op1, Chain };
1216  LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
1217  }
1218 
// Carry the original memory operand over to the machine node.
1219  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
1220  CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});
1221 
1222  ReplaceNode(N, LD);
1223  return true;
1224 }
1225 
1226 bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
1227 
1228  SDValue Chain = N->getOperand(0);
1229  SDValue Op1;
1230  MemSDNode *Mem;
1231  bool IsLDG = true;
1232 
1233  // If this is an LDG intrinsic, the address is the third operand. If its an
1234  // LDG/LDU SD node (from custom vector handling), then its the second operand
1235  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1236  Op1 = N->getOperand(2);
1237  Mem = cast<MemIntrinsicSDNode>(N);
1238  unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1239  switch (IID) {
1240  default:
1241  return false;
1242  case Intrinsic::nvvm_ldg_global_f:
1243  case Intrinsic::nvvm_ldg_global_i:
1244  case Intrinsic::nvvm_ldg_global_p:
1245  IsLDG = true;
1246  break;
1247  case Intrinsic::nvvm_ldu_global_f:
1248  case Intrinsic::nvvm_ldu_global_i:
1249  case Intrinsic::nvvm_ldu_global_p:
1250  IsLDG = false;
1251  break;
1252  }
1253  } else {
1254  Op1 = N->getOperand(1);
1255  Mem = cast<MemSDNode>(N);
1256  }
1257 
1258  Optional<unsigned> Opcode;
1259  SDLoc DL(N);
1260  SDNode *LD;
1261  SDValue Base, Offset, Addr;
1262 
1263  EVT EltVT = Mem->getMemoryVT();
1264  unsigned NumElts = 1;
1265  if (EltVT.isVector()) {
1266  NumElts = EltVT.getVectorNumElements();
1267  EltVT = EltVT.getVectorElementType();
1268  // vectors of f16 are loaded/stored as multiples of v2f16 elements.
1269  if (EltVT == MVT::f16 && N->getValueType(0) == MVT::v2f16) {
1270  assert(NumElts % 2 == 0 && "Vector must have even number of elements");
1271  EltVT = MVT::v2f16;
1272  NumElts /= 2;
1273  }
1274  }
1275 
1276  // Build the "promoted" result VTList for the load. If we are really loading
1277  // i8s, then the return type will be promoted to i16 since we do not expose
1278  // 8-bit registers in NVPTX.
1279  EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
1280  SmallVector<EVT, 5> InstVTs;
1281  for (unsigned i = 0; i != NumElts; ++i) {
1282  InstVTs.push_back(NodeVT);
1283  }
1284  InstVTs.push_back(MVT::Other);
1285  SDVTList InstVTList = CurDAG->getVTList(InstVTs);
1286 
1287  if (SelectDirectAddr(Op1, Addr)) {
1288  switch (N->getOpcode()) {
1289  default:
1290  return false;
1291  case ISD::LOAD:
1293  if (IsLDG)
1294  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1295  NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
1296  NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
1297  NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
1298  NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
1299  NVPTX::INT_PTX_LDG_GLOBAL_f16avar,
1300  NVPTX::INT_PTX_LDG_GLOBAL_f16x2avar,
1301  NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
1302  NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
1303  else
1304  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1305  NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
1306  NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
1307  NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
1308  NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
1309  NVPTX::INT_PTX_LDU_GLOBAL_f16avar,
1310  NVPTX::INT_PTX_LDU_GLOBAL_f16x2avar,
1311  NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
1312  NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
1313  break;
1314  case NVPTXISD::LoadV2:
1315  case NVPTXISD::LDGV2:
1316  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1317  NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
1318  NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
1319  NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
1320  NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
1321  NVPTX::INT_PTX_LDG_G_v2f16_ELE_avar,
1322  NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_avar,
1323  NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
1324  NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
1325  break;
1326  case NVPTXISD::LDUV2:
1327  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1328  NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
1329  NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
1330  NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
1331  NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
1332  NVPTX::INT_PTX_LDU_G_v2f16_ELE_avar,
1333  NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_avar,
1334  NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
1335  NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
1336  break;
1337  case NVPTXISD::LoadV4:
1338  case NVPTXISD::LDGV4:
1339  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1340  NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
1341  NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
1342  NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, None,
1343  NVPTX::INT_PTX_LDG_G_v4f16_ELE_avar,
1344  NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_avar,
1345  NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, None);
1346  break;
1347  case NVPTXISD::LDUV4:
1348  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1349  NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar,
1350  NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
1351  NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, None,
1352  NVPTX::INT_PTX_LDU_G_v4f16_ELE_avar,
1353  NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_avar,
1354  NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, None);
1355  break;
1356  }
1357  if (!Opcode)
1358  return false;
1359  SDValue Ops[] = { Addr, Chain };
1360  LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
1361  } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1362  : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1363  if (TM.is64Bit()) {
1364  switch (N->getOpcode()) {
1365  default:
1366  return false;
1367  case ISD::LOAD:
1369  if (IsLDG)
1370  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1371  NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
1372  NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
1373  NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
1374  NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
1375  NVPTX::INT_PTX_LDG_GLOBAL_f16ari64,
1376  NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari64,
1377  NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
1378  NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
1379  else
1380  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1381  NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
1382  NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
1383  NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
1384  NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
1385  NVPTX::INT_PTX_LDU_GLOBAL_f16ari64,
1386  NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari64,
1387  NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
1388  NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
1389  break;
1390  case NVPTXISD::LoadV2:
1391  case NVPTXISD::LDGV2:
1392  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1393  NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
1394  NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
1395  NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
1396  NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
1397  NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari64,
1398  NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari64,
1399  NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
1400  NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
1401  break;
1402  case NVPTXISD::LDUV2:
1403  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1404  NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
1405  NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
1406  NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
1407  NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
1408  NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari64,
1409  NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari64,
1410  NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
1411  NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
1412  break;
1413  case NVPTXISD::LoadV4:
1414  case NVPTXISD::LDGV4:
1415  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1416  NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
1417  NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
1418  NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, None,
1419  NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari64,
1420  NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari64,
1421  NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, None);
1422  break;
1423  case NVPTXISD::LDUV4:
1424  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1425  NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
1426  NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
1427  NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, None,
1428  NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari64,
1429  NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari64,
1430  NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, None);
1431  break;
1432  }
1433  } else {
1434  switch (N->getOpcode()) {
1435  default:
1436  return false;
1437  case ISD::LOAD:
1439  if (IsLDG)
1440  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1441  NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
1442  NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
1443  NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
1444  NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
1445  NVPTX::INT_PTX_LDG_GLOBAL_f16ari,
1446  NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari,
1447  NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
1448  NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
1449  else
1450  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1451  NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
1452  NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
1453  NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
1454  NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
1455  NVPTX::INT_PTX_LDU_GLOBAL_f16ari,
1456  NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari,
1457  NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
1458  NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
1459  break;
1460  case NVPTXISD::LoadV2:
1461  case NVPTXISD::LDGV2:
1462  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1463  NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
1464  NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
1465  NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
1466  NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
1467  NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari32,
1468  NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari32,
1469  NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
1470  NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
1471  break;
1472  case NVPTXISD::LDUV2:
1473  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1474  NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
1475  NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
1476  NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
1477  NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
1478  NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari32,
1479  NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari32,
1480  NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
1481  NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
1482  break;
1483  case NVPTXISD::LoadV4:
1484  case NVPTXISD::LDGV4:
1485  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1486  NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
1487  NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
1488  NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, None,
1489  NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari32,
1490  NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari32,
1491  NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, None);
1492  break;
1493  case NVPTXISD::LDUV4:
1494  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1495  NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
1496  NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
1497  NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, None,
1498  NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari32,
1499  NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari32,
1500  NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, None);
1501  break;
1502  }
1503  }
1504  if (!Opcode)
1505  return false;
1506  SDValue Ops[] = {Base, Offset, Chain};
1507  LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
1508  } else {
1509  if (TM.is64Bit()) {
1510  switch (N->getOpcode()) {
1511  default:
1512  return false;
1513  case ISD::LOAD:
1515  if (IsLDG)
1516  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1517  NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
1518  NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
1519  NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
1520  NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
1521  NVPTX::INT_PTX_LDG_GLOBAL_f16areg64,
1522  NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg64,
1523  NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
1524  NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
1525  else
1526  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1527  NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
1528  NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
1529  NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
1530  NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
1531  NVPTX::INT_PTX_LDU_GLOBAL_f16areg64,
1532  NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg64,
1533  NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
1534  NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
1535  break;
1536  case NVPTXISD::LoadV2:
1537  case NVPTXISD::LDGV2:
1538  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1539  NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
1540  NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
1541  NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
1542  NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
1543  NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg64,
1544  NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg64,
1545  NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
1546  NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
1547  break;
1548  case NVPTXISD::LDUV2:
1549  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1550  NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
1551  NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
1552  NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
1553  NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
1554  NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg64,
1555  NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg64,
1556  NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
1557  NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
1558  break;
1559  case NVPTXISD::LoadV4:
1560  case NVPTXISD::LDGV4:
1561  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1562  NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
1563  NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
1564  NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, None,
1565  NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg64,
1566  NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg64,
1567  NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, None);
1568  break;
1569  case NVPTXISD::LDUV4:
1570  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1571  NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64,
1572  NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
1573  NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, None,
1574  NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg64,
1575  NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg64,
1576  NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, None);
1577  break;
1578  }
1579  } else {
1580  switch (N->getOpcode()) {
1581  default:
1582  return false;
1583  case ISD::LOAD:
1585  if (IsLDG)
1586  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1587  NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
1588  NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
1589  NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
1590  NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
1591  NVPTX::INT_PTX_LDG_GLOBAL_f16areg,
1592  NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg,
1593  NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
1594  NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
1595  else
1596  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1597  NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
1598  NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
1599  NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
1600  NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
1601  NVPTX::INT_PTX_LDU_GLOBAL_f16areg,
1602  NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg,
1603  NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
1604  NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
1605  break;
1606  case NVPTXISD::LoadV2:
1607  case NVPTXISD::LDGV2:
1608  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1609  NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
1610  NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
1611  NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
1612  NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
1613  NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg32,
1614  NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg32,
1615  NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
1616  NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
1617  break;
1618  case NVPTXISD::LDUV2:
1619  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1620  NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
1621  NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
1622  NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
1623  NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
1624  NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg32,
1625  NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg32,
1626  NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
1627  NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
1628  break;
1629  case NVPTXISD::LoadV4:
1630  case NVPTXISD::LDGV4:
1631  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1632  NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
1633  NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
1634  NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, None,
1635  NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg32,
1636  NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg32,
1637  NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, None);
1638  break;
1639  case NVPTXISD::LDUV4:
1640  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1641  NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32,
1642  NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
1643  NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, None,
1644  NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32,
1645  NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32,
1646  NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, None);
1647  break;
1648  }
1649  }
1650  if (!Opcode)
1651  return false;
1652  SDValue Ops[] = { Op1, Chain };
1653  LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
1654  }
1655 
1657  CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});
1658 
1659  // For automatic generation of LDG (through SelectLoad[Vector], not the
1660  // intrinsics), we may have an extending load like:
1661  //
1662  // i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
1663  //
1664  // In this case, the matching logic above will select a load for the original
1665  // memory type (in this case, i8) and our types will not match (the node needs
1666  // to return an i32 in this case). Our LDG/LDU nodes do not support the
1667  // concept of sign-/zero-extension, so emulate it here by adding an explicit
1668  // CVT instruction. Ptxas should clean up any redundancies here.
1669 
1670  EVT OrigType = N->getValueType(0);
1671  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);
1672 
1673  if (OrigType != EltVT && LdNode) {
1674  // We have an extending-load. The instruction we selected operates on the
1675  // smaller type, but the SDNode we are replacing has the larger type. We
1676  // need to emit a CVT to make the types match.
1677  bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD;
1678  unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(),
1679  EltVT.getSimpleVT(), IsSigned);
1680 
1681  // For each output value, apply the manual sign/zero-extension and make sure
1682  // all users of the load go through that CVT.
1683  for (unsigned i = 0; i != NumElts; ++i) {
1684  SDValue Res(LD, i);
1685  SDValue OrigVal(N, i);
1686 
1687  SDNode *CvtNode =
1688  CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
1690  DL, MVT::i32));
1691  ReplaceUses(OrigVal, SDValue(CvtNode, 0));
1692  }
1693  }
1694 
1695  ReplaceNode(N, LD);
1696  return true;
1697 }
1698 
1699 bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
1700  SDLoc dl(N);
1701  MemSDNode *ST = cast<MemSDNode>(N);
1702  assert(ST->writeMem() && "Expected store");
1703  StoreSDNode *PlainStore = dyn_cast<StoreSDNode>(N);
1704  AtomicSDNode *AtomicStore = dyn_cast<AtomicSDNode>(N);
1705  assert((PlainStore || AtomicStore) && "Expected store");
1706  EVT StoreVT = ST->getMemoryVT();
1707  SDNode *NVPTXST = nullptr;
1708 
1709  // do not support pre/post inc/dec
1710  if (PlainStore && PlainStore->isIndexed())
1711  return false;
1712 
1713  if (!StoreVT.isSimple())
1714  return false;
1715 
1716  AtomicOrdering Ordering = ST->getSuccessOrdering();
1717  // In order to lower atomic loads with stronger guarantees we would need to
1718  // use store.release or insert fences. However these features were only added
1719  // with PTX ISA 6.0 / sm_70.
1720  // TODO: Check if we can actually use the new instructions and implement them.
1721  if (isStrongerThanMonotonic(Ordering))
1722  return false;
1723 
1724  // Address Space Setting
1725  unsigned int CodeAddrSpace = getCodeAddrSpace(ST);
1726  unsigned int PointerSize =
1727  CurDAG->getDataLayout().getPointerSizeInBits(ST->getAddressSpace());
1728 
1729  // Volatile Setting
1730  // - .volatile is only available for .global and .shared
1731  // - .volatile has the same memory synchronization semantics as .relaxed.sys
1732  bool isVolatile = ST->isVolatile() || Ordering == AtomicOrdering::Monotonic;
1733  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1734  CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1735  CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1736  isVolatile = false;
1737 
1738  // Vector Setting
1739  MVT SimpleVT = StoreVT.getSimpleVT();
1740  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
1741 
1742  // Type Setting: toType + toTypeWidth
1743  // - for integer type, always use 'u'
1744  //
1745  MVT ScalarVT = SimpleVT.getScalarType();
1746  unsigned toTypeWidth = ScalarVT.getSizeInBits();
1747  if (SimpleVT.isVector()) {
1748  assert(StoreVT == MVT::v2f16 && "Unexpected vector type");
1749  // v2f16 is stored using st.b32
1750  toTypeWidth = 32;
1751  }
1752 
1753  unsigned int toType;
1754  if (ScalarVT.isFloatingPoint())
1755  // f16 uses .b16 as its storage type.
1756  toType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
1758  else
1760 
1761  // Create the machine instruction DAG
1762  SDValue Chain = ST->getChain();
1763  SDValue Value = PlainStore ? PlainStore->getValue() : AtomicStore->getVal();
1764  SDValue BasePtr = ST->getBasePtr();
1765  SDValue Addr;
1766  SDValue Offset, Base;
1767  Optional<unsigned> Opcode;
1768  MVT::SimpleValueType SourceVT =
1769  Value.getNode()->getSimpleValueType(0).SimpleTy;
1770 
1771  if (SelectDirectAddr(BasePtr, Addr)) {
1772  Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
1773  NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
1774  NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar,
1775  NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
1776  if (!Opcode)
1777  return false;
1778  SDValue Ops[] = {Value,
1779  getI32Imm(isVolatile, dl),
1780  getI32Imm(CodeAddrSpace, dl),
1781  getI32Imm(vecType, dl),
1782  getI32Imm(toType, dl),
1783  getI32Imm(toTypeWidth, dl),
1784  Addr,
1785  Chain};
1786  NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
1787  } else if (PointerSize == 64
1788  ? SelectADDRsi64(BasePtr.getNode(), BasePtr, Base, Offset)
1789  : SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) {
1790  Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
1791  NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
1792  NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi,
1793  NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
1794  if (!Opcode)
1795  return false;
1796  SDValue Ops[] = {Value,
1797  getI32Imm(isVolatile, dl),
1798  getI32Imm(CodeAddrSpace, dl),
1799  getI32Imm(vecType, dl),
1800  getI32Imm(toType, dl),
1801  getI32Imm(toTypeWidth, dl),
1802  Base,
1803  Offset,
1804  Chain};
1805  NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
1806  } else if (PointerSize == 64
1807  ? SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset)
1808  : SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset)) {
1809  if (PointerSize == 64)
1810  Opcode = pickOpcodeForVT(
1811  SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
1812  NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, NVPTX::ST_f16_ari_64,
1813  NVPTX::ST_f16x2_ari_64, NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
1814  else
1815  Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
1816  NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
1817  NVPTX::ST_f16_ari, NVPTX::ST_f16x2_ari,
1818  NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
1819  if (!Opcode)
1820  return false;
1821 
1822  SDValue Ops[] = {Value,
1823  getI32Imm(isVolatile, dl),
1824  getI32Imm(CodeAddrSpace, dl),
1825  getI32Imm(vecType, dl),
1826  getI32Imm(toType, dl),
1827  getI32Imm(toTypeWidth, dl),
1828  Base,
1829  Offset,
1830  Chain};
1831  NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
1832  } else {
1833  if (PointerSize == 64)
1834  Opcode =
1835  pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
1836  NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
1837  NVPTX::ST_f16_areg_64, NVPTX::ST_f16x2_areg_64,
1838  NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
1839  else
1840  Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
1841  NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
1842  NVPTX::ST_f16_areg, NVPTX::ST_f16x2_areg,
1843  NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
1844  if (!Opcode)
1845  return false;
1846  SDValue Ops[] = {Value,
1847  getI32Imm(isVolatile, dl),
1848  getI32Imm(CodeAddrSpace, dl),
1849  getI32Imm(vecType, dl),
1850  getI32Imm(toType, dl),
1851  getI32Imm(toTypeWidth, dl),
1852  BasePtr,
1853  Chain};
1854  NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
1855  }
1856 
1857  if (!NVPTXST)
1858  return false;
1859 
1860  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
1861  CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXST), {MemRef});
1862  ReplaceNode(N, NVPTXST);
1863  return true;
1864 }
1865 
1866 bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
1867  SDValue Chain = N->getOperand(0);
1868  SDValue Op1 = N->getOperand(1);
1869  SDValue Addr, Offset, Base;
1870  Optional<unsigned> Opcode;
1871  SDLoc DL(N);
1872  SDNode *ST;
1873  EVT EltVT = Op1.getValueType();
1874  MemSDNode *MemSD = cast<MemSDNode>(N);
1875  EVT StoreVT = MemSD->getMemoryVT();
1876 
1877  // Address Space Setting
1878  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
1879  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
1880  report_fatal_error("Cannot store to pointer that points to constant "
1881  "memory space");
1882  }
1883  unsigned int PointerSize =
1885 
1886  // Volatile Setting
1887  // - .volatile is only availalble for .global and .shared
1888  bool IsVolatile = MemSD->isVolatile();
1889  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1890  CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1891  CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1892  IsVolatile = false;
1893 
1894  // Type Setting: toType + toTypeWidth
1895  // - for integer type, always use 'u'
1896  assert(StoreVT.isSimple() && "Store value is not simple");
1897  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
1898  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
1899  unsigned ToType;
1900  if (ScalarVT.isFloatingPoint())
1901  ToType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
1903  else
1905 
1907  SDValue N2;
1908  unsigned VecType;
1909 
1910  switch (N->getOpcode()) {
1911  case NVPTXISD::StoreV2:
1913  StOps.push_back(N->getOperand(1));
1914  StOps.push_back(N->getOperand(2));
1915  N2 = N->getOperand(3);
1916  break;
1917  case NVPTXISD::StoreV4:
1919  StOps.push_back(N->getOperand(1));
1920  StOps.push_back(N->getOperand(2));
1921  StOps.push_back(N->getOperand(3));
1922  StOps.push_back(N->getOperand(4));
1923  N2 = N->getOperand(5);
1924  break;
1925  default:
1926  return false;
1927  }
1928 
1929  // v8f16 is a special case. PTX doesn't have st.v8.f16
1930  // instruction. Instead, we split the vector into v2f16 chunks and
1931  // store them with st.v4.b32.
1932  if (EltVT == MVT::v2f16) {
1933  assert(N->getOpcode() == NVPTXISD::StoreV4 && "Unexpected load opcode.");
1934  EltVT = MVT::i32;
1936  ToTypeWidth = 32;
1937  }
1938 
1939  StOps.push_back(getI32Imm(IsVolatile, DL));
1940  StOps.push_back(getI32Imm(CodeAddrSpace, DL));
1941  StOps.push_back(getI32Imm(VecType, DL));
1942  StOps.push_back(getI32Imm(ToType, DL));
1943  StOps.push_back(getI32Imm(ToTypeWidth, DL));
1944 
1945  if (SelectDirectAddr(N2, Addr)) {
1946  switch (N->getOpcode()) {
1947  default:
1948  return false;
1949  case NVPTXISD::StoreV2:
1950  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1951  NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
1952  NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
1953  NVPTX::STV_f16_v2_avar, NVPTX::STV_f16x2_v2_avar,
1954  NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
1955  break;
1956  case NVPTXISD::StoreV4:
1957  Opcode =
1958  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_avar,
1959  NVPTX::STV_i16_v4_avar, NVPTX::STV_i32_v4_avar, None,
1960  NVPTX::STV_f16_v4_avar, NVPTX::STV_f16x2_v4_avar,
1961  NVPTX::STV_f32_v4_avar, None);
1962  break;
1963  }
1964  StOps.push_back(Addr);
1965  } else if (PointerSize == 64 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1966  : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1967  switch (N->getOpcode()) {
1968  default:
1969  return false;
1970  case NVPTXISD::StoreV2:
1971  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1972  NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
1973  NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
1974  NVPTX::STV_f16_v2_asi, NVPTX::STV_f16x2_v2_asi,
1975  NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
1976  break;
1977  case NVPTXISD::StoreV4:
1978  Opcode =
1979  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi,
1980  NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi, None,
1981  NVPTX::STV_f16_v4_asi, NVPTX::STV_f16x2_v4_asi,
1982  NVPTX::STV_f32_v4_asi, None);
1983  break;
1984  }
1985  StOps.push_back(Base);
1986  StOps.push_back(Offset);
1987  } else if (PointerSize == 64 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1988  : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1989  if (PointerSize == 64) {
1990  switch (N->getOpcode()) {
1991  default:
1992  return false;
1993  case NVPTXISD::StoreV2:
1994  Opcode = pickOpcodeForVT(
1995  EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_ari_64,
1996  NVPTX::STV_i16_v2_ari_64, NVPTX::STV_i32_v2_ari_64,
1997  NVPTX::STV_i64_v2_ari_64, NVPTX::STV_f16_v2_ari_64,
1998  NVPTX::STV_f16x2_v2_ari_64, NVPTX::STV_f32_v2_ari_64,
1999  NVPTX::STV_f64_v2_ari_64);
2000  break;
2001  case NVPTXISD::StoreV4:
2002  Opcode = pickOpcodeForVT(
2003  EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64,
2004  NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, None,
2005  NVPTX::STV_f16_v4_ari_64, NVPTX::STV_f16x2_v4_ari_64,
2006  NVPTX::STV_f32_v4_ari_64, None);
2007  break;
2008  }
2009  } else {
2010  switch (N->getOpcode()) {
2011  default:
2012  return false;
2013  case NVPTXISD::StoreV2:
2014  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
2015  NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
2016  NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
2017  NVPTX::STV_f16_v2_ari, NVPTX::STV_f16x2_v2_ari,
2018  NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
2019  break;
2020  case NVPTXISD::StoreV4:
2021  Opcode =
2022  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari,
2023  NVPTX::STV_i16_v4_ari, NVPTX::STV_i32_v4_ari, None,
2024  NVPTX::STV_f16_v4_ari, NVPTX::STV_f16x2_v4_ari,
2025  NVPTX::STV_f32_v4_ari, None);
2026  break;
2027  }
2028  }
2029  StOps.push_back(Base);
2030  StOps.push_back(Offset);
2031  } else {
2032  if (PointerSize == 64) {
2033  switch (N->getOpcode()) {
2034  default:
2035  return false;
2036  case NVPTXISD::StoreV2:
2037  Opcode = pickOpcodeForVT(
2038  EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64,
2039  NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
2040  NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f16_v2_areg_64,
2041  NVPTX::STV_f16x2_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
2042  NVPTX::STV_f64_v2_areg_64);
2043  break;
2044  case NVPTXISD::StoreV4:
2045  Opcode = pickOpcodeForVT(
2046  EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64,
2047  NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, None,
2048  NVPTX::STV_f16_v4_areg_64, NVPTX::STV_f16x2_v4_areg_64,
2049  NVPTX::STV_f32_v4_areg_64, None);
2050  break;
2051  }
2052  } else {
2053  switch (N->getOpcode()) {
2054  default:
2055  return false;
2056  case NVPTXISD::StoreV2:
2057  Opcode =
2058  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg,
2059  NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
2060  NVPTX::STV_i64_v2_areg, NVPTX::STV_f16_v2_areg,
2061  NVPTX::STV_f16x2_v2_areg, NVPTX::STV_f32_v2_areg,
2062  NVPTX::STV_f64_v2_areg);
2063  break;
2064  case NVPTXISD::StoreV4:
2065  Opcode =
2066  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg,
2067  NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg, None,
2068  NVPTX::STV_f16_v4_areg, NVPTX::STV_f16x2_v4_areg,
2069  NVPTX::STV_f32_v4_areg, None);
2070  break;
2071  }
2072  }
2073  StOps.push_back(N2);
2074  }
2075 
2076  if (!Opcode)
2077  return false;
2078 
2079  StOps.push_back(Chain);
2080 
2081  ST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, StOps);
2082 
2083  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
2084  CurDAG->setNodeMemRefs(cast<MachineSDNode>(ST), {MemRef});
2085 
2086  ReplaceNode(N, ST);
2087  return true;
2088 }
2089 
2090 bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
2091  SDValue Chain = Node->getOperand(0);
2092  SDValue Offset = Node->getOperand(2);
2093  SDValue Flag = Node->getOperand(3);
2094  SDLoc DL(Node);
2095  MemSDNode *Mem = cast<MemSDNode>(Node);
2096 
2097  unsigned VecSize;
2098  switch (Node->getOpcode()) {
2099  default:
2100  return false;
2101  case NVPTXISD::LoadParam:
2102  VecSize = 1;
2103  break;
2104  case NVPTXISD::LoadParamV2:
2105  VecSize = 2;
2106  break;
2107  case NVPTXISD::LoadParamV4:
2108  VecSize = 4;
2109  break;
2110  }
2111 
2112  EVT EltVT = Node->getValueType(0);
2113  EVT MemVT = Mem->getMemoryVT();
2114 
2115  Optional<unsigned> Opcode;
2116 
2117  switch (VecSize) {
2118  default:
2119  return false;
2120  case 1:
2121  Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
2122  NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
2123  NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
2124  NVPTX::LoadParamMemF16, NVPTX::LoadParamMemF16x2,
2125  NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
2126  break;
2127  case 2:
2128  Opcode =
2129  pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8,
2130  NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
2131  NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F16,
2132  NVPTX::LoadParamMemV2F16x2, NVPTX::LoadParamMemV2F32,
2133  NVPTX::LoadParamMemV2F64);
2134  break;
2135  case 4:
2136  Opcode = pickOpcodeForVT(
2137  MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8,
2138  NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32, None,
2139  NVPTX::LoadParamMemV4F16, NVPTX::LoadParamMemV4F16x2,
2140  NVPTX::LoadParamMemV4F32, None);
2141  break;
2142  }
2143  if (!Opcode)
2144  return false;
2145 
2146  SDVTList VTs;
2147  if (VecSize == 1) {
2148  VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2149  } else if (VecSize == 2) {
2150  VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2151  } else {
2152  EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2153  VTs = CurDAG->getVTList(EVTs);
2154  }
2155 
2156  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2157 
2159  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2160  Ops.push_back(Chain);
2161  Ops.push_back(Flag);
2162 
2163  ReplaceNode(Node, CurDAG->getMachineNode(*Opcode, DL, VTs, Ops));
2164  return true;
2165 }
2166 
2167 bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
2168  SDLoc DL(N);
2169  SDValue Chain = N->getOperand(0);
2170  SDValue Offset = N->getOperand(1);
2171  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2172  MemSDNode *Mem = cast<MemSDNode>(N);
2173 
2174  // How many elements do we have?
2175  unsigned NumElts = 1;
2176  switch (N->getOpcode()) {
2177  default:
2178  return false;
2179  case NVPTXISD::StoreRetval:
2180  NumElts = 1;
2181  break;
2183  NumElts = 2;
2184  break;
2186  NumElts = 4;
2187  break;
2188  }
2189 
2190  // Build vector of operands
2192  for (unsigned i = 0; i < NumElts; ++i)
2193  Ops.push_back(N->getOperand(i + 2));
2194  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2195  Ops.push_back(Chain);
2196 
2197  // Determine target opcode
2198  // If we have an i1, use an 8-bit store. The lowering code in
2199  // NVPTXISelLowering will have already emitted an upcast.
2200  Optional<unsigned> Opcode = 0;
2201  switch (NumElts) {
2202  default:
2203  return false;
2204  case 1:
2205  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2206  NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
2207  NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
2208  NVPTX::StoreRetvalF16, NVPTX::StoreRetvalF16x2,
2209  NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
2210  break;
2211  case 2:
2212  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2213  NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
2214  NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
2215  NVPTX::StoreRetvalV2F16, NVPTX::StoreRetvalV2F16x2,
2216  NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
2217  break;
2218  case 4:
2219  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2220  NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
2221  NVPTX::StoreRetvalV4I32, None,
2222  NVPTX::StoreRetvalV4F16, NVPTX::StoreRetvalV4F16x2,
2223  NVPTX::StoreRetvalV4F32, None);
2224  break;
2225  }
2226  if (!Opcode)
2227  return false;
2228 
2229  SDNode *Ret = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops);
2230  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
2231  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});
2232 
2233  ReplaceNode(N, Ret);
2234  return true;
2235 }
2236 
2237 bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
2238  SDLoc DL(N);
2239  SDValue Chain = N->getOperand(0);
2240  SDValue Param = N->getOperand(1);
2241  unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2242  SDValue Offset = N->getOperand(2);
2243  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2244  MemSDNode *Mem = cast<MemSDNode>(N);
2245  SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2246 
2247  // How many elements do we have?
2248  unsigned NumElts = 1;
2249  switch (N->getOpcode()) {
2250  default:
2251  return false;
2254  case NVPTXISD::StoreParam:
2255  NumElts = 1;
2256  break;
2258  NumElts = 2;
2259  break;
2261  NumElts = 4;
2262  break;
2263  }
2264 
2265  // Build vector of operands
2267  for (unsigned i = 0; i < NumElts; ++i)
2268  Ops.push_back(N->getOperand(i + 3));
2269  Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
2270  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2271  Ops.push_back(Chain);
2272  Ops.push_back(Flag);
2273 
2274  // Determine target opcode
2275  // If we have an i1, use an 8-bit store. The lowering code in
2276  // NVPTXISelLowering will have already emitted an upcast.
2277  Optional<unsigned> Opcode = 0;
2278  switch (N->getOpcode()) {
2279  default:
2280  switch (NumElts) {
2281  default:
2282  return false;
2283  case 1:
2284  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2285  NVPTX::StoreParamI8, NVPTX::StoreParamI16,
2286  NVPTX::StoreParamI32, NVPTX::StoreParamI64,
2287  NVPTX::StoreParamF16, NVPTX::StoreParamF16x2,
2288  NVPTX::StoreParamF32, NVPTX::StoreParamF64);
2289  break;
2290  case 2:
2291  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2292  NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
2293  NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
2294  NVPTX::StoreParamV2F16, NVPTX::StoreParamV2F16x2,
2295  NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
2296  break;
2297  case 4:
2298  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2299  NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
2300  NVPTX::StoreParamV4I32, None,
2301  NVPTX::StoreParamV4F16, NVPTX::StoreParamV4F16x2,
2302  NVPTX::StoreParamV4F32, None);
2303  break;
2304  }
2305  if (!Opcode)
2306  return false;
2307  break;
2308  // Special case: if we have a sign-extend/zero-extend node, insert the
2309  // conversion instruction first, and use that as the value operand to
2310  // the selected StoreParam node.
2311  case NVPTXISD::StoreParamU32: {
2312  Opcode = NVPTX::StoreParamI32;
2314  MVT::i32);
2315  SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2316  MVT::i32, Ops[0], CvtNone);
2317  Ops[0] = SDValue(Cvt, 0);
2318  break;
2319  }
2320  case NVPTXISD::StoreParamS32: {
2321  Opcode = NVPTX::StoreParamI32;
2323  MVT::i32);
2324  SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2325  MVT::i32, Ops[0], CvtNone);
2326  Ops[0] = SDValue(Cvt, 0);
2327  break;
2328  }
2329  }
2330 
2332  SDNode *Ret = CurDAG->getMachineNode(*Opcode, DL, RetVTs, Ops);
2333  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
2334  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});
2335 
2336  ReplaceNode(N, Ret);
2337  return true;
2338 }
2339 
/// Select a machine opcode for a texture node.
///
/// The switch assigns the NVPTX::TEX_* / NVPTX::TLD4_* opcode that corresponds
/// to N's opcode and returns false for any opcode it does not list. The
/// machine node is then built from N's operands with operand 0 (the chain,
/// per the comment below) moved to the end, and it keeps N's value-type list.
///
/// NOTE(review): in this listing most `Opc = ...; break;` pairs are not
/// preceded by a visible `case` label -- confirm the labels against the
/// upstream file before relying on this text.
2340 bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
2341  unsigned Opc = 0;
2342 
2343  switch (N->getOpcode()) {
2344  default: return false;
2346  Opc = NVPTX::TEX_1D_F32_S32_RR;
2347  break;
2349  Opc = NVPTX::TEX_1D_F32_F32_RR;
2350  break;
2352  Opc = NVPTX::TEX_1D_F32_F32_LEVEL_RR;
2353  break;
2355  Opc = NVPTX::TEX_1D_F32_F32_GRAD_RR;
2356  break;
2357  case NVPTXISD::Tex1DS32S32:
2358  Opc = NVPTX::TEX_1D_S32_S32_RR;
2359  break;
2361  Opc = NVPTX::TEX_1D_S32_F32_RR;
2362  break;
2364  Opc = NVPTX::TEX_1D_S32_F32_LEVEL_RR;
2365  break;
2367  Opc = NVPTX::TEX_1D_S32_F32_GRAD_RR;
2368  break;
2369  case NVPTXISD::Tex1DU32S32:
2370  Opc = NVPTX::TEX_1D_U32_S32_RR;
2371  break;
2373  Opc = NVPTX::TEX_1D_U32_F32_RR;
2374  break;
2376  Opc = NVPTX::TEX_1D_U32_F32_LEVEL_RR;
2377  break;
2379  Opc = NVPTX::TEX_1D_U32_F32_GRAD_RR;
2380  break;
2382  Opc = NVPTX::TEX_1D_ARRAY_F32_S32_RR;
2383  break;
2385  Opc = NVPTX::TEX_1D_ARRAY_F32_F32_RR;
2386  break;
2388  Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RR;
2389  break;
2391  Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RR;
2392  break;
2394  Opc = NVPTX::TEX_1D_ARRAY_S32_S32_RR;
2395  break;
2397  Opc = NVPTX::TEX_1D_ARRAY_S32_F32_RR;
2398  break;
2400  Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RR;
2401  break;
2403  Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RR;
2404  break;
2406  Opc = NVPTX::TEX_1D_ARRAY_U32_S32_RR;
2407  break;
2409  Opc = NVPTX::TEX_1D_ARRAY_U32_F32_RR;
2410  break;
2412  Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RR;
2413  break;
2415  Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RR;
2416  break;
2418  Opc = NVPTX::TEX_2D_F32_S32_RR;
2419  break;
2421  Opc = NVPTX::TEX_2D_F32_F32_RR;
2422  break;
2424  Opc = NVPTX::TEX_2D_F32_F32_LEVEL_RR;
2425  break;
2427  Opc = NVPTX::TEX_2D_F32_F32_GRAD_RR;
2428  break;
2429  case NVPTXISD::Tex2DS32S32:
2430  Opc = NVPTX::TEX_2D_S32_S32_RR;
2431  break;
2433  Opc = NVPTX::TEX_2D_S32_F32_RR;
2434  break;
2436  Opc = NVPTX::TEX_2D_S32_F32_LEVEL_RR;
2437  break;
2439  Opc = NVPTX::TEX_2D_S32_F32_GRAD_RR;
2440  break;
2441  case NVPTXISD::Tex2DU32S32:
2442  Opc = NVPTX::TEX_2D_U32_S32_RR;
2443  break;
2445  Opc = NVPTX::TEX_2D_U32_F32_RR;
2446  break;
2448  Opc = NVPTX::TEX_2D_U32_F32_LEVEL_RR;
2449  break;
2451  Opc = NVPTX::TEX_2D_U32_F32_GRAD_RR;
2452  break;
2454  Opc = NVPTX::TEX_2D_ARRAY_F32_S32_RR;
2455  break;
2457  Opc = NVPTX::TEX_2D_ARRAY_F32_F32_RR;
2458  break;
2460  Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RR;
2461  break;
2463  Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RR;
2464  break;
2466  Opc = NVPTX::TEX_2D_ARRAY_S32_S32_RR;
2467  break;
2469  Opc = NVPTX::TEX_2D_ARRAY_S32_F32_RR;
2470  break;
2472  Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RR;
2473  break;
2475  Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RR;
2476  break;
2478  Opc = NVPTX::TEX_2D_ARRAY_U32_S32_RR;
2479  break;
2481  Opc = NVPTX::TEX_2D_ARRAY_U32_F32_RR;
2482  break;
2484  Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RR;
2485  break;
2487  Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RR;
2488  break;
2490  Opc = NVPTX::TEX_3D_F32_S32_RR;
2491  break;
2493  Opc = NVPTX::TEX_3D_F32_F32_RR;
2494  break;
2496  Opc = NVPTX::TEX_3D_F32_F32_LEVEL_RR;
2497  break;
2499  Opc = NVPTX::TEX_3D_F32_F32_GRAD_RR;
2500  break;
2501  case NVPTXISD::Tex3DS32S32:
2502  Opc = NVPTX::TEX_3D_S32_S32_RR;
2503  break;
2505  Opc = NVPTX::TEX_3D_S32_F32_RR;
2506  break;
2508  Opc = NVPTX::TEX_3D_S32_F32_LEVEL_RR;
2509  break;
2511  Opc = NVPTX::TEX_3D_S32_F32_GRAD_RR;
2512  break;
2513  case NVPTXISD::Tex3DU32S32:
2514  Opc = NVPTX::TEX_3D_U32_S32_RR;
2515  break;
2517  Opc = NVPTX::TEX_3D_U32_F32_RR;
2518  break;
2520  Opc = NVPTX::TEX_3D_U32_F32_LEVEL_RR;
2521  break;
2523  Opc = NVPTX::TEX_3D_U32_F32_GRAD_RR;
2524  break;
2526  Opc = NVPTX::TEX_CUBE_F32_F32_RR;
2527  break;
2529  Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL_RR;
2530  break;
2532  Opc = NVPTX::TEX_CUBE_S32_F32_RR;
2533  break;
2535  Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL_RR;
2536  break;
2538  Opc = NVPTX::TEX_CUBE_U32_F32_RR;
2539  break;
2541  Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL_RR;
2542  break;
2544  Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_RR;
2545  break;
2547  Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RR;
2548  break;
2550  Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_RR;
2551  break;
2553  Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RR;
2554  break;
2556  Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_RR;
2557  break;
2559  Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RR;
2560  break;
2562  Opc = NVPTX::TLD4_R_2D_F32_F32_RR;
2563  break;
2565  Opc = NVPTX::TLD4_G_2D_F32_F32_RR;
2566  break;
2568  Opc = NVPTX::TLD4_B_2D_F32_F32_RR;
2569  break;
2571  Opc = NVPTX::TLD4_A_2D_F32_F32_RR;
2572  break;
2574  Opc = NVPTX::TLD4_R_2D_S32_F32_RR;
2575  break;
2577  Opc = NVPTX::TLD4_G_2D_S32_F32_RR;
2578  break;
2580  Opc = NVPTX::TLD4_B_2D_S32_F32_RR;
2581  break;
2583  Opc = NVPTX::TLD4_A_2D_S32_F32_RR;
2584  break;
2586  Opc = NVPTX::TLD4_R_2D_U32_F32_RR;
2587  break;
2589  Opc = NVPTX::TLD4_G_2D_U32_F32_RR;
2590  break;
2592  Opc = NVPTX::TLD4_B_2D_U32_F32_RR;
2593  break;
2595  Opc = NVPTX::TLD4_A_2D_U32_F32_RR;
2596  break;
2598  Opc = NVPTX::TEX_UNIFIED_1D_F32_S32_R;
2599  break;
2601  Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_R;
2602  break;
2604  Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL_R;
2605  break;
2607  Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD_R;
2608  break;
2610  Opc = NVPTX::TEX_UNIFIED_1D_S32_S32_R;
2611  break;
2613  Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_R;
2614  break;
2616  Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL_R;
2617  break;
2619  Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD_R;
2620  break;
2622  Opc = NVPTX::TEX_UNIFIED_1D_U32_S32_R;
2623  break;
2625  Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_R;
2626  break;
2628  Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL_R;
2629  break;
2631  Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD_R;
2632  break;
2634  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32_R;
2635  break;
2637  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_R;
2638  break;
2640  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL_R;
2641  break;
2643  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD_R;
2644  break;
2646  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32_R;
2647  break;
2649  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_R;
2650  break;
2652  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL_R;
2653  break;
2655  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD_R;
2656  break;
2658  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32_R;
2659  break;
2661  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_R;
2662  break;
2664  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL_R;
2665  break;
2667  Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD_R;
2668  break;
2670  Opc = NVPTX::TEX_UNIFIED_2D_F32_S32_R;
2671  break;
2673  Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_R;
2674  break;
2676  Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL_R;
2677  break;
2679  Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD_R;
2680  break;
2682  Opc = NVPTX::TEX_UNIFIED_2D_S32_S32_R;
2683  break;
2685  Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_R;
2686  break;
2688  Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL_R;
2689  break;
2691  Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD_R;
2692  break;
2694  Opc = NVPTX::TEX_UNIFIED_2D_U32_S32_R;
2695  break;
2697  Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_R;
2698  break;
2700  Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL_R;
2701  break;
2703  Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD_R;
2704  break;
2706  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32_R;
2707  break;
2709  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_R;
2710  break;
2712  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL_R;
2713  break;
2715  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD_R;
2716  break;
2718  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32_R;
2719  break;
2721  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_R;
2722  break;
2724  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL_R;
2725  break;
2727  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD_R;
2728  break;
2730  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32_R;
2731  break;
2733  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_R;
2734  break;
2736  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL_R;
2737  break;
2739  Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD_R;
2740  break;
2742  Opc = NVPTX::TEX_UNIFIED_3D_F32_S32_R;
2743  break;
2745  Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_R;
2746  break;
2748  Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL_R;
2749  break;
2751  Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD_R;
2752  break;
2754  Opc = NVPTX::TEX_UNIFIED_3D_S32_S32_R;
2755  break;
2757  Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_R;
2758  break;
2760  Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL_R;
2761  break;
2763  Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD_R;
2764  break;
2766  Opc = NVPTX::TEX_UNIFIED_3D_U32_S32_R;
2767  break;
2769  Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_R;
2770  break;
2772  Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL_R;
2773  break;
2775  Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD_R;
2776  break;
2778  Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_R;
2779  break;
2781  Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL_R;
2782  break;
2784  Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_R;
2785  break;
2787  Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL_R;
2788  break;
2790  Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_R;
2791  break;
2793  Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL_R;
2794  break;
2796  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_R;
2797  break;
2799  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL_R;
2800  break;
2802  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_R;
2803  break;
2805  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL_R;
2806  break;
2808  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_R;
2809  break;
2811  Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_R;
2812  break;
2814  Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32_R;
2815  break;
2817  Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32_R;
2818  break;
2820  Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32_R;
2821  break;
2823  Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32_R;
2824  break;
2826  Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32_R;
2827  break;
2829  Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32_R;
2830  break;
2832  Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32_R;
2833  break;
2835  Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32_R;
2836  break;
2838  Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32_R;
2839  break;
2841  Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32_R;
2842  break;
2844  Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32_R;
2845  break;
2847  Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32_R;
2848  break;
2849  }
2850 
2851  // Copy over operands
2852  SmallVector<SDValue, 8> Ops(drop_begin(N->ops()));
2853  Ops.push_back(N->getOperand(0)); // Move chain to the back.
2854 
2855  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
2856  return true;
2857 }
2858 
/// Select a machine opcode for a surface-load node.
///
/// The switch assigns the NVPTX::SULD_* opcode that corresponds to N's opcode
/// and returns false for any opcode it does not list. The machine node is then
/// built from N's operands with operand 0 (the chain, per the comment below)
/// moved to the end, and it keeps N's value-type list.
///
/// NOTE(review): in this listing the `Opc = ...; break;` pairs are not
/// preceded by visible `case` labels -- confirm the labels against the
/// upstream file before relying on this text.
2859 bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
2860  unsigned Opc = 0;
2861  switch (N->getOpcode()) {
2862  default: return false;
2864  Opc = NVPTX::SULD_1D_I8_CLAMP_R;
2865  break;
2867  Opc = NVPTX::SULD_1D_I16_CLAMP_R;
2868  break;
2870  Opc = NVPTX::SULD_1D_I32_CLAMP_R;
2871  break;
2873  Opc = NVPTX::SULD_1D_I64_CLAMP_R;
2874  break;
2876  Opc = NVPTX::SULD_1D_V2I8_CLAMP_R;
2877  break;
2879  Opc = NVPTX::SULD_1D_V2I16_CLAMP_R;
2880  break;
2882  Opc = NVPTX::SULD_1D_V2I32_CLAMP_R;
2883  break;
2885  Opc = NVPTX::SULD_1D_V2I64_CLAMP_R;
2886  break;
2888  Opc = NVPTX::SULD_1D_V4I8_CLAMP_R;
2889  break;
2891  Opc = NVPTX::SULD_1D_V4I16_CLAMP_R;
2892  break;
2894  Opc = NVPTX::SULD_1D_V4I32_CLAMP_R;
2895  break;
2897  Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP_R;
2898  break;
2900  Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP_R;
2901  break;
2903  Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP_R;
2904  break;
2906  Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP_R;
2907  break;
2909  Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP_R;
2910  break;
2912  Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP_R;
2913  break;
2915  Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP_R;
2916  break;
2918  Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP_R;
2919  break;
2921  Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP_R;
2922  break;
2924  Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP_R;
2925  break;
2927  Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP_R;
2928  break;
2930  Opc = NVPTX::SULD_2D_I8_CLAMP_R;
2931  break;
2933  Opc = NVPTX::SULD_2D_I16_CLAMP_R;
2934  break;
2936  Opc = NVPTX::SULD_2D_I32_CLAMP_R;
2937  break;
2939  Opc = NVPTX::SULD_2D_I64_CLAMP_R;
2940  break;
2942  Opc = NVPTX::SULD_2D_V2I8_CLAMP_R;
2943  break;
2945  Opc = NVPTX::SULD_2D_V2I16_CLAMP_R;
2946  break;
2948  Opc = NVPTX::SULD_2D_V2I32_CLAMP_R;
2949  break;
2951  Opc = NVPTX::SULD_2D_V2I64_CLAMP_R;
2952  break;
2954  Opc = NVPTX::SULD_2D_V4I8_CLAMP_R;
2955  break;
2957  Opc = NVPTX::SULD_2D_V4I16_CLAMP_R;
2958  break;
2960  Opc = NVPTX::SULD_2D_V4I32_CLAMP_R;
2961  break;
2963  Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP_R;
2964  break;
2966  Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP_R;
2967  break;
2969  Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP_R;
2970  break;
2972  Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP_R;
2973  break;
2975  Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP_R;
2976  break;
2978  Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP_R;
2979  break;
2981  Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP_R;
2982  break;
2984  Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP_R;
2985  break;
2987  Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP_R;
2988  break;
2990  Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP_R;
2991  break;
2993  Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP_R;
2994  break;
2996  Opc = NVPTX::SULD_3D_I8_CLAMP_R;
2997  break;
2999  Opc = NVPTX::SULD_3D_I16_CLAMP_R;
3000  break;
3002  Opc = NVPTX::SULD_3D_I32_CLAMP_R;
3003  break;
3005  Opc = NVPTX::SULD_3D_I64_CLAMP_R;
3006  break;
3008  Opc = NVPTX::SULD_3D_V2I8_CLAMP_R;
3009  break;
3011  Opc = NVPTX::SULD_3D_V2I16_CLAMP_R;
3012  break;
3014  Opc = NVPTX::SULD_3D_V2I32_CLAMP_R;
3015  break;
3017  Opc = NVPTX::SULD_3D_V2I64_CLAMP_R;
3018  break;
3020  Opc = NVPTX::SULD_3D_V4I8_CLAMP_R;
3021  break;
3023  Opc = NVPTX::SULD_3D_V4I16_CLAMP_R;
3024  break;
3026  Opc = NVPTX::SULD_3D_V4I32_CLAMP_R;
3027  break;
3029  Opc = NVPTX::SULD_1D_I8_TRAP_R;
3030  break;
3032  Opc = NVPTX::SULD_1D_I16_TRAP_R;
3033  break;
3035  Opc = NVPTX::SULD_1D_I32_TRAP_R;
3036  break;
3038  Opc = NVPTX::SULD_1D_I64_TRAP_R;
3039  break;
3041  Opc = NVPTX::SULD_1D_V2I8_TRAP_R;
3042  break;
3044  Opc = NVPTX::SULD_1D_V2I16_TRAP_R;
3045  break;
3047  Opc = NVPTX::SULD_1D_V2I32_TRAP_R;
3048  break;
3050  Opc = NVPTX::SULD_1D_V2I64_TRAP_R;
3051  break;
3053  Opc = NVPTX::SULD_1D_V4I8_TRAP_R;
3054  break;
3056  Opc = NVPTX::SULD_1D_V4I16_TRAP_R;
3057  break;
3059  Opc = NVPTX::SULD_1D_V4I32_TRAP_R;
3060  break;
3062  Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP_R;
3063  break;
3065  Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP_R;
3066  break;
3068  Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP_R;
3069  break;
3071  Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP_R;
3072  break;
3074  Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP_R;
3075  break;
3077  Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP_R;
3078  break;
3080  Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP_R;
3081  break;
3083  Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP_R;
3084  break;
3086  Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP_R;
3087  break;
3089  Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP_R;
3090  break;
3092  Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP_R;
3093  break;
3095  Opc = NVPTX::SULD_2D_I8_TRAP_R;
3096  break;
3098  Opc = NVPTX::SULD_2D_I16_TRAP_R;
3099  break;
3101  Opc = NVPTX::SULD_2D_I32_TRAP_R;
3102  break;
3104  Opc = NVPTX::SULD_2D_I64_TRAP_R;
3105  break;
3107  Opc = NVPTX::SULD_2D_V2I8_TRAP_R;
3108  break;
3110  Opc = NVPTX::SULD_2D_V2I16_TRAP_R;
3111  break;
3113  Opc = NVPTX::SULD_2D_V2I32_TRAP_R;
3114  break;
3116  Opc = NVPTX::SULD_2D_V2I64_TRAP_R;
3117  break;
3119  Opc = NVPTX::SULD_2D_V4I8_TRAP_R;
3120  break;
3122  Opc = NVPTX::SULD_2D_V4I16_TRAP_R;
3123  break;
3125  Opc = NVPTX::SULD_2D_V4I32_TRAP_R;
3126  break;
3128  Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP_R;
3129  break;
3131  Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP_R;
3132  break;
3134  Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP_R;
3135  break;
3137  Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP_R;
3138  break;
3140  Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP_R;
3141  break;
3143  Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP_R;
3144  break;
3146  Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP_R;
3147  break;
3149  Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP_R;
3150  break;
3152  Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP_R;
3153  break;
3155  Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP_R;
3156  break;
3158  Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP_R;
3159  break;
3161  Opc = NVPTX::SULD_3D_I8_TRAP_R;
3162  break;
3164  Opc = NVPTX::SULD_3D_I16_TRAP_R;
3165  break;
3167  Opc = NVPTX::SULD_3D_I32_TRAP_R;
3168  break;
3170  Opc = NVPTX::SULD_3D_I64_TRAP_R;
3171  break;
3173  Opc = NVPTX::SULD_3D_V2I8_TRAP_R;
3174  break;
3176  Opc = NVPTX::SULD_3D_V2I16_TRAP_R;
3177  break;
3179  Opc = NVPTX::SULD_3D_V2I32_TRAP_R;
3180  break;
3182  Opc = NVPTX::SULD_3D_V2I64_TRAP_R;
3183  break;
3185  Opc = NVPTX::SULD_3D_V4I8_TRAP_R;
3186  break;
3188  Opc = NVPTX::SULD_3D_V4I16_TRAP_R;
3189  break;
3191  Opc = NVPTX::SULD_3D_V4I32_TRAP_R;
3192  break;
3194  Opc = NVPTX::SULD_1D_I8_ZERO_R;
3195  break;
3197  Opc = NVPTX::SULD_1D_I16_ZERO_R;
3198  break;
3200  Opc = NVPTX::SULD_1D_I32_ZERO_R;
3201  break;
3203  Opc = NVPTX::SULD_1D_I64_ZERO_R;
3204  break;
3206  Opc = NVPTX::SULD_1D_V2I8_ZERO_R;
3207  break;
3209  Opc = NVPTX::SULD_1D_V2I16_ZERO_R;
3210  break;
3212  Opc = NVPTX::SULD_1D_V2I32_ZERO_R;
3213  break;
3215  Opc = NVPTX::SULD_1D_V2I64_ZERO_R;
3216  break;
3218  Opc = NVPTX::SULD_1D_V4I8_ZERO_R;
3219  break;
3221  Opc = NVPTX::SULD_1D_V4I16_ZERO_R;
3222  break;
3224  Opc = NVPTX::SULD_1D_V4I32_ZERO_R;
3225  break;
3227  Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO_R;
3228  break;
3230  Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO_R;
3231  break;
3233  Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO_R;
3234  break;
3236  Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO_R;
3237  break;
3239  Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO_R;
3240  break;
3242  Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO_R;
3243  break;
3245  Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO_R;
3246  break;
3248  Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO_R;
3249  break;
3251  Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO_R;
3252  break;
3254  Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO_R;
3255  break;
3257  Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO_R;
3258  break;
3260  Opc = NVPTX::SULD_2D_I8_ZERO_R;
3261  break;
3263  Opc = NVPTX::SULD_2D_I16_ZERO_R;
3264  break;
3266  Opc = NVPTX::SULD_2D_I32_ZERO_R;
3267  break;
3269  Opc = NVPTX::SULD_2D_I64_ZERO_R;
3270  break;
3272  Opc = NVPTX::SULD_2D_V2I8_ZERO_R;
3273  break;
3275  Opc = NVPTX::SULD_2D_V2I16_ZERO_R;
3276  break;
3278  Opc = NVPTX::SULD_2D_V2I32_ZERO_R;
3279  break;
3281  Opc = NVPTX::SULD_2D_V2I64_ZERO_R;
3282  break;
3284  Opc = NVPTX::SULD_2D_V4I8_ZERO_R;
3285  break;
3287  Opc = NVPTX::SULD_2D_V4I16_ZERO_R;
3288  break;
3290  Opc = NVPTX::SULD_2D_V4I32_ZERO_R;
3291  break;
3293  Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO_R;
3294  break;
3296  Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO_R;
3297  break;
3299  Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO_R;
3300  break;
3302  Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO_R;
3303  break;
3305  Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO_R;
3306  break;
3308  Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO_R;
3309  break;
3311  Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO_R;
3312  break;
3314  Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO_R;
3315  break;
3317  Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO_R;
3318  break;
3320  Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO_R;
3321  break;
3323  Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO_R;
3324  break;
3326  Opc = NVPTX::SULD_3D_I8_ZERO_R;
3327  break;
3329  Opc = NVPTX::SULD_3D_I16_ZERO_R;
3330  break;
3332  Opc = NVPTX::SULD_3D_I32_ZERO_R;
3333  break;
3335  Opc = NVPTX::SULD_3D_I64_ZERO_R;
3336  break;
3338  Opc = NVPTX::SULD_3D_V2I8_ZERO_R;
3339  break;
3341  Opc = NVPTX::SULD_3D_V2I16_ZERO_R;
3342  break;
3344  Opc = NVPTX::SULD_3D_V2I32_ZERO_R;
3345  break;
3347  Opc = NVPTX::SULD_3D_V2I64_ZERO_R;
3348  break;
3350  Opc = NVPTX::SULD_3D_V4I8_ZERO_R;
3351  break;
3353  Opc = NVPTX::SULD_3D_V4I16_ZERO_R;
3354  break;
3356  Opc = NVPTX::SULD_3D_V4I32_ZERO_R;
3357  break;
3358  }
3359 
3360  // Copy over operands
3361  SmallVector<SDValue, 8> Ops(drop_begin(N->ops()));
3362  Ops.push_back(N->getOperand(0)); // Move chain to the back.
3363 
3364  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
3365  return true;
3366 }
3367 
3368 
3369 /// SelectBFE - Look for instruction sequences that can be made more efficient
3370 /// by using the 'bfe' (bit-field extract) PTX instruction
3371 bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
3372  SDLoc DL(N);
3373  SDValue LHS = N->getOperand(0);
3374  SDValue RHS = N->getOperand(1);
3375  SDValue Len;
3376  SDValue Start;
3377  SDValue Val;
3378  bool IsSigned = false;
3379 
3380  if (N->getOpcode() == ISD::AND) {
3381  // Canonicalize the operands
3382  // We want 'and %val, %mask'
3383  if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
3384  std::swap(LHS, RHS);
3385  }
3386 
3387  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
3388  if (!Mask) {
3389  // We need a constant mask on the RHS of the AND
3390  return false;
3391  }
3392 
3393  // Extract the mask bits
3394  uint64_t MaskVal = Mask->getZExtValue();
3395  if (!isMask_64(MaskVal)) {
3396  // We *could* handle shifted masks here, but doing so would require an
3397  // 'and' operation to fix up the low-order bits so we would trade
3398  // shr+and for bfe+and, which has the same throughput
3399  return false;
3400  }
3401 
3402  // How many bits are in our mask?
3403  uint64_t NumBits = countTrailingOnes(MaskVal);
3404  Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
3405 
3406  if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
3407  // We have a 'srl/and' pair, extract the effective start bit and length
3408  Val = LHS.getNode()->getOperand(0);
3409  Start = LHS.getNode()->getOperand(1);
3410  ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
3411  if (StartConst) {
3412  uint64_t StartVal = StartConst->getZExtValue();
3413  // How many "good" bits do we have left? "good" is defined here as bits
3414  // that exist in the original value, not shifted in.
3415  uint64_t GoodBits = Start.getValueSizeInBits() - StartVal;
3416  if (NumBits > GoodBits) {
3417  // Do not handle the case where bits have been shifted in. In theory
3418  // we could handle this, but the cost is likely higher than just
3419  // emitting the srl/and pair.
3420  return false;
3421  }
3422  Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
3423  } else {
3424  // Do not handle the case where the shift amount (can be zero if no srl
3425  // was found) is not constant. We could handle this case, but it would
3426  // require run-time logic that would be more expensive than just
3427  // emitting the srl/and pair.
3428  return false;
3429  }
3430  } else {
3431  // Do not handle the case where the LHS of the and is not a shift. While
3432  // it would be trivial to handle this case, it would just transform
3433  // 'and' -> 'bfe', but 'and' has higher-throughput.
3434  return false;
3435  }
3436  } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
3437  if (LHS->getOpcode() == ISD::AND) {
3438  ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
3439  if (!ShiftCnst) {
3440  // Shift amount must be constant
3441  return false;
3442  }
3443 
3444  uint64_t ShiftAmt = ShiftCnst->getZExtValue();
3445 
3446  SDValue AndLHS = LHS->getOperand(0);
3447  SDValue AndRHS = LHS->getOperand(1);
3448 
3449  // Canonicalize the AND to have the mask on the RHS
3450  if (isa<ConstantSDNode>(AndLHS)) {
3451  std::swap(AndLHS, AndRHS);
3452  }
3453 
3454  ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
3455  if (!MaskCnst) {
3456  // Mask must be constant
3457  return false;
3458  }
3459 
3460  uint64_t MaskVal = MaskCnst->getZExtValue();
3461  uint64_t NumZeros;
3462  uint64_t NumBits;
3463  if (isMask_64(MaskVal)) {
3464  NumZeros = 0;
3465  // The number of bits in the result bitfield will be the number of
3466  // trailing ones (the AND) minus the number of bits we shift off
3467  NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
3468  } else if (isShiftedMask_64(MaskVal)) {
3469  NumZeros = countTrailingZeros(MaskVal);
3470  unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
3471  // The number of bits in the result bitfield will be the number of
3472  // trailing zeros plus the number of set bits in the mask minus the
3473  // number of bits we shift off
3474  NumBits = NumZeros + NumOnes - ShiftAmt;
3475  } else {
3476  // This is not a mask we can handle
3477  return false;
3478  }
3479 
3480  if (ShiftAmt < NumZeros) {
3481  // Handling this case would require extra logic that would make this
3482  // transformation non-profitable
3483  return false;
3484  }
3485 
3486  Val = AndLHS;
3487  Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
3488  Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
3489  } else if (LHS->getOpcode() == ISD::SHL) {
3490  // Here, we have a pattern like:
3491  //
3492  // (sra (shl val, NN), MM)
3493  // or
3494  // (srl (shl val, NN), MM)
3495  //
3496  // If MM >= NN, we can efficiently optimize this with bfe
3497  Val = LHS->getOperand(0);
3498 
3499  SDValue ShlRHS = LHS->getOperand(1);
3500  ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
3501  if (!ShlCnst) {
3502  // Shift amount must be constant
3503  return false;
3504  }
3505  uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
3506 
3507  SDValue ShrRHS = RHS;
3508  ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
3509  if (!ShrCnst) {
3510  // Shift amount must be constant
3511  return false;
3512  }
3513  uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
3514 
3515  // To avoid extra codegen and be profitable, we need Outer >= Inner
3516  if (OuterShiftAmt < InnerShiftAmt) {
3517  return false;
3518  }
3519 
3520  // If the outer shift is more than the type size, we have no bitfield to
3521  // extract (since we also check that the inner shift is <= the outer shift
3522  // then this also implies that the inner shift is < the type size)
3523  if (OuterShiftAmt >= Val.getValueSizeInBits()) {
3524  return false;
3525  }
3526 
3527  Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL,
3528  MVT::i32);
3529  Len = CurDAG->getTargetConstant(Val.getValueSizeInBits() - OuterShiftAmt,
3530  DL, MVT::i32);
3531 
3532  if (N->getOpcode() == ISD::SRA) {
3533  // If we have a arithmetic right shift, we need to use the signed bfe
3534  // variant
3535  IsSigned = true;
3536  }
3537  } else {
3538  // No can do...
3539  return false;
3540  }
3541  } else {
3542  // No can do...
3543  return false;
3544  }
3545 
3546 
3547  unsigned Opc;
3548  // For the BFE operations we form here from "and" and "srl", always use the
3549  // unsigned variants.
3550  if (Val.getValueType() == MVT::i32) {
3551  if (IsSigned) {
3552  Opc = NVPTX::BFE_S32rii;
3553  } else {
3554  Opc = NVPTX::BFE_U32rii;
3555  }
3556  } else if (Val.getValueType() == MVT::i64) {
3557  if (IsSigned) {
3558  Opc = NVPTX::BFE_S64rii;
3559  } else {
3560  Opc = NVPTX::BFE_U64rii;
3561  }
3562  } else {
3563  // We cannot handle this type
3564  return false;
3565  }
3566 
3567  SDValue Ops[] = {
3568  Val, Start, Len
3569  };
3570 
3571  ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops));
3572  return true;
3573 }
3574 
3575 // SelectDirectAddr - Match a direct address for DAG.
3576 // A direct address could be a globaladdress or externalsymbol.
3577 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
3578  // Return true if TGA or ES.
3579  if (N.getOpcode() == ISD::TargetGlobalAddress ||
3580  N.getOpcode() == ISD::TargetExternalSymbol) {
3581  Address = N;
3582  return true;
3583  }
3584  if (N.getOpcode() == NVPTXISD::Wrapper) {
3585  Address = N.getOperand(0);
3586  return true;
3587  }
3588  // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol
3589  if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) {
3590  if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
3592  CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam)
3593  return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address);
3594  }
3595  return false;
3596 }
3597 
3598 // symbol+offset
3599 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3600  SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3601  if (Addr.getOpcode() == ISD::ADD) {
3602  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3603  SDValue base = Addr.getOperand(0);
3604  if (SelectDirectAddr(base, Base)) {
3605  Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3606  mvt);
3607  return true;
3608  }
3609  }
3610  }
3611  return false;
3612 }
3613 
3614 // symbol+offset
3615 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
3616  SDValue &Base, SDValue &Offset) {
3617  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
3618 }
3619 
3620 // symbol+offset
3621 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
3622  SDValue &Base, SDValue &Offset) {
3623  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
3624 }
3625 
3626 // register+offset
3627 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
3628  SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3629  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
3630  Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3631  Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
3632  return true;
3633  }
3634  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
3635  Addr.getOpcode() == ISD::TargetGlobalAddress)
3636  return false; // direct calls.
3637 
3638  if (Addr.getOpcode() == ISD::ADD) {
3639  if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
3640  return false;
3641  }
3642  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3643  if (FrameIndexSDNode *FIN =
3644  dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
3645  // Constant offset from frame ref.
3646  Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3647  else
3648  Base = Addr.getOperand(0);
3649  Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3650  mvt);
3651  return true;
3652  }
3653  }
3654  return false;
3655 }
3656 
3657 // register+offset
3658 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
3659  SDValue &Base, SDValue &Offset) {
3660  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
3661 }
3662 
3663 // register+offset
3664 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
3665  SDValue &Base, SDValue &Offset) {
3666  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
3667 }
3668 
3669 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
3670  unsigned int spN) const {
3671  const Value *Src = nullptr;
3672  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
3673  if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3674  return true;
3675  Src = mN->getMemOperand()->getValue();
3676  }
3677  if (!Src)
3678  return false;
3679  if (auto *PT = dyn_cast<PointerType>(Src->getType()))
3680  return (PT->getAddressSpace() == spN);
3681  return false;
3682 }
3683 
3684 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
3685 /// inline asm expressions.
3687  const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
3688  SDValue Op0, Op1;
3689  switch (ConstraintID) {
3690  default:
3691  return true;
3692  case InlineAsm::Constraint_m: // memory
3693  if (SelectDirectAddr(Op, Op0)) {
3694  OutOps.push_back(Op0);
3695  OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
3696  return false;
3697  }
3698  if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
3699  OutOps.push_back(Op0);
3700  OutOps.push_back(Op1);
3701  return false;
3702  }
3703  break;
3704  }
3705  return true;
3706 }
3707 
3708 /// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
3709 /// conversion from \p SrcTy to \p DestTy.
3710 unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
3711  bool IsSigned) {
3712  switch (SrcTy.SimpleTy) {
3713  default:
3714  llvm_unreachable("Unhandled source type");
3715  case MVT::i8:
3716  switch (DestTy.SimpleTy) {
3717  default:
3718  llvm_unreachable("Unhandled dest type");
3719  case MVT::i16:
3720  return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
3721  case MVT::i32:
3722  return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
3723  case MVT::i64:
3724  return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
3725  }
3726  case MVT::i16:
3727  switch (DestTy.SimpleTy) {
3728  default:
3729  llvm_unreachable("Unhandled dest type");
3730  case MVT::i8:
3731  return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
3732  case MVT::i32:
3733  return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
3734  case MVT::i64:
3735  return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
3736  }
3737  case MVT::i32:
3738  switch (DestTy.SimpleTy) {
3739  default:
3740  llvm_unreachable("Unhandled dest type");
3741  case MVT::i8:
3742  return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
3743  case MVT::i16:
3744  return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
3745  case MVT::i64:
3746  return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
3747  }
3748  case MVT::i64:
3749  switch (DestTy.SimpleTy) {
3750  default:
3751  llvm_unreachable("Unhandled dest type");
3752  case MVT::i8:
3753  return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
3754  case MVT::i16:
3755  return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
3756  case MVT::i32:
3757  return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
3758  }
3759  }
3760 }
llvm::NVPTXISD::Suld1DI16Clamp
@ Suld1DI16Clamp
Definition: NVPTXISelLowering.h:254
i
i
Definition: README.txt:29
llvm::NVPTXISD::TexUnified2DU32FloatLevel
@ TexUnified2DU32FloatLevel
Definition: NVPTXISelLowering.h:201
llvm::NVPTXISD::Suld3DV2I64Trap
@ Suld3DV2I64Trap
Definition: NVPTXISelLowering.h:368
llvm::ISD::SETUGE
@ SETUGE
Definition: ISDOpcodes.h:1424
llvm::NVPTXISD::Suld2DV2I32Clamp
@ Suld2DV2I32Clamp
Definition: NVPTXISelLowering.h:283
llvm::NVPTXISD::Tex1DFloatS32
@ Tex1DFloatS32
Definition: NVPTXISelLowering.h:83
llvm::NVPTXISD::Tld4UnifiedR2DU64Float
@ Tld4UnifiedR2DU64Float
Definition: NVPTXISelLowering.h:247
llvm::ConstantSDNode
Definition: SelectionDAGNodes.h:1564
llvm::NVPTXISD::Suld1DArrayI16Zero
@ Suld1DArrayI16Zero
Definition: NVPTXISelLowering.h:386
llvm::NVPTXISD::Suld1DV4I16Trap
@ Suld1DV4I16Trap
Definition: NVPTXISelLowering.h:322
llvm::NVPTXISD::TexUnifiedCubeArrayS32FloatLevel
@ TexUnifiedCubeArrayS32FloatLevel
Definition: NVPTXISelLowering.h:236
llvm::NVPTXISD::TexUnified1DU32S32
@ TexUnified1DU32S32
Definition: NVPTXISelLowering.h:175
llvm::ISD::SETLE
@ SETLE
Definition: ISDOpcodes.h:1435
llvm::NVPTXISD::TexCubeArrayU32Float
@ TexCubeArrayU32Float
Definition: NVPTXISelLowering.h:153
llvm::ISD::SETO
@ SETO
Definition: ISDOpcodes.h:1420
llvm::NVPTXISD::Suld1DArrayV4I8Trap
@ Suld1DArrayV4I8Trap
Definition: NVPTXISelLowering.h:333
llvm::AddrSpaceCastSDNode::getSrcAddressSpace
unsigned getSrcAddressSpace() const
Definition: SelectionDAGNodes.h:1250
llvm::NVPTXISD::Suld1DArrayI32Clamp
@ Suld1DArrayI32Clamp
Definition: NVPTXISelLowering.h:267
llvm::NVPTXISD::TexUnified3DFloatFloat
@ TexUnified3DFloatFloat
Definition: NVPTXISelLowering.h:216
llvm::NVPTXISD::Suld2DArrayI32Zero
@ Suld2DArrayI32Zero
Definition: NVPTXISelLowering.h:411
llvm::NVPTXISD::Suld2DArrayV4I32Trap
@ Suld2DArrayV4I32Trap
Definition: NVPTXISelLowering.h:359
llvm::NVPTXISD::Suld1DArrayI64Trap
@ Suld1DArrayI64Trap
Definition: NVPTXISelLowering.h:328
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::NVPTXISD::Suld2DArrayV4I16Zero
@ Suld2DArrayV4I16Zero
Definition: NVPTXISelLowering.h:418
llvm::NVPTXDAGToDAGISel::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
Definition: NVPTXISelDAGToDAG.cpp:44
llvm::NVPTX::PTXLdStInstCode::SHARED
@ SHARED
Definition: NVPTX.h:111
llvm::NVPTXISD::Tld4UnifiedR2DFloatFloat
@ Tld4UnifiedR2DFloatFloat
Definition: NVPTXISelLowering.h:239
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:280
llvm::AArch64CC::NE
@ NE
Definition: AArch64BaseInfo.h:256
llvm::NVPTXISD::TexUnifiedCubeU32FloatLevel
@ TexUnifiedCubeU32FloatLevel
Definition: NVPTXISelLowering.h:232
llvm::SDLoc
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Definition: SelectionDAGNodes.h:1090
llvm::ARMII::VecSize
@ VecSize
Definition: ARMBaseInfo.h:421
pickOpcodeForVT
static Optional< unsigned > pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16, unsigned Opcode_i32, Optional< unsigned > Opcode_i64, unsigned Opcode_f16, unsigned Opcode_f16x2, unsigned Opcode_f32, Optional< unsigned > Opcode_f64)
Definition: NVPTXISelDAGToDAG.cpp:811
EVTs
static ManagedStatic< std::set< EVT, EVT::compareRawBits > > EVTs
Definition: SelectionDAG.cpp:10729
llvm::NVPTXISD::LoadParamV2
@ LoadParamV2
Definition: NVPTXISelLowering.h:71
llvm::ISD::SETGT
@ SETGT
Definition: ISDOpcodes.h:1432
llvm::NVPTXISD::TexUnifiedCubeFloatFloatLevel
@ TexUnifiedCubeFloatFloatLevel
Definition: NVPTXISelLowering.h:228
llvm::ISD::BITCAST
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:886
llvm::NVPTXISD::LoadV4
@ LoadV4
Definition: NVPTXISelLowering.h:63
llvm::NVPTXISD::Suld2DArrayV2I8Clamp
@ Suld2DArrayV2I8Clamp
Definition: NVPTXISelLowering.h:293
llvm::NVPTXISD::Suld1DArrayV4I16Zero
@ Suld1DArrayV4I16Zero
Definition: NVPTXISelLowering.h:394
llvm::ISD::SETNE
@ SETNE
Definition: ISDOpcodes.h:1436
llvm::NVPTXISD::MoveParam
@ MoveParam
Definition: NVPTXISelLowering.h:47
llvm::NVPTXISD::TexCubeS32Float
@ TexCubeS32Float
Definition: NVPTXISelLowering.h:145
llvm::NVPTXISD::TexUnified1DArrayU32Float
@ TexUnified1DArrayU32Float
Definition: NVPTXISelLowering.h:188
llvm::NVPTXISD::TexUnified1DFloatFloatLevel
@ TexUnified1DFloatFloatLevel
Definition: NVPTXISelLowering.h:169
llvm::NVPTXISD::Tex1DU32S32
@ Tex1DU32S32
Definition: NVPTXISelLowering.h:91
llvm::NVPTXSubtarget::hasLDG
bool hasLDG() const
Definition: NVPTXSubtarget.h:75
llvm::NVPTX::PTXCmpMode::EQU
@ EQU
Definition: NVPTX.h:162
llvm::NVPTXISD::Suld1DI16Zero
@ Suld1DI16Zero
Definition: NVPTXISelLowering.h:374
AtomicOrdering.h
llvm::NVPTXISD::Suld3DI16Zero
@ Suld3DI16Zero
Definition: NVPTXISelLowering.h:422
llvm::NVPTXISD::Suld2DV2I32Trap
@ Suld2DV2I32Trap
Definition: NVPTXISelLowering.h:343
llvm::NVPTXISD::Tex1DArrayS32Float
@ Tex1DArrayS32Float
Definition: NVPTXISelLowering.h:100
llvm::NVPTXISD::Suld3DI8Zero
@ Suld3DI8Zero
Definition: NVPTXISelLowering.h:421
llvm::SDValue::getNode
SDNode * getNode() const
get the SDNode which holds the desired result
Definition: SelectionDAGNodes.h:151
llvm::NVPTXISD::Suld1DArrayV4I32Trap
@ Suld1DArrayV4I32Trap
Definition: NVPTXISelLowering.h:335
llvm::NVPTXISD::TexUnifiedCubeS32Float
@ TexUnifiedCubeS32Float
Definition: NVPTXISelLowering.h:229
llvm::ISD::ConstantFP
@ ConstantFP
Definition: ISDOpcodes.h:77
getCodeAddrSpace
static unsigned int getCodeAddrSpace(MemSDNode *N)
Definition: NVPTXISelDAGToDAG.cpp:656
llvm::NVPTX::PTXCmpMode::NotANumber
@ NotANumber
Definition: NVPTX.h:170
llvm::NVPTXISD::Tex2DS32Float
@ Tex2DS32Float
Definition: NVPTXISelLowering.h:112
llvm::NVPTXISD::Tld4G2DS64Float
@ Tld4G2DS64Float
Definition: NVPTXISelLowering.h:160
llvm::NVPTXISD::Tex1DArrayFloatFloatLevel
@ Tex1DArrayFloatFloatLevel
Definition: NVPTXISelLowering.h:97
llvm::NVPTXSubtarget::getTargetLowering
const NVPTXTargetLowering * getTargetLowering() const override
Definition: NVPTXSubtarget.h:64
llvm::NVPTX::PTXCmpMode::NUM
@ NUM
Definition: NVPTX.h:168
llvm::ARM_MB::LD
@ LD
Definition: ARMBaseInfo.h:72
llvm::NVPTXTargetMachine::is64Bit
bool is64Bit() const
Definition: NVPTXTargetMachine.h:47
llvm::NVPTXISD::Suld3DV2I32Trap
@ Suld3DV2I32Trap
Definition: NVPTXISelLowering.h:367
llvm::NVPTXISD::Suld3DV2I8Clamp
@ Suld3DV2I8Clamp
Definition: NVPTXISelLowering.h:305
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::NVPTX::PTXLdStInstCode::LOCAL
@ LOCAL
Definition: NVPTX.h:113
Wrapper
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
Definition: AMDGPUAliasAnalysis.cpp:31
llvm::ISD::SETEQ
@ SETEQ
Definition: ISDOpcodes.h:1431
llvm::MVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: MachineValueType.h:374
llvm::ADDRESS_SPACE_LOCAL
@ ADDRESS_SPACE_LOCAL
Definition: NVPTXBaseInfo.h:26
llvm::SelectionDAG::getVTList
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
Definition: SelectionDAG.cpp:9108
llvm::NVPTXISD::Tld4A2DS64Float
@ Tld4A2DS64Float
Definition: NVPTXISelLowering.h:162
llvm::NVPTXISD::Suld1DV2I64Trap
@ Suld1DV2I64Trap
Definition: NVPTXISelLowering.h:320
llvm::NVPTXISD::TexUnified1DArrayS32S32
@ TexUnified1DArrayS32S32
Definition: NVPTXISelLowering.h:183
llvm::NVPTXISD::TexUnified2DArrayFloatS32
@ TexUnified2DArrayFloatS32
Definition: NVPTXISelLowering.h:203
llvm::NVPTXISD::Suld1DArrayV2I32Clamp
@ Suld1DArrayV2I32Clamp
Definition: NVPTXISelLowering.h:271
llvm::NVPTXISD::Tex3DFloatFloatLevel
@ Tex3DFloatFloatLevel
Definition: NVPTXISelLowering.h:133
llvm::NVPTXISD::Suld1DV2I32Clamp
@ Suld1DV2I32Clamp
Definition: NVPTXISelLowering.h:259
ErrorHandling.h
llvm::NVPTXISD::Tld4UnifiedA2DU64Float
@ Tld4UnifiedA2DU64Float
Definition: NVPTXISelLowering.h:250
llvm::ADDRESS_SPACE_PARAM
@ ADDRESS_SPACE_PARAM
Definition: NVPTXBaseInfo.h:29
llvm::NVPTXISD::Tld4UnifiedR2DS64Float
@ Tld4UnifiedR2DS64Float
Definition: NVPTXISelLowering.h:243
llvm::MemSDNode::getMemoryVT
EVT getMemoryVT() const
Return the type of the in-memory value.
Definition: SelectionDAGNodes.h:1341
ValueTracking.h
llvm::NVPTXISD::Tld4B2DS64Float
@ Tld4B2DS64Float
Definition: NVPTXISelLowering.h:161
llvm::SDNode
Represents one node in the SelectionDAG.
Definition: SelectionDAGNodes.h:454
llvm::NVPTXISD::TexUnified2DU32FloatGrad
@ TexUnified2DU32FloatGrad
Definition: NVPTXISelLowering.h:202
llvm::NVPTXISD::TexUnified3DS32FloatGrad
@ TexUnified3DS32FloatGrad
Definition: NVPTXISelLowering.h:222
llvm::NVPTXISD::Tex2DArrayFloatS32
@ Tex2DArrayFloatS32
Definition: NVPTXISelLowering.h:119
llvm::LoadSDNode
This class is used to represent ISD::LOAD nodes.
Definition: SelectionDAGNodes.h:2314
llvm::MVT::Glue
@ Glue
Definition: MachineValueType.h:270
llvm::NVPTX::PTXCmpMode::GEU
@ GEU
Definition: NVPTX.h:167
llvm::NVPTXISD::Suld2DI16Trap
@ Suld2DI16Trap
Definition: NVPTXISelLowering.h:338
llvm::NVPTXISD::TexCubeFloatFloatLevel
@ TexCubeFloatFloatLevel
Definition: NVPTXISelLowering.h:144
llvm::NVPTXISD::TexUnifiedCubeArrayU32Float
@ TexUnifiedCubeArrayU32Float
Definition: NVPTXISelLowering.h:237
llvm::NVPTXISD::Tld4B2DU64Float
@ Tld4B2DU64Float
Definition: NVPTXISelLowering.h:165
llvm::NVPTXISD::Tex1DArrayS32FloatLevel
@ Tex1DArrayS32FloatLevel
Definition: NVPTXISelLowering.h:101
llvm::NVPTXISD::Tld4UnifiedB2DS64Float
@ Tld4UnifiedB2DS64Float
Definition: NVPTXISelLowering.h:245
llvm::NVPTXISD::TexUnified1DArrayS32Float
@ TexUnified1DArrayS32Float
Definition: NVPTXISelLowering.h:184
llvm::ISD::SETULE
@ SETULE
Definition: ISDOpcodes.h:1426
llvm::NVPTXISD::Suld3DI64Trap
@ Suld3DI64Trap
Definition: NVPTXISelLowering.h:364
llvm::NVPTXISD::Suld1DV2I64Zero
@ Suld1DV2I64Zero
Definition: NVPTXISelLowering.h:380
llvm::NVPTXISD::Tex3DFloatS32
@ Tex3DFloatS32
Definition: NVPTXISelLowering.h:131
llvm::NVPTXISD::TexUnified2DArrayU32S32
@ TexUnified2DArrayU32S32
Definition: NVPTXISelLowering.h:211
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:127
llvm::InlineAsm::Constraint_m
@ Constraint_m
Definition: InlineAsm.h:255
llvm::NVPTXISD::Suld1DV4I8Clamp
@ Suld1DV4I8Clamp
Definition: NVPTXISelLowering.h:261
llvm::NVPTXISD::Suld3DV4I32Trap
@ Suld3DV4I32Trap
Definition: NVPTXISelLowering.h:371
llvm::Optional< unsigned >
llvm::NVPTXISD::Suld2DV2I16Clamp
@ Suld2DV2I16Clamp
Definition: NVPTXISelLowering.h:282
Vector
So we should use XX3Form_Rcr to implement intrinsic Convert DP outs ins xscvdpsp No builtin are required Round &Convert QP DP(dword[1] is set to zero) No builtin are required Round to Quad Precision because you need to assign rounding mode in instruction Provide builtin(set f128:$vT,(int_ppc_vsx_xsrqpi f128:$vB))(set f128 yields< n x< ty > >< result > yields< ty >< result > No builtin are required Load Store Vector
Definition: README_P9.txt:497
llvm::NVPTXISD::Tex3DS32S32
@ Tex3DS32S32
Definition: NVPTXISelLowering.h:135
llvm::NVPTXISD::Suld3DV2I32Clamp
@ Suld3DV2I32Clamp
Definition: NVPTXISelLowering.h:307
llvm::MemSDNode
This is an abstract virtual class for memory operations.
Definition: SelectionDAGNodes.h:1259
llvm::NVPTXISD::TexUnified3DS32S32
@ TexUnified3DS32S32
Definition: NVPTXISelLowering.h:219
llvm::NVPTXISD::Suld2DArrayI8Trap
@ Suld2DArrayI8Trap
Definition: NVPTXISelLowering.h:349
getPTXCmpMode
static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ)
Definition: NVPTXISelDAGToDAG.cpp:537
llvm::NVPTXISD::Suld1DArrayV4I16Trap
@ Suld1DArrayV4I16Trap
Definition: NVPTXISelLowering.h:334
llvm::NVPTXISD::Suld1DV4I32Trap
@ Suld1DV4I32Trap
Definition: NVPTXISelLowering.h:323
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:119
llvm::NVPTXISD::Suld3DI32Trap
@ Suld3DI32Trap
Definition: NVPTXISelLowering.h:363
llvm::NVPTXISD::Suld2DI8Zero
@ Suld2DI8Zero
Definition: NVPTXISelLowering.h:397
llvm::NVPTXISD::Suld2DV4I32Clamp
@ Suld2DV4I32Clamp
Definition: NVPTXISelLowering.h:287
llvm::NVPTXISD::Tld4R2DS64Float
@ Tld4R2DS64Float
Definition: NVPTXISelLowering.h:159
llvm::NVPTXISD::Suld2DArrayV2I64Trap
@ Suld2DArrayV2I64Trap
Definition: NVPTXISelLowering.h:356
llvm::NVPTXISD::TexUnified1DArrayU32S32
@ TexUnified1DArrayU32S32
Definition: NVPTXISelLowering.h:187
llvm::NVPTXISD::Tex2DArrayU32FloatLevel
@ Tex2DArrayU32FloatLevel
Definition: NVPTXISelLowering.h:129
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::LSBaseSDNode::isIndexed
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
Definition: SelectionDAGNodes.h:2302
llvm::ISD::SETOEQ
@ SETOEQ
Definition: ISDOpcodes.h:1414
llvm::NVPTXISD::TexUnified2DS32S32
@ TexUnified2DS32S32
Definition: NVPTXISelLowering.h:195
llvm::NVPTXISD::TexUnifiedCubeArrayS32Float
@ TexUnifiedCubeArrayS32Float
Definition: NVPTXISelLowering.h:235
llvm::NVPTXISD::Suld3DV2I64Zero
@ Suld3DV2I64Zero
Definition: NVPTXISelLowering.h:428
llvm::NVPTXISD::Suld2DV2I8Trap
@ Suld2DV2I8Trap
Definition: NVPTXISelLowering.h:341
llvm::NVPTXISD::Suld1DV2I8Trap
@ Suld1DV2I8Trap
Definition: NVPTXISelLowering.h:317
llvm::NVPTXISD::TexUnified2DS32FloatLevel
@ TexUnified2DS32FloatLevel
Definition: NVPTXISelLowering.h:197
llvm::NVPTXISD::LDGV4
@ LDGV4
Definition: NVPTXISelLowering.h:65
llvm::NVPTXISD::Tex1DArrayS32FloatGrad
@ Tex1DArrayS32FloatGrad
Definition: NVPTXISelLowering.h:102
llvm::ISD::SETUEQ
@ SETUEQ
Definition: ISDOpcodes.h:1422
llvm::NVPTXTargetMachine
NVPTXTargetMachine.
Definition: NVPTXTargetMachine.h:25
llvm::NVPTXISD::Tex2DFloatFloatLevel
@ Tex2DFloatFloatLevel
Definition: NVPTXISelLowering.h:109
llvm::NVPTX::PTXLdStInstCode::VecType
VecType
Definition: NVPTX.h:121
llvm::NVPTXISD::TexUnified3DU32Float
@ TexUnified3DU32Float
Definition: NVPTXISelLowering.h:224
llvm::NVPTXISD::Tex1DU32FloatLevel
@ Tex1DU32FloatLevel
Definition: NVPTXISelLowering.h:93
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::AtomicOrdering::Monotonic
@ Monotonic
llvm::NVPTXISD::Suld3DV4I16Clamp
@ Suld3DV4I16Clamp
Definition: NVPTXISelLowering.h:310
llvm::NVPTXISD::Suld1DArrayV2I32Trap
@ Suld1DArrayV2I32Trap
Definition: NVPTXISelLowering.h:331
llvm::NVPTXISD::Suld1DI32Trap
@ Suld1DI32Trap
Definition: NVPTXISelLowering.h:315
NVPTXUtilities.h
llvm::NVPTXISD::Suld2DArrayV4I8Clamp
@ Suld2DArrayV4I8Clamp
Definition: NVPTXISelLowering.h:297
llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:129
llvm::NVPTXISD::Suld1DArrayV4I8Zero
@ Suld1DArrayV4I8Zero
Definition: NVPTXISelLowering.h:393
llvm::NVPTXISD::Suld3DV2I8Zero
@ Suld3DV2I8Zero
Definition: NVPTXISelLowering.h:425
llvm::MVT::SimpleValueType
SimpleValueType
Definition: MachineValueType.h:33
llvm::AArch64CC::LT
@ LT
Definition: AArch64BaseInfo.h:266
llvm::NVPTXISD::Suld2DArrayI64Clamp
@ Suld2DArrayI64Clamp
Definition: NVPTXISelLowering.h:292
llvm::NVPTXISD::Suld1DV4I16Clamp
@ Suld1DV4I16Clamp
Definition: NVPTXISelLowering.h:262
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
CommandLine.h
NVPTXISelDAGToDAG.h
llvm::NVPTXISD::TexCubeU32Float
@ TexCubeU32Float
Definition: NVPTXISelLowering.h:147
llvm::NVPTXISD::Suld2DArrayV2I64Zero
@ Suld2DArrayV2I64Zero
Definition: NVPTXISelLowering.h:416
llvm::NVPTXISD::Suld3DI16Clamp
@ Suld3DI16Clamp
Definition: NVPTXISelLowering.h:302
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:43
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1617
llvm::NVPTXISD::TexUnified2DArrayS32FloatGrad
@ TexUnified2DArrayS32FloatGrad
Definition: NVPTXISelLowering.h:210
llvm::NVPTXISD::Tex1DS32S32
@ Tex1DS32S32
Definition: NVPTXISelLowering.h:87
llvm::NVPTXISD::LoadParamV4
@ LoadParamV4
Definition: NVPTXISelLowering.h:72
llvm::NVPTXISD::Suld3DI32Zero
@ Suld3DI32Zero
Definition: NVPTXISelLowering.h:423
GlobalValue.h
llvm::NVPTXISD::Suld2DV4I8Zero
@ Suld2DV4I8Zero
Definition: NVPTXISelLowering.h:405
llvm::SelectionDAG::getTargetFrameIndex
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:703
llvm::NVPTXISD::Suld1DV4I8Trap
@ Suld1DV4I8Trap
Definition: NVPTXISelLowering.h:321
llvm::NVPTXISD::Suld2DV4I16Zero
@ Suld2DV4I16Zero
Definition: NVPTXISelLowering.h:406
llvm::SDValue::getValueType
EVT getValueType() const
Return the ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:1125
llvm::NVPTXISD::Tld4UnifiedB2DFloatFloat
@ Tld4UnifiedB2DFloatFloat
Definition: NVPTXISelLowering.h:241
llvm::NVPTXISD::Suld3DV4I32Zero
@ Suld3DV4I32Zero
Definition: NVPTXISelLowering.h:431
llvm::NVPTXISD::Suld2DArrayV4I32Zero
@ Suld2DArrayV4I32Zero
Definition: NVPTXISelLowering.h:419
llvm::NVPTXISD::Suld3DV2I8Trap
@ Suld3DV2I8Trap
Definition: NVPTXISelLowering.h:365
llvm::ISD::SETGE
@ SETGE
Definition: ISDOpcodes.h:1433
llvm::NVPTXISD::Tex3DFloatFloatGrad
@ Tex3DFloatFloatGrad
Definition: NVPTXISelLowering.h:134
llvm::NVPTXISD::Tex2DArrayFloatFloatLevel
@ Tex2DArrayFloatFloatLevel
Definition: NVPTXISelLowering.h:121
llvm::NVPTXISD::TexUnified1DFloatS32
@ TexUnified1DFloatS32
Definition: NVPTXISelLowering.h:167
llvm::ADDRESS_SPACE_GLOBAL
@ ADDRESS_SPACE_GLOBAL
Definition: NVPTXBaseInfo.h:23
llvm::NVPTXISD::TexCubeArrayS32FloatLevel
@ TexCubeArrayS32FloatLevel
Definition: NVPTXISelLowering.h:152
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:34
llvm::NVPTXISD::LoadV2
@ LoadV2
Definition: NVPTXISelLowering.h:62
llvm::NVPTXISD::Suld1DArrayI32Zero
@ Suld1DArrayI32Zero
Definition: NVPTXISelLowering.h:387
llvm::NVPTXISD::Tex2DArrayS32S32
@ Tex2DArrayS32S32
Definition: NVPTXISelLowering.h:123
llvm::NVPTXISD::Suld2DV2I64Zero
@ Suld2DV2I64Zero
Definition: NVPTXISelLowering.h:404
llvm::SelectionDAGISel::OptLevel
CodeGenOpt::Level OptLevel
Definition: SelectionDAGISel.h:52
llvm::NVPTXISD::Tex1DU32FloatGrad
@ Tex1DU32FloatGrad
Definition: NVPTXISelLowering.h:94
llvm::MVT::f64
@ f64
Definition: MachineValueType.h:58
llvm::NVPTXISD::Suld2DI32Clamp
@ Suld2DI32Clamp
Definition: NVPTXISelLowering.h:279
llvm::isShiftedMask_64
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:485
llvm::EVT::getVectorNumElements
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:308
EQ
#define EQ(a, b)
Definition: regexec.c:112
llvm::NVPTXISD::Tex3DS32Float
@ Tex3DS32Float
Definition: NVPTXISelLowering.h:136
llvm::NVPTXISD::Suld2DV2I64Clamp
@ Suld2DV2I64Clamp
Definition: NVPTXISelLowering.h:284
llvm::NVPTXISD::TexUnified2DFloatFloat
@ TexUnified2DFloatFloat
Definition: NVPTXISelLowering.h:192
llvm::NVPTXISD::Tex2DFloatFloat
@ Tex2DFloatFloat
Definition: NVPTXISelLowering.h:108
llvm::NVPTXISD::Suld1DV2I8Clamp
@ Suld1DV2I8Clamp
Definition: NVPTXISelLowering.h:257
llvm::NVPTXISD::Suld1DArrayV2I8Zero
@ Suld1DArrayV2I8Zero
Definition: NVPTXISelLowering.h:389
llvm::NVPTXISD::TexUnifiedCubeFloatFloat
@ TexUnifiedCubeFloatFloat
Definition: NVPTXISelLowering.h:227
llvm::NVPTXISD::Tld4G2DFloatFloat
@ Tld4G2DFloatFloat
Definition: NVPTXISelLowering.h:156
Param
Value * Param
Definition: NVPTXLowerArgs.cpp:164
llvm::MVT::SimpleTy
SimpleValueType SimpleTy
Definition: MachineValueType.h:329
llvm::NVPTXISD::StoreRetval
@ StoreRetval
Definition: NVPTXISelLowering.h:78
llvm::ISD::SRA
@ SRA
Definition: ISDOpcodes.h:692
llvm::NVPTXISD::Tld4UnifiedB2DU64Float
@ Tld4UnifiedB2DU64Float
Definition: NVPTXISelLowering.h:249
llvm::NVPTXISD::Suld2DArrayI16Clamp
@ Suld2DArrayI16Clamp
Definition: NVPTXISelLowering.h:290
llvm::NVPTXISD::Tex2DArrayU32FloatGrad
@ Tex2DArrayU32FloatGrad
Definition: NVPTXISelLowering.h:130
llvm::NVPTXISD::TexUnifiedCubeS32FloatLevel
@ TexUnifiedCubeS32FloatLevel
Definition: NVPTXISelLowering.h:230
llvm::NVPTXISD::TexUnified1DFloatFloatGrad
@ TexUnified1DFloatFloatGrad
Definition: NVPTXISelLowering.h:170
llvm::SelectionDAGISel::ReplaceNode
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
Definition: SelectionDAGISel.h:229
llvm::NVPTXISD::TexUnified1DArrayFloatS32
@ TexUnified1DArrayFloatS32
Definition: NVPTXISelLowering.h:179
llvm::NVPTXISD::Suld1DArrayV2I16Trap
@ Suld1DArrayV2I16Trap
Definition: NVPTXISelLowering.h:330
llvm::NVPTXISD::Suld1DArrayV2I32Zero
@ Suld1DArrayV2I32Zero
Definition: NVPTXISelLowering.h:391
llvm::ISD::ADDRSPACECAST
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
Definition: ISDOpcodes.h:890
llvm::NVPTXISD::Suld1DV4I16Zero
@ Suld1DV4I16Zero
Definition: NVPTXISelLowering.h:382
llvm::NVPTXISD::Tex2DU32FloatLevel
@ Tex2DU32FloatLevel
Definition: NVPTXISelLowering.h:117
llvm::NVPTXISD::TexUnified1DU32Float
@ TexUnified1DU32Float
Definition: NVPTXISelLowering.h:176
llvm::MCID::Flag
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:147
llvm::NVPTXISD::Suld2DArrayV4I8Trap
@ Suld2DArrayV4I8Trap
Definition: NVPTXISelLowering.h:357
llvm::NVPTXISD::Suld2DArrayI8Clamp
@ Suld2DArrayI8Clamp
Definition: NVPTXISelLowering.h:289
llvm::NVPTXISD::TexUnifiedCubeArrayU32FloatLevel
@ TexUnifiedCubeArrayU32FloatLevel
Definition: NVPTXISelLowering.h:238
llvm::NVPTXISD::Tex1DArrayU32S32
@ Tex1DArrayU32S32
Definition: NVPTXISelLowering.h:103
llvm::NVPTXISD::Suld1DArrayV4I16Clamp
@ Suld1DArrayV4I16Clamp
Definition: NVPTXISelLowering.h:274
llvm::ADDRESS_SPACE_CONST
@ ADDRESS_SPACE_CONST
Definition: NVPTXBaseInfo.h:25
llvm::NVPTXTargetLowering::usePrecSqrtF32
bool usePrecSqrtF32() const
Definition: NVPTXISelLowering.cpp:104
llvm::NVPTXISD::Tex1DS32Float
@ Tex1DS32Float
Definition: NVPTXISelLowering.h:88
llvm::NVPTXISD::Suld2DArrayV4I32Clamp
@ Suld2DArrayV4I32Clamp
Definition: NVPTXISelLowering.h:299
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:143
llvm::NVPTXISD::Suld1DV2I8Zero
@ Suld1DV2I8Zero
Definition: NVPTXISelLowering.h:377
llvm::NVPTXISD::Suld2DV4I8Clamp
@ Suld2DV4I8Clamp
Definition: NVPTXISelLowering.h:285
llvm::NVPTXISD::Tex2DArrayU32S32
@ Tex2DArrayU32S32
Definition: NVPTXISelLowering.h:127
llvm::codeview::EncodedFramePtrReg::BasePtr
@ BasePtr
llvm::ISD::ATOMIC_STORE
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1163
llvm::AArch64CC::LE
@ LE
Definition: AArch64BaseInfo.h:268
llvm::FrameIndexSDNode
Definition: SelectionDAGNodes.h:1760
llvm::NVPTXISD::StoreV4
@ StoreV4
Definition: NVPTXISelLowering.h:69
llvm::NVPTXISD::TexCubeArrayFloatFloat
@ TexCubeArrayFloatFloat
Definition: NVPTXISelLowering.h:149
llvm::NVPTXISD::LDGV2
@ LDGV2
Definition: NVPTXISelLowering.h:64
llvm::NVPTXISD::TexUnified3DS32FloatLevel
@ TexUnified3DS32FloatLevel
Definition: NVPTXISelLowering.h:221
llvm::NVPTXTargetMachine::useShortPointers
bool useShortPointers() const
Definition: NVPTXTargetMachine.h:48
llvm::NVPTXISD::Suld2DV4I32Zero
@ Suld2DV4I32Zero
Definition: NVPTXISelLowering.h:407
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:666
llvm::ISD::SETOLT
@ SETOLT
Definition: ISDOpcodes.h:1417
llvm::ISD::TargetGlobalAddress
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:164
llvm::NVPTXISD::Suld1DI64Clamp
@ Suld1DI64Clamp
Definition: NVPTXISelLowering.h:256
llvm::NVPTXISD::LDUV4
@ LDUV4
Definition: NVPTXISelLowering.h:67
llvm::NVPTXISD::Suld1DV4I32Clamp
@ Suld1DV4I32Clamp
Definition: NVPTXISelLowering.h:263
llvm::NVPTXISD::Suld2DArrayV2I32Trap
@ Suld2DArrayV2I32Trap
Definition: NVPTXISelLowering.h:355
llvm::NVPTXISD::Tex2DS32FloatGrad
@ Tex2DS32FloatGrad
Definition: NVPTXISelLowering.h:114
llvm::NVPTXISD::TexUnified2DFloatFloatGrad
@ TexUnified2DFloatFloatGrad
Definition: NVPTXISelLowering.h:194
llvm::NVPTX::PTXLdStInstCode::Scalar
@ Scalar
Definition: NVPTX.h:122
llvm::NVPTXISD::Suld3DV4I16Trap
@ Suld3DV4I16Trap
Definition: NVPTXISelLowering.h:370
llvm::SDValue::getValueSizeInBits
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
Definition: SelectionDAGNodes.h:191
llvm::NVPTXISD::TexUnified1DArrayS32FloatLevel
@ TexUnified1DArrayS32FloatLevel
Definition: NVPTXISelLowering.h:185
llvm::NVPTXISD::Suld2DV2I16Zero
@ Suld2DV2I16Zero
Definition: NVPTXISelLowering.h:402
llvm::None
const NoneType None
Definition: None.h:24
llvm::MemSDNode::isVolatile
bool isVolatile() const
Definition: SelectionDAGNodes.h:1300
llvm::NVPTXISD::Suld1DI8Clamp
@ Suld1DI8Clamp
Definition: NVPTXISelLowering.h:253
llvm::ISD::SETOLE
@ SETOLE
Definition: ISDOpcodes.h:1418
llvm::ISD::SETUGT
@ SETUGT
Definition: ISDOpcodes.h:1423
llvm::MVT::getScalarType
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Definition: MachineValueType.h:524
llvm::NVPTXISD::StoreRetvalV4
@ StoreRetvalV4
Definition: NVPTXISelLowering.h:80
llvm::NVPTXISD::TexCubeU32FloatLevel
@ TexCubeU32FloatLevel
Definition: NVPTXISelLowering.h:148
llvm::NVPTXISD::Tex2DArrayFloatFloat
@ Tex2DArrayFloatFloat
Definition: NVPTXISelLowering.h:120
llvm::NVPTXISD::TexUnified3DS32Float
@ TexUnified3DS32Float
Definition: NVPTXISelLowering.h:220
llvm::NVPTXISD::Tex2DArrayS32Float
@ Tex2DArrayS32Float
Definition: NVPTXISelLowering.h:124
llvm::NVPTXISD::Tld4G2DU64Float
@ Tld4G2DU64Float
Definition: NVPTXISelLowering.h:164
llvm::MVT::isFloatingPoint
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: MachineValueType.h:348
llvm::NVPTXISD::Suld3DV4I8Zero
@ Suld3DV4I8Zero
Definition: NVPTXISelLowering.h:429
llvm::NVPTXSubtarget
Definition: NVPTXSubtarget.h:31
llvm::NVPTXISD::TexCubeArrayU32FloatLevel
@ TexCubeArrayU32FloatLevel
Definition: NVPTXISelLowering.h:154
llvm::NVPTXISD::TexCubeS32FloatLevel
@ TexCubeS32FloatLevel
Definition: NVPTXISelLowering.h:146
llvm::NVPTXISD::TexUnified1DArrayS32FloatGrad
@ TexUnified1DArrayS32FloatGrad
Definition: NVPTXISelLowering.h:186
llvm::NVPTXISD::TexUnified3DU32FloatLevel
@ TexUnified3DU32FloatLevel
Definition: NVPTXISelLowering.h:225
llvm::NVPTXISD::Tld4A2DU64Float
@ Tld4A2DU64Float
Definition: NVPTXISelLowering.h:166
llvm::isKernelFunction
bool isKernelFunction(const Function &F)
Definition: NVPTXUtilities.cpp:274
llvm::ISD::SETUNE
@ SETUNE
Definition: ISDOpcodes.h:1427
llvm::NVPTXISD::Suld3DV2I16Zero
@ Suld3DV2I16Zero
Definition: NVPTXISelLowering.h:426
llvm::NVPTXISD::Suld2DArrayI64Trap
@ Suld2DArrayI64Trap
Definition: NVPTXISelLowering.h:352
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:656
llvm::NVPTXISD::Tld4UnifiedG2DFloatFloat
@ Tld4UnifiedG2DFloatFloat
Definition: NVPTXISelLowering.h:240
llvm::AtomicOrdering
AtomicOrdering
Atomic ordering for LLVM's memory model.
Definition: AtomicOrdering.h:56
llvm::NVPTX::PTXLdStInstCode::V4
@ V4
Definition: NVPTX.h:124
llvm::NVPTXISD::Suld3DI64Zero
@ Suld3DI64Zero
Definition: NVPTXISelLowering.h:424
llvm::NVPTXISD::TexUnified1DArrayFloatFloatGrad
@ TexUnified1DArrayFloatFloatGrad
Definition: NVPTXISelLowering.h:182
llvm::pdb::PDB_ColorItem::Address
@ Address
llvm::getUnderlyingObjects
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
Definition: ValueTracking.cpp:4500
llvm::NVPTXISD::Tex3DFloatFloat
@ Tex3DFloatFloat
Definition: NVPTXISelLowering.h:132
llvm::NVPTXISD::Suld1DArrayV2I8Clamp
@ Suld1DArrayV2I8Clamp
Definition: NVPTXISelLowering.h:269
llvm::NVPTXISD::Tex1DArrayFloatS32
@ Tex1DArrayFloatS32
Definition: NVPTXISelLowering.h:95
llvm::AMDGPU::Hwreg::Offset
Offset
Definition: SIDefines.h:413
llvm::NVPTXISD::Tex3DU32FloatLevel
@ Tex3DU32FloatLevel
Definition: NVPTXISelLowering.h:141
llvm::NVPTXISD::Tex2DArrayS32FloatLevel
@ Tex2DArrayS32FloatLevel
Definition: NVPTXISelLowering.h:125
llvm::MVT::v2f16
@ v2f16
Definition: MachineValueType.h:140
uint64_t
llvm::NVPTXISD::Suld3DI8Trap
@ Suld3DI8Trap
Definition: NVPTXISelLowering.h:361
llvm::MemSDNode::getMemOperand
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Definition: SelectionDAGNodes.h:1345
llvm::NVPTXISD::LoadParam
@ LoadParam
Definition: NVPTXISelLowering.h:70
llvm::NVPTXISD::Tex1DU32Float
@ Tex1DU32Float
Definition: NVPTXISelLowering.h:92
llvm::ISD::LOAD
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:966
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:78
llvm::NVPTXISD::TexUnified2DU32S32
@ TexUnified2DU32S32
Definition: NVPTXISelLowering.h:199
llvm::NVPTXDAGToDAGISel::NVPTXDAGToDAGISel
NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, CodeGenOpt::Level OptLevel)
Definition: NVPTXISelDAGToDAG.cpp:38
llvm::NVPTXISD::Tex1DArrayFloatFloat
@ Tex1DArrayFloatFloat
Definition: NVPTXISelLowering.h:96
llvm::NVPTXISD::Tex2DU32Float
@ Tex2DU32Float
Definition: NVPTXISelLowering.h:116
llvm::NVPTXISD::Suld1DArrayI16Trap
@ Suld1DArrayI16Trap
Definition: NVPTXISelLowering.h:326
llvm::NVPTXISD::TexUnified2DU32Float
@ TexUnified2DU32Float
Definition: NVPTXISelLowering.h:200
llvm::NVPTXISD::Suld1DV2I32Trap
@ Suld1DV2I32Trap
Definition: NVPTXISelLowering.h:319
llvm::NVPTXISD::Suld3DI64Clamp
@ Suld3DI64Clamp
Definition: NVPTXISelLowering.h:304
llvm::NVPTXISD::Suld2DV2I8Clamp
@ Suld2DV2I8Clamp
Definition: NVPTXISelLowering.h:281
llvm::NVPTXISD::Suld1DArrayI8Clamp
@ Suld1DArrayI8Clamp
Definition: NVPTXISelLowering.h:265
llvm::NVPTXISD::Tex3DS32FloatLevel
@ Tex3DS32FloatLevel
Definition: NVPTXISelLowering.h:137
llvm::NVPTXISD::Tex1DArrayU32FloatGrad
@ Tex1DArrayU32FloatGrad
Definition: NVPTXISelLowering.h:106
llvm::NVPTXISD::Tex2DArrayS32FloatGrad
@ Tex2DArrayS32FloatGrad
Definition: NVPTXISelLowering.h:126
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:534
llvm::NVPTXISD::TexUnified2DArrayU32FloatLevel
@ TexUnified2DArrayU32FloatLevel
Definition: NVPTXISelLowering.h:213
llvm::NVPTX::PTXLdStInstCode::Float
@ Float
Definition: NVPTX.h:118
llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition: SelectionDAGNodes.h:908
llvm::NVPTXISD::Suld2DArrayI16Trap
@ Suld2DArrayI16Trap
Definition: NVPTXISelLowering.h:350
NVPTXBaseInfo.h
llvm::NVPTXISD::Suld3DI32Clamp
@ Suld3DI32Clamp
Definition: NVPTXISelLowering.h:303
llvm::NVPTXISD::Suld1DArrayV4I32Clamp
@ Suld1DArrayV4I32Clamp
Definition: NVPTXISelLowering.h:275
llvm::countTrailingOnes
unsigned countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:525
llvm::CondCodeSDNode
Definition: SelectionDAGNodes.h:2244
llvm::ADDRESS_SPACE_SHARED
@ ADDRESS_SPACE_SHARED
Definition: NVPTXBaseInfo.h:24
llvm::LoadSDNode::getExtensionType
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Definition: SelectionDAGNodes.h:2329
llvm::NVPTXISD::Suld2DArrayV2I32Zero
@ Suld2DArrayV2I32Zero
Definition: NVPTXISelLowering.h:415
llvm::NVPTXISD::TexUnified2DArrayFloatFloatGrad
@ TexUnified2DArrayFloatFloatGrad
Definition: NVPTXISelLowering.h:206
canLowerToLDG
static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget, unsigned CodeAddrSpace, MachineFunction *F)
Definition: NVPTXISelDAGToDAG.cpp:676
llvm::NVPTXISD::TexUnified2DArrayS32FloatLevel
@ TexUnified2DArrayS32FloatLevel
Definition: NVPTXISelLowering.h:209
llvm::NVPTXISD::TexUnified2DArrayS32Float
@ TexUnified2DArrayS32Float
Definition: NVPTXISelLowering.h:208
llvm::NVPTXISD::Suld1DArrayV2I64Trap
@ Suld1DArrayV2I64Trap
Definition: NVPTXISelLowering.h:332
llvm::StoreSDNode
This class is used to represent ISD::STORE nodes.
Definition: SelectionDAGNodes.h:2342
llvm::MVT::i8
@ i8
Definition: MachineValueType.h:46
llvm::ISD::SETOGT
@ SETOGT
Definition: ISDOpcodes.h:1415
llvm::NVPTXISD::Tex1DFloatFloatLevel
@ Tex1DFloatFloatLevel
Definition: NVPTXISelLowering.h:85
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::NVPTXISD::TexUnified3DFloatS32
@ TexUnified3DFloatS32
Definition: NVPTXISelLowering.h:215
llvm::NVPTXISD::TexUnifiedCubeArrayFloatFloat
@ TexUnifiedCubeArrayFloatFloat
Definition: NVPTXISelLowering.h:233
llvm::NVPTXISD::TexUnified3DFloatFloatLevel
@ TexUnified3DFloatFloatLevel
Definition: NVPTXISelLowering.h:217
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
llvm::NVPTXISD::Suld2DI32Zero
@ Suld2DI32Zero
Definition: NVPTXISelLowering.h:399
llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition: MachineValueType.h:883
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
llvm::NVPTXISD::Suld3DV2I32Zero
@ Suld3DV2I32Zero
Definition: NVPTXISelLowering.h:427
llvm::NVPTXISD::TexUnified1DArrayU32FloatLevel
@ TexUnified1DArrayU32FloatLevel
Definition: NVPTXISelLowering.h:189
llvm::ISD::SETULT
@ SETULT
Definition: ISDOpcodes.h:1425
llvm::ConstantSDNode::getZExtValue
uint64_t getZExtValue() const
Definition: SelectionDAGNodes.h:1579
base
therefore end up llgh r3 lr r0 br r14 but truncating the load would lh r3 br r14 Functions ret i64 and ought to be implemented ngr r0 br r14 but two address optimizations reverse the order of the AND and ngr r2 lgr r0 br r14 CodeGen SystemZ and ll has several examples of this Out of range displacements are usually handled by loading the full address into a register In many cases it would be better to create an anchor point instead E g i64 base
Definition: README.txt:125
llvm::NVPTXISD::Suld1DArrayI16Clamp
@ Suld1DArrayI16Clamp
Definition: NVPTXISelLowering.h:266
llvm::NVPTXISD::Suld2DV2I32Zero
@ Suld2DV2I32Zero
Definition: NVPTXISelLowering.h:403
llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1411
llvm::NVPTXISD::Tex2DFloatFloatGrad
@ Tex2DFloatFloatGrad
Definition: NVPTXISelLowering.h:110
llvm::SelectionDAGISel::CurDAG
SelectionDAG * CurDAG
Definition: SelectionDAGISel.h:48
llvm::SelectionDAG::getMachineNode
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
Definition: SelectionDAG.cpp:9546
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::SelectionDAG::setNodeMemRefs
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
Definition: SelectionDAG.cpp:9314
llvm::NVPTXISD::TexUnified2DFloatFloatLevel
@ TexUnified2DFloatFloatLevel
Definition: NVPTXISelLowering.h:193
llvm::NVPTXISD::TexUnified1DU32FloatLevel
@ TexUnified1DU32FloatLevel
Definition: NVPTXISelLowering.h:177
llvm::NVPTXISD::Tld4R2DFloatFloat
@ Tld4R2DFloatFloat
Definition: NVPTXISelLowering.h:155
llvm::StoreSDNode::getValue
const SDValue & getValue() const
Definition: SelectionDAGNodes.h:2363
llvm::NVPTXISD::Suld2DArrayV2I32Clamp
@ Suld2DArrayV2I32Clamp
Definition: NVPTXISelLowering.h:295
llvm::NVPTXISD::Tld4B2DFloatFloat
@ Tld4B2DFloatFloat
Definition: NVPTXISelLowering.h:157
llvm::NVPTXTargetLowering::getDivF32Level
int getDivF32Level() const
Definition: NVPTXISelLowering.cpp:91
llvm::NVPTXISD::Tex1DArrayU32FloatLevel
@ Tex1DArrayU32FloatLevel
Definition: NVPTXISelLowering.h:105
llvm::NVPTXISD::Tex1DS32FloatGrad
@ Tex1DS32FloatGrad
Definition: NVPTXISelLowering.h:90
llvm::NVPTXISD::Tex3DS32FloatGrad
@ Tex3DS32FloatGrad
Definition: NVPTXISelLowering.h:138
llvm::NVPTXISD::Suld2DArrayV2I16Clamp
@ Suld2DArrayV2I16Clamp
Definition: NVPTXISelLowering.h:294
llvm::NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel
@ TexUnifiedCubeArrayFloatFloatLevel
Definition: NVPTXISelLowering.h:234
llvm::NVPTXISD::LDUV2
@ LDUV2
Definition: NVPTXISelLowering.h:66
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::NVPTX::PTXLdStInstCode::FromType
FromType
Definition: NVPTX.h:115
llvm::NVPTXISD::TexUnified3DU32FloatGrad
@ TexUnified3DU32FloatGrad
Definition: NVPTXISelLowering.h:226
llvm::NVPTXISD::Suld2DI8Clamp
@ Suld2DI8Clamp
Definition: NVPTXISelLowering.h:277
llvm::NVPTXTargetLowering::useF32FTZ
bool useF32FTZ(const MachineFunction &MF) const
Definition: NVPTXISelLowering.cpp:114
llvm::NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand
bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Implement addressing mode selection for inline asm expressions.
Definition: NVPTXISelDAGToDAG.cpp:3686
llvm::createNVPTXISelDag
FunctionPass * createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel)
createNVPTXISelDag - This pass converts a legalized DAG into a NVPTX-specific DAG,...
Definition: NVPTXISelDAGToDAG.cpp:33
llvm::NVPTX::PTXLdStInstCode::CONSTANT
@ CONSTANT
Definition: NVPTX.h:110
llvm::NVPTXISD::TexCubeArrayS32Float
@ TexCubeArrayS32Float
Definition: NVPTXISelLowering.h:151
llvm::NVPTXISD::Tex1DArrayU32Float
@ Tex1DArrayU32Float
Definition: NVPTXISelLowering.h:104
llvm::NVPTX::PTXCmpMode::GTU
@ GTU
Definition: NVPTX.h:166
llvm::NVPTXISD::Suld2DI64Zero
@ Suld2DI64Zero
Definition: NVPTXISelLowering.h:400
llvm::NVPTX::PTXCmpMode::LEU
@ LEU
Definition: NVPTX.h:165
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:99
llvm::AArch64CC::GE
@ GE
Definition: AArch64BaseInfo.h:265
llvm::EVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:154
llvm::NVPTXISD::Suld2DArrayV4I16Clamp
@ Suld2DArrayV4I16Clamp
Definition: NVPTXISelLowering.h:298
llvm::NVPTXISD::Suld2DArrayV2I8Trap
@ Suld2DArrayV2I8Trap
Definition: NVPTXISelLowering.h:353
llvm::MVT::i64
@ i64
Definition: MachineValueType.h:49
llvm::countTrailingZeros
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: MathExtras.h:156
llvm::NVPTX::PTXLdStInstCode::Signed
@ Signed
Definition: NVPTX.h:117
llvm::NVPTXISD::Suld3DV4I16Zero
@ Suld3DV4I16Zero
Definition: NVPTXISelLowering.h:430
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::NVPTXISD::Suld2DV4I32Trap
@ Suld2DV4I32Trap
Definition: NVPTXISelLowering.h:347
if
if(llvm_vc STREQUAL "") set(fake_version_inc "$
Definition: CMakeLists.txt:14
llvm::NVPTXISD::Tex2DS32S32
@ Tex2DS32S32
Definition: NVPTXISelLowering.h:111
llvm::NVPTXISD::TexUnified2DArrayFloatFloat
@ TexUnified2DArrayFloatFloat
Definition: NVPTXISelLowering.h:204
llvm::NVPTXISD::Suld2DArrayI16Zero
@ Suld2DArrayI16Zero
Definition: NVPTXISelLowering.h:410
llvm::SDValue::getOperand
const SDValue & getOperand(unsigned i) const
Definition: SelectionDAGNodes.h:1133
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::NVPTXISD::TexUnified1DArrayFloatFloatLevel
@ TexUnified1DArrayFloatFloatLevel
Definition: NVPTXISelLowering.h:181
llvm::NVPTXISD::Tex3DU32S32
@ Tex3DU32S32
Definition: NVPTXISelLowering.h:139
llvm::NVPTXISD::Suld1DArrayV2I64Zero
@ Suld1DArrayV2I64Zero
Definition: NVPTXISelLowering.h:392
llvm::NVPTXISD::Tex1DFloatFloat
@ Tex1DFloatFloat
Definition: NVPTXISelLowering.h:84
llvm::NVPTXISD::TexUnified1DS32FloatLevel
@ TexUnified1DS32FloatLevel
Definition: NVPTXISelLowering.h:173
llvm::NVPTXISD::Suld2DV4I16Trap
@ Suld2DV4I16Trap
Definition: NVPTXISelLowering.h:346
llvm::ADDRESS_SPACE_GENERIC
@ ADDRESS_SPACE_GENERIC
Definition: NVPTXBaseInfo.h:22
llvm::NVPTXISD::Suld3DV2I16Trap
@ Suld3DV2I16Trap
Definition: NVPTXISelLowering.h:366
llvm::NVPTXISD::StoreParam
@ StoreParam
Definition: NVPTXISelLowering.h:73
llvm::NVPTXISD::Tex1DArrayFloatFloatGrad
@ Tex1DArrayFloatFloatGrad
Definition: NVPTXISelLowering.h:98
llvm::AMDGPU::HSAMD::Kernel::Arg::Key::IsVolatile
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
Definition: AMDGPUMetadata.h:199
llvm::NVPTXISD::Tld4A2DFloatFloat
@ Tld4A2DFloatFloat
Definition: NVPTXISelLowering.h:158
llvm::NVPTX::PTXLdStInstCode::Untyped
@ Untyped
Definition: NVPTX.h:119
llvm::CodeGenOpt::Level
Level
Definition: CodeGen.h:52
llvm::NVPTXISD::Tld4UnifiedA2DS64Float
@ Tld4UnifiedA2DS64Float
Definition: NVPTXISelLowering.h:246
llvm::NVPTXISD::TexUnified1DArrayFloatFloat
@ TexUnified1DArrayFloatFloat
Definition: NVPTXISelLowering.h:180
llvm::SDVTList
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Definition: SelectionDAGNodes.h:78
llvm::NVPTXISD::Suld1DI16Trap
@ Suld1DI16Trap
Definition: NVPTXISelLowering.h:314
llvm::NVPTXISD::Suld1DV4I32Zero
@ Suld1DV4I32Zero
Definition: NVPTXISelLowering.h:383
llvm::NVPTXISD::TexUnified1DS32Float
@ TexUnified1DS32Float
Definition: NVPTXISelLowering.h:172
llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition: ISDOpcodes.h:1391
llvm::NVPTXISD::Suld1DArrayI64Clamp
@ Suld1DArrayI64Clamp
Definition: NVPTXISelLowering.h:268
llvm::ISD::INTRINSIC_WO_CHAIN
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
llvm::NVPTXISD::Suld1DArrayI64Zero
@ Suld1DArrayI64Zero
Definition: NVPTXISelLowering.h:388
llvm::AddrSpaceCastSDNode::getDestAddressSpace
unsigned getDestAddressSpace() const
Definition: SelectionDAGNodes.h:1251
llvm::NVPTXISD::Tex2DArrayU32Float
@ Tex2DArrayU32Float
Definition: NVPTXISelLowering.h:128
llvm::NVPTXDAGToDAGISel
Definition: NVPTXISelDAGToDAG.h:27
llvm::AtomicSDNode
This is an SDNode representing atomic operations.
Definition: SelectionDAGNodes.h:1426
llvm::NVPTXISD::Suld2DArrayI64Zero
@ Suld2DArrayI64Zero
Definition: NVPTXISelLowering.h:412
llvm::NVPTXTargetLowering
Definition: NVPTXISelLowering.h:440
llvm::NVPTXISD::Suld1DArrayI32Trap
@ Suld1DArrayI32Trap
Definition: NVPTXISelLowering.h:327
llvm::SelectionDAGISel::MF
MachineFunction * MF
Definition: SelectionDAGISel.h:46
llvm::NVPTXISD::Suld1DArrayV2I16Zero
@ Suld1DArrayV2I16Zero
Definition: NVPTXISelLowering.h:390
llvm::ISD::SETLT
@ SETLT
Definition: ISDOpcodes.h:1434
llvm::NVPTX::PTXCmpMode::FTZ_FLAG
@ FTZ_FLAG
Definition: NVPTX.h:173
llvm::NVPTXISD::Suld1DV2I16Trap
@ Suld1DV2I16Trap
Definition: NVPTXISelLowering.h:318
llvm::NVPTX::PTXLdStInstCode::V2
@ V2
Definition: NVPTX.h:123
llvm::NVPTXISD::TexUnified2DArrayS32S32
@ TexUnified2DArrayS32S32
Definition: NVPTXISelLowering.h:207
llvm::NVPTXISD::TexUnified3DU32S32
@ TexUnified3DU32S32
Definition: NVPTXISelLowering.h:223
llvm::NVPTXISD::Suld2DV2I16Trap
@ Suld2DV2I16Trap
Definition: NVPTXISelLowering.h:342
llvm::NVPTXISD::Suld2DI64Clamp
@ Suld2DI64Clamp
Definition: NVPTXISelLowering.h:280
llvm::NVPTXISD::Suld1DI32Zero
@ Suld1DI32Zero
Definition: NVPTXISelLowering.h:375
llvm::NVPTXISD::TexUnified2DArrayFloatFloatLevel
@ TexUnified2DArrayFloatFloatLevel
Definition: NVPTXISelLowering.h:205
llvm::NVPTXISD::Suld3DI16Trap
@ Suld3DI16Trap
Definition: NVPTXISelLowering.h:362
llvm::NVPTXISD::StoreParamS32
@ StoreParamS32
Definition: NVPTXISelLowering.h:76
llvm::NVPTXISD::Tex3DU32Float
@ Tex3DU32Float
Definition: NVPTXISelLowering.h:140
llvm::NVPTX::PTXLdStInstCode::PARAM
@ PARAM
Definition: NVPTX.h:112
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:345
llvm::NVPTXISD::Tld4UnifiedG2DS64Float
@ Tld4UnifiedG2DS64Float
Definition: NVPTXISelLowering.h:244
llvm::ISD::TargetExternalSymbol
@ TargetExternalSymbol
Definition: ISDOpcodes.h:169
llvm::MemSDNode::getAddressSpace
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Definition: SelectionDAGNodes.h:1352
llvm::NVPTXISD::Suld2DArrayI8Zero
@ Suld2DArrayI8Zero
Definition: NVPTXISelLowering.h:409
llvm::NVPTXISD::Suld1DArrayI8Zero
@ Suld1DArrayI8Zero
Definition: NVPTXISelLowering.h:385
llvm::SelectionDAG::getDataLayout
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:452
llvm::NVPTXISD::Tld4R2DU64Float
@ Tld4R2DU64Float
Definition: NVPTXISelLowering.h:163
llvm::SelectionDAGISel::ReplaceUses
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
Definition: SelectionDAGISel.h:208
llvm::NVPTXISD::TexUnified3DFloatFloatGrad
@ TexUnified3DFloatFloatGrad
Definition: NVPTXISelLowering.h:218
llvm::MVT::i32
@ i32
Definition: MachineValueType.h:48
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::NVPTXISD::TexUnified2DS32FloatGrad
@ TexUnified2DS32FloatGrad
Definition: NVPTXISelLowering.h:198
llvm::ISD::SETUO
@ SETUO
Definition: ISDOpcodes.h:1421
llvm::NVPTXISD::TexCubeFloatFloat
@ TexCubeFloatFloat
Definition: NVPTXISelLowering.h:143
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition: SelectionDAGNodes.h:137
llvm::AddrSpaceCastSDNode
Definition: SelectionDAGNodes.h:1241
llvm::NVPTXISD::Suld1DI8Trap
@ Suld1DI8Trap
Definition: NVPTXISelLowering.h:313
llvm::NVPTXISD::Suld3DV2I64Clamp
@ Suld3DV2I64Clamp
Definition: NVPTXISelLowering.h:308
llvm::NVPTX::PTXCmpMode::NEU
@ NEU
Definition: NVPTX.h:163
llvm::NVPTXISD::TexUnified1DArrayU32FloatGrad
@ TexUnified1DArrayU32FloatGrad
Definition: NVPTXISelLowering.h:190
llvm::NVPTXISD::StoreV2
@ StoreV2
Definition: NVPTXISelLowering.h:68
MemRef
Definition: Lint.cpp:81
llvm::AArch64CC::GT
@ GT
Definition: AArch64BaseInfo.h:267
llvm::ISD::STORE
@ STORE
Definition: ISDOpcodes.h:967
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
llvm::NVPTXISD::Suld1DI8Zero
@ Suld1DI8Zero
Definition: NVPTXISelLowering.h:373
llvm::NVPTXISD::StoreParamV4
@ StoreParamV4
Definition: NVPTXISelLowering.h:75
llvm::AtomicSDNode::getVal
const SDValue & getVal() const
Definition: SelectionDAGNodes.h:1436
llvm::NVPTXISD::Suld3DV4I8Clamp
@ Suld3DV4I8Clamp
Definition: NVPTXISelLowering.h:309
llvm::NVPTX::PTXLdStInstCode::GLOBAL
@ GLOBAL
Definition: NVPTX.h:109
llvm::NVPTX::PTXLdStInstCode::GENERIC
@ GENERIC
Definition: NVPTX.h:108
llvm::NVPTXISD::Tex2DU32S32
@ Tex2DU32S32
Definition: NVPTXISelLowering.h:115
llvm::EVT::getVectorElementType
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:300
llvm::ISD::SETOGE
@ SETOGE
Definition: ISDOpcodes.h:1416
llvm::NVPTXISD::Suld2DArrayV4I8Zero
@ Suld2DArrayV4I8Zero
Definition: NVPTXISelLowering.h:417
Instructions.h
llvm::ISD::SHL
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:691
llvm::NVPTXISD::Tld4UnifiedA2DFloatFloat
@ Tld4UnifiedA2DFloatFloat
Definition: NVPTXISelLowering.h:242
llvm::NVPTXISD::Tex3DU32FloatGrad
@ Tex3DU32FloatGrad
Definition: NVPTXISelLowering.h:142
llvm::SelectionDAGISel
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
Definition: SelectionDAGISel.h:40
llvm::NVPTXISD::Suld2DI16Clamp
@ Suld2DI16Clamp
Definition: NVPTXISelLowering.h:278
llvm::NVPTXISD::TexUnified1DU32FloatGrad
@ TexUnified1DU32FloatGrad
Definition: NVPTXISelLowering.h:178
llvm::NVPTXISD::Suld1DV4I8Zero
@ Suld1DV4I8Zero
Definition: NVPTXISelLowering.h:381
TargetIntrinsicInfo.h
llvm::NVPTX::PTXCvtMode::NONE
@ NONE
Definition: NVPTX.h:131
llvm::MVT::f16
@ f16
Definition: MachineValueType.h:56
llvm::NVPTXISD::TexUnified1DS32FloatGrad
@ TexUnified1DS32FloatGrad
Definition: NVPTXISelLowering.h:174
llvm::NVPTXISD::TexUnified1DS32S32
@ TexUnified1DS32S32
Definition: NVPTXISelLowering.h:171
N
#define N
llvm::NVPTXISD::Suld1DArrayV4I8Clamp
@ Suld1DArrayV4I8Clamp
Definition: NVPTXISelLowering.h:273
llvm::ISD::SRL
@ SRL
Definition: ISDOpcodes.h:693
llvm::SelectionDAG::getTargetConstantFP
SDValue getTargetConstantFP(double Val, const SDLoc &DL, EVT VT)
Definition: SelectionDAG.h:684
llvm::NVPTXISD::Suld2DI32Trap
@ Suld2DI32Trap
Definition: NVPTXISelLowering.h:339
llvm::isStrongerThanMonotonic
bool isStrongerThanMonotonic(AtomicOrdering AO)
Definition: AtomicOrdering.h:124
llvm::NVPTXISD::Suld2DV4I16Clamp
@ Suld2DV4I16Clamp
Definition: NVPTXISelLowering.h:286
llvm::NVPTXISD::Suld1DI32Clamp
@ Suld1DI32Clamp
Definition: NVPTXISelLowering.h:255
llvm::NVPTXISD::Suld1DV2I64Clamp
@ Suld1DV2I64Clamp
Definition: NVPTXISelLowering.h:260
llvm::NVPTXTargetLowering::allowUnsafeFPMath
bool allowUnsafeFPMath(MachineFunction &MF) const
Definition: NVPTXISelLowering.cpp:4388
llvm::NVPTXISD::Suld3DI8Clamp
@ Suld3DI8Clamp
Definition: NVPTXISelLowering.h:301
llvm::NVPTXISD::Suld2DV4I8Trap
@ Suld2DV4I8Trap
Definition: NVPTXISelLowering.h:345
llvm::NVPTXISD::Tex2DFloatS32
@ Tex2DFloatS32
Definition: NVPTXISelLowering.h:107
llvm::NVPTXISD::Suld2DI8Trap
@ Suld2DI8Trap
Definition: NVPTXISelLowering.h:337
llvm::NVPTXISD::Suld2DI64Trap
@ Suld2DI64Trap
Definition: NVPTXISelLowering.h:340
llvm::SDValue::getOpcode
unsigned getOpcode() const
Definition: SelectionDAGNodes.h:1121
llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:652
llvm::NVPTXDAGToDAGISel::Subtarget
const NVPTXSubtarget * Subtarget
Definition: NVPTXISelDAGToDAG.h:49
llvm::NVPTXISD::Suld3DV4I8Trap
@ Suld3DV4I8Trap
Definition: NVPTXISelLowering.h:369
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::ISD::SETONE
@ SETONE
Definition: ISDOpcodes.h:1419
llvm::DataLayout::getPointerSizeInBits
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:412
llvm::NVPTXISD::Suld2DV2I8Zero
@ Suld2DV2I8Zero
Definition: NVPTXISelLowering.h:401
llvm::NVPTXISD::Tex2DS32FloatLevel
@ Tex2DS32FloatLevel
Definition: NVPTXISelLowering.h:113
llvm::MVT::i16
@ i16
Definition: MachineValueType.h:47
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::ISD::INTRINSIC_W_CHAIN
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
llvm::NVPTXISD::Suld1DV2I16Zero
@ Suld1DV2I16Zero
Definition: NVPTXISelLowering.h:378
llvm::NVPTXISD::Tld4UnifiedG2DU64Float
@ Tld4UnifiedG2DU64Float
Definition: NVPTXISelLowering.h:248
llvm::NVPTXISD::TexUnified2DFloatS32
@ TexUnified2DFloatS32
Definition: NVPTXISelLowering.h:191
llvm::NVPTX::PTXCmpMode::CmpMode
CmpMode
Definition: NVPTX.h:151
llvm::NVPTXISD::Suld1DArrayV2I16Clamp
@ Suld1DArrayV2I16Clamp
Definition: NVPTXISelLowering.h:270
llvm::NVPTXISD::Wrapper
@ Wrapper
Definition: NVPTXISelLowering.h:26
llvm::NVPTXISD::StoreParamV2
@ StoreParamV2
Definition: NVPTXISelLowering.h:74
llvm::NVPTXISD::StoreRetvalV2
@ StoreRetvalV2
Definition: NVPTXISelLowering.h:79
llvm::NVPTXISD::Suld2DArrayV2I16Trap
@ Suld2DArrayV2I16Trap
Definition: NVPTXISelLowering.h:354
llvm::isMask_64
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:473
llvm::NVPTX::PTXLdStInstCode::Unsigned
@ Unsigned
Definition: NVPTX.h:116
llvm::NVPTXISD::Suld1DArrayV2I64Clamp
@ Suld1DArrayV2I64Clamp
Definition: NVPTXISelLowering.h:272
llvm::NVPTXISD::Suld1DArrayV2I8Trap
@ Suld1DArrayV2I8Trap
Definition: NVPTXISelLowering.h:329
llvm::NVPTXISD::Suld1DI64Trap
@ Suld1DI64Trap
Definition: NVPTXISelLowering.h:316
llvm::NVPTXISD::TexUnified2DArrayU32FloatGrad
@ TexUnified2DArrayU32FloatGrad
Definition: NVPTXISelLowering.h:214
llvm::NVPTXISD::StoreParamU32
@ StoreParamU32
Definition: NVPTXISelLowering.h:77
llvm::NVPTXISD::TexUnified2DArrayU32Float
@ TexUnified2DArrayU32Float
Definition: NVPTXISelLowering.h:212
llvm::NVPTXISD::Tex2DU32FloatGrad
@ Tex2DU32FloatGrad
Definition: NVPTXISelLowering.h:118
llvm::NVPTXTargetLowering::allowFMA
bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const
Definition: NVPTXISelLowering.cpp:4371
llvm::NVPTXISD::SETP_F16X2
@ SETP_F16X2
Definition: NVPTXISelLowering.h:59
raw_ostream.h
llvm::NVPTXISD::Tex1DFloatFloatGrad
@ Tex1DFloatFloatGrad
Definition: NVPTXISelLowering.h:86
llvm::NVPTXISD::TexUnifiedCubeU32Float
@ TexUnifiedCubeU32Float
Definition: NVPTXISelLowering.h:231
llvm::SelectionDAGISel::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
Definition: SelectionDAGISel.cpp:413
llvm::NVPTXISD::TexCubeArrayFloatFloatLevel
@ TexCubeArrayFloatFloatLevel
Definition: NVPTXISelLowering.h:150
llvm::NVPTXISD::Suld1DArrayI8Trap
@ Suld1DArrayI8Trap
Definition: NVPTXISelLowering.h:325
llvm::NVPTXISD::TexUnified2DS32Float
@ TexUnified2DS32Float
Definition: NVPTXISelLowering.h:196
llvm::MVT::f32
@ f32
Definition: MachineValueType.h:57
llvm::NVPTXISD::Tex2DArrayFloatFloatGrad
@ Tex2DArrayFloatFloatGrad
Definition: NVPTXISelLowering.h:122
llvm::NVPTXISD::Suld2DArrayV2I8Zero
@ Suld2DArrayV2I8Zero
Definition: NVPTXISelLowering.h:413
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::NVPTXISD::TexUnified1DFloatFloat
@ TexUnified1DFloatFloat
Definition: NVPTXISelLowering.h:168
llvm::NVPTXISD::Suld2DArrayV2I64Clamp
@ Suld2DArrayV2I64Clamp
Definition: NVPTXISelLowering.h:296
Debug.h
llvm::NVPTXISD::Suld2DArrayI32Trap
@ Suld2DArrayI32Trap
Definition: NVPTXISelLowering.h:351
llvm::ISD::ATOMIC_LOAD
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1159
llvm::NVPTXISD::Suld2DI16Zero
@ Suld2DI16Zero
Definition: NVPTXISelLowering.h:398
llvm::NVPTXISD::Suld1DArrayV4I32Zero
@ Suld1DArrayV4I32Zero
Definition: NVPTXISelLowering.h:395
llvm::NVPTXISD::Suld1DV2I16Clamp
@ Suld1DV2I16Clamp
Definition: NVPTXISelLowering.h:258
llvm::NVPTXISD::Tex1DArrayS32S32
@ Tex1DArrayS32S32
Definition: NVPTXISelLowering.h:99
llvm::NVPTXISD::Suld1DI64Zero
@ Suld1DI64Zero
Definition: NVPTXISelLowering.h:376
llvm::NVPTXISD::Suld2DV2I64Trap
@ Suld2DV2I64Trap
Definition: NVPTXISelLowering.h:344
llvm::NVPTXISD::Suld2DArrayI32Clamp
@ Suld2DArrayI32Clamp
Definition: NVPTXISelLowering.h:291
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::NVPTXISD::Suld3DV4I32Clamp
@ Suld3DV4I32Clamp
Definition: NVPTXISelLowering.h:311
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:288
llvm::NVPTXISD::Suld2DArrayV2I16Zero
@ Suld2DArrayV2I16Zero
Definition: NVPTXISelLowering.h:414
llvm::NVPTXISD::Suld3DV2I16Clamp
@ Suld3DV2I16Clamp
Definition: NVPTXISelLowering.h:306
llvm::NVPTXISD::Suld1DV2I32Zero
@ Suld1DV2I32Zero
Definition: NVPTXISelLowering.h:379
llvm::NVPTXISD::Tex1DS32FloatLevel
@ Tex1DS32FloatLevel
Definition: NVPTXISelLowering.h:89
llvm::NVPTXISD::Suld2DArrayV4I16Trap
@ Suld2DArrayV4I16Trap
Definition: NVPTXISelLowering.h:358
llvm::NVPTX::PTXCmpMode::LTU
@ LTU
Definition: NVPTX.h:164