LLVM  14.0.0git
NVPTXISelDAGToDAG.cpp
Go to the documentation of this file.
1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the NVPTX target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "NVPTXISelDAGToDAG.h"
15 #include "NVPTXUtilities.h"
17 #include "llvm/IR/GlobalValue.h"
18 #include "llvm/IR/Instructions.h"
19 #include "llvm/IR/IntrinsicsNVPTX.h"
22 #include "llvm/Support/Debug.h"
26 
27 using namespace llvm;
28 
29 #define DEBUG_TYPE "nvptx-isel"
30 
31 /// createNVPTXISelDag - This pass converts a legalized DAG into a
32 /// NVPTX-specific DAG, ready for instruction scheduling.
// NOTE(review): the first line of this signature (embedded listing line 33)
// is not present in this extract; only the trailing parameter is visible.
34  llvm::CodeGenOpt::Level OptLevel) {
// Heap-allocates a new NVPTXDAGToDAGISel built from TM and OptLevel and
// returns it to the caller.
35  return new NVPTXDAGToDAGISel(TM, OptLevel);
36 }
37 
// NOTE(review): the first line of this constructor signature (embedded
// listing line 38) is not present in this extract.
39  CodeGenOpt::Level OptLevel)
// Forwards tm and OptLevel to the SelectionDAGISel base and stores tm in TM.
40  : SelectionDAGISel(tm, OptLevel), TM(tm) {
// doMulWide is enabled for every optimization level above 0.
41  doMulWide = (OptLevel > 0);
42 }
43 
// NOTE(review): the signature (embedded listing line 44) and the statement on
// listing line 46 are not present in this extract, so only the subtarget
// caching is visible.
// Stores the function's subtarget, viewed as an NVPTXSubtarget, in Subtarget.
45  Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
47 }
48 
// NOTE(review): the body of this accessor (embedded listing line 50) is
// missing from this extract; what it returns cannot be determined from here.
49 int NVPTXDAGToDAGISel::getDivF32Level() const {
51 }
52 
// NOTE(review): the body of this predicate (embedded listing line 54) is
// missing from this extract; what it returns cannot be determined from here.
53 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
55 }
56 
// NOTE(review): the body of this predicate (embedded listing line 58) is
// missing from this extract; what it returns cannot be determined from here.
57 bool NVPTXDAGToDAGISel::useF32FTZ() const {
59 }
60 
// Forwards the query to TL->allowFMA for the current machine function (*MF)
// and OptLevel.
// NOTE(review): the declaration of TL (embedded listing line 62) is not
// present in this extract.
61 bool NVPTXDAGToDAGISel::allowFMA() const {
63  return TL->allowFMA(*MF, OptLevel);
64 }
65 
// Forwards the query to TL->allowUnsafeFPMath for the current machine
// function (*MF).
// NOTE(review): the declaration of TL (embedded listing line 67) is not
// present in this extract.
66 bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
68  return TL->allowUnsafeFPMath(*MF);
69 }
70 
71 bool NVPTXDAGToDAGISel::useShortPointers() const {
72  return TM.useShortPointers();
73 }
74 
75 /// Select - Select instructions not customized! Used for
76 /// expanded, promoted and normal instructions.
// Main selection entry point for a single DAG node.
//
// Nodes that already carry a machine opcode are marked with node id -1 and
// left alone. Otherwise the node's opcode picks one of the custom try*/Select*
// helpers; when a try* helper returns false (or no case applies) the node is
// handed to SelectCode(N).
//
// NOTE(review): this extract has lost many `case` label lines (the embedded
// listing numbering skips wherever an `if (try...)` below appears without a
// preceding label), so the full set of opcodes routed to each helper cannot
// be read from here.
77 void NVPTXDAGToDAGISel::Select(SDNode *N) {
78 
79  if (N->isMachineOpcode()) {
80  N->setNodeId(-1);
81  return; // Already selected.
82  }
83 
84  switch (N->getOpcode()) {
85  case ISD::LOAD:
86  case ISD::ATOMIC_LOAD:
87  if (tryLoad(N))
88  return;
89  break;
90  case ISD::STORE:
91  case ISD::ATOMIC_STORE:
92  if (tryStore(N))
93  return;
94  break;
// (case label not visible in this extract)
96  if (tryEXTRACT_VECTOR_ELEMENT(N))
97  return;
98  break;
// (case label not visible in this extract) This branch returns
// unconditionally, so SelectCode is never reached for it.
100  SelectSETP_F16X2(N);
101  return;
102 
103  case NVPTXISD::LoadV2:
104  case NVPTXISD::LoadV4:
105  if (tryLoadVector(N))
106  return;
107  break;
108  case NVPTXISD::LDGV2:
109  case NVPTXISD::LDGV4:
110  case NVPTXISD::LDUV2:
111  case NVPTXISD::LDUV4:
112  if (tryLDGLDU(N))
113  return;
114  break;
115  case NVPTXISD::StoreV2:
116  case NVPTXISD::StoreV4:
117  if (tryStoreVector(N))
118  return;
119  break;
120  case NVPTXISD::LoadParam:
// (further case labels on listing lines 121-122 not visible in this extract)
123  if (tryLoadParam(N))
124  return;
125  break;
// (case labels not visible in this extract)
129  if (tryStoreRetval(N))
130  return;
131  break;
// (case labels not visible in this extract)
137  if (tryStoreParam(N))
138  return;
139  break;
// (case label not visible in this extract)
141  if (tryIntrinsicNoChain(N))
142  return;
143  break;
// (case label not visible in this extract)
145  if (tryIntrinsicChain(N))
146  return;
147  break;
// (case labels on listing lines 148-315 not visible in this extract)
316  if (tryTextureIntrinsic(N))
317  return;
318  break;
// (case labels on listing lines 319-483 not visible in this extract)
484  if (trySurfaceIntrinsic(N))
485  return;
486  break;
487  case ISD::AND:
488  case ISD::SRA:
489  case ISD::SRL:
490  // Try to select BFE
491  if (tryBFE(N))
492  return;
493  break;
// Address-space casts are always handled here; this branch never falls
// through to SelectCode.
494  case ISD::ADDRSPACECAST:
495  SelectAddrSpaceCast(N);
496  return;
497  case ISD::ConstantFP:
498  if (tryConstantFP16(N))
499  return;
500  break;
501  default:
502  break;
503  }
// Fallback for everything the custom helpers declined.
504  SelectCode(N);
505 }
506 
507 bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
508  unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
509  switch (IID) {
510  default:
511  return false;
512  case Intrinsic::nvvm_ldg_global_f:
513  case Intrinsic::nvvm_ldg_global_i:
514  case Intrinsic::nvvm_ldg_global_p:
515  case Intrinsic::nvvm_ldu_global_f:
516  case Intrinsic::nvvm_ldu_global_i:
517  case Intrinsic::nvvm_ldu_global_p:
518  return tryLDGLDU(N);
519  }
520 }
521 
522 // There's no way to specify FP16 immediates in .f16 ops, so we have to
523 // load them into an .f16 register first.
// Selects an f16 floating-point constant node as a LOAD_CONST_F16 machine
// node. Returns false (node untouched) when the result type of N is not
// MVT::f16, true after N has been replaced.
524 bool NVPTXDAGToDAGISel::tryConstantFP16(SDNode *N) {
525  if (N->getValueType(0) != MVT::f16)
526  return false;
// NOTE(review): the start of the statement that defines Val (embedded listing
// line 527) is missing from this extract; the line below is the tail of its
// argument list (the constant's APFloat value, the debug location, MVT::f16).
528  cast<ConstantFPSDNode>(N)->getValueAPF(), SDLoc(N), MVT::f16);
529  SDNode *LoadConstF16 =
530  CurDAG->getMachineNode(NVPTX::LOAD_CONST_F16, SDLoc(N), MVT::f16, Val);
531  ReplaceNode(N, LoadConstF16);
532  return true;
533 }
534 
535 // Map an ISD::CondCode value to the corresponding CmpMode expected by
536 // NVPTXInstPrinter::printCmpMode()
// Translates the condition code held by CondCode into a CmpMode value,
// returned as an unsigned.
//
// - The ordered codes (SETOxx) and the plain codes (SETEQ, SETGT, ...) map to
//   the same CmpMode values (EQ, GT, GE, LT, LE, NE).
// - SETO / SETUO map to NUM / NotANumber.
// - The unordered codes (SETUxx) map to the "U"-suffixed modes.
// - Any other condition code hits llvm_unreachable.
//
// When FTZ is true, NVPTX::PTXCmpMode::FTZ_FLAG is OR-ed into the result.
//
// NOTE(review): embedded listing line 538 is missing from this extract;
// presumably it brings the unqualified name CmpMode into scope -- confirm
// against the full file.
537 static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) {
539  unsigned PTXCmpMode = [](ISD::CondCode CC) {
540  switch (CC) {
541  default:
542  llvm_unreachable("Unexpected condition code.");
543  case ISD::SETOEQ:
544  return CmpMode::EQ;
545  case ISD::SETOGT:
546  return CmpMode::GT;
547  case ISD::SETOGE:
548  return CmpMode::GE;
549  case ISD::SETOLT:
550  return CmpMode::LT;
551  case ISD::SETOLE:
552  return CmpMode::LE;
553  case ISD::SETONE:
554  return CmpMode::NE;
555  case ISD::SETO:
556  return CmpMode::NUM;
557  case ISD::SETUO:
558  return CmpMode::NotANumber;
559  case ISD::SETUEQ:
560  return CmpMode::EQU;
561  case ISD::SETUGT:
562  return CmpMode::GTU;
563  case ISD::SETUGE:
564  return CmpMode::GEU;
565  case ISD::SETULT:
566  return CmpMode::LTU;
567  case ISD::SETULE:
568  return CmpMode::LEU;
569  case ISD::SETUNE:
570  return CmpMode::NEU;
571  case ISD::SETEQ:
572  return CmpMode::EQ;
573  case ISD::SETGT:
574  return CmpMode::GT;
575  case ISD::SETGE:
576  return CmpMode::GE;
577  case ISD::SETLT:
578  return CmpMode::LT;
579  case ISD::SETLE:
580  return CmpMode::LE;
581  case ISD::SETNE:
582  return CmpMode::NE;
583  }
584  }(CondCode.get());
585 
586  if (FTZ)
587  PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG;
588 
589  return PTXCmpMode;
590 }
591 
592 bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
593  unsigned PTXCmpMode =
594  getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
595  SDLoc DL(N);
596  SDNode *SetP = CurDAG->getMachineNode(
597  NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
598  N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
599  ReplaceNode(N, SetP);
600  return true;
601 }
602 
603 // Find all instances of extract_vector_elt that use this v2f16 vector
604 // and coalesce them into a scattering move instruction.
// Coalesces the extract_vector_elt users of a v2f16 vector into a single
// two-result "split" machine node.
//
// Returns false when the vector operand of N is not v2f16, or when the
// collected users do not cover both element 0 and element 1. Otherwise every
// collected extract of element 0 is rewired to result 0 of the split node,
// every extract of element 1 to result 1, and true is returned.
//
// NOTE(review): several lines are missing from this extract: the declaration
// of E0/E1 (embedded listing line 614), the declaration of Source (639) and
// the initializer of ScatterOp (647).
605 bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
606  SDValue Vector = N->getOperand(0);
607 
608  // We only care about f16x2 as it's the only real vector type we
609  // need to deal with.
610  if (Vector.getSimpleValueType() != MVT::v2f16)
611  return false;
612 
613  // Find and record all uses of this vector that extract element 0 or 1.
615  for (auto U : Vector.getNode()->uses()) {
616  if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
617  continue;
// Skip users whose operand 0 is not this exact value.
618  if (U->getOperand(0) != Vector)
619  continue;
// Only constant indices are recorded; users with a non-constant index are
// silently ignored. A constant index other than 0 or 1 is treated as
// unreachable.
620  if (const ConstantSDNode *IdxConst =
621  dyn_cast<ConstantSDNode>(U->getOperand(1))) {
622  if (IdxConst->getZExtValue() == 0)
623  E0.push_back(U);
624  else if (IdxConst->getZExtValue() == 1)
625  E1.push_back(U);
626  else
627  llvm_unreachable("Invalid vector index.");
628  }
629  }
630 
631  // There's no point scattering f16x2 if we only ever access one
632  // element of it.
633  if (E0.empty() || E1.empty())
634  return false;
635 
636  unsigned Op = NVPTX::SplitF16x2;
637  // If the vector has been BITCAST'ed from i32, we can use original
638  // value directly and avoid register-to-register move.
640  if (Vector->getOpcode() == ISD::BITCAST) {
641  Op = NVPTX::SplitI32toF16x2;
642  Source = Vector->getOperand(0);
643  }
644  // Merge (f16 extractelt(V, 0), f16 extractelt(V,1))
645  // into f16,f16 SplitF16x2(V)
646  SDNode *ScatterOp =
648  for (auto *Node : E0)
649  ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0));
650  for (auto *Node : E1)
651  ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 1));
652 
653  return true;
654 }
655 
// Derives an address-space code (unsigned int) for the memory access N from
// the IR value attached to its memory operand: when that value exists and has
// pointer type, the result is chosen by switching on the pointer's address
// space.
//
// NOTE(review): every return statement and all non-default case labels of
// this function (embedded listing lines 660, 664-669 and 673) are missing
// from this extract, so the concrete mapping and the fallback value cannot be
// read from here.
656 static unsigned int getCodeAddrSpace(MemSDNode *N) {
657  const Value *Src = N->getMemOperand()->getValue();
658 
659  if (!Src)
661 
662  if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
663  switch (PT->getAddressSpace()) {
670  default: break;
671  }
672  }
674 }
675 
// Decides whether the memory access N may be selected as an ldg load.
//
// Parameters:
//   N             - the memory node being selected.
//   Subtarget     - queried for hasLDG().
//   CodeAddrSpace - address-space code of the access; must be GLOBAL.
//   F             - enclosing machine function, used to test for a kernel.
//
// Returns true when the subtarget has LDG, the access is in the global space
// and either N is marked invariant or every underlying object of the accessed
// pointer is a constant global variable or -- in a kernel function -- a
// noalias, read-only argument.
676 static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
677  unsigned CodeAddrSpace, MachineFunction *F) {
678  // We use ldg (i.e. ld.global.nc) for invariant loads from the global address
679  // space.
680  //
681  // We have two ways of identifying invariant loads: Loads may be explicitly
682  // marked as invariant, or we may infer them to be invariant.
683  //
684  // We currently infer invariance for loads from
685  // - constant global variables, and
686  // - kernel function pointer params that are noalias (i.e. __restrict) and
687  // never written to.
688  //
689  // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
690  // not during the SelectionDAG phase).
691  //
692  // TODO: Infer invariance only at -O2. We still want to use ldg at -O0 for
693  // explicitly invariant loads because these are how clang tells us to use ldg
694  // when the user uses a builtin.
695  if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL)
696  return false;
697 
698  if (N->isInvariant())
699  return true;
700 
701  bool IsKernelFn = isKernelFunction(F->getFunction());
702 
703  // We use getUnderlyingObjects() here instead of getUnderlyingObject() mainly
704  // because the former looks through phi nodes while the latter does not. We
705  // need to look through phi nodes to handle pointer induction variables.
// NOTE(review): the declaration of Objs (embedded listing line 706) is
// missing from this extract.
707  getUnderlyingObjects(N->getMemOperand()->getValue(), Objs);
708 
// Every underlying object must qualify; any object that is neither an
// Argument nor a GlobalVariable disqualifies the load.
709  return all_of(Objs, [&](const Value *V) {
710  if (auto *A = dyn_cast<const Argument>(V))
711  return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr();
712  if (auto *GV = dyn_cast<const GlobalVariable>(V))
713  return GV->isConstant();
714  return false;
715  });
716 }
717 
718 bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
719  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
720  switch (IID) {
721  default:
722  return false;
723  case Intrinsic::nvvm_texsurf_handle_internal:
724  SelectTexSurfHandle(N);
725  return true;
726  }
727 }
728 
729 void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
730  // Op 0 is the intrinsic ID
731  SDValue Wrapper = N->getOperand(1);
732  SDValue GlobalVal = Wrapper.getOperand(0);
733  ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
734  MVT::i64, GlobalVal));
735 }
736 
// Selects an addrspacecast node as one of the cvta / cvta_to machine
// instructions (or nvvm_ptr_gen_to_param for generic -> param).
//
// Two directions are supported:
//   - specific -> generic (destination is ADDRESS_SPACE_GENERIC), and
//   - generic -> specific (source must be address space 0).
// Any other combination, or an address space without a case below, is a
// fatal error. The opcode variant depends on TM.is64Bit() and, for some
// address spaces, on useShortPointers().
//
// NOTE(review): four case labels (embedded listing lines 750, 753, 779 and
// 783) are missing from this extract; judging by the opcode names they are
// presumably the global and shared address spaces -- confirm against the full
// file.
737 void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
738  SDValue Src = N->getOperand(0);
739  AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
740  unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
741  unsigned DstAddrSpace = CastN->getDestAddressSpace();
742  assert(SrcAddrSpace != DstAddrSpace &&
743  "addrspacecast must be between different address spaces");
744 
745  if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
746  // Specific to generic
747  unsigned Opc;
748  switch (SrcAddrSpace) {
749  default: report_fatal_error("Bad address space in addrspacecast");
751  Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
752  break;
754  Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_shared_yes_6432
755  : NVPTX::cvta_shared_yes_64)
756  : NVPTX::cvta_shared_yes;
757  break;
758  case ADDRESS_SPACE_CONST:
759  Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_const_yes_6432
760  : NVPTX::cvta_const_yes_64)
761  : NVPTX::cvta_const_yes;
762  break;
763  case ADDRESS_SPACE_LOCAL:
764  Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_local_yes_6432
765  : NVPTX::cvta_local_yes_64)
766  : NVPTX::cvta_local_yes;
767  break;
768  }
769  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
770  Src));
771  return;
772  } else {
773  // Generic to specific
774  if (SrcAddrSpace != 0)
775  report_fatal_error("Cannot cast between two non-generic address spaces");
776  unsigned Opc;
777  switch (DstAddrSpace) {
778  default: report_fatal_error("Bad address space in addrspacecast");
780  Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
781  : NVPTX::cvta_to_global_yes;
782  break;
784  Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_shared_yes_3264
785  : NVPTX::cvta_to_shared_yes_64)
786  : NVPTX::cvta_to_shared_yes;
787  break;
788  case ADDRESS_SPACE_CONST:
789  Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_const_yes_3264
790  : NVPTX::cvta_to_const_yes_64)
791  : NVPTX::cvta_to_const_yes;
792  break;
793  case ADDRESS_SPACE_LOCAL:
794  Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_local_yes_3264
795  : NVPTX::cvta_to_local_yes_64)
796  : NVPTX::cvta_to_local_yes;
797  break;
// The param space is only handled in this generic -> specific direction.
798  case ADDRESS_SPACE_PARAM:
799  Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
800  : NVPTX::nvvm_ptr_gen_to_param;
801  break;
802  }
803  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
804  Src));
805  return;
806  }
807 }
808 
809 // Helper function template to reduce amount of boilerplate code for
810 // opcode selection.
// Returns the opcode argument that corresponds to the value type VT.
//
// MVT::i1 shares the i8 opcode. The i64 and f64 opcodes are Optional so that
// a caller can pass None for a variant that has no instruction; a VT that is
// not listed below yields None.
//
// NOTE(review): the first line of this signature (embedded listing line 811,
// carrying the return type and function name) is missing from this extract.
// Callers in this file store the result in an Optional<unsigned>.
812  MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16,
813  unsigned Opcode_i32, Optional<unsigned> Opcode_i64, unsigned Opcode_f16,
814  unsigned Opcode_f16x2, unsigned Opcode_f32, Optional<unsigned> Opcode_f64) {
815  switch (VT) {
816  case MVT::i1:
817  case MVT::i8:
818  return Opcode_i8;
819  case MVT::i16:
820  return Opcode_i16;
821  case MVT::i32:
822  return Opcode_i32;
823  case MVT::i64:
824  return Opcode_i64;
825  case MVT::f16:
826  return Opcode_f16;
827  case MVT::v2f16:
828  return Opcode_f16x2;
829  case MVT::f32:
830  return Opcode_f32;
831  case MVT::f64:
832  return Opcode_f64;
833  default:
834  return None;
835  }
836 }
837 
// Custom selection for scalar (and v2f16) loads, including atomic loads.
//
// Returns false, leaving N unselected, when:
//   - the load is indexed (pre/post inc/dec),
//   - the memory type is not a simple value type,
//   - the atomic ordering is stronger than monotonic, or
//   - pickOpcodeForVT has no opcode for the result type.
// Loads that qualify per canLowerToLDG are delegated to tryLDGLDU.
// Otherwise one of four addressing forms is tried in order (direct address,
// symbol+immediate, register+immediate, plain register), an LD_* machine node
// is built with the volatile / address-space / vector / type / width
// immediates, the memory operand is transferred, and N is replaced.
//
// NOTE(review): this extract is missing the assignments to fromType (embedded
// listing lines 900, 904 and 906) and the declaration of Base/Offset (912).
838 bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
839  SDLoc dl(N);
840  MemSDNode *LD = cast<MemSDNode>(N);
841  assert(LD->readMem() && "Expected load");
// PlainLoad is null when N is not a LoadSDNode (e.g. for ISD::ATOMIC_LOAD,
// which Select also routes here).
842  LoadSDNode *PlainLoad = dyn_cast<LoadSDNode>(N);
843  EVT LoadedVT = LD->getMemoryVT();
844  SDNode *NVPTXLD = nullptr;
845 
846  // do not support pre/post inc/dec
847  if (PlainLoad && PlainLoad->isIndexed())
848  return false;
849 
850  if (!LoadedVT.isSimple())
851  return false;
852 
853  AtomicOrdering Ordering = LD->getSuccessOrdering();
854  // In order to lower atomic loads with stronger guarantees we would need to
855  // use load.acquire or insert fences. However these features were only added
856  // with PTX ISA 6.0 / sm_70.
857  // TODO: Check if we can actually use the new instructions and implement them.
858  if (isStrongerThanMonotonic(Ordering))
859  return false;
860 
861  // Address Space Setting
862  unsigned int CodeAddrSpace = getCodeAddrSpace(LD);
863  if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) {
864  return tryLDGLDU(N);
865  }
866 
867  unsigned int PointerSize =
868  CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace());
869 
870  // Volatile Setting
871  // - .volatile is only available for .global and .shared
872  // - .volatile has the same memory synchronization semantics as .relaxed.sys
// Monotonic atomic loads are emitted as volatile. Note that the check below
// also keeps the volatile flag for the GENERIC address space, not only for
// GLOBAL and SHARED.
873  bool isVolatile = LD->isVolatile() || Ordering == AtomicOrdering::Monotonic;
874  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
875  CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
876  CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
877  isVolatile = false;
878 
879  // Type Setting: fromType + fromTypeWidth
880  //
881  // Sign : ISD::SEXTLOAD
882  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
883  // type is integer
884  // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
885  MVT SimpleVT = LoadedVT.getSimpleVT();
886  MVT ScalarVT = SimpleVT.getScalarType();
887  // Read at least 8 bits (predicates are stored as 8-bit values)
888  unsigned fromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
889  unsigned int fromType;
890 
891  // Vector Setting
// vecType stays Scalar even for v2f16; only the width is widened to 32.
892  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
893  if (SimpleVT.isVector()) {
894  assert(LoadedVT == MVT::v2f16 && "Unexpected vector type");
895  // v2f16 is loaded using ld.b32
896  fromTypeWidth = 32;
897  }
898 
// NOTE(review): the statement of each arm below (the fromType assignments)
// is missing from this extract.
899  if (PlainLoad && (PlainLoad->getExtensionType() == ISD::SEXTLOAD))
901  else if (ScalarVT.isFloatingPoint())
902  // f16 uses .b16 as its storage type.
903  fromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
905  else
907 
908  // Create the machine instruction DAG
909  SDValue Chain = N->getOperand(0);
910  SDValue N1 = N->getOperand(1);
911  SDValue Addr;
913  Optional<unsigned> Opcode;
914  MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
915 
// Form 1: direct address (avar).
916  if (SelectDirectAddr(N1, Addr)) {
917  Opcode = pickOpcodeForVT(
918  TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar, NVPTX::LD_i32_avar,
919  NVPTX::LD_i64_avar, NVPTX::LD_f16_avar, NVPTX::LD_f16x2_avar,
920  NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
921  if (!Opcode)
922  return false;
923  SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
924  getI32Imm(vecType, dl), getI32Imm(fromType, dl),
925  getI32Imm(fromTypeWidth, dl), Addr, Chain };
926  NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
927  MVT::Other, Ops);
// Form 2: base + offset via SelectADDRsi/SelectADDRsi64 (asi); the same
// opcodes serve both pointer sizes.
928  } else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
929  : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
930  Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
931  NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
932  NVPTX::LD_f16_asi, NVPTX::LD_f16x2_asi,
933  NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
934  if (!Opcode)
935  return false;
936  SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
937  getI32Imm(vecType, dl), getI32Imm(fromType, dl),
938  getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
939  NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
940  MVT::Other, Ops);
// Form 3: base + offset via SelectADDRri/SelectADDRri64 (ari); separate
// _64 opcodes exist for 64-bit pointers.
941  } else if (PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
942  : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
943  if (PointerSize == 64)
944  Opcode = pickOpcodeForVT(
945  TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
946  NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, NVPTX::LD_f16_ari_64,
947  NVPTX::LD_f16x2_ari_64, NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
948  else
949  Opcode = pickOpcodeForVT(
950  TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, NVPTX::LD_i32_ari,
951  NVPTX::LD_i64_ari, NVPTX::LD_f16_ari, NVPTX::LD_f16x2_ari,
952  NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
953  if (!Opcode)
954  return false;
955  SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
956  getI32Imm(vecType, dl), getI32Imm(fromType, dl),
957  getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
958  NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
959  MVT::Other, Ops);
// Form 4 (fallback): the address operand N1 is used as-is (areg).
960  } else {
961  if (PointerSize == 64)
962  Opcode = pickOpcodeForVT(
963  TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
964  NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, NVPTX::LD_f16_areg_64,
965  NVPTX::LD_f16x2_areg_64, NVPTX::LD_f32_areg_64,
966  NVPTX::LD_f64_areg_64);
967  else
968  Opcode = pickOpcodeForVT(
969  TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg, NVPTX::LD_i32_areg,
970  NVPTX::LD_i64_areg, NVPTX::LD_f16_areg, NVPTX::LD_f16x2_areg,
971  NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
972  if (!Opcode)
973  return false;
974  SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
975  getI32Imm(vecType, dl), getI32Imm(fromType, dl),
976  getI32Imm(fromTypeWidth, dl), N1, Chain };
977  NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
978  MVT::Other, Ops);
979  }
980 
981  if (!NVPTXLD)
982  return false;
983 
// Carry the original memory operand over to the new machine node.
984  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
985  CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXLD), {MemRef});
986 
987  ReplaceNode(N, NVPTXLD);
988  return true;
989 }
990 
// Custom selection for NVPTXISD::LoadV2 / NVPTXISD::LoadV4 nodes.
//
// Returns false, leaving N unselected, when the memory type is not simple,
// the node opcode is neither LoadV2 nor LoadV4, or pickOpcodeForVT has no
// opcode for the element type (the V4 tables pass None for i64 and f64).
// Loads that qualify per canLowerToLDG are delegated to tryLDGLDU.
// Otherwise one of four addressing forms is tried in order (avar, asi, ari,
// areg), an LDV_* machine node with N's own VT list is built, the memory
// operand is transferred, and N is replaced.
//
// NOTE(review): this extract is missing several lines: the declaration of
// Addr/Base/Offset (embedded listing line 995), the initializer of
// PointerSize (1012), the assignments to FromType (1039, 1041-1042, 1044),
// the assignments to VecType (1050, 1053) and one statement in the v2f16
// special case (1067).
991 bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
992 
993  SDValue Chain = N->getOperand(0);
994  SDValue Op1 = N->getOperand(1);
996  Optional<unsigned> Opcode;
997  SDLoc DL(N);
998  SDNode *LD;
999  MemSDNode *MemSD = cast<MemSDNode>(N);
1000  EVT LoadedVT = MemSD->getMemoryVT();
1001 
1002  if (!LoadedVT.isSimple())
1003  return false;
1004 
1005  // Address Space Setting
1006  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
1007  if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
1008  return tryLDGLDU(N);
1009  }
1010 
1011  unsigned int PointerSize =
1013 
1014  // Volatile Setting
1015  // - .volatile is only available for .global and .shared
// Note that the check below also keeps the volatile flag for the GENERIC
// address space.
1016  bool IsVolatile = MemSD->isVolatile();
1017  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1018  CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1019  CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1020  IsVolatile = false;
1021 
1022  // Vector Setting
1023  MVT SimpleVT = LoadedVT.getSimpleVT();
1024 
1025  // Type Setting: fromType + fromTypeWidth
1026  //
1027  // Sign : ISD::SEXTLOAD
1028  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
1029  // type is integer
1030  // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
1031  MVT ScalarVT = SimpleVT.getScalarType();
1032  // Read at least 8 bits (predicates are stored as 8-bit values)
1033  unsigned FromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
1034  unsigned int FromType;
1035  // The last operand holds the original LoadSDNode::getExtensionType() value
1036  unsigned ExtensionType = cast<ConstantSDNode>(
1037  N->getOperand(N->getNumOperands() - 1))->getZExtValue();
// NOTE(review): the statement of each arm below (the FromType assignments)
// is missing from this extract.
1038  if (ExtensionType == ISD::SEXTLOAD)
1040  else if (ScalarVT.isFloatingPoint())
1043  else
1045 
1046  unsigned VecType;
1047 
// NOTE(review): the VecType assignment in each case below is missing from
// this extract.
1048  switch (N->getOpcode()) {
1049  case NVPTXISD::LoadV2:
1051  break;
1052  case NVPTXISD::LoadV4:
1054  break;
1055  default:
1056  return false;
1057  }
1058 
1059  EVT EltVT = N->getValueType(0);
1060 
1061  // v8f16 is a special case. PTX doesn't have ld.v8.f16
1062  // instruction. Instead, we split the vector into v2f16 chunks and
1063  // load them with ld.v4.b32.
1064  if (EltVT == MVT::v2f16) {
1065  assert(N->getOpcode() == NVPTXISD::LoadV4 && "Unexpected load opcode.");
1066  EltVT = MVT::i32;
1068  FromTypeWidth = 32;
1069  }
1070 
// Form 1: direct address (avar).
1071  if (SelectDirectAddr(Op1, Addr)) {
1072  switch (N->getOpcode()) {
1073  default:
1074  return false;
1075  case NVPTXISD::LoadV2:
1076  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1077  NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
1078  NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
1079  NVPTX::LDV_f16_v2_avar, NVPTX::LDV_f16x2_v2_avar,
1080  NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
1081  break;
1082  case NVPTXISD::LoadV4:
1083  Opcode =
1084  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_avar,
1085  NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar, None,
1086  NVPTX::LDV_f16_v4_avar, NVPTX::LDV_f16x2_v4_avar,
1087  NVPTX::LDV_f32_v4_avar, None);
1088  break;
1089  }
1090  if (!Opcode)
1091  return false;
1092  SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1093  getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1094  getI32Imm(FromTypeWidth, DL), Addr, Chain };
1095  LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
// Form 2: base + offset via SelectADDRsi/SelectADDRsi64 (asi); the same
// opcodes serve both pointer sizes.
1096  } else if (PointerSize == 64
1097  ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
1098  : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
1099  switch (N->getOpcode()) {
1100  default:
1101  return false;
1102  case NVPTXISD::LoadV2:
1103  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1104  NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
1105  NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
1106  NVPTX::LDV_f16_v2_asi, NVPTX::LDV_f16x2_v2_asi,
1107  NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
1108  break;
1109  case NVPTXISD::LoadV4:
1110  Opcode =
1111  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi,
1112  NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi, None,
1113  NVPTX::LDV_f16_v4_asi, NVPTX::LDV_f16x2_v4_asi,
1114  NVPTX::LDV_f32_v4_asi, None);
1115  break;
1116  }
1117  if (!Opcode)
1118  return false;
1119  SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1120  getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1121  getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1122  LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
// Form 3: base + offset via SelectADDRri/SelectADDRri64 (ari); separate
// _64 opcodes exist for 64-bit pointers.
1123  } else if (PointerSize == 64
1124  ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1125  : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1126  if (PointerSize == 64) {
1127  switch (N->getOpcode()) {
1128  default:
1129  return false;
1130  case NVPTXISD::LoadV2:
1131  Opcode = pickOpcodeForVT(
1132  EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_ari_64,
1133  NVPTX::LDV_i16_v2_ari_64, NVPTX::LDV_i32_v2_ari_64,
1134  NVPTX::LDV_i64_v2_ari_64, NVPTX::LDV_f16_v2_ari_64,
1135  NVPTX::LDV_f16x2_v2_ari_64, NVPTX::LDV_f32_v2_ari_64,
1136  NVPTX::LDV_f64_v2_ari_64);
1137  break;
1138  case NVPTXISD::LoadV4:
1139  Opcode = pickOpcodeForVT(
1140  EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64,
1141  NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, None,
1142  NVPTX::LDV_f16_v4_ari_64, NVPTX::LDV_f16x2_v4_ari_64,
1143  NVPTX::LDV_f32_v4_ari_64, None);
1144  break;
1145  }
1146  } else {
1147  switch (N->getOpcode()) {
1148  default:
1149  return false;
1150  case NVPTXISD::LoadV2:
1151  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1152  NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
1153  NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
1154  NVPTX::LDV_f16_v2_ari, NVPTX::LDV_f16x2_v2_ari,
1155  NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
1156  break;
1157  case NVPTXISD::LoadV4:
1158  Opcode =
1159  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari,
1160  NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari, None,
1161  NVPTX::LDV_f16_v4_ari, NVPTX::LDV_f16x2_v4_ari,
1162  NVPTX::LDV_f32_v4_ari, None);
1163  break;
1164  }
1165  }
1166  if (!Opcode)
1167  return false;
1168  SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1169  getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1170  getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1171 
1172  LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
// Form 4 (fallback): the address operand Op1 is used as-is (areg).
1173  } else {
1174  if (PointerSize == 64) {
1175  switch (N->getOpcode()) {
1176  default:
1177  return false;
1178  case NVPTXISD::LoadV2:
1179  Opcode = pickOpcodeForVT(
1180  EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64,
1181  NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
1182  NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f16_v2_areg_64,
1183  NVPTX::LDV_f16x2_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
1184  NVPTX::LDV_f64_v2_areg_64);
1185  break;
1186  case NVPTXISD::LoadV4:
1187  Opcode = pickOpcodeForVT(
1188  EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64,
1189  NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, None,
1190  NVPTX::LDV_f16_v4_areg_64, NVPTX::LDV_f16x2_v4_areg_64,
1191  NVPTX::LDV_f32_v4_areg_64, None);
1192  break;
1193  }
1194  } else {
1195  switch (N->getOpcode()) {
1196  default:
1197  return false;
1198  case NVPTXISD::LoadV2:
1199  Opcode =
1200  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg,
1201  NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
1202  NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f16_v2_areg,
1203  NVPTX::LDV_f16x2_v2_areg, NVPTX::LDV_f32_v2_areg,
1204  NVPTX::LDV_f64_v2_areg);
1205  break;
1206  case NVPTXISD::LoadV4:
1207  Opcode = pickOpcodeForVT(
1208  EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg,
1209  NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg, None,
1210  NVPTX::LDV_f16_v4_areg, NVPTX::LDV_f16x2_v4_areg,
1211  NVPTX::LDV_f32_v4_areg, None);
1212  break;
1213  }
1214  }
1215  if (!Opcode)
1216  return false;
1217  SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1218  getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1219  getI32Imm(FromTypeWidth, DL), Op1, Chain };
1220  LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
1221  }
1222 
// Carry the original memory operand over to the new machine node.
1223  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
1224  CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});
1225 
1226  ReplaceNode(N, LD);
1227  return true;
1228 }
1229 
1230 bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
1231 
1232  SDValue Chain = N->getOperand(0);
1233  SDValue Op1;
1234  MemSDNode *Mem;
1235  bool IsLDG = true;
1236 
1237  // If this is an LDG intrinsic, the address is the third operand. If it's an
1238  // LDG/LDU SD node (from custom vector handling), then it's the second operand
1239  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1240  Op1 = N->getOperand(2);
1241  Mem = cast<MemIntrinsicSDNode>(N);
1242  unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1243  switch (IID) {
1244  default:
1245  return false;
1246  case Intrinsic::nvvm_ldg_global_f:
1247  case Intrinsic::nvvm_ldg_global_i:
1248  case Intrinsic::nvvm_ldg_global_p:
1249  IsLDG = true;
1250  break;
1251  case Intrinsic::nvvm_ldu_global_f:
1252  case Intrinsic::nvvm_ldu_global_i:
1253  case Intrinsic::nvvm_ldu_global_p:
1254  IsLDG = false;
1255  break;
1256  }
1257  } else {
1258  Op1 = N->getOperand(1);
1259  Mem = cast<MemSDNode>(N);
1260  }
1261 
1262  Optional<unsigned> Opcode;
1263  SDLoc DL(N);
1264  SDNode *LD;
1265  SDValue Base, Offset, Addr;
1266 
1267  EVT EltVT = Mem->getMemoryVT();
1268  unsigned NumElts = 1;
1269  if (EltVT.isVector()) {
1270  NumElts = EltVT.getVectorNumElements();
1271  EltVT = EltVT.getVectorElementType();
1272  // vectors of f16 are loaded/stored as multiples of v2f16 elements.
1273  if (EltVT == MVT::f16 && N->getValueType(0) == MVT::v2f16) {
1274  assert(NumElts % 2 == 0 && "Vector must have even number of elements");
1275  EltVT = MVT::v2f16;
1276  NumElts /= 2;
1277  }
1278  }
1279 
1280  // Build the "promoted" result VTList for the load. If we are really loading
1281  // i8s, then the return type will be promoted to i16 since we do not expose
1282  // 8-bit registers in NVPTX.
1283  EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
1284  SmallVector<EVT, 5> InstVTs;
1285  for (unsigned i = 0; i != NumElts; ++i) {
1286  InstVTs.push_back(NodeVT);
1287  }
1288  InstVTs.push_back(MVT::Other);
1289  SDVTList InstVTList = CurDAG->getVTList(InstVTs);
1290 
1291  if (SelectDirectAddr(Op1, Addr)) {
1292  switch (N->getOpcode()) {
1293  default:
1294  return false;
1295  case ISD::LOAD:
1297  if (IsLDG)
1298  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1299  NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
1300  NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
1301  NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
1302  NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
1303  NVPTX::INT_PTX_LDG_GLOBAL_f16avar,
1304  NVPTX::INT_PTX_LDG_GLOBAL_f16x2avar,
1305  NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
1306  NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
1307  else
1308  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1309  NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
1310  NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
1311  NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
1312  NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
1313  NVPTX::INT_PTX_LDU_GLOBAL_f16avar,
1314  NVPTX::INT_PTX_LDU_GLOBAL_f16x2avar,
1315  NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
1316  NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
1317  break;
1318  case NVPTXISD::LoadV2:
1319  case NVPTXISD::LDGV2:
1320  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1321  NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
1322  NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
1323  NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
1324  NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
1325  NVPTX::INT_PTX_LDG_G_v2f16_ELE_avar,
1326  NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_avar,
1327  NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
1328  NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
1329  break;
1330  case NVPTXISD::LDUV2:
1331  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1332  NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
1333  NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
1334  NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
1335  NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
1336  NVPTX::INT_PTX_LDU_G_v2f16_ELE_avar,
1337  NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_avar,
1338  NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
1339  NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
1340  break;
1341  case NVPTXISD::LoadV4:
1342  case NVPTXISD::LDGV4:
1343  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1344  NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
1345  NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
1346  NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, None,
1347  NVPTX::INT_PTX_LDG_G_v4f16_ELE_avar,
1348  NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_avar,
1349  NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, None);
1350  break;
1351  case NVPTXISD::LDUV4:
1352  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1353  NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar,
1354  NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
1355  NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, None,
1356  NVPTX::INT_PTX_LDU_G_v4f16_ELE_avar,
1357  NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_avar,
1358  NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, None);
1359  break;
1360  }
1361  if (!Opcode)
1362  return false;
1363  SDValue Ops[] = { Addr, Chain };
1364  LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
1365  } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1366  : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1367  if (TM.is64Bit()) {
1368  switch (N->getOpcode()) {
1369  default:
1370  return false;
1371  case ISD::LOAD:
1373  if (IsLDG)
1374  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1375  NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
1376  NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
1377  NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
1378  NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
1379  NVPTX::INT_PTX_LDG_GLOBAL_f16ari64,
1380  NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari64,
1381  NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
1382  NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
1383  else
1384  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1385  NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
1386  NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
1387  NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
1388  NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
1389  NVPTX::INT_PTX_LDU_GLOBAL_f16ari64,
1390  NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari64,
1391  NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
1392  NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
1393  break;
1394  case NVPTXISD::LoadV2:
1395  case NVPTXISD::LDGV2:
1396  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1397  NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
1398  NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
1399  NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
1400  NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
1401  NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari64,
1402  NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari64,
1403  NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
1404  NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
1405  break;
1406  case NVPTXISD::LDUV2:
1407  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1408  NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
1409  NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
1410  NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
1411  NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
1412  NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari64,
1413  NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari64,
1414  NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
1415  NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
1416  break;
1417  case NVPTXISD::LoadV4:
1418  case NVPTXISD::LDGV4:
1419  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1420  NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
1421  NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
1422  NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, None,
1423  NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari64,
1424  NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari64,
1425  NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, None);
1426  break;
1427  case NVPTXISD::LDUV4:
1428  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1429  NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
1430  NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
1431  NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, None,
1432  NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari64,
1433  NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari64,
1434  NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, None);
1435  break;
1436  }
1437  } else {
1438  switch (N->getOpcode()) {
1439  default:
1440  return false;
1441  case ISD::LOAD:
1443  if (IsLDG)
1444  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1445  NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
1446  NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
1447  NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
1448  NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
1449  NVPTX::INT_PTX_LDG_GLOBAL_f16ari,
1450  NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari,
1451  NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
1452  NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
1453  else
1454  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1455  NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
1456  NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
1457  NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
1458  NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
1459  NVPTX::INT_PTX_LDU_GLOBAL_f16ari,
1460  NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari,
1461  NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
1462  NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
1463  break;
1464  case NVPTXISD::LoadV2:
1465  case NVPTXISD::LDGV2:
1466  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1467  NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
1468  NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
1469  NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
1470  NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
1471  NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari32,
1472  NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari32,
1473  NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
1474  NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
1475  break;
1476  case NVPTXISD::LDUV2:
1477  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1478  NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
1479  NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
1480  NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
1481  NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
1482  NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari32,
1483  NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari32,
1484  NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
1485  NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
1486  break;
1487  case NVPTXISD::LoadV4:
1488  case NVPTXISD::LDGV4:
1489  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1490  NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
1491  NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
1492  NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, None,
1493  NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari32,
1494  NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari32,
1495  NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, None);
1496  break;
1497  case NVPTXISD::LDUV4:
1498  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1499  NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
1500  NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
1501  NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, None,
1502  NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari32,
1503  NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari32,
1504  NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, None);
1505  break;
1506  }
1507  }
1508  if (!Opcode)
1509  return false;
1510  SDValue Ops[] = {Base, Offset, Chain};
1511  LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
1512  } else {
1513  if (TM.is64Bit()) {
1514  switch (N->getOpcode()) {
1515  default:
1516  return false;
1517  case ISD::LOAD:
1519  if (IsLDG)
1520  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1521  NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
1522  NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
1523  NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
1524  NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
1525  NVPTX::INT_PTX_LDG_GLOBAL_f16areg64,
1526  NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg64,
1527  NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
1528  NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
1529  else
1530  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1531  NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
1532  NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
1533  NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
1534  NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
1535  NVPTX::INT_PTX_LDU_GLOBAL_f16areg64,
1536  NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg64,
1537  NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
1538  NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
1539  break;
1540  case NVPTXISD::LoadV2:
1541  case NVPTXISD::LDGV2:
1542  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1543  NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
1544  NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
1545  NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
1546  NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
1547  NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg64,
1548  NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg64,
1549  NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
1550  NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
1551  break;
1552  case NVPTXISD::LDUV2:
1553  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1554  NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
1555  NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
1556  NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
1557  NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
1558  NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg64,
1559  NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg64,
1560  NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
1561  NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
1562  break;
1563  case NVPTXISD::LoadV4:
1564  case NVPTXISD::LDGV4:
1565  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1566  NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
1567  NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
1568  NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, None,
1569  NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg64,
1570  NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg64,
1571  NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, None);
1572  break;
1573  case NVPTXISD::LDUV4:
1574  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1575  NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64,
1576  NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
1577  NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, None,
1578  NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg64,
1579  NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg64,
1580  NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, None);
1581  break;
1582  }
1583  } else {
1584  switch (N->getOpcode()) {
1585  default:
1586  return false;
1587  case ISD::LOAD:
1589  if (IsLDG)
1590  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1591  NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
1592  NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
1593  NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
1594  NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
1595  NVPTX::INT_PTX_LDG_GLOBAL_f16areg,
1596  NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg,
1597  NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
1598  NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
1599  else
1600  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1601  NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
1602  NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
1603  NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
1604  NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
1605  NVPTX::INT_PTX_LDU_GLOBAL_f16areg,
1606  NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg,
1607  NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
1608  NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
1609  break;
1610  case NVPTXISD::LoadV2:
1611  case NVPTXISD::LDGV2:
1612  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1613  NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
1614  NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
1615  NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
1616  NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
1617  NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg32,
1618  NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg32,
1619  NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
1620  NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
1621  break;
1622  case NVPTXISD::LDUV2:
1623  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1624  NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
1625  NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
1626  NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
1627  NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
1628  NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg32,
1629  NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg32,
1630  NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
1631  NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
1632  break;
1633  case NVPTXISD::LoadV4:
1634  case NVPTXISD::LDGV4:
1635  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1636  NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
1637  NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
1638  NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, None,
1639  NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg32,
1640  NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg32,
1641  NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, None);
1642  break;
1643  case NVPTXISD::LDUV4:
1644  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1645  NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32,
1646  NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
1647  NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, None,
1648  NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32,
1649  NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32,
1650  NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, None);
1651  break;
1652  }
1653  }
1654  if (!Opcode)
1655  return false;
1656  SDValue Ops[] = { Op1, Chain };
1657  LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
1658  }
1659 
1661  CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});
1662 
1663  // For automatic generation of LDG (through SelectLoad[Vector], not the
1664  // intrinsics), we may have an extending load like:
1665  //
1666  // i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
1667  //
1668  // In this case, the matching logic above will select a load for the original
1669  // memory type (in this case, i8) and our types will not match (the node needs
1670  // to return an i32 in this case). Our LDG/LDU nodes do not support the
1671  // concept of sign-/zero-extension, so emulate it here by adding an explicit
1672  // CVT instruction. Ptxas should clean up any redundancies here.
1673 
1674  EVT OrigType = N->getValueType(0);
1675  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);
1676 
1677  if (OrigType != EltVT && LdNode) {
1678  // We have an extending-load. The instruction we selected operates on the
1679  // smaller type, but the SDNode we are replacing has the larger type. We
1680  // need to emit a CVT to make the types match.
1681  bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD;
1682  unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(),
1683  EltVT.getSimpleVT(), IsSigned);
1684 
1685  // For each output value, apply the manual sign/zero-extension and make sure
1686  // all users of the load go through that CVT.
1687  for (unsigned i = 0; i != NumElts; ++i) {
1688  SDValue Res(LD, i);
1689  SDValue OrigVal(N, i);
1690 
1691  SDNode *CvtNode =
1692  CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
1694  DL, MVT::i32));
1695  ReplaceUses(OrigVal, SDValue(CvtNode, 0));
1696  }
1697  }
1698 
1699  ReplaceNode(N, LD);
1700  return true;
1701 }
1702 
1703 bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
1704  SDLoc dl(N);
1705  MemSDNode *ST = cast<MemSDNode>(N);
1706  assert(ST->writeMem() && "Expected store");
1707  StoreSDNode *PlainStore = dyn_cast<StoreSDNode>(N);
1708  AtomicSDNode *AtomicStore = dyn_cast<AtomicSDNode>(N);
1709  assert((PlainStore || AtomicStore) && "Expected store");
1710  EVT StoreVT = ST->getMemoryVT();
1711  SDNode *NVPTXST = nullptr;
1712 
1713  // do not support pre/post inc/dec
1714  if (PlainStore && PlainStore->isIndexed())
1715  return false;
1716 
1717  if (!StoreVT.isSimple())
1718  return false;
1719 
1720  AtomicOrdering Ordering = ST->getSuccessOrdering();
1721  // In order to lower atomic loads with stronger guarantees we would need to
1722  // use store.release or insert fences. However these features were only added
1723  // with PTX ISA 6.0 / sm_70.
1724  // TODO: Check if we can actually use the new instructions and implement them.
1725  if (isStrongerThanMonotonic(Ordering))
1726  return false;
1727 
1728  // Address Space Setting
1729  unsigned int CodeAddrSpace = getCodeAddrSpace(ST);
1730  unsigned int PointerSize =
1731  CurDAG->getDataLayout().getPointerSizeInBits(ST->getAddressSpace());
1732 
1733  // Volatile Setting
1734  // - .volatile is only available for .global and .shared
1735  // - .volatile has the same memory synchronization semantics as .relaxed.sys
1736  bool isVolatile = ST->isVolatile() || Ordering == AtomicOrdering::Monotonic;
1737  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1738  CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1739  CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1740  isVolatile = false;
1741 
1742  // Vector Setting
1743  MVT SimpleVT = StoreVT.getSimpleVT();
1744  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
1745 
1746  // Type Setting: toType + toTypeWidth
1747  // - for integer type, always use 'u'
1748  //
1749  MVT ScalarVT = SimpleVT.getScalarType();
1750  unsigned toTypeWidth = ScalarVT.getSizeInBits();
1751  if (SimpleVT.isVector()) {
1752  assert(StoreVT == MVT::v2f16 && "Unexpected vector type");
1753  // v2f16 is stored using st.b32
1754  toTypeWidth = 32;
1755  }
1756 
1757  unsigned int toType;
1758  if (ScalarVT.isFloatingPoint())
1759  // f16 uses .b16 as its storage type.
1760  toType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
1762  else
1764 
1765  // Create the machine instruction DAG
1766  SDValue Chain = ST->getChain();
1767  SDValue Value = PlainStore ? PlainStore->getValue() : AtomicStore->getVal();
1768  SDValue BasePtr = ST->getBasePtr();
1769  SDValue Addr;
1770  SDValue Offset, Base;
1771  Optional<unsigned> Opcode;
1772  MVT::SimpleValueType SourceVT =
1773  Value.getNode()->getSimpleValueType(0).SimpleTy;
1774 
1775  if (SelectDirectAddr(BasePtr, Addr)) {
1776  Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
1777  NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
1778  NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar,
1779  NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
1780  if (!Opcode)
1781  return false;
1782  SDValue Ops[] = {Value,
1783  getI32Imm(isVolatile, dl),
1784  getI32Imm(CodeAddrSpace, dl),
1785  getI32Imm(vecType, dl),
1786  getI32Imm(toType, dl),
1787  getI32Imm(toTypeWidth, dl),
1788  Addr,
1789  Chain};
1790  NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
1791  } else if (PointerSize == 64
1792  ? SelectADDRsi64(BasePtr.getNode(), BasePtr, Base, Offset)
1793  : SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) {
1794  Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
1795  NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
1796  NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi,
1797  NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
1798  if (!Opcode)
1799  return false;
1800  SDValue Ops[] = {Value,
1801  getI32Imm(isVolatile, dl),
1802  getI32Imm(CodeAddrSpace, dl),
1803  getI32Imm(vecType, dl),
1804  getI32Imm(toType, dl),
1805  getI32Imm(toTypeWidth, dl),
1806  Base,
1807  Offset,
1808  Chain};
1809  NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
1810  } else if (PointerSize == 64
1811  ? SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset)
1812  : SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset)) {
1813  if (PointerSize == 64)
1814  Opcode = pickOpcodeForVT(
1815  SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
1816  NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, NVPTX::ST_f16_ari_64,
1817  NVPTX::ST_f16x2_ari_64, NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
1818  else
1819  Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
1820  NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
1821  NVPTX::ST_f16_ari, NVPTX::ST_f16x2_ari,
1822  NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
1823  if (!Opcode)
1824  return false;
1825 
1826  SDValue Ops[] = {Value,
1827  getI32Imm(isVolatile, dl),
1828  getI32Imm(CodeAddrSpace, dl),
1829  getI32Imm(vecType, dl),
1830  getI32Imm(toType, dl),
1831  getI32Imm(toTypeWidth, dl),
1832  Base,
1833  Offset,
1834  Chain};
1835  NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
1836  } else {
1837  if (PointerSize == 64)
1838  Opcode =
1839  pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
1840  NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
1841  NVPTX::ST_f16_areg_64, NVPTX::ST_f16x2_areg_64,
1842  NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
1843  else
1844  Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
1845  NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
1846  NVPTX::ST_f16_areg, NVPTX::ST_f16x2_areg,
1847  NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
1848  if (!Opcode)
1849  return false;
1850  SDValue Ops[] = {Value,
1851  getI32Imm(isVolatile, dl),
1852  getI32Imm(CodeAddrSpace, dl),
1853  getI32Imm(vecType, dl),
1854  getI32Imm(toType, dl),
1855  getI32Imm(toTypeWidth, dl),
1856  BasePtr,
1857  Chain};
1858  NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
1859  }
1860 
1861  if (!NVPTXST)
1862  return false;
1863 
1864  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
1865  CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXST), {MemRef});
1866  ReplaceNode(N, NVPTXST);
1867  return true;
1868 }
1869 
1870 bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
1871  SDValue Chain = N->getOperand(0);
1872  SDValue Op1 = N->getOperand(1);
1873  SDValue Addr, Offset, Base;
1874  Optional<unsigned> Opcode;
1875  SDLoc DL(N);
1876  SDNode *ST;
1877  EVT EltVT = Op1.getValueType();
1878  MemSDNode *MemSD = cast<MemSDNode>(N);
1879  EVT StoreVT = MemSD->getMemoryVT();
1880 
1881  // Address Space Setting
1882  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
1883  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
1884  report_fatal_error("Cannot store to pointer that points to constant "
1885  "memory space");
1886  }
1887  unsigned int PointerSize =
1889 
1890  // Volatile Setting
1891  // - .volatile is only availalble for .global and .shared
1892  bool IsVolatile = MemSD->isVolatile();
1893  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1894  CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1895  CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1896  IsVolatile = false;
1897 
1898  // Type Setting: toType + toTypeWidth
1899  // - for integer type, always use 'u'
1900  assert(StoreVT.isSimple() && "Store value is not simple");
1901  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
1902  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
1903  unsigned ToType;
1904  if (ScalarVT.isFloatingPoint())
1905  ToType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
1907  else
1909 
1911  SDValue N2;
1912  unsigned VecType;
1913 
1914  switch (N->getOpcode()) {
1915  case NVPTXISD::StoreV2:
1917  StOps.push_back(N->getOperand(1));
1918  StOps.push_back(N->getOperand(2));
1919  N2 = N->getOperand(3);
1920  break;
1921  case NVPTXISD::StoreV4:
1923  StOps.push_back(N->getOperand(1));
1924  StOps.push_back(N->getOperand(2));
1925  StOps.push_back(N->getOperand(3));
1926  StOps.push_back(N->getOperand(4));
1927  N2 = N->getOperand(5);
1928  break;
1929  default:
1930  return false;
1931  }
1932 
1933  // v8f16 is a special case. PTX doesn't have st.v8.f16
1934  // instruction. Instead, we split the vector into v2f16 chunks and
1935  // store them with st.v4.b32.
1936  if (EltVT == MVT::v2f16) {
1937  assert(N->getOpcode() == NVPTXISD::StoreV4 && "Unexpected load opcode.");
1938  EltVT = MVT::i32;
1940  ToTypeWidth = 32;
1941  }
1942 
1943  StOps.push_back(getI32Imm(IsVolatile, DL));
1944  StOps.push_back(getI32Imm(CodeAddrSpace, DL));
1945  StOps.push_back(getI32Imm(VecType, DL));
1946  StOps.push_back(getI32Imm(ToType, DL));
1947  StOps.push_back(getI32Imm(ToTypeWidth, DL));
1948 
1949  if (SelectDirectAddr(N2, Addr)) {
1950  switch (N->getOpcode()) {
1951  default:
1952  return false;
1953  case NVPTXISD::StoreV2:
1954  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1955  NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
1956  NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
1957  NVPTX::STV_f16_v2_avar, NVPTX::STV_f16x2_v2_avar,
1958  NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
1959  break;
1960  case NVPTXISD::StoreV4:
1961  Opcode =
1962  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_avar,
1963  NVPTX::STV_i16_v4_avar, NVPTX::STV_i32_v4_avar, None,
1964  NVPTX::STV_f16_v4_avar, NVPTX::STV_f16x2_v4_avar,
1965  NVPTX::STV_f32_v4_avar, None);
1966  break;
1967  }
1968  StOps.push_back(Addr);
1969  } else if (PointerSize == 64 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1970  : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1971  switch (N->getOpcode()) {
1972  default:
1973  return false;
1974  case NVPTXISD::StoreV2:
1975  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1976  NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
1977  NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
1978  NVPTX::STV_f16_v2_asi, NVPTX::STV_f16x2_v2_asi,
1979  NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
1980  break;
1981  case NVPTXISD::StoreV4:
1982  Opcode =
1983  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi,
1984  NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi, None,
1985  NVPTX::STV_f16_v4_asi, NVPTX::STV_f16x2_v4_asi,
1986  NVPTX::STV_f32_v4_asi, None);
1987  break;
1988  }
1989  StOps.push_back(Base);
1990  StOps.push_back(Offset);
1991  } else if (PointerSize == 64 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1992  : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1993  if (PointerSize == 64) {
1994  switch (N->getOpcode()) {
1995  default:
1996  return false;
1997  case NVPTXISD::StoreV2:
1998  Opcode = pickOpcodeForVT(
1999  EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_ari_64,
2000  NVPTX::STV_i16_v2_ari_64, NVPTX::STV_i32_v2_ari_64,
2001  NVPTX::STV_i64_v2_ari_64, NVPTX::STV_f16_v2_ari_64,
2002  NVPTX::STV_f16x2_v2_ari_64, NVPTX::STV_f32_v2_ari_64,
2003  NVPTX::STV_f64_v2_ari_64);
2004  break;
2005  case NVPTXISD::StoreV4:
2006  Opcode = pickOpcodeForVT(
2007  EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64,
2008  NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, None,
2009  NVPTX::STV_f16_v4_ari_64, NVPTX::STV_f16x2_v4_ari_64,
2010  NVPTX::STV_f32_v4_ari_64, None);
2011  break;
2012  }
2013  } else {
2014  switch (N->getOpcode()) {
2015  default:
2016  return false;
2017  case NVPTXISD::StoreV2:
2018  Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
2019  NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
2020  NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
2021  NVPTX::STV_f16_v2_ari, NVPTX::STV_f16x2_v2_ari,
2022  NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
2023  break;
2024  case NVPTXISD::StoreV4:
2025  Opcode =
2026  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari,
2027  NVPTX::STV_i16_v4_ari, NVPTX::STV_i32_v4_ari, None,
2028  NVPTX::STV_f16_v4_ari, NVPTX::STV_f16x2_v4_ari,
2029  NVPTX::STV_f32_v4_ari, None);
2030  break;
2031  }
2032  }
2033  StOps.push_back(Base);
2034  StOps.push_back(Offset);
2035  } else {
2036  if (PointerSize == 64) {
2037  switch (N->getOpcode()) {
2038  default:
2039  return false;
2040  case NVPTXISD::StoreV2:
2041  Opcode = pickOpcodeForVT(
2042  EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64,
2043  NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
2044  NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f16_v2_areg_64,
2045  NVPTX::STV_f16x2_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
2046  NVPTX::STV_f64_v2_areg_64);
2047  break;
2048  case NVPTXISD::StoreV4:
2049  Opcode = pickOpcodeForVT(
2050  EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64,
2051  NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, None,
2052  NVPTX::STV_f16_v4_areg_64, NVPTX::STV_f16x2_v4_areg_64,
2053  NVPTX::STV_f32_v4_areg_64, None);
2054  break;
2055  }
2056  } else {
2057  switch (N->getOpcode()) {
2058  default:
2059  return false;
2060  case NVPTXISD::StoreV2:
2061  Opcode =
2062  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg,
2063  NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
2064  NVPTX::STV_i64_v2_areg, NVPTX::STV_f16_v2_areg,
2065  NVPTX::STV_f16x2_v2_areg, NVPTX::STV_f32_v2_areg,
2066  NVPTX::STV_f64_v2_areg);
2067  break;
2068  case NVPTXISD::StoreV4:
2069  Opcode =
2070  pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg,
2071  NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg, None,
2072  NVPTX::STV_f16_v4_areg, NVPTX::STV_f16x2_v4_areg,
2073  NVPTX::STV_f32_v4_areg, None);
2074  break;
2075  }
2076  }
2077  StOps.push_back(N2);
2078  }
2079 
2080  if (!Opcode)
2081  return false;
2082 
2083  StOps.push_back(Chain);
2084 
2085  ST = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, StOps);
2086 
2087  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
2088  CurDAG->setNodeMemRefs(cast<MachineSDNode>(ST), {MemRef});
2089 
2090  ReplaceNode(N, ST);
2091  return true;
2092 }
2093 
2094 bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
2095  SDValue Chain = Node->getOperand(0);
2096  SDValue Offset = Node->getOperand(2);
2097  SDValue Flag = Node->getOperand(3);
2098  SDLoc DL(Node);
2099  MemSDNode *Mem = cast<MemSDNode>(Node);
2100 
2101  unsigned VecSize;
2102  switch (Node->getOpcode()) {
2103  default:
2104  return false;
2105  case NVPTXISD::LoadParam:
2106  VecSize = 1;
2107  break;
2108  case NVPTXISD::LoadParamV2:
2109  VecSize = 2;
2110  break;
2111  case NVPTXISD::LoadParamV4:
2112  VecSize = 4;
2113  break;
2114  }
2115 
2116  EVT EltVT = Node->getValueType(0);
2117  EVT MemVT = Mem->getMemoryVT();
2118 
2119  Optional<unsigned> Opcode;
2120 
2121  switch (VecSize) {
2122  default:
2123  return false;
2124  case 1:
2125  Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
2126  NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
2127  NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
2128  NVPTX::LoadParamMemF16, NVPTX::LoadParamMemF16x2,
2129  NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
2130  break;
2131  case 2:
2132  Opcode =
2133  pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8,
2134  NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
2135  NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F16,
2136  NVPTX::LoadParamMemV2F16x2, NVPTX::LoadParamMemV2F32,
2137  NVPTX::LoadParamMemV2F64);
2138  break;
2139  case 4:
2140  Opcode = pickOpcodeForVT(
2141  MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8,
2142  NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32, None,
2143  NVPTX::LoadParamMemV4F16, NVPTX::LoadParamMemV4F16x2,
2144  NVPTX::LoadParamMemV4F32, None);
2145  break;
2146  }
2147  if (!Opcode)
2148  return false;
2149 
2150  SDVTList VTs;
2151  if (VecSize == 1) {
2152  VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2153  } else if (VecSize == 2) {
2154  VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2155  } else {
2156  EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2157  VTs = CurDAG->getVTList(EVTs);
2158  }
2159 
2160  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2161 
2163  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2164  Ops.push_back(Chain);
2165  Ops.push_back(Flag);
2166 
2167  ReplaceNode(Node, CurDAG->getMachineNode(Opcode.getValue(), DL, VTs, Ops));
2168  return true;
2169 }
2170 
2171 bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
2172  SDLoc DL(N);
2173  SDValue Chain = N->getOperand(0);
2174  SDValue Offset = N->getOperand(1);
2175  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2176  MemSDNode *Mem = cast<MemSDNode>(N);
2177 
2178  // How many elements do we have?
2179  unsigned NumElts = 1;
2180  switch (N->getOpcode()) {
2181  default:
2182  return false;
2183  case NVPTXISD::StoreRetval:
2184  NumElts = 1;
2185  break;
2187  NumElts = 2;
2188  break;
2190  NumElts = 4;
2191  break;
2192  }
2193 
2194  // Build vector of operands
2196  for (unsigned i = 0; i < NumElts; ++i)
2197  Ops.push_back(N->getOperand(i + 2));
2198  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2199  Ops.push_back(Chain);
2200 
2201  // Determine target opcode
2202  // If we have an i1, use an 8-bit store. The lowering code in
2203  // NVPTXISelLowering will have already emitted an upcast.
2204  Optional<unsigned> Opcode = 0;
2205  switch (NumElts) {
2206  default:
2207  return false;
2208  case 1:
2209  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2210  NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
2211  NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
2212  NVPTX::StoreRetvalF16, NVPTX::StoreRetvalF16x2,
2213  NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
2214  break;
2215  case 2:
2216  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2217  NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
2218  NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
2219  NVPTX::StoreRetvalV2F16, NVPTX::StoreRetvalV2F16x2,
2220  NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
2221  break;
2222  case 4:
2223  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2224  NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
2225  NVPTX::StoreRetvalV4I32, None,
2226  NVPTX::StoreRetvalV4F16, NVPTX::StoreRetvalV4F16x2,
2227  NVPTX::StoreRetvalV4F32, None);
2228  break;
2229  }
2230  if (!Opcode)
2231  return false;
2232 
2233  SDNode *Ret = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, Ops);
2234  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
2235  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});
2236 
2237  ReplaceNode(N, Ret);
2238  return true;
2239 }
2240 
2241 bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
2242  SDLoc DL(N);
2243  SDValue Chain = N->getOperand(0);
2244  SDValue Param = N->getOperand(1);
2245  unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2246  SDValue Offset = N->getOperand(2);
2247  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2248  MemSDNode *Mem = cast<MemSDNode>(N);
2249  SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2250 
2251  // How many elements do we have?
2252  unsigned NumElts = 1;
2253  switch (N->getOpcode()) {
2254  default:
2255  return false;
2258  case NVPTXISD::StoreParam:
2259  NumElts = 1;
2260  break;
2262  NumElts = 2;
2263  break;
2265  NumElts = 4;
2266  break;
2267  }
2268 
2269  // Build vector of operands
2271  for (unsigned i = 0; i < NumElts; ++i)
2272  Ops.push_back(N->getOperand(i + 3));
2273  Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
2274  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2275  Ops.push_back(Chain);
2276  Ops.push_back(Flag);
2277 
2278  // Determine target opcode
2279  // If we have an i1, use an 8-bit store. The lowering code in
2280  // NVPTXISelLowering will have already emitted an upcast.
2281  Optional<unsigned> Opcode = 0;
2282  switch (N->getOpcode()) {
2283  default:
2284  switch (NumElts) {
2285  default:
2286  return false;
2287  case 1:
2288  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2289  NVPTX::StoreParamI8, NVPTX::StoreParamI16,
2290  NVPTX::StoreParamI32, NVPTX::StoreParamI64,
2291  NVPTX::StoreParamF16, NVPTX::StoreParamF16x2,
2292  NVPTX::StoreParamF32, NVPTX::StoreParamF64);
2293  break;
2294  case 2:
2295  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2296  NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
2297  NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
2298  NVPTX::StoreParamV2F16, NVPTX::StoreParamV2F16x2,
2299  NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
2300  break;
2301  case 4:
2302  Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2303  NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
2304  NVPTX::StoreParamV4I32, None,
2305  NVPTX::StoreParamV4F16, NVPTX::StoreParamV4F16x2,
2306  NVPTX::StoreParamV4F32, None);
2307  break;
2308  }
2309  if (!Opcode)
2310  return false;
2311  break;
2312  // Special case: if we have a sign-extend/zero-extend node, insert the
2313  // conversion instruction first, and use that as the value operand to
2314  // the selected StoreParam node.
2315  case NVPTXISD::StoreParamU32: {
2316  Opcode = NVPTX::StoreParamI32;
2318  MVT::i32);
2319  SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2320  MVT::i32, Ops[0], CvtNone);
2321  Ops[0] = SDValue(Cvt, 0);
2322  break;
2323  }
2324  case NVPTXISD::StoreParamS32: {
2325  Opcode = NVPTX::StoreParamI32;
2327  MVT::i32);
2328  SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2329  MVT::i32, Ops[0], CvtNone);
2330  Ops[0] = SDValue(Cvt, 0);
2331  break;
2332  }
2333  }
2334 
2336  SDNode *Ret =
2337  CurDAG->getMachineNode(Opcode.getValue(), DL, RetVTs, Ops);
2338  MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
2339  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});
2340 
2341  ReplaceNode(N, Ret);
2342  return true;
2343 }
2344 
/// tryTextureIntrinsic - Select a texture node by mapping the opcode of \p N
/// to an NVPTX TEX_* / TLD4_* machine opcode.
///
/// Returns false, leaving \p N untouched, when the opcode of \p N has no entry
/// in the switch. Otherwise \p N is replaced by a machine node with the same
/// value-type list and the same operands, except that operand 0 (the chain,
/// per the comment at the operand copy) is moved from the front of the operand
/// list to the back, and true is returned.
///
/// NOTE(review): in this listing most `case NVPTXISD::...:` labels are absent,
/// so every assignment that follows a `break;` without a label is unreachable
/// as written. The leading integer on each line also looks like a line number
/// left over from a rendered listing rather than code. Confirm against the
/// upstream file before relying on this text.
2345 bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
2346 unsigned Opc = 0;
2347 
// Map the node opcode to the machine opcode; unknown opcodes are rejected.
2348 switch (N->getOpcode()) {
2349 default: return false;
2351 Opc = NVPTX::TEX_1D_F32_S32;
2352 break;
2354 Opc = NVPTX::TEX_1D_F32_F32;
2355 break;
2357 Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
2358 break;
2360 Opc = NVPTX::TEX_1D_F32_F32_GRAD;
2361 break;
2362 case NVPTXISD::Tex1DS32S32:
2363 Opc = NVPTX::TEX_1D_S32_S32;
2364 break;
2366 Opc = NVPTX::TEX_1D_S32_F32;
2367 break;
2369 Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
2370 break;
2372 Opc = NVPTX::TEX_1D_S32_F32_GRAD;
2373 break;
2374 case NVPTXISD::Tex1DU32S32:
2375 Opc = NVPTX::TEX_1D_U32_S32;
2376 break;
2378 Opc = NVPTX::TEX_1D_U32_F32;
2379 break;
2381 Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
2382 break;
2384 Opc = NVPTX::TEX_1D_U32_F32_GRAD;
2385 break;
2387 Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
2388 break;
2390 Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
2391 break;
2393 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
2394 break;
2396 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
2397 break;
2399 Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
2400 break;
2402 Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
2403 break;
2405 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
2406 break;
2408 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
2409 break;
2411 Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
2412 break;
2414 Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
2415 break;
2417 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
2418 break;
2420 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
2421 break;
2423 Opc = NVPTX::TEX_2D_F32_S32;
2424 break;
2426 Opc = NVPTX::TEX_2D_F32_F32;
2427 break;
2429 Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
2430 break;
2432 Opc = NVPTX::TEX_2D_F32_F32_GRAD;
2433 break;
2434 case NVPTXISD::Tex2DS32S32:
2435 Opc = NVPTX::TEX_2D_S32_S32;
2436 break;
2438 Opc = NVPTX::TEX_2D_S32_F32;
2439 break;
2441 Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
2442 break;
2444 Opc = NVPTX::TEX_2D_S32_F32_GRAD;
2445 break;
2446 case NVPTXISD::Tex2DU32S32:
2447 Opc = NVPTX::TEX_2D_U32_S32;
2448 break;
2450 Opc = NVPTX::TEX_2D_U32_F32;
2451 break;
2453 Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
2454 break;
2456 Opc = NVPTX::TEX_2D_U32_F32_GRAD;
2457 break;
2459 Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
2460 break;
2462 Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
2463 break;
2465 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
2466 break;
2468 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
2469 break;
2471 Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
2472 break;
2474 Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
2475 break;
2477 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
2478 break;
2480 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
2481 break;
2483 Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
2484 break;
2486 Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
2487 break;
2489 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
2490 break;
2492 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
2493 break;
2495 Opc = NVPTX::TEX_3D_F32_S32;
2496 break;
2498 Opc = NVPTX::TEX_3D_F32_F32;
2499 break;
2501 Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
2502 break;
2504 Opc = NVPTX::TEX_3D_F32_F32_GRAD;
2505 break;
2506 case NVPTXISD::Tex3DS32S32:
2507 Opc = NVPTX::TEX_3D_S32_S32;
2508 break;
2510 Opc = NVPTX::TEX_3D_S32_F32;
2511 break;
2513 Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
2514 break;
2516 Opc = NVPTX::TEX_3D_S32_F32_GRAD;
2517 break;
2518 case NVPTXISD::Tex3DU32S32:
2519 Opc = NVPTX::TEX_3D_U32_S32;
2520 break;
2522 Opc = NVPTX::TEX_3D_U32_F32;
2523 break;
2525 Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
2526 break;
2528 Opc = NVPTX::TEX_3D_U32_F32_GRAD;
2529 break;
2531 Opc = NVPTX::TEX_CUBE_F32_F32;
2532 break;
2534 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
2535 break;
2537 Opc = NVPTX::TEX_CUBE_S32_F32;
2538 break;
2540 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
2541 break;
2543 Opc = NVPTX::TEX_CUBE_U32_F32;
2544 break;
2546 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
2547 break;
2549 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
2550 break;
2552 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
2553 break;
2555 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
2556 break;
2558 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
2559 break;
2561 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
2562 break;
2564 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
2565 break;
2567 Opc = NVPTX::TLD4_R_2D_F32_F32;
2568 break;
2570 Opc = NVPTX::TLD4_G_2D_F32_F32;
2571 break;
2573 Opc = NVPTX::TLD4_B_2D_F32_F32;
2574 break;
2576 Opc = NVPTX::TLD4_A_2D_F32_F32;
2577 break;
2579 Opc = NVPTX::TLD4_R_2D_S32_F32;
2580 break;
2582 Opc = NVPTX::TLD4_G_2D_S32_F32;
2583 break;
2585 Opc = NVPTX::TLD4_B_2D_S32_F32;
2586 break;
2588 Opc = NVPTX::TLD4_A_2D_S32_F32;
2589 break;
2591 Opc = NVPTX::TLD4_R_2D_U32_F32;
2592 break;
2594 Opc = NVPTX::TLD4_G_2D_U32_F32;
2595 break;
2597 Opc = NVPTX::TLD4_B_2D_U32_F32;
2598 break;
2600 Opc = NVPTX::TLD4_A_2D_U32_F32;
2601 break;
2603 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
2604 break;
2606 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
2607 break;
2609 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
2610 break;
2612 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
2613 break;
2615 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
2616 break;
2618 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
2619 break;
2621 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
2622 break;
2624 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
2625 break;
2627 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
2628 break;
2630 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
2631 break;
2633 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
2634 break;
2636 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
2637 break;
2639 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
2640 break;
2642 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
2643 break;
2645 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
2646 break;
2648 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
2649 break;
2651 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
2652 break;
2654 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
2655 break;
2657 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
2658 break;
2660 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
2661 break;
2663 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
2664 break;
2666 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
2667 break;
2669 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
2670 break;
2672 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
2673 break;
2675 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
2676 break;
2678 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
2679 break;
2681 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
2682 break;
2684 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
2685 break;
2687 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
2688 break;
2690 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
2691 break;
2693 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
2694 break;
2696 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
2697 break;
2699 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
2700 break;
2702 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
2703 break;
2705 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
2706 break;
2708 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
2709 break;
2711 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
2712 break;
2714 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
2715 break;
2717 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
2718 break;
2720 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
2721 break;
2723 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
2724 break;
2726 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
2727 break;
2729 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
2730 break;
2732 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
2733 break;
2735 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
2736 break;
2738 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
2739 break;
2741 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
2742 break;
2744 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
2745 break;
2747 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
2748 break;
2750 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
2751 break;
2753 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
2754 break;
2756 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
2757 break;
2759 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
2760 break;
2762 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
2763 break;
2765 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
2766 break;
2768 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
2769 break;
2771 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
2772 break;
2774 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
2775 break;
2777 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
2778 break;
2780 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
2781 break;
2783 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
2784 break;
2786 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
2787 break;
2789 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
2790 break;
2792 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
2793 break;
2795 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
2796 break;
2798 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
2799 break;
2801 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
2802 break;
2804 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
2805 break;
2807 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
2808 break;
2810 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
2811 break;
2813 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
2814 break;
2816 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
2817 break;
2819 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
2820 break;
2822 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
2823 break;
2825 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
2826 break;
2828 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
2829 break;
2831 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
2832 break;
2834 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
2835 break;
2837 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
2838 break;
2840 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
2841 break;
2843 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
2844 break;
2846 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
2847 break;
2849 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
2850 break;
2852 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
2853 break;
2854 }
2855 
// drop_begin skips operand 0; it is re-appended below so it ends up last.
2856 // Copy over operands
2857 SmallVector<SDValue, 8> Ops(drop_begin(N->ops()));
2858 Ops.push_back(N->getOperand(0)); // Move chain to the back.
2859 
// The replacement node reuses N's value-type list unchanged.
2860 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
2861 return true;
2862 }
2863 
/// trySurfaceIntrinsic - Select a surface-load node by mapping the opcode of
/// \p N to an NVPTX SULD_* machine opcode (the _CLAMP, _TRAP and _ZERO
/// families appear below).
///
/// Returns false, leaving \p N untouched, when the opcode of \p N has no entry
/// in the switch. Otherwise \p N is replaced by a machine node with the same
/// value-type list and the same operands, except that operand 0 (the chain,
/// per the comment at the operand copy) is moved from the front of the operand
/// list to the back, and true is returned.
///
/// NOTE(review): in this listing every `case NVPTXISD::...:` label is absent,
/// so the assignments below are unreachable as written and the function would
/// always return false. The leading integer on each line also looks like a
/// line number left over from a rendered listing rather than code. Confirm
/// against the upstream file before relying on this text.
2864 bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
2865 unsigned Opc = 0;
// Map the node opcode to the machine opcode; unknown opcodes are rejected.
2866 switch (N->getOpcode()) {
2867 default: return false;
2869 Opc = NVPTX::SULD_1D_I8_CLAMP;
2870 break;
2872 Opc = NVPTX::SULD_1D_I16_CLAMP;
2873 break;
2875 Opc = NVPTX::SULD_1D_I32_CLAMP;
2876 break;
2878 Opc = NVPTX::SULD_1D_I64_CLAMP;
2879 break;
2881 Opc = NVPTX::SULD_1D_V2I8_CLAMP;
2882 break;
2884 Opc = NVPTX::SULD_1D_V2I16_CLAMP;
2885 break;
2887 Opc = NVPTX::SULD_1D_V2I32_CLAMP;
2888 break;
2890 Opc = NVPTX::SULD_1D_V2I64_CLAMP;
2891 break;
2893 Opc = NVPTX::SULD_1D_V4I8_CLAMP;
2894 break;
2896 Opc = NVPTX::SULD_1D_V4I16_CLAMP;
2897 break;
2899 Opc = NVPTX::SULD_1D_V4I32_CLAMP;
2900 break;
2902 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
2903 break;
2905 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
2906 break;
2908 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
2909 break;
2911 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
2912 break;
2914 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
2915 break;
2917 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
2918 break;
2920 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
2921 break;
2923 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
2924 break;
2926 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
2927 break;
2929 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
2930 break;
2932 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
2933 break;
2935 Opc = NVPTX::SULD_2D_I8_CLAMP;
2936 break;
2938 Opc = NVPTX::SULD_2D_I16_CLAMP;
2939 break;
2941 Opc = NVPTX::SULD_2D_I32_CLAMP;
2942 break;
2944 Opc = NVPTX::SULD_2D_I64_CLAMP;
2945 break;
2947 Opc = NVPTX::SULD_2D_V2I8_CLAMP;
2948 break;
2950 Opc = NVPTX::SULD_2D_V2I16_CLAMP;
2951 break;
2953 Opc = NVPTX::SULD_2D_V2I32_CLAMP;
2954 break;
2956 Opc = NVPTX::SULD_2D_V2I64_CLAMP;
2957 break;
2959 Opc = NVPTX::SULD_2D_V4I8_CLAMP;
2960 break;
2962 Opc = NVPTX::SULD_2D_V4I16_CLAMP;
2963 break;
2965 Opc = NVPTX::SULD_2D_V4I32_CLAMP;
2966 break;
2968 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
2969 break;
2971 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
2972 break;
2974 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
2975 break;
2977 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
2978 break;
2980 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
2981 break;
2983 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
2984 break;
2986 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
2987 break;
2989 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
2990 break;
2992 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
2993 break;
2995 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
2996 break;
2998 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
2999 break;
3001 Opc = NVPTX::SULD_3D_I8_CLAMP;
3002 break;
3004 Opc = NVPTX::SULD_3D_I16_CLAMP;
3005 break;
3007 Opc = NVPTX::SULD_3D_I32_CLAMP;
3008 break;
3010 Opc = NVPTX::SULD_3D_I64_CLAMP;
3011 break;
3013 Opc = NVPTX::SULD_3D_V2I8_CLAMP;
3014 break;
3016 Opc = NVPTX::SULD_3D_V2I16_CLAMP;
3017 break;
3019 Opc = NVPTX::SULD_3D_V2I32_CLAMP;
3020 break;
3022 Opc = NVPTX::SULD_3D_V2I64_CLAMP;
3023 break;
3025 Opc = NVPTX::SULD_3D_V4I8_CLAMP;
3026 break;
3028 Opc = NVPTX::SULD_3D_V4I16_CLAMP;
3029 break;
3031 Opc = NVPTX::SULD_3D_V4I32_CLAMP;
3032 break;
3034 Opc = NVPTX::SULD_1D_I8_TRAP;
3035 break;
3037 Opc = NVPTX::SULD_1D_I16_TRAP;
3038 break;
3040 Opc = NVPTX::SULD_1D_I32_TRAP;
3041 break;
3043 Opc = NVPTX::SULD_1D_I64_TRAP;
3044 break;
3046 Opc = NVPTX::SULD_1D_V2I8_TRAP;
3047 break;
3049 Opc = NVPTX::SULD_1D_V2I16_TRAP;
3050 break;
3052 Opc = NVPTX::SULD_1D_V2I32_TRAP;
3053 break;
3055 Opc = NVPTX::SULD_1D_V2I64_TRAP;
3056 break;
3058 Opc = NVPTX::SULD_1D_V4I8_TRAP;
3059 break;
3061 Opc = NVPTX::SULD_1D_V4I16_TRAP;
3062 break;
3064 Opc = NVPTX::SULD_1D_V4I32_TRAP;
3065 break;
3067 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
3068 break;
3070 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
3071 break;
3073 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
3074 break;
3076 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
3077 break;
3079 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
3080 break;
3082 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
3083 break;
3085 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
3086 break;
3088 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
3089 break;
3091 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
3092 break;
3094 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
3095 break;
3097 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
3098 break;
3100 Opc = NVPTX::SULD_2D_I8_TRAP;
3101 break;
3103 Opc = NVPTX::SULD_2D_I16_TRAP;
3104 break;
3106 Opc = NVPTX::SULD_2D_I32_TRAP;
3107 break;
3109 Opc = NVPTX::SULD_2D_I64_TRAP;
3110 break;
3112 Opc = NVPTX::SULD_2D_V2I8_TRAP;
3113 break;
3115 Opc = NVPTX::SULD_2D_V2I16_TRAP;
3116 break;
3118 Opc = NVPTX::SULD_2D_V2I32_TRAP;
3119 break;
3121 Opc = NVPTX::SULD_2D_V2I64_TRAP;
3122 break;
3124 Opc = NVPTX::SULD_2D_V4I8_TRAP;
3125 break;
3127 Opc = NVPTX::SULD_2D_V4I16_TRAP;
3128 break;
3130 Opc = NVPTX::SULD_2D_V4I32_TRAP;
3131 break;
3133 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
3134 break;
3136 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
3137 break;
3139 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
3140 break;
3142 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
3143 break;
3145 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
3146 break;
3148 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
3149 break;
3151 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
3152 break;
3154 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
3155 break;
3157 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
3158 break;
3160 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
3161 break;
3163 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
3164 break;
3166 Opc = NVPTX::SULD_3D_I8_TRAP;
3167 break;
3169 Opc = NVPTX::SULD_3D_I16_TRAP;
3170 break;
3172 Opc = NVPTX::SULD_3D_I32_TRAP;
3173 break;
3175 Opc = NVPTX::SULD_3D_I64_TRAP;
3176 break;
3178 Opc = NVPTX::SULD_3D_V2I8_TRAP;
3179 break;
3181 Opc = NVPTX::SULD_3D_V2I16_TRAP;
3182 break;
3184 Opc = NVPTX::SULD_3D_V2I32_TRAP;
3185 break;
3187 Opc = NVPTX::SULD_3D_V2I64_TRAP;
3188 break;
3190 Opc = NVPTX::SULD_3D_V4I8_TRAP;
3191 break;
3193 Opc = NVPTX::SULD_3D_V4I16_TRAP;
3194 break;
3196 Opc = NVPTX::SULD_3D_V4I32_TRAP;
3197 break;
3199 Opc = NVPTX::SULD_1D_I8_ZERO;
3200 break;
3202 Opc = NVPTX::SULD_1D_I16_ZERO;
3203 break;
3205 Opc = NVPTX::SULD_1D_I32_ZERO;
3206 break;
3208 Opc = NVPTX::SULD_1D_I64_ZERO;
3209 break;
3211 Opc = NVPTX::SULD_1D_V2I8_ZERO;
3212 break;
3214 Opc = NVPTX::SULD_1D_V2I16_ZERO;
3215 break;
3217 Opc = NVPTX::SULD_1D_V2I32_ZERO;
3218 break;
3220 Opc = NVPTX::SULD_1D_V2I64_ZERO;
3221 break;
3223 Opc = NVPTX::SULD_1D_V4I8_ZERO;
3224 break;
3226 Opc = NVPTX::SULD_1D_V4I16_ZERO;
3227 break;
3229 Opc = NVPTX::SULD_1D_V4I32_ZERO;
3230 break;
3232 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
3233 break;
3235 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
3236 break;
3238 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
3239 break;
3241 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
3242 break;
3244 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
3245 break;
3247 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
3248 break;
3250 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
3251 break;
3253 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
3254 break;
3256 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
3257 break;
3259 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
3260 break;
3262 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
3263 break;
3265 Opc = NVPTX::SULD_2D_I8_ZERO;
3266 break;
3268 Opc = NVPTX::SULD_2D_I16_ZERO;
3269 break;
3271 Opc = NVPTX::SULD_2D_I32_ZERO;
3272 break;
3274 Opc = NVPTX::SULD_2D_I64_ZERO;
3275 break;
3277 Opc = NVPTX::SULD_2D_V2I8_ZERO;
3278 break;
3280 Opc = NVPTX::SULD_2D_V2I16_ZERO;
3281 break;
3283 Opc = NVPTX::SULD_2D_V2I32_ZERO;
3284 break;
3286 Opc = NVPTX::SULD_2D_V2I64_ZERO;
3287 break;
3289 Opc = NVPTX::SULD_2D_V4I8_ZERO;
3290 break;
3292 Opc = NVPTX::SULD_2D_V4I16_ZERO;
3293 break;
3295 Opc = NVPTX::SULD_2D_V4I32_ZERO;
3296 break;
3298 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
3299 break;
3301 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
3302 break;
3304 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
3305 break;
3307 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
3308 break;
3310 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
3311 break;
3313 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
3314 break;
3316 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
3317 break;
3319 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
3320 break;
3322 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
3323 break;
3325 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
3326 break;
3328 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
3329 break;
3331 Opc = NVPTX::SULD_3D_I8_ZERO;
3332 break;
3334 Opc = NVPTX::SULD_3D_I16_ZERO;
3335 break;
3337 Opc = NVPTX::SULD_3D_I32_ZERO;
3338 break;
3340 Opc = NVPTX::SULD_3D_I64_ZERO;
3341 break;
3343 Opc = NVPTX::SULD_3D_V2I8_ZERO;
3344 break;
3346 Opc = NVPTX::SULD_3D_V2I16_ZERO;
3347 break;
3349 Opc = NVPTX::SULD_3D_V2I32_ZERO;
3350 break;
3352 Opc = NVPTX::SULD_3D_V2I64_ZERO;
3353 break;
3355 Opc = NVPTX::SULD_3D_V4I8_ZERO;
3356 break;
3358 Opc = NVPTX::SULD_3D_V4I16_ZERO;
3359 break;
3361 Opc = NVPTX::SULD_3D_V4I32_ZERO;
3362 break;
3363 }
3364 
// drop_begin skips operand 0; it is re-appended below so it ends up last.
3365 // Copy over operands
3366 SmallVector<SDValue, 8> Ops(drop_begin(N->ops()));
3367 Ops.push_back(N->getOperand(0)); // Move chain to the back.
3368 
// The replacement node reuses N's value-type list unchanged.
3369 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
3370 return true;
3371 }
3372 
3373 
3374 /// SelectBFE - Look for instruction sequences that can be made more efficient
3375 /// by using the 'bfe' (bit-field extract) PTX instruction
3376 bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
3377  SDLoc DL(N);
3378  SDValue LHS = N->getOperand(0);
3379  SDValue RHS = N->getOperand(1);
3380  SDValue Len;
3381  SDValue Start;
3382  SDValue Val;
3383  bool IsSigned = false;
3384 
3385  if (N->getOpcode() == ISD::AND) {
3386  // Canonicalize the operands
3387  // We want 'and %val, %mask'
3388  if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
3389  std::swap(LHS, RHS);
3390  }
3391 
3392  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
3393  if (!Mask) {
3394  // We need a constant mask on the RHS of the AND
3395  return false;
3396  }
3397 
3398  // Extract the mask bits
3399  uint64_t MaskVal = Mask->getZExtValue();
3400  if (!isMask_64(MaskVal)) {
3401  // We *could* handle shifted masks here, but doing so would require an
3402  // 'and' operation to fix up the low-order bits so we would trade
3403  // shr+and for bfe+and, which has the same throughput
3404  return false;
3405  }
3406 
3407  // How many bits are in our mask?
3408  uint64_t NumBits = countTrailingOnes(MaskVal);
3409  Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
3410 
3411  if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
3412  // We have a 'srl/and' pair, extract the effective start bit and length
3413  Val = LHS.getNode()->getOperand(0);
3414  Start = LHS.getNode()->getOperand(1);
3415  ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
3416  if (StartConst) {
3417  uint64_t StartVal = StartConst->getZExtValue();
3418  // How many "good" bits do we have left? "good" is defined here as bits
3419  // that exist in the original value, not shifted in.
3420  uint64_t GoodBits = Start.getValueSizeInBits() - StartVal;
3421  if (NumBits > GoodBits) {
3422  // Do not handle the case where bits have been shifted in. In theory
3423  // we could handle this, but the cost is likely higher than just
3424  // emitting the srl/and pair.
3425  return false;
3426  }
3427  Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
3428  } else {
3429  // Do not handle the case where the shift amount (can be zero if no srl
3430  // was found) is not constant. We could handle this case, but it would
3431  // require run-time logic that would be more expensive than just
3432  // emitting the srl/and pair.
3433  return false;
3434  }
3435  } else {
3436  // Do not handle the case where the LHS of the and is not a shift. While
3437  // it would be trivial to handle this case, it would just transform
3438  // 'and' -> 'bfe', but 'and' has higher-throughput.
3439  return false;
3440  }
3441  } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
3442  if (LHS->getOpcode() == ISD::AND) {
3443  ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
3444  if (!ShiftCnst) {
3445  // Shift amount must be constant
3446  return false;
3447  }
3448 
3449  uint64_t ShiftAmt = ShiftCnst->getZExtValue();
3450 
3451  SDValue AndLHS = LHS->getOperand(0);
3452  SDValue AndRHS = LHS->getOperand(1);
3453 
3454  // Canonicalize the AND to have the mask on the RHS
3455  if (isa<ConstantSDNode>(AndLHS)) {
3456  std::swap(AndLHS, AndRHS);
3457  }
3458 
3459  ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
3460  if (!MaskCnst) {
3461  // Mask must be constant
3462  return false;
3463  }
3464 
3465  uint64_t MaskVal = MaskCnst->getZExtValue();
3466  uint64_t NumZeros;
3467  uint64_t NumBits;
3468  if (isMask_64(MaskVal)) {
3469  NumZeros = 0;
3470  // The number of bits in the result bitfield will be the number of
3471  // trailing ones (the AND) minus the number of bits we shift off
3472  NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
3473  } else if (isShiftedMask_64(MaskVal)) {
3474  NumZeros = countTrailingZeros(MaskVal);
3475  unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
3476  // The number of bits in the result bitfield will be the number of
3477  // trailing zeros plus the number of set bits in the mask minus the
3478  // number of bits we shift off
3479  NumBits = NumZeros + NumOnes - ShiftAmt;
3480  } else {
3481  // This is not a mask we can handle
3482  return false;
3483  }
3484 
3485  if (ShiftAmt < NumZeros) {
3486  // Handling this case would require extra logic that would make this
3487  // transformation non-profitable
3488  return false;
3489  }
3490 
3491  Val = AndLHS;
3492  Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
3493  Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
3494  } else if (LHS->getOpcode() == ISD::SHL) {
3495  // Here, we have a pattern like:
3496  //
3497  // (sra (shl val, NN), MM)
3498  // or
3499  // (srl (shl val, NN), MM)
3500  //
3501  // If MM >= NN, we can efficiently optimize this with bfe
3502  Val = LHS->getOperand(0);
3503 
3504  SDValue ShlRHS = LHS->getOperand(1);
3505  ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
3506  if (!ShlCnst) {
3507  // Shift amount must be constant
3508  return false;
3509  }
3510  uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
3511 
3512  SDValue ShrRHS = RHS;
3513  ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
3514  if (!ShrCnst) {
3515  // Shift amount must be constant
3516  return false;
3517  }
3518  uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
3519 
3520  // To avoid extra codegen and be profitable, we need Outer >= Inner
3521  if (OuterShiftAmt < InnerShiftAmt) {
3522  return false;
3523  }
3524 
3525  // If the outer shift is more than the type size, we have no bitfield to
3526  // extract (since we also check that the inner shift is <= the outer shift
3527  // then this also implies that the inner shift is < the type size)
3528  if (OuterShiftAmt >= Val.getValueSizeInBits()) {
3529  return false;
3530  }
3531 
3532  Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL,
3533  MVT::i32);
3534  Len = CurDAG->getTargetConstant(Val.getValueSizeInBits() - OuterShiftAmt,
3535  DL, MVT::i32);
3536 
3537  if (N->getOpcode() == ISD::SRA) {
3538  // If we have a arithmetic right shift, we need to use the signed bfe
3539  // variant
3540  IsSigned = true;
3541  }
3542  } else {
3543  // No can do...
3544  return false;
3545  }
3546  } else {
3547  // No can do...
3548  return false;
3549  }
3550 
3551 
3552  unsigned Opc;
3553  // For the BFE operations we form here from "and" and "srl", always use the
3554  // unsigned variants.
3555  if (Val.getValueType() == MVT::i32) {
3556  if (IsSigned) {
3557  Opc = NVPTX::BFE_S32rii;
3558  } else {
3559  Opc = NVPTX::BFE_U32rii;
3560  }
3561  } else if (Val.getValueType() == MVT::i64) {
3562  if (IsSigned) {
3563  Opc = NVPTX::BFE_S64rii;
3564  } else {
3565  Opc = NVPTX::BFE_U64rii;
3566  }
3567  } else {
3568  // We cannot handle this type
3569  return false;
3570  }
3571 
3572  SDValue Ops[] = {
3573  Val, Start, Len
3574  };
3575 
3576  ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops));
3577  return true;
3578 }
3579 
3580 // SelectDirectAddr - Match a direct address for DAG.
3581 // A direct address could be a globaladdress or externalsymbol.
3582 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
3583  // Return true if TGA or ES.
3584  if (N.getOpcode() == ISD::TargetGlobalAddress ||
3585  N.getOpcode() == ISD::TargetExternalSymbol) {
3586  Address = N;
3587  return true;
3588  }
3589  if (N.getOpcode() == NVPTXISD::Wrapper) {
3590  Address = N.getOperand(0);
3591  return true;
3592  }
3593  // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol
3594  if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) {
3595  if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
3597  CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam)
3598  return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address);
3599  }
3600  return false;
3601 }
3602 
3603 // symbol+offset
3604 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3605  SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3606  if (Addr.getOpcode() == ISD::ADD) {
3607  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3608  SDValue base = Addr.getOperand(0);
3609  if (SelectDirectAddr(base, Base)) {
3610  Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3611  mvt);
3612  return true;
3613  }
3614  }
3615  }
3616  return false;
3617 }
3618 
3619 // symbol+offset
3620 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
3621  SDValue &Base, SDValue &Offset) {
3622  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
3623 }
3624 
3625 // symbol+offset
3626 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
3627  SDValue &Base, SDValue &Offset) {
3628  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
3629 }
3630 
3631 // register+offset
3632 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
3633  SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3634  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
3635  Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3636  Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
3637  return true;
3638  }
3639  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
3640  Addr.getOpcode() == ISD::TargetGlobalAddress)
3641  return false; // direct calls.
3642 
3643  if (Addr.getOpcode() == ISD::ADD) {
3644  if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
3645  return false;
3646  }
3647  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3648  if (FrameIndexSDNode *FIN =
3649  dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
3650  // Constant offset from frame ref.
3651  Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3652  else
3653  Base = Addr.getOperand(0);
3654  Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3655  mvt);
3656  return true;
3657  }
3658  }
3659  return false;
3660 }
3661 
3662 // register+offset
3663 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
3664  SDValue &Base, SDValue &Offset) {
3665  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
3666 }
3667 
3668 // register+offset
3669 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
3670  SDValue &Base, SDValue &Offset) {
3671  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
3672 }
3673 
3674 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
3675  unsigned int spN) const {
3676  const Value *Src = nullptr;
3677  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
3678  if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3679  return true;
3680  Src = mN->getMemOperand()->getValue();
3681  }
3682  if (!Src)
3683  return false;
3684  if (auto *PT = dyn_cast<PointerType>(Src->getType()))
3685  return (PT->getAddressSpace() == spN);
3686  return false;
3687 }
3688 
3689 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
3690 /// inline asm expressions.
3692  const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
3693  SDValue Op0, Op1;
3694  switch (ConstraintID) {
3695  default:
3696  return true;
3697  case InlineAsm::Constraint_m: // memory
3698  if (SelectDirectAddr(Op, Op0)) {
3699  OutOps.push_back(Op0);
3700  OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
3701  return false;
3702  }
3703  if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
3704  OutOps.push_back(Op0);
3705  OutOps.push_back(Op1);
3706  return false;
3707  }
3708  break;
3709  }
3710  return true;
3711 }
3712 
3713 /// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
3714 /// conversion from \p SrcTy to \p DestTy.
3715 unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
3716  bool IsSigned) {
3717  switch (SrcTy.SimpleTy) {
3718  default:
3719  llvm_unreachable("Unhandled source type");
3720  case MVT::i8:
3721  switch (DestTy.SimpleTy) {
3722  default:
3723  llvm_unreachable("Unhandled dest type");
3724  case MVT::i16:
3725  return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
3726  case MVT::i32:
3727  return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
3728  case MVT::i64:
3729  return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
3730  }
3731  case MVT::i16:
3732  switch (DestTy.SimpleTy) {
3733  default:
3734  llvm_unreachable("Unhandled dest type");
3735  case MVT::i8:
3736  return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
3737  case MVT::i32:
3738  return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
3739  case MVT::i64:
3740  return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
3741  }
3742  case MVT::i32:
3743  switch (DestTy.SimpleTy) {
3744  default:
3745  llvm_unreachable("Unhandled dest type");
3746  case MVT::i8:
3747  return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
3748  case MVT::i16:
3749  return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
3750  case MVT::i64:
3751  return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
3752  }
3753  case MVT::i64:
3754  switch (DestTy.SimpleTy) {
3755  default:
3756  llvm_unreachable("Unhandled dest type");
3757  case MVT::i8:
3758  return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
3759  case MVT::i16:
3760  return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
3761  case MVT::i32:
3762  return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
3763  }
3764  }
3765 }
llvm::NVPTXISD::Suld1DI16Clamp
@ Suld1DI16Clamp
Definition: NVPTXISelLowering.h:254
i
i
Definition: README.txt:29
llvm::NVPTXISD::TexUnified2DU32FloatLevel
@ TexUnified2DU32FloatLevel
Definition: NVPTXISelLowering.h:201
llvm::NVPTXISD::Suld3DV2I64Trap
@ Suld3DV2I64Trap
Definition: NVPTXISelLowering.h:368
llvm::ISD::SETUGE
@ SETUGE
Definition: ISDOpcodes.h:1368
llvm::NVPTXISD::Suld2DV2I32Clamp
@ Suld2DV2I32Clamp
Definition: NVPTXISelLowering.h:283
llvm::NVPTXISD::Tex1DFloatS32
@ Tex1DFloatS32
Definition: NVPTXISelLowering.h:83
llvm::NVPTXISD::Tld4UnifiedR2DU64Float
@ Tld4UnifiedR2DU64Float
Definition: NVPTXISelLowering.h:247
llvm::ConstantSDNode
Definition: SelectionDAGNodes.h:1556
llvm::NVPTXISD::Suld1DArrayI16Zero
@ Suld1DArrayI16Zero
Definition: NVPTXISelLowering.h:386
llvm::NVPTXISD::Suld1DV4I16Trap
@ Suld1DV4I16Trap
Definition: NVPTXISelLowering.h:322
llvm::NVPTXISD::TexUnifiedCubeArrayS32FloatLevel
@ TexUnifiedCubeArrayS32FloatLevel
Definition: NVPTXISelLowering.h:236
llvm::NVPTXISD::TexUnified1DU32S32
@ TexUnified1DU32S32
Definition: NVPTXISelLowering.h:175
llvm::ARMII::VecSize
@ VecSize
Definition: ARMBaseInfo.h:417
llvm::ISD::SETLE
@ SETLE
Definition: ISDOpcodes.h:1379
llvm::NVPTXISD::TexCubeArrayU32Float
@ TexCubeArrayU32Float
Definition: NVPTXISelLowering.h:153
llvm::ISD::SETO
@ SETO
Definition: ISDOpcodes.h:1364
llvm::NVPTXISD::Suld1DArrayV4I8Trap
@ Suld1DArrayV4I8Trap
Definition: NVPTXISelLowering.h:333
llvm::AddrSpaceCastSDNode::getSrcAddressSpace
unsigned getSrcAddressSpace() const
Definition: SelectionDAGNodes.h:1245
llvm::NVPTXISD::Suld1DArrayI32Clamp
@ Suld1DArrayI32Clamp
Definition: NVPTXISelLowering.h:267
llvm::NVPTXISD::TexUnified3DFloatFloat
@ TexUnified3DFloatFloat
Definition: NVPTXISelLowering.h:216
llvm::NVPTXISD::Suld2DArrayI32Zero
@ Suld2DArrayI32Zero
Definition: NVPTXISelLowering.h:411
llvm::NVPTXISD::Suld2DArrayV4I32Trap
@ Suld2DArrayV4I32Trap
Definition: NVPTXISelLowering.h:359
llvm::NVPTXISD::Suld1DArrayI64Trap
@ Suld1DArrayI64Trap
Definition: NVPTXISelLowering.h:328
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::NVPTXISD::Suld2DArrayV4I16Zero
@ Suld2DArrayV4I16Zero
Definition: NVPTXISelLowering.h:418
llvm::NVPTXDAGToDAGISel::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
Definition: NVPTXISelDAGToDAG.cpp:44
llvm::NVPTX::PTXLdStInstCode::SHARED
@ SHARED
Definition: NVPTX.h:111
llvm::NVPTXISD::Tld4UnifiedR2DFloatFloat
@ Tld4UnifiedR2DFloatFloat
Definition: NVPTXISelLowering.h:239
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:266
llvm::AArch64CC::NE
@ NE
Definition: AArch64BaseInfo.h:256
llvm::NVPTXISD::TexUnifiedCubeU32FloatLevel
@ TexUnifiedCubeU32FloatLevel
Definition: NVPTXISelLowering.h:232
llvm::SDLoc
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Definition: SelectionDAGNodes.h:1086
pickOpcodeForVT
static Optional< unsigned > pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16, unsigned Opcode_i32, Optional< unsigned > Opcode_i64, unsigned Opcode_f16, unsigned Opcode_f16x2, unsigned Opcode_f32, Optional< unsigned > Opcode_f64)
Definition: NVPTXISelDAGToDAG.cpp:811
EVTs
static ManagedStatic< std::set< EVT, EVT::compareRawBits > > EVTs
Definition: SelectionDAG.cpp:10148
llvm::NVPTXISD::LoadParamV2
@ LoadParamV2
Definition: NVPTXISelLowering.h:71
llvm::ISD::SETGT
@ SETGT
Definition: ISDOpcodes.h:1376
llvm::NVPTXISD::TexUnifiedCubeFloatFloatLevel
@ TexUnifiedCubeFloatFloatLevel
Definition: NVPTXISelLowering.h:228
llvm::ISD::BITCAST
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:848
llvm::NVPTXISD::LoadV4
@ LoadV4
Definition: NVPTXISelLowering.h:63
llvm::NVPTXISD::Suld2DArrayV2I8Clamp
@ Suld2DArrayV2I8Clamp
Definition: NVPTXISelLowering.h:293
llvm::NVPTXISD::Suld1DArrayV4I16Zero
@ Suld1DArrayV4I16Zero
Definition: NVPTXISelLowering.h:394
llvm::ISD::SETNE
@ SETNE
Definition: ISDOpcodes.h:1380
llvm::NVPTXISD::MoveParam
@ MoveParam
Definition: NVPTXISelLowering.h:47
llvm::NVPTXISD::TexCubeS32Float
@ TexCubeS32Float
Definition: NVPTXISelLowering.h:145
llvm::NVPTXISD::TexUnified1DArrayU32Float
@ TexUnified1DArrayU32Float
Definition: NVPTXISelLowering.h:188
llvm::NVPTXISD::TexUnified1DFloatFloatLevel
@ TexUnified1DFloatFloatLevel
Definition: NVPTXISelLowering.h:169
llvm::NVPTXISD::Tex1DU32S32
@ Tex1DU32S32
Definition: NVPTXISelLowering.h:91
llvm::NVPTXSubtarget::hasLDG
bool hasLDG() const
Definition: NVPTXSubtarget.h:75
llvm::NVPTX::PTXCmpMode::EQU
@ EQU
Definition: NVPTX.h:160
llvm::NVPTXISD::Suld1DI16Zero
@ Suld1DI16Zero
Definition: NVPTXISelLowering.h:374
AtomicOrdering.h
llvm::NVPTXISD::Suld3DI16Zero
@ Suld3DI16Zero
Definition: NVPTXISelLowering.h:422
llvm::NVPTXISD::Suld2DV2I32Trap
@ Suld2DV2I32Trap
Definition: NVPTXISelLowering.h:343
llvm::NVPTXISD::Tex1DArrayS32Float
@ Tex1DArrayS32Float
Definition: NVPTXISelLowering.h:100
llvm::NVPTXISD::Suld3DI8Zero
@ Suld3DI8Zero
Definition: NVPTXISelLowering.h:421
llvm::SDValue::getNode
SDNode * getNode() const
get the SDNode which holds the desired result
Definition: SelectionDAGNodes.h:152
llvm::NVPTXISD::Suld1DArrayV4I32Trap
@ Suld1DArrayV4I32Trap
Definition: NVPTXISelLowering.h:335
llvm::NVPTXISD::TexUnifiedCubeS32Float
@ TexUnifiedCubeS32Float
Definition: NVPTXISelLowering.h:229
llvm::ISD::ConstantFP
@ ConstantFP
Definition: ISDOpcodes.h:77
getCodeAddrSpace
static unsigned int getCodeAddrSpace(MemSDNode *N)
Definition: NVPTXISelDAGToDAG.cpp:656
llvm::NVPTX::PTXCmpMode::NotANumber
@ NotANumber
Definition: NVPTX.h:168
llvm::NVPTXISD::Tex2DS32Float
@ Tex2DS32Float
Definition: NVPTXISelLowering.h:112
llvm::NVPTXISD::Tld4G2DS64Float
@ Tld4G2DS64Float
Definition: NVPTXISelLowering.h:160
llvm::NVPTXISD::Tex1DArrayFloatFloatLevel
@ Tex1DArrayFloatFloatLevel
Definition: NVPTXISelLowering.h:97
llvm::NVPTXSubtarget::getTargetLowering
const NVPTXTargetLowering * getTargetLowering() const override
Definition: NVPTXSubtarget.h:64
llvm::NVPTX::PTXCmpMode::NUM
@ NUM
Definition: NVPTX.h:166
llvm::ARM_MB::LD
@ LD
Definition: ARMBaseInfo.h:72
llvm::NVPTXTargetMachine::is64Bit
bool is64Bit() const
Definition: NVPTXTargetMachine.h:46
llvm::NVPTXISD::Suld3DV2I32Trap
@ Suld3DV2I32Trap
Definition: NVPTXISelLowering.h:367
llvm::NVPTXISD::Suld3DV2I8Clamp
@ Suld3DV2I8Clamp
Definition: NVPTXISelLowering.h:305
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::NVPTX::PTXLdStInstCode::LOCAL
@ LOCAL
Definition: NVPTX.h:113
Wrapper
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
Definition: AMDGPUAliasAnalysis.cpp:30
llvm::ISD::SETEQ
@ SETEQ
Definition: ISDOpcodes.h:1375
llvm::MVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: MachineValueType.h:366
llvm::ADDRESS_SPACE_LOCAL
@ ADDRESS_SPACE_LOCAL
Definition: NVPTXBaseInfo.h:26
llvm::SelectionDAG::getVTList
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
Definition: SelectionDAG.cpp:8551
llvm::NVPTXISD::Tld4A2DS64Float
@ Tld4A2DS64Float
Definition: NVPTXISelLowering.h:162
llvm::NVPTXISD::Suld1DV2I64Trap
@ Suld1DV2I64Trap
Definition: NVPTXISelLowering.h:320
llvm::NVPTXISD::TexUnified1DArrayS32S32
@ TexUnified1DArrayS32S32
Definition: NVPTXISelLowering.h:183
llvm::NVPTXISD::TexUnified2DArrayFloatS32
@ TexUnified2DArrayFloatS32
Definition: NVPTXISelLowering.h:203
llvm::NVPTXISD::Suld1DArrayV2I32Clamp
@ Suld1DArrayV2I32Clamp
Definition: NVPTXISelLowering.h:271
llvm::NVPTXISD::Tex3DFloatFloatLevel
@ Tex3DFloatFloatLevel
Definition: NVPTXISelLowering.h:133
llvm::NVPTXISD::Suld1DV2I32Clamp
@ Suld1DV2I32Clamp
Definition: NVPTXISelLowering.h:259
ErrorHandling.h
llvm::NVPTXISD::Tld4UnifiedA2DU64Float
@ Tld4UnifiedA2DU64Float
Definition: NVPTXISelLowering.h:250
llvm::ADDRESS_SPACE_PARAM
@ ADDRESS_SPACE_PARAM
Definition: NVPTXBaseInfo.h:29
llvm::NVPTXISD::Tld4UnifiedR2DS64Float
@ Tld4UnifiedR2DS64Float
Definition: NVPTXISelLowering.h:243
llvm::MemSDNode::getMemoryVT
EVT getMemoryVT() const
Return the type of the in-memory value.
Definition: SelectionDAGNodes.h:1336
ValueTracking.h
llvm::NVPTXISD::Tld4B2DS64Float
@ Tld4B2DS64Float
Definition: NVPTXISelLowering.h:161
llvm::SDNode
Represents one node in the SelectionDAG.
Definition: SelectionDAGNodes.h:455
llvm::NVPTXISD::TexUnified2DU32FloatGrad
@ TexUnified2DU32FloatGrad
Definition: NVPTXISelLowering.h:202
llvm::NVPTXISD::TexUnified3DS32FloatGrad
@ TexUnified3DS32FloatGrad
Definition: NVPTXISelLowering.h:222
llvm::NVPTXISD::Tex2DArrayFloatS32
@ Tex2DArrayFloatS32
Definition: NVPTXISelLowering.h:119
llvm::LoadSDNode
This class is used to represent ISD::LOAD nodes.
Definition: SelectionDAGNodes.h:2281
llvm::MVT::Glue
@ Glue
Definition: MachineValueType.h:262
llvm::NVPTX::PTXCmpMode::GEU
@ GEU
Definition: NVPTX.h:165
llvm::NVPTXISD::Suld2DI16Trap
@ Suld2DI16Trap
Definition: NVPTXISelLowering.h:338
llvm::NVPTXISD::TexCubeFloatFloatLevel
@ TexCubeFloatFloatLevel
Definition: NVPTXISelLowering.h:144
llvm::NVPTXISD::TexUnifiedCubeArrayU32Float
@ TexUnifiedCubeArrayU32Float
Definition: NVPTXISelLowering.h:237
llvm::NVPTXISD::Tld4B2DU64Float
@ Tld4B2DU64Float
Definition: NVPTXISelLowering.h:165
llvm::NVPTXISD::Tex1DArrayS32FloatLevel
@ Tex1DArrayS32FloatLevel
Definition: NVPTXISelLowering.h:101
llvm::NVPTXISD::Tld4UnifiedB2DS64Float
@ Tld4UnifiedB2DS64Float
Definition: NVPTXISelLowering.h:245
llvm::NVPTXISD::TexUnified1DArrayS32Float
@ TexUnified1DArrayS32Float
Definition: NVPTXISelLowering.h:184
llvm::ISD::SETULE
@ SETULE
Definition: ISDOpcodes.h:1370
llvm::NVPTXISD::Suld3DI64Trap
@ Suld3DI64Trap
Definition: NVPTXISelLowering.h:364
llvm::NVPTXISD::Suld1DV2I64Zero
@ Suld1DV2I64Zero
Definition: NVPTXISelLowering.h:380
llvm::NVPTXISD::Tex3DFloatS32
@ Tex3DFloatS32
Definition: NVPTXISelLowering.h:131
llvm::NVPTXISD::TexUnified2DArrayU32S32
@ TexUnified2DArrayU32S32
Definition: NVPTXISelLowering.h:211
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:128
llvm::NVPTXISD::Suld1DV4I8Clamp
@ Suld1DV4I8Clamp
Definition: NVPTXISelLowering.h:261
llvm::NVPTXISD::Suld3DV4I32Trap
@ Suld3DV4I32Trap
Definition: NVPTXISelLowering.h:371
llvm::Optional< unsigned >
llvm::NVPTXISD::Suld2DV2I16Clamp
@ Suld2DV2I16Clamp
Definition: NVPTXISelLowering.h:282
llvm::NVPTXISD::Tex3DS32S32
@ Tex3DS32S32
Definition: NVPTXISelLowering.h:135
llvm::NVPTXISD::Suld3DV2I32Clamp
@ Suld3DV2I32Clamp
Definition: NVPTXISelLowering.h:307
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::MemSDNode
This is an abstract virtual class for memory operations.
Definition: SelectionDAGNodes.h:1254
llvm::InlineAsm::Constraint_m
@ Constraint_m
Definition: InlineAsm.h:247
llvm::NVPTXISD::TexUnified3DS32S32
@ TexUnified3DS32S32
Definition: NVPTXISelLowering.h:219
llvm::NVPTXISD::Suld2DArrayI8Trap
@ Suld2DArrayI8Trap
Definition: NVPTXISelLowering.h:349
getPTXCmpMode
static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ)
Definition: NVPTXISelDAGToDAG.cpp:537
llvm::NVPTXISD::Suld1DArrayV4I16Trap
@ Suld1DArrayV4I16Trap
Definition: NVPTXISelLowering.h:334
llvm::NVPTXISD::Suld1DV4I32Trap
@ Suld1DV4I32Trap
Definition: NVPTXISelLowering.h:323
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:116
llvm::NVPTXISD::Suld3DI32Trap
@ Suld3DI32Trap
Definition: NVPTXISelLowering.h:363
llvm::NVPTXISD::Suld2DI8Zero
@ Suld2DI8Zero
Definition: NVPTXISelLowering.h:397
llvm::NVPTXISD::Suld2DV4I32Clamp
@ Suld2DV4I32Clamp
Definition: NVPTXISelLowering.h:287
llvm::NVPTXISD::Tld4R2DS64Float
@ Tld4R2DS64Float
Definition: NVPTXISelLowering.h:159
llvm::NVPTXISD::Suld2DArrayV2I64Trap
@ Suld2DArrayV2I64Trap
Definition: NVPTXISelLowering.h:356
llvm::NVPTXISD::TexUnified1DArrayU32S32
@ TexUnified1DArrayU32S32
Definition: NVPTXISelLowering.h:187
llvm::NVPTXISD::Tex2DArrayU32FloatLevel
@ Tex2DArrayU32FloatLevel
Definition: NVPTXISelLowering.h:129
llvm::LSBaseSDNode::isIndexed
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
Definition: SelectionDAGNodes.h:2269
llvm::ISD::SETOEQ
@ SETOEQ
Definition: ISDOpcodes.h:1358
llvm::NVPTXISD::TexUnified2DS32S32
@ TexUnified2DS32S32
Definition: NVPTXISelLowering.h:195
llvm::NVPTXISD::TexUnifiedCubeArrayS32Float
@ TexUnifiedCubeArrayS32Float
Definition: NVPTXISelLowering.h:235
llvm::NVPTXISD::Suld3DV2I64Zero
@ Suld3DV2I64Zero
Definition: NVPTXISelLowering.h:428
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::NVPTXISD::Suld2DV2I8Trap
@ Suld2DV2I8Trap
Definition: NVPTXISelLowering.h:341
llvm::NVPTXISD::Suld1DV2I8Trap
@ Suld1DV2I8Trap
Definition: NVPTXISelLowering.h:317
llvm::NVPTXISD::TexUnified2DS32FloatLevel
@ TexUnified2DS32FloatLevel
Definition: NVPTXISelLowering.h:197
llvm::NVPTXISD::LDGV4
@ LDGV4
Definition: NVPTXISelLowering.h:65
llvm::NVPTXISD::Tex1DArrayS32FloatGrad
@ Tex1DArrayS32FloatGrad
Definition: NVPTXISelLowering.h:102
llvm::ISD::SETUEQ
@ SETUEQ
Definition: ISDOpcodes.h:1366
llvm::NVPTXTargetMachine
NVPTXTargetMachine.
Definition: NVPTXTargetMachine.h:24
llvm::NVPTXISD::Tex2DFloatFloatLevel
@ Tex2DFloatFloatLevel
Definition: NVPTXISelLowering.h:109
llvm::NVPTX::PTXLdStInstCode::VecType
VecType
Definition: NVPTX.h:121
llvm::NVPTXISD::TexUnified3DU32Float
@ TexUnified3DU32Float
Definition: NVPTXISelLowering.h:224
llvm::NVPTXISD::Tex1DU32FloatLevel
@ Tex1DU32FloatLevel
Definition: NVPTXISelLowering.h:93
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::AtomicOrdering::Monotonic
@ Monotonic
llvm::NVPTXISD::Suld3DV4I16Clamp
@ Suld3DV4I16Clamp
Definition: NVPTXISelLowering.h:310
llvm::NVPTXISD::Suld1DArrayV2I32Trap
@ Suld1DArrayV2I32Trap
Definition: NVPTXISelLowering.h:331
llvm::NVPTXISD::Suld1DI32Trap
@ Suld1DI32Trap
Definition: NVPTXISelLowering.h:315
NVPTXUtilities.h
llvm::NVPTXISD::Suld2DArrayV4I8Clamp
@ Suld2DArrayV4I8Clamp
Definition: NVPTXISelLowering.h:297
llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:130
llvm::NVPTXISD::Suld1DArrayV4I8Zero
@ Suld1DArrayV4I8Zero
Definition: NVPTXISelLowering.h:393
llvm::NVPTXISD::Suld3DV2I8Zero
@ Suld3DV2I8Zero
Definition: NVPTXISelLowering.h:425
llvm::MVT::SimpleValueType
SimpleValueType
Definition: MachineValueType.h:33
llvm::AArch64CC::LT
@ LT
Definition: AArch64BaseInfo.h:266
llvm::NVPTXISD::Suld2DArrayI64Clamp
@ Suld2DArrayI64Clamp
Definition: NVPTXISelLowering.h:292
llvm::NVPTXISD::Suld1DV4I16Clamp
@ Suld1DV4I16Clamp
Definition: NVPTXISelLowering.h:262
CommandLine.h
NVPTXISelDAGToDAG.h
llvm::NVPTXISD::TexCubeU32Float
@ TexCubeU32Float
Definition: NVPTXISelLowering.h:147
llvm::NVPTXISD::Suld2DArrayV2I64Zero
@ Suld2DArrayV2I64Zero
Definition: NVPTXISelLowering.h:416
llvm::NVPTXISD::Suld3DI16Clamp
@ Suld3DI16Clamp
Definition: NVPTXISelLowering.h:302
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:43
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1551
llvm::SDNode::getOpcode
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
Definition: SelectionDAGNodes.h:629
llvm::NVPTXISD::TexUnified2DArrayS32FloatGrad
@ TexUnified2DArrayS32FloatGrad
Definition: NVPTXISelLowering.h:210
llvm::NVPTXISD::Tex1DS32S32
@ Tex1DS32S32
Definition: NVPTXISelLowering.h:87
llvm::NVPTXISD::LoadParamV4
@ LoadParamV4
Definition: NVPTXISelLowering.h:72
llvm::NVPTXISD::Suld3DI32Zero
@ Suld3DI32Zero
Definition: NVPTXISelLowering.h:423
GlobalValue.h
llvm::NVPTXISD::Suld2DV4I8Zero
@ Suld2DV4I8Zero
Definition: NVPTXISelLowering.h:405
llvm::SelectionDAG::getTargetFrameIndex
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:688
llvm::NVPTXISD::Suld1DV4I8Trap
@ Suld1DV4I8Trap
Definition: NVPTXISelLowering.h:321
llvm::NVPTXISD::Suld2DV4I16Zero
@ Suld2DV4I16Zero
Definition: NVPTXISelLowering.h:406
llvm::SDValue::getValueType
EVT getValueType() const
Return the ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:1121
llvm::NVPTXISD::Tld4UnifiedB2DFloatFloat
@ Tld4UnifiedB2DFloatFloat
Definition: NVPTXISelLowering.h:241
llvm::NVPTXISD::Suld3DV4I32Zero
@ Suld3DV4I32Zero
Definition: NVPTXISelLowering.h:431
llvm::NVPTXISD::Suld2DArrayV4I32Zero
@ Suld2DArrayV4I32Zero
Definition: NVPTXISelLowering.h:419
llvm::NVPTXISD::Suld3DV2I8Trap
@ Suld3DV2I8Trap
Definition: NVPTXISelLowering.h:365
llvm::ISD::SETGE
@ SETGE
Definition: ISDOpcodes.h:1377
llvm::NVPTXISD::Tex3DFloatFloatGrad
@ Tex3DFloatFloatGrad
Definition: NVPTXISelLowering.h:134
llvm::NVPTXISD::Tex2DArrayFloatFloatLevel
@ Tex2DArrayFloatFloatLevel
Definition: NVPTXISelLowering.h:121
llvm::NVPTXISD::TexUnified1DFloatS32
@ TexUnified1DFloatS32
Definition: NVPTXISelLowering.h:167
llvm::ADDRESS_SPACE_GLOBAL
@ ADDRESS_SPACE_GLOBAL
Definition: NVPTXBaseInfo.h:23
llvm::NVPTXISD::TexCubeArrayS32FloatLevel
@ TexCubeArrayS32FloatLevel
Definition: NVPTXISelLowering.h:152
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:35
llvm::NVPTXISD::LoadV2
@ LoadV2
Definition: NVPTXISelLowering.h:62
llvm::NVPTXISD::Suld1DArrayI32Zero
@ Suld1DArrayI32Zero
Definition: NVPTXISelLowering.h:387
llvm::NVPTXISD::Tex2DArrayS32S32
@ Tex2DArrayS32S32
Definition: NVPTXISelLowering.h:123
llvm::NVPTXISD::Suld2DV2I64Zero
@ Suld2DV2I64Zero
Definition: NVPTXISelLowering.h:404
llvm::SelectionDAGISel::OptLevel
CodeGenOpt::Level OptLevel
Definition: SelectionDAGISel.h:51
llvm::NVPTXISD::Tex1DU32FloatGrad
@ Tex1DU32FloatGrad
Definition: NVPTXISelLowering.h:94
llvm::MVT::f64
@ f64
Definition: MachineValueType.h:56
llvm::NVPTXISD::Suld2DI32Clamp
@ Suld2DI32Clamp
Definition: NVPTXISelLowering.h:279
llvm::isShiftedMask_64
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:485
llvm::EVT::getVectorNumElements
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:309
EQ
#define EQ(a, b)
Definition: regexec.c:112
llvm::NVPTXISD::Tex3DS32Float
@ Tex3DS32Float
Definition: NVPTXISelLowering.h:136
llvm::NVPTXISD::Suld2DV2I64Clamp
@ Suld2DV2I64Clamp
Definition: NVPTXISelLowering.h:284
llvm::NVPTXISD::TexUnified2DFloatFloat
@ TexUnified2DFloatFloat
Definition: NVPTXISelLowering.h:192
llvm::NVPTXISD::Tex2DFloatFloat
@ Tex2DFloatFloat
Definition: NVPTXISelLowering.h:108
llvm::NVPTXISD::Suld1DV2I8Clamp
@ Suld1DV2I8Clamp
Definition: NVPTXISelLowering.h:257
llvm::NVPTXISD::Suld1DArrayV2I8Zero
@ Suld1DArrayV2I8Zero
Definition: NVPTXISelLowering.h:389
llvm::NVPTXISD::TexUnifiedCubeFloatFloat
@ TexUnifiedCubeFloatFloat
Definition: NVPTXISelLowering.h:227
llvm::NVPTXISD::Tld4G2DFloatFloat
@ Tld4G2DFloatFloat
Definition: NVPTXISelLowering.h:156
Param
Value * Param
Definition: NVPTXLowerArgs.cpp:163
llvm::MVT::SimpleTy
SimpleValueType SimpleTy
Definition: MachineValueType.h:321
llvm::NVPTXISD::StoreRetval
@ StoreRetval
Definition: NVPTXISelLowering.h:78
llvm::ISD::SRA
@ SRA
Definition: ISDOpcodes.h:658
llvm::NVPTXISD::Tld4UnifiedB2DU64Float
@ Tld4UnifiedB2DU64Float
Definition: NVPTXISelLowering.h:249
llvm::NVPTXISD::Suld2DArrayI16Clamp
@ Suld2DArrayI16Clamp
Definition: NVPTXISelLowering.h:290
llvm::NVPTXISD::Tex2DArrayU32FloatGrad
@ Tex2DArrayU32FloatGrad
Definition: NVPTXISelLowering.h:130
llvm::NVPTXISD::TexUnifiedCubeS32FloatLevel
@ TexUnifiedCubeS32FloatLevel
Definition: NVPTXISelLowering.h:230
llvm::NVPTXISD::TexUnified1DFloatFloatGrad
@ TexUnified1DFloatFloatGrad
Definition: NVPTXISelLowering.h:170
llvm::SelectionDAGISel::ReplaceNode
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
Definition: SelectionDAGISel.h:227
llvm::NVPTXISD::TexUnified1DArrayFloatS32
@ TexUnified1DArrayFloatS32
Definition: NVPTXISelLowering.h:179
llvm::NVPTXISD::Suld1DArrayV2I16Trap
@ Suld1DArrayV2I16Trap
Definition: NVPTXISelLowering.h:330
llvm::NVPTXISD::Suld1DArrayV2I32Zero
@ Suld1DArrayV2I32Zero
Definition: NVPTXISelLowering.h:391
llvm::ISD::ADDRSPACECAST
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
Definition: ISDOpcodes.h:852
llvm::NVPTXISD::Suld1DV4I16Zero
@ Suld1DV4I16Zero
Definition: NVPTXISelLowering.h:382
llvm::NVPTXISD::Tex2DU32FloatLevel
@ Tex2DU32FloatLevel
Definition: NVPTXISelLowering.h:117
llvm::NVPTXISD::TexUnified1DU32Float
@ TexUnified1DU32Float
Definition: NVPTXISelLowering.h:176
llvm::MCID::Flag
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:146
llvm::NVPTXISD::Suld2DArrayV4I8Trap
@ Suld2DArrayV4I8Trap
Definition: NVPTXISelLowering.h:357
llvm::NVPTXISD::Suld2DArrayI8Clamp
@ Suld2DArrayI8Clamp
Definition: NVPTXISelLowering.h:289
llvm::NVPTXISD::TexUnifiedCubeArrayU32FloatLevel
@ TexUnifiedCubeArrayU32FloatLevel
Definition: NVPTXISelLowering.h:238
llvm::NVPTXISD::Tex1DArrayU32S32
@ Tex1DArrayU32S32
Definition: NVPTXISelLowering.h:103
llvm::NVPTXISD::Suld1DArrayV4I16Clamp
@ Suld1DArrayV4I16Clamp
Definition: NVPTXISelLowering.h:274
llvm::ADDRESS_SPACE_CONST
@ ADDRESS_SPACE_CONST
Definition: NVPTXBaseInfo.h:25
llvm::NVPTXTargetLowering::usePrecSqrtF32
bool usePrecSqrtF32() const
Definition: NVPTXISelLowering.cpp:104
llvm::NVPTXISD::Tex1DS32Float
@ Tex1DS32Float
Definition: NVPTXISelLowering.h:88
llvm::NVPTXISD::Suld2DArrayV4I32Clamp
@ Suld2DArrayV4I32Clamp
Definition: NVPTXISelLowering.h:299
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
llvm::NVPTXISD::Suld1DV2I8Zero
@ Suld1DV2I8Zero
Definition: NVPTXISelLowering.h:377
llvm::NVPTXISD::Suld2DV4I8Clamp
@ Suld2DV4I8Clamp
Definition: NVPTXISelLowering.h:285
llvm::NVPTXISD::Tex2DArrayU32S32
@ Tex2DArrayU32S32
Definition: NVPTXISelLowering.h:127
llvm::codeview::EncodedFramePtrReg::BasePtr
@ BasePtr
llvm::ISD::ATOMIC_STORE
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1118
llvm::AArch64CC::LE
@ LE
Definition: AArch64BaseInfo.h:268
llvm::FrameIndexSDNode
Definition: SelectionDAGNodes.h:1744
llvm::NVPTXISD::StoreV4
@ StoreV4
Definition: NVPTXISelLowering.h:69
llvm::NVPTXISD::TexCubeArrayFloatFloat
@ TexCubeArrayFloatFloat
Definition: NVPTXISelLowering.h:149
llvm::NVPTXISD::LDGV2
@ LDGV2
Definition: NVPTXISelLowering.h:64
llvm::NVPTXISD::TexUnified3DS32FloatLevel
@ TexUnified3DS32FloatLevel
Definition: NVPTXISelLowering.h:221
llvm::NVPTXTargetMachine::useShortPointers
bool useShortPointers() const
Definition: NVPTXTargetMachine.h:47
llvm::NVPTXISD::Suld2DV4I32Zero
@ Suld2DV4I32Zero
Definition: NVPTXISelLowering.h:407
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:632
llvm::ISD::SETOLT
@ SETOLT
Definition: ISDOpcodes.h:1361
llvm::ISD::TargetGlobalAddress
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:164
llvm::NVPTXISD::Suld1DI64Clamp
@ Suld1DI64Clamp
Definition: NVPTXISelLowering.h:256
llvm::NVPTXISD::LDUV4
@ LDUV4
Definition: NVPTXISelLowering.h:67
llvm::NVPTXISD::Suld1DV4I32Clamp
@ Suld1DV4I32Clamp
Definition: NVPTXISelLowering.h:263
llvm::NVPTXISD::Suld2DArrayV2I32Trap
@ Suld2DArrayV2I32Trap
Definition: NVPTXISelLowering.h:355
llvm::NVPTXISD::Tex2DS32FloatGrad
@ Tex2DS32FloatGrad
Definition: NVPTXISelLowering.h:114
llvm::NVPTXISD::TexUnified2DFloatFloatGrad
@ TexUnified2DFloatFloatGrad
Definition: NVPTXISelLowering.h:194
llvm::NVPTX::PTXLdStInstCode::Scalar
@ Scalar
Definition: NVPTX.h:122
llvm::NVPTXISD::Suld3DV4I16Trap
@ Suld3DV4I16Trap
Definition: NVPTXISelLowering.h:370
llvm::SDValue::getValueSizeInBits
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
Definition: SelectionDAGNodes.h:192
llvm::NVPTXISD::TexUnified1DArrayS32FloatLevel
@ TexUnified1DArrayS32FloatLevel
Definition: NVPTXISelLowering.h:185
llvm::NVPTXISD::Suld2DV2I16Zero
@ Suld2DV2I16Zero
Definition: NVPTXISelLowering.h:402
llvm::None
const NoneType None
Definition: None.h:23
llvm::MemSDNode::isVolatile
bool isVolatile() const
Definition: SelectionDAGNodes.h:1295
llvm::NVPTXISD::Suld1DI8Clamp
@ Suld1DI8Clamp
Definition: NVPTXISelLowering.h:253
llvm::ISD::SETOLE
@ SETOLE
Definition: ISDOpcodes.h:1362
llvm::ISD::SETUGT
@ SETUGT
Definition: ISDOpcodes.h:1367
llvm::MVT::getScalarType
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Definition: MachineValueType.h:515
llvm::NVPTXISD::StoreRetvalV4
@ StoreRetvalV4
Definition: NVPTXISelLowering.h:80
llvm::NVPTXISD::TexCubeU32FloatLevel
@ TexCubeU32FloatLevel
Definition: NVPTXISelLowering.h:148
llvm::NVPTXISD::Tex2DArrayFloatFloat
@ Tex2DArrayFloatFloat
Definition: NVPTXISelLowering.h:120
llvm::NVPTXISD::TexUnified3DS32Float
@ TexUnified3DS32Float
Definition: NVPTXISelLowering.h:220
llvm::NVPTXISD::Tex2DArrayS32Float
@ Tex2DArrayS32Float
Definition: NVPTXISelLowering.h:124
llvm::NVPTXISD::Tld4G2DU64Float
@ Tld4G2DU64Float
Definition: NVPTXISelLowering.h:164
llvm::MVT::isFloatingPoint
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: MachineValueType.h:340
llvm::NVPTXISD::Suld3DV4I8Zero
@ Suld3DV4I8Zero
Definition: NVPTXISelLowering.h:429
llvm::NVPTXSubtarget
Definition: NVPTXSubtarget.h:31
llvm::NVPTXISD::TexCubeArrayU32FloatLevel
@ TexCubeArrayU32FloatLevel
Definition: NVPTXISelLowering.h:154
llvm::NVPTXISD::TexCubeS32FloatLevel
@ TexCubeS32FloatLevel
Definition: NVPTXISelLowering.h:146
llvm::NVPTXISD::TexUnified1DArrayS32FloatGrad
@ TexUnified1DArrayS32FloatGrad
Definition: NVPTXISelLowering.h:186
llvm::NVPTXISD::TexUnified3DU32FloatLevel
@ TexUnified3DU32FloatLevel
Definition: NVPTXISelLowering.h:225
llvm::NVPTXISD::Tld4A2DU64Float
@ Tld4A2DU64Float
Definition: NVPTXISelLowering.h:166
llvm::isKernelFunction
bool isKernelFunction(const Function &F)
Definition: NVPTXUtilities.cpp:274
llvm::ISD::SETUNE
@ SETUNE
Definition: ISDOpcodes.h:1371
llvm::NVPTXISD::Suld3DV2I16Zero
@ Suld3DV2I16Zero
Definition: NVPTXISelLowering.h:426
llvm::NVPTXISD::Suld2DArrayI64Trap
@ Suld2DArrayI64Trap
Definition: NVPTXISelLowering.h:352
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:626
llvm::NVPTXISD::Tld4UnifiedG2DFloatFloat
@ Tld4UnifiedG2DFloatFloat
Definition: NVPTXISelLowering.h:240
llvm::AtomicOrdering
AtomicOrdering
Atomic ordering for LLVM's memory model.
Definition: AtomicOrdering.h:56
llvm::NVPTX::PTXLdStInstCode::V4
@ V4
Definition: NVPTX.h:124
llvm::NVPTXISD::Suld3DI64Zero
@ Suld3DI64Zero
Definition: NVPTXISelLowering.h:424
llvm::NVPTXISD::TexUnified1DArrayFloatFloatGrad
@ TexUnified1DArrayFloatFloatGrad
Definition: NVPTXISelLowering.h:182
llvm::getUnderlyingObjects
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
Definition: ValueTracking.cpp:4423
llvm::NVPTXISD::Tex3DFloatFloat
@ Tex3DFloatFloat
Definition: NVPTXISelLowering.h:132
llvm::NVPTXISD::Suld1DArrayV2I8Clamp
@ Suld1DArrayV2I8Clamp
Definition: NVPTXISelLowering.h:269
llvm::NVPTXISD::Tex1DArrayFloatS32
@ Tex1DArrayFloatS32
Definition: NVPTXISelLowering.h:95
llvm::NVPTXISD::Tex3DU32FloatLevel
@ Tex3DU32FloatLevel
Definition: NVPTXISelLowering.h:141
llvm::NVPTXISD::Tex2DArrayS32FloatLevel
@ Tex2DArrayS32FloatLevel
Definition: NVPTXISelLowering.h:125
llvm::MVT::v2f16
@ v2f16
Definition: MachineValueType.h:134
uint64_t
llvm::NVPTXISD::Suld3DI8Trap
@ Suld3DI8Trap
Definition: NVPTXISelLowering.h:361
llvm::MemSDNode::getMemOperand
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Definition: SelectionDAGNodes.h:1340
llvm::NVPTXISD::LoadParam
@ LoadParam
Definition: NVPTXISelLowering.h:70
llvm::NVPTXISD::Tex1DU32Float
@ Tex1DU32Float
Definition: NVPTXISelLowering.h:92
llvm::ISD::LOAD
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:921
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:80
llvm::NVPTXISD::TexUnified2DU32S32
@ TexUnified2DU32S32
Definition: NVPTXISelLowering.h:199
llvm::NVPTXDAGToDAGISel::NVPTXDAGToDAGISel
NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, CodeGenOpt::Level OptLevel)
Definition: NVPTXISelDAGToDAG.cpp:38
llvm::NVPTXISD::Tex1DArrayFloatFloat
@ Tex1DArrayFloatFloat
Definition: NVPTXISelLowering.h:96
llvm::NVPTXISD::Tex2DU32Float
@ Tex2DU32Float
Definition: NVPTXISelLowering.h:116
llvm::NVPTXISD::Suld1DArrayI16Trap
@ Suld1DArrayI16Trap
Definition: NVPTXISelLowering.h:326
llvm::NVPTXISD::TexUnified2DU32Float
@ TexUnified2DU32Float
Definition: NVPTXISelLowering.h:200
llvm::NVPTXISD::Suld1DV2I32Trap
@ Suld1DV2I32Trap
Definition: NVPTXISelLowering.h:319
llvm::NVPTXISD::Suld3DI64Clamp
@ Suld3DI64Clamp
Definition: NVPTXISelLowering.h:304
llvm::NVPTXISD::Suld2DV2I8Clamp
@ Suld2DV2I8Clamp
Definition: NVPTXISelLowering.h:281
llvm::NVPTXISD::Suld1DArrayI8Clamp
@ Suld1DArrayI8Clamp
Definition: NVPTXISelLowering.h:265
llvm::NVPTXISD::Tex3DS32FloatLevel
@ Tex3DS32FloatLevel
Definition: NVPTXISelLowering.h:137
llvm::NVPTXISD::Tex1DArrayU32FloatGrad
@ Tex1DArrayU32FloatGrad
Definition: NVPTXISelLowering.h:106
llvm::NVPTXISD::Tex2DArrayS32FloatGrad
@ Tex2DArrayS32FloatGrad
Definition: NVPTXISelLowering.h:126
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:511
llvm::NVPTXISD::TexUnified2DArrayU32FloatLevel
@ TexUnified2DArrayU32FloatLevel
Definition: NVPTXISelLowering.h:213
llvm::NVPTX::PTXLdStInstCode::Float
@ Float
Definition: NVPTX.h:118
llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition: SelectionDAGNodes.h:904
llvm::NVPTXISD::Suld2DArrayI16Trap
@ Suld2DArrayI16Trap
Definition: NVPTXISelLowering.h:350
NVPTXBaseInfo.h
llvm::NVPTXISD::Suld3DI32Clamp
@ Suld3DI32Clamp
Definition: NVPTXISelLowering.h:303
llvm::NVPTXISD::Suld1DArrayV4I32Clamp
@ Suld1DArrayV4I32Clamp
Definition: NVPTXISelLowering.h:275
llvm::countTrailingOnes
unsigned countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:525
llvm::CondCodeSDNode
Definition: SelectionDAGNodes.h:2211
llvm::ADDRESS_SPACE_SHARED
@ ADDRESS_SPACE_SHARED
Definition: NVPTXBaseInfo.h:24
llvm::LoadSDNode::getExtensionType
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Definition: SelectionDAGNodes.h:2296
llvm::NVPTXISD::Suld2DArrayV2I32Zero
@ Suld2DArrayV2I32Zero
Definition: NVPTXISelLowering.h:415
llvm::NVPTXISD::TexUnified2DArrayFloatFloatGrad
@ TexUnified2DArrayFloatFloatGrad
Definition: NVPTXISelLowering.h:206
canLowerToLDG
static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget, unsigned CodeAddrSpace, MachineFunction *F)
Definition: NVPTXISelDAGToDAG.cpp:676
llvm::HighlightColor::Address
@ Address
llvm::NVPTXISD::TexUnified2DArrayS32FloatLevel
@ TexUnified2DArrayS32FloatLevel
Definition: NVPTXISelLowering.h:209
llvm::NVPTXISD::TexUnified2DArrayS32Float
@ TexUnified2DArrayS32Float
Definition: NVPTXISelLowering.h:208
llvm::NVPTXISD::Suld1DArrayV2I64Trap
@ Suld1DArrayV2I64Trap
Definition: NVPTXISelLowering.h:332
llvm::StoreSDNode
This class is used to represent ISD::STORE nodes.
Definition: SelectionDAGNodes.h:2309
llvm::MVT::i8
@ i8
Definition: MachineValueType.h:44
llvm::ISD::SETOGT
@ SETOGT
Definition: ISDOpcodes.h:1359
llvm::NVPTXISD::Tex1DFloatFloatLevel
@ Tex1DFloatFloatLevel
Definition: NVPTXISelLowering.h:85
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::NVPTXISD::TexUnified3DFloatS32
@ TexUnified3DFloatS32
Definition: NVPTXISelLowering.h:215
llvm::NVPTXISD::TexUnifiedCubeArrayFloatFloat
@ TexUnifiedCubeArrayFloatFloat
Definition: NVPTXISelLowering.h:233
llvm::NVPTXISD::TexUnified3DFloatFloatLevel
@ TexUnified3DFloatFloatLevel
Definition: NVPTXISelLowering.h:217
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
llvm::NVPTXISD::Suld2DI32Zero
@ Suld2DI32Zero
Definition: NVPTXISelLowering.h:399
llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition: MachineValueType.h:860
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::NVPTXISD::Suld3DV2I32Zero
@ Suld3DV2I32Zero
Definition: NVPTXISelLowering.h:427
llvm::NVPTXISD::TexUnified1DArrayU32FloatLevel
@ TexUnified1DArrayU32FloatLevel
Definition: NVPTXISelLowering.h:189
llvm::ISD::SETULT
@ SETULT
Definition: ISDOpcodes.h:1369
llvm::ConstantSDNode::getZExtValue
uint64_t getZExtValue() const
Definition: SelectionDAGNodes.h:1571
base
therefore end up llgh r3 lr r0 br r14 but truncating the load would lh r3 br r14 Functions ret i64 and ought to be implemented ngr r0 br r14 but two address optimizations reverse the order of the AND and ngr r2 lgr r0 br r14 CodeGen SystemZ and ll has several examples of this Out of range displacements are usually handled by loading the full address into a register In many cases it would be better to create an anchor point instead E g i64 base
Definition: README.txt:125
llvm::NVPTXISD::Suld1DArrayI16Clamp
@ Suld1DArrayI16Clamp
Definition: NVPTXISelLowering.h:266
llvm::NVPTXISD::Suld2DV2I32Zero
@ Suld2DV2I32Zero
Definition: NVPTXISelLowering.h:403
llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1355
llvm::NVPTXISD::Tex2DFloatFloatGrad
@ Tex2DFloatFloatGrad
Definition: NVPTXISelLowering.h:110
llvm::SelectionDAGISel::CurDAG
SelectionDAG * CurDAG
Definition: SelectionDAGISel.h:47
llvm::SelectionDAG::getMachineNode
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
Definition: SelectionDAG.cpp:8989
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::SelectionDAG::setNodeMemRefs
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
Definition: SelectionDAG.cpp:8757
llvm::NVPTXISD::TexUnified2DFloatFloatLevel
@ TexUnified2DFloatFloatLevel
Definition: NVPTXISelLowering.h:193
llvm::NVPTXISD::TexUnified1DU32FloatLevel
@ TexUnified1DU32FloatLevel
Definition: NVPTXISelLowering.h:177
llvm::NVPTXISD::Tld4R2DFloatFloat
@ Tld4R2DFloatFloat
Definition: NVPTXISelLowering.h:155
llvm::StoreSDNode::getValue
const SDValue & getValue() const
Definition: SelectionDAGNodes.h:2330
llvm::NVPTXISD::Suld2DArrayV2I32Clamp
@ Suld2DArrayV2I32Clamp
Definition: NVPTXISelLowering.h:295
llvm::NVPTXISD::Tld4B2DFloatFloat
@ Tld4B2DFloatFloat
Definition: NVPTXISelLowering.h:157
llvm::NVPTXTargetLowering::getDivF32Level
int getDivF32Level() const
Definition: NVPTXISelLowering.cpp:91
llvm::NVPTXISD::Tex1DArrayU32FloatLevel
@ Tex1DArrayU32FloatLevel
Definition: NVPTXISelLowering.h:105
llvm::NVPTXISD::Tex1DS32FloatGrad
@ Tex1DS32FloatGrad
Definition: NVPTXISelLowering.h:90
llvm::NVPTXISD::Tex3DS32FloatGrad
@ Tex3DS32FloatGrad
Definition: NVPTXISelLowering.h:138
llvm::NVPTXISD::Suld2DArrayV2I16Clamp
@ Suld2DArrayV2I16Clamp
Definition: NVPTXISelLowering.h:294
llvm::NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel
@ TexUnifiedCubeArrayFloatFloatLevel
Definition: NVPTXISelLowering.h:234
llvm::NVPTXISD::LDUV2
@ LDUV2
Definition: NVPTXISelLowering.h:66
llvm::MachineFunction
Definition: MachineFunction.h:230
llvm::NVPTX::PTXLdStInstCode::FromType
FromType
Definition: NVPTX.h:115
llvm::NVPTXISD::TexUnified3DU32FloatGrad
@ TexUnified3DU32FloatGrad
Definition: NVPTXISelLowering.h:226
llvm::NVPTXISD::Suld2DI8Clamp
@ Suld2DI8Clamp
Definition: NVPTXISelLowering.h:277
llvm::NVPTXTargetLowering::useF32FTZ
bool useF32FTZ(const MachineFunction &MF) const
Definition: NVPTXISelLowering.cpp:114
llvm::NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand
bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Implement addressing mode selection for inline asm expressions.
Definition: NVPTXISelDAGToDAG.cpp:3691
llvm::createNVPTXISelDag
FunctionPass * createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel)
createNVPTXISelDag - This pass converts a legalized DAG into a NVPTX-specific DAG,...
Definition: NVPTXISelDAGToDAG.cpp:33
llvm::NVPTX::PTXLdStInstCode::CONSTANT
@ CONSTANT
Definition: NVPTX.h:110
llvm::NVPTXISD::TexCubeArrayS32Float
@ TexCubeArrayS32Float
Definition: NVPTXISelLowering.h:151
llvm::NVPTXISD::Tex1DArrayU32Float
@ Tex1DArrayU32Float
Definition: NVPTXISelLowering.h:104
llvm::NVPTX::PTXCmpMode::GTU
@ GTU
Definition: NVPTX.h:164
llvm::NVPTXISD::Suld2DI64Zero
@ Suld2DI64Zero
Definition: NVPTXISelLowering.h:400
llvm::NVPTX::PTXCmpMode::LEU
@ LEU
Definition: NVPTX.h:163
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:100
llvm::AArch64CC::GE
@ GE
Definition: AArch64BaseInfo.h:265
llvm::EVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:155
llvm::NVPTXISD::Suld2DArrayV4I16Clamp
@ Suld2DArrayV4I16Clamp
Definition: NVPTXISelLowering.h:298
llvm::NVPTXISD::Suld2DArrayV2I8Trap
@ Suld2DArrayV2I8Trap
Definition: NVPTXISelLowering.h:353
llvm::MVT::i64
@ i64
Definition: MachineValueType.h:47
llvm::countTrailingZeros
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: MathExtras.h:156
llvm::NVPTX::PTXLdStInstCode::Signed
@ Signed
Definition: NVPTX.h:117
llvm::NVPTXISD::Suld3DV4I16Zero
@ Suld3DV4I16Zero
Definition: NVPTXISelLowering.h:430
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::NVPTXISD::Suld2DV4I32Trap
@ Suld2DV4I32Trap
Definition: NVPTXISelLowering.h:347
if
if(llvm_vc STREQUAL "") set(fake_version_inc "$
Definition: CMakeLists.txt:14
llvm::NVPTXISD::Tex2DS32S32
@ Tex2DS32S32
Definition: NVPTXISelLowering.h:111
llvm::NVPTXISD::TexUnified2DArrayFloatFloat
@ TexUnified2DArrayFloatFloat
Definition: NVPTXISelLowering.h:204
llvm::NVPTXISD::Suld2DArrayI16Zero
@ Suld2DArrayI16Zero
Definition: NVPTXISelLowering.h:410
llvm::SDValue::getOperand
const SDValue & getOperand(unsigned i) const
Definition: SelectionDAGNodes.h:1129
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::NVPTXISD::TexUnified1DArrayFloatFloatLevel
@ TexUnified1DArrayFloatFloatLevel
Definition: NVPTXISelLowering.h:181
llvm::NVPTXISD::Tex3DU32S32
@ Tex3DU32S32
Definition: NVPTXISelLowering.h:139
llvm::NVPTXISD::Suld1DArrayV2I64Zero
@ Suld1DArrayV2I64Zero
Definition: NVPTXISelLowering.h:392
llvm::NVPTXISD::Tex1DFloatFloat
@ Tex1DFloatFloat
Definition: NVPTXISelLowering.h:84
llvm::NVPTXISD::TexUnified1DS32FloatLevel
@ TexUnified1DS32FloatLevel
Definition: NVPTXISelLowering.h:173
llvm::NVPTXISD::Suld2DV4I16Trap
@ Suld2DV4I16Trap
Definition: NVPTXISelLowering.h:346
llvm::ADDRESS_SPACE_GENERIC
@ ADDRESS_SPACE_GENERIC
Definition: NVPTXBaseInfo.h:22
llvm::NVPTXISD::Suld3DV2I16Trap
@ Suld3DV2I16Trap
Definition: NVPTXISelLowering.h:366
llvm::NVPTXISD::StoreParam
@ StoreParam
Definition: NVPTXISelLowering.h:73
llvm::NVPTXISD::Tex1DArrayFloatFloatGrad
@ Tex1DArrayFloatFloatGrad
Definition: NVPTXISelLowering.h:98
llvm::AMDGPU::HSAMD::Kernel::Arg::Key::IsVolatile
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
Definition: AMDGPUMetadata.h:194
llvm::NVPTXISD::Tld4A2DFloatFloat
@ Tld4A2DFloatFloat
Definition: NVPTXISelLowering.h:158
llvm::NVPTX::PTXLdStInstCode::Untyped
@ Untyped
Definition: NVPTX.h:119
llvm::CodeGenOpt::Level
Level
Definition: CodeGen.h:52
llvm::NVPTXISD::Tld4UnifiedA2DS64Float
@ Tld4UnifiedA2DS64Float
Definition: NVPTXISelLowering.h:246
llvm::NVPTXISD::TexUnified1DArrayFloatFloat
@ TexUnified1DArrayFloatFloat
Definition: NVPTXISelLowering.h:180
llvm::SDVTList
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Definition: SelectionDAGNodes.h:79
llvm::NVPTXISD::Suld1DI16Trap
@ Suld1DI16Trap
Definition: NVPTXISelLowering.h:314
llvm::NVPTXISD::Suld1DV4I32Zero
@ Suld1DV4I32Zero
Definition: NVPTXISelLowering.h:383
llvm::NVPTXISD::TexUnified1DS32Float
@ TexUnified1DS32Float
Definition: NVPTXISelLowering.h:172
llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition: ISDOpcodes.h:1335
llvm::NVPTXISD::Suld1DArrayI64Clamp
@ Suld1DArrayI64Clamp
Definition: NVPTXISelLowering.h:268
llvm::ISD::INTRINSIC_WO_CHAIN
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
llvm::NVPTXISD::Suld1DArrayI64Zero
@ Suld1DArrayI64Zero
Definition: NVPTXISelLowering.h:388
llvm::AddrSpaceCastSDNode::getDestAddressSpace
unsigned getDestAddressSpace() const
Definition: SelectionDAGNodes.h:1246
llvm::NVPTXISD::Tex2DArrayU32Float
@ Tex2DArrayU32Float
Definition: NVPTXISelLowering.h:128
llvm::NVPTXDAGToDAGISel
Definition: NVPTXISelDAGToDAG.h:27
llvm::AtomicSDNode
This is an SDNode representing atomic operations.
Definition: SelectionDAGNodes.h:1418
llvm::NVPTXISD::Suld2DArrayI64Zero
@ Suld2DArrayI64Zero
Definition: NVPTXISelLowering.h:412
llvm::NVPTXTargetLowering
Definition: NVPTXISelLowering.h:440
llvm::NVPTXISD::Suld1DArrayI32Trap
@ Suld1DArrayI32Trap
Definition: NVPTXISelLowering.h:327
llvm::SelectionDAGISel::MF
MachineFunction * MF
Definition: SelectionDAGISel.h:45
llvm::NVPTXISD::Suld1DArrayV2I16Zero
@ Suld1DArrayV2I16Zero
Definition: NVPTXISelLowering.h:390
llvm::ISD::SETLT
@ SETLT
Definition: ISDOpcodes.h:1378
llvm::NVPTX::PTXCmpMode::FTZ_FLAG
@ FTZ_FLAG
Definition: NVPTX.h:171
llvm::NVPTXISD::Suld1DV2I16Trap
@ Suld1DV2I16Trap
Definition: NVPTXISelLowering.h:318
llvm::NVPTX::PTXLdStInstCode::V2
@ V2
Definition: NVPTX.h:123
llvm::NVPTXISD::TexUnified2DArrayS32S32
@ TexUnified2DArrayS32S32
Definition: NVPTXISelLowering.h:207
llvm::NVPTXISD::TexUnified3DU32S32
@ TexUnified3DU32S32
Definition: NVPTXISelLowering.h:223
llvm::NVPTXISD::Suld2DV2I16Trap
@ Suld2DV2I16Trap
Definition: NVPTXISelLowering.h:342
llvm::NVPTXISD::Suld2DI64Clamp
@ Suld2DI64Clamp
Definition: NVPTXISelLowering.h:280
llvm::NVPTXISD::Suld1DI32Zero
@ Suld1DI32Zero
Definition: NVPTXISelLowering.h:375
llvm::NVPTXISD::TexUnified2DArrayFloatFloatLevel
@ TexUnified2DArrayFloatFloatLevel
Definition: NVPTXISelLowering.h:205
llvm::NVPTXISD::Suld3DI16Trap
@ Suld3DI16Trap
Definition: NVPTXISelLowering.h:362
llvm::NVPTXISD::StoreParamS32
@ StoreParamS32
Definition: NVPTXISelLowering.h:76
llvm::NVPTXISD::Tex3DU32Float
@ Tex3DU32Float
Definition: NVPTXISelLowering.h:140
llvm::NVPTX::PTXLdStInstCode::PARAM
@ PARAM
Definition: NVPTX.h:112
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:321
llvm::NVPTXISD::Tld4UnifiedG2DS64Float
@ Tld4UnifiedG2DS64Float
Definition: NVPTXISelLowering.h:244
llvm::ISD::TargetExternalSymbol
@ TargetExternalSymbol
Definition: ISDOpcodes.h:169
llvm::MemSDNode::getAddressSpace
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Definition: SelectionDAGNodes.h:1347
llvm::NVPTXISD::Suld2DArrayI8Zero
@ Suld2DArrayI8Zero
Definition: NVPTXISelLowering.h:409
llvm::NVPTXISD::Suld1DArrayI8Zero
@ Suld1DArrayI8Zero
Definition: NVPTXISelLowering.h:385
llvm::SelectionDAG::getDataLayout
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:440
llvm::NVPTXISD::Tld4R2DU64Float
@ Tld4R2DU64Float
Definition: NVPTXISelLowering.h:163
llvm::SelectionDAGISel::ReplaceUses
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
Definition: SelectionDAGISel.h:206
llvm::NVPTXISD::TexUnified3DFloatFloatGrad
@ TexUnified3DFloatFloatGrad
Definition: NVPTXISelLowering.h:218
llvm::MVT::i32
@ i32
Definition: MachineValueType.h:46
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::NVPTXISD::TexUnified2DS32FloatGrad
@ TexUnified2DS32FloatGrad
Definition: NVPTXISelLowering.h:198
llvm::ISD::SETUO
@ SETUO
Definition: ISDOpcodes.h:1365
llvm::NVPTXISD::TexCubeFloatFloat
@ TexCubeFloatFloat
Definition: NVPTXISelLowering.h:143
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition: SelectionDAGNodes.h:138
llvm::AddrSpaceCastSDNode
Definition: SelectionDAGNodes.h:1236
llvm::NVPTXISD::Suld1DI8Trap
@ Suld1DI8Trap
Definition: NVPTXISelLowering.h:313
llvm::NVPTXISD::Suld3DV2I64Clamp
@ Suld3DV2I64Clamp
Definition: NVPTXISelLowering.h:308
llvm::NVPTX::PTXCmpMode::NEU
@ NEU
Definition: NVPTX.h:161
llvm::NVPTXISD::TexUnified1DArrayU32FloatGrad
@ TexUnified1DArrayU32FloatGrad
Definition: NVPTXISelLowering.h:190
llvm::NVPTXISD::StoreV2
@ StoreV2
Definition: NVPTXISelLowering.h:68
MemRef
Definition: Lint.cpp:84
llvm::AArch64CC::GT
@ GT
Definition: AArch64BaseInfo.h:267
llvm::ISD::STORE
@ STORE
Definition: ISDOpcodes.h:922
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
llvm::NVPTXISD::Suld1DI8Zero
@ Suld1DI8Zero
Definition: NVPTXISelLowering.h:373
llvm::NVPTXISD::StoreParamV4
@ StoreParamV4
Definition: NVPTXISelLowering.h:75
llvm::AtomicSDNode::getVal
const SDValue & getVal() const
Definition: SelectionDAGNodes.h:1428
Vector
So we should use XX3Form_Rcr to implement instrinsic Convert DP outs ins xscvdpsp No builtin are required Round &Convert QP DP(dword[1] is set to zero) No builtin are required Round to Quad Precision because you need to assign rounding mode in instruction Provide builtin(set f128:$vT,(int_ppc_vsx_xsrqpi f128:$vB))(set f128 yields< n x< ty > >< result > yields< ty >< result > No builtin are required Load Store Vector
Definition: README_P9.txt:497
llvm::NVPTXISD::Suld3DV4I8Clamp
@ Suld3DV4I8Clamp
Definition: NVPTXISelLowering.h:309
llvm::NVPTX::PTXLdStInstCode::GLOBAL
@ GLOBAL
Definition: NVPTX.h:109
llvm::NVPTX::PTXLdStInstCode::GENERIC
@ GENERIC
Definition: NVPTX.h:108
llvm::NVPTXISD::Tex2DU32S32
@ Tex2DU32S32
Definition: NVPTXISelLowering.h:115
llvm::EVT::getVectorElementType
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:301
llvm::ISD::SETOGE
@ SETOGE
Definition: ISDOpcodes.h:1360
llvm::NVPTXISD::Suld2DArrayV4I8Zero
@ Suld2DArrayV4I8Zero
Definition: NVPTXISelLowering.h:417
Instructions.h
llvm::ISD::SHL
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:657
llvm::NVPTXISD::Tld4UnifiedA2DFloatFloat
@ Tld4UnifiedA2DFloatFloat
Definition: NVPTXISelLowering.h:242
llvm::NVPTXISD::Tex3DU32FloatGrad
@ Tex3DU32FloatGrad
Definition: NVPTXISelLowering.h:142
llvm::SelectionDAGISel
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
Definition: SelectionDAGISel.h:39
llvm::NVPTXISD::Suld2DI16Clamp
@ Suld2DI16Clamp
Definition: NVPTXISelLowering.h:278
llvm::NVPTXISD::TexUnified1DU32FloatGrad
@ TexUnified1DU32FloatGrad
Definition: NVPTXISelLowering.h:178
llvm::NVPTXISD::Suld1DV4I8Zero
@ Suld1DV4I8Zero
Definition: NVPTXISelLowering.h:381
TargetIntrinsicInfo.h
llvm::NVPTX::PTXCvtMode::NONE
@ NONE
Definition: NVPTX.h:131
llvm::MVT::f16
@ f16
Definition: MachineValueType.h:54
llvm::NVPTXISD::TexUnified1DS32FloatGrad
@ TexUnified1DS32FloatGrad
Definition: NVPTXISelLowering.h:174
llvm::NVPTXISD::TexUnified1DS32S32
@ TexUnified1DS32S32
Definition: NVPTXISelLowering.h:171
N
#define N
llvm::NVPTXISD::Suld1DArrayV4I8Clamp
@ Suld1DArrayV4I8Clamp
Definition: NVPTXISelLowering.h:273
llvm::ISD::SRL
@ SRL
Definition: ISDOpcodes.h:659
llvm::SelectionDAG::getTargetConstantFP
SDValue getTargetConstantFP(double Val, const SDLoc &DL, EVT VT)
Definition: SelectionDAG.h:669
llvm::NVPTXISD::Suld2DI32Trap
@ Suld2DI32Trap
Definition: NVPTXISelLowering.h:339
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::isStrongerThanMonotonic
bool isStrongerThanMonotonic(AtomicOrdering AO)
Definition: AtomicOrdering.h:124
llvm::NVPTXISD::Suld2DV4I16Clamp
@ Suld2DV4I16Clamp
Definition: NVPTXISelLowering.h:286
llvm::NVPTXISD::Suld1DI32Clamp
@ Suld1DI32Clamp
Definition: NVPTXISelLowering.h:255
llvm::NVPTXISD::Suld1DV2I64Clamp
@ Suld1DV2I64Clamp
Definition: NVPTXISelLowering.h:260
llvm::NVPTXTargetLowering::allowUnsafeFPMath
bool allowUnsafeFPMath(MachineFunction &MF) const
Definition: NVPTXISelLowering.cpp:4393
llvm::NVPTXISD::Suld3DI8Clamp
@ Suld3DI8Clamp
Definition: NVPTXISelLowering.h:301
llvm::NVPTXISD::Suld2DV4I8Trap
@ Suld2DV4I8Trap
Definition: NVPTXISelLowering.h:345
llvm::NVPTXISD::Tex2DFloatS32
@ Tex2DFloatS32
Definition: NVPTXISelLowering.h:107
llvm::NVPTXISD::Suld2DI8Trap
@ Suld2DI8Trap
Definition: NVPTXISelLowering.h:337
llvm::NVPTXISD::Suld2DI64Trap
@ Suld2DI64Trap
Definition: NVPTXISelLowering.h:340
llvm::SDValue::getOpcode
unsigned getOpcode() const
Definition: SelectionDAGNodes.h:1117
llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:637
llvm::NVPTXDAGToDAGISel::Subtarget
const NVPTXSubtarget * Subtarget
Definition: NVPTXISelDAGToDAG.h:49
llvm::NVPTXISD::Suld3DV4I8Trap
@ Suld3DV4I8Trap
Definition: NVPTXISelLowering.h:369
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::ISD::SETONE
@ SETONE
Definition: ISDOpcodes.h:1363
llvm::DataLayout::getPointerSizeInBits
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:408
llvm::NVPTXISD::Suld2DV2I8Zero
@ Suld2DV2I8Zero
Definition: NVPTXISelLowering.h:401
llvm::NVPTXISD::Tex2DS32FloatLevel
@ Tex2DS32FloatLevel
Definition: NVPTXISelLowering.h:113
llvm::MVT::i16
@ i16
Definition: MachineValueType.h:45
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::ISD::INTRINSIC_W_CHAIN
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
llvm::NVPTXISD::Suld1DV2I16Zero
@ Suld1DV2I16Zero
Definition: NVPTXISelLowering.h:378
llvm::NVPTXISD::Tld4UnifiedG2DU64Float
@ Tld4UnifiedG2DU64Float
Definition: NVPTXISelLowering.h:248
llvm::NVPTXISD::TexUnified2DFloatS32
@ TexUnified2DFloatS32
Definition: NVPTXISelLowering.h:191
llvm::NVPTX::PTXCmpMode::CmpMode
CmpMode
Definition: NVPTX.h:149
llvm::NVPTXISD::Suld1DArrayV2I16Clamp
@ Suld1DArrayV2I16Clamp
Definition: NVPTXISelLowering.h:270
llvm::NVPTXISD::Wrapper
@ Wrapper
Definition: NVPTXISelLowering.h:26
llvm::NVPTXISD::StoreParamV2
@ StoreParamV2
Definition: NVPTXISelLowering.h:74
llvm::NVPTXISD::StoreRetvalV2
@ StoreRetvalV2
Definition: NVPTXISelLowering.h:79
llvm::NVPTXISD::Suld2DArrayV2I16Trap
@ Suld2DArrayV2I16Trap
Definition: NVPTXISelLowering.h:354
llvm::isMask_64
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:473
llvm::NVPTX::PTXLdStInstCode::Unsigned
@ Unsigned
Definition: NVPTX.h:116
llvm::NVPTXISD::Suld1DArrayV2I64Clamp
@ Suld1DArrayV2I64Clamp
Definition: NVPTXISelLowering.h:272
llvm::NVPTXISD::Suld1DArrayV2I8Trap
@ Suld1DArrayV2I8Trap
Definition: NVPTXISelLowering.h:329
llvm::NVPTXISD::Suld1DI64Trap
@ Suld1DI64Trap
Definition: NVPTXISelLowering.h:316
llvm::NVPTXISD::TexUnified2DArrayU32FloatGrad
@ TexUnified2DArrayU32FloatGrad
Definition: NVPTXISelLowering.h:214
llvm::NVPTXISD::StoreParamU32
@ StoreParamU32
Definition: NVPTXISelLowering.h:77
llvm::NVPTXISD::TexUnified2DArrayU32Float
@ TexUnified2DArrayU32Float
Definition: NVPTXISelLowering.h:212
llvm::NVPTXISD::Tex2DU32FloatGrad
@ Tex2DU32FloatGrad
Definition: NVPTXISelLowering.h:118
llvm::NVPTXTargetLowering::allowFMA
bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const
Definition: NVPTXISelLowering.cpp:4376
llvm::NVPTXISD::SETP_F16X2
@ SETP_F16X2
Definition: NVPTXISelLowering.h:59
raw_ostream.h
llvm::NVPTXISD::Tex1DFloatFloatGrad
@ Tex1DFloatFloatGrad
Definition: NVPTXISelLowering.h:86
llvm::NVPTXISD::TexUnifiedCubeU32Float
@ TexUnifiedCubeU32Float
Definition: NVPTXISelLowering.h:231
llvm::SelectionDAGISel::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
Definition: SelectionDAGISel.cpp:415
llvm::NVPTXISD::TexCubeArrayFloatFloatLevel
@ TexCubeArrayFloatFloatLevel
Definition: NVPTXISelLowering.h:150
llvm::NVPTXISD::Suld1DArrayI8Trap
@ Suld1DArrayI8Trap
Definition: NVPTXISelLowering.h:325
llvm::NVPTXISD::TexUnified2DS32Float
@ TexUnified2DS32Float
Definition: NVPTXISelLowering.h:196
llvm::MVT::f32
@ f32
Definition: MachineValueType.h:55
llvm::NVPTXISD::Tex2DArrayFloatFloatGrad
@ Tex2DArrayFloatFloatGrad
Definition: NVPTXISelLowering.h:122
llvm::NVPTXISD::Suld2DArrayV2I8Zero
@ Suld2DArrayV2I8Zero
Definition: NVPTXISelLowering.h:413
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::NVPTXISD::TexUnified1DFloatFloat
@ TexUnified1DFloatFloat
Definition: NVPTXISelLowering.h:168
llvm::NVPTXISD::Suld2DArrayV2I64Clamp
@ Suld2DArrayV2I64Clamp
Definition: NVPTXISelLowering.h:296
Debug.h
llvm::NVPTXISD::Suld2DArrayI32Trap
@ Suld2DArrayI32Trap
Definition: NVPTXISelLowering.h:351
llvm::ISD::ATOMIC_LOAD
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1114
llvm::NVPTXISD::Suld2DI16Zero
@ Suld2DI16Zero
Definition: NVPTXISelLowering.h:398
llvm::NVPTXISD::Suld1DArrayV4I32Zero
@ Suld1DArrayV4I32Zero
Definition: NVPTXISelLowering.h:395
llvm::NVPTXISD::Suld1DV2I16Clamp
@ Suld1DV2I16Clamp
Definition: NVPTXISelLowering.h:258
llvm::NVPTXISD::Tex1DArrayS32S32
@ Tex1DArrayS32S32
Definition: NVPTXISelLowering.h:99
llvm::Optional::getValue
constexpr const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:282
llvm::NVPTXISD::Suld1DI64Zero
@ Suld1DI64Zero
Definition: NVPTXISelLowering.h:376
llvm::NVPTXISD::Suld2DV2I64Trap
@ Suld2DV2I64Trap
Definition: NVPTXISelLowering.h:344
llvm::NVPTXISD::Suld2DArrayI32Clamp
@ Suld2DArrayI32Clamp
Definition: NVPTXISelLowering.h:291
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::NVPTXISD::Suld3DV4I32Clamp
@ Suld3DV4I32Clamp
Definition: NVPTXISelLowering.h:311
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:289
llvm::NVPTXISD::Suld2DArrayV2I16Zero
@ Suld2DArrayV2I16Zero
Definition: NVPTXISelLowering.h:414
llvm::NVPTXISD::Suld3DV2I16Clamp
@ Suld3DV2I16Clamp
Definition: NVPTXISelLowering.h:306
llvm::NVPTXISD::Suld1DV2I32Zero
@ Suld1DV2I32Zero
Definition: NVPTXISelLowering.h:379
llvm::NVPTXISD::Tex1DS32FloatLevel
@ Tex1DS32FloatLevel
Definition: NVPTXISelLowering.h:89
llvm::NVPTXISD::Suld2DArrayV4I16Trap
@ Suld2DArrayV4I16Trap
Definition: NVPTXISelLowering.h:358
llvm::NVPTX::PTXCmpMode::LTU
@ LTU
Definition: NVPTX.h:162