19 #include "llvm/IR/IntrinsicsNVPTX.h"
29 #define DEBUG_TYPE "nvptx-isel"
49 int NVPTXDAGToDAGISel::getDivF32Level()
const {
53 bool NVPTXDAGToDAGISel::usePrecSqrtF32()
const {
57 bool NVPTXDAGToDAGISel::useF32FTZ()
const {
61 bool NVPTXDAGToDAGISel::allowFMA()
const {
66 bool NVPTXDAGToDAGISel::allowUnsafeFPMath()
const {
71 bool NVPTXDAGToDAGISel::useShortPointers()
const {
77 void NVPTXDAGToDAGISel::Select(
SDNode *
N) {
79 if (
N->isMachineOpcode()) {
84 switch (
N->getOpcode()) {
96 if (tryEXTRACT_VECTOR_ELEMENT(
N))
105 if (tryLoadVector(
N))
117 if (tryStoreVector(
N))
129 if (tryStoreRetval(
N))
137 if (tryStoreParam(
N))
141 if (tryIntrinsicNoChain(
N))
145 if (tryIntrinsicChain(
N))
316 if (tryTextureIntrinsic(
N))
484 if (trySurfaceIntrinsic(
N))
495 SelectAddrSpaceCast(
N);
498 if (tryConstantFP16(
N))
507 bool NVPTXDAGToDAGISel::tryIntrinsicChain(
SDNode *
N) {
508 unsigned IID = cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue();
512 case Intrinsic::nvvm_ldg_global_f:
513 case Intrinsic::nvvm_ldg_global_i:
514 case Intrinsic::nvvm_ldg_global_p:
515 case Intrinsic::nvvm_ldu_global_f:
516 case Intrinsic::nvvm_ldu_global_i:
517 case Intrinsic::nvvm_ldu_global_p:
524 bool NVPTXDAGToDAGISel::tryConstantFP16(
SDNode *
N) {
592 bool NVPTXDAGToDAGISel::SelectSETP_F16X2(
SDNode *
N) {
593 unsigned PTXCmpMode =
594 getPTXCmpMode(*cast<CondCodeSDNode>(
N->getOperand(2)), useF32FTZ());
605 bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(
SDNode *
N) {
615 for (
auto U :
Vector.getNode()->uses()) {
618 if (U->getOperand(0) != Vector)
621 dyn_cast<ConstantSDNode>(U->getOperand(1))) {
622 if (IdxConst->getZExtValue() == 0)
624 else if (IdxConst->getZExtValue() == 1)
633 if (E0.empty() || E1.empty())
636 unsigned Op = NVPTX::SplitF16x2;
641 Op = NVPTX::SplitI32toF16x2;
648 for (
auto *Node : E0)
650 for (
auto *Node : E1)
657 const Value *Src =
N->getMemOperand()->getValue();
662 if (
auto *PT = dyn_cast<PointerType>(Src->getType())) {
663 switch (PT->getAddressSpace()) {
698 if (
N->isInvariant())
710 if (
auto *A = dyn_cast<const Argument>(V))
711 return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr();
712 if (
auto *GV = dyn_cast<const GlobalVariable>(V))
713 return GV->isConstant();
718 bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(
SDNode *
N) {
719 unsigned IID = cast<ConstantSDNode>(
N->getOperand(0))->getZExtValue();
723 case Intrinsic::nvvm_texsurf_handle_internal:
724 SelectTexSurfHandle(
N);
729 void NVPTXDAGToDAGISel::SelectTexSurfHandle(
SDNode *
N) {
737 void NVPTXDAGToDAGISel::SelectAddrSpaceCast(
SDNode *
N) {
742 assert(SrcAddrSpace != DstAddrSpace &&
743 "addrspacecast must be between different address spaces");
748 switch (SrcAddrSpace) {
751 Opc = TM.
is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
754 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_shared_yes_6432
755 : NVPTX::cvta_shared_yes_64)
756 : NVPTX::cvta_shared_yes;
759 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_const_yes_6432
760 : NVPTX::cvta_const_yes_64)
761 : NVPTX::cvta_const_yes;
764 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_local_yes_6432
765 : NVPTX::cvta_local_yes_64)
766 : NVPTX::cvta_local_yes;
774 if (SrcAddrSpace != 0)
777 switch (DstAddrSpace) {
780 Opc = TM.
is64Bit() ? NVPTX::cvta_to_global_yes_64
781 : NVPTX::cvta_to_global_yes;
784 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_shared_yes_3264
785 : NVPTX::cvta_to_shared_yes_64)
786 : NVPTX::cvta_to_shared_yes;
789 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_const_yes_3264
790 : NVPTX::cvta_to_const_yes_64)
791 : NVPTX::cvta_to_const_yes;
794 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_local_yes_3264
795 : NVPTX::cvta_to_local_yes_64)
796 : NVPTX::cvta_to_local_yes;
799 Opc = TM.
is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
800 : NVPTX::nvvm_ptr_gen_to_param;
838 bool NVPTXDAGToDAGISel::tryLoad(
SDNode *
N) {
841 assert(
LD->readMem() &&
"Expected load");
843 EVT LoadedVT =
LD->getMemoryVT();
844 SDNode *NVPTXLD =
nullptr;
889 unsigned int fromType;
916 if (SelectDirectAddr(N1,
Addr)) {
918 TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar, NVPTX::LD_i32_avar,
919 NVPTX::LD_i64_avar, NVPTX::LD_f16_avar, NVPTX::LD_f16x2_avar,
920 NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
923 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
924 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
925 getI32Imm(fromTypeWidth, dl),
Addr, Chain };
929 Opcode =
pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
930 NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
931 NVPTX::LD_f16_asi, NVPTX::LD_f16x2_asi,
932 NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
935 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
936 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
937 getI32Imm(fromTypeWidth, dl),
Base,
Offset, Chain };
943 TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
944 NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, NVPTX::LD_f16_ari_64,
945 NVPTX::LD_f16x2_ari_64, NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
948 TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, NVPTX::LD_i32_ari,
949 NVPTX::LD_i64_ari, NVPTX::LD_f16_ari, NVPTX::LD_f16x2_ari,
950 NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
953 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
954 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
955 getI32Imm(fromTypeWidth, dl),
Base,
Offset, Chain };
960 TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
961 NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, NVPTX::LD_f16_areg_64,
962 NVPTX::LD_f16x2_areg_64, NVPTX::LD_f32_areg_64,
963 NVPTX::LD_f64_areg_64);
966 TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg, NVPTX::LD_i32_areg,
967 NVPTX::LD_i64_areg, NVPTX::LD_f16_areg, NVPTX::LD_f16x2_areg,
968 NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
971 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
972 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
973 getI32Imm(fromTypeWidth, dl), N1, Chain };
987 bool NVPTXDAGToDAGISel::tryLoadVector(
SDNode *
N) {
1004 return tryLDGLDU(
N);
1032 unsigned ExtensionType = cast<ConstantSDNode>(
1033 N->getOperand(
N->getNumOperands() - 1))->getZExtValue();
1044 switch (
N->getOpcode()) {
1055 EVT EltVT =
N->getValueType(0);
1067 if (SelectDirectAddr(Op1,
Addr)) {
1068 switch (
N->getOpcode()) {
1073 NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
1074 NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
1075 NVPTX::LDV_f16_v2_avar, NVPTX::LDV_f16x2_v2_avar,
1076 NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
1081 NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar,
None,
1082 NVPTX::LDV_f16_v4_avar, NVPTX::LDV_f16x2_v4_avar,
1083 NVPTX::LDV_f32_v4_avar,
None);
1090 getI32Imm(FromTypeWidth,
DL),
Addr, Chain };
1093 ? SelectADDRsi64(Op1.
getNode(), Op1,
Base, Offset)
1094 : SelectADDRsi(Op1.
getNode(), Op1,
Base, Offset)) {
1095 switch (
N->getOpcode()) {
1100 NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
1101 NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
1102 NVPTX::LDV_f16_v2_asi, NVPTX::LDV_f16x2_v2_asi,
1103 NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
1108 NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi,
None,
1109 NVPTX::LDV_f16_v4_asi, NVPTX::LDV_f16x2_v4_asi,
1110 NVPTX::LDV_f32_v4_asi,
None);
1120 ? SelectADDRri64(Op1.
getNode(), Op1,
Base, Offset)
1121 : SelectADDRri(Op1.
getNode(), Op1,
Base, Offset)) {
1123 switch (
N->getOpcode()) {
1129 NVPTX::LDV_i16_v2_ari_64, NVPTX::LDV_i32_v2_ari_64,
1130 NVPTX::LDV_i64_v2_ari_64, NVPTX::LDV_f16_v2_ari_64,
1131 NVPTX::LDV_f16x2_v2_ari_64, NVPTX::LDV_f32_v2_ari_64,
1132 NVPTX::LDV_f64_v2_ari_64);
1137 NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64,
None,
1138 NVPTX::LDV_f16_v4_ari_64, NVPTX::LDV_f16x2_v4_ari_64,
1139 NVPTX::LDV_f32_v4_ari_64,
None);
1143 switch (
N->getOpcode()) {
1148 NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
1149 NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
1150 NVPTX::LDV_f16_v2_ari, NVPTX::LDV_f16x2_v2_ari,
1151 NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
1156 NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari,
None,
1157 NVPTX::LDV_f16_v4_ari, NVPTX::LDV_f16x2_v4_ari,
1158 NVPTX::LDV_f32_v4_ari,
None);
1171 switch (
N->getOpcode()) {
1177 NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
1178 NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f16_v2_areg_64,
1179 NVPTX::LDV_f16x2_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
1180 NVPTX::LDV_f64_v2_areg_64);
1185 NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64,
None,
1186 NVPTX::LDV_f16_v4_areg_64, NVPTX::LDV_f16x2_v4_areg_64,
1187 NVPTX::LDV_f32_v4_areg_64,
None);
1191 switch (
N->getOpcode()) {
1197 NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
1198 NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f16_v2_areg,
1199 NVPTX::LDV_f16x2_v2_areg, NVPTX::LDV_f32_v2_areg,
1200 NVPTX::LDV_f64_v2_areg);
1205 NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg,
None,
1206 NVPTX::LDV_f16_v4_areg, NVPTX::LDV_f16x2_v4_areg,
1207 NVPTX::LDV_f32_v4_areg,
None);
1215 getI32Imm(FromTypeWidth,
DL), Op1, Chain };
1226 bool NVPTXDAGToDAGISel::tryLDGLDU(
SDNode *
N) {
1236 Op1 =
N->getOperand(2);
1237 Mem = cast<MemIntrinsicSDNode>(
N);
1238 unsigned IID = cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue();
1242 case Intrinsic::nvvm_ldg_global_f:
1243 case Intrinsic::nvvm_ldg_global_i:
1244 case Intrinsic::nvvm_ldg_global_p:
1247 case Intrinsic::nvvm_ldu_global_f:
1248 case Intrinsic::nvvm_ldu_global_i:
1249 case Intrinsic::nvvm_ldu_global_p:
1254 Op1 =
N->getOperand(1);
1255 Mem = cast<MemSDNode>(
N);
1264 unsigned NumElts = 1;
1270 assert(NumElts % 2 == 0 &&
"Vector must have even number of elements");
1281 for (
unsigned i = 0;
i != NumElts; ++
i) {
1282 InstVTs.push_back(NodeVT);
1287 if (SelectDirectAddr(Op1,
Addr)) {
1288 switch (
N->getOpcode()) {
1295 NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
1296 NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
1297 NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
1298 NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
1299 NVPTX::INT_PTX_LDG_GLOBAL_f16avar,
1300 NVPTX::INT_PTX_LDG_GLOBAL_f16x2avar,
1301 NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
1302 NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
1305 NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
1306 NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
1307 NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
1308 NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
1309 NVPTX::INT_PTX_LDU_GLOBAL_f16avar,
1310 NVPTX::INT_PTX_LDU_GLOBAL_f16x2avar,
1311 NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
1312 NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
1317 NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
1318 NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
1319 NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
1320 NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
1321 NVPTX::INT_PTX_LDG_G_v2f16_ELE_avar,
1322 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_avar,
1323 NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
1324 NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
1328 NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
1329 NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
1330 NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
1331 NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
1332 NVPTX::INT_PTX_LDU_G_v2f16_ELE_avar,
1333 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_avar,
1334 NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
1335 NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
1340 NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
1341 NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
1342 NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar,
None,
1343 NVPTX::INT_PTX_LDG_G_v4f16_ELE_avar,
1344 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_avar,
1345 NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar,
None);
1349 NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar,
1350 NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
1351 NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar,
None,
1352 NVPTX::INT_PTX_LDU_G_v4f16_ELE_avar,
1353 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_avar,
1354 NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar,
None);
1362 : SelectADDRri(Op1.
getNode(), Op1,
Base, Offset)) {
1364 switch (
N->getOpcode()) {
1371 NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
1372 NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
1373 NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
1374 NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
1375 NVPTX::INT_PTX_LDG_GLOBAL_f16ari64,
1376 NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari64,
1377 NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
1378 NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
1381 NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
1382 NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
1383 NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
1384 NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
1385 NVPTX::INT_PTX_LDU_GLOBAL_f16ari64,
1386 NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari64,
1387 NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
1388 NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
1393 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
1394 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
1395 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
1396 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
1397 NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari64,
1398 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari64,
1399 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
1400 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
1404 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
1405 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
1406 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
1407 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
1408 NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari64,
1409 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari64,
1410 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
1411 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
1416 NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
1417 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
1418 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64,
None,
1419 NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari64,
1420 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari64,
1421 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64,
None);
1425 NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
1426 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
1427 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64,
None,
1428 NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari64,
1429 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari64,
1430 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64,
None);
1434 switch (
N->getOpcode()) {
1441 NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
1442 NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
1443 NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
1444 NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
1445 NVPTX::INT_PTX_LDG_GLOBAL_f16ari,
1446 NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari,
1447 NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
1448 NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
1451 NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
1452 NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
1453 NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
1454 NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
1455 NVPTX::INT_PTX_LDU_GLOBAL_f16ari,
1456 NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari,
1457 NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
1458 NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
1463 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
1464 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
1465 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
1466 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
1467 NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari32,
1468 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari32,
1469 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
1470 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
1474 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
1475 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
1476 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
1477 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
1478 NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari32,
1479 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari32,
1480 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
1481 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
1486 NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
1487 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
1488 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32,
None,
1489 NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari32,
1490 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari32,
1491 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32,
None);
1495 NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
1496 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
1497 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32,
None,
1498 NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari32,
1499 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari32,
1500 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32,
None);
1510 switch (
N->getOpcode()) {
1517 NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
1518 NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
1519 NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
1520 NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
1521 NVPTX::INT_PTX_LDG_GLOBAL_f16areg64,
1522 NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg64,
1523 NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
1524 NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
1527 NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
1528 NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
1529 NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
1530 NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
1531 NVPTX::INT_PTX_LDU_GLOBAL_f16areg64,
1532 NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg64,
1533 NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
1534 NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
1539 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
1540 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
1541 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
1542 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
1543 NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg64,
1544 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg64,
1545 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
1546 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
1550 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
1551 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
1552 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
1553 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
1554 NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg64,
1555 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg64,
1556 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
1557 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
1562 NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
1563 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
1564 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64,
None,
1565 NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg64,
1566 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg64,
1567 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64,
None);
1571 NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64,
1572 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
1573 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64,
None,
1574 NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg64,
1575 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg64,
1576 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64,
None);
1580 switch (
N->getOpcode()) {
1587 NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
1588 NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
1589 NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
1590 NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
1591 NVPTX::INT_PTX_LDG_GLOBAL_f16areg,
1592 NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg,
1593 NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
1594 NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
1597 NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
1598 NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
1599 NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
1600 NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
1601 NVPTX::INT_PTX_LDU_GLOBAL_f16areg,
1602 NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg,
1603 NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
1604 NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
1609 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
1610 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
1611 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
1612 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
1613 NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg32,
1614 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg32,
1615 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
1616 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
1620 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
1621 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
1622 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
1623 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
1624 NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg32,
1625 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg32,
1626 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
1627 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
1632 NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
1633 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
1634 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32,
None,
1635 NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg32,
1636 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg32,
1637 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32,
None);
1641 NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32,
1642 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
1643 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32,
None,
1644 NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32,
1645 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32,
1646 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32,
None);
1652 SDValue Ops[] = { Op1, Chain };
1670 EVT OrigType =
N->getValueType(0);
1673 if (OrigType != EltVT && LdNode) {
1678 unsigned CvtOpc = GetConvertOpcode(OrigType.
getSimpleVT(),
1683 for (
unsigned i = 0;
i != NumElts; ++
i) {
1699 bool NVPTXDAGToDAGISel::tryStore(
SDNode *
N) {
1702 assert(
ST->writeMem() &&
"Expected store");
1705 assert((PlainStore || AtomicStore) &&
"Expected store");
1706 EVT StoreVT =
ST->getMemoryVT();
1707 SDNode *NVPTXST =
nullptr;
1710 if (PlainStore && PlainStore->
isIndexed())
1753 unsigned int toType;
1769 Value.getNode()->getSimpleValueType(0).SimpleTy;
1771 if (SelectDirectAddr(BasePtr,
Addr)) {
1772 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
1773 NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
1774 NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar,
1775 NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
1779 getI32Imm(isVolatile, dl),
1780 getI32Imm(CodeAddrSpace, dl),
1781 getI32Imm(vecType, dl),
1782 getI32Imm(toType, dl),
1783 getI32Imm(toTypeWidth, dl),
1788 ? SelectADDRsi64(
BasePtr.getNode(), BasePtr,
Base, Offset)
1789 : SelectADDRsi(
BasePtr.getNode(), BasePtr,
Base, Offset)) {
1790 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
1791 NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
1792 NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi,
1793 NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
1797 getI32Imm(isVolatile, dl),
1798 getI32Imm(CodeAddrSpace, dl),
1799 getI32Imm(vecType, dl),
1800 getI32Imm(toType, dl),
1801 getI32Imm(toTypeWidth, dl),
1807 ? SelectADDRri64(
BasePtr.getNode(), BasePtr,
Base, Offset)
1808 : SelectADDRri(
BasePtr.getNode(), BasePtr,
Base, Offset)) {
1811 SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
1812 NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, NVPTX::ST_f16_ari_64,
1813 NVPTX::ST_f16x2_ari_64, NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
1815 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
1816 NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
1817 NVPTX::ST_f16_ari, NVPTX::ST_f16x2_ari,
1818 NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
1823 getI32Imm(isVolatile, dl),
1824 getI32Imm(CodeAddrSpace, dl),
1825 getI32Imm(vecType, dl),
1826 getI32Imm(toType, dl),
1827 getI32Imm(toTypeWidth, dl),
1835 pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
1836 NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
1837 NVPTX::ST_f16_areg_64, NVPTX::ST_f16x2_areg_64,
1838 NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
1840 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
1841 NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
1842 NVPTX::ST_f16_areg, NVPTX::ST_f16x2_areg,
1843 NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
1847 getI32Imm(isVolatile, dl),
1848 getI32Imm(CodeAddrSpace, dl),
1849 getI32Imm(vecType, dl),
1850 getI32Imm(toType, dl),
1851 getI32Imm(toTypeWidth, dl),
1866 bool NVPTXDAGToDAGISel::tryStoreVector(
SDNode *
N) {
1910 switch (
N->getOpcode()) {
1913 StOps.push_back(
N->getOperand(1));
1914 StOps.push_back(
N->getOperand(2));
1915 N2 =
N->getOperand(3);
1919 StOps.push_back(
N->getOperand(1));
1920 StOps.push_back(
N->getOperand(2));
1921 StOps.push_back(
N->getOperand(3));
1922 StOps.push_back(
N->getOperand(4));
1923 N2 =
N->getOperand(5);
1940 StOps.push_back(getI32Imm(CodeAddrSpace,
DL));
1941 StOps.push_back(getI32Imm(
VecType,
DL));
1942 StOps.push_back(getI32Imm(ToType,
DL));
1943 StOps.push_back(getI32Imm(ToTypeWidth,
DL));
1945 if (SelectDirectAddr(N2,
Addr)) {
1946 switch (
N->getOpcode()) {
1951 NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
1952 NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
1953 NVPTX::STV_f16_v2_avar, NVPTX::STV_f16x2_v2_avar,
1954 NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
1959 NVPTX::STV_i16_v4_avar, NVPTX::STV_i32_v4_avar,
None,
1960 NVPTX::STV_f16_v4_avar, NVPTX::STV_f16x2_v4_avar,
1961 NVPTX::STV_f32_v4_avar,
None);
1964 StOps.push_back(
Addr);
1967 switch (
N->getOpcode()) {
1972 NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
1973 NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
1974 NVPTX::STV_f16_v2_asi, NVPTX::STV_f16x2_v2_asi,
1975 NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
1980 NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi,
None,
1981 NVPTX::STV_f16_v4_asi, NVPTX::STV_f16x2_v4_asi,
1982 NVPTX::STV_f32_v4_asi,
None);
1985 StOps.push_back(
Base);
1986 StOps.push_back(Offset);
1990 switch (
N->getOpcode()) {
1996 NVPTX::STV_i16_v2_ari_64, NVPTX::STV_i32_v2_ari_64,
1997 NVPTX::STV_i64_v2_ari_64, NVPTX::STV_f16_v2_ari_64,
1998 NVPTX::STV_f16x2_v2_ari_64, NVPTX::STV_f32_v2_ari_64,
1999 NVPTX::STV_f64_v2_ari_64);
2004 NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64,
None,
2005 NVPTX::STV_f16_v4_ari_64, NVPTX::STV_f16x2_v4_ari_64,
2006 NVPTX::STV_f32_v4_ari_64,
None);
2010 switch (
N->getOpcode()) {
2015 NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
2016 NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
2017 NVPTX::STV_f16_v2_ari, NVPTX::STV_f16x2_v2_ari,
2018 NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
2023 NVPTX::STV_i16_v4_ari, NVPTX::STV_i32_v4_ari,
None,
2024 NVPTX::STV_f16_v4_ari, NVPTX::STV_f16x2_v4_ari,
2025 NVPTX::STV_f32_v4_ari,
None);
2029 StOps.push_back(
Base);
2030 StOps.push_back(Offset);
2033 switch (
N->getOpcode()) {
2039 NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
2040 NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f16_v2_areg_64,
2041 NVPTX::STV_f16x2_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
2042 NVPTX::STV_f64_v2_areg_64);
2047 NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64,
None,
2048 NVPTX::STV_f16_v4_areg_64, NVPTX::STV_f16x2_v4_areg_64,
2049 NVPTX::STV_f32_v4_areg_64,
None);
2053 switch (
N->getOpcode()) {
2059 NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
2060 NVPTX::STV_i64_v2_areg, NVPTX::STV_f16_v2_areg,
2061 NVPTX::STV_f16x2_v2_areg, NVPTX::STV_f32_v2_areg,
2062 NVPTX::STV_f64_v2_areg);
2067 NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg,
None,
2068 NVPTX::STV_f16_v4_areg, NVPTX::STV_f16x2_v4_areg,
2069 NVPTX::STV_f32_v4_areg,
None);
2073 StOps.push_back(N2);
2079 StOps.push_back(Chain);
2090 bool NVPTXDAGToDAGISel::tryLoadParam(
SDNode *Node) {
2091 SDValue Chain = Node->getOperand(0);
2098 switch (Node->getOpcode()) {
2112 EVT EltVT = Node->getValueType(0);
2122 NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
2123 NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
2124 NVPTX::LoadParamMemF16, NVPTX::LoadParamMemF16x2,
2125 NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
2130 NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
2131 NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F16,
2132 NVPTX::LoadParamMemV2F16x2, NVPTX::LoadParamMemV2F32,
2133 NVPTX::LoadParamMemV2F64);
2138 NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32,
None,
2139 NVPTX::LoadParamMemV4F16, NVPTX::LoadParamMemV4F16x2,
2140 NVPTX::LoadParamMemV4F32,
None);
2156 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2160 Ops.push_back(Chain);
2161 Ops.push_back(
Flag);
2167 bool NVPTXDAGToDAGISel::tryStoreRetval(
SDNode *
N) {
2171 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2175 unsigned NumElts = 1;
2176 switch (
N->getOpcode()) {
2192 for (
unsigned i = 0;
i < NumElts; ++
i)
2193 Ops.push_back(
N->getOperand(
i + 2));
2195 Ops.push_back(Chain);
2206 NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
2207 NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
2208 NVPTX::StoreRetvalF16, NVPTX::StoreRetvalF16x2,
2209 NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
2213 NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
2214 NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
2215 NVPTX::StoreRetvalV2F16, NVPTX::StoreRetvalV2F16x2,
2216 NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
2220 NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
2221 NVPTX::StoreRetvalV4I32,
None,
2222 NVPTX::StoreRetvalV4F16, NVPTX::StoreRetvalV4F16x2,
2223 NVPTX::StoreRetvalV4F32,
None);
// tryStoreParam: takes the node N being selected and returns bool.
// NOTE: this listing is a non-contiguous excerpt. The leading integer on a
// line is its line number in the original file; lines in the gaps (case
// labels, declarations of Param/Offset/Chain/Flag/Ops/Opcode, closing braces)
// are not shown.
2237 bool NVPTXDAGToDAGISel::tryStoreParam(
SDNode *
N) {
// Param and Offset are both accessed through cast<ConstantSDNode>, i.e. they
// are expected to be constants; their zero-extended values are kept.
2241 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2243 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
// Element count defaults to 1. The switch on N's opcode presumably raises it
// for vector variants -- the case bodies are not shown, TODO confirm.
2248 unsigned NumElts = 1;
2249 switch (
N->getOpcode()) {
// The values to store are N's operands 3 .. NumElts+2, appended in order.
2267 for (
unsigned i = 0;
i < NumElts; ++
i)
2268 Ops.push_back(
N->getOperand(
i + 3));
// Chain, then Flag, are appended after the value operands.
2271 Ops.push_back(Chain);
2272 Ops.push_back(
Flag);
// A second switch on N's opcode; the opcode lists below appear after it.
2278 switch (
N->getOpcode()) {
// Scalar StoreParam opcodes, listed in the order
// I8, I16, I32, I64, F16, F16x2, F32, F64.
2285 NVPTX::StoreParamI8, NVPTX::StoreParamI16,
2286 NVPTX::StoreParamI32, NVPTX::StoreParamI64,
2287 NVPTX::StoreParamF16, NVPTX::StoreParamF16x2,
2288 NVPTX::StoreParamF32, NVPTX::StoreParamF64);
// Two-element (V2) opcodes, same type order as the scalar list.
2292 NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
2293 NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
2294 NVPTX::StoreParamV2F16, NVPTX::StoreParamV2F16x2,
2295 NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
// Four-element (V4) opcodes. `None` occupies the positions that hold the I64
// and F64 entries in the lists above, i.e. no V4 opcode is given for them.
2299 NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
2300 NVPTX::StoreParamV4I32,
None,
2301 NVPTX::StoreParamV4F16, NVPTX::StoreParamV4F16x2,
2302 NVPTX::StoreParamV4F32,
None);
// Two separate sites force Opcode to StoreParamI32. The case labels that
// reach them are not shown in this excerpt -- NOTE(review): confirm which
// node opcodes these belong to.
2312 Opcode = NVPTX::StoreParamI32;
2321 Opcode = NVPTX::StoreParamI32;
// tryTextureIntrinsic: switches on N's opcode and assigns a machine opcode
// to Opc; opcodes with no matching case hit `default` and return false.
// NOTE: this listing is a non-contiguous excerpt. The leading integer on a
// line is its line number in the original file; the `case` labels and `break`
// statements between the assignments, and the code after the switch other
// than the one push_back at the end, are not shown.
2340 bool NVPTXDAGToDAGISel::tryTextureIntrinsic(
SDNode *
N) {
2343 switch (
N->getOpcode()) {
2344 default:
return false;
// --- TEX_* opcodes, all ending in _RR ---
// Listed by geometry: 1D, 1D_ARRAY, 2D, 2D_ARRAY, 3D, CUBE, CUBE_ARRAY.
// For 1D..3D each of F32/S32/U32 has four forms: _S32, _F32, _F32_LEVEL and
// _F32_GRAD. (Presumably result type followed by coordinate type -- TODO
// confirm against the instruction definitions.)
2346 Opc = NVPTX::TEX_1D_F32_S32_RR;
2349 Opc = NVPTX::TEX_1D_F32_F32_RR;
2352 Opc = NVPTX::TEX_1D_F32_F32_LEVEL_RR;
2355 Opc = NVPTX::TEX_1D_F32_F32_GRAD_RR;
2358 Opc = NVPTX::TEX_1D_S32_S32_RR;
2361 Opc = NVPTX::TEX_1D_S32_F32_RR;
2364 Opc = NVPTX::TEX_1D_S32_F32_LEVEL_RR;
2367 Opc = NVPTX::TEX_1D_S32_F32_GRAD_RR;
2370 Opc = NVPTX::TEX_1D_U32_S32_RR;
2373 Opc = NVPTX::TEX_1D_U32_F32_RR;
2376 Opc = NVPTX::TEX_1D_U32_F32_LEVEL_RR;
2379 Opc = NVPTX::TEX_1D_U32_F32_GRAD_RR;
2382 Opc = NVPTX::TEX_1D_ARRAY_F32_S32_RR;
2385 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_RR;
2388 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RR;
2391 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RR;
2394 Opc = NVPTX::TEX_1D_ARRAY_S32_S32_RR;
2397 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_RR;
2400 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RR;
2403 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RR;
2406 Opc = NVPTX::TEX_1D_ARRAY_U32_S32_RR;
2409 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_RR;
2412 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RR;
2415 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RR;
2418 Opc = NVPTX::TEX_2D_F32_S32_RR;
2421 Opc = NVPTX::TEX_2D_F32_F32_RR;
2424 Opc = NVPTX::TEX_2D_F32_F32_LEVEL_RR;
2427 Opc = NVPTX::TEX_2D_F32_F32_GRAD_RR;
2430 Opc = NVPTX::TEX_2D_S32_S32_RR;
2433 Opc = NVPTX::TEX_2D_S32_F32_RR;
2436 Opc = NVPTX::TEX_2D_S32_F32_LEVEL_RR;
2439 Opc = NVPTX::TEX_2D_S32_F32_GRAD_RR;
2442 Opc = NVPTX::TEX_2D_U32_S32_RR;
2445 Opc = NVPTX::TEX_2D_U32_F32_RR;
2448 Opc = NVPTX::TEX_2D_U32_F32_LEVEL_RR;
2451 Opc = NVPTX::TEX_2D_U32_F32_GRAD_RR;
2454 Opc = NVPTX::TEX_2D_ARRAY_F32_S32_RR;
2457 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_RR;
2460 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RR;
2463 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RR;
2466 Opc = NVPTX::TEX_2D_ARRAY_S32_S32_RR;
2469 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_RR;
2472 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RR;
2475 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RR;
2478 Opc = NVPTX::TEX_2D_ARRAY_U32_S32_RR;
2481 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_RR;
2484 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RR;
2487 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RR;
2490 Opc = NVPTX::TEX_3D_F32_S32_RR;
2493 Opc = NVPTX::TEX_3D_F32_F32_RR;
2496 Opc = NVPTX::TEX_3D_F32_F32_LEVEL_RR;
2499 Opc = NVPTX::TEX_3D_F32_F32_GRAD_RR;
2502 Opc = NVPTX::TEX_3D_S32_S32_RR;
2505 Opc = NVPTX::TEX_3D_S32_F32_RR;
2508 Opc = NVPTX::TEX_3D_S32_F32_LEVEL_RR;
2511 Opc = NVPTX::TEX_3D_S32_F32_GRAD_RR;
2514 Opc = NVPTX::TEX_3D_U32_S32_RR;
2517 Opc = NVPTX::TEX_3D_U32_F32_RR;
2520 Opc = NVPTX::TEX_3D_U32_F32_LEVEL_RR;
2523 Opc = NVPTX::TEX_3D_U32_F32_GRAD_RR;
// CUBE and CUBE_ARRAY list only the _F32 and _F32_LEVEL forms per type
// (no _S32 and no _GRAD entries appear).
2526 Opc = NVPTX::TEX_CUBE_F32_F32_RR;
2529 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL_RR;
2532 Opc = NVPTX::TEX_CUBE_S32_F32_RR;
2535 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL_RR;
2538 Opc = NVPTX::TEX_CUBE_U32_F32_RR;
2541 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL_RR;
2544 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_RR;
2547 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RR;
2550 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_RR;
2553 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RR;
2556 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_RR;
2559 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RR;
// --- TLD4_* opcodes, all 2D and ending in _RR ---
// One entry per component letter (R, G, B, A) for each of F32, S32, U32.
2562 Opc = NVPTX::TLD4_R_2D_F32_F32_RR;
2565 Opc = NVPTX::TLD4_G_2D_F32_F32_RR;
2568 Opc = NVPTX::TLD4_B_2D_F32_F32_RR;
2571 Opc = NVPTX::TLD4_A_2D_F32_F32_RR;
2574 Opc = NVPTX::TLD4_R_2D_S32_F32_RR;
2577 Opc = NVPTX::TLD4_G_2D_S32_F32_RR;
2580 Opc = NVPTX::TLD4_B_2D_S32_F32_RR;
2583 Opc = NVPTX::TLD4_A_2D_S32_F32_RR;
2586 Opc = NVPTX::TLD4_R_2D_U32_F32_RR;
2589 Opc = NVPTX::TLD4_G_2D_U32_F32_RR;
2592 Opc = NVPTX::TLD4_B_2D_U32_F32_RR;
2595 Opc = NVPTX::TLD4_A_2D_U32_F32_RR;
// --- TEX_UNIFIED_* opcodes, all ending in _R ---
// Same geometry/type/form layout as the TEX_* run above.
2598 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32_R;
2601 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_R;
2604 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL_R;
2607 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD_R;
2610 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32_R;
2613 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_R;
2616 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL_R;
2619 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD_R;
2622 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32_R;
2625 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_R;
2628 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL_R;
2631 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD_R;
2634 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32_R;
2637 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_R;
2640 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL_R;
2643 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD_R;
2646 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32_R;
2649 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_R;
2652 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL_R;
2655 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD_R;
2658 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32_R;
2661 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_R;
2664 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL_R;
2667 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD_R;
2670 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32_R;
2673 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_R;
2676 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL_R;
2679 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD_R;
2682 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32_R;
2685 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_R;
2688 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL_R;
2691 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD_R;
2694 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32_R;
2697 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_R;
2700 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL_R;
2703 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD_R;
2706 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32_R;
2709 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_R;
2712 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL_R;
2715 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD_R;
2718 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32_R;
2721 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_R;
2724 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL_R;
2727 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD_R;
2730 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32_R;
2733 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_R;
2736 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL_R;
2739 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD_R;
2742 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32_R;
2745 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_R;
2748 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL_R;
2751 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD_R;
2754 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32_R;
2757 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_R;
2760 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL_R;
2763 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD_R;
2766 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32_R;
2769 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_R;
2772 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL_R;
2775 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD_R;
2778 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_R;
2781 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL_R;
2784 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_R;
2787 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL_R;
2790 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_R;
2793 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL_R;
2796 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_R;
2799 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL_R;
2802 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_R;
2805 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL_R;
2808 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_R;
2811 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_R;
// --- TLD4_UNIFIED_* opcodes, all 2D and ending in _R ---
2814 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32_R;
2817 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32_R;
2820 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32_R;
2823 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32_R;
2826 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32_R;
2829 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32_R;
2832 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32_R;
2835 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32_R;
2838 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32_R;
2841 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32_R;
2844 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32_R;
2847 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32_R;
// N's operand 0 is appended to Ops (presumably the chain, placed after the
// other operands -- the preceding pushes are not shown, TODO confirm).
2853 Ops.push_back(
N->getOperand(0));
// trySurfaceIntrinsic: switches on N's opcode and assigns a SULD_* machine
// opcode to Opc; opcodes with no matching case hit `default` and return
// false.
// NOTE: this listing is a non-contiguous excerpt. The leading integer on a
// line is its line number in the original file; the `case` labels and `break`
// statements between the assignments, and the code after the switch other
// than the one push_back at the end, are not shown.
//
// The assignments come in three runs distinguished by suffix: _CLAMP_R,
// _TRAP_R and _ZERO_R. Each run lists the same 55 shapes: geometries 1D,
// 1D_ARRAY, 2D, 2D_ARRAY, 3D, each with I8/I16/I32/I64, V2I8/V2I16/V2I32/
// V2I64 and V4I8/V4I16/V4I32 (no V4I64 entry appears in any run).
2859 bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(
SDNode *
N) {
2861 switch (
N->getOpcode()) {
2862 default:
return false;
// --- _CLAMP_R run ---
2864 Opc = NVPTX::SULD_1D_I8_CLAMP_R;
2867 Opc = NVPTX::SULD_1D_I16_CLAMP_R;
2870 Opc = NVPTX::SULD_1D_I32_CLAMP_R;
2873 Opc = NVPTX::SULD_1D_I64_CLAMP_R;
2876 Opc = NVPTX::SULD_1D_V2I8_CLAMP_R;
2879 Opc = NVPTX::SULD_1D_V2I16_CLAMP_R;
2882 Opc = NVPTX::SULD_1D_V2I32_CLAMP_R;
2885 Opc = NVPTX::SULD_1D_V2I64_CLAMP_R;
2888 Opc = NVPTX::SULD_1D_V4I8_CLAMP_R;
2891 Opc = NVPTX::SULD_1D_V4I16_CLAMP_R;
2894 Opc = NVPTX::SULD_1D_V4I32_CLAMP_R;
2897 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP_R;
2900 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP_R;
2903 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP_R;
2906 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP_R;
2909 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP_R;
2912 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP_R;
2915 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP_R;
2918 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP_R;
2921 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP_R;
2924 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP_R;
2927 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP_R;
2930 Opc = NVPTX::SULD_2D_I8_CLAMP_R;
2933 Opc = NVPTX::SULD_2D_I16_CLAMP_R;
2936 Opc = NVPTX::SULD_2D_I32_CLAMP_R;
2939 Opc = NVPTX::SULD_2D_I64_CLAMP_R;
2942 Opc = NVPTX::SULD_2D_V2I8_CLAMP_R;
2945 Opc = NVPTX::SULD_2D_V2I16_CLAMP_R;
2948 Opc = NVPTX::SULD_2D_V2I32_CLAMP_R;
2951 Opc = NVPTX::SULD_2D_V2I64_CLAMP_R;
2954 Opc = NVPTX::SULD_2D_V4I8_CLAMP_R;
2957 Opc = NVPTX::SULD_2D_V4I16_CLAMP_R;
2960 Opc = NVPTX::SULD_2D_V4I32_CLAMP_R;
2963 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP_R;
2966 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP_R;
2969 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP_R;
2972 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP_R;
2975 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP_R;
2978 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP_R;
2981 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP_R;
2984 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP_R;
2987 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP_R;
2990 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP_R;
2993 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP_R;
2996 Opc = NVPTX::SULD_3D_I8_CLAMP_R;
2999 Opc = NVPTX::SULD_3D_I16_CLAMP_R;
3002 Opc = NVPTX::SULD_3D_I32_CLAMP_R;
3005 Opc = NVPTX::SULD_3D_I64_CLAMP_R;
3008 Opc = NVPTX::SULD_3D_V2I8_CLAMP_R;
3011 Opc = NVPTX::SULD_3D_V2I16_CLAMP_R;
3014 Opc = NVPTX::SULD_3D_V2I32_CLAMP_R;
3017 Opc = NVPTX::SULD_3D_V2I64_CLAMP_R;
3020 Opc = NVPTX::SULD_3D_V4I8_CLAMP_R;
3023 Opc = NVPTX::SULD_3D_V4I16_CLAMP_R;
3026 Opc = NVPTX::SULD_3D_V4I32_CLAMP_R;
// --- _TRAP_R run ---
3029 Opc = NVPTX::SULD_1D_I8_TRAP_R;
3032 Opc = NVPTX::SULD_1D_I16_TRAP_R;
3035 Opc = NVPTX::SULD_1D_I32_TRAP_R;
3038 Opc = NVPTX::SULD_1D_I64_TRAP_R;
3041 Opc = NVPTX::SULD_1D_V2I8_TRAP_R;
3044 Opc = NVPTX::SULD_1D_V2I16_TRAP_R;
3047 Opc = NVPTX::SULD_1D_V2I32_TRAP_R;
3050 Opc = NVPTX::SULD_1D_V2I64_TRAP_R;
3053 Opc = NVPTX::SULD_1D_V4I8_TRAP_R;
3056 Opc = NVPTX::SULD_1D_V4I16_TRAP_R;
3059 Opc = NVPTX::SULD_1D_V4I32_TRAP_R;
3062 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP_R;
3065 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP_R;
3068 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP_R;
3071 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP_R;
3074 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP_R;
3077 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP_R;
3080 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP_R;
3083 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP_R;
3086 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP_R;
3089 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP_R;
3092 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP_R;
3095 Opc = NVPTX::SULD_2D_I8_TRAP_R;
3098 Opc = NVPTX::SULD_2D_I16_TRAP_R;
3101 Opc = NVPTX::SULD_2D_I32_TRAP_R;
3104 Opc = NVPTX::SULD_2D_I64_TRAP_R;
3107 Opc = NVPTX::SULD_2D_V2I8_TRAP_R;
3110 Opc = NVPTX::SULD_2D_V2I16_TRAP_R;
3113 Opc = NVPTX::SULD_2D_V2I32_TRAP_R;
3116 Opc = NVPTX::SULD_2D_V2I64_TRAP_R;
3119 Opc = NVPTX::SULD_2D_V4I8_TRAP_R;
3122 Opc = NVPTX::SULD_2D_V4I16_TRAP_R;
3125 Opc = NVPTX::SULD_2D_V4I32_TRAP_R;
3128 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP_R;
3131 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP_R;
3134 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP_R;
3137 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP_R;
3140 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP_R;
3143 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP_R;
3146 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP_R;
3149 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP_R;
3152 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP_R;
3155 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP_R;
3158 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP_R;
3161 Opc = NVPTX::SULD_3D_I8_TRAP_R;
3164 Opc = NVPTX::SULD_3D_I16_TRAP_R;
3167 Opc = NVPTX::SULD_3D_I32_TRAP_R;
3170 Opc = NVPTX::SULD_3D_I64_TRAP_R;
3173 Opc = NVPTX::SULD_3D_V2I8_TRAP_R;
3176 Opc = NVPTX::SULD_3D_V2I16_TRAP_R;
3179 Opc = NVPTX::SULD_3D_V2I32_TRAP_R;
3182 Opc = NVPTX::SULD_3D_V2I64_TRAP_R;
3185 Opc = NVPTX::SULD_3D_V4I8_TRAP_R;
3188 Opc = NVPTX::SULD_3D_V4I16_TRAP_R;
3191 Opc = NVPTX::SULD_3D_V4I32_TRAP_R;
// --- _ZERO_R run ---
3194 Opc = NVPTX::SULD_1D_I8_ZERO_R;
3197 Opc = NVPTX::SULD_1D_I16_ZERO_R;
3200 Opc = NVPTX::SULD_1D_I32_ZERO_R;
3203 Opc = NVPTX::SULD_1D_I64_ZERO_R;
3206 Opc = NVPTX::SULD_1D_V2I8_ZERO_R;
3209 Opc = NVPTX::SULD_1D_V2I16_ZERO_R;
3212 Opc = NVPTX::SULD_1D_V2I32_ZERO_R;
3215 Opc = NVPTX::SULD_1D_V2I64_ZERO_R;
3218 Opc = NVPTX::SULD_1D_V4I8_ZERO_R;
3221 Opc = NVPTX::SULD_1D_V4I16_ZERO_R;
3224 Opc = NVPTX::SULD_1D_V4I32_ZERO_R;
3227 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO_R;
3230 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO_R;
3233 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO_R;
3236 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO_R;
3239 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO_R;
3242 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO_R;
3245 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO_R;
3248 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO_R;
3251 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO_R;
3254 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO_R;
3257 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO_R;
3260 Opc = NVPTX::SULD_2D_I8_ZERO_R;
3263 Opc = NVPTX::SULD_2D_I16_ZERO_R;
3266 Opc = NVPTX::SULD_2D_I32_ZERO_R;
3269 Opc = NVPTX::SULD_2D_I64_ZERO_R;
3272 Opc = NVPTX::SULD_2D_V2I8_ZERO_R;
3275 Opc = NVPTX::SULD_2D_V2I16_ZERO_R;
3278 Opc = NVPTX::SULD_2D_V2I32_ZERO_R;
3281 Opc = NVPTX::SULD_2D_V2I64_ZERO_R;
3284 Opc = NVPTX::SULD_2D_V4I8_ZERO_R;
3287 Opc = NVPTX::SULD_2D_V4I16_ZERO_R;
3290 Opc = NVPTX::SULD_2D_V4I32_ZERO_R;
3293 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO_R;
3296 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO_R;
3299 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO_R;
3302 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO_R;
3305 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO_R;
3308 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO_R;
3311 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO_R;
3314 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO_R;
3317 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO_R;
3320 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO_R;
3323 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO_R;
3326 Opc = NVPTX::SULD_3D_I8_ZERO_R;
3329 Opc = NVPTX::SULD_3D_I16_ZERO_R;
3332 Opc = NVPTX::SULD_3D_I32_ZERO_R;
3335 Opc = NVPTX::SULD_3D_I64_ZERO_R;
3338 Opc = NVPTX::SULD_3D_V2I8_ZERO_R;
3341 Opc = NVPTX::SULD_3D_V2I16_ZERO_R;
3344 Opc = NVPTX::SULD_3D_V2I32_ZERO_R;
3347 Opc = NVPTX::SULD_3D_V2I64_ZERO_R;
3350 Opc = NVPTX::SULD_3D_V4I8_ZERO_R;
3353 Opc = NVPTX::SULD_3D_V4I16_ZERO_R;
3356 Opc = NVPTX::SULD_3D_V4I32_ZERO_R;
// N's operand 0 is appended to Ops (presumably the chain, placed after the
// other operands -- the preceding pushes are not shown, TODO confirm).
3362 Ops.push_back(
N->getOperand(0));
// tryBFE: takes the node N being selected and returns bool. The last visible
// lines assign one of four BFE_* machine opcodes to Opc.
// NOTE: this listing is a non-contiguous excerpt. The leading integer on a
// line is its line number in the original file; most of the matching logic
// (declarations of LHS/RHS/Val/Start/NumBits and friends, the bodies of the
// conditions below, all returns) is not shown, so the comments describe only
// the visible statements.
3371 bool NVPTXDAGToDAGISel::tryBFE(
SDNode *
N) {
// Signedness flag starts out false.
3378 bool IsSigned =
false;
// Taken when LHS is a constant and RHS is not. The body is not shown
// (presumably it swaps the two so the constant ends up on the right --
// TODO confirm).
3383 if (isa<ConstantSDNode>(
LHS) && !isa<ConstantSDNode>(
RHS)) {
// The value and the start position are taken from operands 0 and 1 of the
// node behind LHS.
3408 Val =
LHS.getNode()->getOperand(0);
3409 Start =
LHS.getNode()->getOperand(1);
// GoodBits = bit width of Start's value minus StartVal; the branch below is
// taken when the requested NumBits exceeds it (body not shown).
3415 uint64_t GoodBits = Start.getValueSizeInBits() - StartVal;
3416 if (NumBits > GoodBits) {
// Branch for a constant AndLHS (body not shown).
3450 if (isa<ConstantSDNode>(AndLHS)) {
// Field width derived from the zero/one counts minus the shift amount.
3474 NumBits = NumZeros + NumOnes - ShiftAmt;
// Branch for a shift amount smaller than NumZeros (body not shown).
3480 if (ShiftAmt < NumZeros) {
// Here the value comes from operand 0 of LHS directly.
3497 Val =
LHS->getOperand(0);
// Branch for an outer shift smaller than the inner shift (body not shown).
3516 if (OuterShiftAmt < InnerShiftAmt) {
// Opcode choice: S32/U32 and S64/U64 "rii" forms. The conditions selecting
// between them are not shown (presumably IsSigned and the value width --
// TODO confirm).
3552 Opc = NVPTX::BFE_S32rii;
3554 Opc = NVPTX::BFE_U32rii;
3558 Opc = NVPTX::BFE_S64rii;
3560 Opc = NVPTX::BFE_U64rii;
// SelectDirectAddr: takes an SDValue N by value and an SDValue reference
// Address (non-const, so it can serve as an output), and returns bool.
// NOTE: only the signature is present in this excerpt; the body (original
// lines after 3577) is not shown, so the matching rules cannot be documented
// from here.
3577 bool NVPTXDAGToDAGISel::SelectDirectAddr(
SDValue N,
SDValue &Address) {
// SelectADDRsi_imp: returns bool. The parameter list and most of the body are
// missing from this excerpt (the leading integers are original line numbers;
// lines 3600-3603 and everything after 3604 are not shown).
3599 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
// Tries SelectDirectAddr with `base` as the input and `Base` as the output
// reference. Note these are two distinct identifiers differing only in case.
3604 if (SelectDirectAddr(
base,
Base)) {
// SelectADDRri_imp: returns bool. The parameter list and most of the body are
// missing from this excerpt (the leading integers are original line numbers;
// only fragments of lines 3639 and 3644 are shown).
3627 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
// Tries SelectDirectAddr on operand 0 of Addr, with Addr itself passed as the
// output reference.
3639 if (SelectDirectAddr(
Addr.getOperand(0),
Addr)) {
// Tests whether operand 0 of Addr is a FrameIndexSDNode; the statement this
// expression belongs to is only partly shown.
3644 dyn_cast<FrameIndexSDNode>(
Addr.getOperand(0)))
// ChkMemSDNodeAddressSpace: const member taking a node N and an address-space
// number spN; the visible return compares the address space of the pointer
// behind N's memory operand with spN.
// NOTE: this listing is a non-contiguous excerpt. The leading integer on a
// line is its line number in the original file; lines 3674 and 3676-3678 and
// everything after 3680 are not shown.
3669 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(
SDNode *
N,
3670 unsigned int spN)
const {
// Src stays null unless N turns out to be a MemSDNode.
3671 const Value *Src =
nullptr;
3672 if (
MemSDNode *mN = dyn_cast<MemSDNode>(
N)) {
// Special case: address space 0 requested and the memory operand carries a
// pseudo value. The statement this guards (line 3674) is not shown.
3673 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
// Otherwise use the IR value recorded on the memory operand.
3675 Src = mN->getMemOperand()->getValue();
// Src is dereferenced here; any null check on it would have to be in the
// lines not shown (3676-3678) -- NOTE(review): confirm one exists.
3679 if (
auto *PT = dyn_cast<PointerType>(Src->getType()))
// Match when the pointer's address space equals spN.
3680 return (PT->getAddressSpace() == spN);
// NOTE: the line carrying this function's name and return type is not present
// in this excerpt (the leading integers are original line numbers; 3686 and
// several lines in between are missing). What is visible: it receives an
// operand Op, a ConstraintID and an output vector OutOps, and switches on
// ConstraintID; the case labels and return statements are not shown.
3687 const SDValue &
Op,
unsigned ConstraintID, std::vector<SDValue> &OutOps) {
3689 switch (ConstraintID) {
// First attempt: a direct address. On success its single result Op0 is
// appended to OutOps.
3693 if (SelectDirectAddr(
Op, Op0)) {
3694 OutOps.push_back(Op0);
// Second attempt: SelectADDRri on Op (with Op's node as the first argument).
// On success both results, Op0 then Op1, are appended to OutOps.
3698 if (SelectADDRri(
Op.getNode(),
Op, Op0, Op1)) {
3699 OutOps.push_back(Op0);
3700 OutOps.push_back(Op1);
3710 unsigned NVPTXDAGToDAGISel::GetConvertOpcode(
MVT DestTy,
MVT SrcTy,
3720 return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
3722 return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
3724 return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
3731 return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
3733 return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
3735 return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
3742 return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
3744 return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
3746 return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
3753 return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
3755 return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
3757 return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;