20#include "llvm/IR/IntrinsicsNVPTX.h"
30#define DEBUG_TYPE "nvptx-isel"
31#define PASS_NAME "NVPTX DAG->DAG Pattern Instruction Selection"
35 cl::desc(
"Enable reciprocal sqrt optimization"));
51 doMulWide = (OptLevel > CodeGenOptLevel::None);
59int NVPTXDAGToDAGISel::getDivF32Level()
const {
63bool NVPTXDAGToDAGISel::usePrecSqrtF32()
const {
67bool NVPTXDAGToDAGISel::useF32FTZ()
const {
71bool NVPTXDAGToDAGISel::allowFMA()
const {
76bool NVPTXDAGToDAGISel::allowUnsafeFPMath()
const {
// Returns whether the reciprocal-sqrt (rsqrt) optimization is enabled.
// Simply forwards the EnableRsqrtOpt flag — presumably the cl::opt whose
// description string ("Enable reciprocal sqrt optimization") appears near the
// top of this file; TODO(review): confirm the flag declaration is that cl::opt.
81bool NVPTXDAGToDAGISel::doRsqrtOpt()
const {
return EnableRsqrtOpt; }
85void NVPTXDAGToDAGISel::Select(
SDNode *
N) {
87 if (
N->isMachineOpcode()) {
92 switch (
N->getOpcode()) {
104 if (tryEXTRACT_VECTOR_ELEMENT(
N))
111 SelectSETP_BF16X2(
N);
115 if (tryLoadVector(
N))
127 if (tryStoreVector(
N))
139 if (tryStoreRetval(
N))
147 if (tryStoreParam(
N))
151 if (tryIntrinsicNoChain(
N))
155 if (tryIntrinsicChain(
N))
332 if (tryTextureIntrinsic(
N))
500 if (trySurfaceIntrinsic(
N))
511 SelectAddrSpaceCast(
N);
514 if (tryConstantFP(
N))
523bool NVPTXDAGToDAGISel::tryIntrinsicChain(
SDNode *
N) {
524 unsigned IID =
N->getConstantOperandVal(1);
528 case Intrinsic::nvvm_ldg_global_f:
529 case Intrinsic::nvvm_ldg_global_i:
530 case Intrinsic::nvvm_ldg_global_p:
531 case Intrinsic::nvvm_ldu_global_f:
532 case Intrinsic::nvvm_ldu_global_i:
533 case Intrinsic::nvvm_ldu_global_p:
540bool NVPTXDAGToDAGISel::tryConstantFP(
SDNode *
N) {
541 if (
N->getValueType(0) != MVT::f16 &&
N->getValueType(0) != MVT::bf16)
544 cast<ConstantFPSDNode>(
N)->getValueAPF(),
SDLoc(
N),
N->getValueType(0));
546 (
N->getValueType(0) == MVT::f16 ? NVPTX::LOAD_CONST_F16
547 : NVPTX::LOAD_CONST_BF16),
548 SDLoc(
N),
N->getValueType(0), Val);
576 return CmpMode::NotANumber;
610bool NVPTXDAGToDAGISel::SelectSETP_F16X2(
SDNode *
N) {
611 unsigned PTXCmpMode =
612 getPTXCmpMode(*cast<CondCodeSDNode>(
N->getOperand(2)), useF32FTZ());
615 NVPTX::SETP_f16x2rr,
DL, MVT::i1, MVT::i1,
N->getOperand(0),
621bool NVPTXDAGToDAGISel::SelectSETP_BF16X2(
SDNode *
N) {
622 unsigned PTXCmpMode =
623 getPTXCmpMode(*cast<CondCodeSDNode>(
N->getOperand(2)), useF32FTZ());
626 NVPTX::SETP_bf16x2rr,
DL, MVT::i1, MVT::i1,
N->getOperand(0),
634bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(
SDNode *
N) {
644 for (
auto *U :
Vector.getNode()->uses()) {
647 if (
U->getOperand(0) !=
Vector)
650 dyn_cast<ConstantSDNode>(
U->getOperand(1))) {
651 if (IdxConst->getZExtValue() == 0)
653 else if (IdxConst->getZExtValue() == 1)
670 for (
auto *
Node : E0)
672 for (
auto *
Node : E1)
679 const Value *Src =
N->getMemOperand()->getValue();
684 if (
auto *PT = dyn_cast<PointerType>(Src->getType())) {
685 switch (PT->getAddressSpace()) {
720 if (
N->isInvariant())
732 if (
auto *
A = dyn_cast<const Argument>(V))
733 return IsKernelFn &&
A->onlyReadsMemory() &&
A->hasNoAliasAttr();
734 if (
auto *GV = dyn_cast<const GlobalVariable>(V))
735 return GV->isConstant();
740bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(
SDNode *
N) {
741 unsigned IID =
N->getConstantOperandVal(0);
745 case Intrinsic::nvvm_texsurf_handle_internal:
746 SelectTexSurfHandle(
N);
751void NVPTXDAGToDAGISel::SelectTexSurfHandle(
SDNode *
N) {
756 MVT::i64, GlobalVal));
759void NVPTXDAGToDAGISel::SelectAddrSpaceCast(
SDNode *
N) {
764 assert(SrcAddrSpace != DstAddrSpace &&
765 "addrspacecast must be between different address spaces");
770 switch (SrcAddrSpace) {
773 Opc = TM.
is64Bit() ? NVPTX::cvta_global_64 : NVPTX::cvta_global;
777 ? NVPTX::cvta_shared_6432
778 : NVPTX::cvta_shared_64)
779 : NVPTX::cvta_shared;
783 ? NVPTX::cvta_const_6432
784 : NVPTX::cvta_const_64)
789 ? NVPTX::cvta_local_6432
790 : NVPTX::cvta_local_64)
799 if (SrcAddrSpace != 0)
802 switch (DstAddrSpace) {
805 Opc = TM.
is64Bit() ? NVPTX::cvta_to_global_64 : NVPTX::cvta_to_global;
809 ? NVPTX::cvta_to_shared_3264
810 : NVPTX::cvta_to_shared_64)
811 : NVPTX::cvta_to_shared;
815 ? NVPTX::cvta_to_const_3264
816 : NVPTX::cvta_to_const_64)
817 : NVPTX::cvta_to_const;
821 ? NVPTX::cvta_to_local_3264
822 : NVPTX::cvta_to_local_64)
823 : NVPTX::cvta_to_local;
826 Opc = TM.
is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
827 : NVPTX::nvvm_ptr_gen_to_param;
838static std::optional<unsigned>
840 unsigned Opcode_i16,
unsigned Opcode_i32,
841 std::optional<unsigned> Opcode_i64,
unsigned Opcode_f32,
842 std::optional<unsigned> Opcode_f64) {
885bool NVPTXDAGToDAGISel::tryLoad(
SDNode *
N) {
888 assert(
LD->readMem() &&
"Expected load");
890 EVT LoadedVT =
LD->getMemoryVT();
891 SDNode *NVPTXLD =
nullptr;
935 unsigned fromTypeWidth = std::max(8U, (
unsigned)ScalarVT.
getSizeInBits());
936 unsigned int fromType;
942 "Unexpected vector type");
957 std::optional<unsigned> Opcode;
960 if (SelectDirectAddr(N1,
Addr)) {
961 Opcode =
pickOpcodeForVT(TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar,
962 NVPTX::LD_i32_avar, NVPTX::LD_i64_avar,
963 NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
966 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
967 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
968 getI32Imm(fromTypeWidth, dl),
Addr, Chain };
972 Opcode =
pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
973 NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
974 NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
977 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
978 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
979 getI32Imm(fromTypeWidth, dl),
Base,
Offset, Chain };
983 if (PointerSize == 64)
986 NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64,
987 NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
989 Opcode =
pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari,
990 NVPTX::LD_i32_ari, NVPTX::LD_i64_ari,
991 NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
994 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
995 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
996 getI32Imm(fromTypeWidth, dl),
Base,
Offset, Chain };
999 if (PointerSize == 64)
1001 pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
1002 NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64,
1003 NVPTX::LD_f32_areg_64, NVPTX::LD_f64_areg_64);
1005 Opcode =
pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg,
1006 NVPTX::LD_i32_areg, NVPTX::LD_i64_areg,
1007 NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
1010 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
1011 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
1012 getI32Imm(fromTypeWidth, dl), N1, Chain };
1026bool NVPTXDAGToDAGISel::tryLoadVector(
SDNode *
N) {
1031 std::optional<unsigned> Opcode;
1043 return tryLDGLDU(
N);
1068 unsigned FromTypeWidth = std::max(8U, (
unsigned)ScalarVT.
getSizeInBits());
1071 unsigned ExtensionType = cast<ConstantSDNode>(
1072 N->getOperand(
N->getNumOperands() - 1))->getZExtValue();
1080 switch (
N->getOpcode()) {
1091 EVT EltVT =
N->getValueType(0);
1103 if (SelectDirectAddr(Op1,
Addr)) {
1104 switch (
N->getOpcode()) {
1109 NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
1110 NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
1111 NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
1116 NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar,
1117 std::nullopt, NVPTX::LDV_f32_v4_avar, std::nullopt);
1122 SDValue Ops[] = { getI32Imm(IsVolatile,
DL), getI32Imm(CodeAddrSpace,
DL),
1123 getI32Imm(VecType,
DL), getI32Imm(FromType,
DL),
1124 getI32Imm(FromTypeWidth,
DL),
Addr, Chain };
1126 }
else if (PointerSize == 64
1129 switch (
N->getOpcode()) {
1134 NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
1135 NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
1136 NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
1141 NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi,
1142 std::nullopt, NVPTX::LDV_f32_v4_asi, std::nullopt);
1147 SDValue Ops[] = { getI32Imm(IsVolatile,
DL), getI32Imm(CodeAddrSpace,
DL),
1148 getI32Imm(VecType,
DL), getI32Imm(FromType,
DL),
1151 }
else if (PointerSize == 64
1154 if (PointerSize == 64) {
1155 switch (
N->getOpcode()) {
1161 NVPTX::LDV_i8_v2_ari_64, NVPTX::LDV_i16_v2_ari_64,
1162 NVPTX::LDV_i32_v2_ari_64, NVPTX::LDV_i64_v2_ari_64,
1163 NVPTX::LDV_f32_v2_ari_64, NVPTX::LDV_f64_v2_ari_64);
1168 NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, std::nullopt,
1169 NVPTX::LDV_f32_v4_ari_64, std::nullopt);
1173 switch (
N->getOpcode()) {
1178 NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
1179 NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
1180 NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
1185 NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari,
1186 std::nullopt, NVPTX::LDV_f32_v4_ari, std::nullopt);
1192 SDValue Ops[] = { getI32Imm(IsVolatile,
DL), getI32Imm(CodeAddrSpace,
DL),
1193 getI32Imm(VecType,
DL), getI32Imm(FromType,
DL),
1198 if (PointerSize == 64) {
1199 switch (
N->getOpcode()) {
1205 NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
1206 NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
1207 NVPTX::LDV_f64_v2_areg_64);
1212 NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, std::nullopt,
1213 NVPTX::LDV_f32_v4_areg_64, std::nullopt);
1217 switch (
N->getOpcode()) {
1223 NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
1224 NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f32_v2_areg,
1225 NVPTX::LDV_f64_v2_areg);
1230 NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg,
1231 std::nullopt, NVPTX::LDV_f32_v4_areg, std::nullopt);
1237 SDValue Ops[] = { getI32Imm(IsVolatile,
DL), getI32Imm(CodeAddrSpace,
DL),
1238 getI32Imm(VecType,
DL), getI32Imm(FromType,
DL),
1239 getI32Imm(FromTypeWidth,
DL), Op1, Chain };
1250bool NVPTXDAGToDAGISel::tryLDGLDU(
SDNode *
N) {
1260 Op1 =
N->getOperand(2);
1261 Mem = cast<MemIntrinsicSDNode>(
N);
1262 unsigned IID =
N->getConstantOperandVal(1);
1266 case Intrinsic::nvvm_ldg_global_f:
1267 case Intrinsic::nvvm_ldg_global_i:
1268 case Intrinsic::nvvm_ldg_global_p:
1271 case Intrinsic::nvvm_ldu_global_f:
1272 case Intrinsic::nvvm_ldu_global_i:
1273 case Intrinsic::nvvm_ldu_global_p:
1278 Op1 =
N->getOperand(1);
1279 Mem = cast<MemSDNode>(
N);
1282 std::optional<unsigned> Opcode;
1286 EVT OrigType =
N->getValueType(0);
1289 unsigned NumElts = 1;
1294 if ((EltVT == MVT::f16 && OrigType == MVT::v2f16) ||
1295 (EltVT == MVT::bf16 && OrigType == MVT::v2bf16) ||
1296 (EltVT == MVT::i16 && OrigType == MVT::v2i16)) {
1297 assert(NumElts % 2 == 0 &&
"Vector must have even number of elements");
1300 }
else if (OrigType == MVT::v4i8) {
1309 EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
1311 for (
unsigned i = 0; i != NumElts; ++i) {
1317 if (SelectDirectAddr(Op1,
Addr)) {
1318 switch (
N->getOpcode()) {
1325 NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
1326 NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
1327 NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
1328 NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
1329 NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
1330 NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
1333 NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
1334 NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
1335 NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
1336 NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
1337 NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
1338 NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
1343 NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
1344 NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
1345 NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
1346 NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
1347 NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
1348 NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
1352 NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
1353 NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
1354 NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
1355 NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
1356 NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
1357 NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
1363 NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
1364 NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, std::nullopt,
1365 NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, std::nullopt);
1370 NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
1371 NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, std::nullopt,
1372 NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, std::nullopt);
1382 switch (
N->getOpcode()) {
1389 NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
1390 NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
1391 NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
1392 NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
1393 NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
1394 NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
1397 NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
1398 NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
1399 NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
1400 NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
1401 NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
1402 NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
1407 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
1408 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
1409 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
1410 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
1411 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
1412 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
1416 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
1417 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
1418 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
1419 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
1420 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
1421 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
1427 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
1428 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, std::nullopt,
1429 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, std::nullopt);
1434 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
1435 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, std::nullopt,
1436 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, std::nullopt);
1440 switch (
N->getOpcode()) {
1447 NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
1448 NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
1449 NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
1450 NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
1451 NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
1452 NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
1455 NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
1456 NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
1457 NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
1458 NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
1459 NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
1460 NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
1465 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
1466 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
1467 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
1468 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
1469 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
1470 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
1474 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
1475 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
1476 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
1477 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
1478 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
1479 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
1485 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
1486 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, std::nullopt,
1487 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, std::nullopt);
1492 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
1493 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, std::nullopt,
1494 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, std::nullopt);
1504 switch (
N->getOpcode()) {
1511 NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
1512 NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
1513 NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
1514 NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
1515 NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
1516 NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
1519 NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
1520 NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
1521 NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
1522 NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
1523 NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
1524 NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
1529 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
1530 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
1531 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
1532 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
1533 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
1534 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
1538 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
1539 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
1540 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
1541 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
1542 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
1543 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
1549 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
1550 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, std::nullopt,
1551 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, std::nullopt);
1556 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
1557 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, std::nullopt,
1558 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, std::nullopt);
1562 switch (
N->getOpcode()) {
1569 NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
1570 NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
1571 NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
1572 NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
1573 NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
1574 NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
1577 NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
1578 NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
1579 NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
1580 NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
1581 NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
1582 NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
1587 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
1588 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
1589 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
1590 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
1591 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
1592 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
1596 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
1597 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
1598 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
1599 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
1600 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
1601 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
1607 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
1608 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, std::nullopt,
1609 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, std::nullopt);
1614 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
1615 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, std::nullopt,
1616 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, std::nullopt);
1622 SDValue Ops[] = { Op1, Chain };
1639 if (OrigType != EltVT &&
1649 for (
unsigned i = 0; i != NumElts; ++i) {
1665bool NVPTXDAGToDAGISel::tryStore(
SDNode *
N) {
1668 assert(
ST->writeMem() &&
"Expected store");
1671 assert((PlainStore || AtomicStore) &&
"Expected store");
1672 EVT StoreVT =
ST->getMemoryVT();
1673 SDNode *NVPTXST =
nullptr;
1676 if (PlainStore && PlainStore->
isIndexed())
1715 "Unexpected vector type");
1728 std::optional<unsigned> Opcode;
1730 Value.getNode()->getSimpleValueType(0).SimpleTy;
1732 if (SelectDirectAddr(BasePtr,
Addr)) {
1733 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
1734 NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
1735 NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
1739 getI32Imm(isVolatile, dl),
1740 getI32Imm(CodeAddrSpace, dl),
1741 getI32Imm(vecType, dl),
1742 getI32Imm(toType, dl),
1743 getI32Imm(toTypeWidth, dl),
1747 }
else if (PointerSize == 64
1750 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
1751 NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
1752 NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
1756 getI32Imm(isVolatile, dl),
1757 getI32Imm(CodeAddrSpace, dl),
1758 getI32Imm(vecType, dl),
1759 getI32Imm(toType, dl),
1760 getI32Imm(toTypeWidth, dl),
1765 }
else if (PointerSize == 64
1768 if (PointerSize == 64)
1771 NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64,
1772 NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
1774 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
1775 NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
1776 NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
1781 getI32Imm(isVolatile, dl),
1782 getI32Imm(CodeAddrSpace, dl),
1783 getI32Imm(vecType, dl),
1784 getI32Imm(toType, dl),
1785 getI32Imm(toTypeWidth, dl),
1791 if (PointerSize == 64)
1793 pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
1794 NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
1795 NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
1797 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
1798 NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
1799 NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
1803 getI32Imm(isVolatile, dl),
1804 getI32Imm(CodeAddrSpace, dl),
1805 getI32Imm(vecType, dl),
1806 getI32Imm(toType, dl),
1807 getI32Imm(toTypeWidth, dl),
1822bool NVPTXDAGToDAGISel::tryStoreVector(
SDNode *
N) {
1826 std::optional<unsigned> Opcode;
1861 switch (
N->getOpcode()) {
1866 N2 =
N->getOperand(3);
1874 N2 =
N->getOperand(5);
1896 if (SelectDirectAddr(N2,
Addr)) {
1897 switch (
N->getOpcode()) {
1902 NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
1903 NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
1904 NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
1908 NVPTX::STV_i8_v4_avar, NVPTX::STV_i16_v4_avar,
1909 NVPTX::STV_i32_v4_avar, std::nullopt,
1910 NVPTX::STV_f32_v4_avar, std::nullopt);
1916 switch (
N->getOpcode()) {
1921 NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
1922 NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
1923 NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
1928 NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi,
1929 std::nullopt, NVPTX::STV_f32_v4_asi, std::nullopt);
1936 if (PointerSize == 64) {
1937 switch (
N->getOpcode()) {
1943 NVPTX::STV_i8_v2_ari_64, NVPTX::STV_i16_v2_ari_64,
1944 NVPTX::STV_i32_v2_ari_64, NVPTX::STV_i64_v2_ari_64,
1945 NVPTX::STV_f32_v2_ari_64, NVPTX::STV_f64_v2_ari_64);
1950 NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, std::nullopt,
1951 NVPTX::STV_f32_v4_ari_64, std::nullopt);
1955 switch (
N->getOpcode()) {
1960 NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
1961 NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
1962 NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
1966 NVPTX::STV_i8_v4_ari, NVPTX::STV_i16_v4_ari,
1967 NVPTX::STV_i32_v4_ari, std::nullopt,
1968 NVPTX::STV_f32_v4_ari, std::nullopt);
1975 if (PointerSize == 64) {
1976 switch (
N->getOpcode()) {
1982 NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
1983 NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
1984 NVPTX::STV_f64_v2_areg_64);
1989 NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, std::nullopt,
1990 NVPTX::STV_f32_v4_areg_64, std::nullopt);
1994 switch (
N->getOpcode()) {
2000 NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
2001 NVPTX::STV_i64_v2_areg, NVPTX::STV_f32_v2_areg,
2002 NVPTX::STV_f64_v2_areg);
2007 NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg,
2008 std::nullopt, NVPTX::STV_f32_v4_areg, std::nullopt);
2029bool NVPTXDAGToDAGISel::tryLoadParam(
SDNode *
Node) {
2037 switch (
Node->getOpcode()) {
2051 EVT EltVT =
Node->getValueType(0);
2054 std::optional<unsigned> Opcode;
2061 NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
2062 NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
2063 NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
2068 NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
2069 NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F32,
2070 NVPTX::LoadParamMemV2F64);
2075 NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32,
2076 std::nullopt, NVPTX::LoadParamMemV4F32, std::nullopt);
2085 }
else if (VecSize == 2) {
2088 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2092 unsigned OffsetVal =
Offset->getAsZExtVal();
2103bool NVPTXDAGToDAGISel::tryStoreRetval(
SDNode *
N) {
2107 unsigned OffsetVal =
Offset->getAsZExtVal();
2111 unsigned NumElts = 1;
2112 switch (
N->getOpcode()) {
2128 for (
unsigned i = 0; i < NumElts; ++i)
2136 std::optional<unsigned> Opcode = 0;
2142 NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
2143 NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
2144 NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
2145 if (Opcode == NVPTX::StoreRetvalI8) {
2149 switch (Ops[0].getSimpleValueType().SimpleTy) {
2153 Opcode = NVPTX::StoreRetvalI8TruncI32;
2156 Opcode = NVPTX::StoreRetvalI8TruncI64;
2163 NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
2164 NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
2165 NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
2169 NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
2170 NVPTX::StoreRetvalV4I32, std::nullopt,
2171 NVPTX::StoreRetvalV4F32, std::nullopt);
2185bool NVPTXDAGToDAGISel::tryStoreParam(
SDNode *
N) {
2189 unsigned ParamVal =
Param->getAsZExtVal();
2191 unsigned OffsetVal =
Offset->getAsZExtVal();
2193 SDValue Glue =
N->getOperand(
N->getNumOperands() - 1);
2196 unsigned NumElts = 1;
2197 switch (
N->getOpcode()) {
2215 for (
unsigned i = 0; i < NumElts; ++i)
2225 std::optional<unsigned> Opcode = 0;
2226 switch (
N->getOpcode()) {
2233 NVPTX::StoreParamI8, NVPTX::StoreParamI16,
2234 NVPTX::StoreParamI32, NVPTX::StoreParamI64,
2235 NVPTX::StoreParamF32, NVPTX::StoreParamF64);
2236 if (Opcode == NVPTX::StoreParamI8) {
2240 switch (Ops[0].getSimpleValueType().SimpleTy) {
2244 Opcode = NVPTX::StoreParamI8TruncI32;
2247 Opcode = NVPTX::StoreParamI8TruncI64;
2254 NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
2255 NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
2256 NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
2260 NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
2261 NVPTX::StoreParamV4I32, std::nullopt,
2262 NVPTX::StoreParamV4F32, std::nullopt);
2272 Opcode = NVPTX::StoreParamI32;
2276 MVT::i32, Ops[0], CvtNone);
2281 Opcode = NVPTX::StoreParamI32;
2285 MVT::i32, Ops[0], CvtNone);
2300bool NVPTXDAGToDAGISel::tryTextureIntrinsic(
SDNode *
N) {
2303 switch (
N->getOpcode()) {
2304 default:
return false;
2306 Opc = NVPTX::TEX_1D_F32_S32_RR;
2309 Opc = NVPTX::TEX_1D_F32_F32_RR;
2312 Opc = NVPTX::TEX_1D_F32_F32_LEVEL_RR;
2315 Opc = NVPTX::TEX_1D_F32_F32_GRAD_RR;
2318 Opc = NVPTX::TEX_1D_S32_S32_RR;
2321 Opc = NVPTX::TEX_1D_S32_F32_RR;
2324 Opc = NVPTX::TEX_1D_S32_F32_LEVEL_RR;
2327 Opc = NVPTX::TEX_1D_S32_F32_GRAD_RR;
2330 Opc = NVPTX::TEX_1D_U32_S32_RR;
2333 Opc = NVPTX::TEX_1D_U32_F32_RR;
2336 Opc = NVPTX::TEX_1D_U32_F32_LEVEL_RR;
2339 Opc = NVPTX::TEX_1D_U32_F32_GRAD_RR;
2342 Opc = NVPTX::TEX_1D_ARRAY_F32_S32_RR;
2345 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_RR;
2348 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RR;
2351 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RR;
2354 Opc = NVPTX::TEX_1D_ARRAY_S32_S32_RR;
2357 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_RR;
2360 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RR;
2363 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RR;
2366 Opc = NVPTX::TEX_1D_ARRAY_U32_S32_RR;
2369 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_RR;
2372 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RR;
2375 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RR;
2378 Opc = NVPTX::TEX_2D_F32_S32_RR;
2381 Opc = NVPTX::TEX_2D_F32_F32_RR;
2384 Opc = NVPTX::TEX_2D_F32_F32_LEVEL_RR;
2387 Opc = NVPTX::TEX_2D_F32_F32_GRAD_RR;
2390 Opc = NVPTX::TEX_2D_S32_S32_RR;
2393 Opc = NVPTX::TEX_2D_S32_F32_RR;
2396 Opc = NVPTX::TEX_2D_S32_F32_LEVEL_RR;
2399 Opc = NVPTX::TEX_2D_S32_F32_GRAD_RR;
2402 Opc = NVPTX::TEX_2D_U32_S32_RR;
2405 Opc = NVPTX::TEX_2D_U32_F32_RR;
2408 Opc = NVPTX::TEX_2D_U32_F32_LEVEL_RR;
2411 Opc = NVPTX::TEX_2D_U32_F32_GRAD_RR;
2414 Opc = NVPTX::TEX_2D_ARRAY_F32_S32_RR;
2417 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_RR;
2420 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RR;
2423 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RR;
2426 Opc = NVPTX::TEX_2D_ARRAY_S32_S32_RR;
2429 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_RR;
2432 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RR;
2435 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RR;
2438 Opc = NVPTX::TEX_2D_ARRAY_U32_S32_RR;
2441 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_RR;
2444 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RR;
2447 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RR;
2450 Opc = NVPTX::TEX_3D_F32_S32_RR;
2453 Opc = NVPTX::TEX_3D_F32_F32_RR;
2456 Opc = NVPTX::TEX_3D_F32_F32_LEVEL_RR;
2459 Opc = NVPTX::TEX_3D_F32_F32_GRAD_RR;
2462 Opc = NVPTX::TEX_3D_S32_S32_RR;
2465 Opc = NVPTX::TEX_3D_S32_F32_RR;
2468 Opc = NVPTX::TEX_3D_S32_F32_LEVEL_RR;
2471 Opc = NVPTX::TEX_3D_S32_F32_GRAD_RR;
2474 Opc = NVPTX::TEX_3D_U32_S32_RR;
2477 Opc = NVPTX::TEX_3D_U32_F32_RR;
2480 Opc = NVPTX::TEX_3D_U32_F32_LEVEL_RR;
2483 Opc = NVPTX::TEX_3D_U32_F32_GRAD_RR;
2486 Opc = NVPTX::TEX_CUBE_F32_F32_RR;
2489 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL_RR;
2492 Opc = NVPTX::TEX_CUBE_S32_F32_RR;
2495 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL_RR;
2498 Opc = NVPTX::TEX_CUBE_U32_F32_RR;
2501 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL_RR;
2504 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_RR;
2507 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RR;
2510 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_RR;
2513 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RR;
2516 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_RR;
2519 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RR;
2522 Opc = NVPTX::TLD4_R_2D_F32_F32_RR;
2525 Opc = NVPTX::TLD4_G_2D_F32_F32_RR;
2528 Opc = NVPTX::TLD4_B_2D_F32_F32_RR;
2531 Opc = NVPTX::TLD4_A_2D_F32_F32_RR;
2534 Opc = NVPTX::TLD4_R_2D_S32_F32_RR;
2537 Opc = NVPTX::TLD4_G_2D_S32_F32_RR;
2540 Opc = NVPTX::TLD4_B_2D_S32_F32_RR;
2543 Opc = NVPTX::TLD4_A_2D_S32_F32_RR;
2546 Opc = NVPTX::TLD4_R_2D_U32_F32_RR;
2549 Opc = NVPTX::TLD4_G_2D_U32_F32_RR;
2552 Opc = NVPTX::TLD4_B_2D_U32_F32_RR;
2555 Opc = NVPTX::TLD4_A_2D_U32_F32_RR;
2558 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32_R;
2561 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_R;
2564 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL_R;
2567 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD_R;
2570 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32_R;
2573 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_R;
2576 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL_R;
2579 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD_R;
2582 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32_R;
2585 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_R;
2588 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL_R;
2591 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD_R;
2594 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32_R;
2597 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_R;
2600 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL_R;
2603 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD_R;
2606 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32_R;
2609 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_R;
2612 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL_R;
2615 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD_R;
2618 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32_R;
2621 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_R;
2624 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL_R;
2627 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD_R;
2630 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32_R;
2633 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_R;
2636 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL_R;
2639 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD_R;
2642 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32_R;
2645 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_R;
2648 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL_R;
2651 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD_R;
2654 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32_R;
2657 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_R;
2660 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL_R;
2663 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD_R;
2666 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32_R;
2669 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_R;
2672 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL_R;
2675 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD_R;
2678 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32_R;
2681 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_R;
2684 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL_R;
2687 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD_R;
2690 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32_R;
2693 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_R;
2696 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL_R;
2699 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD_R;
2702 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32_R;
2705 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_R;
2708 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL_R;
2711 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD_R;
2714 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32_R;
2717 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_R;
2720 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL_R;
2723 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD_R;
2726 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32_R;
2729 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_R;
2732 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL_R;
2735 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD_R;
2738 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_R;
2741 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL_R;
2744 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_R;
2747 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL_R;
2750 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_R;
2753 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL_R;
2756 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_R;
2759 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL_R;
2762 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_R;
2765 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL_R;
2768 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_R;
2771 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_R;
2774 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32_R;
2777 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32_R;
2780 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32_R;
2783 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32_R;
2786 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32_R;
2789 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32_R;
2792 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32_R;
2795 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32_R;
2798 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32_R;
2801 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32_R;
2804 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32_R;
2807 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32_R;
2810 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_GRAD_R;
2813 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_GRAD_R;
2816 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_GRAD_R;
2819 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD_R;
2822 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD_R;
2825 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD_R;
2837bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(
SDNode *
N) {
2839 switch (
N->getOpcode()) {
2840 default:
return false;
2842 Opc = NVPTX::SULD_1D_I8_CLAMP_R;
2845 Opc = NVPTX::SULD_1D_I16_CLAMP_R;
2848 Opc = NVPTX::SULD_1D_I32_CLAMP_R;
2851 Opc = NVPTX::SULD_1D_I64_CLAMP_R;
2854 Opc = NVPTX::SULD_1D_V2I8_CLAMP_R;
2857 Opc = NVPTX::SULD_1D_V2I16_CLAMP_R;
2860 Opc = NVPTX::SULD_1D_V2I32_CLAMP_R;
2863 Opc = NVPTX::SULD_1D_V2I64_CLAMP_R;
2866 Opc = NVPTX::SULD_1D_V4I8_CLAMP_R;
2869 Opc = NVPTX::SULD_1D_V4I16_CLAMP_R;
2872 Opc = NVPTX::SULD_1D_V4I32_CLAMP_R;
2875 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP_R;
2878 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP_R;
2881 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP_R;
2884 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP_R;
2887 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP_R;
2890 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP_R;
2893 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP_R;
2896 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP_R;
2899 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP_R;
2902 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP_R;
2905 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP_R;
2908 Opc = NVPTX::SULD_2D_I8_CLAMP_R;
2911 Opc = NVPTX::SULD_2D_I16_CLAMP_R;
2914 Opc = NVPTX::SULD_2D_I32_CLAMP_R;
2917 Opc = NVPTX::SULD_2D_I64_CLAMP_R;
2920 Opc = NVPTX::SULD_2D_V2I8_CLAMP_R;
2923 Opc = NVPTX::SULD_2D_V2I16_CLAMP_R;
2926 Opc = NVPTX::SULD_2D_V2I32_CLAMP_R;
2929 Opc = NVPTX::SULD_2D_V2I64_CLAMP_R;
2932 Opc = NVPTX::SULD_2D_V4I8_CLAMP_R;
2935 Opc = NVPTX::SULD_2D_V4I16_CLAMP_R;
2938 Opc = NVPTX::SULD_2D_V4I32_CLAMP_R;
2941 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP_R;
2944 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP_R;
2947 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP_R;
2950 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP_R;
2953 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP_R;
2956 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP_R;
2959 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP_R;
2962 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP_R;
2965 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP_R;
2968 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP_R;
2971 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP_R;
2974 Opc = NVPTX::SULD_3D_I8_CLAMP_R;
2977 Opc = NVPTX::SULD_3D_I16_CLAMP_R;
2980 Opc = NVPTX::SULD_3D_I32_CLAMP_R;
2983 Opc = NVPTX::SULD_3D_I64_CLAMP_R;
2986 Opc = NVPTX::SULD_3D_V2I8_CLAMP_R;
2989 Opc = NVPTX::SULD_3D_V2I16_CLAMP_R;
2992 Opc = NVPTX::SULD_3D_V2I32_CLAMP_R;
2995 Opc = NVPTX::SULD_3D_V2I64_CLAMP_R;
2998 Opc = NVPTX::SULD_3D_V4I8_CLAMP_R;
3001 Opc = NVPTX::SULD_3D_V4I16_CLAMP_R;
3004 Opc = NVPTX::SULD_3D_V4I32_CLAMP_R;
3007 Opc = NVPTX::SULD_1D_I8_TRAP_R;
3010 Opc = NVPTX::SULD_1D_I16_TRAP_R;
3013 Opc = NVPTX::SULD_1D_I32_TRAP_R;
3016 Opc = NVPTX::SULD_1D_I64_TRAP_R;
3019 Opc = NVPTX::SULD_1D_V2I8_TRAP_R;
3022 Opc = NVPTX::SULD_1D_V2I16_TRAP_R;
3025 Opc = NVPTX::SULD_1D_V2I32_TRAP_R;
3028 Opc = NVPTX::SULD_1D_V2I64_TRAP_R;
3031 Opc = NVPTX::SULD_1D_V4I8_TRAP_R;
3034 Opc = NVPTX::SULD_1D_V4I16_TRAP_R;
3037 Opc = NVPTX::SULD_1D_V4I32_TRAP_R;
3040 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP_R;
3043 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP_R;
3046 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP_R;
3049 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP_R;
3052 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP_R;
3055 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP_R;
3058 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP_R;
3061 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP_R;
3064 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP_R;
3067 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP_R;
3070 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP_R;
3073 Opc = NVPTX::SULD_2D_I8_TRAP_R;
3076 Opc = NVPTX::SULD_2D_I16_TRAP_R;
3079 Opc = NVPTX::SULD_2D_I32_TRAP_R;
3082 Opc = NVPTX::SULD_2D_I64_TRAP_R;
3085 Opc = NVPTX::SULD_2D_V2I8_TRAP_R;
3088 Opc = NVPTX::SULD_2D_V2I16_TRAP_R;
3091 Opc = NVPTX::SULD_2D_V2I32_TRAP_R;
3094 Opc = NVPTX::SULD_2D_V2I64_TRAP_R;
3097 Opc = NVPTX::SULD_2D_V4I8_TRAP_R;
3100 Opc = NVPTX::SULD_2D_V4I16_TRAP_R;
3103 Opc = NVPTX::SULD_2D_V4I32_TRAP_R;
3106 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP_R;
3109 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP_R;
3112 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP_R;
3115 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP_R;
3118 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP_R;
3121 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP_R;
3124 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP_R;
3127 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP_R;
3130 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP_R;
3133 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP_R;
3136 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP_R;
3139 Opc = NVPTX::SULD_3D_I8_TRAP_R;
3142 Opc = NVPTX::SULD_3D_I16_TRAP_R;
3145 Opc = NVPTX::SULD_3D_I32_TRAP_R;
3148 Opc = NVPTX::SULD_3D_I64_TRAP_R;
3151 Opc = NVPTX::SULD_3D_V2I8_TRAP_R;
3154 Opc = NVPTX::SULD_3D_V2I16_TRAP_R;
3157 Opc = NVPTX::SULD_3D_V2I32_TRAP_R;
3160 Opc = NVPTX::SULD_3D_V2I64_TRAP_R;
3163 Opc = NVPTX::SULD_3D_V4I8_TRAP_R;
3166 Opc = NVPTX::SULD_3D_V4I16_TRAP_R;
3169 Opc = NVPTX::SULD_3D_V4I32_TRAP_R;
3172 Opc = NVPTX::SULD_1D_I8_ZERO_R;
3175 Opc = NVPTX::SULD_1D_I16_ZERO_R;
3178 Opc = NVPTX::SULD_1D_I32_ZERO_R;
3181 Opc = NVPTX::SULD_1D_I64_ZERO_R;
3184 Opc = NVPTX::SULD_1D_V2I8_ZERO_R;
3187 Opc = NVPTX::SULD_1D_V2I16_ZERO_R;
3190 Opc = NVPTX::SULD_1D_V2I32_ZERO_R;
3193 Opc = NVPTX::SULD_1D_V2I64_ZERO_R;
3196 Opc = NVPTX::SULD_1D_V4I8_ZERO_R;
3199 Opc = NVPTX::SULD_1D_V4I16_ZERO_R;
3202 Opc = NVPTX::SULD_1D_V4I32_ZERO_R;
3205 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO_R;
3208 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO_R;
3211 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO_R;
3214 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO_R;
3217 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO_R;
3220 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO_R;
3223 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO_R;
3226 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO_R;
3229 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO_R;
3232 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO_R;
3235 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO_R;
3238 Opc = NVPTX::SULD_2D_I8_ZERO_R;
3241 Opc = NVPTX::SULD_2D_I16_ZERO_R;
3244 Opc = NVPTX::SULD_2D_I32_ZERO_R;
3247 Opc = NVPTX::SULD_2D_I64_ZERO_R;
3250 Opc = NVPTX::SULD_2D_V2I8_ZERO_R;
3253 Opc = NVPTX::SULD_2D_V2I16_ZERO_R;
3256 Opc = NVPTX::SULD_2D_V2I32_ZERO_R;
3259 Opc = NVPTX::SULD_2D_V2I64_ZERO_R;
3262 Opc = NVPTX::SULD_2D_V4I8_ZERO_R;
3265 Opc = NVPTX::SULD_2D_V4I16_ZERO_R;
3268 Opc = NVPTX::SULD_2D_V4I32_ZERO_R;
3271 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO_R;
3274 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO_R;
3277 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO_R;
3280 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO_R;
3283 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO_R;
3286 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO_R;
3289 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO_R;
3292 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO_R;
3295 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO_R;
3298 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO_R;
3301 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO_R;
3304 Opc = NVPTX::SULD_3D_I8_ZERO_R;
3307 Opc = NVPTX::SULD_3D_I16_ZERO_R;
3310 Opc = NVPTX::SULD_3D_I32_ZERO_R;
3313 Opc = NVPTX::SULD_3D_I64_ZERO_R;
3316 Opc = NVPTX::SULD_3D_V2I8_ZERO_R;
3319 Opc = NVPTX::SULD_3D_V2I16_ZERO_R;
3322 Opc = NVPTX::SULD_3D_V2I32_ZERO_R;
3325 Opc = NVPTX::SULD_3D_V2I64_ZERO_R;
3328 Opc = NVPTX::SULD_3D_V4I8_ZERO_R;
3331 Opc = NVPTX::SULD_3D_V4I16_ZERO_R;
3334 Opc = NVPTX::SULD_3D_V4I32_ZERO_R;
3349bool NVPTXDAGToDAGISel::tryBFE(
SDNode *
N) {
3356 bool IsSigned =
false;
3361 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
3386 Val =
LHS.getNode()->getOperand(0);
3387 Start =
LHS.getNode()->getOperand(1);
3393 int64_t GoodBits = Start.getValueSizeInBits() - StartVal;
3394 if (NumBits > GoodBits) {
3428 if (isa<ConstantSDNode>(AndLHS)) {
3452 NumBits = NumZeros + NumOnes - ShiftAmt;
3458 if (ShiftAmt < NumZeros) {
3475 Val =
LHS->getOperand(0);
3494 if (OuterShiftAmt < InnerShiftAmt) {
3530 Opc = NVPTX::BFE_S32rii;
3532 Opc = NVPTX::BFE_U32rii;
3536 Opc = NVPTX::BFE_S64rii;
3538 Opc = NVPTX::BFE_U64rii;
3577bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3582 if (SelectDirectAddr(base,
Base)) {
3605bool NVPTXDAGToDAGISel::SelectADDRri_imp(
3617 if (SelectDirectAddr(
Addr.getOperand(0),
Addr)) {
3622 dyn_cast<FrameIndexSDNode>(
Addr.getOperand(0)))
3647bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(
SDNode *
N,
3648 unsigned int spN)
const {
3649 const Value *Src =
nullptr;
3650 if (
MemSDNode *mN = dyn_cast<MemSDNode>(
N)) {
3651 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3653 Src = mN->getMemOperand()->getValue();
3657 if (
auto *PT = dyn_cast<PointerType>(Src->getType()))
3658 return (PT->getAddressSpace() == spN);
3666 std::vector<SDValue> &OutOps) {
3668 switch (ConstraintID) {
3672 if (SelectDirectAddr(
Op, Op0)) {
3673 OutOps.push_back(Op0);
3677 if (SelectADDRri(
Op.getNode(),
Op, Op0, Op1)) {
3678 OutOps.push_back(Op0);
3679 OutOps.push_back(Op1);
3689unsigned NVPTXDAGToDAGISel::GetConvertOpcode(
MVT DestTy,
MVT SrcTy,
3700 return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
3702 return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
3704 return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
3711 return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
3713 return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
3715 return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
3722 return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
3724 return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
3726 return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
3733 return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
3735 return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
3737 return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
3744 return NVPTX::CVT_f32_f16;
3746 return NVPTX::CVT_f64_f16;
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
Atomic ordering constants.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static unsigned int getCodeAddrSpace(MemSDNode *N)
static int getLdStRegType(EVT VT)
static cl::opt< bool > EnableRsqrtOpt("nvptx-rsqrt-approx-opt", cl::init(true), cl::Hidden, cl::desc("Enable reciprocal sqrt optimization"))
static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ)
static std::optional< unsigned > pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16, unsigned Opcode_i32, std::optional< unsigned > Opcode_i64, unsigned Opcode_f32, std::optional< unsigned > Opcode_f64)
static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget, unsigned CodeAddrSpace, MachineFunction *F)
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
unsigned getSrcAddressSpace() const
unsigned getDestAddressSpace() const
This is an SDNode representing atomic operations.
const SDValue & getVal() const
uint64_t getZExtValue() const
This class represents an Operation in the Expression.
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
FunctionPass class - This class is used to implement most global optimizations.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool isVector() const
Return true if this is a vector value type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getVectorElementType() const
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
A description of a memory reference used in the backend.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
EVT getMemoryVT() const
Return the type of the in-memory value.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Implement addressing mode selection for inline asm expressions.
const NVPTXSubtarget * Subtarget
const NVPTXTargetLowering * getTargetLowering() const override
bool useF32FTZ(const MachineFunction &MF) const
bool allowFMA(MachineFunction &MF, CodeGenOptLevel OptLevel) const
bool usePrecSqrtF32() const
bool allowUnsafeFPMath(MachineFunction &MF) const
int getDivF32Level() const
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const SDValue & getOperand(unsigned Num) const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
SDValue getTargetConstantFP(double Val, const SDLoc &DL, EVT VT)
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
const SDValue & getValue() const
unsigned getPointerSizeInBits(unsigned AS) const
LLVM Value Representation.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
@ SHL
Shift and rotation operations.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
@ TexUnified1DS32FloatLevel
@ Tex1DArrayFloatFloatLevel
@ TexUnified2DU32FloatGrad
@ Tld4UnifiedG2DFloatFloat
@ TexUnifiedCubeArrayFloatFloatLevel
@ Tld4UnifiedR2DFloatFloat
@ Tex2DArrayS32FloatLevel
@ TexUnified1DArrayFloatFloatLevel
@ TexUnified2DFloatFloatLevel
@ TexUnified3DFloatFloatLevel
@ TexUnified1DFloatFloatLevel
@ TexUnified2DArrayU32Float
@ TexUnified1DArrayFloatFloat
@ Tex1DArrayFloatFloatGrad
@ TexUnifiedCubeArrayU32FloatGrad
@ TexUnified1DFloatFloatGrad
@ TexUnifiedCubeFloatFloatGrad
@ TexUnified2DArrayFloatFloat
@ TexUnified3DU32FloatLevel
@ TexUnified1DArrayU32Float
@ TexUnified2DArrayFloatFloatLevel
@ TexUnified2DFloatFloatGrad
@ TexUnified2DArrayU32S32
@ TexUnifiedCubeArrayS32FloatLevel
@ TexUnified1DArrayS32Float
@ TexUnified1DArrayS32FloatLevel
@ TexUnified2DS32FloatLevel
@ TexUnified3DU32FloatGrad
@ TexUnifiedCubeU32FloatLevel
@ TexUnified2DArrayU32FloatGrad
@ TexUnifiedCubeFloatFloatLevel
@ TexUnified1DArrayFloatS32
@ TexUnifiedCubeS32FloatLevel
@ TexUnified1DS32FloatGrad
@ Tex2DArrayFloatFloatLevel
@ TexUnifiedCubeArrayFloatFloat
@ TexUnifiedCubeArrayFloatFloatGrad
@ TexUnifiedCubeFloatFloat
@ TexUnified1DArrayU32S32
@ TexUnified3DFloatFloatGrad
@ Tld4UnifiedA2DFloatFloat
@ TexUnified3DS32FloatGrad
@ TexUnified2DU32FloatLevel
@ TexUnified1DArrayS32S32
@ TexCubeArrayFloatFloatLevel
@ TexUnified1DU32FloatGrad
@ TexCubeArrayS32FloatLevel
@ Tex2DArrayU32FloatLevel
@ Tex1DArrayU32FloatLevel
@ TexUnified2DArrayU32FloatLevel
@ TexUnified1DArrayFloatFloatGrad
@ TexUnifiedCubeS32FloatGrad
@ TexCubeArrayU32FloatLevel
@ TexUnified3DS32FloatLevel
@ TexUnifiedCubeArrayS32FloatGrad
@ TexUnified2DArrayS32Float
@ Tex2DArrayFloatFloatGrad
@ TexUnifiedCubeArrayS32Float
@ TexUnified2DArrayS32FloatLevel
@ Tex1DArrayS32FloatLevel
@ TexUnifiedCubeArrayU32FloatLevel
@ TexUnified2DArrayS32S32
@ TexUnified2DArrayFloatFloatGrad
@ TexUnifiedCubeU32FloatGrad
@ Tld4UnifiedB2DFloatFloat
@ TexUnified1DArrayU32FloatLevel
@ TexUnified1DArrayS32FloatGrad
@ TexUnified2DS32FloatGrad
@ TexUnified2DArrayS32FloatGrad
@ TexUnified1DU32FloatLevel
@ TexUnifiedCubeArrayU32Float
@ TexUnified2DArrayFloatS32
@ TexUnified1DArrayU32FloatGrad
initializer< Ty > init(const Ty &Val)
constexpr uint64_t PointerSize
aarch64 pointer size.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
FunctionPass * createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOptLevel OptLevel)
createNVPTXISelDag - This pass converts a legalized DAG into a NVPTX-specific DAG,...
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
CodeGenOptLevel
Code generation optimization level.
AtomicOrdering
Atomic ordering for LLVM's memory model.
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
bool isKernelFunction(const Function &F)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isVector() const
Return true if this is a vector value type.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.