20#include "llvm/IR/IntrinsicsNVPTX.h"
30#define DEBUG_TYPE "nvptx-isel"
31#define PASS_NAME "NVPTX DAG->DAG Pattern Instruction Selection"
47 doMulWide = (OptLevel > CodeGenOptLevel::None);
55int NVPTXDAGToDAGISel::getDivF32Level()
const {
59bool NVPTXDAGToDAGISel::usePrecSqrtF32()
const {
63bool NVPTXDAGToDAGISel::useF32FTZ()
const {
67bool NVPTXDAGToDAGISel::allowFMA()
const {
72bool NVPTXDAGToDAGISel::allowUnsafeFPMath()
const {
77bool NVPTXDAGToDAGISel::useShortPointers()
const {
83void NVPTXDAGToDAGISel::Select(
SDNode *
N) {
85 if (
N->isMachineOpcode()) {
90 switch (
N->getOpcode()) {
102 if (tryEXTRACT_VECTOR_ELEMENT(
N))
109 SelectSETP_BF16X2(
N);
113 if (tryLoadVector(
N))
125 if (tryStoreVector(
N))
137 if (tryStoreRetval(
N))
145 if (tryStoreParam(
N))
149 if (tryIntrinsicNoChain(
N))
153 if (tryIntrinsicChain(
N))
330 if (tryTextureIntrinsic(
N))
498 if (trySurfaceIntrinsic(
N))
509 SelectAddrSpaceCast(
N);
512 if (tryConstantFP(
N))
521bool NVPTXDAGToDAGISel::tryIntrinsicChain(
SDNode *
N) {
522 unsigned IID =
N->getConstantOperandVal(1);
526 case Intrinsic::nvvm_ldg_global_f:
527 case Intrinsic::nvvm_ldg_global_i:
528 case Intrinsic::nvvm_ldg_global_p:
529 case Intrinsic::nvvm_ldu_global_f:
530 case Intrinsic::nvvm_ldu_global_i:
531 case Intrinsic::nvvm_ldu_global_p:
538bool NVPTXDAGToDAGISel::tryConstantFP(
SDNode *
N) {
539 if (
N->getValueType(0) != MVT::f16 &&
N->getValueType(0) != MVT::bf16)
542 cast<ConstantFPSDNode>(
N)->getValueAPF(),
SDLoc(
N),
N->getValueType(0));
544 (
N->getValueType(0) == MVT::f16 ? NVPTX::LOAD_CONST_F16
545 : NVPTX::LOAD_CONST_BF16),
546 SDLoc(
N),
N->getValueType(0), Val);
574 return CmpMode::NotANumber;
608bool NVPTXDAGToDAGISel::SelectSETP_F16X2(
SDNode *
N) {
609 unsigned PTXCmpMode =
610 getPTXCmpMode(*cast<CondCodeSDNode>(
N->getOperand(2)), useF32FTZ());
613 NVPTX::SETP_f16x2rr,
DL, MVT::i1, MVT::i1,
N->getOperand(0),
619bool NVPTXDAGToDAGISel::SelectSETP_BF16X2(
SDNode *
N) {
620 unsigned PTXCmpMode =
621 getPTXCmpMode(*cast<CondCodeSDNode>(
N->getOperand(2)), useF32FTZ());
624 NVPTX::SETP_bf16x2rr,
DL, MVT::i1, MVT::i1,
N->getOperand(0),
632bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(
SDNode *
N) {
642 for (
auto *U :
Vector.getNode()->uses()) {
645 if (
U->getOperand(0) !=
Vector)
648 dyn_cast<ConstantSDNode>(
U->getOperand(1))) {
649 if (IdxConst->getZExtValue() == 0)
651 else if (IdxConst->getZExtValue() == 1)
668 for (
auto *
Node : E0)
670 for (
auto *
Node : E1)
677 const Value *Src =
N->getMemOperand()->getValue();
682 if (
auto *PT = dyn_cast<PointerType>(Src->getType())) {
683 switch (PT->getAddressSpace()) {
718 if (
N->isInvariant())
730 if (
auto *
A = dyn_cast<const Argument>(V))
731 return IsKernelFn &&
A->onlyReadsMemory() &&
A->hasNoAliasAttr();
732 if (
auto *GV = dyn_cast<const GlobalVariable>(V))
733 return GV->isConstant();
738bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(
SDNode *
N) {
739 unsigned IID =
N->getConstantOperandVal(0);
743 case Intrinsic::nvvm_texsurf_handle_internal:
744 SelectTexSurfHandle(
N);
749void NVPTXDAGToDAGISel::SelectTexSurfHandle(
SDNode *
N) {
754 MVT::i64, GlobalVal));
757void NVPTXDAGToDAGISel::SelectAddrSpaceCast(
SDNode *
N) {
762 assert(SrcAddrSpace != DstAddrSpace &&
763 "addrspacecast must be between different address spaces");
768 switch (SrcAddrSpace) {
771 Opc = TM.
is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
774 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_shared_yes_6432
775 : NVPTX::cvta_shared_yes_64)
776 : NVPTX::cvta_shared_yes;
779 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_const_yes_6432
780 : NVPTX::cvta_const_yes_64)
781 : NVPTX::cvta_const_yes;
784 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_local_yes_6432
785 : NVPTX::cvta_local_yes_64)
786 : NVPTX::cvta_local_yes;
794 if (SrcAddrSpace != 0)
797 switch (DstAddrSpace) {
800 Opc = TM.
is64Bit() ? NVPTX::cvta_to_global_yes_64
801 : NVPTX::cvta_to_global_yes;
804 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_shared_yes_3264
805 : NVPTX::cvta_to_shared_yes_64)
806 : NVPTX::cvta_to_shared_yes;
809 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_const_yes_3264
810 : NVPTX::cvta_to_const_yes_64)
811 : NVPTX::cvta_to_const_yes;
814 Opc = TM.
is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_local_yes_3264
815 : NVPTX::cvta_to_local_yes_64)
816 : NVPTX::cvta_to_local_yes;
819 Opc = TM.
is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
820 : NVPTX::nvvm_ptr_gen_to_param;
831static std::optional<unsigned>
833 unsigned Opcode_i16,
unsigned Opcode_i32,
834 std::optional<unsigned> Opcode_i64,
unsigned Opcode_f32,
835 std::optional<unsigned> Opcode_f64) {
878bool NVPTXDAGToDAGISel::tryLoad(
SDNode *
N) {
881 assert(
LD->readMem() &&
"Expected load");
883 EVT LoadedVT =
LD->getMemoryVT();
884 SDNode *NVPTXLD =
nullptr;
928 unsigned fromTypeWidth = std::max(8U, (
unsigned)ScalarVT.
getSizeInBits());
929 unsigned int fromType;
935 "Unexpected vector type");
950 std::optional<unsigned> Opcode;
953 if (SelectDirectAddr(N1,
Addr)) {
954 Opcode =
pickOpcodeForVT(TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar,
955 NVPTX::LD_i32_avar, NVPTX::LD_i64_avar,
956 NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
959 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
960 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
961 getI32Imm(fromTypeWidth, dl),
Addr, Chain };
965 Opcode =
pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
966 NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
967 NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
970 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
971 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
972 getI32Imm(fromTypeWidth, dl),
Base,
Offset, Chain };
976 if (PointerSize == 64)
979 NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64,
980 NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
982 Opcode =
pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari,
983 NVPTX::LD_i32_ari, NVPTX::LD_i64_ari,
984 NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
987 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
988 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
989 getI32Imm(fromTypeWidth, dl),
Base,
Offset, Chain };
992 if (PointerSize == 64)
995 NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64,
996 NVPTX::LD_f32_areg_64, NVPTX::LD_f64_areg_64);
998 Opcode =
pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg,
999 NVPTX::LD_i32_areg, NVPTX::LD_i64_areg,
1000 NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
1003 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
1004 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
1005 getI32Imm(fromTypeWidth, dl), N1, Chain };
1019bool NVPTXDAGToDAGISel::tryLoadVector(
SDNode *
N) {
1024 std::optional<unsigned> Opcode;
1036 return tryLDGLDU(
N);
1061 unsigned FromTypeWidth = std::max(8U, (
unsigned)ScalarVT.
getSizeInBits());
1064 unsigned ExtensionType = cast<ConstantSDNode>(
1065 N->getOperand(
N->getNumOperands() - 1))->getZExtValue();
1073 switch (
N->getOpcode()) {
1084 EVT EltVT =
N->getValueType(0);
1096 if (SelectDirectAddr(Op1,
Addr)) {
1097 switch (
N->getOpcode()) {
1102 NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
1103 NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
1104 NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
1109 NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar,
1110 std::nullopt, NVPTX::LDV_f32_v4_avar, std::nullopt);
1115 SDValue Ops[] = { getI32Imm(IsVolatile,
DL), getI32Imm(CodeAddrSpace,
DL),
1116 getI32Imm(VecType,
DL), getI32Imm(FromType,
DL),
1117 getI32Imm(FromTypeWidth,
DL),
Addr, Chain };
1119 }
else if (PointerSize == 64
1122 switch (
N->getOpcode()) {
1127 NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
1128 NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
1129 NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
1134 NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi,
1135 std::nullopt, NVPTX::LDV_f32_v4_asi, std::nullopt);
1140 SDValue Ops[] = { getI32Imm(IsVolatile,
DL), getI32Imm(CodeAddrSpace,
DL),
1141 getI32Imm(VecType,
DL), getI32Imm(FromType,
DL),
1144 }
else if (PointerSize == 64
1147 if (PointerSize == 64) {
1148 switch (
N->getOpcode()) {
1154 NVPTX::LDV_i8_v2_ari_64, NVPTX::LDV_i16_v2_ari_64,
1155 NVPTX::LDV_i32_v2_ari_64, NVPTX::LDV_i64_v2_ari_64,
1156 NVPTX::LDV_f32_v2_ari_64, NVPTX::LDV_f64_v2_ari_64);
1161 NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, std::nullopt,
1162 NVPTX::LDV_f32_v4_ari_64, std::nullopt);
1166 switch (
N->getOpcode()) {
1171 NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
1172 NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
1173 NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
1178 NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari,
1179 std::nullopt, NVPTX::LDV_f32_v4_ari, std::nullopt);
1185 SDValue Ops[] = { getI32Imm(IsVolatile,
DL), getI32Imm(CodeAddrSpace,
DL),
1186 getI32Imm(VecType,
DL), getI32Imm(FromType,
DL),
1191 if (PointerSize == 64) {
1192 switch (
N->getOpcode()) {
1198 NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
1199 NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
1200 NVPTX::LDV_f64_v2_areg_64);
1205 NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, std::nullopt,
1206 NVPTX::LDV_f32_v4_areg_64, std::nullopt);
1210 switch (
N->getOpcode()) {
1216 NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
1217 NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f32_v2_areg,
1218 NVPTX::LDV_f64_v2_areg);
1223 NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg,
1224 std::nullopt, NVPTX::LDV_f32_v4_areg, std::nullopt);
1230 SDValue Ops[] = { getI32Imm(IsVolatile,
DL), getI32Imm(CodeAddrSpace,
DL),
1231 getI32Imm(VecType,
DL), getI32Imm(FromType,
DL),
1232 getI32Imm(FromTypeWidth,
DL), Op1, Chain };
1243bool NVPTXDAGToDAGISel::tryLDGLDU(
SDNode *
N) {
1253 Op1 =
N->getOperand(2);
1254 Mem = cast<MemIntrinsicSDNode>(
N);
1255 unsigned IID =
N->getConstantOperandVal(1);
1259 case Intrinsic::nvvm_ldg_global_f:
1260 case Intrinsic::nvvm_ldg_global_i:
1261 case Intrinsic::nvvm_ldg_global_p:
1264 case Intrinsic::nvvm_ldu_global_f:
1265 case Intrinsic::nvvm_ldu_global_i:
1266 case Intrinsic::nvvm_ldu_global_p:
1271 Op1 =
N->getOperand(1);
1272 Mem = cast<MemSDNode>(
N);
1275 std::optional<unsigned> Opcode;
1279 EVT OrigType =
N->getValueType(0);
1282 unsigned NumElts = 1;
1287 if ((EltVT == MVT::f16 && OrigType == MVT::v2f16) ||
1288 (EltVT == MVT::bf16 && OrigType == MVT::v2bf16) ||
1289 (EltVT == MVT::i16 && OrigType == MVT::v2i16)) {
1290 assert(NumElts % 2 == 0 &&
"Vector must have even number of elements");
1293 }
else if (OrigType == MVT::v4i8) {
1302 EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
1304 for (
unsigned i = 0; i != NumElts; ++i) {
1310 if (SelectDirectAddr(Op1,
Addr)) {
1311 switch (
N->getOpcode()) {
1318 NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
1319 NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
1320 NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
1321 NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
1322 NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
1323 NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
1326 NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
1327 NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
1328 NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
1329 NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
1330 NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
1331 NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
1336 NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
1337 NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
1338 NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
1339 NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
1340 NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
1341 NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
1345 NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
1346 NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
1347 NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
1348 NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
1349 NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
1350 NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
1356 NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
1357 NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, std::nullopt,
1358 NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, std::nullopt);
1363 NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
1364 NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, std::nullopt,
1365 NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, std::nullopt);
1375 switch (
N->getOpcode()) {
1382 NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
1383 NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
1384 NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
1385 NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
1386 NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
1387 NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
1390 NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
1391 NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
1392 NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
1393 NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
1394 NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
1395 NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
1400 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
1401 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
1402 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
1403 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
1404 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
1405 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
1409 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
1410 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
1411 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
1412 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
1413 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
1414 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
1420 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
1421 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, std::nullopt,
1422 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, std::nullopt);
1427 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
1428 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, std::nullopt,
1429 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, std::nullopt);
1433 switch (
N->getOpcode()) {
1440 NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
1441 NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
1442 NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
1443 NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
1444 NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
1445 NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
1448 NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
1449 NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
1450 NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
1451 NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
1452 NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
1453 NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
1458 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
1459 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
1460 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
1461 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
1462 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
1463 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
1467 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
1468 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
1469 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
1470 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
1471 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
1472 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
1478 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
1479 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, std::nullopt,
1480 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, std::nullopt);
1485 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
1486 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, std::nullopt,
1487 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, std::nullopt);
1497 switch (
N->getOpcode()) {
1504 NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
1505 NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
1506 NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
1507 NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
1508 NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
1509 NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
1512 NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
1513 NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
1514 NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
1515 NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
1516 NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
1517 NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
1522 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
1523 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
1524 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
1525 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
1526 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
1527 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
1531 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
1532 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
1533 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
1534 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
1535 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
1536 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
1542 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
1543 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, std::nullopt,
1544 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, std::nullopt);
1549 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
1550 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, std::nullopt,
1551 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, std::nullopt);
1555 switch (
N->getOpcode()) {
1562 NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
1563 NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
1564 NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
1565 NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
1566 NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
1567 NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
1570 NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
1571 NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
1572 NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
1573 NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
1574 NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
1575 NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
1580 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
1581 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
1582 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
1583 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
1584 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
1585 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
1589 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
1590 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
1591 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
1592 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
1593 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
1594 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
1600 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
1601 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, std::nullopt,
1602 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, std::nullopt);
1607 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
1608 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, std::nullopt,
1609 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, std::nullopt);
1615 SDValue Ops[] = { Op1, Chain };
1632 if (OrigType != EltVT &&
1642 for (
unsigned i = 0; i != NumElts; ++i) {
1658bool NVPTXDAGToDAGISel::tryStore(
SDNode *
N) {
1661 assert(
ST->writeMem() &&
"Expected store");
1664 assert((PlainStore || AtomicStore) &&
"Expected store");
1665 EVT StoreVT =
ST->getMemoryVT();
1666 SDNode *NVPTXST =
nullptr;
1669 if (PlainStore && PlainStore->
isIndexed())
1708 "Unexpected vector type");
1721 std::optional<unsigned> Opcode;
1723 Value.getNode()->getSimpleValueType(0).SimpleTy;
1725 if (SelectDirectAddr(BasePtr,
Addr)) {
1726 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
1727 NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
1728 NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
1732 getI32Imm(isVolatile, dl),
1733 getI32Imm(CodeAddrSpace, dl),
1734 getI32Imm(vecType, dl),
1735 getI32Imm(toType, dl),
1736 getI32Imm(toTypeWidth, dl),
1740 }
else if (PointerSize == 64
1743 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
1744 NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
1745 NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
1749 getI32Imm(isVolatile, dl),
1750 getI32Imm(CodeAddrSpace, dl),
1751 getI32Imm(vecType, dl),
1752 getI32Imm(toType, dl),
1753 getI32Imm(toTypeWidth, dl),
1758 }
else if (PointerSize == 64
1761 if (PointerSize == 64)
1764 NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64,
1765 NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
1767 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
1768 NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
1769 NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
1774 getI32Imm(isVolatile, dl),
1775 getI32Imm(CodeAddrSpace, dl),
1776 getI32Imm(vecType, dl),
1777 getI32Imm(toType, dl),
1778 getI32Imm(toTypeWidth, dl),
1784 if (PointerSize == 64)
1786 pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
1787 NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
1788 NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
1790 Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
1791 NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
1792 NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
1796 getI32Imm(isVolatile, dl),
1797 getI32Imm(CodeAddrSpace, dl),
1798 getI32Imm(vecType, dl),
1799 getI32Imm(toType, dl),
1800 getI32Imm(toTypeWidth, dl),
1815bool NVPTXDAGToDAGISel::tryStoreVector(
SDNode *
N) {
1819 std::optional<unsigned> Opcode;
1854 switch (
N->getOpcode()) {
1859 N2 =
N->getOperand(3);
1867 N2 =
N->getOperand(5);
1889 if (SelectDirectAddr(N2,
Addr)) {
1890 switch (
N->getOpcode()) {
1895 NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
1896 NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
1897 NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
1901 NVPTX::STV_i8_v4_avar, NVPTX::STV_i16_v4_avar,
1902 NVPTX::STV_i32_v4_avar, std::nullopt,
1903 NVPTX::STV_f32_v4_avar, std::nullopt);
1909 switch (
N->getOpcode()) {
1914 NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
1915 NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
1916 NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
1921 NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi,
1922 std::nullopt, NVPTX::STV_f32_v4_asi, std::nullopt);
1929 if (PointerSize == 64) {
1930 switch (
N->getOpcode()) {
1936 NVPTX::STV_i8_v2_ari_64, NVPTX::STV_i16_v2_ari_64,
1937 NVPTX::STV_i32_v2_ari_64, NVPTX::STV_i64_v2_ari_64,
1938 NVPTX::STV_f32_v2_ari_64, NVPTX::STV_f64_v2_ari_64);
1943 NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, std::nullopt,
1944 NVPTX::STV_f32_v4_ari_64, std::nullopt);
1948 switch (
N->getOpcode()) {
1953 NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
1954 NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
1955 NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
1959 NVPTX::STV_i8_v4_ari, NVPTX::STV_i16_v4_ari,
1960 NVPTX::STV_i32_v4_ari, std::nullopt,
1961 NVPTX::STV_f32_v4_ari, std::nullopt);
1968 if (PointerSize == 64) {
1969 switch (
N->getOpcode()) {
1975 NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
1976 NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
1977 NVPTX::STV_f64_v2_areg_64);
1982 NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, std::nullopt,
1983 NVPTX::STV_f32_v4_areg_64, std::nullopt);
1987 switch (
N->getOpcode()) {
1993 NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
1994 NVPTX::STV_i64_v2_areg, NVPTX::STV_f32_v2_areg,
1995 NVPTX::STV_f64_v2_areg);
2000 NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg,
2001 std::nullopt, NVPTX::STV_f32_v4_areg, std::nullopt);
2022bool NVPTXDAGToDAGISel::tryLoadParam(
SDNode *
Node) {
2030 switch (
Node->getOpcode()) {
2044 EVT EltVT =
Node->getValueType(0);
2047 std::optional<unsigned> Opcode;
2054 NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
2055 NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
2056 NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
2061 NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
2062 NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F32,
2063 NVPTX::LoadParamMemV2F64);
2068 NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32,
2069 std::nullopt, NVPTX::LoadParamMemV4F32, std::nullopt);
2078 }
else if (VecSize == 2) {
2081 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2085 unsigned OffsetVal =
Offset->getAsZExtVal();
2096bool NVPTXDAGToDAGISel::tryStoreRetval(
SDNode *
N) {
2100 unsigned OffsetVal =
Offset->getAsZExtVal();
2104 unsigned NumElts = 1;
2105 switch (
N->getOpcode()) {
2121 for (
unsigned i = 0; i < NumElts; ++i)
2129 std::optional<unsigned> Opcode = 0;
2135 NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
2136 NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
2137 NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
2138 if (Opcode == NVPTX::StoreRetvalI8) {
2142 switch (Ops[0].getSimpleValueType().SimpleTy) {
2146 Opcode = NVPTX::StoreRetvalI8TruncI32;
2149 Opcode = NVPTX::StoreRetvalI8TruncI64;
2156 NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
2157 NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
2158 NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
2162 NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
2163 NVPTX::StoreRetvalV4I32, std::nullopt,
2164 NVPTX::StoreRetvalV4F32, std::nullopt);
2178bool NVPTXDAGToDAGISel::tryStoreParam(
SDNode *
N) {
2182 unsigned ParamVal =
Param->getAsZExtVal();
2184 unsigned OffsetVal =
Offset->getAsZExtVal();
2186 SDValue Glue =
N->getOperand(
N->getNumOperands() - 1);
2189 unsigned NumElts = 1;
2190 switch (
N->getOpcode()) {
2208 for (
unsigned i = 0; i < NumElts; ++i)
2218 std::optional<unsigned> Opcode = 0;
2219 switch (
N->getOpcode()) {
2226 NVPTX::StoreParamI8, NVPTX::StoreParamI16,
2227 NVPTX::StoreParamI32, NVPTX::StoreParamI64,
2228 NVPTX::StoreParamF32, NVPTX::StoreParamF64);
2229 if (Opcode == NVPTX::StoreParamI8) {
2233 switch (Ops[0].getSimpleValueType().SimpleTy) {
2237 Opcode = NVPTX::StoreParamI8TruncI32;
2240 Opcode = NVPTX::StoreParamI8TruncI64;
2247 NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
2248 NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
2249 NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
2253 NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
2254 NVPTX::StoreParamV4I32, std::nullopt,
2255 NVPTX::StoreParamV4F32, std::nullopt);
2265 Opcode = NVPTX::StoreParamI32;
2269 MVT::i32, Ops[0], CvtNone);
2274 Opcode = NVPTX::StoreParamI32;
2278 MVT::i32, Ops[0], CvtNone);
2293bool NVPTXDAGToDAGISel::tryTextureIntrinsic(
SDNode *
N) {
2296 switch (
N->getOpcode()) {
2297 default:
return false;
2299 Opc = NVPTX::TEX_1D_F32_S32_RR;
2302 Opc = NVPTX::TEX_1D_F32_F32_RR;
2305 Opc = NVPTX::TEX_1D_F32_F32_LEVEL_RR;
2308 Opc = NVPTX::TEX_1D_F32_F32_GRAD_RR;
2311 Opc = NVPTX::TEX_1D_S32_S32_RR;
2314 Opc = NVPTX::TEX_1D_S32_F32_RR;
2317 Opc = NVPTX::TEX_1D_S32_F32_LEVEL_RR;
2320 Opc = NVPTX::TEX_1D_S32_F32_GRAD_RR;
2323 Opc = NVPTX::TEX_1D_U32_S32_RR;
2326 Opc = NVPTX::TEX_1D_U32_F32_RR;
2329 Opc = NVPTX::TEX_1D_U32_F32_LEVEL_RR;
2332 Opc = NVPTX::TEX_1D_U32_F32_GRAD_RR;
2335 Opc = NVPTX::TEX_1D_ARRAY_F32_S32_RR;
2338 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_RR;
2341 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RR;
2344 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RR;
2347 Opc = NVPTX::TEX_1D_ARRAY_S32_S32_RR;
2350 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_RR;
2353 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RR;
2356 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RR;
2359 Opc = NVPTX::TEX_1D_ARRAY_U32_S32_RR;
2362 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_RR;
2365 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RR;
2368 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RR;
2371 Opc = NVPTX::TEX_2D_F32_S32_RR;
2374 Opc = NVPTX::TEX_2D_F32_F32_RR;
2377 Opc = NVPTX::TEX_2D_F32_F32_LEVEL_RR;
2380 Opc = NVPTX::TEX_2D_F32_F32_GRAD_RR;
2383 Opc = NVPTX::TEX_2D_S32_S32_RR;
2386 Opc = NVPTX::TEX_2D_S32_F32_RR;
2389 Opc = NVPTX::TEX_2D_S32_F32_LEVEL_RR;
2392 Opc = NVPTX::TEX_2D_S32_F32_GRAD_RR;
2395 Opc = NVPTX::TEX_2D_U32_S32_RR;
2398 Opc = NVPTX::TEX_2D_U32_F32_RR;
2401 Opc = NVPTX::TEX_2D_U32_F32_LEVEL_RR;
2404 Opc = NVPTX::TEX_2D_U32_F32_GRAD_RR;
2407 Opc = NVPTX::TEX_2D_ARRAY_F32_S32_RR;
2410 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_RR;
2413 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RR;
2416 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RR;
2419 Opc = NVPTX::TEX_2D_ARRAY_S32_S32_RR;
2422 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_RR;
2425 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RR;
2428 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RR;
2431 Opc = NVPTX::TEX_2D_ARRAY_U32_S32_RR;
2434 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_RR;
2437 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RR;
2440 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RR;
2443 Opc = NVPTX::TEX_3D_F32_S32_RR;
2446 Opc = NVPTX::TEX_3D_F32_F32_RR;
2449 Opc = NVPTX::TEX_3D_F32_F32_LEVEL_RR;
2452 Opc = NVPTX::TEX_3D_F32_F32_GRAD_RR;
2455 Opc = NVPTX::TEX_3D_S32_S32_RR;
2458 Opc = NVPTX::TEX_3D_S32_F32_RR;
2461 Opc = NVPTX::TEX_3D_S32_F32_LEVEL_RR;
2464 Opc = NVPTX::TEX_3D_S32_F32_GRAD_RR;
2467 Opc = NVPTX::TEX_3D_U32_S32_RR;
2470 Opc = NVPTX::TEX_3D_U32_F32_RR;
2473 Opc = NVPTX::TEX_3D_U32_F32_LEVEL_RR;
2476 Opc = NVPTX::TEX_3D_U32_F32_GRAD_RR;
2479 Opc = NVPTX::TEX_CUBE_F32_F32_RR;
2482 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL_RR;
2485 Opc = NVPTX::TEX_CUBE_S32_F32_RR;
2488 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL_RR;
2491 Opc = NVPTX::TEX_CUBE_U32_F32_RR;
2494 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL_RR;
2497 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_RR;
2500 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RR;
2503 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_RR;
2506 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RR;
2509 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_RR;
2512 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RR;
2515 Opc = NVPTX::TLD4_R_2D_F32_F32_RR;
2518 Opc = NVPTX::TLD4_G_2D_F32_F32_RR;
2521 Opc = NVPTX::TLD4_B_2D_F32_F32_RR;
2524 Opc = NVPTX::TLD4_A_2D_F32_F32_RR;
2527 Opc = NVPTX::TLD4_R_2D_S32_F32_RR;
2530 Opc = NVPTX::TLD4_G_2D_S32_F32_RR;
2533 Opc = NVPTX::TLD4_B_2D_S32_F32_RR;
2536 Opc = NVPTX::TLD4_A_2D_S32_F32_RR;
2539 Opc = NVPTX::TLD4_R_2D_U32_F32_RR;
2542 Opc = NVPTX::TLD4_G_2D_U32_F32_RR;
2545 Opc = NVPTX::TLD4_B_2D_U32_F32_RR;
2548 Opc = NVPTX::TLD4_A_2D_U32_F32_RR;
2551 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32_R;
2554 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_R;
2557 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL_R;
2560 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD_R;
2563 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32_R;
2566 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_R;
2569 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL_R;
2572 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD_R;
2575 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32_R;
2578 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_R;
2581 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL_R;
2584 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD_R;
2587 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32_R;
2590 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_R;
2593 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL_R;
2596 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD_R;
2599 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32_R;
2602 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_R;
2605 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL_R;
2608 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD_R;
2611 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32_R;
2614 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_R;
2617 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL_R;
2620 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD_R;
2623 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32_R;
2626 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_R;
2629 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL_R;
2632 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD_R;
2635 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32_R;
2638 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_R;
2641 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL_R;
2644 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD_R;
2647 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32_R;
2650 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_R;
2653 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL_R;
2656 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD_R;
2659 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32_R;
2662 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_R;
2665 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL_R;
2668 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD_R;
2671 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32_R;
2674 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_R;
2677 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL_R;
2680 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD_R;
2683 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32_R;
2686 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_R;
2689 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL_R;
2692 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD_R;
2695 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32_R;
2698 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_R;
2701 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL_R;
2704 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD_R;
2707 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32_R;
2710 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_R;
2713 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL_R;
2716 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD_R;
2719 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32_R;
2722 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_R;
2725 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL_R;
2728 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD_R;
2731 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_R;
2734 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL_R;
2737 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_R;
2740 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL_R;
2743 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_R;
2746 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL_R;
2749 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_R;
2752 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL_R;
2755 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_R;
2758 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL_R;
2761 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_R;
2764 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_R;
2767 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32_R;
2770 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32_R;
2773 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32_R;
2776 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32_R;
2779 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32_R;
2782 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32_R;
2785 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32_R;
2788 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32_R;
2791 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32_R;
2794 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32_R;
2797 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32_R;
2800 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32_R;
2803 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_GRAD_R;
2806 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_GRAD_R;
2809 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_GRAD_R;
2812 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD_R;
2815 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD_R;
2818 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD_R;
2830bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(
SDNode *
N) {
2832 switch (
N->getOpcode()) {
2833 default:
return false;
2835 Opc = NVPTX::SULD_1D_I8_CLAMP_R;
2838 Opc = NVPTX::SULD_1D_I16_CLAMP_R;
2841 Opc = NVPTX::SULD_1D_I32_CLAMP_R;
2844 Opc = NVPTX::SULD_1D_I64_CLAMP_R;
2847 Opc = NVPTX::SULD_1D_V2I8_CLAMP_R;
2850 Opc = NVPTX::SULD_1D_V2I16_CLAMP_R;
2853 Opc = NVPTX::SULD_1D_V2I32_CLAMP_R;
2856 Opc = NVPTX::SULD_1D_V2I64_CLAMP_R;
2859 Opc = NVPTX::SULD_1D_V4I8_CLAMP_R;
2862 Opc = NVPTX::SULD_1D_V4I16_CLAMP_R;
2865 Opc = NVPTX::SULD_1D_V4I32_CLAMP_R;
2868 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP_R;
2871 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP_R;
2874 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP_R;
2877 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP_R;
2880 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP_R;
2883 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP_R;
2886 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP_R;
2889 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP_R;
2892 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP_R;
2895 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP_R;
2898 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP_R;
2901 Opc = NVPTX::SULD_2D_I8_CLAMP_R;
2904 Opc = NVPTX::SULD_2D_I16_CLAMP_R;
2907 Opc = NVPTX::SULD_2D_I32_CLAMP_R;
2910 Opc = NVPTX::SULD_2D_I64_CLAMP_R;
2913 Opc = NVPTX::SULD_2D_V2I8_CLAMP_R;
2916 Opc = NVPTX::SULD_2D_V2I16_CLAMP_R;
2919 Opc = NVPTX::SULD_2D_V2I32_CLAMP_R;
2922 Opc = NVPTX::SULD_2D_V2I64_CLAMP_R;
2925 Opc = NVPTX::SULD_2D_V4I8_CLAMP_R;
2928 Opc = NVPTX::SULD_2D_V4I16_CLAMP_R;
2931 Opc = NVPTX::SULD_2D_V4I32_CLAMP_R;
2934 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP_R;
2937 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP_R;
2940 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP_R;
2943 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP_R;
2946 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP_R;
2949 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP_R;
2952 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP_R;
2955 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP_R;
2958 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP_R;
2961 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP_R;
2964 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP_R;
2967 Opc = NVPTX::SULD_3D_I8_CLAMP_R;
2970 Opc = NVPTX::SULD_3D_I16_CLAMP_R;
2973 Opc = NVPTX::SULD_3D_I32_CLAMP_R;
2976 Opc = NVPTX::SULD_3D_I64_CLAMP_R;
2979 Opc = NVPTX::SULD_3D_V2I8_CLAMP_R;
2982 Opc = NVPTX::SULD_3D_V2I16_CLAMP_R;
2985 Opc = NVPTX::SULD_3D_V2I32_CLAMP_R;
2988 Opc = NVPTX::SULD_3D_V2I64_CLAMP_R;
2991 Opc = NVPTX::SULD_3D_V4I8_CLAMP_R;
2994 Opc = NVPTX::SULD_3D_V4I16_CLAMP_R;
2997 Opc = NVPTX::SULD_3D_V4I32_CLAMP_R;
3000 Opc = NVPTX::SULD_1D_I8_TRAP_R;
3003 Opc = NVPTX::SULD_1D_I16_TRAP_R;
3006 Opc = NVPTX::SULD_1D_I32_TRAP_R;
3009 Opc = NVPTX::SULD_1D_I64_TRAP_R;
3012 Opc = NVPTX::SULD_1D_V2I8_TRAP_R;
3015 Opc = NVPTX::SULD_1D_V2I16_TRAP_R;
3018 Opc = NVPTX::SULD_1D_V2I32_TRAP_R;
3021 Opc = NVPTX::SULD_1D_V2I64_TRAP_R;
3024 Opc = NVPTX::SULD_1D_V4I8_TRAP_R;
3027 Opc = NVPTX::SULD_1D_V4I16_TRAP_R;
3030 Opc = NVPTX::SULD_1D_V4I32_TRAP_R;
3033 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP_R;
3036 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP_R;
3039 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP_R;
3042 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP_R;
3045 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP_R;
3048 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP_R;
3051 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP_R;
3054 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP_R;
3057 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP_R;
3060 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP_R;
3063 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP_R;
3066 Opc = NVPTX::SULD_2D_I8_TRAP_R;
3069 Opc = NVPTX::SULD_2D_I16_TRAP_R;
3072 Opc = NVPTX::SULD_2D_I32_TRAP_R;
3075 Opc = NVPTX::SULD_2D_I64_TRAP_R;
3078 Opc = NVPTX::SULD_2D_V2I8_TRAP_R;
3081 Opc = NVPTX::SULD_2D_V2I16_TRAP_R;
3084 Opc = NVPTX::SULD_2D_V2I32_TRAP_R;
3087 Opc = NVPTX::SULD_2D_V2I64_TRAP_R;
3090 Opc = NVPTX::SULD_2D_V4I8_TRAP_R;
3093 Opc = NVPTX::SULD_2D_V4I16_TRAP_R;
3096 Opc = NVPTX::SULD_2D_V4I32_TRAP_R;
3099 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP_R;
3102 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP_R;
3105 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP_R;
3108 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP_R;
3111 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP_R;
3114 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP_R;
3117 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP_R;
3120 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP_R;
3123 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP_R;
3126 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP_R;
3129 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP_R;
3132 Opc = NVPTX::SULD_3D_I8_TRAP_R;
3135 Opc = NVPTX::SULD_3D_I16_TRAP_R;
3138 Opc = NVPTX::SULD_3D_I32_TRAP_R;
3141 Opc = NVPTX::SULD_3D_I64_TRAP_R;
3144 Opc = NVPTX::SULD_3D_V2I8_TRAP_R;
3147 Opc = NVPTX::SULD_3D_V2I16_TRAP_R;
3150 Opc = NVPTX::SULD_3D_V2I32_TRAP_R;
3153 Opc = NVPTX::SULD_3D_V2I64_TRAP_R;
3156 Opc = NVPTX::SULD_3D_V4I8_TRAP_R;
3159 Opc = NVPTX::SULD_3D_V4I16_TRAP_R;
3162 Opc = NVPTX::SULD_3D_V4I32_TRAP_R;
3165 Opc = NVPTX::SULD_1D_I8_ZERO_R;
3168 Opc = NVPTX::SULD_1D_I16_ZERO_R;
3171 Opc = NVPTX::SULD_1D_I32_ZERO_R;
3174 Opc = NVPTX::SULD_1D_I64_ZERO_R;
3177 Opc = NVPTX::SULD_1D_V2I8_ZERO_R;
3180 Opc = NVPTX::SULD_1D_V2I16_ZERO_R;
3183 Opc = NVPTX::SULD_1D_V2I32_ZERO_R;
3186 Opc = NVPTX::SULD_1D_V2I64_ZERO_R;
3189 Opc = NVPTX::SULD_1D_V4I8_ZERO_R;
3192 Opc = NVPTX::SULD_1D_V4I16_ZERO_R;
3195 Opc = NVPTX::SULD_1D_V4I32_ZERO_R;
3198 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO_R;
3201 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO_R;
3204 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO_R;
3207 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO_R;
3210 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO_R;
3213 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO_R;
3216 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO_R;
3219 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO_R;
3222 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO_R;
3225 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO_R;
3228 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO_R;
3231 Opc = NVPTX::SULD_2D_I8_ZERO_R;
3234 Opc = NVPTX::SULD_2D_I16_ZERO_R;
3237 Opc = NVPTX::SULD_2D_I32_ZERO_R;
3240 Opc = NVPTX::SULD_2D_I64_ZERO_R;
3243 Opc = NVPTX::SULD_2D_V2I8_ZERO_R;
3246 Opc = NVPTX::SULD_2D_V2I16_ZERO_R;
3249 Opc = NVPTX::SULD_2D_V2I32_ZERO_R;
3252 Opc = NVPTX::SULD_2D_V2I64_ZERO_R;
3255 Opc = NVPTX::SULD_2D_V4I8_ZERO_R;
3258 Opc = NVPTX::SULD_2D_V4I16_ZERO_R;
3261 Opc = NVPTX::SULD_2D_V4I32_ZERO_R;
3264 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO_R;
3267 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO_R;
3270 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO_R;
3273 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO_R;
3276 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO_R;
3279 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO_R;
3282 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO_R;
3285 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO_R;
3288 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO_R;
3291 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO_R;
3294 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO_R;
3297 Opc = NVPTX::SULD_3D_I8_ZERO_R;
3300 Opc = NVPTX::SULD_3D_I16_ZERO_R;
3303 Opc = NVPTX::SULD_3D_I32_ZERO_R;
3306 Opc = NVPTX::SULD_3D_I64_ZERO_R;
3309 Opc = NVPTX::SULD_3D_V2I8_ZERO_R;
3312 Opc = NVPTX::SULD_3D_V2I16_ZERO_R;
3315 Opc = NVPTX::SULD_3D_V2I32_ZERO_R;
3318 Opc = NVPTX::SULD_3D_V2I64_ZERO_R;
3321 Opc = NVPTX::SULD_3D_V4I8_ZERO_R;
3324 Opc = NVPTX::SULD_3D_V4I16_ZERO_R;
3327 Opc = NVPTX::SULD_3D_V4I32_ZERO_R;
3342bool NVPTXDAGToDAGISel::tryBFE(
SDNode *
N) {
3349 bool IsSigned =
false;
3354 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
3379 Val =
LHS.getNode()->getOperand(0);
3380 Start =
LHS.getNode()->getOperand(1);
3386 int64_t GoodBits = Start.getValueSizeInBits() - StartVal;
3387 if (NumBits > GoodBits) {
3421 if (isa<ConstantSDNode>(AndLHS)) {
3445 NumBits = NumZeros + NumOnes - ShiftAmt;
3451 if (ShiftAmt < NumZeros) {
3468 Val =
LHS->getOperand(0);
3487 if (OuterShiftAmt < InnerShiftAmt) {
3523 Opc = NVPTX::BFE_S32rii;
3525 Opc = NVPTX::BFE_U32rii;
3529 Opc = NVPTX::BFE_S64rii;
3531 Opc = NVPTX::BFE_U64rii;
3570bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3575 if (SelectDirectAddr(base,
Base)) {
3598bool NVPTXDAGToDAGISel::SelectADDRri_imp(
3610 if (SelectDirectAddr(
Addr.getOperand(0),
Addr)) {
3615 dyn_cast<FrameIndexSDNode>(
Addr.getOperand(0)))
3640bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(
SDNode *
N,
3641 unsigned int spN)
const {
3642 const Value *Src =
nullptr;
3643 if (
MemSDNode *mN = dyn_cast<MemSDNode>(
N)) {
3644 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3646 Src = mN->getMemOperand()->getValue();
3650 if (
auto *PT = dyn_cast<PointerType>(Src->getType()))
3651 return (PT->getAddressSpace() == spN);
3659 std::vector<SDValue> &OutOps) {
3661 switch (ConstraintID) {
3665 if (SelectDirectAddr(
Op, Op0)) {
3666 OutOps.push_back(Op0);
3670 if (SelectADDRri(
Op.getNode(),
Op, Op0, Op1)) {
3671 OutOps.push_back(Op0);
3672 OutOps.push_back(Op1);
3682unsigned NVPTXDAGToDAGISel::GetConvertOpcode(
MVT DestTy,
MVT SrcTy,
3693 return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
3695 return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
3697 return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
3704 return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
3706 return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
3708 return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
3715 return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
3717 return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
3719 return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
3726 return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
3728 return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
3730 return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
3737 return NVPTX::CVT_f32_f16;
3739 return NVPTX::CVT_f64_f16;
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
Atomic ordering constants.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static unsigned int getCodeAddrSpace(MemSDNode *N)
static int getLdStRegType(EVT VT)
static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ)
static std::optional< unsigned > pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16, unsigned Opcode_i32, std::optional< unsigned > Opcode_i64, unsigned Opcode_f32, std::optional< unsigned > Opcode_f64)
static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget, unsigned CodeAddrSpace, MachineFunction *F)
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
unsigned getSrcAddressSpace() const
unsigned getDestAddressSpace() const
This is an SDNode representing atomic operations.
const SDValue & getVal() const
uint64_t getZExtValue() const
This class represents an Operation in the Expression.
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
FunctionPass class - This class is used to implement most global optimizations.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool isVector() const
Return true if this is a vector value type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getVectorElementType() const
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
A description of a memory reference used in the backend.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
EVT getMemoryVT() const
Return the type of the in-memory value.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Implement addressing mode selection for inline asm expressions.
const NVPTXSubtarget * Subtarget
const NVPTXTargetLowering * getTargetLowering() const override
bool useF32FTZ(const MachineFunction &MF) const
bool allowFMA(MachineFunction &MF, CodeGenOptLevel OptLevel) const
bool usePrecSqrtF32() const
bool allowUnsafeFPMath(MachineFunction &MF) const
int getDivF32Level() const
bool useShortPointers() const
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const SDValue & getOperand(unsigned Num) const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
SDValue getTargetConstantFP(double Val, const SDLoc &DL, EVT VT)
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
const SDValue & getValue() const
LLVM Value Representation.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
@ SHL
Shift and rotation operations.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
@ TexUnified1DS32FloatLevel
@ Tex1DArrayFloatFloatLevel
@ TexUnified2DU32FloatGrad
@ Tld4UnifiedG2DFloatFloat
@ TexUnifiedCubeArrayFloatFloatLevel
@ Tld4UnifiedR2DFloatFloat
@ Tex2DArrayS32FloatLevel
@ TexUnified1DArrayFloatFloatLevel
@ TexUnified2DFloatFloatLevel
@ TexUnified3DFloatFloatLevel
@ TexUnified1DFloatFloatLevel
@ TexUnified2DArrayU32Float
@ TexUnified1DArrayFloatFloat
@ Tex1DArrayFloatFloatGrad
@ TexUnifiedCubeArrayU32FloatGrad
@ TexUnified1DFloatFloatGrad
@ TexUnifiedCubeFloatFloatGrad
@ TexUnified2DArrayFloatFloat
@ TexUnified3DU32FloatLevel
@ TexUnified1DArrayU32Float
@ TexUnified2DArrayFloatFloatLevel
@ TexUnified2DFloatFloatGrad
@ TexUnified2DArrayU32S32
@ TexUnifiedCubeArrayS32FloatLevel
@ TexUnified1DArrayS32Float
@ TexUnified1DArrayS32FloatLevel
@ TexUnified2DS32FloatLevel
@ TexUnified3DU32FloatGrad
@ TexUnifiedCubeU32FloatLevel
@ TexUnified2DArrayU32FloatGrad
@ TexUnifiedCubeFloatFloatLevel
@ TexUnified1DArrayFloatS32
@ TexUnifiedCubeS32FloatLevel
@ TexUnified1DS32FloatGrad
@ Tex2DArrayFloatFloatLevel
@ TexUnifiedCubeArrayFloatFloat
@ TexUnifiedCubeArrayFloatFloatGrad
@ TexUnifiedCubeFloatFloat
@ TexUnified1DArrayU32S32
@ TexUnified3DFloatFloatGrad
@ Tld4UnifiedA2DFloatFloat
@ TexUnified3DS32FloatGrad
@ TexUnified2DU32FloatLevel
@ TexUnified1DArrayS32S32
@ TexCubeArrayFloatFloatLevel
@ TexUnified1DU32FloatGrad
@ TexCubeArrayS32FloatLevel
@ Tex2DArrayU32FloatLevel
@ Tex1DArrayU32FloatLevel
@ TexUnified2DArrayU32FloatLevel
@ TexUnified1DArrayFloatFloatGrad
@ TexUnifiedCubeS32FloatGrad
@ TexCubeArrayU32FloatLevel
@ TexUnified3DS32FloatLevel
@ TexUnifiedCubeArrayS32FloatGrad
@ TexUnified2DArrayS32Float
@ Tex2DArrayFloatFloatGrad
@ TexUnifiedCubeArrayS32Float
@ TexUnified2DArrayS32FloatLevel
@ Tex1DArrayS32FloatLevel
@ TexUnifiedCubeArrayU32FloatLevel
@ TexUnified2DArrayS32S32
@ TexUnified2DArrayFloatFloatGrad
@ TexUnifiedCubeU32FloatGrad
@ Tld4UnifiedB2DFloatFloat
@ TexUnified1DArrayU32FloatLevel
@ TexUnified1DArrayS32FloatGrad
@ TexUnified2DS32FloatGrad
@ TexUnified2DArrayS32FloatGrad
@ TexUnified1DU32FloatLevel
@ TexUnifiedCubeArrayU32Float
@ TexUnified2DArrayFloatS32
@ TexUnified1DArrayU32FloatGrad
constexpr uint64_t PointerSize
aarch64 pointer size.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
FunctionPass * createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOptLevel OptLevel)
createNVPTXISelDag - This pass converts a legalized DAG into a NVPTX-specific DAG,...
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
CodeGenOptLevel
Code generation optimization level.
AtomicOrdering
Atomic ordering for LLVM's memory model.
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
bool isKernelFunction(const Function &F)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isVector() const
Return true if this is a vector value type.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.