#include "llvm/IR/IntrinsicsNVPTX.h"

#define DEBUG_TYPE "nvptx-lower"
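// Command-line knobs for NVPTX precision/performance trade-offs: FMA
// contraction level, f32 division (div.approx / div.full / IEEE div.rnd),
// f32 sqrt flavor, and a minimum alignment for byval device-function params.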
    cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
             " 1: do it  2: do it aggressively"),
    cl::desc("NVPTX Specific: 0 use div.approx, 1 use div.full, 2 use"
             " IEEE Compliant F32 div.rnd if available."),
    cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
    "nvptx-force-min-byval-param-align", cl::Hidden,
    cl::desc("NVPTX Specific: force 4-byte minimal alignment for byval"
             " params of device functions."),
      Offsets->push_back(StartingOffset + 0);
      Offsets->push_back(StartingOffset + 8);
  // Given a struct type, recursively traverse its elements, each at its
  // offset within the struct.
  if (StructType *STy = dyn_cast<StructType>(Ty)) {
    auto const *SL = DL.getStructLayout(STy);
    auto ElementNum = 0;
    for (auto *EI : STy->elements()) {
      ComputePTXValueVTs(TLI, DL, EI, ValueVTs, Offsets,
                         StartingOffset + SL->getElementOffset(ElementNum));
      ++ElementNum;
    }
    return;
  }
  for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
      for (unsigned j = 0; j != NumElts; ++j) {
          Offsets->push_back(Off);
248 "Promotion is not suitable for scalars of size larger than 64-bits");
250 *PromotedVT = MVT::i1;
255 *PromotedVT = MVT::i8;
258 *PromotedVT = MVT::i16;
261 *PromotedVT = MVT::i32;
264 *PromotedVT = MVT::i64;
267 return EVT(*PromotedVT) != VT;
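// CanMergeParamLoadStoresStartingAt() decides whether the parameter pieces
// starting at Idx may be merged into a single vector access of AccessSize
// bytes; it returns 1 (scalar, no merge) as soon as any requirement fails.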
  // Can't vectorize if param alignment is not sufficient.
  if (ParamAlignment < AccessSize)
    return 1;
  // Can't vectorize if offset is not aligned.
  if (Offsets[Idx] & (AccessSize - 1))
    return 1;

  EVT EltVT = ValueVTs[Idx];
  unsigned EltSize = EltVT.getStoreSize();

  // Element is too large to vectorize.
  if (EltSize >= AccessSize)
    return 1;

  unsigned NumElts = AccessSize / EltSize;
  // Can't vectorize if AccessSize is not a multiple of EltSize.
  if (AccessSize != EltSize * NumElts)
    return 1;

  // We don't have enough elements to vectorize.
  if (Idx + NumElts > ValueVTs.size())
    return 1;

  // PTX ISA can only deal with 2- and 4-element vector ops.
  if (NumElts != 4 && NumElts != 2)
    return 1;

  for (unsigned j = Idx + 1; j < Idx + NumElts; ++j) {
    // Types do not match.
    if (ValueVTs[j] != EltVT)
      return 1;
    // Elements are not contiguous.
    if (Offsets[j] - Offsets[j - 1] != EltSize)
      return 1;
  }
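// VectorizePTXValueVTs() greedily groups parameter pieces into v4/v2
// accesses, trying the largest access size (16 bytes) first and falling
// back to smaller sizes, then to scalar accesses.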
                                 Align ParamAlignment, bool IsVAArg = false) {
  for (int I = 0, E = ValueVTs.size(); I != E; ++I) {
    for (unsigned AccessSize : {16, 8, 4, 2}) {
      unsigned NumElts = CanMergeParamLoadStoresStartingAt(
          I, AccessSize, ValueVTs, Offsets, ParamAlignment);
      assert(I + 1 < E && "Not enough elements.");
      assert(I + 3 < E && "Not enough elements.");
    setOperationAction(Op, VT, IsOpSupported ? Action : NoBF16Action);
    bool IsOpSupported = false;
       {MVT::bf16, MVT::f16, MVT::v2bf16, MVT::v2f16, MVT::f32, MVT::f64,
        MVT::i1, MVT::i8, MVT::i16, MVT::v2i16, MVT::i32, MVT::i64}) {
  for (const auto &Ty : {MVT::i16, MVT::i32, MVT::i64}) {
  const bool IsFP16FP16x2NegAvailable = STI.getSmVersion() >= 53 &&
                                        STI.getPTXVersion() >= 60 &&
                                        STI.allowFP16Math();
  for (const auto &VT : {MVT::f16, MVT::v2f16})
  for (const auto &Op :
    return IsAtLeastSm80 ? Legal : NotSm80Action;
  setFP16OperationAction(Op, MVT::v2f16, GetMinMaxAction(Expand), Expand);
  setFP16OperationAction(Op, MVT::f16, GetMinMaxAction(Expand), Expand);
  setFP16OperationAction(Op, MVT::v2f16, GetMinMaxAction(Expand), Expand);
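// getTargetNodeName() maps each NVPTXISD opcode to a printable name for
// SelectionDAG debug output.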
  case NVPTXISD::CALL: return "NVPTXISD::CALL";
  case NVPTXISD::RET_GLUE: return "NVPTXISD::RET_GLUE";
  case NVPTXISD::LOAD_PARAM: return "NVPTXISD::LOAD_PARAM";
  case NVPTXISD::Wrapper: return "NVPTXISD::Wrapper";
  case NVPTXISD::DeclareParam: return "NVPTXISD::DeclareParam";
  case NVPTXISD::DeclareScalarParam: return "NVPTXISD::DeclareScalarParam";
  case NVPTXISD::DeclareRet: return "NVPTXISD::DeclareRet";
  case NVPTXISD::DeclareScalarRet: return "NVPTXISD::DeclareScalarRet";
  case NVPTXISD::DeclareRetParam: return "NVPTXISD::DeclareRetParam";
  case NVPTXISD::PrintCall: return "NVPTXISD::PrintCall";
  case NVPTXISD::PrintConvergentCall: return "NVPTXISD::PrintConvergentCall";
  case NVPTXISD::PrintCallUni: return "NVPTXISD::PrintCallUni";
  case NVPTXISD::PrintConvergentCallUni: return "NVPTXISD::PrintConvergentCallUni";
  case NVPTXISD::LoadParam: return "NVPTXISD::LoadParam";
  case NVPTXISD::LoadParamV2: return "NVPTXISD::LoadParamV2";
  case NVPTXISD::LoadParamV4: return "NVPTXISD::LoadParamV4";
  case NVPTXISD::StoreParam: return "NVPTXISD::StoreParam";
  case NVPTXISD::StoreParamV2: return "NVPTXISD::StoreParamV2";
  case NVPTXISD::StoreParamV4: return "NVPTXISD::StoreParamV4";
  case NVPTXISD::StoreParamS32: return "NVPTXISD::StoreParamS32";
  case NVPTXISD::StoreParamU32: return "NVPTXISD::StoreParamU32";
  case NVPTXISD::CallArgBegin: return "NVPTXISD::CallArgBegin";
  case NVPTXISD::CallArg: return "NVPTXISD::CallArg";
  case NVPTXISD::LastCallArg: return "NVPTXISD::LastCallArg";
  case NVPTXISD::CallArgEnd: return "NVPTXISD::CallArgEnd";
  case NVPTXISD::CallVoid: return "NVPTXISD::CallVoid";
  case NVPTXISD::CallVal: return "NVPTXISD::CallVal";
  case NVPTXISD::CallSymbol: return "NVPTXISD::CallSymbol";
  case NVPTXISD::Prototype: return "NVPTXISD::Prototype";
  case NVPTXISD::MoveParam: return "NVPTXISD::MoveParam";
  case NVPTXISD::StoreRetval: return "NVPTXISD::StoreRetval";
  case NVPTXISD::StoreRetvalV2: return "NVPTXISD::StoreRetvalV2";
  case NVPTXISD::StoreRetvalV4: return "NVPTXISD::StoreRetvalV4";
  case NVPTXISD::PseudoUseParam: return "NVPTXISD::PseudoUseParam";
  case NVPTXISD::RETURN: return "NVPTXISD::RETURN";
  case NVPTXISD::CallSeqBegin: return "NVPTXISD::CallSeqBegin";
  case NVPTXISD::CallSeqEnd: return "NVPTXISD::CallSeqEnd";
  case NVPTXISD::CallPrototype: return "NVPTXISD::CallPrototype";
  case NVPTXISD::ProxyReg: return "NVPTXISD::ProxyReg";
  case NVPTXISD::LoadV2: return "NVPTXISD::LoadV2";
  case NVPTXISD::LoadV4: return "NVPTXISD::LoadV4";
  case NVPTXISD::LDGV2: return "NVPTXISD::LDGV2";
  case NVPTXISD::LDGV4: return "NVPTXISD::LDGV4";
  case NVPTXISD::LDUV2: return "NVPTXISD::LDUV2";
  case NVPTXISD::LDUV4: return "NVPTXISD::LDUV4";
  case NVPTXISD::StoreV2: return "NVPTXISD::StoreV2";
  case NVPTXISD::StoreV4: return "NVPTXISD::StoreV4";
  case NVPTXISD::FUN_SHFL_CLAMP: return "NVPTXISD::FUN_SHFL_CLAMP";
  case NVPTXISD::FUN_SHFR_CLAMP: return "NVPTXISD::FUN_SHFR_CLAMP";
  case NVPTXISD::IMAD: return "NVPTXISD::IMAD";
  case NVPTXISD::SETP_F16X2: return "NVPTXISD::SETP_F16X2";
  case NVPTXISD::Dummy: return "NVPTXISD::Dummy";
  case NVPTXISD::MUL_WIDE_SIGNED: return "NVPTXISD::MUL_WIDE_SIGNED";
  case NVPTXISD::MUL_WIDE_UNSIGNED: return "NVPTXISD::MUL_WIDE_UNSIGNED";
  case NVPTXISD::Tex1DFloatFloatLevel: return "NVPTXISD::Tex1DFloatFloatLevel";
  case NVPTXISD::Tex1DFloatFloatGrad: return "NVPTXISD::Tex1DFloatFloatGrad";
  case NVPTXISD::Tex1DS32FloatLevel: return "NVPTXISD::Tex1DS32FloatLevel";
  case NVPTXISD::Tex1DS32FloatGrad: return "NVPTXISD::Tex1DS32FloatGrad";
  case NVPTXISD::Tex1DU32FloatLevel: return "NVPTXISD::Tex1DU32FloatLevel";
  case NVPTXISD::Tex1DU32FloatGrad: return "NVPTXISD::Tex1DU32FloatGrad";
  case NVPTXISD::Tex1DArrayFloatFloatLevel: return "NVPTXISD::Tex1DArrayFloatFloatLevel";
  case NVPTXISD::Tex1DArrayFloatFloatGrad: return "NVPTXISD::Tex1DArrayFloatFloatGrad";
  case NVPTXISD::Tex1DArrayS32FloatLevel: return "NVPTXISD::Tex1DArrayS32FloatLevel";
  case NVPTXISD::Tex1DArrayS32FloatGrad: return "NVPTXISD::Tex1DArrayS32FloatGrad";
  case NVPTXISD::Tex1DArrayU32FloatLevel: return "NVPTXISD::Tex1DArrayU32FloatLevel";
  case NVPTXISD::Tex1DArrayU32FloatGrad: return "NVPTXISD::Tex1DArrayU32FloatGrad";
  case NVPTXISD::Tex2DFloatFloatLevel: return "NVPTXISD::Tex2DFloatFloatLevel";
  case NVPTXISD::Tex2DFloatFloatGrad: return "NVPTXISD::Tex2DFloatFloatGrad";
  case NVPTXISD::Tex2DS32FloatLevel: return "NVPTXISD::Tex2DS32FloatLevel";
  case NVPTXISD::Tex2DS32FloatGrad: return "NVPTXISD::Tex2DS32FloatGrad";
  case NVPTXISD::Tex2DU32FloatLevel: return "NVPTXISD::Tex2DU32FloatLevel";
  case NVPTXISD::Tex2DU32FloatGrad: return "NVPTXISD::Tex2DU32FloatGrad";
  case NVPTXISD::Tex2DArrayFloatFloatLevel: return "NVPTXISD::Tex2DArrayFloatFloatLevel";
  case NVPTXISD::Tex2DArrayFloatFloatGrad: return "NVPTXISD::Tex2DArrayFloatFloatGrad";
  case NVPTXISD::Tex2DArrayS32FloatLevel: return "NVPTXISD::Tex2DArrayS32FloatLevel";
  case NVPTXISD::Tex2DArrayS32FloatGrad: return "NVPTXISD::Tex2DArrayS32FloatGrad";
  case NVPTXISD::Tex2DArrayU32FloatLevel: return "NVPTXISD::Tex2DArrayU32FloatLevel";
  case NVPTXISD::Tex2DArrayU32FloatGrad: return "NVPTXISD::Tex2DArrayU32FloatGrad";
  case NVPTXISD::Tex3DFloatFloatLevel: return "NVPTXISD::Tex3DFloatFloatLevel";
  case NVPTXISD::Tex3DFloatFloatGrad: return "NVPTXISD::Tex3DFloatFloatGrad";
  case NVPTXISD::Tex3DS32FloatLevel: return "NVPTXISD::Tex3DS32FloatLevel";
  case NVPTXISD::Tex3DS32FloatGrad: return "NVPTXISD::Tex3DS32FloatGrad";
  case NVPTXISD::Tex3DU32FloatLevel: return "NVPTXISD::Tex3DU32FloatLevel";
  case NVPTXISD::Tex3DU32FloatGrad: return "NVPTXISD::Tex3DU32FloatGrad";
  case NVPTXISD::TexCubeFloatFloatLevel: return "NVPTXISD::TexCubeFloatFloatLevel";
  case NVPTXISD::TexCubeS32FloatLevel: return "NVPTXISD::TexCubeS32FloatLevel";
  case NVPTXISD::TexCubeU32FloatLevel: return "NVPTXISD::TexCubeU32FloatLevel";
  case NVPTXISD::TexCubeArrayFloatFloat: return "NVPTXISD::TexCubeArrayFloatFloat";
  case NVPTXISD::TexCubeArrayFloatFloatLevel: return "NVPTXISD::TexCubeArrayFloatFloatLevel";
  case NVPTXISD::TexCubeArrayS32Float: return "NVPTXISD::TexCubeArrayS32Float";
  case NVPTXISD::TexCubeArrayS32FloatLevel: return "NVPTXISD::TexCubeArrayS32FloatLevel";
  case NVPTXISD::TexCubeArrayU32Float: return "NVPTXISD::TexCubeArrayU32Float";
  case NVPTXISD::TexCubeArrayU32FloatLevel: return "NVPTXISD::TexCubeArrayU32FloatLevel";
  case NVPTXISD::Tld4R2DFloatFloat: return "NVPTXISD::Tld4R2DFloatFloat";
  case NVPTXISD::Tld4G2DFloatFloat: return "NVPTXISD::Tld4G2DFloatFloat";
  case NVPTXISD::Tld4B2DFloatFloat: return "NVPTXISD::Tld4B2DFloatFloat";
  case NVPTXISD::Tld4A2DFloatFloat: return "NVPTXISD::Tld4A2DFloatFloat";
  case NVPTXISD::Tld4R2DS64Float: return "NVPTXISD::Tld4R2DS64Float";
  case NVPTXISD::Tld4G2DS64Float: return "NVPTXISD::Tld4G2DS64Float";
  case NVPTXISD::Tld4B2DS64Float: return "NVPTXISD::Tld4B2DS64Float";
  case NVPTXISD::Tld4A2DS64Float: return "NVPTXISD::Tld4A2DS64Float";
  case NVPTXISD::Tld4R2DU64Float: return "NVPTXISD::Tld4R2DU64Float";
  case NVPTXISD::Tld4G2DU64Float: return "NVPTXISD::Tld4G2DU64Float";
  case NVPTXISD::Tld4B2DU64Float: return "NVPTXISD::Tld4B2DU64Float";
  case NVPTXISD::Tld4A2DU64Float: return "NVPTXISD::Tld4A2DU64Float";
  case NVPTXISD::TexUnified1DFloatS32: return "NVPTXISD::TexUnified1DFloatS32";
  case NVPTXISD::TexUnified1DFloatFloat: return "NVPTXISD::TexUnified1DFloatFloat";
  case NVPTXISD::TexUnified1DFloatFloatLevel: return "NVPTXISD::TexUnified1DFloatFloatLevel";
  case NVPTXISD::TexUnified1DFloatFloatGrad: return "NVPTXISD::TexUnified1DFloatFloatGrad";
  case NVPTXISD::TexUnified1DS32S32: return "NVPTXISD::TexUnified1DS32S32";
  case NVPTXISD::TexUnified1DS32Float: return "NVPTXISD::TexUnified1DS32Float";
  case NVPTXISD::TexUnified1DS32FloatLevel: return "NVPTXISD::TexUnified1DS32FloatLevel";
  case NVPTXISD::TexUnified1DS32FloatGrad: return "NVPTXISD::TexUnified1DS32FloatGrad";
  case NVPTXISD::TexUnified1DU32S32: return "NVPTXISD::TexUnified1DU32S32";
  case NVPTXISD::TexUnified1DU32Float: return "NVPTXISD::TexUnified1DU32Float";
  case NVPTXISD::TexUnified1DU32FloatLevel: return "NVPTXISD::TexUnified1DU32FloatLevel";
  case NVPTXISD::TexUnified1DU32FloatGrad: return "NVPTXISD::TexUnified1DU32FloatGrad";
  case NVPTXISD::TexUnified1DArrayFloatS32: return "NVPTXISD::TexUnified1DArrayFloatS32";
  case NVPTXISD::TexUnified1DArrayFloatFloat: return "NVPTXISD::TexUnified1DArrayFloatFloat";
  case NVPTXISD::TexUnified1DArrayFloatFloatLevel: return "NVPTXISD::TexUnified1DArrayFloatFloatLevel";
  case NVPTXISD::TexUnified1DArrayFloatFloatGrad: return "NVPTXISD::TexUnified1DArrayFloatFloatGrad";
  case NVPTXISD::TexUnified1DArrayS32S32: return "NVPTXISD::TexUnified1DArrayS32S32";
  case NVPTXISD::TexUnified1DArrayS32Float: return "NVPTXISD::TexUnified1DArrayS32Float";
  case NVPTXISD::TexUnified1DArrayS32FloatLevel: return "NVPTXISD::TexUnified1DArrayS32FloatLevel";
  case NVPTXISD::TexUnified1DArrayS32FloatGrad: return "NVPTXISD::TexUnified1DArrayS32FloatGrad";
  case NVPTXISD::TexUnified1DArrayU32S32: return "NVPTXISD::TexUnified1DArrayU32S32";
  case NVPTXISD::TexUnified1DArrayU32Float: return "NVPTXISD::TexUnified1DArrayU32Float";
  case NVPTXISD::TexUnified1DArrayU32FloatLevel: return "NVPTXISD::TexUnified1DArrayU32FloatLevel";
  case NVPTXISD::TexUnified1DArrayU32FloatGrad: return "NVPTXISD::TexUnified1DArrayU32FloatGrad";
  case NVPTXISD::TexUnified2DFloatS32: return "NVPTXISD::TexUnified2DFloatS32";
  case NVPTXISD::TexUnified2DFloatFloat: return "NVPTXISD::TexUnified2DFloatFloat";
  case NVPTXISD::TexUnified2DFloatFloatLevel: return "NVPTXISD::TexUnified2DFloatFloatLevel";
  case NVPTXISD::TexUnified2DFloatFloatGrad: return "NVPTXISD::TexUnified2DFloatFloatGrad";
  case NVPTXISD::TexUnified2DS32S32: return "NVPTXISD::TexUnified2DS32S32";
  case NVPTXISD::TexUnified2DS32Float: return "NVPTXISD::TexUnified2DS32Float";
  case NVPTXISD::TexUnified2DS32FloatLevel: return "NVPTXISD::TexUnified2DS32FloatLevel";
  case NVPTXISD::TexUnified2DS32FloatGrad: return "NVPTXISD::TexUnified2DS32FloatGrad";
  case NVPTXISD::TexUnified2DU32S32: return "NVPTXISD::TexUnified2DU32S32";
  case NVPTXISD::TexUnified2DU32Float: return "NVPTXISD::TexUnified2DU32Float";
  case NVPTXISD::TexUnified2DU32FloatLevel: return "NVPTXISD::TexUnified2DU32FloatLevel";
  case NVPTXISD::TexUnified2DU32FloatGrad: return "NVPTXISD::TexUnified2DU32FloatGrad";
  case NVPTXISD::TexUnified2DArrayFloatS32: return "NVPTXISD::TexUnified2DArrayFloatS32";
  case NVPTXISD::TexUnified2DArrayFloatFloat: return "NVPTXISD::TexUnified2DArrayFloatFloat";
  case NVPTXISD::TexUnified2DArrayFloatFloatLevel: return "NVPTXISD::TexUnified2DArrayFloatFloatLevel";
  case NVPTXISD::TexUnified2DArrayFloatFloatGrad: return "NVPTXISD::TexUnified2DArrayFloatFloatGrad";
  case NVPTXISD::TexUnified2DArrayS32S32: return "NVPTXISD::TexUnified2DArrayS32S32";
  case NVPTXISD::TexUnified2DArrayS32Float: return "NVPTXISD::TexUnified2DArrayS32Float";
  case NVPTXISD::TexUnified2DArrayS32FloatLevel: return "NVPTXISD::TexUnified2DArrayS32FloatLevel";
  case NVPTXISD::TexUnified2DArrayS32FloatGrad: return "NVPTXISD::TexUnified2DArrayS32FloatGrad";
  case NVPTXISD::TexUnified2DArrayU32S32: return "NVPTXISD::TexUnified2DArrayU32S32";
  case NVPTXISD::TexUnified2DArrayU32Float: return "NVPTXISD::TexUnified2DArrayU32Float";
  case NVPTXISD::TexUnified2DArrayU32FloatLevel: return "NVPTXISD::TexUnified2DArrayU32FloatLevel";
  case NVPTXISD::TexUnified2DArrayU32FloatGrad: return "NVPTXISD::TexUnified2DArrayU32FloatGrad";
  case NVPTXISD::TexUnified3DFloatS32: return "NVPTXISD::TexUnified3DFloatS32";
  case NVPTXISD::TexUnified3DFloatFloat: return "NVPTXISD::TexUnified3DFloatFloat";
  case NVPTXISD::TexUnified3DFloatFloatLevel: return "NVPTXISD::TexUnified3DFloatFloatLevel";
  case NVPTXISD::TexUnified3DFloatFloatGrad: return "NVPTXISD::TexUnified3DFloatFloatGrad";
  case NVPTXISD::TexUnified3DS32S32: return "NVPTXISD::TexUnified3DS32S32";
  case NVPTXISD::TexUnified3DS32Float: return "NVPTXISD::TexUnified3DS32Float";
  case NVPTXISD::TexUnified3DS32FloatLevel: return "NVPTXISD::TexUnified3DS32FloatLevel";
  case NVPTXISD::TexUnified3DS32FloatGrad: return "NVPTXISD::TexUnified3DS32FloatGrad";
  case NVPTXISD::TexUnified3DU32S32: return "NVPTXISD::TexUnified3DU32S32";
  case NVPTXISD::TexUnified3DU32Float: return "NVPTXISD::TexUnified3DU32Float";
  case NVPTXISD::TexUnified3DU32FloatLevel: return "NVPTXISD::TexUnified3DU32FloatLevel";
  case NVPTXISD::TexUnified3DU32FloatGrad: return "NVPTXISD::TexUnified3DU32FloatGrad";
  case NVPTXISD::TexUnifiedCubeFloatFloat: return "NVPTXISD::TexUnifiedCubeFloatFloat";
  case NVPTXISD::TexUnifiedCubeFloatFloatLevel: return "NVPTXISD::TexUnifiedCubeFloatFloatLevel";
  case NVPTXISD::TexUnifiedCubeS32Float: return "NVPTXISD::TexUnifiedCubeS32Float";
  case NVPTXISD::TexUnifiedCubeS32FloatLevel: return "NVPTXISD::TexUnifiedCubeS32FloatLevel";
  case NVPTXISD::TexUnifiedCubeU32Float: return "NVPTXISD::TexUnifiedCubeU32Float";
  case NVPTXISD::TexUnifiedCubeU32FloatLevel: return "NVPTXISD::TexUnifiedCubeU32FloatLevel";
  case NVPTXISD::TexUnifiedCubeArrayFloatFloat: return "NVPTXISD::TexUnifiedCubeArrayFloatFloat";
  case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel";
  case NVPTXISD::TexUnifiedCubeArrayS32Float: return "NVPTXISD::TexUnifiedCubeArrayS32Float";
  case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel";
  case NVPTXISD::TexUnifiedCubeArrayU32Float: return "NVPTXISD::TexUnifiedCubeArrayU32Float";
  case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel";
  case NVPTXISD::Tld4UnifiedR2DFloatFloat: return "NVPTXISD::Tld4UnifiedR2DFloatFloat";
  case NVPTXISD::Tld4UnifiedG2DFloatFloat: return "NVPTXISD::Tld4UnifiedG2DFloatFloat";
  case NVPTXISD::Tld4UnifiedB2DFloatFloat: return "NVPTXISD::Tld4UnifiedB2DFloatFloat";
  case NVPTXISD::Tld4UnifiedA2DFloatFloat: return "NVPTXISD::Tld4UnifiedA2DFloatFloat";
  case NVPTXISD::Tld4UnifiedR2DS64Float: return "NVPTXISD::Tld4UnifiedR2DS64Float";
  case NVPTXISD::Tld4UnifiedG2DS64Float: return "NVPTXISD::Tld4UnifiedG2DS64Float";
  case NVPTXISD::Tld4UnifiedB2DS64Float: return "NVPTXISD::Tld4UnifiedB2DS64Float";
  case NVPTXISD::Tld4UnifiedA2DS64Float: return "NVPTXISD::Tld4UnifiedA2DS64Float";
  case NVPTXISD::Tld4UnifiedR2DU64Float: return "NVPTXISD::Tld4UnifiedR2DU64Float";
  case NVPTXISD::Tld4UnifiedG2DU64Float: return "NVPTXISD::Tld4UnifiedG2DU64Float";
  case NVPTXISD::Tld4UnifiedB2DU64Float: return "NVPTXISD::Tld4UnifiedB2DU64Float";
  case NVPTXISD::Tld4UnifiedA2DU64Float: return "NVPTXISD::Tld4UnifiedA2DU64Float";
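// getSqrtEstimate() picks a PTX approximation sequence: rsqrt.approx for
// reciprocal-sqrt (or when refinement steps follow), sqrt.approx for plain
// sqrt. f64 has no sqrt.approx, so rcp.approx.ftz.d(rsqrt.approx.d(x)) is
// used instead.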
                                               bool Reciprocal) const {
  if (Reciprocal || ExtraSteps > 0) {
    if (VT == MVT::f32)
      return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_rsqrt_approx_ftz_f
                                   : Intrinsic::nvvm_rsqrt_approx_f);
    else if (VT == MVT::f64)
      return MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d);
  } else {
    if (VT == MVT::f32)
      return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_sqrt_approx_ftz_f
                                   : Intrinsic::nvvm_sqrt_approx_f);
      return DAG.getNode(
          ISD::INTRINSIC_WO_CHAIN, DL, VT,
          DAG.getConstant(Intrinsic::nvvm_rcp_approx_ftz_d, DL, MVT::i32),
          MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d));
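// getPrototype() renders the ".callprototype" declaration PTX requires for
// indirect calls: the return value and every argument are printed either as
// a scalar ".param .b<N>" or as an aligned ".b8 _[<size>]" array.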
    std::optional<std::pair<unsigned, const APInt &>> VAInfo,
    const CallBase &CB, unsigned UniqueCallSite) const {
  assert(isABI && "Non-ABI compilation is not supported");
  std::string Prototype;
  raw_string_ostream O(Prototype);
  O << "prototype_" << UniqueCallSite << " : .callprototype ";
    if (auto *ITy = dyn_cast<IntegerType>(retTy)) {
      size = ITy->getBitWidth();
    } else {
      assert(retTy->isFloatingPointTy() &&
             "Floating point type expected here");
      size = retTy->getPrimitiveSizeInBits();
    }
    O << ".param .b" << size << " _";
  } else if (isa<PointerType>(retTy)) {
    O << ".param .b" << PtrVT.getSizeInBits() << " _";
  } else if (retTy->isAggregateType() || retTy->isVectorTy() ||
             retTy->isIntegerTy(128)) {
    O << ".param .align " << (retAlignment ? retAlignment->value() : 0)
      << " .b8 _[" << DL.getTypeAllocSize(retTy) << "]";
  unsigned NumArgs = VAInfo ? VAInfo->first : Args.size();
  for (unsigned i = 0, OIdx = 0; i != NumArgs; ++i, ++OIdx) {
    Type *Ty = Args[i].Ty;
    if (!Outs[OIdx].Flags.isByVal()) {
        unsigned ParamAlign = 0;
        const CallInst *CallI = cast<CallInst>(&CB);
        // +1 because index 0 is reserved for the return type alignment.
        if (!getAlign(*CallI, i + 1, ParamAlign))
          ParamAlign = getFunctionParamOptimizedAlign(F, Ty, DL).value();
        O << ".param .align " << ParamAlign << " .b8 ";
        O << "[" << DL.getTypeAllocSize(Ty) << "]";
      if (unsigned len = vtparts.size())
        assert((getValueType(DL, Ty) == Outs[OIdx].VT ||
                (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
               "type mismatch between callee prototype and arguments");
      if (isa<IntegerType>(Ty)) {
        sz = cast<IntegerType>(Ty)->getBitWidth();
      } else if (isa<PointerType>(Ty)) {
        sz = PtrVT.getSizeInBits();
      }
      O << ".param .b" << sz << " ";
      Type *ETy = Args[i].IndirectType;
      Align InitialAlign = Outs[OIdx].Flags.getNonZeroByValAlign();
      Align ParamByValAlign =
          getFunctionByValParamAlign(/*F=*/nullptr, ETy, InitialAlign, DL);
      O << ".param .align " << ParamByValAlign.value() << " .b8 ";
      O << "[" << Outs[OIdx].Flags.getByValSize() << "]";
    O << (first ? "" : ",") << " .param .align " << VAInfo->second
Align NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
                                                const CallBase *CB, Type *Ty,
                                                unsigned Idx,
                                                const DataLayout &DL) const {
  if (!CB)
    // CallSite is zero: fence call; use the ABI type alignment.
    return DL.getABITypeAlign(Ty);
  unsigned Alignment = 0;
  const Function *DirectCallee = CB->getCalledFunction();
  if (!DirectCallee) {
    // Check for call-site alignment metadata on indirect calls.
    if (const auto *CI = dyn_cast<CallInst>(CB)) {
      if (getAlign(*CI, Idx, Alignment))
        return Align(Alignment);
    }
  }
  if (DirectCallee)
    if (getAlign(*DirectCallee, Idx, Alignment))
      return Align(Alignment);
  // Call is indirect; fall back to the ABI type alignment.
  return DL.getABITypeAlign(Ty);
        "Support for variadic functions (unsized array parameter) was "
        "introduced in PTX ISA version 6.0 and requires target sm_30.");
  assert(isABI && "Non-ABI compilation is not supported");
  unsigned VAOffset = 0;
  unsigned ParamCount = 0;
  for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
    EVT VT = Outs[OIdx].VT;
    Type *Ty = Args[i].Ty;
    bool IsByVal = Outs[OIdx].Flags.isByVal();
    assert((!IsByVal || Args[i].IndirectType) &&
           "byval arg must have indirect type");
    Type *ETy = (IsByVal ? Args[i].IndirectType : Ty);
    Align InitialAlign = Outs[OIdx].Flags.getNonZeroByValAlign();
      VAOffset = alignTo(VAOffset, ArgAlign);
      ArgAlign = getArgumentAlignment(Callee, CB, Ty, ParamCount + 1, DL);
    unsigned TypeSize =
        (IsByVal ? Outs[OIdx].Flags.getByValSize() : DL.getTypeAllocSize(Ty));
      if (ParamCount == FirstVAArg) {
        VADeclareParam = Chain = DAG.getNode(NVPTXISD::DeclareParam, dl,
                                             DeclareParamVTs, DeclareParamOps);
      NeedAlign = PassAsArray;
    } else if (PassAsArray) {
      SDValue DeclareScalarParamOps[] = {
          Chain, DAG.getConstant(ParamCount, dl, MVT::i32),
          DAG.getConstant(sz, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
          InGlue};
      Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
                          DeclareScalarParamOps);
    bool ExtendIntegerParam =
        Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Ty) < 32;
    for (unsigned j = 0, je = VTs.size(); j != je; ++j) {
      int CurOffset = Offsets[j];
        assert(StoreOperands.empty() && "Unfinished preceding store.");
        // First store operand: the parameter index this store targets.
        StoreOperands.push_back(
            DAG.getConstant(IsVAArg ? FirstVAArg : ParamCount, dl, MVT::i32));
        // Second: the byte offset within the parameter.
        StoreOperands.push_back(DAG.getConstant(
            IsByVal ? CurOffset + VAOffset : (IsVAArg ? VAOffset : CurOffset),
            dl, MVT::i32));
      SDValue StVal = OutVals[OIdx];
        EltVT = EVT(PromotedVT);
        StVal = DAG.getNode(Ext, dl, PromotedVT, StVal);
      } else if (ExtendIntegerParam) {
        assert(VTs.size() == 1 && "Scalar can't have multiple parts.");
        StVal = DAG.getNode(Outs[OIdx].Flags.isSExt() ? ISD::SIGN_EXTEND
                                                      : ISD::ZERO_EXTEND,
                            dl, MVT::i32, StVal);
        unsigned NumElts = StoreOperands.size() - 3;
        EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : EltVT;
        Chain = DAG.getMemIntrinsicNode(
            Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands,
            TheStoreType, MachinePointerInfo(), PartAlign,
            MachineMemOperand::MOStore);
        StoreOperands.clear();
        if (!IsByVal && IsVAArg) {
          assert(NumElts == 1 &&
                 "Vectorization is expected to be disabled for variadics.");
          VAOffset += DL.getTypeAllocSize(
              TheStoreType.getTypeForEVT(*DAG.getContext()));
    assert(StoreOperands.empty() && "Unfinished parameter store.");
    if (!IsByVal && VTs.size() > 0)
    if (IsByVal && IsVAArg)
  if (Ins.size() > 0) {
    unsigned resultsz = DL.getTypeAllocSizeInBits(RetTy);
      retAlignment = getArgumentAlignment(Callee, CB, RetTy, 0, DL);
      assert(retAlignment && "retAlignment is guaranteed to be set");
          Chain, DAG.getConstant(retAlignment->value(), dl, MVT::i32),
                        VADeclareParam->getVTList(), DeclareParamOps);
  if (isa<ExternalSymbolSDNode>(Callee)) {
    assert(CalleeFunc != nullptr && "Libcall callee must be set.");
    CalleeFunc->addFnAttr("nvptx-libcall-callee", "true");
        DL, RetTy, Args, Outs, retAlignment,
        HasVAArgs
            ? std::optional<std::pair<unsigned, const APInt &>>(std::make_pair(
                  CLI.NumFixedArgs,
                  cast<ConstantSDNode>(VADeclareParam->getOperand(1))
                      ->getAPIntValue()))
            : std::nullopt,
        *CB, UniqueCallSite);
      Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InGlue
  Chain = DAG.getNode(Opcode, dl, PrintCallVTs, PrintCallOps);
  SDValue CallVoidOps[] = { Chain, Callee, InGlue };
  SDValue CallArgBeginOps[] = { Chain, InGlue };
  for (unsigned i = 0, e = std::min(CLI.NumFixedArgs + 1, ParamCount); i != e;
       ++i) {
    Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps);
  SDValue CallArgEndOps[] = { Chain,
      Chain, DAG.getConstant(UniqueCallSite, dl, MVT::i32), InGlue};
  if (Ins.size() > 0) {
    assert(VTs.size() == Ins.size() && "Bad value decomposition");
    Align RetAlign = getArgumentAlignment(Callee, CB, RetTy, 0, DL);
    bool ExtendIntegerRetVal =
        RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32;
    for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
      bool needTruncate = false;
      EVT TheLoadType = VTs[i];
      EVT EltType = Ins[i].VT;
        TheLoadType = EVT(PromotedVT);
        EltType = EVT(PromotedVT);
        needTruncate = true;
      if (ExtendIntegerRetVal) {
        TheLoadType = MVT::i32;
        needTruncate = true;
        if (VTs[i].isInteger())
          needTruncate = true;
        assert(VecIdx == -1 && LoadVTs.empty() && "Orphaned operand list.");
        unsigned NumElts = LoadVTs.size();
            DAG.getConstant(Offsets[VecIdx], dl, MVT::i32), InGlue};
        SDValue RetVal = DAG.getMemIntrinsicNode(
            Op, dl, DAG.getVTList(LoadVTs), LoadOperands, TheLoadType,
            MachinePointerInfo(), EltAlign, MachineMemOperand::MOLoad);
        for (unsigned j = 0; j < NumElts; ++j) {
            ProxyRegTruncates.push_back(std::optional<MVT>(Ins[VecIdx + j].VT));
            ProxyRegTruncates.push_back(std::optional<MVT>());
        InGlue = RetVal.getValue(NumElts + 1);
  Chain = DAG.getCALLSEQ_END(Chain, UniqueCallSite, UniqueCallSite + 1, InGlue,
                             dl);
  for (unsigned i = 0; i < ProxyRegOps.size(); ++i) {
    // ProxyReg forwards each returned value out of the call sequence while
    // preserving chain/glue ordering.
    SDValue Ret = DAG.getNode(
        NVPTXISD::ProxyReg, dl,
        DAG.getVTList(ProxyRegOps[i].getSimpleValueType(), MVT::Other,
                      MVT::Glue),
        {Chain, ProxyRegOps[i], InGlue});
    Chain = Ret.getValue(1);
    InGlue = Ret.getValue(2);
    if (ProxyRegTruncates[i]) {
  unsigned NumOperands = Node->getNumOperands();
  for (unsigned i = 0; i < NumOperands; ++i) {
    SDValue SubOp = Node->getOperand(i);
    for (unsigned j = 0; j < NumSubElem; ++j) {
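// BUILD_VECTOR of v2f16/v2bf16/v2i16 is constant-folded here: when both
// elements are constants, their bit patterns are packed into one 32-bit
// immediate instead of a runtime vector construction.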
  EVT VT = Op->getValueType(0);
  if (VT == MVT::v2f16 || VT == MVT::v2bf16) {
    if (!(isa<ConstantFPSDNode>(Op->getOperand(0)) &&
          isa<ConstantFPSDNode>(Op->getOperand(1))))
      return SDValue();
    E0 = cast<ConstantFPSDNode>(Op->getOperand(0))
             ->getValueAPF()
             .bitcastToAPInt();
    E1 = cast<ConstantFPSDNode>(Op->getOperand(1))
             ->getValueAPF()
             .bitcastToAPInt();
  } else {
    assert(VT == MVT::v2i16);
    if (!(isa<ConstantSDNode>(Op->getOperand(0)) &&
          isa<ConstantSDNode>(Op->getOperand(1))))
      return SDValue();
    E0 = cast<ConstantSDNode>(Op->getOperand(0))->getAPIntValue();
    E1 = cast<ConstantSDNode>(Op->getOperand(1))->getAPIntValue();
  }
  if (isa<ConstantSDNode>(Index.getNode()))
  EVT VT = Op.getValueType();
  EVT VT = Op.getValueType();
  EVT VT = Op.getValueType();
  if (VT == MVT::f32)
    return LowerFROUND32(Op, DAG);
  if (VT == MVT::f64)
    return LowerFROUND64(Op, DAG);
  EVT VT = Op.getValueType();
  const int SignBitMask = 0x80000000;
  const int PointFiveInBits = 0x3F000000;
  SDValue PointFiveWithSignRaw =
  EVT VT = Op.getValueType();
  if (Op.getValueType() != MVT::v2i16)
    return Op;
  EVT EltVT = Op.getValueType().getVectorElementType();
  for (int I = 0, E = Op.getValueType().getVectorNumElements(); I < E; I++) {
        [&](const SDUse &O) {
          return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, O.get(),
                             DAG.getIntPtrConstant(I, DL));
        });
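// LowerOperation() dispatches every operation that NVPTX marked as Custom
// to its dedicated lowering routine.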
  switch (Op.getOpcode()) {
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::CONCAT_VECTORS:
    return LowerCONCAT_VECTORS(Op, DAG);
  case ISD::STORE:
    return LowerSTORE(Op, DAG);
  case ISD::LOAD:
    return LowerLOAD(Op, DAG);
  case ISD::SHL_PARTS:
    return LowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS:
    return LowerShiftRightParts(Op, DAG);
  case ISD::SELECT:
    return LowerSelect(Op, DAG);
  case ISD::FROUND:
    return LowerFROUND(Op, DAG);
  case ISD::VAARG:
    return LowerVAARG(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG);
  const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  EVT VT = Node->getValueType(0);
  SDValue Tmp1 = Node->getOperand(0);
  SDValue Tmp2 = Node->getOperand(1);
  const MaybeAlign MA(Node->getConstantOperandVal(3));
  SDValue Arg = getParamSymbol(DAG, -1, PtrVT);
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, VAReg, Op.getOperand(1),
                      MachinePointerInfo(SV));
  assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1");
  if (Op.getValueType() == MVT::i1)
    return LowerLOADi1(Op, DAG);
  EVT MemVT = Load->getMemoryVT();
  if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
                                      MemVT, *Load->getMemOperand())) {
  assert(Node->getValueType(0) == MVT::i1 &&
         "Custom lowering for i1 load only");
  SDValue newLD = DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(),
                              LD->getPointerInfo(), LD->getAlign(),
                              LD->getMemOperand()->getFlags());
  SDValue Ops[] = { result, LD->getChain() };
  return LowerSTOREi1(Op, DAG);
          VT, *Store->getMemOperand()))
  return LowerSTOREVector(Op, DAG);
  if (Alignment < PrefAlign) {
  unsigned Opcode = 0;
  bool NeedExt = false;
  bool StoreF16x2 = false;
    for (unsigned i = 0; i < NumElts; ++i) {
    for (unsigned i = 0; i < NumElts; ++i) {
  Ops.append(N->op_begin() + 2, N->op_end());
  SDValue Result =
      DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), MVT::i8,
                        ST->getAlign(), ST->getMemOperand()->getFlags());
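// LowerFormalArguments() materializes incoming arguments by loading from
// their .param symbols; arguments without uses still advance the parameter
// index so the PTX signature stays in sync.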
  std::vector<SDValue> OutChains;
  assert(isABI && "Non-ABI compilation is not supported");
  std::vector<Type *> argTypes;
  std::vector<const Argument *> theArgs;
  for (const Argument &I : F->args()) {
    theArgs.push_back(&I);
    argTypes.push_back(I.getType());
  }
  unsigned InsIdx = 0;
  for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) {
    Type *Ty = argTypes[i];
    if (theArgs[i]->use_empty()) {
      if (vtparts.empty())
        for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
             ++parti) {
        if (vtparts.size() > 0)
        for (unsigned parti = 0; parti < NumRegs; ++parti) {
    bool aggregateIsPacked = false;
    if (StructType *STy = dyn_cast<StructType>(Ty))
      aggregateIsPacked = STy->isPacked();
    SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
    for (unsigned parti = 0, parte = VTs.size(); parti != parte; ++parti) {
        assert(VecIdx == -1 && "Orphaned vector.");
      if (VectorInfo[parti] & PVF_LAST) {
        unsigned NumElts = parti - VecIdx + 1;
        EVT EltVT = VTs[parti];
        if (EltVT == MVT::i1)
        P.getNode()->setIROrder(idx + 1);
        for (unsigned j = 0; j < NumElts; ++j) {
          if (EltVT == MVT::i1)
              Ins[InsIdx].VT.getFixedSizeInBits() >
            Elt = DAG.getNode(Extend, dl, Ins[InsIdx].VT, Elt);
    assert(ObjectVT == Ins[InsIdx].VT &&
           "Ins type did not match function type");
    SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
    p.getNode()->setIROrder(idx + 1);
  if (!OutChains.empty())
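// LowerReturn() widens sub-32-bit integer return values to .b32 and writes
// all pieces back with StoreRetval nodes, merged into v2/v4 stores when
// profitable.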
  assert(isABI && "Non-ABI compilation is not supported");
  assert(VTs.size() == OutVals.size() && "Bad return value decomposition");
  for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
    SDValue PromotedOutVal = OutVals[i];
      VTs[i] = EVT(PromotedVT);
      PromotedOutVal = DAG.getNode(Ext, dl, PromotedVT, PromotedOutVal);
    PromotedOutVals.push_back(PromotedOutVal);
  bool ExtendIntegerRetVal =
      RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32;
  for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
      assert(StoreOperands.empty() && "Orphaned operand list.");
    SDValue RetVal = PromotedOutVals[i];
    if (ExtendIntegerRetVal) {
      RetVal = DAG.getNode(Outs[i].Flags.isSExt() ? ISD::SIGN_EXTEND
                                                  : ISD::ZERO_EXTEND,
                           dl, MVT::i32, RetVal);
      unsigned NumElts = StoreOperands.size() - 2;
      EVT TheStoreType = ExtendIntegerRetVal ? MVT::i32 : VTs[i];
      Chain = DAG.getMemIntrinsicNode(
          Op, dl, DAG.getVTList(MVT::Other), StoreOperands, TheStoreType,
          MachinePointerInfo(), Align(1), MachineMemOperand::MOStore);
      StoreOperands.clear();
    SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
  if (Constraint.length() > 1)
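// Map each texture/tld4 intrinsic to its NVPTXISD texture opcode.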
  switch (Intrinsic) {
  case Intrinsic::nvvm_tex_1d_v4f32_s32:
  case Intrinsic::nvvm_tex_1d_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_v4s32_s32:
  case Intrinsic::nvvm_tex_1d_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_v4u32_s32:
  case Intrinsic::nvvm_tex_1d_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
  case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
  case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
  case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_v4f32_s32:
  case Intrinsic::nvvm_tex_2d_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_v4s32_s32:
  case Intrinsic::nvvm_tex_2d_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_v4u32_s32:
  case Intrinsic::nvvm_tex_2d_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
  case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
  case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
  case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_3d_v4f32_s32:
  case Intrinsic::nvvm_tex_3d_v4f32_f32:
  case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_3d_v4s32_s32:
  case Intrinsic::nvvm_tex_3d_v4s32_f32:
  case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_3d_v4u32_s32:
  case Intrinsic::nvvm_tex_3d_v4u32_f32:
  case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_cube_v4f32_f32:
  case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
  case Intrinsic::nvvm_tex_cube_v4s32_f32:
  case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
  case Intrinsic::nvvm_tex_cube_v4u32_f32:
  case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
  case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
  case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
  case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
  case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
  case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
  case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
  case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32:
  switch (Intrinsic) {
  case Intrinsic::nvvm_suld_1d_i8_clamp:
  case Intrinsic::nvvm_suld_1d_i16_clamp:
  case Intrinsic::nvvm_suld_1d_i32_clamp:
  case Intrinsic::nvvm_suld_1d_i64_clamp:
  case Intrinsic::nvvm_suld_1d_v2i8_clamp:
  case Intrinsic::nvvm_suld_1d_v2i16_clamp:
  case Intrinsic::nvvm_suld_1d_v2i32_clamp:
  case Intrinsic::nvvm_suld_1d_v2i64_clamp:
  case Intrinsic::nvvm_suld_1d_v4i8_clamp:
  case Intrinsic::nvvm_suld_1d_v4i16_clamp:
  case Intrinsic::nvvm_suld_1d_v4i32_clamp:
  case Intrinsic::nvvm_suld_1d_array_i8_clamp:
  case Intrinsic::nvvm_suld_1d_array_i16_clamp:
  case Intrinsic::nvvm_suld_1d_array_i32_clamp:
  case Intrinsic::nvvm_suld_1d_array_i64_clamp:
  case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
  case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
  case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
  case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
  case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
  case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
  case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
  case Intrinsic::nvvm_suld_2d_i8_clamp:
  case Intrinsic::nvvm_suld_2d_i16_clamp:
  case Intrinsic::nvvm_suld_2d_i32_clamp:
  case Intrinsic::nvvm_suld_2d_i64_clamp:
  case Intrinsic::nvvm_suld_2d_v2i8_clamp:
  case Intrinsic::nvvm_suld_2d_v2i16_clamp:
  case Intrinsic::nvvm_suld_2d_v2i32_clamp:
  case Intrinsic::nvvm_suld_2d_v2i64_clamp:
  case Intrinsic::nvvm_suld_2d_v4i8_clamp:
  case Intrinsic::nvvm_suld_2d_v4i16_clamp:
  case Intrinsic::nvvm_suld_2d_v4i32_clamp:
  case Intrinsic::nvvm_suld_2d_array_i8_clamp:
  case Intrinsic::nvvm_suld_2d_array_i16_clamp:
  case Intrinsic::nvvm_suld_2d_array_i32_clamp:
  case Intrinsic::nvvm_suld_2d_array_i64_clamp:
  case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
  case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
  case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
  case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
  case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
  case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
  case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
  case Intrinsic::nvvm_suld_3d_i8_clamp:
  case Intrinsic::nvvm_suld_3d_i16_clamp:
  case Intrinsic::nvvm_suld_3d_i32_clamp:
  case Intrinsic::nvvm_suld_3d_i64_clamp:
  case Intrinsic::nvvm_suld_3d_v2i8_clamp:
  case Intrinsic::nvvm_suld_3d_v2i16_clamp:
  case Intrinsic::nvvm_suld_3d_v2i32_clamp:
  case Intrinsic::nvvm_suld_3d_v2i64_clamp:
  case Intrinsic::nvvm_suld_3d_v4i8_clamp:
  case Intrinsic::nvvm_suld_3d_v4i16_clamp:
  case Intrinsic::nvvm_suld_3d_v4i32_clamp:
  case Intrinsic::nvvm_suld_1d_i8_trap:
  case Intrinsic::nvvm_suld_1d_i16_trap:
  case Intrinsic::nvvm_suld_1d_i32_trap:
  case Intrinsic::nvvm_suld_1d_i64_trap:
  case Intrinsic::nvvm_suld_1d_v2i8_trap:
  case Intrinsic::nvvm_suld_1d_v2i16_trap:
  case Intrinsic::nvvm_suld_1d_v2i32_trap:
  case Intrinsic::nvvm_suld_1d_v2i64_trap:
  case Intrinsic::nvvm_suld_1d_v4i8_trap:
  case Intrinsic::nvvm_suld_1d_v4i16_trap:
  case Intrinsic::nvvm_suld_1d_v4i32_trap:
  case Intrinsic::nvvm_suld_1d_array_i8_trap:
  case Intrinsic::nvvm_suld_1d_array_i16_trap:
  case Intrinsic::nvvm_suld_1d_array_i32_trap:
  case Intrinsic::nvvm_suld_1d_array_i64_trap:
  case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
  case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
  case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
  case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
  case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
  case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
  case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
  case Intrinsic::nvvm_suld_2d_i8_trap:
  case Intrinsic::nvvm_suld_2d_i16_trap:
  case Intrinsic::nvvm_suld_2d_i32_trap:
  case Intrinsic::nvvm_suld_2d_i64_trap:
  case Intrinsic::nvvm_suld_2d_v2i8_trap:
  case Intrinsic::nvvm_suld_2d_v2i16_trap:
  case Intrinsic::nvvm_suld_2d_v2i32_trap:
  case Intrinsic::nvvm_suld_2d_v2i64_trap:
  case Intrinsic::nvvm_suld_2d_v4i8_trap:
  case Intrinsic::nvvm_suld_2d_v4i16_trap:
  case Intrinsic::nvvm_suld_2d_v4i32_trap:
  case Intrinsic::nvvm_suld_2d_array_i8_trap:
  case Intrinsic::nvvm_suld_2d_array_i16_trap:
  case Intrinsic::nvvm_suld_2d_array_i32_trap:
  case Intrinsic::nvvm_suld_2d_array_i64_trap:
  case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
  case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
  case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
  case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
  case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
  case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
  case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
  case Intrinsic::nvvm_suld_3d_i8_trap:
  case Intrinsic::nvvm_suld_3d_i16_trap:
  case Intrinsic::nvvm_suld_3d_i32_trap:
  case Intrinsic::nvvm_suld_3d_i64_trap:
  case Intrinsic::nvvm_suld_3d_v2i8_trap:
  case Intrinsic::nvvm_suld_3d_v2i16_trap:
  case Intrinsic::nvvm_suld_3d_v2i32_trap:
  case Intrinsic::nvvm_suld_3d_v2i64_trap:
  case Intrinsic::nvvm_suld_3d_v4i8_trap:
  case Intrinsic::nvvm_suld_3d_v4i16_trap:
  case Intrinsic::nvvm_suld_3d_v4i32_trap:
  case Intrinsic::nvvm_suld_1d_i8_zero:
  case Intrinsic::nvvm_suld_1d_i16_zero:
  case Intrinsic::nvvm_suld_1d_i32_zero:
  case Intrinsic::nvvm_suld_1d_i64_zero:
  case Intrinsic::nvvm_suld_1d_v2i8_zero:
  case Intrinsic::nvvm_suld_1d_v2i16_zero:
  case Intrinsic::nvvm_suld_1d_v2i32_zero:
  case Intrinsic::nvvm_suld_1d_v2i64_zero:
  case Intrinsic::nvvm_suld_1d_v4i8_zero:
  case Intrinsic::nvvm_suld_1d_v4i16_zero:
  case Intrinsic::nvvm_suld_1d_v4i32_zero:
  case Intrinsic::nvvm_suld_1d_array_i8_zero:
  case Intrinsic::nvvm_suld_1d_array_i16_zero:
  case Intrinsic::nvvm_suld_1d_array_i32_zero:
  case Intrinsic::nvvm_suld_1d_array_i64_zero:
  case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
  case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
  case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
  case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
  case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
  case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
  case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
  case Intrinsic::nvvm_suld_2d_i8_zero:
  case Intrinsic::nvvm_suld_2d_i16_zero:
  case Intrinsic::nvvm_suld_2d_i32_zero:
  case Intrinsic::nvvm_suld_2d_i64_zero:
  case Intrinsic::nvvm_suld_2d_v2i8_zero:
  case Intrinsic::nvvm_suld_2d_v2i16_zero:
  case Intrinsic::nvvm_suld_2d_v2i32_zero:
  case Intrinsic::nvvm_suld_2d_v2i64_zero:
  case Intrinsic::nvvm_suld_2d_v4i8_zero:
  case Intrinsic::nvvm_suld_2d_v4i16_zero:
  case Intrinsic::nvvm_suld_2d_v4i32_zero:
  case Intrinsic::nvvm_suld_2d_array_i8_zero:
  case Intrinsic::nvvm_suld_2d_array_i16_zero:
  case Intrinsic::nvvm_suld_2d_array_i32_zero:
  case Intrinsic::nvvm_suld_2d_array_i64_zero:
  case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
  case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
  case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
  case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
  case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
  case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
  case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
  case Intrinsic::nvvm_suld_3d_i8_zero:
  case Intrinsic::nvvm_suld_3d_i16_zero:
  case Intrinsic::nvvm_suld_3d_i32_zero:
  case Intrinsic::nvvm_suld_3d_i64_zero:
  case Intrinsic::nvvm_suld_3d_v2i8_zero:
  case Intrinsic::nvvm_suld_3d_v2i16_zero:
  case Intrinsic::nvvm_suld_3d_v2i32_zero:
  case Intrinsic::nvvm_suld_3d_v2i64_zero:
  case Intrinsic::nvvm_suld_3d_v4i8_zero:
  case Intrinsic::nvvm_suld_3d_v4i16_zero:
  case Intrinsic::nvvm_suld_3d_v4i32_zero:
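// getTgtMemIntrinsic() describes the memory touched by NVVM intrinsics
// (WMMA fragment loads/stores, ldmatrix, atomics, ldu/ldg) so the DAG
// builder can attach accurate memory operands; memVT is the per-lane
// fragment type read or written.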
  switch (Intrinsic) {
  case Intrinsic::nvvm_match_all_sync_i32p:
  case Intrinsic::nvvm_match_all_sync_i64p:
    Info.memVT = MVT::i1;
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col:
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row:
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col:
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row:
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row_stride: {
    Info.memVT = MVT::v8f16;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col:
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_col:
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_row:
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_row_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_col:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_col_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_row:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_row_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_col:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_col:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_col_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_row:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_row_stride: {
    Info.memVT = MVT::v2i32;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_col:
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_col_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_col_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_col:
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_row:
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_row_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row:
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_col:
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_row:
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_row_stride:
  case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_col:
  case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_row:
  case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_row_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_col_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_col:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_col:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_row:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_row_stride:
  case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_col:
  case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_row:
  case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_row_stride:
  case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x4_b16:
  case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x4_trans_b16: {
    Info.memVT = MVT::v4i32;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_col:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_col_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_col_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_col:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_row:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_row_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_row_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_row:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_col:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_col_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_col_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_col:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_row:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_row_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_row_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_row:
  case Intrinsic::nvvm_wmma_m8n8k128_load_a_b1_row:
  case Intrinsic::nvvm_wmma_m8n8k128_load_a_b1_row_stride:
  case Intrinsic::nvvm_wmma_m8n8k128_load_b_b1_col:
  case Intrinsic::nvvm_wmma_m8n8k128_load_b_b1_col_stride:
  case Intrinsic::nvvm_wmma_m8n8k32_load_a_s4_row:
  case Intrinsic::nvvm_wmma_m8n8k32_load_a_s4_row_stride:
  case Intrinsic::nvvm_wmma_m8n8k32_load_a_u4_row_stride:
  case Intrinsic::nvvm_wmma_m8n8k32_load_a_u4_row:
  case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col:
  case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col_stride:
  case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col_stride:
  case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col:
  case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x1_b16:
  case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x1_trans_b16: {
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col:
  case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row:
  case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col:
  case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row:
  case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col:
  case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row:
  case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row_stride: {
    Info.memVT = MVT::v4f16;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col:
  case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row:
  case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col:
  case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row:
  case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col:
  case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row:
  case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row_stride:
  case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_col:
  case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_row:
  case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_row_stride: {
    Info.memVT = MVT::v8f32;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_col:
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_col_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_row:
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_row_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_col:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_col_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_row:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_row_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col:
  case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_row:
  case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_row_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_col:
  case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_col_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_row:
  case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_row_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_col:
  case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_col_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_row:
  case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_row_stride: {
    Info.memVT = MVT::v8i32;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_col:
  case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_col_stride:
  case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_row:
  case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_row_stride:
  case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col:
  case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col_stride:
  case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row:
  case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row_stride:
  case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x2_b16:
  case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x2_trans_b16: {
    Info.memVT = MVT::v2i32;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_col:
  case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_col_stride:
  case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_row:
  case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_row_stride:
  case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_col:
  case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_col_stride:
  case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_row:
  case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_row_stride: {
    Info.memVT = MVT::f64;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_col:
  case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_col_stride:
  case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_row:
  case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_row_stride: {
    Info.memVT = MVT::v2f64;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col:
  case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row:
  case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col:
  case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row:
  case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col:
  case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row:
  case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row_stride: {
    Info.memVT = MVT::v4f16;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col:
  case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row:
  case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col:
  case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row:
  case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col:
  case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row:
  case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row_stride:
  case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_col:
  case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_row:
  case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_row_stride: {
    Info.memVT = MVT::v8f32;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_col:
  case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_row:
  case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_row_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_col:
  case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_col_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_row:
  case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_row_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_col:
  case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_col_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_row:
  case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_row_stride: {
    Info.memVT = MVT::v8i32;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_col:
  case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_col_stride:
  case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_row:
  case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_row_stride:
  case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_col:
  case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_col_stride:
  case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_row:
  case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_row_stride: {
    Info.memVT = MVT::v2i32;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_col:
  case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_col_stride:
  case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_row:
  case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_row_stride: {
    Info.memVT = MVT::v2f64;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_atomic_load_inc_32:
  case Intrinsic::nvvm_atomic_load_dec_32:
  case Intrinsic::nvvm_atomic_add_gen_f_cta:
  case Intrinsic::nvvm_atomic_add_gen_f_sys:
  case Intrinsic::nvvm_atomic_add_gen_i_cta:
  case Intrinsic::nvvm_atomic_add_gen_i_sys:
  case Intrinsic::nvvm_atomic_and_gen_i_cta:
  case Intrinsic::nvvm_atomic_and_gen_i_sys:
  case Intrinsic::nvvm_atomic_cas_gen_i_cta:
  case Intrinsic::nvvm_atomic_cas_gen_i_sys:
  case Intrinsic::nvvm_atomic_dec_gen_i_cta:
  case Intrinsic::nvvm_atomic_dec_gen_i_sys:
  case Intrinsic::nvvm_atomic_inc_gen_i_cta:
  case Intrinsic::nvvm_atomic_inc_gen_i_sys:
  case Intrinsic::nvvm_atomic_max_gen_i_cta:
  case Intrinsic::nvvm_atomic_max_gen_i_sys:
  case Intrinsic::nvvm_atomic_min_gen_i_cta:
  case Intrinsic::nvvm_atomic_min_gen_i_sys:
  case Intrinsic::nvvm_atomic_or_gen_i_cta:
  case Intrinsic::nvvm_atomic_or_gen_i_sys:
  case Intrinsic::nvvm_atomic_exch_gen_i_cta:
  case Intrinsic::nvvm_atomic_exch_gen_i_sys:
  case Intrinsic::nvvm_atomic_xor_gen_i_cta:
  case Intrinsic::nvvm_atomic_xor_gen_i_sys: {
    auto &DL = I.getModule()->getDataLayout();
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_p: {
    auto &DL = I.getModule()->getDataLayout();
    if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
      Info.memVT = getValueType(DL, I.getType());
    else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
      Info.memVT = getPointerTy(DL);
    Info.ptrVal = I.getArgOperand(0);
    Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
  case Intrinsic::nvvm_ldg_global_i:
  case Intrinsic::nvvm_ldg_global_f:
  case Intrinsic::nvvm_ldg_global_p: {
    auto &DL = I.getModule()->getDataLayout();
    if (Intrinsic == Intrinsic::nvvm_ldg_global_i)
      Info.memVT = getValueType(DL, I.getType());
    else if (Intrinsic == Intrinsic::nvvm_ldg_global_p)
      Info.memVT = getPointerTy(DL);
    Info.ptrVal = I.getArgOperand(0);
    Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
  case Intrinsic::nvvm_tex_1d_v4f32_s32:
  case Intrinsic::nvvm_tex_1d_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_level_v4f32_f32: