43#include "llvm/IR/IntrinsicsNVPTX.h"
65#define DEBUG_TYPE "nvptx-lower"
77 cl::desc(
"NVPTX Specific: FMA contraction (0: don't do it"
78 " 1: do it 2: do it aggressively"),
83 cl::desc(
"NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
84 " IEEE Compliant F32 div.rnd if available."),
89 cl::desc(
"NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
165 Offsets->push_back(StartingOffset + 0);
166 Offsets->push_back(StartingOffset + 8);
173 if (
StructType *STy = dyn_cast<StructType>(Ty)) {
174 auto const *SL =
DL.getStructLayout(STy);
176 for(
auto *EI : STy->elements()) {
178 StartingOffset + SL->getElementOffset(ElementNum));
185 for (
unsigned i = 0, e = TempVTs.
size(); i != e; ++i) {
200 for (
unsigned j = 0; j != NumElts; ++j) {
208 Offsets->push_back(Off);
223 "Promotion is not suitable for scalars of size larger than 64-bits");
242 return EVT(*PromotedVT) != VT;
262 if (ParamAlignment < AccessSize)
265 if (Offsets[
Idx] & (AccessSize - 1))
268 EVT EltVT = ValueVTs[
Idx];
272 if (EltSize >= AccessSize)
275 unsigned NumElts = AccessSize / EltSize;
277 if (AccessSize != EltSize * NumElts)
281 if (
Idx + NumElts > ValueVTs.
size())
285 if (NumElts != 4 && NumElts != 2)
288 for (
unsigned j =
Idx + 1; j <
Idx + NumElts; ++j) {
290 if (ValueVTs[j] != EltVT)
294 if (Offsets[j] - Offsets[j - 1] != EltSize)
322 Align ParamAlignment,
bool IsVAArg =
false) {
332 for (
int I = 0,
E = ValueVTs.
size();
I !=
E; ++
I) {
335 for (
unsigned AccessSize : {16, 8, 4, 2}) {
337 I, AccessSize, ValueVTs, Offsets, ParamAlignment);
346 assert(
I + 1 <
E &&
"Not enough elements.");
352 assert(
I + 3 <
E &&
"Not enough elements.");
583 const bool IsFP16FP16x2NegAvailable = STI.
getSmVersion() >= 53 &&
616 for (
const auto &Op :
626 return IsAtLeastSm80 ?
Legal : NotSm80Action;
655 return "NVPTXISD::CALL";
657 return "NVPTXISD::RET_FLAG";
659 return "NVPTXISD::LOAD_PARAM";
661 return "NVPTXISD::Wrapper";
663 return "NVPTXISD::DeclareParam";
665 return "NVPTXISD::DeclareScalarParam";
667 return "NVPTXISD::DeclareRet";
669 return "NVPTXISD::DeclareScalarRet";
671 return "NVPTXISD::DeclareRetParam";
673 return "NVPTXISD::PrintCall";
675 return "NVPTXISD::PrintConvergentCall";
677 return "NVPTXISD::PrintCallUni";
679 return "NVPTXISD::PrintConvergentCallUni";
681 return "NVPTXISD::LoadParam";
683 return "NVPTXISD::LoadParamV2";
685 return "NVPTXISD::LoadParamV4";
687 return "NVPTXISD::StoreParam";
689 return "NVPTXISD::StoreParamV2";
691 return "NVPTXISD::StoreParamV4";
693 return "NVPTXISD::StoreParamS32";
695 return "NVPTXISD::StoreParamU32";
697 return "NVPTXISD::CallArgBegin";
699 return "NVPTXISD::CallArg";
701 return "NVPTXISD::LastCallArg";
703 return "NVPTXISD::CallArgEnd";
705 return "NVPTXISD::CallVoid";
707 return "NVPTXISD::CallVal";
709 return "NVPTXISD::CallSymbol";
711 return "NVPTXISD::Prototype";
713 return "NVPTXISD::MoveParam";
715 return "NVPTXISD::StoreRetval";
717 return "NVPTXISD::StoreRetvalV2";
719 return "NVPTXISD::StoreRetvalV4";
721 return "NVPTXISD::PseudoUseParam";
723 return "NVPTXISD::RETURN";
725 return "NVPTXISD::CallSeqBegin";
727 return "NVPTXISD::CallSeqEnd";
729 return "NVPTXISD::CallPrototype";
731 return "NVPTXISD::ProxyReg";
733 return "NVPTXISD::LoadV2";
735 return "NVPTXISD::LoadV4";
737 return "NVPTXISD::LDGV2";
739 return "NVPTXISD::LDGV4";
741 return "NVPTXISD::LDUV2";
743 return "NVPTXISD::LDUV4";
745 return "NVPTXISD::StoreV2";
747 return "NVPTXISD::StoreV4";
749 return "NVPTXISD::FUN_SHFL_CLAMP";
751 return "NVPTXISD::FUN_SHFR_CLAMP";
753 return "NVPTXISD::IMAD";
755 return "NVPTXISD::SETP_F16X2";
757 return "NVPTXISD::Dummy";
759 return "NVPTXISD::MUL_WIDE_SIGNED";
761 return "NVPTXISD::MUL_WIDE_UNSIGNED";
765 return "NVPTXISD::Tex1DFloatFloatLevel";
767 return "NVPTXISD::Tex1DFloatFloatGrad";
771 return "NVPTXISD::Tex1DS32FloatLevel";
773 return "NVPTXISD::Tex1DS32FloatGrad";
777 return "NVPTXISD::Tex1DU32FloatLevel";
779 return "NVPTXISD::Tex1DU32FloatGrad";
783 return "NVPTXISD::Tex1DArrayFloatFloatLevel";
785 return "NVPTXISD::Tex1DArrayFloatFloatGrad";
789 return "NVPTXISD::Tex1DArrayS32FloatLevel";
791 return "NVPTXISD::Tex1DArrayS32FloatGrad";
795 return "NVPTXISD::Tex1DArrayU32FloatLevel";
797 return "NVPTXISD::Tex1DArrayU32FloatGrad";
801 return "NVPTXISD::Tex2DFloatFloatLevel";
803 return "NVPTXISD::Tex2DFloatFloatGrad";
807 return "NVPTXISD::Tex2DS32FloatLevel";
809 return "NVPTXISD::Tex2DS32FloatGrad";
813 return "NVPTXISD::Tex2DU32FloatLevel";
815 return "NVPTXISD::Tex2DU32FloatGrad";
819 return "NVPTXISD::Tex2DArrayFloatFloatLevel";
821 return "NVPTXISD::Tex2DArrayFloatFloatGrad";
825 return "NVPTXISD::Tex2DArrayS32FloatLevel";
827 return "NVPTXISD::Tex2DArrayS32FloatGrad";
831 return "NVPTXISD::Tex2DArrayU32FloatLevel";
833 return "NVPTXISD::Tex2DArrayU32FloatGrad";
837 return "NVPTXISD::Tex3DFloatFloatLevel";
839 return "NVPTXISD::Tex3DFloatFloatGrad";
843 return "NVPTXISD::Tex3DS32FloatLevel";
845 return "NVPTXISD::Tex3DS32FloatGrad";
849 return "NVPTXISD::Tex3DU32FloatLevel";
851 return "NVPTXISD::Tex3DU32FloatGrad";
854 return "NVPTXISD::TexCubeFloatFloatLevel";
857 return "NVPTXISD::TexCubeS32FloatLevel";
860 return "NVPTXISD::TexCubeU32FloatLevel";
862 return "NVPTXISD::TexCubeArrayFloatFloat";
864 return "NVPTXISD::TexCubeArrayFloatFloatLevel";
866 return "NVPTXISD::TexCubeArrayS32Float";
868 return "NVPTXISD::TexCubeArrayS32FloatLevel";
870 return "NVPTXISD::TexCubeArrayU32Float";
872 return "NVPTXISD::TexCubeArrayU32FloatLevel";
874 return "NVPTXISD::Tld4R2DFloatFloat";
876 return "NVPTXISD::Tld4G2DFloatFloat";
878 return "NVPTXISD::Tld4B2DFloatFloat";
880 return "NVPTXISD::Tld4A2DFloatFloat";
882 return "NVPTXISD::Tld4R2DS64Float";
884 return "NVPTXISD::Tld4G2DS64Float";
886 return "NVPTXISD::Tld4B2DS64Float";
888 return "NVPTXISD::Tld4A2DS64Float";
890 return "NVPTXISD::Tld4R2DU64Float";
892 return "NVPTXISD::Tld4G2DU64Float";
894 return "NVPTXISD::Tld4B2DU64Float";
896 return "NVPTXISD::Tld4A2DU64Float";
899 return "NVPTXISD::TexUnified1DFloatS32";
901 return "NVPTXISD::TexUnified1DFloatFloat";
903 return "NVPTXISD::TexUnified1DFloatFloatLevel";
905 return "NVPTXISD::TexUnified1DFloatFloatGrad";
907 return "NVPTXISD::TexUnified1DS32S32";
909 return "NVPTXISD::TexUnified1DS32Float";
911 return "NVPTXISD::TexUnified1DS32FloatLevel";
913 return "NVPTXISD::TexUnified1DS32FloatGrad";
915 return "NVPTXISD::TexUnified1DU32S32";
917 return "NVPTXISD::TexUnified1DU32Float";
919 return "NVPTXISD::TexUnified1DU32FloatLevel";
921 return "NVPTXISD::TexUnified1DU32FloatGrad";
923 return "NVPTXISD::TexUnified1DArrayFloatS32";
925 return "NVPTXISD::TexUnified1DArrayFloatFloat";
927 return "NVPTXISD::TexUnified1DArrayFloatFloatLevel";
929 return "NVPTXISD::TexUnified1DArrayFloatFloatGrad";
931 return "NVPTXISD::TexUnified1DArrayS32S32";
933 return "NVPTXISD::TexUnified1DArrayS32Float";
935 return "NVPTXISD::TexUnified1DArrayS32FloatLevel";
937 return "NVPTXISD::TexUnified1DArrayS32FloatGrad";
939 return "NVPTXISD::TexUnified1DArrayU32S32";
941 return "NVPTXISD::TexUnified1DArrayU32Float";
943 return "NVPTXISD::TexUnified1DArrayU32FloatLevel";
945 return "NVPTXISD::TexUnified1DArrayU32FloatGrad";
947 return "NVPTXISD::TexUnified2DFloatS32";
949 return "NVPTXISD::TexUnified2DFloatFloat";
951 return "NVPTXISD::TexUnified2DFloatFloatLevel";
953 return "NVPTXISD::TexUnified2DFloatFloatGrad";
955 return "NVPTXISD::TexUnified2DS32S32";
957 return "NVPTXISD::TexUnified2DS32Float";
959 return "NVPTXISD::TexUnified2DS32FloatLevel";
961 return "NVPTXISD::TexUnified2DS32FloatGrad";
963 return "NVPTXISD::TexUnified2DU32S32";
965 return "NVPTXISD::TexUnified2DU32Float";
967 return "NVPTXISD::TexUnified2DU32FloatLevel";
969 return "NVPTXISD::TexUnified2DU32FloatGrad";
971 return "NVPTXISD::TexUnified2DArrayFloatS32";
973 return "NVPTXISD::TexUnified2DArrayFloatFloat";
975 return "NVPTXISD::TexUnified2DArrayFloatFloatLevel";
977 return "NVPTXISD::TexUnified2DArrayFloatFloatGrad";
979 return "NVPTXISD::TexUnified2DArrayS32S32";
981 return "NVPTXISD::TexUnified2DArrayS32Float";
983 return "NVPTXISD::TexUnified2DArrayS32FloatLevel";
985 return "NVPTXISD::TexUnified2DArrayS32FloatGrad";
987 return "NVPTXISD::TexUnified2DArrayU32S32";
989 return "NVPTXISD::TexUnified2DArrayU32Float";
991 return "NVPTXISD::TexUnified2DArrayU32FloatLevel";
993 return "NVPTXISD::TexUnified2DArrayU32FloatGrad";
995 return "NVPTXISD::TexUnified3DFloatS32";
997 return "NVPTXISD::TexUnified3DFloatFloat";
999 return "NVPTXISD::TexUnified3DFloatFloatLevel";
1001 return "NVPTXISD::TexUnified3DFloatFloatGrad";
1003 return "NVPTXISD::TexUnified3DS32S32";
1005 return "NVPTXISD::TexUnified3DS32Float";
1007 return "NVPTXISD::TexUnified3DS32FloatLevel";
1009 return "NVPTXISD::TexUnified3DS32FloatGrad";
1011 return "NVPTXISD::TexUnified3DU32S32";
1013 return "NVPTXISD::TexUnified3DU32Float";
1015 return "NVPTXISD::TexUnified3DU32FloatLevel";
1017 return "NVPTXISD::TexUnified3DU32FloatGrad";
1019 return "NVPTXISD::TexUnifiedCubeFloatFloat";
1021 return "NVPTXISD::TexUnifiedCubeFloatFloatLevel";
1023 return "NVPTXISD::TexUnifiedCubeS32Float";
1025 return "NVPTXISD::TexUnifiedCubeS32FloatLevel";
1027 return "NVPTXISD::TexUnifiedCubeU32Float";
1029 return "NVPTXISD::TexUnifiedCubeU32FloatLevel";
1031 return "NVPTXISD::TexUnifiedCubeArrayFloatFloat";
1033 return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel";
1035 return "NVPTXISD::TexUnifiedCubeArrayS32Float";
1037 return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel";
1039 return "NVPTXISD::TexUnifiedCubeArrayU32Float";
1041 return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel";
1043 return "NVPTXISD::Tld4UnifiedR2DFloatFloat";
1045 return "NVPTXISD::Tld4UnifiedG2DFloatFloat";
1047 return "NVPTXISD::Tld4UnifiedB2DFloatFloat";
1049 return "NVPTXISD::Tld4UnifiedA2DFloatFloat";
1051 return "NVPTXISD::Tld4UnifiedR2DS64Float";
1053 return "NVPTXISD::Tld4UnifiedG2DS64Float";
1055 return "NVPTXISD::Tld4UnifiedB2DS64Float";
1057 return "NVPTXISD::Tld4UnifiedA2DS64Float";
1059 return "NVPTXISD::Tld4UnifiedR2DU64Float";
1061 return "NVPTXISD::Tld4UnifiedG2DU64Float";
1063 return "NVPTXISD::Tld4UnifiedB2DU64Float";
1065 return "NVPTXISD::Tld4UnifiedA2DU64Float";
1263 bool Reciprocal)
const {
1284 if (Reciprocal || ExtraSteps > 0) {
1286 return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_rsqrt_approx_ftz_f
1287 : Intrinsic::nvvm_rsqrt_approx_f);
1289 return MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d);
1294 return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_sqrt_approx_ftz_f
1295 : Intrinsic::nvvm_sqrt_approx_f);
1304 MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d));
1321 std::optional<std::pair<unsigned, const APInt &>> VAInfo,
1322 const CallBase &CB,
unsigned UniqueCallSite)
const {
1326 assert(isABI &&
"Non-ABI compilation is not supported");
1330 std::string Prototype;
1332 O <<
"prototype_" << UniqueCallSite <<
" : .callprototype ";
1340 if (
auto *ITy = dyn_cast<IntegerType>(retTy)) {
1341 size = ITy->getBitWidth();
1344 "Floating point type expected here");
1352 O <<
".param .b" <<
size <<
" _";
1353 }
else if (isa<PointerType>(retTy)) {
1354 O <<
".param .b" << PtrVT.getSizeInBits() <<
" _";
1357 O <<
".param .align " << (retAlignment ? retAlignment->value() : 0)
1358 <<
" .b8 _[" <<
DL.getTypeAllocSize(retTy) <<
"]";
1369 unsigned NumArgs = VAInfo ? VAInfo->first : Args.size();
1370 for (
unsigned i = 0, OIdx = 0; i != NumArgs; ++i, ++OIdx) {
1371 Type *Ty = Args[i].Ty;
1377 if (!Outs[OIdx].
Flags.isByVal()) {
1379 unsigned ParamAlign = 0;
1380 const CallInst *CallI = cast<CallInst>(&CB);
1382 if (!
getAlign(*CallI, i + 1, ParamAlign))
1384 O <<
".param .align " << ParamAlign <<
" .b8 ";
1386 O <<
"[" <<
DL.getTypeAllocSize(Ty) <<
"]";
1390 if (
unsigned len = vtparts.
size())
1397 "type mismatch between callee prototype and arguments");
1400 if (isa<IntegerType>(Ty)) {
1401 sz = cast<IntegerType>(Ty)->getBitWidth();
1403 }
else if (isa<PointerType>(Ty)) {
1404 sz = PtrVT.getSizeInBits();
1412 O <<
".param .b" << sz <<
" ";
1417 Type *ETy = Args[i].IndirectType;
1418 Align InitialAlign = Outs[OIdx].Flags.getNonZeroByValAlign();
1419 Align ParamByValAlign =
1422 O <<
".param .align " << ParamByValAlign.
value() <<
" .b8 ";
1424 O <<
"[" << Outs[OIdx].Flags.getByValSize() <<
"]";
1428 O << (first ?
"" :
",") <<
" .param .align " << VAInfo->second
1444 return DL.getABITypeAlign(Ty);
1447 unsigned Alignment = 0;
1450 if (!DirectCallee) {
1455 if (
const auto *CI = dyn_cast<CallInst>(CB)) {
1458 return Align(Alignment);
1467 return Align(Alignment);
1474 return DL.getABITypeAlign(Ty);
1482 "Support for variadic functions (unsized array parameter) introduced "
1483 "in PTX ISA version 6.0 and requires target sm_30.");
1499 assert(isABI &&
"Non-ABI compilation is not supported");
1521 unsigned VAOffset = 0;
1528 unsigned ParamCount = 0;
1541 for (
unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
1542 EVT VT = Outs[OIdx].VT;
1543 Type *Ty = Args[i].Ty;
1545 bool IsByVal = Outs[OIdx].Flags.isByVal();
1550 assert((!IsByVal || Args[i].IndirectType) &&
1551 "byval arg must have indirect type");
1552 Type *ETy = (IsByVal ? Args[i].IndirectType : Ty);
1560 Align InitialAlign = Outs[OIdx].Flags.getNonZeroByValAlign();
1564 VAOffset =
alignTo(VAOffset, ArgAlign);
1566 ArgAlign = getArgumentAlignment(
Callee, CB, Ty, ParamCount + 1,
DL);
1570 (IsByVal ? Outs[OIdx].Flags.getByValSize() :
DL.getTypeAllocSize(Ty));
1575 if (ParamCount == FirstVAArg) {
1581 DeclareParamVTs, DeclareParamOps);
1603 SDValue DeclareScalarParamOps[] = {
1608 DeclareScalarParamOps);
1617 bool ExtendIntegerParam =
1622 for (
unsigned j = 0, je = VTs.
size(); j != je; ++j) {
1624 int CurOffset = Offsets[j];
1631 assert(StoreOperands.
empty() &&
"Unfinished preceding store.");
1636 IsByVal ? CurOffset + VAOffset : (IsVAArg ? VAOffset : CurOffset),
1640 SDValue StVal = OutVals[OIdx];
1644 EltVT =
EVT(PromotedVT);
1649 StVal = DAG.
getNode(Ext, dl, PromotedVT, StVal);
1658 }
else if (ExtendIntegerParam) {
1659 assert(VTs.
size() == 1 &&
"Scalar can't have multiple parts.");
1676 unsigned NumElts = StoreOperands.
size() - 3;
1696 EVT TheStoreType = ExtendIntegerParam ?
MVT::i32 : EltVT;
1705 StoreOperands.
clear();
1709 if (!IsByVal && IsVAArg) {
1711 "Vectorization is expected to be disabled for variadics.");
1712 VAOffset +=
DL.getTypeAllocSize(
1719 assert(StoreOperands.
empty() &&
"Unfinished parameter store.");
1720 if (!IsByVal && VTs.
size() > 0)
1723 if (IsByVal && IsVAArg)
1731 if (Ins.size() > 0) {
1738 unsigned resultsz =
DL.getTypeAllocSizeInBits(
RetTy);
1743 if (
RetTy->isFloatingPointTy() ||
RetTy->isPointerTy() ||
1744 (
RetTy->isIntegerTy() && !
RetTy->isIntegerTy(128))) {
1754 retAlignment = getArgumentAlignment(
Callee, CB,
RetTy, 0,
DL);
1755 assert(retAlignment &&
"retAlignment is guaranteed to be set");
1776 VADeclareParam->
getVTList(), DeclareParamOps);
1784 if (isa<ExternalSymbolSDNode>(
Callee)) {
1789 assert(CalleeFunc !=
nullptr &&
"Libcall callee must be set.");
1793 CalleeFunc->
addFnAttr(
"nvptx-libcall-callee",
"true");
1806 DL,
RetTy, Args, Outs, retAlignment,
1808 ? std::optional<std::pair<unsigned, const APInt &>>(std::make_pair(
1810 cast<ConstantSDNode>(VADeclareParam->
getOperand(1))
1813 *CB, UniqueCallSite);
1833 Chain = DAG.
getNode(Opcode, dl, PrintCallVTs, PrintCallOps);
1844 SDValue CallArgBeginOps[] = { Chain, InFlag };
1849 for (
unsigned i = 0, e = std::min(CLI.
NumFixedArgs + 1, ParamCount); i != e;
1859 Chain = DAG.
getNode(opcode, dl, CallArgVTs, CallArgOps);
1863 SDValue CallArgEndOps[] = { Chain,
1881 if (Ins.size() > 0) {
1885 assert(VTs.
size() == Ins.size() &&
"Bad value decomposition");
1896 bool ExtendIntegerRetVal =
1897 RetTy->isIntegerTy() &&
DL.getTypeAllocSizeInBits(
RetTy) < 32;
1899 for (
unsigned i = 0, e = VTs.
size(); i != e; ++i) {
1900 bool needTruncate =
false;
1901 EVT TheLoadType = VTs[i];
1902 EVT EltType = Ins[i].VT;
1907 TheLoadType =
EVT(PromotedVT);
1908 EltType =
EVT(PromotedVT);
1909 needTruncate =
true;
1912 if (ExtendIntegerRetVal) {
1915 needTruncate =
true;
1917 if (VTs[i].isInteger())
1918 needTruncate =
true;
1924 assert(VecIdx == -1 && LoadVTs.
empty() &&
"Orphaned operand list.");
1931 unsigned NumElts = LoadVTs.
size();
1953 Op, dl, DAG.
getVTList(LoadVTs), LoadOperands, TheLoadType,
1957 for (
unsigned j = 0; j < NumElts; ++j) {
1961 ProxyRegTruncates.
push_back(std::optional<MVT>(Ins[VecIdx + j].VT));
1963 ProxyRegTruncates.
push_back(std::optional<MVT>());
1967 InFlag = RetVal.
getValue(NumElts + 1);
1977 DAG.
getCALLSEQ_END(Chain, UniqueCallSite, UniqueCallSite + 1, InFlag, dl);
1983 for (
unsigned i = 0; i < ProxyRegOps.
size(); ++i) {
1987 { Chain, ProxyRegOps[i], InFlag }
1990 Chain = Ret.getValue(1);
1991 InFlag = Ret.getValue(2);
1993 if (ProxyRegTruncates[i]) {
2011 SDNode *Node = Op.getNode();
2014 unsigned NumOperands = Node->getNumOperands();
2015 for (
unsigned i = 0; i < NumOperands; ++i) {
2016 SDValue SubOp = Node->getOperand(i);
2020 for (
unsigned j = 0; j < NumSubElem; ++j) {
2041 isa<ConstantFPSDNode>(
Op->getOperand(0)) &&
2042 isa<ConstantFPSDNode>(
Op->getOperand(1))))
2046 cast<ConstantFPSDNode>(
Op->getOperand(0))->getValueAPF().bitcastToAPInt();
2048 cast<ConstantFPSDNode>(
Op->getOperand(1))->getValueAPF().bitcastToAPInt();
2058 if (isa<ConstantSDNode>(
Index.getNode()))
2083 assert(
Op.getNumOperands() == 3 &&
"Not a double-shift!");
2086 EVT VT =
Op.getValueType();
2144 assert(
Op.getNumOperands() == 3 &&
"Not a double-shift!");
2147 EVT VT =
Op.getValueType();
2198 EVT VT =
Op.getValueType();
2201 return LowerFROUND32(Op, DAG);
2204 return LowerFROUND64(Op, DAG);
2220 EVT VT =
Op.getValueType();
2226 const int SignBitMask = 0x80000000;
2229 const int PointFiveInBits = 0x3F000000;
2230 SDValue PointFiveWithSignRaw =
2261 EVT VT =
Op.getValueType();
2293 switch (Op.getOpcode()) {
2303 return LowerBUILD_VECTOR(Op, DAG);
2307 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2309 return LowerCONCAT_VECTORS(Op, DAG);
2311 return LowerSTORE(Op, DAG);
2313 return LowerLOAD(Op, DAG);
2315 return LowerShiftLeftParts(Op, DAG);
2318 return LowerShiftRightParts(Op, DAG);
2320 return LowerSelect(Op, DAG);
2322 return LowerFROUND(Op, DAG);
2324 return LowerVAARG(Op, DAG);
2326 return LowerVASTART(Op, DAG);
2338 SDNode *Node = Op.getNode();
2339 const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
2340 EVT VT = Node->getValueType(0);
2342 SDValue Tmp1 = Node->getOperand(0);
2343 SDValue Tmp2 = Node->getOperand(1);
2344 const MaybeAlign MA(Node->getConstantOperandVal(3));
2382 SDValue Arg = getParamSymbol(DAG, -1, PtrVT);
2385 const Value *SV = cast<SrcValueSDNode>(
Op.getOperand(2))->getValue();
2386 return DAG.
getStore(
Op.getOperand(0),
DL, VAReg,
Op.getOperand(1),
2396 assert(
Op.getValueType() ==
MVT::i1 &&
"Custom lowering enabled only for i1");
2408 return LowerLOADi1(Op, DAG);
2414 EVT MemVT =
Load->getMemoryVT();
2416 MemVT, *
Load->getMemOperand())) {
2436 "Custom lowering for i1 load only");
2438 LD->getPointerInfo(),
LD->getAlign(),
2439 LD->getMemOperand()->getFlags());
2444 SDValue Ops[] = { result,
LD->getChain() };
2453 return LowerSTOREi1(Op, DAG);
2459 VT, *
Store->getMemOperand()))
2463 return LowerSTOREVector(Op, DAG);
2510 if (Alignment < PrefAlign) {
2519 unsigned Opcode = 0;
2526 bool NeedExt =
false;
2530 bool StoreF16x2 =
false;
2545 "Wrong type for the vector.");
2559 for (
unsigned i = 0; i < NumElts; ++i) {
2569 for (
unsigned i = 0; i < NumElts; ++i) {
2579 Ops.
append(
N->op_begin() + 2,
N->op_end());
2607 ST->getAlign(),
ST->getMemOperand()->getFlags());
2635 std::vector<SDValue> OutChains;
2638 assert(isABI &&
"Non-ABI compilation is not supported");
2642 std::vector<Type *> argTypes;
2643 std::vector<const Argument *> theArgs;
2645 theArgs.push_back(&
I);
2646 argTypes.push_back(
I.getType());
2657 unsigned InsIdx = 0;
2660 for (
unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) {
2661 Type *Ty = argTypes[i];
2663 if (theArgs[i]->use_empty()) {
2669 if (vtparts.
empty())
2672 for (
unsigned parti = 0, parte = vtparts.
size(); parti != parte;
2677 if (vtparts.
size() > 0)
2684 for (
unsigned parti = 0; parti < NumRegs; ++parti) {
2701 bool aggregateIsPacked =
false;
2702 if (
StructType *STy = dyn_cast<StructType>(Ty))
2703 aggregateIsPacked = STy->isPacked();
2714 SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
2716 for (
unsigned parti = 0, parte = VTs.
size(); parti != parte; ++parti) {
2718 assert(VecIdx == -1 &&
"Orphaned vector.");
2723 if (VectorInfo[parti] &
PVF_LAST) {
2724 unsigned NumElts = parti - VecIdx + 1;
2725 EVT EltVT = VTs[parti];
2748 P.getNode()->setIROrder(idx + 1);
2749 for (
unsigned j = 0; j < NumElts; ++j) {
2768 Ins[InsIdx].VT.getFixedSizeInBits() >
2772 Elt = DAG.
getNode(Extend, dl, Ins[InsIdx].VT, Elt);
2795 assert(ObjectVT == Ins[InsIdx].VT &&
2796 "Ins type did not match function type");
2797 SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
2800 p.getNode()->setIROrder(idx + 1);
2804 if (!OutChains.empty())
2821 assert(isABI &&
"Non-ABI compilation is not supported");
2830 assert(VTs.
size() == OutVals.
size() &&
"Bad return value decomposition");
2832 for (
unsigned i = 0, e = VTs.
size(); i != e; ++i) {
2833 SDValue PromotedOutVal = OutVals[i];
2836 VTs[i] =
EVT(PromotedVT);
2841 PromotedOutVal = DAG.
getNode(Ext, dl, PromotedVT, PromotedOutVal);
2843 PromotedOutVals.
push_back(PromotedOutVal);
2854 bool ExtendIntegerRetVal =
2855 RetTy->isIntegerTy() &&
DL.getTypeAllocSizeInBits(
RetTy) < 32;
2858 for (
unsigned i = 0, e = VTs.
size(); i != e; ++i) {
2861 assert(StoreOperands.
empty() &&
"Orphaned operand list.");
2867 SDValue RetVal = PromotedOutVals[i];
2869 if (ExtendIntegerRetVal) {
2885 unsigned NumElts = StoreOperands.
size() - 2;
2902 EVT TheStoreType = ExtendIntegerRetVal ?
MVT::i32 : VTs[i];
2907 StoreOperands.
clear();
2915 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
2917 if (Constraint.length() > 1)
2924 switch (Intrinsic) {
2928 case Intrinsic::nvvm_tex_1d_v4f32_s32:
2930 case Intrinsic::nvvm_tex_1d_v4f32_f32:
2932 case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
2934 case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
2936 case Intrinsic::nvvm_tex_1d_v4s32_s32:
2938 case Intrinsic::nvvm_tex_1d_v4s32_f32:
2940 case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
2942 case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
2944 case Intrinsic::nvvm_tex_1d_v4u32_s32:
2946 case Intrinsic::nvvm_tex_1d_v4u32_f32:
2948 case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
2950 case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
2953 case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
2955 case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
2957 case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
2959 case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
2961 case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
2963 case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
2965 case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
2967 case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
2969 case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
2971 case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
2973 case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
2975 case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
2978 case Intrinsic::nvvm_tex_2d_v4f32_s32:
2980 case Intrinsic::nvvm_tex_2d_v4f32_f32:
2982 case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
2984 case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
2986 case Intrinsic::nvvm_tex_2d_v4s32_s32:
2988 case Intrinsic::nvvm_tex_2d_v4s32_f32:
2990 case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
2992 case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
2994 case Intrinsic::nvvm_tex_2d_v4u32_s32:
2996 case Intrinsic::nvvm_tex_2d_v4u32_f32:
2998 case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
3000 case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
3003 case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
3005 case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
3007 case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
3009 case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
3011 case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
3013 case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
3015 case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
3017 case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
3019 case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
3021 case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
3023 case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
3025 case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
3028 case Intrinsic::nvvm_tex_3d_v4f32_s32:
3030 case Intrinsic::nvvm_tex_3d_v4f32_f32:
3032 case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
3034 case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
3036 case Intrinsic::nvvm_tex_3d_v4s32_s32:
3038 case Intrinsic::nvvm_tex_3d_v4s32_f32:
3040 case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
3042 case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
3044 case Intrinsic::nvvm_tex_3d_v4u32_s32:
3046 case Intrinsic::nvvm_tex_3d_v4u32_f32:
3048 case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
3050 case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
3053 case Intrinsic::nvvm_tex_cube_v4f32_f32:
3055 case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
3057 case Intrinsic::nvvm_tex_cube_v4s32_f32:
3059 case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
3061 case Intrinsic::nvvm_tex_cube_v4u32_f32:
3063 case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
3066 case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
3068 case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
3070 case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
3072 case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
3074 case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
3076 case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
3079 case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
3081 case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
3083 case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
3085 case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
3087 case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
3089 case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
3091 case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
3093 case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
3095 case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
3097 case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
3099 case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
3101 case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
3104 case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
3106 case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
3108 case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
3110 case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
3112 case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
3114 case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
3116 case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
3118 case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
3120 case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
3122 case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
3124 case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
3126 case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
3129 case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
3131 case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
3133 case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
3135 case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
3137 case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
3139 case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
3141 case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
3143 case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
3145 case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
3147 case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
3149 case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
3151 case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
3154 case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
3156 case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
3158 case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
3160 case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
3162 case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
3164 case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
3166 case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
3168 case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
3170 case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
3172 case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
3174 case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
3176 case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
3179 case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
3181 case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
3183 case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
3185 case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
3187 case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
3189 case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
3191 case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
3193 case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
3195 case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
3197 case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
3199 case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
3201 case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
3204 case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
3206 case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
3208 case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
3210 case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
3212 case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
3214 case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
3216 case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
3218 case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
3220 case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
3222 case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
3224 case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
3226 case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
3229 case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
3231 case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
3233 case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
3235 case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
3237 case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
3239 case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
3242 case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
3244 case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
3246 case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
3248 case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
3250 case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
3252 case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
3255 case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
3257 case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
3259 case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
3261 case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32:
3263 case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
3265 case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
3267 case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
3269 case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
3271 case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
3273 case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
3275 case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
3277 case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32:
3283 switch (Intrinsic) {
3286 case Intrinsic::nvvm_suld_1d_i8_clamp:
3288 case Intrinsic::nvvm_suld_1d_i16_clamp:
3290 case Intrinsic::nvvm_suld_1d_i32_clamp:
3292 case Intrinsic::nvvm_suld_1d_i64_clamp:
3294 case Intrinsic::nvvm_suld_1d_v2i8_clamp:
3296 case Intrinsic::nvvm_suld_1d_v2i16_clamp:
3298 case Intrinsic::nvvm_suld_1d_v2i32_clamp:
3300 case Intrinsic::nvvm_suld_1d_v2i64_clamp:
3302 case Intrinsic::nvvm_suld_1d_v4i8_clamp:
3304 case Intrinsic::nvvm_suld_1d_v4i16_clamp:
3306 case Intrinsic::nvvm_suld_1d_v4i32_clamp:
3308 case Intrinsic::nvvm_suld_1d_array_i8_clamp:
3310 case Intrinsic::nvvm_suld_1d_array_i16_clamp:
3312 case Intrinsic::nvvm_suld_1d_array_i32_clamp:
3314 case Intrinsic::nvvm_suld_1d_array_i64_clamp:
3316 case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
3318 case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
3320 case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
3322 case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
3324 case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
3326 case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
3328 case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
3330 case Intrinsic::nvvm_suld_2d_i8_clamp:
3332 case Intrinsic::nvvm_suld_2d_i16_clamp:
3334 case Intrinsic::nvvm_suld_2d_i32_clamp:
3336 case Intrinsic::nvvm_suld_2d_i64_clamp:
3338 case Intrinsic::nvvm_suld_2d_v2i8_clamp:
3340 case Intrinsic::nvvm_suld_2d_v2i16_clamp:
3342 case Intrinsic::nvvm_suld_2d_v2i32_clamp:
3344 case Intrinsic::nvvm_suld_2d_v2i64_clamp:
3346 case Intrinsic::nvvm_suld_2d_v4i8_clamp:
3348 case Intrinsic::nvvm_suld_2d_v4i16_clamp:
3350 case Intrinsic::nvvm_suld_2d_v4i32_clamp:
3352 case Intrinsic::nvvm_suld_2d_array_i8_clamp:
3354 case Intrinsic::nvvm_suld_2d_array_i16_clamp:
3356 case Intrinsic::nvvm_suld_2d_array_i32_clamp:
3358 case Intrinsic::nvvm_suld_2d_array_i64_clamp:
3360 case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
3362 case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
3364 case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
3366 case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
3368 case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
3370 case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
3372 case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
3374 case Intrinsic::nvvm_suld_3d_i8_clamp:
3376 case Intrinsic::nvvm_suld_3d_i16_clamp:
3378 case Intrinsic::nvvm_suld_3d_i32_clamp:
3380 case Intrinsic::nvvm_suld_3d_i64_clamp:
3382 case Intrinsic::nvvm_suld_3d_v2i8_clamp:
3384 case Intrinsic::nvvm_suld_3d_v2i16_clamp:
3386 case Intrinsic::nvvm_suld_3d_v2i32_clamp:
3388 case Intrinsic::nvvm_suld_3d_v2i64_clamp:
3390 case Intrinsic::nvvm_suld_3d_v4i8_clamp:
3392 case Intrinsic::nvvm_suld_3d_v4i16_clamp:
3394 case Intrinsic::nvvm_suld_3d_v4i32_clamp:
3396 case Intrinsic::nvvm_suld_1d_i8_trap:
3398 case Intrinsic::nvvm_suld_1d_i16_trap:
3400 case Intrinsic::nvvm_suld_1d_i32_trap:
3402 case Intrinsic::nvvm_suld_1d_i64_trap:
3404 case Intrinsic::nvvm_suld_1d_v2i8_trap:
3406 case Intrinsic::nvvm_suld_1d_v2i16_trap:
3408 case Intrinsic::nvvm_suld_1d_v2i32_trap:
3410 case Intrinsic::nvvm_suld_1d_v2i64_trap:
3412 case Intrinsic::nvvm_suld_1d_v4i8_trap:
3414 case Intrinsic::nvvm_suld_1d_v4i16_trap:
3416 case Intrinsic::nvvm_suld_1d_v4i32_trap:
3418 case Intrinsic::nvvm_suld_1d_array_i8_trap:
3420 case Intrinsic::nvvm_suld_1d_array_i16_trap:
3422 case Intrinsic::nvvm_suld_1d_array_i32_trap:
3424 case Intrinsic::nvvm_suld_1d_array_i64_trap:
3426 case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
3428 case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
3430 case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
3432 case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
3434 case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
3436 case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
3438 case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
3440 case Intrinsic::nvvm_suld_2d_i8_trap:
3442 case Intrinsic::nvvm_suld_2d_i16_trap:
3444 case Intrinsic::nvvm_suld_2d_i32_trap:
3446 case Intrinsic::nvvm_suld_2d_i64_trap:
3448 case Intrinsic::nvvm_suld_2d_v2i8_trap:
3450 case Intrinsic::nvvm_suld_2d_v2i16_trap:
3452 case Intrinsic::nvvm_suld_2d_v2i32_trap:
3454 case Intrinsic::nvvm_suld_2d_v2i64_trap:
3456 case Intrinsic::nvvm_suld_2d_v4i8_trap:
3458 case Intrinsic::nvvm_suld_2d_v4i16_trap:
3460 case Intrinsic::nvvm_suld_2d_v4i32_trap:
3462 case Intrinsic::nvvm_suld_2d_array_i8_trap:
3464 case Intrinsic::nvvm_suld_2d_array_i16_trap:
3466 case Intrinsic::nvvm_suld_2d_array_i32_trap:
3468 case Intrinsic::nvvm_suld_2d_array_i64_trap:
3470 case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
3472 case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
3474 case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
3476 case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
3478 case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
3480 case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
3482 case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
3484 case Intrinsic::nvvm_suld_3d_i8_trap:
3486 case Intrinsic::nvvm_suld_3d_i16_trap:
3488 case Intrinsic::nvvm_suld_3d_i32_trap:
3490 case Intrinsic::nvvm_suld_3d_i64_trap:
3492 case Intrinsic::nvvm_suld_3d_v2i8_trap:
3494 case Intrinsic::nvvm_suld_3d_v2i16_trap:
3496 case Intrinsic::nvvm_suld_3d_v2i32_trap:
3498 case Intrinsic::nvvm_suld_3d_v2i64_trap:
3500 case Intrinsic::nvvm_suld_3d_v4i8_trap:
3502 case Intrinsic::nvvm_suld_3d_v4i16_trap:
3504 case Intrinsic::nvvm_suld_3d_v4i32_trap:
3506 case Intrinsic::nvvm_suld_1d_i8_zero:
3508 case Intrinsic::nvvm_suld_1d_i16_zero:
3510 case Intrinsic::nvvm_suld_1d_i32_zero:
3512 case Intrinsic::nvvm_suld_1d_i64_zero:
3514 case Intrinsic::nvvm_suld_1d_v2i8_zero:
3516 case Intrinsic::nvvm_suld_1d_v2i16_zero:
3518 case Intrinsic::nvvm_suld_1d_v2i32_zero:
3520 case Intrinsic::nvvm_suld_1d_v2i64_zero:
3522 case Intrinsic::nvvm_suld_1d_v4i8_zero:
3524 case Intrinsic::nvvm_suld_1d_v4i16_zero:
3526 case Intrinsic::nvvm_suld_1d_v4i32_zero:
3528 case Intrinsic::nvvm_suld_1d_array_i8_zero:
3530 case Intrinsic::nvvm_suld_1d_array_i16_zero:
3532 case Intrinsic::nvvm_suld_1d_array_i32_zero:
3534 case Intrinsic::nvvm_suld_1d_array_i64_zero:
3536 case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
3538 case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
3540 case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
3542 case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
3544 case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
3546 case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
3548 case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
3550 case Intrinsic::nvvm_suld_2d_i8_zero:
3552 case Intrinsic::nvvm_suld_2d_i16_zero:
3554 case Intrinsic::nvvm_suld_2d_i32_zero:
3556 case Intrinsic::nvvm_suld_2d_i64_zero:
3558 case Intrinsic::nvvm_suld_2d_v2i8_zero:
3560 case Intrinsic::nvvm_suld_2d_v2i16_zero:
3562 case Intrinsic::nvvm_suld_2d_v2i32_zero:
3564 case Intrinsic::nvvm_suld_2d_v2i64_zero:
3566 case Intrinsic::nvvm_suld_2d_v4i8_zero:
3568 case Intrinsic::nvvm_suld_2d_v4i16_zero:
3570 case Intrinsic::nvvm_suld_2d_v4i32_zero:
3572 case Intrinsic::nvvm_suld_2d_array_i8_zero:
3574 case Intrinsic::nvvm_suld_2d_array_i16_zero:
3576 case Intrinsic::nvvm_suld_2d_array_i32_zero:
3578 case Intrinsic::nvvm_suld_2d_array_i64_zero:
3580 case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
3582 case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
3584 case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
3586 case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
3588 case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
3590 case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
3592 case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
3594 case Intrinsic::nvvm_suld_3d_i8_zero:
3596 case Intrinsic::nvvm_suld_3d_i16_zero:
3598 case Intrinsic::nvvm_suld_3d_i32_zero:
3600 case Intrinsic::nvvm_suld_3d_i64_zero:
3602 case Intrinsic::nvvm_suld_3d_v2i8_zero:
3604 case Intrinsic::nvvm_suld_3d_v2i16_zero:
3606 case Intrinsic::nvvm_suld_3d_v2i32_zero:
3608 case Intrinsic::nvvm_suld_3d_v2i64_zero:
3610 case Intrinsic::nvvm_suld_3d_v4i8_zero:
3612 case Intrinsic::nvvm_suld_3d_v4i16_zero:
3614 case Intrinsic::nvvm_suld_3d_v4i32_zero:
3627 switch (Intrinsic) {
3630 case Intrinsic::nvvm_match_all_sync_i32p:
3631 case Intrinsic::nvvm_match_all_sync_i64p:
3641 case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col:
3642 case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row:
3643 case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride:
3644 case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride:
3645 case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col:
3646 case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row:
3647 case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride:
3648 case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride:
3649 case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col:
3650 case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row:
3651 case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col_stride:
3652 case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row_stride:
3653 case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col:
3654 case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row:
3655 case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col_stride:
3656 case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row_stride:
3657 case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col:
3658 case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row:
3659 case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col_stride:
3660 case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row_stride:
3661 case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col:
3662 case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row:
3663 case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col_stride:
3664 case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row_stride: {
3667 Info.ptrVal =
I.getArgOperand(0);
3673 case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col:
3674 case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col_stride:
3675 case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_col_stride:
3676 case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_col:
3677 case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_row:
3678 case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_row_stride:
3679 case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row_stride:
3680 case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row:
3681 case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_col:
3682 case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_col_stride:
3683 case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_row:
3684 case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_row_stride:
3685 case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col:
3686 case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col_stride:
3687 case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_col_stride:
3688 case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_col:
3689 case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row:
3690 case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row_stride:
3691 case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row_stride:
3692 case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row:
3693 case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_col:
3694 case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_col_stride:
3695 case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_row:
3696 case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_row_stride: {
3699 Info.ptrVal =
I.getArgOperand(0);
3706 case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_col:
3707 case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_col_stride:
3708 case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_col_stride:
3709 case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_col:
3710 case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_row:
3711 case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_row_stride:
3712 case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row_stride:
3713 case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row:
3714 case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_col:
3715 case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_col_stride:
3716 case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_row:
3717 case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_row_stride:
3718 case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_col:
3719 case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_col_stride:
3720 case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_row:
3721 case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_row_stride:
3723 case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col:
3724 case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col_stride:
3725 case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_col_stride:
3726 case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_col:
3727 case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row:
3728 case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row_stride:
3729 case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row_stride:
3730 case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row:
3731 case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_col:
3732 case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_col_stride:
3733 case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_row:
3734 case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_row_stride:
3735 case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_col:
3736 case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_col_stride:
3737 case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_row:
3738 case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_row_stride:
3739 case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x4_b16:
3740 case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x4_trans_b16: {
3743 Info.ptrVal =
I.getArgOperand(0);
3750 case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_col:
3751 case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_col_stride:
3752 case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_col_stride:
3753 case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_col:
3754 case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_row:
3755 case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_row_stride:
3756 case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_row_stride:
3757 case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_row:
3759 case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_col:
3760 case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_col_stride:
3761 case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_col_stride:
3762 case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_col:
3763 case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_row:
3764 case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_row_stride:
3765 case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_row_stride:
3766 case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_row:
3767 case Intrinsic::nvvm_wmma_m8n8k128_load_a_b1_row:
3768 case Intrinsic::nvvm_wmma_m8n8k128_load_a_b1_row_stride:
3769 case Intrinsic::nvvm_wmma_m8n8k128_load_b_b1_col:
3770 case Intrinsic::nvvm_wmma_m8n8k128_load_b_b1_col_stride:
3771 case Intrinsic::nvvm_wmma_m8n8k32_load_a_s4_row:
3772 case Intrinsic::nvvm_wmma_m8n8k32_load_a_s4_row_stride:
3773 case Intrinsic::nvvm_wmma_m8n8k32_load_a_u4_row_stride:
3774 case Intrinsic::nvvm_wmma_m8n8k32_load_a_u4_row:
3775 case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col:
3776 case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col_stride:
3777 case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col_stride:
3778 case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col:
3779 case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x1_b16:
3780 case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x1_trans_b16: {
3783 Info.ptrVal =
I.getArgOperand(0);
3790 case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col:
3791 case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row:
3792 case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride:
3793 case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride:
3794 case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col:
3795 case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row:
3796 case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col_stride:
3797 case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row_stride:
3798 case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col:
3799 case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row:
3800 case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col_stride:
3801 case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row_stride: {
3804 Info.ptrVal =
I.getArgOperand(0);
3811 case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col:
3812 case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row:
3813 case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride:
3814 case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride:
3815 case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col:
3816 case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row:
3817 case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col_stride:
3818 case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row_stride:
3819 case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col:
3820 case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row:
3821 case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col_stride:
3822 case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row_stride:
3823 case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_col:
3824 case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_row:
3825 case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_col_stride:
3826 case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_row_stride: {
3829 Info.ptrVal =
I.getArgOperand(0);
3836 case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_col:
3837 case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_col_stride:
3838 case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_row:
3839 case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_row_stride:
3841 case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_col:
3842 case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_col_stride:
3843 case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_row:
3844 case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_row_stride:
3846 case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col:
3847 case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col_stride:
3848 case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_row:
3849 case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_row_stride:
3850 case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_col:
3851 case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_col_stride:
3852 case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_row:
3853 case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_row_stride:
3854 case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_col:
3855 case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_col_stride:
3856 case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_row:
3857 case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_row_stride: {
3860 Info.ptrVal =
I.getArgOperand(0);
3867 case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_col:
3868 case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_col_stride:
3869 case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_row:
3870 case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_row_stride:
3871 case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col:
3872 case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col_stride:
3873 case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row:
3874 case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row_stride:
3875 case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x2_b16:
3876 case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x2_trans_b16: {
3879 Info.ptrVal =
I.getArgOperand(0);
3886 case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_col:
3887 case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_col_stride:
3888 case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_row:
3889 case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_row_stride:
3891 case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_col:
3892 case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_col_stride:
3893 case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_row:
3894 case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_row_stride: {
3897 Info.ptrVal =
I.getArgOperand(0);
3904 case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_col:
3905 case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_col_stride:
3906 case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_row:
3907 case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_row_stride: {
3910 Info.ptrVal =
I.getArgOperand(0);
3917 case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col:
3918 case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row:
3919 case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride:
3920 case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride:
3921 case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col:
3922 case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row:
3923 case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col_stride:
3924 case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row_stride:
3925 case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col:
3926 case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row:
3927 case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col_stride:
3928 case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row_stride: {
3931 Info.ptrVal =
I.getArgOperand(0);
3938 case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col:
3939 case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row:
3940 case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride:
3941 case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride:
3942 case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col:
3943 case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row:
3944 case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col_stride:
3945 case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row_stride:
3946 case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col:
3947 case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row:
3948 case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col_stride:
3949 case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row_stride:
3950 case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_col:
3951 case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_row:
3952 case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_col_stride:
3953 case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_row_stride: {
3956 Info.ptrVal =
I.getArgOperand(0);
3963 case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_col:
3964 case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_col_stride:
3965 case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_row:
3966 case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_row_stride:
3967 case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_col:
3968 case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_col_stride:
3969 case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_row:
3970 case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_row_stride:
3971 case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_col:
3972 case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_col_stride:
3973 case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_row:
3974 case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_row_stride: {
3977 Info.ptrVal =
I.getArgOperand(0);
3984 case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_col:
3985 case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_col_stride:
3986 case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_row:
3987 case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_row_stride:
3988 case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_col:
3989 case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_col_stride:
3990 case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_row:
3991 case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_row_stride: {
3994 Info.ptrVal =
I.getArgOperand(0);
4001 case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_col:
4002 case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_col_stride:
4003 case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_row:
4004 case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_row_stride: {
4007 Info.ptrVal =
I.getArgOperand(0);
4014 case Intrinsic::nvvm_atomic_load_inc_32:
4015 case Intrinsic::nvvm_atomic_load_dec_32:
4017 case Intrinsic::nvvm_atomic_add_gen_f_cta:
4018 case Intrinsic::nvvm_atomic_add_gen_f_sys:
4019 case Intrinsic::nvvm_atomic_add_gen_i_cta:
4020 case Intrinsic::nvvm_atomic_add_gen_i_sys:
4021 case Intrinsic::nvvm_atomic_and_gen_i_cta:
4022 case Intrinsic::nvvm_atomic_and_gen_i_sys:
4023 case Intrinsic::nvvm_atomic_cas_gen_i_cta:
4024 case Intrinsic::nvvm_atomic_cas_gen_i_sys:
4025 case Intrinsic::nvvm_atomic_dec_gen_i_cta:
4026 case Intrinsic::nvvm_atomic_dec_gen_i_sys:
4027 case Intrinsic::nvvm_atomic_inc_gen_i_cta:
4028 case Intrinsic::nvvm_atomic_inc_gen_i_sys:
4029 case Intrinsic::nvvm_atomic_max_gen_i_cta:
4030 case Intrinsic::nvvm_atomic_max_gen_i_sys:
4031 case Intrinsic::nvvm_atomic_min_gen_i_cta:
4032 case Intrinsic::nvvm_atomic_min_gen_i_sys:
4033 case Intrinsic::nvvm_atomic_or_gen_i_cta:
4034 case Intrinsic::nvvm_atomic_or_gen_i_sys:
4035 case Intrinsic::nvvm_atomic_exch_gen_i_cta:
4036 case Intrinsic::nvvm_atomic_exch_gen_i_sys:
4037 case Intrinsic::nvvm_atomic_xor_gen_i_cta:
4038 case Intrinsic::nvvm_atomic_xor_gen_i_sys: {
4039 auto &
DL =
I.getModule()->getDataLayout();
4042 Info.ptrVal =
I.getArgOperand(0);
4049 case Intrinsic::nvvm_ldu_global_i:
4050 case Intrinsic::nvvm_ldu_global_f:
4051 case Intrinsic::nvvm_ldu_global_p: {
4052 auto &
DL =
I.getModule()->getDataLayout();
4054 if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
4056 else if(Intrinsic == Intrinsic::nvvm_ldu_global_p)
4060 Info.ptrVal =
I.getArgOperand(0);
4063 Info.align = cast<ConstantInt>(
I.getArgOperand(1))->getMaybeAlignValue();
4067 case Intrinsic::nvvm_ldg_global_i:
4068 case Intrinsic::nvvm_ldg_global_f:
4069 case Intrinsic::nvvm_ldg_global_p: {
4070 auto &
DL =
I.getModule()->getDataLayout();
4073 if (Intrinsic == Intrinsic::nvvm_ldg_global_i)
4075 else if(Intrinsic == Intrinsic::nvvm_ldg_global_p)
4079 Info.ptrVal =
I.getArgOperand(0);
4082 Info.align = cast<ConstantInt>(
I.getArgOperand(1))->getMaybeAlignValue();
4087 case Intrinsic::nvvm_tex_1d_v4f32_s32:
4088 case Intrinsic::nvvm_tex_1d_v4f32_f32:
4089 case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
4090 case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
4091 case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
4092 case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
4093 case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
4094 case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
4095 case Intrinsic::nvvm_tex_2d_v4f32_s32:
4096 case Intrinsic::nvvm_tex_2d_v4f32_f32:
4097 case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
4098 case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
4099 case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
4100 case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
4101 case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
4102 case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
4103 case Intrinsic::nvvm_tex_3d_v4f32_s32:
4104 case Intrinsic::nvvm_tex_3d_v4f32_f32:
4105 case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
4106 case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
4107 case Intrinsic::nvvm_tex_cube_v4f32_f32:
4108 case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
4109 case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
4110 case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
4111 case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
4112 case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
4113 case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
4114 case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
4115 case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
4116 case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
4117 case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
4118 case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
4119 case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
4120 case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
4121 case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
4122 case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
4123 case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
4124 case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
4125 case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
4126 case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
4127 case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
4128 case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
4129 case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
4130 case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
4131 case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
4132 case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
4133 case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
4134 case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
4135 case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
4136 case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
4137 case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
4138 case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
4139 case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
4140 case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
4141 case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
4142 case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32:
4145 Info.ptrVal =
nullptr;
4151 case Intrinsic::nvvm_tex_1d_v4s32_s32:
4152 case Intrinsic::nvvm_tex_1d_v4s32_f32:
4153 case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
4154 case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
4155 case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
4156 case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
4157 case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
4158 case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
4159 case Intrinsic::nvvm_tex_2d_v4s32_s32:
4160 case Intrinsic::nvvm_tex_2d_v4s32_f32:
4161 case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
4162 case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
4163 case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
4164 case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
4165 case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
4166 case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
4167 case Intrinsic::nvvm_tex_3d_v4s32_s32:
4168 case Intrinsic::nvvm_tex_3d_v4s32_f32:
4169 case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
4170 case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
4171 case Intrinsic::nvvm_tex_cube_v4s32_f32:
4172 case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
4173 case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
4174 case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
4175 case Intrinsic::nvvm_tex_cube_v4u32_f32:
4176 case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
4177 case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
4178 case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
4179 case Intrinsic::nvvm_tex_1d_v4u32_s32:
4180 case Intrinsic::nvvm_tex_1d_v4u32_f32:
4181 case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
4182 case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
4183 case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
4184 case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
4185 case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
4186 case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
4187 case Intrinsic::nvvm_tex_2d_v4u32_s32:
4188 case Intrinsic::nvvm_tex_2d_v4u32_f32:
4189 case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
4190 case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
4191 case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
4192 case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
4193 case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
4194 case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
4195 case Intrinsic::nvvm_tex_3d_v4u32_s32:
4196 case Intrinsic::nvvm_tex_3d_v4u32_f32:
4197 case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
4198 case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
4199 case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
4200 case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
4201 case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
4202 case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
4203 case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
4204 case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
4205 case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
4206 case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
4207 case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
4208 case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
4209 case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
4210 case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
4211 case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
4212 case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
4213 case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
4214 case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: