#include "llvm/IR/IntrinsicsNVPTX.h"

#define DEBUG_TYPE "nvptx-lower"

    cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
             " 1: do it 2: do it aggressively"),

    cl::desc("NVPTX Specific: 0 use div.approx, 1 use div.full, 2 use"
             " IEEE Compliant F32 div.rnd if available."),

    cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),

    "nvptx-force-min-byval-param-align", cl::Hidden,
    cl::desc("NVPTX Specific: force 4-byte minimal alignment for byval"
             " params of device functions."),
static std::optional<std::pair<unsigned int, EVT>>

  return std::pair(NumElts, EltVT);

  return std::pair(NumElts / NPerWord,

  Offsets->push_back(StartingOffset + 0);
  Offsets->push_back(StartingOffset + 8);
  if (StructType *STy = dyn_cast<StructType>(Ty)) {
    auto const *SL = DL.getStructLayout(STy);
    for (auto *EI : STy->elements()) {
                      StartingOffset + SL->getElementOffset(ElementNum));

  for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {

      NumElts = (NumElts + 3) / 4;
    } else if (EltVT.getSimpleVT() == MVT::i8 && NumElts == 2) {

    for (unsigned j = 0; j != NumElts; ++j) {

      Offsets->push_back(Off);
330 "Promotion is not suitable for scalars of size larger than 64-bits");
332 *PromotedVT = MVT::i1;
337 *PromotedVT = MVT::i8;
340 *PromotedVT = MVT::i16;
343 *PromotedVT = MVT::i32;
346 *PromotedVT = MVT::i64;
349 return EVT(*PromotedVT) != VT;
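// The ladder above rounds a scalar's bit width up to the nearest of
// i1/i8/i16/i32/i64 (the assert rules out anything wider than 64 bits);
// the final comparison reports whether promotion actually changed the type.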
  if (ParamAlignment < AccessSize)

  if (Offsets[Idx] & (AccessSize - 1))

  EVT EltVT = ValueVTs[Idx];

  if (EltSize >= AccessSize)

  unsigned NumElts = AccessSize / EltSize;

  if (AccessSize != EltSize * NumElts)

  if (Idx + NumElts > ValueVTs.size())

  if (NumElts != 4 && NumElts != 2)

  for (unsigned j = Idx + 1; j < Idx + NumElts; ++j) {
    if (ValueVTs[j] != EltVT)

    if (Offsets[j] - Offsets[j - 1] != EltSize)
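// These early-outs encode when neighboring parameter pieces may merge into
// one vector access: the parameter alignment must cover the access size,
// the start offset must be access-aligned, only 2- and 4-element vectors
// are formed, and all elements must share one EVT at contiguous offsets.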
                               Align ParamAlignment, bool IsVAArg = false) {

  for (int I = 0, E = ValueVTs.size(); I != E; ++I) {

    for (unsigned AccessSize : {16, 8, 4, 2}) {

          I, AccessSize, ValueVTs, Offsets, ParamAlignment);

      assert(I + 1 < E && "Not enough elements.");

      assert(I + 3 < E && "Not enough elements.");
  if (Value->getValueType(0) == VT)

      Op, VT, IsOpSupported ? Action : NoBF16Action);

  bool IsOpSupported = false;

  for (MVT VT : {MVT::bf16, MVT::f16, MVT::v2bf16, MVT::v2f16, MVT::f32,
                 MVT::f64, MVT::i1, MVT::i8, MVT::i16, MVT::v2i16, MVT::v4i8,
                 MVT::i32, MVT::i64}) {

      {MVT::i8, MVT::i16, MVT::v2i16, MVT::i32, MVT::i64},

  for (const auto &Ty : {MVT::i16, MVT::i32, MVT::i64}) {

  const bool IsFP16FP16x2NegAvailable = STI.getSmVersion() >= 53 &&

  for (const auto &VT : {MVT::f16, MVT::v2f16})

  for (MVT VT : {MVT::bf16, MVT::f32, MVT::f64}) {

  for (MVT VT : {MVT::i1, MVT::i16, MVT::i32, MVT::i64}) {

  for (const auto &Op :

  bool SupportsF32MinMaxNaN =

#define MAKE_CASE(V) \
                                          bool Reciprocal) const {

  if (Reciprocal || ExtraSteps > 0) {

      return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_rsqrt_approx_ftz_f
                                   : Intrinsic::nvvm_rsqrt_approx_f);
    else if (VT == MVT::f64)
      return MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d);

      return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_sqrt_approx_ftz_f
                                   : Intrinsic::nvvm_sqrt_approx_f);

          DAG.getConstant(Intrinsic::nvvm_rcp_approx_ftz_d, DL, MVT::i32),
          MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d));
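// f32 uses the approximate sqrt/rsqrt intrinsics directly (with an .ftz
// variant when flushing denormals). There is no sqrt.approx.d, so the
// double-precision non-reciprocal case above is composed as
// rcp.approx.ftz.d(rsqrt.approx.d(x)).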
    std::optional<std::pair<unsigned, const APInt &>> VAInfo,
    const CallBase &CB, unsigned UniqueCallSite) const {

  assert(isABI && "Non-ABI compilation is not supported");

  std::string Prototype;

  O << "prototype_" << UniqueCallSite << " : .callprototype ";

    if (auto *ITy = dyn_cast<IntegerType>(retTy)) {
      size = ITy->getBitWidth();

             "Floating point type expected here");

      O << ".param .b" << size << " _";
    } else if (isa<PointerType>(retTy)) {
      O << ".param .b" << PtrVT.getSizeInBits() << " _";

      O << ".param .align " << (retAlignment ? retAlignment->value() : 0)
        << " .b8 _[" << DL.getTypeAllocSize(retTy) << "]";
  unsigned NumArgs = VAInfo ? VAInfo->first : Args.size();
  for (unsigned i = 0, OIdx = 0; i != NumArgs; ++i, ++OIdx) {
    Type *Ty = Args[i].Ty;

    if (!Outs[OIdx].Flags.isByVal()) {

        O << ".param .align " << ParamAlign.value() << " .b8 ";

        O << "[" << DL.getTypeAllocSize(Ty) << "]";

        if (unsigned len = vtparts.size())

              (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
             "type mismatch between callee prototype and arguments");

      if (isa<IntegerType>(Ty)) {
        sz = cast<IntegerType>(Ty)->getBitWidth();

      } else if (isa<PointerType>(Ty)) {
        sz = PtrVT.getSizeInBits();

      O << ".param .b" << sz << " ";

    Type *ETy = Args[i].IndirectType;
    Align InitialAlign = Outs[OIdx].Flags.getNonZeroByValAlign();
    Align ParamByValAlign =

    O << ".param .align " << ParamByValAlign.value() << " .b8 ";

    O << "[" << Outs[OIdx].Flags.getByValSize() << "]";

    O << (first ? "" : ",") << " .param .align " << VAInfo->second
    return DL.getABITypeAlign(Ty);

  if (!DirectCallee) {

    if (const auto *CI = dyn_cast<CallInst>(CB)) {

        return StackAlign.value();

  return DL.getABITypeAlign(Ty);

  switch (ElementType.getSimpleVT().SimpleTy) {

    ElementType = MVT::i16;

    ElementType = MVT::i32;

    ElementType = MVT::i64;
                                     unsigned ArgID, const SDLoc &dl) {

  for (unsigned i = 0, n = ElementType.getSizeInBits() / 8; i < n; i++) {

  EVT MergedType = ElementType;

  for (unsigned i = 0, n = ElementType.getSizeInBits() / 8; i < n; i++) {

  if (ElementType != MergedType)

  if (auto *CalleeFunc = dyn_cast<Function>(Func->getGlobal()))

        "Support for variadic functions (unsized array parameter) introduced "
        "in PTX ISA version 6.0 and requires target sm_30.");
  assert(isABI && "Non-ABI compilation is not supported");

  unsigned VAOffset = 0;

  unsigned ParamCount = 0;

  for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
    EVT VT = Outs[OIdx].VT;
    Type *Ty = Args[i].Ty;

    bool IsByVal = Outs[OIdx].Flags.isByVal();

    assert((!IsByVal || Args[i].IndirectType) &&
           "byval arg must have indirect type");
    Type *ETy = (IsByVal ? Args[i].IndirectType : Ty);

      Align InitialAlign = Outs[OIdx].Flags.getNonZeroByValAlign();

        VAOffset = alignTo(VAOffset, ArgAlign);

      ArgAlign = getArgumentAlignment(CB, Ty, ParamCount + 1, DL);

        (IsByVal ? Outs[OIdx].Flags.getByValSize() : DL.getTypeAllocSize(Ty));

    if (ParamCount == FirstVAArg) {

                          DeclareParamVTs, DeclareParamOps);

      NeedAlign = PassAsArray;
    } else if (PassAsArray) {

      SDValue DeclareScalarParamOps[] = {

                          DeclareScalarParamOps);
    bool ExtendIntegerParam =

    for (unsigned j = 0, je = VTs.size(); j != je; ++j) {

      int CurOffset = Offsets[j];

      SDValue StVal = OutVals[OIdx];

        EltVT = EVT(PromotedVT);

          StVal = DAG.getNode(Ext, dl, PromotedVT, StVal);

      } else if (ExtendIntegerParam) {
        assert(VTs.size() == 1 && "Scalar can't have multiple parts.");

                            dl, MVT::i32, StVal);

      if (VectorInfo[j] == PVF_SCALAR && !IsVAArg && PartAlign.has_value() &&

        assert(StoreOperands.empty() && "Unfinished preceding store.");

            DAG, Chain, IsByVal ? CurOffset + VAOffset : CurOffset, EltVT,
            StVal, InGlue, ParamCount, dl);

        assert(StoreOperands.empty() && "Unfinished preceding store.");

            DAG.getConstant(IsVAArg ? FirstVAArg : ParamCount, dl, MVT::i32));

            IsByVal ? CurOffset + VAOffset : (IsVAArg ? VAOffset : CurOffset),

        unsigned NumElts = StoreOperands.size() - 3;
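// StoreOperands is laid out as {Chain, ParamIndex, Offset, <values...>},
// so stripping the three leading bookkeeping operands leaves the number of
// values queued for this store.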
        EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : EltVT;

            Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands,

        StoreOperands.clear();

      if (!IsByVal && IsVAArg) {

               "Vectorization is expected to be disabled for variadics.");
        VAOffset += DL.getTypeAllocSize(

    assert(StoreOperands.empty() && "Unfinished parameter store.");
    if (!IsByVal && VTs.size() > 0)

    if (IsByVal && IsVAArg)
  if (Ins.size() > 0) {

    unsigned resultsz = DL.getTypeAllocSizeInBits(RetTy);

      retAlignment = getArgumentAlignment(CB, RetTy, 0, DL);
      assert(retAlignment && "retAlignment is guaranteed to be set");

          Chain, DAG.getConstant(retAlignment->value(), dl, MVT::i32),

                              VADeclareParam->getVTList(), DeclareParamOps);

  if (isa<ExternalSymbolSDNode>(Callee)) {

    assert(CalleeFunc != nullptr && "Libcall callee must be set.");

    CalleeFunc->addFnAttr("nvptx-libcall-callee", "true");

        DL, RetTy, Args, Outs, retAlignment,
            ? std::optional<std::pair<unsigned, const APInt &>>(std::make_pair(
        *CB, UniqueCallSite);
      Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InGlue

  Chain = DAG.getNode(Opcode, dl, PrintCallVTs, PrintCallOps);

  if (ConvertToIndirectCall) {

    EVT DestVT = Callee.getValueType();

  SDValue CallVoidOps[] = { Chain, Callee, InGlue };

  SDValue CallArgBeginOps[] = { Chain, InGlue };

  for (unsigned i = 0, e = std::min(CLI.NumFixedArgs + 1, ParamCount); i != e;

    Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps);

  SDValue CallArgEndOps[] = { Chain,

      Chain, DAG.getConstant(UniqueCallSite, dl, MVT::i32), InGlue};
  if (Ins.size() > 0) {

    assert(VTs.size() == Ins.size() && "Bad value decomposition");

    Align RetAlign = getArgumentAlignment(CB, RetTy, 0, DL);

    bool ExtendIntegerRetVal =
        RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32;

    for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
      bool needTruncate = false;
      EVT TheLoadType = VTs[i];
      EVT EltType = Ins[i].VT;

        TheLoadType = EVT(PromotedVT);
        EltType = EVT(PromotedVT);
        needTruncate = true;

      if (ExtendIntegerRetVal) {
        TheLoadType = MVT::i32;

        needTruncate = true;

        if (VTs[i].isInteger())
          needTruncate = true;
          EltAlign < DL.getABITypeAlign(

        assert(VecIdx == -1 && LoadVTs.empty() && "Orphaned operand list.");

            DAG, Chain, Offsets[i], TheLoadType, InGlue, TempProxyRegOps, dl);

        ProxyRegTruncates.push_back(std::optional<MVT>());

        assert(VecIdx == -1 && LoadVTs.empty() && "Orphaned operand list.");

        unsigned NumElts = LoadVTs.size();

            DAG.getConstant(Offsets[VecIdx], dl, MVT::i32), InGlue};

            Op, dl, DAG.getVTList(LoadVTs), LoadOperands, TheLoadType,

        for (unsigned j = 0; j < NumElts; ++j) {

            ProxyRegTruncates.push_back(std::optional<MVT>(Ins[VecIdx + j].VT));

            ProxyRegTruncates.push_back(std::optional<MVT>());

        InGlue = RetVal.getValue(NumElts + 1);
      DAG.getCALLSEQ_END(Chain, UniqueCallSite, UniqueCallSite + 1, InGlue, dl);

  for (unsigned i = 0; i < ProxyRegOps.size(); ++i) {
    if (i < RetElts.size() && RetElts[i]) {

        DAG.getVTList(ProxyRegOps[i].getSimpleValueType(), MVT::Other, MVT::Glue),
        { Chain, ProxyRegOps[i], InGlue }

    Chain = Ret.getValue(1);
    InGlue = Ret.getValue(2);

    if (ProxyRegTruncates[i]) {

  for (SDValue &T : TempProxyRegOps) {

        DAG.getVTList(T.getSimpleValueType(), MVT::Other, MVT::Glue),
        {Chain, T.getOperand(0), InGlue});
1996 "Support for dynamic alloca introduced in PTX ISA version 7.3 and "
1997 "requires target sm_52.",
2007 uint64_t Align = cast<ConstantSDNode>(
Op.getOperand(2))->getZExtValue();
2015 EVT RetTypes[] = {ValueSizeTy, MVT::Other};
2027 "Support for stackrestore requires PTX ISA version >= 7.3 and target "
2031 return Op.getOperand(0);
2050 "Support for stacksave requires PTX ISA version >= 7.3 and target >= "
2075 unsigned NumOperands = Node->getNumOperands();
2076 for (
unsigned i = 0; i < NumOperands; ++i) {
2077 SDValue SubOp = Node->getOperand(i);
2081 for (
unsigned j = 0; j < NumSubElem; ++j) {
  EVT FromVT = Op->getOperand(0)->getValueType(0);
  if (FromVT != MVT::v2i8) {

  EVT ToVT = Op->getValueType(0);

  EVT VT = Op->getValueType(0);
  if (!(Isv2x16VT(VT) || VT == MVT::v4i8))

    return Operand->isUndef() || isa<ConstantSDNode>(Operand) ||
           isa<ConstantFPSDNode>(Operand);

  if (VT != MVT::v4i8)

    auto PRMT__10 = GetPRMT(Op->getOperand(0), Op->getOperand(1), true, 0x3340);
    auto PRMT__32 = GetPRMT(Op->getOperand(2), Op->getOperand(3), true, 0x3340);
    auto PRMT3210 = GetPRMT(PRMT__10, PRMT__32, false, 0x5410);
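// v4i8 BUILD_VECTOR is lowered via PTX's byte-permute (prmt.b32): two prmt
// ops pack operands 0/1 and 2/3 into 16-bit halves, and the final prmt with
// selector 0x5410 interleaves the halves into one packed 32-bit value.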
  EVT VT = Op->getValueType(0);

      return APInt(32, 0);

    if (VT == MVT::v2f16 || VT == MVT::v2bf16)
      Value = cast<ConstantFPSDNode>(Operand)->getValueAPF().bitcastToAPInt();
    else if (VT == MVT::v2i16 || VT == MVT::v4i8)

    if (VT == MVT::v4i8)

    return Value.zext(32);

    Value = GetOperand(Op, 0) | GetOperand(Op, 1).shl(16);
  } else if (VT == MVT::v4i8) {
    Value = GetOperand(Op, 0) | GetOperand(Op, 1).shl(8) |
            GetOperand(Op, 2).shl(16) | GetOperand(Op, 3).shl(24);
  if (VectorVT == MVT::v4i8) {

  if (isa<ConstantSDNode>(Index.getNode()))

  if (VectorVT != MVT::v4i8)

  if (Value->isUndef())

  if (VectorVT != MVT::v4i8 || Op.getValueType() != MVT::v4i8)

    if (I.value() != -1)
      Selector |= (I.value() << (I.index() * 4));

  EVT VT = Op.getValueType();

  EVT VT = Op.getValueType();

  EVT VT = Op.getValueType();

  EVT VT = Op.getValueType();

    return LowerFROUND32(Op, DAG);

    return LowerFROUND64(Op, DAG);

  EVT VT = Op.getValueType();

  const unsigned SignBitMask = 0x80000000;

  const unsigned PointFiveInBits = 0x3F000000;
  SDValue PointFiveWithSignRaw =
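// 0x3F000000 is the IEEE-754 bit pattern of 0.5f and 0x80000000 is the f32
// sign bit, so OR-ing the input's sign into PointFiveInBits materializes
// copysign(0.5f, x), the bias used for round-half-away-from-zero.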
  EVT VT = Op.getValueType();

  if (Op.getValueType() == MVT::bf16) {

        DAG.getNode(Op.getOpcode(), Loc, MVT::f32, Op.getOperand(0)),

  if (Op.getOperand(0).getValueType() == MVT::bf16) {

        Op.getOpcode(), Loc, Op.getValueType(),

  EVT NarrowVT = Op.getValueType();

  EVT WideVT = Op.getValueType();

  if (Op.getValueType() != MVT::v2i16)

  EVT EltVT = Op.getValueType().getVectorElementType();

  for (int I = 0, E = Op.getValueType().getVectorNumElements(); I < E; I++) {

                   [&](const SDUse &O) {
                     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
                                        O.get(), DAG.getIntPtrConstant(I, DL));
  switch (Op.getOpcode()) {

    return LowerBUILD_VECTOR(Op, DAG);
    return LowerBITCAST(Op, DAG);
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
    return LowerINSERT_VECTOR_ELT(Op, DAG);
    return LowerVECTOR_SHUFFLE(Op, DAG);
    return LowerCONCAT_VECTORS(Op, DAG);
    return LowerSTORE(Op, DAG);
    return LowerLOAD(Op, DAG);
    return LowerShiftLeftParts(Op, DAG);
    return LowerShiftRightParts(Op, DAG);
    return LowerSelect(Op, DAG);
    return LowerFROUND(Op, DAG);
    return LowerFCOPYSIGN(Op, DAG);
    return LowerINT_TO_FP(Op, DAG);
    return LowerFP_TO_INT(Op, DAG);
    return LowerFP_ROUND(Op, DAG);
    return LowerFP_EXTEND(Op, DAG);
    return LowerBR_JT(Op, DAG);
    return LowerVAARG(Op, DAG);
    return LowerVASTART(Op, DAG);
    return LowerCopyToReg_128(Op, DAG);
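// This is the LowerOperation dispatch: each opcode marked Custom in the
// constructor is routed to its dedicated Lower* helper.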
  const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));

  unsigned JId = JT->getIndex();

  const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  EVT VT = Node->getValueType(0);

  SDValue Tmp1 = Node->getOperand(0);
  SDValue Tmp2 = Node->getOperand(1);
  const MaybeAlign MA(Node->getConstantOperandVal(3));

  SDValue Arg = getParamSymbol(DAG, -1, PtrVT);

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, VAReg, Op.getOperand(1),

  assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1");
  if (Op.getValueType() == MVT::i1)
    return LowerLOADi1(Op, DAG);

  EVT VT = Op.getValueType();

  EVT MemVT = Load->getMemoryVT();

          MemVT, *Load->getMemOperand())) {

         "Custom lowering for i1 load only");

                              LD->getBasePtr(), LD->getPointerInfo(),
                              MVT::i8, LD->getAlign(),
                              LD->getMemOperand()->getFlags());

  SDValue Ops[] = { result, LD->getChain() };

    return LowerSTOREi1(Op, DAG);

  if ((Isv2x16VT(VT) || VT == MVT::v4i8) &&
          VT, *Store->getMemOperand()))

    return LowerSTOREVector(Op, DAG);
  if (!NumEltsAndEltVT)

  auto [NumElts, EltVT] = NumEltsAndEltVT.value();

  if (Alignment < PrefAlign) {

  bool NeedExt = false;

  unsigned Opcode = 0;

         "NumElts should not increase, only decrease or stay the same.");

    for (unsigned i = 0; i < NumElts; ++i) {

                                   NumEltsPerSubVector);

    for (unsigned i = 0; i < NumElts; ++i) {

  Ops.append(N->op_begin() + 2, N->op_end());
      DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), MVT::i8,
                        ST->getAlign(), ST->getMemOperand()->getFlags());
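// i1 has no byte-addressable PTX form: loads are done as extending i8 loads
// (the LD->getBasePtr() sequence above) and stores as i8 truncating stores,
// as in this getTruncStore call.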
  assert(Op.getOperand(1).getValueType() == MVT::i128 &&
         "Custom lowering for 128-bit CopyToReg only");

  NewOps[0] = Op->getOperand(0);
  NewOps[1] = Op->getOperand(1);

  NewOps[4] = Op->getOperand(3);

unsigned NVPTXTargetLowering::getNumRegisters(
    std::optional<MVT> RegisterVT = std::nullopt) const {
  if (VT == MVT::i128 && RegisterVT == MVT::i128)

bool NVPTXTargetLowering::splitValueIntoRegisterParts(
    unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
  if (Val.getValueType() == MVT::i128 && NumParts == 1) {
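// An i128 value is kept as a single logical register here (NumParts == 1).
// LowerCopyToReg_128 above rebuilds the CopyToReg node with five operands;
// the elided slots 2 and 3 presumably carry the two i64 halves, since PTX
// has no native 128-bit register move.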
  std::vector<SDValue> OutChains;

  assert(isABI && "Non-ABI compilation is not supported");

  std::vector<Type *> argTypes;
  std::vector<const Argument *> theArgs;

    theArgs.push_back(&I);
    argTypes.push_back(I.getType());

  unsigned InsIdx = 0;

  for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++InsIdx) {
    Type *Ty = argTypes[i];

    if (theArgs[i]->use_empty()) {

      if (vtparts.empty())

      for (unsigned parti = 0, parte = vtparts.size(); parti != parte;

      if (vtparts.size() > 0)

      for (unsigned parti = 0; parti < NumRegs; ++parti) {
    bool aggregateIsPacked = false;
    if (StructType *STy = dyn_cast<StructType>(Ty))
      aggregateIsPacked = STy->isPacked();

    SDValue Arg = getParamSymbol(DAG, i, PtrVT);

    for (unsigned parti = 0, parte = VTs.size(); parti != parte; ++parti) {

        assert(VecIdx == -1 && "Orphaned vector.");

      if (VectorInfo[parti] & PVF_LAST) {
        unsigned NumElts = parti - VecIdx + 1;
        EVT EltVT = VTs[parti];

        if (EltVT == MVT::i1)

        else if (Isv2x16VT(EltVT) || EltVT == MVT::v4i8)

              if (aggregateIsPacked)

              return std::nullopt;

        P.getNode()->setIROrder(i + 1);
        for (unsigned j = 0; j < NumElts; ++j) {

          if (EltVT == MVT::i1)

          else if (EltVT != LoadVT)

              Ins[InsIdx].VT.getFixedSizeInBits() >

            Elt = DAG.getNode(Extend, dl, Ins[InsIdx].VT, Elt);

    assert(ObjectVT == Ins[InsIdx].VT &&
           "Ins type did not match function type");
    SDValue Arg = getParamSymbol(DAG, i, PtrVT);

    p.getNode()->setIROrder(i + 1);
  if (!OutChains.empty())

  for (unsigned i = 0, n = ElementType.getSizeInBits() / 8; i < n; i++) {

                     DAG.getVTList(MVT::Other), StoreOperands,
  assert(isABI && "Non-ABI compilation is not supported");

  assert(VTs.size() == OutVals.size() && "Bad return value decomposition");

  for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
    SDValue PromotedOutVal = OutVals[i];

      VTs[i] = EVT(PromotedVT);

      PromotedOutVal = DAG.getNode(Ext, dl, PromotedVT, PromotedOutVal);

    PromotedOutVals.push_back(PromotedOutVal);

  bool ExtendIntegerRetVal =
      RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32;
  for (unsigned i = 0, e = VTs.size(); i != e; ++i) {

    SDValue RetVal = PromotedOutVals[i];

    if (ExtendIntegerRetVal) {

                           dl, MVT::i32, RetVal);

    EVT ElementType = ExtendIntegerRetVal ? MVT::i32 : VTs[i];
    Align ElementTypeAlign =
        DL.getABITypeAlign(ElementType.getTypeForEVT(RetTy->getContext()));
    Align ElementAlign =

    if (ElementAlign < ElementTypeAlign) {
      assert(StoreOperands.empty() && "Orphaned operand list.");

      assert(StoreOperands.empty() && "Orphaned operand list.");

      unsigned NumElts = StoreOperands.size() - 2;
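// Return-value stores carry only {Chain, Offset, <values...>}; there is no
// parameter-index operand, hence size() - 2 here versus size() - 3 on the
// outgoing-argument path above.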
      EVT TheStoreType = ExtendIntegerRetVal ? MVT::i32 : VTs[i];

          Op, dl, DAG.getVTList(MVT::Other), StoreOperands, TheStoreType,

      StoreOperands.clear();
  if (Constraint.size() > 1)

  switch (Intrinsic) {

  case Intrinsic::nvvm_match_all_sync_i32p:
  case Intrinsic::nvvm_match_all_sync_i64p:

    Info.memVT = MVT::i1;
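// getTgtMemIntrinsic populates IntrinsicInfo (opcode, memVT, pointer
// operand, access flags) for NVVM intrinsics that touch memory so the
// selection DAG can model their side effects; the long case groups that
// follow differ mainly in the reported memVT.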
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col:
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row:
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col:
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row:
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row_stride: {

    Info.memVT = MVT::v8f16;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col:
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_col:
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_row:
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_row_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_col:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_col_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_row:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_row_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_col:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_col:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_col_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_row:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_row_stride: {

    Info.memVT = MVT::v2i32;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_col:
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_col_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_col_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_col:
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_row:
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_row_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row:
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_col:
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_row:
  case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_row_stride:
  case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_col:
  case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_row:
  case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_row_stride:

  case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_col_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_col:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_col:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_row:
  case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_row_stride:
  case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_col:
  case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_row:
  case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_row_stride:
  case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x4_b16:
  case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x4_trans_b16: {

    Info.memVT = MVT::v4i32;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_col:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_col_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_col_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_col:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_row:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_row_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_row_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_row:

  case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_col:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_col_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_col_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_col:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_row:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_row_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_row_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_row:
  case Intrinsic::nvvm_wmma_m8n8k128_load_a_b1_row:
  case Intrinsic::nvvm_wmma_m8n8k128_load_a_b1_row_stride:
  case Intrinsic::nvvm_wmma_m8n8k128_load_b_b1_col:
  case Intrinsic::nvvm_wmma_m8n8k128_load_b_b1_col_stride:
  case Intrinsic::nvvm_wmma_m8n8k32_load_a_s4_row:
  case Intrinsic::nvvm_wmma_m8n8k32_load_a_s4_row_stride:
  case Intrinsic::nvvm_wmma_m8n8k32_load_a_u4_row_stride:
  case Intrinsic::nvvm_wmma_m8n8k32_load_a_u4_row:
  case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col:
  case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col_stride:
  case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col_stride:
  case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col:
  case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x1_b16:
  case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x1_trans_b16: {

    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col:
  case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row:
  case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col:
  case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row:
  case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col:
  case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row:
  case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row_stride: {

    Info.memVT = MVT::v4f16;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col:
  case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row:
  case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col:
  case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row:
  case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col:
  case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row:
  case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row_stride:
  case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_col:
  case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_row:
  case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_row_stride: {

    Info.memVT = MVT::v8f32;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_col:
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_col_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_row:
  case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_row_stride:

  case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_col:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_col_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_row:
  case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_row_stride:

  case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col:
  case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_row:
  case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_row_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_col:
  case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_col_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_row:
  case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_row_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_col:
  case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_col_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_row:
  case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_row_stride: {

    Info.memVT = MVT::v8i32;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_col:
  case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_col_stride:
  case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_row:
  case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_row_stride:
  case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col:
  case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col_stride:
  case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row:
  case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row_stride:
  case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x2_b16:
  case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x2_trans_b16: {

    Info.memVT = MVT::v2i32;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_col:
  case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_col_stride:
  case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_row:
  case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_row_stride:

  case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_col:
  case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_col_stride:
  case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_row:
  case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_row_stride: {

    Info.memVT = MVT::f64;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_col:
  case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_col_stride:
  case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_row:
  case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_row_stride: {

    Info.memVT = MVT::v2f64;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col:
  case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row:
  case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col:
  case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row:
  case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col:
  case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row:
  case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row_stride: {

    Info.memVT = MVT::v4f16;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col:
  case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row:
  case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col:
  case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row:
  case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col:
  case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row:
  case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row_stride:
  case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_col:
  case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_row:
  case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_row_stride: {

    Info.memVT = MVT::v8f32;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_col:
  case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_col_stride:
  case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_row:
  case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_row_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_col:
  case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_col_stride:
  case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_row:
  case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_row_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_col:
  case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_col_stride:
  case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_row:
  case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_row_stride: {

    Info.memVT = MVT::v8i32;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_col:
  case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_col_stride:
  case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_row:
  case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_row_stride:
  case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_col:
  case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_col_stride:
  case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_row:
  case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_row_stride: {

    Info.memVT = MVT::v2i32;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_col:
  case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_col_stride:
  case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_row:
  case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_row_stride: {

    Info.memVT = MVT::v2f64;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_atomic_load_inc_32:
  case Intrinsic::nvvm_atomic_load_dec_32:

  case Intrinsic::nvvm_atomic_add_gen_f_cta:
  case Intrinsic::nvvm_atomic_add_gen_f_sys:
  case Intrinsic::nvvm_atomic_add_gen_i_cta:
  case Intrinsic::nvvm_atomic_add_gen_i_sys:
  case Intrinsic::nvvm_atomic_and_gen_i_cta:
  case Intrinsic::nvvm_atomic_and_gen_i_sys:
  case Intrinsic::nvvm_atomic_cas_gen_i_cta:
  case Intrinsic::nvvm_atomic_cas_gen_i_sys:
  case Intrinsic::nvvm_atomic_dec_gen_i_cta:
  case Intrinsic::nvvm_atomic_dec_gen_i_sys:
  case Intrinsic::nvvm_atomic_inc_gen_i_cta:
  case Intrinsic::nvvm_atomic_inc_gen_i_sys:
  case Intrinsic::nvvm_atomic_max_gen_i_cta:
  case Intrinsic::nvvm_atomic_max_gen_i_sys:
  case Intrinsic::nvvm_atomic_min_gen_i_cta:
  case Intrinsic::nvvm_atomic_min_gen_i_sys:
  case Intrinsic::nvvm_atomic_or_gen_i_cta:
  case Intrinsic::nvvm_atomic_or_gen_i_sys:
  case Intrinsic::nvvm_atomic_exch_gen_i_cta:
  case Intrinsic::nvvm_atomic_exch_gen_i_sys:
  case Intrinsic::nvvm_atomic_xor_gen_i_cta:
  case Intrinsic::nvvm_atomic_xor_gen_i_sys: {
    auto &DL = I.getDataLayout();

    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_p: {
    auto &DL = I.getDataLayout();

    if (Intrinsic == Intrinsic::nvvm_ldu_global_i)

    else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)

    Info.ptrVal = I.getArgOperand(0);

    Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
  case Intrinsic::nvvm_tex_1d_v4f32_s32:
  case Intrinsic::nvvm_tex_1d_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
  case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_v4f32_s32:
  case Intrinsic::nvvm_tex_2d_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
  case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_3d_v4f32_s32:
  case Intrinsic::nvvm_tex_3d_v4f32_f32:
  case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_cube_v4f32_f32:
  case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
  case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
  case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
  case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_cube_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_grad_v4f32_f32:
  case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32:

    Info.memVT = MVT::v4f32;
    Info.ptrVal = nullptr;
  case Intrinsic::nvvm_tex_1d_v4s32_s32:
  case Intrinsic::nvvm_tex_1d_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
  case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_v4s32_s32:
  case Intrinsic::nvvm_tex_2d_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
  case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_3d_v4s32_s32:
  case Intrinsic::nvvm_tex_3d_v4s32_f32:
  case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_cube_v4s32_f32:
  case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
  case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
  case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_cube_v4u32_f32:
  case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
  case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
  case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_v4u32_s32:
  case Intrinsic::nvvm_tex_1d_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
  case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_v4u32_s32:
  case Intrinsic::nvvm_tex_2d_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
  case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_3d_v4u32_s32:
  case Intrinsic::nvvm_tex_3d_v4u32_f32:
  case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
  case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_cube_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_cube_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_grad_v4u32_f32:
  case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32:

    Info.memVT = MVT::v4i32;
    Info.ptrVal = nullptr;
  case Intrinsic::nvvm_suld_1d_i8_clamp:
  case Intrinsic::nvvm_suld_1d_v2i8_clamp:
  case Intrinsic::nvvm_suld_1d_v4i8_clamp:
  case Intrinsic::nvvm_suld_1d_array_i8_clamp:
  case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
  case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
  case Intrinsic::nvvm_suld_2d_i8_clamp:
  case Intrinsic::nvvm_suld_2d_v2i8_clamp:
  case Intrinsic::nvvm_suld_2d_v4i8_clamp:
  case Intrinsic::nvvm_suld_2d_array_i8_clamp:
  case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
  case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
  case Intrinsic::nvvm_suld_3d_i8_clamp:
  case Intrinsic::nvvm_suld_3d_v2i8_clamp:
  case Intrinsic::nvvm_suld_3d_v4i8_clamp:
  case Intrinsic::nvvm_suld_1d_i8_trap:
  case Intrinsic::nvvm_suld_1d_v2i8_trap:
  case Intrinsic::nvvm_suld_1d_v4i8_trap:
  case Intrinsic::nvvm_suld_1d_array_i8_trap:
  case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
  case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
  case Intrinsic::nvvm_suld_2d_i8_trap:
  case Intrinsic::nvvm_suld_2d_v2i8_trap:
  case Intrinsic::nvvm_suld_2d_v4i8_trap:
  case Intrinsic::nvvm_suld_2d_array_i8_trap:
  case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
  case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
  case Intrinsic::nvvm_suld_3d_i8_trap:
  case Intrinsic::nvvm_suld_3d_v2i8_trap:
  case Intrinsic::nvvm_suld_3d_v4i8_trap:
  case Intrinsic::nvvm_suld_1d_i8_zero:
  case Intrinsic::nvvm_suld_1d_v2i8_zero:
  case Intrinsic::nvvm_suld_1d_v4i8_zero:
  case Intrinsic::nvvm_suld_1d_array_i8_zero:
  case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
  case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
  case Intrinsic::nvvm_suld_2d_i8_zero:
  case Intrinsic::nvvm_suld_2d_v2i8_zero:
  case Intrinsic::nvvm_suld_2d_v4i8_zero:
  case Intrinsic::nvvm_suld_2d_array_i8_zero:
  case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
  case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
  case Intrinsic::nvvm_suld_3d_i8_zero:
  case Intrinsic::nvvm_suld_3d_v2i8_zero:
  case Intrinsic::nvvm_suld_3d_v4i8_zero:

    Info.memVT = MVT::i8;
    Info.ptrVal = nullptr;
  case Intrinsic::nvvm_suld_1d_i16_clamp:
  case Intrinsic::nvvm_suld_1d_v2i16_clamp:
  case Intrinsic::nvvm_suld_1d_v4i16_clamp:
  case Intrinsic::nvvm_suld_1d_array_i16_clamp:
  case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
  case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
  case Intrinsic::nvvm_suld_2d_i16_clamp:
  case Intrinsic::nvvm_suld_2d_v2i16_clamp:
  case Intrinsic::nvvm_suld_2d_v4i16_clamp:
  case Intrinsic::nvvm_suld_2d_array_i16_clamp:
  case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
  case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
  case Intrinsic::nvvm_suld_3d_i16_clamp:
  case Intrinsic::nvvm_suld_3d_v2i16_clamp:
  case Intrinsic::nvvm_suld_3d_v4i16_clamp:
  case Intrinsic::nvvm_suld_1d_i16_trap:
  case Intrinsic::nvvm_suld_1d_v2i16_trap:
  case Intrinsic::nvvm_suld_1d_v4i16_trap:
  case Intrinsic::nvvm_suld_1d_array_i16_trap:
  case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
  case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
  case Intrinsic::nvvm_suld_2d_i16_trap:
  case Intrinsic::nvvm_suld_2d_v2i16_trap:
  case Intrinsic::nvvm_suld_2d_v4i16_trap:
  case Intrinsic::nvvm_suld_2d_array_i16_trap:
  case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
  case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
  case Intrinsic::nvvm_suld_3d_i16_trap:
  case Intrinsic::nvvm_suld_3d_v2i16_trap:
  case Intrinsic::nvvm_suld_3d_v4i16_trap:
  case Intrinsic::nvvm_suld_1d_i16_zero:
  case Intrinsic::nvvm_suld_1d_v2i16_zero:
  case Intrinsic::nvvm_suld_1d_v4i16_zero:
  case Intrinsic::nvvm_suld_1d_array_i16_zero:
  case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
  case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
  case Intrinsic::nvvm_suld_2d_i16_zero:
  case Intrinsic::nvvm_suld_2d_v2i16_zero:
  case Intrinsic::nvvm_suld_2d_v4i16_zero:
  case Intrinsic::nvvm_suld_2d_array_i16_zero:
  case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
  case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
  case Intrinsic::nvvm_suld_3d_i16_zero:
  case Intrinsic::nvvm_suld_3d_v2i16_zero:
  case Intrinsic::nvvm_suld_3d_v4i16_zero:

    Info.memVT = MVT::i16;
    Info.ptrVal = nullptr;
  case Intrinsic::nvvm_suld_1d_i32_clamp:
  case Intrinsic::nvvm_suld_1d_v2i32_clamp:
  case Intrinsic::nvvm_suld_1d_v4i32_clamp:
  case Intrinsic::nvvm_suld_1d_array_i32_clamp:
  case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
  case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
  case Intrinsic::nvvm_suld_2d_i32_clamp:
  case Intrinsic::nvvm_suld_2d_v2i32_clamp:
  case Intrinsic::nvvm_suld_2d_v4i32_clamp:
  case Intrinsic::nvvm_suld_2d_array_i32_clamp:
  case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
  case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
  case Intrinsic::nvvm_suld_3d_i32_clamp:
  case Intrinsic::nvvm_suld_3d_v2i32_clamp:
  case Intrinsic::nvvm_suld_3d_v4i32_clamp:
  case Intrinsic::nvvm_suld_1d_i32_trap:
  case Intrinsic::nvvm_suld_1d_v2i32_trap:
  case Intrinsic::nvvm_suld_1d_v4i32_trap:
  case Intrinsic::nvvm_suld_1d_array_i32_trap:
  case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
  case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
  case Intrinsic::nvvm_suld_2d_i32_trap:
  case Intrinsic::nvvm_suld_2d_v2i32_trap:
  case Intrinsic::nvvm_suld_2d_v4i32_trap:
  case Intrinsic::nvvm_suld_2d_array_i32_trap:
  case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
  case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
  case Intrinsic::nvvm_suld_3d_i32_trap:
  case Intrinsic::nvvm_suld_3d_v2i32_trap:
  case Intrinsic::nvvm_suld_3d_v4i32_trap:
  case Intrinsic::nvvm_suld_1d_i32_zero:
  case Intrinsic::nvvm_suld_1d_v2i32_zero:
  case Intrinsic::nvvm_suld_1d_v4i32_zero:
  case Intrinsic::nvvm_suld_1d_array_i32_zero:
  case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
  case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
  case Intrinsic::nvvm_suld_2d_i32_zero:
  case Intrinsic::nvvm_suld_2d_v2i32_zero:
  case Intrinsic::nvvm_suld_2d_v4i32_zero:
  case Intrinsic::nvvm_suld_2d_array_i32_zero:
  case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
  case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
  case Intrinsic::nvvm_suld_3d_i32_zero:
  case Intrinsic::nvvm_suld_3d_v2i32_zero:
  case Intrinsic::nvvm_suld_3d_v4i32_zero:

    Info.memVT = MVT::i32;
    Info.ptrVal = nullptr;
  case Intrinsic::nvvm_suld_1d_i64_clamp:
  case Intrinsic::nvvm_suld_1d_v2i64_clamp:
  case Intrinsic::nvvm_suld_1d_array_i64_clamp:
  case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
  case Intrinsic::nvvm_suld_2d_i64_clamp:
  case Intrinsic::nvvm_suld_2d_v2i64_clamp:
  case Intrinsic::nvvm_suld_2d_array_i64_clamp:
  case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
  case Intrinsic::nvvm_suld_3d_i64_clamp:
  case Intrinsic::nvvm_suld_3d_v2i64_clamp:
  case Intrinsic::nvvm_suld_1d_i64_trap:
  case Intrinsic::nvvm_suld_1d_v2i64_trap:
  case Intrinsic::nvvm_suld_1d_array_i64_trap:
  case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
  case Intrinsic::nvvm_suld_2d_i64_trap:
  case Intrinsic::nvvm_suld_2d_v2i64_trap:
  case Intrinsic::nvvm_suld_2d_array_i64_trap:
  case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
  case Intrinsic::nvvm_suld_3d_i64_trap:
  case Intrinsic::nvvm_suld_3d_v2i64_trap:
  case Intrinsic::nvvm_suld_1d_i64_zero:
  case Intrinsic::nvvm_suld_1d_v2i64_zero:
  case Intrinsic::nvvm_suld_1d_array_i64_zero:
  case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
  case Intrinsic::nvvm_suld_2d_i64_zero:
  case Intrinsic::nvvm_suld_2d_v2i64_zero:
  case Intrinsic::nvvm_suld_2d_array_i64_zero:
  case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
  case Intrinsic::nvvm_suld_3d_i64_zero:
  case Intrinsic::nvvm_suld_3d_v2i64_zero:

    Info.memVT = MVT::i64;
    Info.ptrVal = nullptr;
  const Align ABITypeAlign = std::min(Align(128), DL.getABITypeAlign(ArgTy));

  if (!F || !F->hasLocalLinkage() ||
      F->hasAddressTaken(nullptr,

    return ABITypeAlign;

  return std::max(Align(16), ABITypeAlign);
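// Alignment is raised above the ABI minimum only when every call site is
// visible: external or address-taken functions keep ABITypeAlign, while
// local functions get at least 16-byte alignment so parameter loads and
// stores can be vectorized more aggressively.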
  Align ArgAlign = InitialAlign;

    ArgAlign = std::max(ArgAlign, Align(4));
  std::string ParamName;

    ParamStr << "_vararg";

    ParamStr << "_param_" << Idx;
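// The resulting PTX symbol is "<function>_param_<Idx>", with
// "<function>_vararg" naming the trailing variadic array parameter.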
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
std::pair<unsigned, const TargetRegisterClass *>

  if (Constraint.size() == 1) {
    switch (Constraint[0]) {

      return std::make_pair(0U, &NVPTX::Int1RegsRegClass);

      return std::make_pair(0U, &NVPTX::Int16RegsRegClass);

      return std::make_pair(0U, &NVPTX::Int16RegsRegClass);

      return std::make_pair(0U, &NVPTX::Int32RegsRegClass);

      return std::make_pair(0U, &NVPTX::Int64RegsRegClass);

             "supported for sm_70 and higher!");
      return std::make_pair(0U, &NVPTX::Int128RegsRegClass);

      return std::make_pair(0U, &NVPTX::Float32RegsRegClass);

      return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
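// The elided case labels are single-letter inline-asm constraints; in the
// NVPTX backend they conventionally map as b -> Int1, c/h -> Int16,
// r -> Int32, l -> Int64, q -> Int128 (the sm_70+ guard above),
// f -> Float32, d -> Float64. The letter pairing is stated from the
// backend's documented convention (assumption; the labels themselves are
// not visible here).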