#include "llvm/IR/IntrinsicsRISCV.h"

#define DEBUG_TYPE "riscv-lower"

    cl::desc("Give the maximum size (in number of nodes) of the web of "
             "instructions that we will consider for VW expansion"),

    cl::desc("Allow the formation of VW_W operations (e.g., "
             "VWADD_W) with splat constants"),

    cl::desc("Set the minimum number of repetitions of a divisor to allow "
             "transformation to multiplications by the reciprocal"),

    cl::desc("Give the maximum number of instructions that we will "
             "use for creating a floating-point immediate value"),
  if (Subtarget.isRVE())

      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
               "doesn't support the F instruction set extension (ignoring "

      !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
               "doesn't support the D instruction set extension (ignoring "

  if (Subtarget.hasStdExtF())
  if (Subtarget.hasStdExtD())

  auto addRegClassForRVV = [this](MVT VT) {
    if (VT.getVectorMinNumElements() < MinElts)

    unsigned Size = VT.getSizeInBits().getKnownMinValue();
      RC = &RISCV::VRRegClass;
      RC = &RISCV::VRM2RegClass;
      RC = &RISCV::VRM4RegClass;
      RC = &RISCV::VRM8RegClass;
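    // The register class follows the type's LMUL: a known minimum size of one
    // vector register selects VR, two registers VRM2, four VRM4 and eight
    // VRM8.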
  for (MVT VT : BoolVecVTs)
    addRegClassForRVV(VT);
  for (MVT VT : IntVecVTs) {
    if (VT.getVectorElementType() == MVT::i64 &&
    addRegClassForRVV(VT);

  for (MVT VT : F16VecVTs)
    addRegClassForRVV(VT);

  for (MVT VT : F32VecVTs)
    addRegClassForRVV(VT);

  for (MVT VT : F64VecVTs)
    addRegClassForRVV(VT);

  auto addRegClassForFixedVectors = [this](MVT VT) {

    if (useRVVForFixedLengthVectorVT(VT))
      addRegClassForFixedVectors(VT);

    if (useRVVForFixedLengthVectorVT(VT))
      addRegClassForFixedVectors(VT);
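  // Fixed-length vector types only get a register class when they can be
  // code-generated with RVV; they then share the register class of the
  // scalable container type they are lowered into.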
  if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())

      {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},

  if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
  if (!Subtarget.hasStdExtM()) {

  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
      Subtarget.hasVendorXTHeadBb()) {

      (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
       Subtarget.hasVendorXTHeadBb())

  if (Subtarget.hasStdExtZbb()) {
  if (Subtarget.hasVendorXTHeadBb()) {

  if (!Subtarget.hasVendorXVentanaCondOps() &&
      !Subtarget.hasVendorXTHeadCondMov())

  static const unsigned FPLegalNodeTypes[] = {
  static const unsigned FPOpToExpand[] = {
  static const unsigned FPRndMode[] = {

  if (Subtarget.hasStdExtZfh()) {
    static const unsigned ZfhminPromoteOps[] = {

  if (Subtarget.hasStdExtF()) {
    if (Subtarget.hasStdExtZfa())

  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())

  if (Subtarget.hasStdExtD()) {
    if (Subtarget.hasStdExtZfa()) {

  if (Subtarget.hasStdExtF()) {

  if (Subtarget.hasStdExtA()) {
  } else if (Subtarget.hasForcedAtomics()) {
  static const unsigned IntegerVPOps[] = {
      ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
      ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
      ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
      ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
      ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
      ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
      ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
      ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
      ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
      ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
      ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,

  static const unsigned FloatingPointVPOps[] = {
      ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
      ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
      ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
      ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
      ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
      ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
      ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
      ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
      ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
      ISD::VP_FRINT, ISD::VP_FNEARBYINT};
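  // These VP_* opcodes are the explicit-vector-length (mask plus EVL operand)
  // forms of the corresponding operations; they are marked Custom on RVV
  // vector types and lowered to the matching RISCVISD VL nodes.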
  static const unsigned IntegerVecReduceOps[] = {

  static const unsigned FloatingPointVecReduceOps[] = {

                       ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
                       ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
                       ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},

  for (MVT VT : BoolVecVTs) {

        {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,

         ISD::VP_TRUNCATE, ISD::VP_SETCC},

  for (MVT VT : IntVecVTs) {
    if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())

         ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},

        {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
         ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},

  const auto SetCommonVFPActions = [&](MVT VT) {

        {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
         ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},

  const auto SetCommonVFPExtLoadTruncStoreActions =
        for (auto SmallVT : SmallerVTs) {

    for (MVT VT : F16VecVTs) {
      SetCommonVFPActions(VT);

    for (MVT VT : F32VecVTs) {
      SetCommonVFPActions(VT);
      SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);

    for (MVT VT : F64VecVTs) {
      SetCommonVFPActions(VT);
      SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
      SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);

      if (!useRVVForFixedLengthVectorVT(VT))

            {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,

             ISD::VP_SETCC, ISD::VP_TRUNCATE},

             ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
             ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,

      if (!useRVVForFixedLengthVectorVT(VT))

             ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
             ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
  if (Subtarget.hasStdExtF())
  if (Subtarget.hasStdExtD())

  if (Subtarget.hasForcedAtomics()) {

  if (Subtarget.hasVendorXTHeadMemIdx()) {

  if (Subtarget.hasStdExtF())
  if (Subtarget.hasStdExtZbb())
  if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
  if (Subtarget.hasStdExtZbkb())
  if (Subtarget.hasStdExtF())
  if (Subtarget.hasVendorXTHeadMemPair())
MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {

                                             unsigned Intrinsic) const {
  auto &DL = I.getModule()->getDataLayout();
  switch (Intrinsic) {
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
    Info.ptrVal = I.getArgOperand(0);

  case Intrinsic::riscv_masked_strided_load:
    Info.ptrVal = I.getArgOperand(1);
    Info.align =
        Align(DL.getTypeSizeInBits(I.getType()->getScalarType()) / 8);

  case Intrinsic::riscv_masked_strided_store:
    Info.ptrVal = I.getArgOperand(1);
        DL.getTypeSizeInBits(I.getArgOperand(0)->getType()->getScalarType()) /

  case Intrinsic::riscv_seg2_load:
  case Intrinsic::riscv_seg3_load:
  case Intrinsic::riscv_seg4_load:
  case Intrinsic::riscv_seg5_load:
  case Intrinsic::riscv_seg6_load:
  case Intrinsic::riscv_seg7_load:
  case Intrinsic::riscv_seg8_load:
    Info.ptrVal = I.getArgOperand(0);
        getValueType(DL, I.getType()->getStructElementType(0)->getScalarType());
                  I.getType()->getStructElementType(0)->getScalarType()) /

  case Intrinsic::riscv_seg2_store:
  case Intrinsic::riscv_seg3_store:
  case Intrinsic::riscv_seg4_store:
  case Intrinsic::riscv_seg5_store:
  case Intrinsic::riscv_seg6_store:
  case Intrinsic::riscv_seg7_store:
  case Intrinsic::riscv_seg8_store:
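    // As for the segment loads above, only the base pointer, the scalar
    // element type and its alignment are described here; the total size of
    // the segment access is left unknown to the memory operand.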
    Info.ptrVal = I.getArgOperand(I.getNumOperands() - 3);
        DL.getTypeSizeInBits(I.getArgOperand(0)->getType()->getScalarType()) /
  return isInt<12>(Imm);

  return isInt<12>(Imm);

  return (SrcBits == 64 && DestBits == 32);

  return (SrcBits == 64 && DestBits == 32);

  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();

  return Subtarget.hasStdExtZbb();

  return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb();

  if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
  return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();

  EVT VT = Y.getValueType();

  return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
         !isa<ConstantSDNode>(Y);

  if (Subtarget.hasStdExtZbs())
    return X.getValueType().isScalarInteger();
  auto *C = dyn_cast<ConstantSDNode>(Y);
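  // Without Zbs or XTheadBs, testing bit C is only cheap when (1 << C) fits
  // in ANDI's 12-bit signed immediate, which limits the bit index to 10.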
  if (Subtarget.hasVendorXTHeadBs())
    return C != nullptr;
  return C && C->getAPIntValue().ule(10);

  if (BitSize > Subtarget.getXLen())
  int64_t Val = Imm.getSExtValue();

  if (!Subtarget.enableUnalignedScalarMem())

    unsigned OldShiftOpcode, unsigned NewShiftOpcode,

  if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
  if (NewShiftOpcode == ISD::SRL && CC->isOne())
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::FAdd:
  case Instruction::FSub:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
    return Operand == 1;

  auto *II = dyn_cast<IntrinsicInst>(I);

  switch (II->getIntrinsicID()) {
  case Intrinsic::fma:
  case Intrinsic::vp_fma:
    return Operand == 0 || Operand == 1;
  case Intrinsic::vp_shl:
  case Intrinsic::vp_lshr:
  case Intrinsic::vp_ashr:
  case Intrinsic::vp_udiv:
  case Intrinsic::vp_sdiv:
  case Intrinsic::vp_urem:
  case Intrinsic::vp_srem:
    return Operand == 1;
  case Intrinsic::vp_add:
  case Intrinsic::vp_mul:
  case Intrinsic::vp_and:
  case Intrinsic::vp_or:
  case Intrinsic::vp_xor:
  case Intrinsic::vp_fadd:
  case Intrinsic::vp_fmul:
  case Intrinsic::vp_sub:
  case Intrinsic::vp_fsub:
  case Intrinsic::vp_fdiv:
    return Operand == 0 || Operand == 1;

  for (auto OpIdx : enumerate(I->operands())) {
    Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
    if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))

    for (Use &U : Op->uses()) {
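      // Every use of the splat has to accept the scalar in a .vx/.vf operand
      // position; otherwise sinking it would duplicate the value across
      // scalar and vector registers.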
  if (!Subtarget.hasStdExtZfa())

  bool IsSupportedVT = false;
    IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
    IsSupportedVT = true;
    assert(Subtarget.hasStdExtD() && "Expect D extension");
    IsSupportedVT = true;

                                       bool ForCodeSize) const {
  bool IsLegalVT = false;
    IsLegalVT = Subtarget.hasStdExtF();
    IsLegalVT = Subtarget.hasStdExtD();

    return Imm.isZero();

  int Cost = Imm.isNegZero()
                                     Subtarget.getFeatureBits());
                                                     unsigned Index) const {

  if ((ResElts * 2) != SrcElts)

  if (VT == MVT::f16 && Subtarget.hasStdExtF() &&

  if (VT == MVT::f16 && Subtarget.hasStdExtF() &&

      isa<ConstantSDNode>(LHS.getOperand(1))) {
      ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);

  if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t C = RHSC->getSExtValue();
  switch (KnownSize) {
    return RISCV::VRRegClassID;
    return RISCV::VRM2RegClassID;
    return RISCV::VRM4RegClassID;
    return RISCV::VRM8RegClassID;

    static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm1_0 + Index;
    static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm2_0 + Index;
    static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm4_0 + Index;

    return RISCV::VRRegClassID;
std::pair<unsigned, unsigned>
    MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
                 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
                 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
                "Register classes not ordered");

  unsigned SubRegIdx = RISCV::NoSubRegister;
  for (const unsigned RCID :
       {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
    if (VecRegClassID > RCID && SubRegClassID <= RCID) {
      SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,

  return {SubRegIdx, InsertExtractIdx};
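  // The returned pair is the subregister index addressing the piece being
  // inserted or extracted plus the element index that still has to be handled
  // within that subregister.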
bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {

unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {

         "Unexpected opcode");
  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
  return Op.getOperand(II->VLOperand + 1 + HasChain);

bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
  return ::useRVVForFixedLengthVectorVT(VT, Subtarget);

         "Expected legal fixed length vector!");
  unsigned MaxELen = Subtarget.getELEN();

  return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());

         "Expected to convert into a scalable vector!");
  assert(V.getValueType().isFixedLengthVector() &&
         "Expected a fixed length vector operand!");

         "Expected to convert into a fixed length vector!");
  assert(V.getValueType().isScalableVector() &&
         "Expected a scalable vector operand!");

static std::pair<SDValue, SDValue>

static std::pair<SDValue, SDValue>

static std::pair<SDValue, SDValue>

                                        EVT VT, unsigned DefinedValues) const {
  SDValue Src = Op.getOperand(0);

  MVT DstVT = Op.getSimpleValueType();
  EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();

  if (Src.getSimpleValueType() == MVT::f16 && !Subtarget.hasStdExtZfh()) {

                       Opc, DL, DstVT, Src,

  MVT SrcVT = Src.getSimpleValueType();

  if (SatVT != DstEltVT)

  if (SrcEltSize > (2 * DstEltSize))

  MVT DstContainerVT = DstVT;
  MVT SrcContainerVT = SrcVT;

         "Expected same element count");

      {Src, Src, DAG.getCondCode(ISD::SETNE),
       DAG.getUNDEF(Mask.getValueType()), Mask, VL});

  if (DstEltSize > (2 * SrcEltSize)) {
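    // RVV conversions only change the element width by one step, so for a
    // widening of more than 2x the f16 source is first extended to f32 and
    // the remaining widening convert is done from there.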
  case ISD::VP_FROUNDEVEN:
  case ISD::VP_FROUNDTOZERO:
  case ISD::VP_FFLOOR:
  case ISD::VP_FROUND:

  MVT VT = Op.getSimpleValueType();

  SDValue Src = Op.getOperand(0);

  MVT ContainerVT = VT;

  if (Op->isVPOpcode()) {
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);

                               DAG.getUNDEF(ContainerVT), MaxValNode, VL);

  switch (Op.getOpcode()) {
  case ISD::VP_FFLOOR:
  case ISD::VP_FROUND:
  case ISD::VP_FROUNDEVEN:
  case ISD::VP_FROUNDTOZERO: {

  case ISD::VP_FNEARBYINT:

  if (Op.getOpcode() != ISD::VP_FNEARBYINT)

                    Src, Src, Mask, VL);
  MVT VT = Op.getSimpleValueType();

  SDValue Src = Op.getOperand(0);

    return std::nullopt;

  unsigned NumElts = Op.getNumOperands();

  bool IsInteger = Op.getValueType().isInteger();

  std::optional<unsigned> SeqStepDenom;
  std::optional<int64_t> SeqStepNum, SeqAddend;
  std::optional<std::pair<uint64_t, unsigned>> PrevElt;
  unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
    if (Op.getOperand(Idx).isUndef())

      if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
        return std::nullopt;
      Val = Op.getConstantOperandVal(Idx) &
            maskTrailingOnes<uint64_t>(EltSizeInBits);

      if (!isa<ConstantFPSDNode>(Op.getOperand(Idx)))
        return std::nullopt;
          cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
        Val = *ExactInteger;
        return std::nullopt;

      unsigned IdxDiff = Idx - PrevElt->second;
      int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);

      int64_t Remainder = ValDiff % IdxDiff;
      if (Remainder != ValDiff) {
          return std::nullopt;

        SeqStepNum = ValDiff;
      else if (ValDiff != SeqStepNum)
        return std::nullopt;

        SeqStepDenom = IdxDiff;
      else if (IdxDiff != *SeqStepDenom)
        return std::nullopt;

    if (!PrevElt || PrevElt->first != Val)
      PrevElt = std::make_pair(Val, Idx);

  if (!SeqStepNum || !SeqStepDenom)
    return std::nullopt;

  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
    if (Op.getOperand(Idx).isUndef())

      Val = Op.getConstantOperandVal(Idx) &
            maskTrailingOnes<uint64_t>(EltSizeInBits);
          cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
        (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
    int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
    else if (Addend != SeqAddend)
      return std::nullopt;

  assert(SeqAddend && "Must have an addend if we have a step");

  return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
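// Element Idx of a matched VID sequence equals
// (Idx * StepNumerator) / StepDenominator + Addend, using integer division;
// e.g. <0, 2, 4, 6> gives StepNumerator=2, StepDenominator=1, Addend=0 and
// <1, 1, 2, 2> gives StepNumerator=1, StepDenominator=2, Addend=1.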
  MVT ContainerVT = VT;

  MVT VT = Op.getSimpleValueType();

  unsigned NumElts = Op.getNumOperands();

    unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
    NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELEN());

      MVT IntegerViaVecVT =

      unsigned BitPos = 0, IntegerEltIdx = 0;

      for (unsigned I = 0; I < NumElts; I++, BitPos++) {

        if (I != 0 && I % NumViaIntegerBits == 0) {
          if (NumViaIntegerBits <= 32)
            Bits = SignExtend64<32>(Bits);

        bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
        Bits |= ((uint64_t)BitValue << BitPos);

      if (NumViaIntegerBits <= 32)
        Bits = SignExtend64<32>(Bits);

      if (NumElts < NumViaIntegerBits) {

        assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");

      assert(Splat.getValueType() == XLenVT &&
             "Unexpected type for i1 splat value");

    int64_t StepNumerator = SimpleVID->StepNumerator;
    unsigned StepDenominator = SimpleVID->StepDenominator;
    int64_t Addend = SimpleVID->Addend;

    assert(StepNumerator != 0 && "Invalid step");
    bool Negate = false;
    int64_t SplatStepVal = StepNumerator;

    if (StepNumerator != 1) {

      Negate = StepNumerator < 0;

        SplatStepVal = Log2_64(std::abs(StepNumerator));

    if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
         (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
        (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
      MVT VIDContainerVT =

      if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
          (StepOpcode == ISD::SHL && SplatStepVal != 0)) {

        VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);

      if (StepDenominator != 1) {
      if (Addend != 0 || Negate) {
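        // The splatted addend is applied last: a plain add, or a reverse
        // subtract (addend minus the scaled vid) when the step numerator was
        // negative.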
  const auto *BV = cast<BuildVectorSDNode>(Op);
  if (VT.isInteger() && EltBitSize < 64 &&
      BV->getRepeatedSequence(Sequence) &&
      (Sequence.size() * EltBitSize) <= 64) {
    unsigned SeqLen = Sequence.size();

           "Unexpected sequence type");

    unsigned EltIdx = 0;
    uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);

    for (const auto &SeqV : Sequence) {
      if (!SeqV.isUndef())
        SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
                       << (EltIdx * EltBitSize));

      SplatValue = SignExtend64<32>(SplatValue);

           "Unexpected bitcast sequence");
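    // The repeated group of narrow elements has been packed into a single
    // integer of at most XLEN bits; it can now be splatted as one element of
    // the wider type and bitcast back to the original vector type.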
    if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
      MVT ViaContainerVT =

  unsigned MostCommonCount = 0;

  unsigned NumUndefElts =
      count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });

  unsigned NumScalarLoads = 0;

  for (SDValue V : Op->op_values()) {

    ValueCounts.insert(std::make_pair(V, 0));
    unsigned &Count = ValueCounts[V];
    if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
      NumScalarLoads += !CFP->isExactlyValue(+0.0);

    if (++Count >= MostCommonCount) {
      MostCommonCount = Count;

  assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
  unsigned NumDefElts = NumElts - NumUndefElts;
  unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;

      ((MostCommonCount > DominantValueCountThreshold) ||

    for (const auto &OpIdx : enumerate(Op->ops())) {
      const SDValue &V = OpIdx.value();
      if (V.isUndef() || !Processed.insert(V).second)
      if (ValueCounts[V] == 1) {

        return DAG.getConstant(V == V1, DL, XLenVT);
  if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
    int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
    int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();

    if ((LoC >> 31) == HiC)

    auto *Const = dyn_cast<ConstantSDNode>(VL);
    if (LoC == HiC && Const && Const->isAllOnes()) {

  bool HasPassthru = Passthru && !Passthru.isUndef();
  if (!HasPassthru && !Passthru)

  if (Scalar.getValueType().bitsLE(XLenVT)) {

    Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);

      (!Const || isNullConstant(Scalar) || !isInt<5>(Const->getSExtValue())))

         "Unexpected scalar for splat lowering!");

         "Unexpected vector MVT");

  auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
                            DAG.getUNDEF(InnerVT), Scalar, VL);

  if (!Scalar.getValueType().bitsLE(XLenVT))
                           VT, DL, DAG, Subtarget);

  Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);

  if (!isNullConstant(Scalar) && isInt<5>(Const->getSExtValue()) &&

  auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
                            DAG.getUNDEF(InnerVT), Scalar, VL);

  if (Src != V2.getOperand(0))

  if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))

      V2.getConstantOperandVal(1) != Mask.size())

  if (Mask[0] != 0 && Mask[0] != 1)

  for (unsigned i = 1; i != Mask.size(); ++i)
    if (Mask[i] != Mask[i - 1] + 2)

  int Size = Mask.size();

  EvenSrc = StartIndexes[0] % 2 ? StartIndexes[1] : StartIndexes[0];
  OddSrc = StartIndexes[0] % 2 ? StartIndexes[0] : StartIndexes[1];

  if (EvenSrc != 0 && OddSrc != 0)
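  // An interleave takes elements alternately from the low halves of the two
  // sources, so at least one of the two alternating streams has to start at
  // element 0 for the match to succeed.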
  int Size = Mask.size();

  for (int i = 0; i != Size; ++i) {

    int StartIdx = i - (M % Size);

    int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;

      Rotation = CandidateRotation;
    else if (Rotation != CandidateRotation)

    int MaskSrc = M < Size ? 0 : 1;

    int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;

      TargetSrc = MaskSrc;
    else if (TargetSrc != MaskSrc)

  assert(Rotation != 0 && "Failed to locate a viable rotation!");
  assert((LoSrc >= 0 || HiSrc >= 0) &&
         "Failed to find a rotated input vector!");
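  // A matched rotation is lowered as a vslidedown of HiV by Rotation followed
  // by a vslideup of LoV by NumElts - Rotation, as in the VECTOR_SHUFFLE
  // lowering further below.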
  MVT ContainerVT = VT;

  assert(Src.getSimpleValueType().isFixedLengthVector());

  MVT SrcContainerVT =

  Src = DAG.getBitcast(WideSrcContainerVT, Src);

  unsigned Shift = EvenElts ? 0 : EltBits;

                                DAG.getUNDEF(IntContainerVT), TrueMask, VL);

  if (Merge.isUndef())

  if (Merge.isUndef())

  auto findNonEXTRACT_SUBVECTORParent =
      [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
           Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
      Offset += Parent.getConstantOperandVal(1);
      Parent = Parent.getOperand(0);
    return std::make_pair(Parent, Offset);

  auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
  auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);

  for (size_t i = 0; i != NewMask.size(); ++i) {
    if (NewMask[i] == -1)

    if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
      NewMask[i] = NewMask[i] + V1IndexOffset;
      NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;

  if (NewMask[0] <= 0)

  for (unsigned i = 1; i != NewMask.size(); ++i)
    if (NewMask[i - 1] + 1 != NewMask[i])

  MVT SrcVT = Src.getSimpleValueType();

  MVT VecContainerVT = VecVT;

  MVT WideContainerVT = WideVT;

  EvenV = DAG.getBitcast(VecContainerVT, EvenV);

                            EvenV, OddV, Passthru, Mask, VL);

                            AllOnesVec, Passthru, Mask, VL);

                            OddsMul, Passthru, Mask, VL);

  Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
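  // The interleave is built with widening arithmetic: vwaddu computes
  // Even + Odd in double-width elements and vwmaccu then adds
  // (2^EltBits - 1) * Odd, so each wide element holds Even + (Odd << EltBits),
  // i.e. the interleaved pair.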
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);

  MVT VT = Op.getSimpleValueType();

        V.getOperand(0).getSimpleValueType().getVectorNumElements();
    V = V.getOperand(Offset / OpElements);

    auto *Ld = cast<LoadSDNode>(V);

      SDValue Ops[] = {Ld->getChain(),

      V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
                      Ld->getPointerInfo().getWithOffset(Offset),
                      Ld->getOriginalAlign(),

                        Ld->getPointerInfo().getWithOffset(Offset), SVT,
                        Ld->getOriginalAlign(),
                        Ld->getMemOperand()->getFlags());

    assert(Lane < (int)NumElts && "Unexpected lane!");

                                 DAG.getUNDEF(ContainerVT), TrueMask, VL);

      LoV = LoSrc == 0 ? V1 : V2;
      HiV = HiSrc == 0 ? V1 : V2;

    unsigned InvRotate = NumElts - Rotation;

      Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,

  int EvenSrc, OddSrc;

    int Size = Mask.size();

    assert(EvenSrc >= 0 && "Undef source?");
    EvenV = (EvenSrc / Size) == 0 ? V1 : V2;

    assert(OddSrc >= 0 && "Undef source?");
    OddV = (OddSrc / Size) == 0 ? V1 : V2;

    int MaskIndex = MaskIdx.value();
    return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;

  assert(!V1.isUndef() && "Unexpected shuffle canonicalization");

  bool InvertMask = IsSelect == SwapOps;

  for (int MaskIndex : Mask) {
    bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;

    bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
    GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0

        IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)

    if (IsLHSOrUndefIndex && MaskIndex >= 0)
      ++LHSIndexCounts[MaskIndex];
    if (!IsLHSOrUndefIndex)
      ++RHSIndexCounts[MaskIndex - NumElts];

    std::swap(GatherIndicesLHS, GatherIndicesRHS);

  assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");

  MVT IndexContainerVT =

    if (LHSIndexCounts.size() == 1) {
      int SplatIndex = LHSIndexCounts.begin()->getFirst();
      Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
                           DAG.getUNDEF(ContainerVT), TrueMask, VL);

      Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
                           DAG.getUNDEF(ContainerVT), TrueMask, VL);

  if (!V2.isUndef()) {

    if (RHSIndexCounts.size() == 1) {
      int SplatIndex = RHSIndexCounts.begin()->getFirst();
      Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,

      Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather,
RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
  MVT VT = Op.getSimpleValueType();

  SDValue Src = Op.getOperand(0);

         "Expected legal float type!");

  if (FloatVT.bitsGT(VT)) {

  MVT ContainerVT = VT;

    MVT ContainerFloatVT =
                          Src, Mask, RTZRM, VL);

  unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;

  else if (IntVT.bitsGT(VT))

  unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;

  unsigned Adjust = ExponentBias + (EltSize - 1);
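  // Converting to float and extracting the biased exponent yields
  // floor(log2(x)): CTLZ is then Adjust - Exponent with
  // Adjust = ExponentBias + EltSize - 1, while CTTZ applies the same trick to
  // (x & -x) and uses Exponent - ExponentBias.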
  auto *Load = cast<LoadSDNode>(Op);
  assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");

          Load->getMemoryVT(),
          *Load->getMemOperand()))

  MVT VT = Op.getSimpleValueType();

  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
         "Unexpected unaligned RVV load type");

         "Expecting equally-sized RVV vector types to be legal");

                  Load->getPointerInfo(), Load->getOriginalAlign(),
                  Load->getMemOperand()->getFlags());

  auto *Store = cast<StoreSDNode>(Op);
  assert(Store && Store->getValue().getValueType().isVector() &&
         "Expected vector store");

          Store->getMemoryVT(),
          *Store->getMemOperand()))

  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
         "Unexpected unaligned RVV store type");

         "Expecting equally-sized RVV vector types to be legal");
  StoredVal = DAG.getBitcast(NewVT, StoredVal);

                   Store->getPointerInfo(), Store->getOriginalAlign(),
                   Store->getMemOperand()->getFlags());
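  // An element-misaligned RVV access is legalised by reinterpreting the
  // vector as an equally sized vector of bytes, which only needs byte
  // alignment, and bitcasting the value around the memory operation.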
  int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();

  if (Subtarget.hasStdExtZtso()) {

  switch (Op.getOpcode()) {
    return lowerGlobalAddress(Op, DAG);
    return lowerBlockAddress(Op, DAG);
    return lowerConstantPool(Op, DAG);
    return lowerJumpTable(Op, DAG);
    return lowerGlobalTLSAddress(Op, DAG);
    return lowerSELECT(Op, DAG);
    return lowerBRCOND(Op, DAG);
    return lowerVASTART(Op, DAG);
    return lowerFRAMEADDR(Op, DAG);
    return lowerRETURNADDR(Op, DAG);
    return lowerShiftLeftParts(Op, DAG);
    return lowerShiftRightParts(Op, DAG, true);
    return lowerShiftRightParts(Op, DAG, false);

    EVT VT = Op.getValueType();
    SDValue Op0 = Op.getOperand(0);

        Subtarget.hasStdExtF()) {

        Subtarget.hasStdExtZfa()) {

           "Unexpected types");

    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
    return LowerINTRINSIC_W_CHAIN(Op, DAG);
    return LowerINTRINSIC_VOID(Op, DAG);

    MVT VT = Op.getSimpleValueType();

    assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");

    if (!Op.getSimpleValueType().isVector())
    return lowerVectorTruncLike(Op, DAG);

    if (Op.getOperand(0).getValueType().isVector() &&
        Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskExt(Op, DAG, 1);

    if (Op.getOperand(0).getValueType().isVector() &&
        Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskExt(Op, DAG, -1);

    return lowerSPLAT_VECTOR_PARTS(Op, DAG);
    return lowerINSERT_VECTOR_ELT(Op, DAG);
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);

    MVT VT = Op.getSimpleValueType();

    uint64_t Val = Op.getConstantOperandVal(0);

        Op.getOperand(1).getValueType() == MVT::i32) {

    if (!Op.getValueType().isVector())
    return lowerVectorFPExtendOrRoundLike(Op, DAG);

    return lowerStrictFPExtend(Op, DAG);

    MVT VT = Op.getSimpleValueType();

    SDValue Src = Op.getOperand(0);

    MVT SrcVT = Src.getSimpleValueType();

           "Unexpected vector element types");

    if (EltSize > (2 * SrcEltSize)) {

      return DAG.getNode(Op.getOpcode(), DL, VT, Ext);

    assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");

    return DAG.getNode(Op.getOpcode(), DL, VT, FExt);

    if (SrcEltSize > (2 * EltSize)) {

    unsigned RVVOpc = 0;
    switch (Op.getOpcode()) {