74#include "llvm/IR/IntrinsicsAArch64.h"
109#define DEBUG_TYPE "aarch64-lower"
112STATISTIC(NumOptimizedImms,
"Number of times immediates were optimized");
    cl::desc("Allow AArch64 Local Dynamic TLS code generation"),

    cl::desc("Enable AArch64 logical imm instruction "

    cl::desc("Combine extends of AArch64 masked "
             "gather intrinsics"),

    cl::desc("Combine ext and trunc to TBL"),

    cl::desc("Enable / disable SVE scalable vectors in Global ISel"),

    cl::desc("Generate ISD::PTRADD nodes for pointer arithmetic in "
             "SelectionDAG for FEAT_CPA"),
    AArch64::X3, AArch64::X4, AArch64::X5,
    AArch64::X6, AArch64::X7};

    AArch64::Q3, AArch64::Q4, AArch64::Q5,
    AArch64::Q6, AArch64::Q7};
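// AAPCS64 passes the first eight integer arguments in X0-X7 and the first
// eight floating-point/SIMD arguments in Q0-Q7.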
    return MVT::nxv8bf16;

  switch (EC.getKnownMinValue()) {

         "Expected scalable predicate vector type!");

         "Expected legal vector type!");

         "Expected legal type!");
  return VT == MVT::nxv16i1;

         "Unexpected fixed-size unpacked type.");
  case AArch64ISD::BITREVERSE_MERGE_PASSTHRU:
  case AArch64ISD::BSWAP_MERGE_PASSTHRU:
  case AArch64ISD::REVH_MERGE_PASSTHRU:
  case AArch64ISD::REVW_MERGE_PASSTHRU:
  case AArch64ISD::REVD_MERGE_PASSTHRU:
  case AArch64ISD::CTLZ_MERGE_PASSTHRU:
  case AArch64ISD::CTPOP_MERGE_PASSTHRU:
  case AArch64ISD::DUP_MERGE_PASSTHRU:
  case AArch64ISD::ABS_MERGE_PASSTHRU:
  case AArch64ISD::NEG_MERGE_PASSTHRU:
  case AArch64ISD::FNEG_MERGE_PASSTHRU:
  case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
  case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
  case AArch64ISD::FCEIL_MERGE_PASSTHRU:
  case AArch64ISD::FFLOOR_MERGE_PASSTHRU:
  case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU:
  case AArch64ISD::FRINT_MERGE_PASSTHRU:
  case AArch64ISD::FRINT32_MERGE_PASSTHRU:
  case AArch64ISD::FRINT64_MERGE_PASSTHRU:
  case AArch64ISD::FROUND_MERGE_PASSTHRU:
  case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU:
  case AArch64ISD::FTRUNC_MERGE_PASSTHRU:
  case AArch64ISD::FTRUNC32_MERGE_PASSTHRU:
  case AArch64ISD::FTRUNC64_MERGE_PASSTHRU:
  case AArch64ISD::FP_ROUND_MERGE_PASSTHRU:
  case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU:
  case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU:
  case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU:
  case AArch64ISD::FCVTX_MERGE_PASSTHRU:
  case AArch64ISD::FCVTZU_MERGE_PASSTHRU:
  case AArch64ISD::FCVTZS_MERGE_PASSTHRU:
  case AArch64ISD::FSQRT_MERGE_PASSTHRU:
  case AArch64ISD::FRECPX_MERGE_PASSTHRU:
  case AArch64ISD::FABS_MERGE_PASSTHRU:
  switch (Op.getOpcode()) {

  case AArch64ISD::PTRUE:
  case AArch64ISD::SETCC_MERGE_ZERO:
    switch (Op.getConstantOperandVal(0)) {

    case Intrinsic::aarch64_sve_ptrue:
    case Intrinsic::aarch64_sve_pnext:
    case Intrinsic::aarch64_sve_cmpeq:
    case Intrinsic::aarch64_sve_cmpne:
    case Intrinsic::aarch64_sve_cmpge:
    case Intrinsic::aarch64_sve_cmpgt:
    case Intrinsic::aarch64_sve_cmphs:
    case Intrinsic::aarch64_sve_cmphi:
    case Intrinsic::aarch64_sve_cmpeq_wide:
    case Intrinsic::aarch64_sve_cmpne_wide:
    case Intrinsic::aarch64_sve_cmpge_wide:
    case Intrinsic::aarch64_sve_cmpgt_wide:
    case Intrinsic::aarch64_sve_cmplt_wide:
    case Intrinsic::aarch64_sve_cmple_wide:
    case Intrinsic::aarch64_sve_cmphs_wide:
    case Intrinsic::aarch64_sve_cmphi_wide:
    case Intrinsic::aarch64_sve_cmplo_wide:
    case Intrinsic::aarch64_sve_cmpls_wide:
    case Intrinsic::aarch64_sve_fcmpeq:
    case Intrinsic::aarch64_sve_fcmpne:
    case Intrinsic::aarch64_sve_fcmpge:
    case Intrinsic::aarch64_sve_fcmpgt:
    case Intrinsic::aarch64_sve_fcmpuo:
    case Intrinsic::aarch64_sve_facgt:
    case Intrinsic::aarch64_sve_facge:
    case Intrinsic::aarch64_sve_whilege:
    case Intrinsic::aarch64_sve_whilegt:
    case Intrinsic::aarch64_sve_whilehi:
    case Intrinsic::aarch64_sve_whilehs:
    case Intrinsic::aarch64_sve_whilele:
    case Intrinsic::aarch64_sve_whilelo:
    case Intrinsic::aarch64_sve_whilels:
    case Intrinsic::aarch64_sve_whilelt:
    case Intrinsic::aarch64_sve_match:
    case Intrinsic::aarch64_sve_nmatch:
    case Intrinsic::aarch64_sve_whilege_x2:
    case Intrinsic::aarch64_sve_whilegt_x2:
    case Intrinsic::aarch64_sve_whilehi_x2:
    case Intrinsic::aarch64_sve_whilehs_x2:
    case Intrinsic::aarch64_sve_whilele_x2:
    case Intrinsic::aarch64_sve_whilelo_x2:
    case Intrinsic::aarch64_sve_whilels_x2:
    case Intrinsic::aarch64_sve_whilelt_x2:
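    // SVE compare, match and while intrinsics all produce predicates whose
    // inactive lanes are known to be zero.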
static std::tuple<SDValue, SDValue>

  if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))

    AddrDisc = DAG->getRegister(AArch64::NoRegister, MVT::i64);

  return std::make_tuple(
  if (Subtarget->hasLS64()) {

  if (Subtarget->hasFPARMv8()) {

  if (Subtarget->hasNEON()) {

    addDRType(MVT::v2f32);
    addDRType(MVT::v8i8);
    addDRType(MVT::v4i16);
    addDRType(MVT::v2i32);
    addDRType(MVT::v1i64);
    addDRType(MVT::v1f64);
    addDRType(MVT::v4f16);
    addDRType(MVT::v4bf16);

    addQRType(MVT::v4f32);
    addQRType(MVT::v2f64);
    addQRType(MVT::v16i8);
    addQRType(MVT::v8i16);
    addQRType(MVT::v4i32);
    addQRType(MVT::v2i64);
    addQRType(MVT::v8f16);
    addQRType(MVT::v8bf16);
  if (Subtarget->isSVEorStreamingSVEAvailable()) {

  if (Subtarget->useSVEForFixedLengthVectors()) {

  if (Subtarget->hasFPARMv8()) {

  if (Subtarget->hasFPARMv8()) {

  if (Subtarget->hasFPARMv8()) {

  if (Subtarget->hasCSSC()) {

  if (Subtarget->hasFullFP16()) {

  if (Subtarget->hasFullFP16()) {

  auto LegalizeNarrowFP = [this](MVT ScalarVT) {

  if (!Subtarget->hasFullFP16()) {
    LegalizeNarrowFP(MVT::f16);

  LegalizeNarrowFP(MVT::bf16);
  for (MVT Ty : {MVT::f32, MVT::f64})

  if (Subtarget->hasFullFP16())

  for (MVT Ty : {MVT::f32, MVT::f64})

  if (Subtarget->hasFullFP16())

  if (!Subtarget->hasLSE() && !Subtarget->outlineAtomics()) {

  if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {

  if (Subtarget->outlineAtomics() && !Subtarget->hasLSFE()) {

  if (Subtarget->hasLSE128()) {

  if (Subtarget->hasLSE2()) {

  if (WideVT.getScalarSizeInBits() > NarrowVT.getScalarSizeInBits()) {

  if (Subtarget->hasFPARMv8()) {

  if (!Subtarget->isTargetWindows())

  if (Subtarget->hasSME())

  if (Subtarget->isNeonAvailable()) {
    for (auto VT : {MVT::v2i32, MVT::v2i64, MVT::v4i32})

    if (Subtarget->hasFullFP16()) {

    for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,

    for (auto VT : {MVT::v1i64, MVT::v2i64}) {

    for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v1i64,
                   MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {

    for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,

    for (MVT VT : {MVT::v4f16, MVT::v2f32,
                   MVT::v8f16, MVT::v4f32, MVT::v2f64}) {
      if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {

    if (Subtarget->hasFullFP16())

    for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32,
                   MVT::v16i8, MVT::v8i16, MVT::v4i32}) {

      if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {

      if (VT == MVT::v4i16 || VT == MVT::v8i16 || VT == MVT::v2i32 ||
          VT == MVT::v4i32 || VT == MVT::v2i64)

      if (VT == MVT::v8i8 || VT == MVT::v16i8 || VT == MVT::v8i16 ||
          VT == MVT::v4i16 || VT == MVT::v2i32 || VT == MVT::v4i32)

    for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})

    if (Subtarget->hasFullFP16())
      for (MVT Ty : {MVT::v4f16, MVT::v8f16})

    for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})

    if (Subtarget->hasFullFP16())
      for (MVT Ty : {MVT::v4f16, MVT::v8f16})
    for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64})

    for (MVT VT : {MVT::v16f16, MVT::v8f32, MVT::v4f64})

    if (Subtarget->hasDotProd()) {

    if (Subtarget->hasMatMulInt8()) {

    if (Subtarget->hasF16F32DOT() || Subtarget->hasFP16FML()) {

    if (Subtarget->hasBF16())
                         MVT::v8bf16, Legal);

    if (Subtarget->hasAES()) {

      if (VT.is128BitVector() || VT.is64BitVector()) {

    for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64}) {

  if (Subtarget->hasSME()) {

  if (Subtarget->isSVEorStreamingSVEAvailable()) {
         {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {

    for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {

  if (Subtarget->isSVEorStreamingSVEAvailable() &&
      (Subtarget->hasSVE2p1() || Subtarget->hasSME2()))

    for (auto VT : {MVT::v16i8, MVT::v8i8, MVT::v4i16, MVT::v2i32})

    for (auto VT : {MVT::v8f16, MVT::v4f32, MVT::v2f64})

  if (Subtarget->isSVEorStreamingSVEAvailable()) {
    for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {

    if (!Subtarget->isLittleEndian())

    if (Subtarget->hasSVE2() ||
        (Subtarget->hasSME() && Subtarget->isStreaming()))
    for (auto VT : {MVT::nxv4i32, MVT::nxv2i64}) {

    for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {

    for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32})

         {MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
          MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16})

         {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {

      if (VT != MVT::nxv16i1) {

         {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
          MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
          MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {

    for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
                    MVT::nxv4f32, MVT::nxv2f64}) {

    for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {

    if (Subtarget->hasSVEB16B16() &&
        Subtarget->isNonStreamingSVEorSME2Available()) {

      for (auto VT : {MVT::v4bf16, MVT::v8bf16, MVT::nxv2bf16, MVT::nxv4bf16,

    if (!Subtarget->hasSVEB16B16() ||
        !Subtarget->isNonStreamingSVEorSME2Available()) {

      for (MVT VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {

        if (VT != MVT::nxv2bf16 && Subtarget->hasBF16())

    if (Subtarget->hasBF16() && Subtarget->isNeonAvailable())

    for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
                    MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
  if (Subtarget->hasSVE2() || Subtarget->isStreamingSVEAvailable()) {

  if (Subtarget->useSVEForFixedLengthVectors()) {
                                        VT, !Subtarget->isNeonAvailable()))
        addTypeForFixedLengthSVE(VT);
                                        VT, !Subtarget->isNeonAvailable()))
        addTypeForFixedLengthSVE(VT);

    for (auto VT : {MVT::v8i8, MVT::v4i16})

    for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})

    for (auto VT : {MVT::v8f16, MVT::v4f32, MVT::v8bf16})

    for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
                    MVT::v2i32, MVT::v4i32, MVT::v2i64}) {

    for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})

    for (auto VT : {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1})

  if (Subtarget->isSVEorStreamingSVEAvailable()) {

    if (Subtarget->hasMatMulInt8()) {
                         MVT::nxv16i8, Legal);

    if (Subtarget->hasSVE2() || Subtarget->hasSME()) {
                         MVT::nxv8f16, Legal);

    if (!Subtarget->isNeonAvailable() ||
        (!Subtarget->hasF16F32DOT() && !Subtarget->hasFP16FML())) {

    if (Subtarget->hasBF16())
                         MVT::nxv8bf16, Legal);

    if (Subtarget->hasSVEAES() &&
        (Subtarget->isSVEAvailable() || Subtarget->hasSSVE_AES()))

  if (Subtarget->hasSVE2() ||
      (Subtarget->hasSME() && Subtarget->isStreaming())) {

    for (auto VT : {MVT::v2i32, MVT::v4i16, MVT::v8i8, MVT::v16i8}) {

    for (auto VT : {MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1, MVT::nxv16i1}) {
  if (Subtarget->isSVEAvailable()) {
    for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64,
                    MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
                    MVT::nxv4f32, MVT::nxv2f64, MVT::nxv2bf16, MVT::nxv4bf16,
                    MVT::nxv8bf16, MVT::v4f16, MVT::v8f16, MVT::v2f32,
                    MVT::v4f32, MVT::v1f64, MVT::v2f64, MVT::v8i8,
                    MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
                    MVT::v4i32, MVT::v1i64, MVT::v2i64}) {

    for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
                    MVT::nxv4f32, MVT::nxv2f64, MVT::v4f16, MVT::v8f16,
                    MVT::v2f32, MVT::v4f32, MVT::v2f64})

         {MVT::nxv4i32, MVT::nxv2i64, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv2f64})

    for (auto VT : {MVT::v2i32, MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32,

    for (auto VT : {MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64,
                    MVT::nxv2f32, MVT::nxv2f64, MVT::nxv4i8, MVT::nxv4i16,
                    MVT::nxv4i32, MVT::nxv4f32}) {

  if (Subtarget->hasSVE2()) {

  if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {

  if (Subtarget->hasSVE()) {

  if (Subtarget->isTargetWindows()) {
void AArch64TargetLowering::addTypeForNEON(MVT VT) {

  if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {

  if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
      ((VT == MVT::v4bf16 || VT == MVT::v8bf16 || VT == MVT::v4f16 ||
        VT == MVT::v8f16) &&
       Subtarget->hasFullFP16()))

  if (VT != MVT::v8i8 && VT != MVT::v16i8)

  for (unsigned Opcode :

  for (unsigned Opcode :

  if (Subtarget->isLittleEndian()) {

  if (Subtarget->hasD128()) {

  if (!Subtarget->isSVEorStreamingSVEAvailable() ||
                          (OpVT != MVT::i32 && OpVT != MVT::i64))))

  if (!Subtarget->isSVEorStreamingSVEAvailable())

  return VT != MVT::nxv16i1 && VT != MVT::nxv8i1 && VT != MVT::nxv4i1 &&
         VT != MVT::nxv2i1 && VT != MVT::v16i1 && VT != MVT::v8i1 &&
         VT != MVT::v4i1 && VT != MVT::v2i1;

                                                  unsigned SearchSize) const {
  if (!Subtarget->hasSVE2() || !Subtarget->isSVEAvailable())

  if (VT == MVT::nxv8i16 || VT == MVT::v8i16)
    return SearchSize != 8;
  if (VT == MVT::nxv16i8 || VT == MVT::v16i8 || VT == MVT::v8i8)
    return SearchSize != 8 && SearchSize != 16;
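  // The SVE2 MATCH instruction only handles segments of sixteen 8-bit or
  // eight 16-bit elements; other search sizes have to be expanded.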
void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {

  while (InnerVT != VT) {

  while (InnerVT != VT) {

  bool PreferSVE = !PreferNEON && Subtarget->isSVEAvailable();

  if (Subtarget->hasMatMulInt8()) {

void AArch64TargetLowering::addDRType(MVT VT) {
  if (Subtarget->isNeonAvailable())

void AArch64TargetLowering::addQRType(MVT VT) {
  if (Subtarget->isNeonAvailable())

  Imm = C->getZExtValue();
  case AArch64ISD::SQDMULH:

  return N->getOpcode() == Opc &&

                            const APInt &Demanded,

  uint64_t OldImm = Imm, NewImm, Enc;

  if (Imm == 0 || Imm == Mask ||

  unsigned EltSize = Size;

      ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
  uint64_t Sum = RotatedImm + NonDemandedBits;
  bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
  uint64_t Ones = (Sum + Carry) & NonDemandedBits;
  NewImm = (Imm | Ones) & Mask;
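  // Setting some of the bits that are not demanded is intended to turn the
  // immediate into a value that is encodable as an AArch64 logical immediate
  // (a rotated, replicated run of ones).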
  while (EltSize < Size) {
    NewImm |= NewImm << EltSize;
2689 "demanded bits should never be altered");
2690 assert(OldImm != NewImm &&
"the new imm shouldn't be equal to the old imm");
2693 EVT VT =
Op.getValueType();
2699 if (NewImm == 0 || NewImm == OrigMask) {
2724 EVT VT =
Op.getValueType();
2738 switch (
Op.getOpcode()) {
2742 NewOpc =
Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
2745 NewOpc =
Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
2748 NewOpc =
Size == 32 ? AArch64::EORWri : AArch64::EORXri;
  switch (Op.getOpcode()) {

  case AArch64ISD::DUP: {

    if (SrcOp.getValueSizeInBits() != Op.getScalarValueSizeInBits()) {
      assert(SrcOp.getValueSizeInBits() > Op.getScalarValueSizeInBits() &&
             "Expected DUP implicit truncation");
      Known = Known.trunc(Op.getScalarValueSizeInBits());

  case AArch64ISD::CSEL: {

  case AArch64ISD::CSNEG:
  case AArch64ISD::CSINC:
  case AArch64ISD::CSINV: {

    if (Op.getOpcode() == AArch64ISD::CSINC)

    else if (Op.getOpcode() == AArch64ISD::CSINV)

    else if (Op.getOpcode() == AArch64ISD::CSNEG)

                                 Op.getScalarValueSizeInBits())));

  case AArch64ISD::BICi: {

        ~(Op->getConstantOperandAPInt(1) << Op->getConstantOperandAPInt(2))

  case AArch64ISD::VLSHR: {

  case AArch64ISD::VASHR: {

  case AArch64ISD::VSHL: {

  case AArch64ISD::MOVI: {

  case AArch64ISD::MOVIshift: {

                                     << Op->getConstantOperandVal(1)));
  case AArch64ISD::MOVImsl: {
        Known.getBitWidth(), ~(~Op->getConstantOperandVal(0) << ShiftAmt)));

  case AArch64ISD::MOVIedit: {

  case AArch64ISD::MVNIshift: {
        ~(Op->getConstantOperandVal(0) << Op->getConstantOperandVal(1)),

  case AArch64ISD::MVNImsl: {

  case AArch64ISD::LOADgot:
  case AArch64ISD::ADDlow: {
    if (!Subtarget->isTargetILP32())

  case AArch64ISD::ASSERT_ZEXT_BOOL: {

    case Intrinsic::aarch64_ldaxr:
    case Intrinsic::aarch64_ldxr: {

    unsigned IntNo = Op.getConstantOperandVal(0);

    case Intrinsic::aarch64_neon_uaddlv: {
      MVT VT = Op.getOperand(1).getValueType().getSimpleVT();

      if (VT == MVT::v8i8 || VT == MVT::v16i8) {
        unsigned Bound = (VT == MVT::v8i8) ? 11 : 12;

    case Intrinsic::aarch64_neon_umaxv:
    case Intrinsic::aarch64_neon_uminv: {

      MVT VT = Op.getOperand(1).getValueType().getSimpleVT();

      if (VT == MVT::v8i8 || VT == MVT::v16i8) {
      } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
                                                           unsigned Depth) const {
  EVT VT = Op.getValueType();

  unsigned Opcode = Op.getOpcode();

  case AArch64ISD::FCMEQ:
  case AArch64ISD::FCMGE:
  case AArch64ISD::FCMGT:

  case AArch64ISD::VASHR: {
    return std::min<uint64_t>(Tmp + Op.getConstantOperandVal(1), VTBits);

                                                            unsigned *Fast) const {
    if (ElementSizeBits % 8 == 0 && Alignment >= Align(ElementSizeBits / 8))

  if (Subtarget->requiresStrictAlign())

    *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||

                                                            unsigned *Fast) const {
  if (Subtarget->requiresStrictAlign())

    *Fast = !Subtarget->isMisaligned128StoreSlow() ||
            Ty.getSizeInBytes() != 16 ||
  Register DestReg = MI.getOperand(0).getReg();
  Register IfTrueReg = MI.getOperand(1).getReg();
  Register IfFalseReg = MI.getOperand(2).getReg();
  unsigned CondCode = MI.getOperand(3).getImm();
  bool NZCVKilled = MI.getOperand(4).isKill();

  MBB->addSuccessor(TrueBB);
  MBB->addSuccessor(EndBB);

  MI.eraseFromParent();

         "SEH does not use catchret!");

  Register TargetReg = MI.getOperand(0).getReg();

  TII.probedStackAlloc(MBBI, TargetReg, false);

  MI.eraseFromParent();
  return NextInst->getParent();

  MBB->addSuccessor(TrapBB);
  MBB->addSuccessor(PassBB);

  MI.eraseFromParent();
  MIB.add(MI.getOperand(1));
  MIB.add(MI.getOperand(2));
  MIB.add(MI.getOperand(3));
  MIB.add(MI.getOperand(4));
  MIB.add(MI.getOperand(5));

  MI.eraseFromParent();

  MIB.add(MI.getOperand(0));
  MIB.add(MI.getOperand(1));
  MIB.add(MI.getOperand(2));
  MIB.add(MI.getOperand(1));

  MI.eraseFromParent();

                                                 bool Op0IsDef) const {

  for (unsigned I = 1; I < MI.getNumOperands(); ++I)
    MIB.add(MI.getOperand(I));

  MI.eraseFromParent();
  unsigned StartIdx = 0;

  bool HasTile = BaseReg != AArch64::ZA;
  bool HasZPROut = HasTile && MI.getOperand(0).isReg();

    MIB.add(MI.getOperand(StartIdx));

    MIB.addReg(BaseReg + MI.getOperand(StartIdx).getImm(),

    MIB.addReg(BaseReg + MI.getOperand(StartIdx).getImm());

    if (MI.getOperand(0).isReg() && !MI.getOperand(1).isImm()) {
      MIB.add(MI.getOperand(StartIdx));

  for (unsigned I = StartIdx; I < MI.getNumOperands(); ++I)
    MIB.add(MI.getOperand(I));

  MI.eraseFromParent();

  MIB.add(MI.getOperand(0));

  unsigned Mask = MI.getOperand(0).getImm();
  for (unsigned I = 0; I < 8; I++) {
    if (Mask & (1 << I))

  MI.eraseFromParent();
  Register ResultReg = MI.getOperand(0).getReg();

  } else if (Subtarget->hasSME()) {
        .addImm(AArch64SysReg::SVCR)

    RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE;

  MI.eraseFromParent();

  while (Reg.isVirtual()) {
    assert(DefMI && "Virtual register definition not found");
    unsigned Opcode = DefMI->getOpcode();

    if (Opcode == AArch64::COPY) {
      Reg = DefMI->getOperand(1).getReg();

      if (Reg.isPhysical())

    if (Opcode == AArch64::SUBREG_TO_REG) {
      Reg = DefMI->getOperand(1).getReg();

  int64_t IntDisc = IntDiscOp.getImm();
  assert(IntDisc == 0 && "Blend components are already expanded");

    case AArch64::MOVKXi:

    case AArch64::MOVi32imm:
    case AArch64::MOVi64imm:

      AddrDisc = AArch64::NoRegister;

  if (AddrDisc == AArch64::XZR)
    AddrDisc = AArch64::NoRegister;

  if (AddrDisc && MRI.getRegClass(AddrDisc) != AddrDiscRC) {

  AddrDiscOp.setReg(AddrDisc);
  IntDiscOp.setImm(IntDisc);
  if (SMEOrigInstr != -1) {

    switch (SMEMatrixType) {

  switch (MI.getOpcode()) {

  case AArch64::EntryPStateSM:

  case AArch64::F128CSEL:

  case TargetOpcode::STATEPOINT:
      MI.addOperand(*MI.getMF(),

  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:

  case TargetOpcode::PATCHABLE_EVENT_CALL:
  case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:

  case AArch64::CATCHRET:

  case AArch64::PROBED_STACKALLOC_DYN:

  case AArch64::CHECK_MATCHING_VL_PSEUDO:

  case AArch64::LD1_MXIPXX_H_PSEUDO_B:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_B, AArch64::ZAB0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_H:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_H, AArch64::ZAH0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_S:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_S, AArch64::ZAS0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_D:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_D, AArch64::ZAD0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_Q:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_Q, AArch64::ZAQ0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_B:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_B, AArch64::ZAB0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_H:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_H, AArch64::ZAH0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_S:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_S, AArch64::ZAS0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_D:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_D, AArch64::ZAD0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_Q:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_Q, AArch64::ZAQ0, MI, BB);
  case AArch64::LDR_ZA_PSEUDO:

  case AArch64::LDR_TX_PSEUDO:

  case AArch64::STR_TX_PSEUDO:

  case AArch64::ZERO_M_PSEUDO:

  case AArch64::ZERO_T_PSEUDO:

  case AArch64::MOVT_TIZ_PSEUDO:
                                 &AArch64::GPR64noipRegClass);
    N = N->getOperand(0).getNode();

  if (N->getOpcode() != AArch64ISD::DUP)

  auto Opnd0 = N->getOperand(0);

         (V.getOpcode() == AArch64ISD::DUP && isOneConstant(V.getOperand(0)));

                            CondCode, CondCode2);

  bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);

             << " legal: " << (IsLegal ? "yes\n" : "no\n"));

  if (Op->getFlags().hasNoSignedWrap())

      (isIntEqualitySetCC(CC) ||

  EVT VT = LHS.getValueType();

  if ((VT == MVT::f16 && !FullFP16) || VT == MVT::bf16) {

  Chain = RHS.getValue(1);

      IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;

  EVT VT = LHS.getValueType();

  if ((VT == MVT::f16 && !FullFP16) || VT == MVT::bf16) {

  unsigned Opcode = AArch64ISD::SUBS;

      Opcode = AArch64ISD::ADDS;

             isIntEqualitySetCC(CC)) {

      Opcode = AArch64ISD::ADDS;

                      LHS.getOperand(0), LHS.getOperand(1));

  } else if (LHS.getOpcode() == AArch64ISD::ANDS) {
    return LHS.getValue(1);
  unsigned Opcode = 0;

  if (LHS.getValueType().isFloatingPoint()) {
    assert(LHS.getValueType() != MVT::f128);
    if ((LHS.getValueType() == MVT::f16 && !FullFP16) ||
        LHS.getValueType() == MVT::bf16) {

    Opcode = AArch64ISD::FCCMP;

      APInt Imm = Const->getAPIntValue();
      if (Imm.isNegative() && Imm.sgt(-32)) {
        Opcode = AArch64ISD::CCMN;

      Opcode = AArch64ISD::CCMN;

             isIntEqualitySetCC(CC)) {

    Opcode = AArch64ISD::CCMN;

    Opcode = AArch64ISD::CCMP;

                                bool &CanNegate, bool &MustBeFirst,
                                bool &PreferFirst, bool WillNegate,
                                unsigned Depth = 0) {

    if (VT == MVT::f128)

    MustBeFirst = false;

                       {Val->getOperand(0), Val->getOperand(1)});

  bool IsOR = Opcode == ISD::OR;

  if (MustBeFirstL && MustBeFirstR)

    if (!CanNegateL && !CanNegateR)

    CanNegate = WillNegate && CanNegateL && CanNegateR;

    MustBeFirst = !CanNegate;

    MustBeFirst = MustBeFirstL || MustBeFirstR;

  PreferFirst = PreferFirstL || PreferFirstR;
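  // Conjunctions and disjunctions of comparisons are emitted as one initial
  // compare followed by a chain of conditional compares (CCMP/CCMN/FCCMP),
  // keeping the whole condition in the NZCV flags without branches.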
  bool isInteger = LHS.getValueType().isInteger();

    CC = getSetCCInverse(CC, LHS.getValueType());

    assert(LHS.getValueType().isFloatingPoint());

  bool IsOR = Opcode == ISD::OR;

                                    PreferFirstL, IsOR);
  assert(ValidL && "Valid conjunction/disjunction tree");

                                    PreferFirstR, IsOR);
  assert(ValidR && "Valid conjunction/disjunction tree");

  bool ShouldFirstL = PreferFirstL && !PreferFirstR && !MustBeFirstR;

  if (MustBeFirstL || ShouldFirstL) {
    assert(!MustBeFirstR && "Valid conjunction/disjunction tree");

  bool NegateAfterAll;

    assert(CanNegateR && "at least one side must be negatable");
    assert(!MustBeFirstR && "invalid conjunction/disjunction tree");

      NegateAfterR = true;

      NegateR = CanNegateR;
      NegateAfterR = !CanNegateR;

    NegateAfterAll = !Negate;

    assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
    assert(!Negate && "Valid conjunction/disjunction tree");

    NegateAfterR = false;
    NegateAfterAll = false;

  bool DummyCanNegate;
  bool DummyMustBeFirst;
  bool DummyPreferFirst;

                           DummyPreferFirst, false))

  auto isSupportedExtend = [&](SDValue V) {

      uint64_t Mask = MaskCst->getZExtValue();
      return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
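  // Masks of 0xFF, 0xFFFF and 0xFFFFFFFF correspond to the UXTB, UXTH and
  // UXTW extended-register forms, which can be folded directly into the
  // compare.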
  if (!Op.hasOneUse())

  if (isSupportedExtend(Op))

  unsigned Opc = Op.getOpcode();

    uint64_t Shift = ShiftCst->getZExtValue();
    if (isSupportedExtend(Op.getOperand(0)))
      return (Shift <= 4) ? 2 : 1;

    EVT VT = Op.getValueType();
    if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))

  if (LHS.getOpcode() != ISD::AND && LHS.getOpcode() != AArch64ISD::ANDS)

  EVT VT = RHS.getValueType();
  APInt C = RHSC->getAPIntValue();

    if (!C.isMinSignedValue()) {

    assert(!C.isZero() && "C should not be zero here");

    if (!C.isMaxSignedValue()) {

    if (!C.isAllOnes()) {

  bool LHSIsCMN = isCMN(LHS, CC, DAG);
  bool RHSIsCMN = isCMN(RHS, CC, DAG);

      LHS.getNode()->hasNUsesOfValue(1, 0)) {
    int16_t ValueofRHS = RHS->getAsZExtVal();
static std::pair<SDValue, SDValue>

  assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
         "Unsupported value type");

  switch (Op.getOpcode()) {

    Opc = AArch64ISD::ADDS;

    Opc = AArch64ISD::ADDS;

    Opc = AArch64ISD::SUBS;

    Opc = AArch64ISD::SUBS;

  if (Op.getValueType() == MVT::i32) {

    assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");

    Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)

    Overflow = Value.getValue(1);

  return std::make_pair(Value, Overflow);

                               !Subtarget->isNeonAvailable()))
    return LowerToScalableOp(Op, DAG);
  return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,

  if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)

  if (!CFVal || !CTVal)

  return Cmp.getValue(1);

  return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Glue);

  return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Glue);

                                unsigned Opcode, bool IsSigned) {
  EVT VT0 = Op.getValue(0).getValueType();
  EVT VT1 = Op.getValue(1).getValueType();

  if (VT0 != MVT::i32 && VT0 != MVT::i64)

  bool InvertCarry = Opcode == AArch64ISD::SBCS;

                                 bool LastOperandIsImm = false) {
  if (Op.getValueType().isVector())

  const unsigned NumOperands = Op.getNumOperands();
  auto getFloatVT = [](EVT VT) {
    assert((VT == MVT::i32 || VT == MVT::i64) && "Unexpected VT");
    return VT == MVT::i32 ? MVT::f32 : MVT::f64;

  auto bitcastToFloat = [&](SDValue Val) {
    return DAG.getBitcast(getFloatVT(Val.getValueType()), Val);

  for (unsigned I = 1; I < NumOperands; ++I) {
    const bool KeepInt = LastOperandIsImm && (I == NumOperands - 1);
    NewOps.push_back(KeepInt ? Val : bitcastToFloat(Val));

  EVT OrigVT = Op.getValueType();

      DAG.getNode(AArch64ISD::CSEL, DL, MVT::i32, FVal, TVal, CCVal, Overflow);
  unsigned IsWrite = Op.getConstantOperandVal(2);
  unsigned Locality = Op.getConstantOperandVal(3);
  unsigned IsData = Op.getConstantOperandVal(4);

  bool IsStream = !Locality;

    assert(Locality <= 3 && "Prefetch locality out-of-range");
    Locality = 3 - Locality;
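  // The IR prefetch locality (3 = keep in all cache levels) is inverted into
  // the PRFM target cache level, and the 5-bit prfop immediate built below
  // packs the access type, target level and streaming policy together.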
  unsigned PrfOp = (IsWrite << 4) |

  return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),

  if (LHSConstOp && RHSConst) {
    uint64_t NewMaskValue = LHSConstValue & ~(RHSConstant - 1);

  EVT VT = Op.getValueType();

  if (VT == MVT::nxv2f64 && SrcVal.getValueType() == MVT::nxv2bf16) {

    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);

    return LowerFixedLengthFPExtendToSVE(Op, DAG);

  bool IsStrict = Op->isStrictFPOpcode();
  SDValue Op0 = Op.getOperand(IsStrict ? 1 : 0);

  if (VT == MVT::f64) {

    if (Op0VT == MVT::f32 || Op0VT == MVT::f16)

    if (Op0VT == MVT::bf16 && IsStrict) {
                         {Op0, Op.getOperand(0)});

    if (Op0VT == MVT::bf16)

  assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
  EVT VT = Op.getValueType();
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);

  bool Trunc = Op.getConstantOperandVal(IsStrict ? 2 : 1) == 1;
  SDNodeFlags Flags = Op->getFlags();

    if (SrcVT == MVT::nxv8f32)

      return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU);

    constexpr EVT I32 = MVT::nxv4i32;

    if (SrcVT == MVT::nxv2f32 || SrcVT == MVT::nxv4f32) {
      if (Subtarget->hasBF16())
        return LowerToPredicatedOp(Op, DAG,
                                   AArch64ISD::FP_ROUND_MERGE_PASSTHRU);

      Narrow = getSVESafeBitCast(I32, SrcVal, DAG);

    } else if (SrcVT == MVT::nxv2f64 &&
               (Subtarget->hasSVE2() || Subtarget->isStreamingSVEAvailable())) {

      Narrow = DAG.getNode(AArch64ISD::FCVTX_MERGE_PASSTHRU, DL, MVT::nxv2f32,
                           Pg, SrcVal, DAG.getPOISON(MVT::nxv2f32));
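      // FCVTX rounds to odd, so the later narrowing to bf16 does not
      // double-round.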
      NewOps.push_back(Op.getOperand(IsStrict ? 2 : 1));
      return DAG.getNode(Op.getOpcode(), DL, VT, NewOps, Op->getFlags());

      IsNaN = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, I1, IsNaN);
      Narrow = DAG.getSelect(DL, I32, IsNaN, NaN, Narrow);

    return getSVESafeBitCast(VT, Narrow, DAG);

    return LowerFixedLengthFPRoundToSVE(Op, DAG);

      !((Subtarget->hasNEON() || Subtarget->hasSME()) &&
        Subtarget->hasBF16())) {

      Narrow = DAG.getNode(AArch64ISD::FCVTXN, DL, F32, Narrow);

      Narrow = DAG.getSelect(DL, I32, IsNaN, NaN, Narrow);
  if (SrcVT != MVT::f128) {

  bool IsStrict = Op->isStrictFPOpcode();
  EVT InVT = Op.getOperand(IsStrict ? 1 : 0).getValueType();
  EVT VT = Op.getValueType();

         "Unimplemented SVE support for STRICT_FP_to_INT!");

                        {Op.getOperand(0), Op.getOperand(1)});
      return DAG.getNode(Op.getOpcode(), DL, {VT, MVT::Other},
                         {Ext.getValue(1), Ext.getValue(0)});

        Op.getOpcode(), DL, Op.getValueType(),

    if (InVT == MVT::nxv8f32)

                          ? AArch64ISD::FCVTZU_MERGE_PASSTHRU
                          : AArch64ISD::FCVTZS_MERGE_PASSTHRU;
    return LowerToPredicatedOp(Op, DAG, Opcode);

    return LowerFixedLengthFPToIntToSVE(Op, DAG);

  if (VTSize < InVTSize) {
                        {Op.getOperand(0), Op.getOperand(1)});

  if (VTSize > InVTSize) {
                        {Op.getOperand(0), Op.getOperand(1)});
      return DAG.getNode(Op.getOpcode(), DL, {VT, MVT::Other},
                         {Ext.getValue(1), Ext.getValue(0)});

    return DAG.getNode(Op.getOpcode(), DL, {ScalarVT, MVT::Other},
                       {Op.getOperand(0), Extract});
  return DAG.getNode(Op.getOpcode(), DL, ScalarVT, Extract);
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);

    return LowerVectorFP_TO_INT(Op, DAG);

  if ((SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||

                        {Op.getOperand(0), SrcVal});
      return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
                         {Ext.getValue(1), Ext.getValue(0)});

AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,

  EVT DstVT = Op.getValueType();

  assert(SatWidth <= DstElementWidth &&
         "Saturation width cannot exceed result width");

  if ((SrcElementVT == MVT::f16 &&
       (!Subtarget->hasFullFP16() || DstElementWidth > 16)) ||
      SrcElementVT == MVT::bf16) {

    SrcElementVT = MVT::f32;
    SrcElementWidth = 32;
  } else if (SrcElementVT != MVT::f64 && SrcElementVT != MVT::f32 &&
             SrcElementVT != MVT::f16 && SrcElementVT != MVT::bf16)

  if (SatWidth == 64 && SrcElementWidth < 64) {
    SrcElementVT = MVT::f64;
    SrcElementWidth = 64;

  if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth) {

  if (SrcElementWidth < SatWidth || SrcElementVT == MVT::f64)

      SrcVal2 ? DAG.getNode(Op.getOpcode(), DL, IntVT, SrcVal2,

    return LowerVectorFP_TO_INT_SAT(Op, DAG);

  EVT DstVT = Op.getValueType();

  assert(SatWidth <= DstWidth && "Saturation width cannot exceed result width");

  if ((SrcVT == MVT::f16 && !Subtarget->hasFullFP16()) || SrcVT == MVT::bf16) {
  } else if (SrcVT != MVT::f64 && SrcVT != MVT::f32 && SrcVT != MVT::f16 &&

  if ((SrcVT == MVT::f64 || SrcVT == MVT::f32 ||
       (SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
      DstVT == SatVT && (DstVT == MVT::i64 || DstVT == MVT::i32))
    return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,

  if (DstWidth < SatWidth)

  if (SrcVT == MVT::f16 && SatVT == MVT::i16 && DstVT == MVT::i32) {
        DAG.getNode(AArch64ISD::FCVTZS_HALF, DL, MVT::f32, SrcVal);

    SDValue CVTf32 = DAG.getNode(AArch64ISD::FCVTZU_HALF, DL, MVT::f32, SrcVal);
  EVT VT = Op.getValueType();

      *DAG.getContext(), Src.getValueType().getVectorElementType());

  bool IsStrict = Op->isStrictFPOpcode();
  EVT VT = Op.getValueType();

  EVT InVT = In.getValueType();
  unsigned Opc = Op.getOpcode();

         "Unimplemented SVE support for ISD:::STRICT_INT_TO_FP!");

                       {Op.getOperand(0), In});
                         {Op.getValueType(), MVT::Other},

    if (VT == MVT::nxv8f32)

    unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
                               : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
    return LowerToPredicatedOp(Op, DAG, Opcode);

    return LowerFixedLengthIntToFPToSVE(Op, DAG);

  if (VTSize < InVTSize) {

    bool IsTargetf16 = false;
    if (Op.hasOneUse() &&

      SDNode *U = *Op->user_begin();
      if (U->hasOneUse() && U->user_begin()->getOpcode() == ISD::FP_ROUND) {
        EVT TmpVT = U->user_begin()->getValueType(0);

    if (IsTargetf32 && !IsTargetf16) {

                       {In.getValue(1), In.getValue(0),

  if (VTSize > InVTSize) {

    return DAG.getNode(Op.getOpcode(), DL, {ScalarVT, MVT::Other},
                       {Op.getOperand(0), Extract});
  return DAG.getNode(Op.getOpcode(), DL, ScalarVT, Extract);
  if (Op.getValueType().isVector())
    return LowerVectorINT_TO_FP(Op, DAG);

  bool IsStrict = Op->isStrictFPOpcode();
  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);

  auto IntToFpViaPromotion = [&](EVT PromoteVT) {
                        {Op.getOperand(0), SrcVal});
                         {Op.getValueType(), MVT::Other},

        DAG.getNode(Op.getOpcode(), DL, PromoteVT, SrcVal),

  if (Op.getValueType() == MVT::bf16) {
    unsigned MaxWidth = IsSigned

    if (MaxWidth <= 24) {
      return IntToFpViaPromotion(MVT::f32);

    if (MaxWidth <= 53) {
      return IntToFpViaPromotion(MVT::f64);

        IsStrict ? DAG.getNode(Op.getOpcode(), DL, {MVT::f64, MVT::Other},
                               {Op.getOperand(0), ToRound})
                 : DAG.getNode(Op.getOpcode(), DL, MVT::f64, ToRound);

                         {Op.getValueType(), MVT::Other},
                         DAG.getIntPtrConstant(0, DL, true));

  if (Op.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
    return IntToFpViaPromotion(MVT::f32);

  if (Op.getValueType() != MVT::f128)
AArch64TargetLowering::LowerLOOP_DEPENDENCE_MASK(SDValue Op,

  assert((Subtarget->hasSVE2() ||
          (Subtarget->hasSME() && Subtarget->isStreaming())) &&
         "Lowering loop_dependence_raw_mask or loop_dependence_war_mask "
         "requires SVE or SME");

  EVT VT = Op.getValueType();
  unsigned LaneOffset = Op.getConstantOperandVal(3);

  uint64_t EltSizeInBytes = Op.getConstantOperandVal(2);
  EVT AddrTy = Op->getOperand(0).getValueType();

  if (LaneOffset != 0 || !is_contained({1u, 2u, 4u, 8u}, EltSizeInBytes))

  if (AddrTy == MVT::i64)

  assert(AddrTy == MVT::i32 && "Only expected i32 to be legal!");

      Op.getOpcode(), DL, VT,

  EVT OpVT = Op.getValueType();
  EVT ArgVT = Op.getOperand(0).getValueType();

    return LowerFixedLengthBitcastToSVE(Op, DAG);

           "Expected int->fp bitcast!");

    return getSVESafeBitCast(OpVT, ExtResult, DAG);

    return getSVESafeBitCast(OpVT, Op.getOperand(0), DAG);

  if (OpVT != MVT::f16 && OpVT != MVT::bf16)

  if (ArgVT == MVT::f16 || ArgVT == MVT::bf16)

  assert(ArgVT == MVT::i16);
static std::optional<uint64_t>

    return std::nullopt;

    return std::nullopt;

  return C->getZExtValue();

  EVT VT = N.getValueType();

  for (const SDValue &Elt : N->op_values()) {

      unsigned HalfSize = EltSize / 2;

        if (!isIntN(HalfSize, C->getSExtValue()))

        if (!isUIntN(HalfSize, C->getZExtValue()))

  EVT VT = N.getValueType();

  unsigned Opcode = N.getOpcode();

  unsigned Opcode = N.getOpcode();
  if (IsN0SExt && IsN1SExt)
    return AArch64ISD::SMULL;

  if (IsN0ZExt && IsN1ZExt)
    return AArch64ISD::UMULL;

  if (IsN0ZExt || IsN1ZExt) {
      return AArch64ISD::UMULL;
      return AArch64ISD::UMULL;

  if (IsN0SExt || IsN1SExt) {
      return AArch64ISD::SMULL;
      return AArch64ISD::SMULL;

  if (!IsN1SExt && !IsN1ZExt)
    return AArch64ISD::SMULL;
    return AArch64ISD::UMULL;
    return AArch64ISD::UMULL;

  const SDNode *Operand = N->getOperand(1).getNode();

  SplatValue = SplatValue.abs();

  if ((SplatValue - 1).isPowerOf2())
  else if ((SplatValue + 1).isPowerOf2())

  EVT VT = N->getValueType(0);

  unsigned ShiftAmt = MathOp == ISD::ADD ? (SplatValue - 1).logBase2()
                                         : (SplatValue + 1).logBase2();

  EVT VT = Op.getValueType();

  bool OverrideNEON = !Subtarget->isNeonAvailable();
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED);

         "unexpected type for custom-lowering ISD::MUL");

  if (VT == MVT::v1i64) {
    if (Subtarget->hasSVE())
      return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED);

    if (Subtarget->hasSVE())
      return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED);

         "unexpected types for extended operands to VMULL");
  if (Pattern == AArch64SVEPredPattern::all)

  if (PatNumElts == (NumElts * VScale))

  return DAG.getNode(AArch64ISD::PTRUE, DL, VT,

                                           bool IsSigned, bool IsEqual) {
  if (!N->getValueType(0).isScalableVector() ||

  APInt Y = N->getConstantOperandAPInt(Op1);

  if (IsSigned ? Y.isMaxSignedValue() : Y.isMaxValue())

  APInt X = N->getConstantOperandAPInt(Op0);

  APInt NumActiveElems =
      IsSigned ? Y.ssub_ov(X, Overflow) : Y.usub_ov(X, Overflow);

    NumActiveElems = IsSigned ? NumActiveElems.sadd_ov(One, Overflow)
                              : NumActiveElems.uadd_ov(One, Overflow);

  std::optional<unsigned> PredPattern =
  unsigned MinSVEVectorSize = std::max(
  unsigned ElementSize = 128 / N->getValueType(0).getVectorMinNumElements();
  if (PredPattern != std::nullopt &&
      NumActiveElems.getZExtValue() <= (MinSVEVectorSize / ElementSize))
    return getPTrue(DAG, DL, N->getValueType(0), *PredPattern);

  EVT VT = N->getValueType(0);

  unsigned BrkID = Intrinsic::aarch64_sve_brkb_z;

    BrkID = Intrinsic::aarch64_sve_brka_z;

      Upper.getOperand(0).getValueType() != VT)

      DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, MVT::nxv16i1, Mask);

  return DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Brk);
  EVT InVT = Op.getValueType();

         "Expected a predicate-to-predicate bitcast");

         "Only expect to cast between legal scalable predicate types!");

      Op.getConstantOperandVal(0) == Intrinsic::aarch64_sve_convert_to_svbool &&
      Op.getOperand(1).getValueType().bitsGT(VT))
    Op = Op.getOperand(1);

    Mask = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Mask);

  RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE;

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(

  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  SDValue TileSlice = N->getOperand(2);

  int32_t ConstAddend = 0;

    ConstAddend = ImmNode->getSExtValue();

  int32_t ImmAddend = ConstAddend % 16;
  if (int32_t C = (ConstAddend - ImmAddend)) {
    VarAddend = VarAddend

    auto SVL = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,

  return DAG.getNode(IsLoad ? AArch64ISD::SME_ZA_LDR : AArch64ISD::SME_ZA_STR,
                     {N.getOperand(0), TileSlice, Base,

  auto Op1 = Op.getOperand(1);
  auto Op2 = Op.getOperand(2);
  auto Mask = Op.getOperand(3);

  EVT Op2VT = Op2.getValueType();
  EVT ResVT = Op.getValueType();

         "Expected 8-bit or 16-bit characters.");

    Op2 = DAG.getNode(AArch64ISD::DUPLANE128, DL, OpContainerVT, Op2,

                        ID, Mask, Op1, Op2);
  unsigned IntNo = Op.getConstantOperandVal(1);

  case Intrinsic::aarch64_prefetch: {
    unsigned IsWrite = Op.getConstantOperandVal(3);
    unsigned Locality = Op.getConstantOperandVal(4);
    unsigned IsStream = Op.getConstantOperandVal(5);
    unsigned IsData = Op.getConstantOperandVal(6);

    unsigned PrfOp = (IsWrite << 4) |

    return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Chain,

  case Intrinsic::aarch64_range_prefetch: {
    unsigned IsWrite = Op.getConstantOperandVal(3);
    unsigned IsStream = Op.getConstantOperandVal(4);

    unsigned PrfOp = (IsStream << 2) | IsWrite;

    return DAG.getNode(AArch64ISD::RANGE_PREFETCH, DL, MVT::Other, Chain,

  case Intrinsic::aarch64_prefetch_ir:
    return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other,

  case Intrinsic::aarch64_sme_str:
  case Intrinsic::aarch64_sme_ldr: {

  case Intrinsic::aarch64_sme_za_enable:
        AArch64ISD::SMSTART, DL, DAG.getVTList(MVT::Other, MVT::Glue),

  case Intrinsic::aarch64_sme_za_disable:
        AArch64ISD::SMSTOP, DL, DAG.getVTList(MVT::Other, MVT::Glue),

  unsigned IntNo = Op.getConstantOperandVal(1);

  case Intrinsic::aarch64_mops_memset_tag: {

    auto Alignment = Node->getMemOperand()->getAlign();
    bool IsVol = Node->isVolatile();
    auto DstPtrInfo = Node->getPointerInfo();

    SDValue MS = SDI.EmitMOPS(AArch64::MOPSMemorySetTaggingPseudo, DAG, DL,
                              Chain, Dst, Val, Size, Alignment, IsVol,
                              DstPtrInfo, MachinePointerInfo{});

  unsigned IntNo = Op.getConstantOperandVal(0);

  case Intrinsic::thread_pointer: {
    return DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
  case Intrinsic::aarch64_sve_whilewr_b:
                       Op.getOperand(1), Op.getOperand(2),
  case Intrinsic::aarch64_sve_whilewr_h:
                       Op.getOperand(1), Op.getOperand(2),
  case Intrinsic::aarch64_sve_whilewr_s:
                       Op.getOperand(1), Op.getOperand(2),
  case Intrinsic::aarch64_sve_whilewr_d:
                       Op.getOperand(1), Op.getOperand(2),
  case Intrinsic::aarch64_sve_whilerw_b:
                       Op.getOperand(1), Op.getOperand(2),
  case Intrinsic::aarch64_sve_whilerw_h:
                       Op.getOperand(1), Op.getOperand(2),
  case Intrinsic::aarch64_sve_whilerw_s:
                       Op.getOperand(1), Op.getOperand(2),
  case Intrinsic::aarch64_sve_whilerw_d:
                       Op.getOperand(1), Op.getOperand(2),
  case Intrinsic::aarch64_neon_abs: {
    EVT Ty = Op.getValueType();
    if (Ty == MVT::i64) {
  case Intrinsic::aarch64_neon_pmull64: {

    std::optional<uint64_t> LHSLane =
    std::optional<uint64_t> RHSLane =

    assert((!LHSLane || *LHSLane < 2) && "Expect lane to be None or 0 or 1");
    assert((!RHSLane || *RHSLane < 2) && "Expect lane to be None or 0 or 1");

    auto TryVectorizeOperand = [](SDValue N, std::optional<uint64_t> NLane,
                                  std::optional<uint64_t> OtherLane,
                                  SelectionDAG &DAG) -> SDValue {

      if (OtherLane == 1) {
            DAG.getNode(AArch64ISD::DUPLANE64, DL, MVT::v2i64,

        return DAG.getNode(AArch64ISD::DUP, DL, MVT::v1i64, N);

      assert(N.getValueType() == MVT::i64 &&
             "Intrinsic aarch64_neon_pmull64 requires i64 parameters");

    LHS = TryVectorizeOperand(LHS, LHSLane, RHSLane, DL, DAG);
    RHS = TryVectorizeOperand(RHS, RHSLane, LHSLane, DL, DAG);
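    // Keeping the i64 operands in 64-bit vector registers (reusing lane 1 via
    // DUPLANE64 when the other operand already lives there) lets instruction
    // selection pick PMULL/PMULL2 directly instead of moving the values
    // through general-purpose registers.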
  case Intrinsic::aarch64_neon_smax:

  case Intrinsic::aarch64_neon_umax:

  case Intrinsic::aarch64_neon_smin:

  case Intrinsic::aarch64_neon_umin:

  case Intrinsic::aarch64_neon_scalar_sqxtn:
  case Intrinsic::aarch64_neon_scalar_sqxtun:
  case Intrinsic::aarch64_neon_scalar_uqxtn: {
    assert(Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::f32);
    if (Op.getValueType() == MVT::i32)
                                     Op.getOperand(1))));

  case Intrinsic::aarch64_neon_sqxtn:

  case Intrinsic::aarch64_neon_sqxtun:

  case Intrinsic::aarch64_neon_uqxtn:

  case Intrinsic::aarch64_neon_sqshrn:
    if (Op.getValueType().isVector())
                         Op.getOperand(1).getValueType(),
                         Op.getOperand(1), Op.getOperand(2)));

  case Intrinsic::aarch64_neon_sqshrun:
    if (Op.getValueType().isVector())
                         Op.getOperand(1).getValueType(),
                         Op.getOperand(1), Op.getOperand(2)));

  case Intrinsic::aarch64_neon_uqshrn:
    if (Op.getValueType().isVector())
                         Op.getOperand(1).getValueType(),
                         Op.getOperand(1), Op.getOperand(2)));

  case Intrinsic::aarch64_neon_sqrshrn:
    if (Op.getValueType().isVector())
                         Op.getOperand(1).getValueType(),
                         Op.getOperand(1), Op.getOperand(2)));

  case Intrinsic::aarch64_neon_sqrshrun:
    if (Op.getValueType().isVector())
                         Op.getOperand(1).getValueType(),
                         Op.getOperand(1), Op.getOperand(2)));

  case Intrinsic::aarch64_neon_uqrshrn:
    if (Op.getValueType().isVector())
                         Op.getOperand(1).getValueType(),
                         Op.getOperand(1), Op.getOperand(2)));

  case Intrinsic::aarch64_neon_sqdmulh:

  case Intrinsic::aarch64_neon_sqrdmulh:

  case Intrinsic::aarch64_neon_sqrdmlah:

  case Intrinsic::aarch64_neon_sqrdmlsh:

  case Intrinsic::aarch64_neon_sqrshl:

  case Intrinsic::aarch64_neon_sqshl:

  case Intrinsic::aarch64_neon_uqrshl:

  case Intrinsic::aarch64_neon_uqshl:

  case Intrinsic::aarch64_neon_sqadd:
    if (Op.getValueType().isVector())

  case Intrinsic::aarch64_neon_sqsub:
    if (Op.getValueType().isVector())

  case Intrinsic::aarch64_neon_uqadd:
    if (Op.getValueType().isVector())

  case Intrinsic::aarch64_neon_suqadd:

  case Intrinsic::aarch64_neon_usqadd:

  case Intrinsic::aarch64_neon_uqsub:
    if (Op.getValueType().isVector())
6657 case Intrinsic::aarch64_neon_sqdmulls_scalar:
6659 case Intrinsic::aarch64_neon_sqabs:
6661 case Intrinsic::aarch64_neon_sqneg:
6663 case Intrinsic::aarch64_sve_whilelt:
6666 case Intrinsic::aarch64_sve_whilels:
6669 case Intrinsic::aarch64_sve_whilele:
6672 case Intrinsic::aarch64_sve_sunpkhi:
6673 return DAG.
getNode(AArch64ISD::SUNPKHI,
DL,
Op.getValueType(),
6675 case Intrinsic::aarch64_sve_sunpklo:
6676 return DAG.
getNode(AArch64ISD::SUNPKLO,
DL,
Op.getValueType(),
6678 case Intrinsic::aarch64_sve_uunpkhi:
6679 return DAG.
getNode(AArch64ISD::UUNPKHI,
DL,
Op.getValueType(),
6681 case Intrinsic::aarch64_sve_uunpklo:
6682 return DAG.
getNode(AArch64ISD::UUNPKLO,
DL,
Op.getValueType(),
6684 case Intrinsic::aarch64_sve_clasta_n:
6685 return DAG.
getNode(AArch64ISD::CLASTA_N,
DL,
Op.getValueType(),
6686 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
6687 case Intrinsic::aarch64_sve_clastb_n:
6688 return DAG.
getNode(AArch64ISD::CLASTB_N,
DL,
Op.getValueType(),
6689 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
6690 case Intrinsic::aarch64_sve_lasta:
6691 return DAG.
getNode(AArch64ISD::LASTA,
DL,
Op.getValueType(),
6692 Op.getOperand(1),
Op.getOperand(2));
6693 case Intrinsic::aarch64_sve_lastb:
6694 return DAG.
getNode(AArch64ISD::LASTB,
DL,
Op.getValueType(),
6695 Op.getOperand(1),
Op.getOperand(2));
6696 case Intrinsic::aarch64_sve_tbl:
6697 return DAG.
getNode(AArch64ISD::TBL,
DL,
Op.getValueType(),
Op.getOperand(1),
6699 case Intrinsic::aarch64_sve_trn1:
6700 return DAG.
getNode(AArch64ISD::TRN1,
DL,
Op.getValueType(),
6701 Op.getOperand(1),
Op.getOperand(2));
6702 case Intrinsic::aarch64_sve_trn2:
6703 return DAG.
getNode(AArch64ISD::TRN2,
DL,
Op.getValueType(),
6704 Op.getOperand(1),
Op.getOperand(2));
6705 case Intrinsic::aarch64_sve_uzp1:
6706 return DAG.
getNode(AArch64ISD::UZP1,
DL,
Op.getValueType(),
6707 Op.getOperand(1),
Op.getOperand(2));
6708 case Intrinsic::aarch64_sve_uzp2:
6709 return DAG.
getNode(AArch64ISD::UZP2,
DL,
Op.getValueType(),
6710 Op.getOperand(1),
Op.getOperand(2));
6711 case Intrinsic::aarch64_sve_zip1:
6712 return DAG.
getNode(AArch64ISD::ZIP1,
DL,
Op.getValueType(),
6713 Op.getOperand(1),
Op.getOperand(2));
6714 case Intrinsic::aarch64_sve_zip2:
6715 return DAG.
getNode(AArch64ISD::ZIP2,
DL,
Op.getValueType(),
6716 Op.getOperand(1),
Op.getOperand(2));
6717 case Intrinsic::aarch64_sve_splice:
6718 return DAG.
getNode(AArch64ISD::SPLICE,
DL,
Op.getValueType(),
6719 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
6720 case Intrinsic::aarch64_sve_ptrue:
6721 return getPTrue(DAG,
DL,
Op.getValueType(),
Op.getConstantOperandVal(1));
6722 case Intrinsic::aarch64_sve_clz:
6723 return DAG.
getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6724 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6725 case Intrinsic::aarch64_sme_cntsd: {
6731 case Intrinsic::aarch64_sve_cnt: {
6734 if (
Data.getValueType().isFloatingPoint())
6736 return DAG.
getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6737 Op.getOperand(2),
Data,
Op.getOperand(1));
6739 case Intrinsic::aarch64_sve_dupq_lane:
6740 return LowerDUPQLane(
Op, DAG);
6741 case Intrinsic::aarch64_sve_convert_from_svbool:
6742 if (
Op.getValueType() == MVT::aarch64svcount)
6745 case Intrinsic::aarch64_sve_convert_to_svbool:
6746 if (
Op.getOperand(1).getValueType() == MVT::aarch64svcount)
6749 case Intrinsic::aarch64_sve_fneg:
6750 return DAG.
getNode(AArch64ISD::FNEG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6751 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6752 case Intrinsic::aarch64_sve_frintp:
6753 return DAG.
getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6754 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6755 case Intrinsic::aarch64_sve_frintm:
6756 return DAG.
getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6757 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6758 case Intrinsic::aarch64_sve_frinti:
6759 return DAG.
getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU,
DL,
6760 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6762 case Intrinsic::aarch64_sve_frintx:
6763 return DAG.
getNode(AArch64ISD::FRINT_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6764 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6765 case Intrinsic::aarch64_sve_frint32x:
6766 return DAG.
getNode(AArch64ISD::FRINT32_MERGE_PASSTHRU,
DL,
6767 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6769 case Intrinsic::aarch64_sve_frint64x:
6770 return DAG.
getNode(AArch64ISD::FRINT64_MERGE_PASSTHRU,
DL,
6771 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6773 case Intrinsic::aarch64_sve_frinta:
6774 return DAG.
getNode(AArch64ISD::FROUND_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6775 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6776 case Intrinsic::aarch64_sve_frintn:
6777 return DAG.
getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU,
DL,
6778 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6780 case Intrinsic::aarch64_sve_frintz:
6781 return DAG.
getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6782 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6783 case Intrinsic::aarch64_sve_frint32z:
6784 return DAG.
getNode(AArch64ISD::FTRUNC32_MERGE_PASSTHRU,
DL,
6785 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6787 case Intrinsic::aarch64_sve_frint64z:
6788 return DAG.
getNode(AArch64ISD::FTRUNC64_MERGE_PASSTHRU,
DL,
6789 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6791 case Intrinsic::aarch64_sve_ucvtf:
6792 return DAG.
getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU,
DL,
6793 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6795 case Intrinsic::aarch64_sve_scvtf:
6796 return DAG.
getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU,
DL,
6797 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6799 case Intrinsic::aarch64_sve_fcvtzu:
6800 return DAG.
getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6801 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6802 case Intrinsic::aarch64_sve_fcvtzs:
6803 return DAG.
getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6804 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6805 case Intrinsic::aarch64_sve_fsqrt:
6806 return DAG.
getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6807 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6808 case Intrinsic::aarch64_sve_frecpx:
6809 return DAG.
getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6810 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6811 case Intrinsic::aarch64_sve_frecpe_x:
6812 return DAG.
getNode(AArch64ISD::FRECPE,
DL,
Op.getValueType(),
6814 case Intrinsic::aarch64_sve_frecps_x:
6815 return DAG.
getNode(AArch64ISD::FRECPS,
DL,
Op.getValueType(),
6816 Op.getOperand(1),
Op.getOperand(2));
6817 case Intrinsic::aarch64_sve_frsqrte_x:
6818 return DAG.
getNode(AArch64ISD::FRSQRTE,
DL,
Op.getValueType(),
6820 case Intrinsic::aarch64_sve_frsqrts_x:
6821 return DAG.
getNode(AArch64ISD::FRSQRTS,
DL,
Op.getValueType(),
6822 Op.getOperand(1),
Op.getOperand(2));
6823 case Intrinsic::aarch64_sve_fabs:
6824 return DAG.
getNode(AArch64ISD::FABS_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6825 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6826 case Intrinsic::aarch64_sve_abs:
6827 return DAG.
getNode(AArch64ISD::ABS_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6828 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6829 case Intrinsic::aarch64_sve_neg:
6830 return DAG.
getNode(AArch64ISD::NEG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6831 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6832 case Intrinsic::aarch64_sve_insr: {
6834 EVT ScalarTy =
Scalar.getValueType();
6835 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
6838 return DAG.
getNode(AArch64ISD::INSR,
DL,
Op.getValueType(),
6839 Op.getOperand(1), Scalar);
6841 case Intrinsic::aarch64_sve_rbit:
6842 return DAG.
getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU,
DL,
6843 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
  case Intrinsic::aarch64_sve_revb:
    return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, DL, Op.getValueType(),
                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
  case Intrinsic::aarch64_sve_revh:
    return DAG.getNode(AArch64ISD::REVH_MERGE_PASSTHRU, DL, Op.getValueType(),
                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
  case Intrinsic::aarch64_sve_revw:
    return DAG.getNode(AArch64ISD::REVW_MERGE_PASSTHRU, DL, Op.getValueType(),
                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
  case Intrinsic::aarch64_sve_revd:
    return DAG.getNode(AArch64ISD::REVD_MERGE_PASSTHRU, DL, Op.getValueType(),
                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
  case Intrinsic::aarch64_sve_sxtb:
    return DAG.getNode(
        AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
        Op.getOperand(2), Op.getOperand(3),
        DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
        Op.getOperand(1));
  case Intrinsic::aarch64_sve_sxth:
    return DAG.getNode(
        AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
        Op.getOperand(2), Op.getOperand(3),
        DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
        Op.getOperand(1));
  case Intrinsic::aarch64_sve_sxtw:
    return DAG.getNode(
        AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
        Op.getOperand(2), Op.getOperand(3),
        DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
        Op.getOperand(1));
  case Intrinsic::aarch64_sve_uxtb:
    return DAG.getNode(
        AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
        Op.getOperand(2), Op.getOperand(3),
        DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
        Op.getOperand(1));
  case Intrinsic::aarch64_sve_uxth:
    return DAG.getNode(
        AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
        Op.getOperand(2), Op.getOperand(3),
        DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
        Op.getOperand(1));
  case Intrinsic::aarch64_sve_uxtw:
    return DAG.getNode(
        AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
        Op.getOperand(2), Op.getOperand(3),
        DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
        Op.getOperand(1));
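  // The sxt*/uxt* cases above additionally pass the narrow source element type
  // as a VT operand so the extend-in-register node knows how many low bits of
  // each element carry the value being sign- or zero-extended.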
  case Intrinsic::localaddress: {
    // ...
    const auto *RegInfo = Subtarget->getRegisterInfo();
    unsigned Reg = RegInfo->getLocalAddressRegister(MF);
    return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg,
                              Op.getSimpleValueType());
  }
  case Intrinsic::eh_recoverfp: {
    SDValue IncomingFPOp = Op.getOperand(2);
    // ... (the first operand must be a function; otherwise:
    //      "llvm.eh.recoverfp must take a function as the first argument")
    return IncomingFPOp;
  }
  case Intrinsic::aarch64_neon_vsri:
  case Intrinsic::aarch64_neon_vsli:
  case Intrinsic::aarch64_sve_sri:
  case Intrinsic::aarch64_sve_sli: {
    EVT Ty = Op.getValueType();
    // ...
    bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri ||
                        IntNo == Intrinsic::aarch64_sve_sri;
    unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
    return DAG.getNode(Opcode, DL, Ty, Op.getOperand(1), Op.getOperand(2),
                       Op.getOperand(3));
  }
  case Intrinsic::aarch64_neon_srhadd:
  case Intrinsic::aarch64_neon_urhadd:
  case Intrinsic::aarch64_neon_shadd:
  case Intrinsic::aarch64_neon_uhadd: {
    bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
                        IntNo == Intrinsic::aarch64_neon_shadd);
    bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
                          IntNo == Intrinsic::aarch64_neon_urhadd);
    unsigned Opcode = IsSignedAdd
                          ? (IsRoundingAdd ? ISD::AVGCEILS : ISD::AVGFLOORS)
                          : (IsRoundingAdd ? ISD::AVGCEILU : ISD::AVGFLOORU);
    return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(1),
                       Op.getOperand(2));
  }
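  // srhadd/urhadd round the halved sum up while shadd/uhadd truncate it, so
  // the four intrinsics map onto the generic signed/unsigned AVGCEIL and
  // AVGFLOOR nodes selected above.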
  case Intrinsic::aarch64_neon_saddlp:
  case Intrinsic::aarch64_neon_uaddlp: {
    unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uaddlp
                          ? AArch64ISD::UADDLP
                          : AArch64ISD::SADDLP;
    return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(1));
  }
  case Intrinsic::aarch64_neon_sdot:
  case Intrinsic::aarch64_neon_udot:
  case Intrinsic::aarch64_sve_sdot:
  case Intrinsic::aarch64_sve_udot: {
    unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot ||
                       IntNo == Intrinsic::aarch64_sve_udot)
                          ? AArch64ISD::UDOT
                          : AArch64ISD::SDOT;
    return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(1),
                       Op.getOperand(2), Op.getOperand(3));
  }
  case Intrinsic::aarch64_neon_usdot:
  case Intrinsic::aarch64_sve_usdot: {
    return DAG.getNode(AArch64ISD::USDOT, DL, Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
  }
  case Intrinsic::aarch64_neon_saddlv:
  case Intrinsic::aarch64_neon_uaddlv: {
    EVT OpVT = Op.getOperand(1).getValueType();
    EVT ResVT = Op.getValueType();
    assert(
        ((ResVT == MVT::i32 && (OpVT == MVT::v8i8 || OpVT == MVT::v16i8 ||
                                OpVT == MVT::v8i16 || OpVT == MVT::v4i16)) ||
         (ResVT == MVT::i64 && (OpVT == MVT::v4i32 || OpVT == MVT::v2i32))) &&
        "Unexpected aarch64_neon_u/saddlv type");
    // ...
        DAG.getNode(IntNo == Intrinsic::aarch64_neon_uaddlv ? AArch64ISD::UADDLV
                                                            : AArch64ISD::SADDLV,
                    DL, ResVT == MVT::i32 ? MVT::v4i32 : MVT::v2i64,
                    Op.getOperand(1));
    // ...
    return EXTRACT_VEC_ELT;
  }
  case Intrinsic::experimental_vector_match: {
    // ...
  }
  case Intrinsic::aarch64_cls:
  case Intrinsic::aarch64_cls64: {
    // ...
  }
  case Intrinsic::aarch64_neon_cls: {
    // ...
  }
  case Intrinsic::aarch64_sve_pmul:
  case Intrinsic::aarch64_neon_pmul:
    // ...
// ...

bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const {
  // ...
}

bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
                                                          /* ... */) const {
  // ...
}

// ...
  if (LD->isVolatile())
    /* ... */;
  // ...
  EVT MemVT = LD->getMemoryVT();
  if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8 && MemVT != MVT::v2i16)
    /* ... */;
  // ...
  Align Alignment = LD->getAlign();
  // ...
  if (Subtarget.requiresStrictAlign() && Alignment < RequiredAlignment)
    /* ... */;
// ...

bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
  // ...
  if (!ExtVT.isScalableVector() && !Subtarget->useSVEForFixedLengthVectors())
    /* ... */;
  // ...
  unsigned NumExtMaskedLoads = 0;
  for (auto *U : Ld->getMask()->users())
    if (/* ... */)
      NumExtMaskedLoads++;
  // ...
  if (NumExtMaskedLoads <= 1)
    /* ... */;
  // ...
  return PreExtScalarVT == MVT::i8 || PreExtScalarVT == MVT::i16 ||
         PreExtScalarVT == MVT::i32 || PreExtScalarVT == MVT::i64;
}
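// The (IsScaled, IsSigned, NeedsExtend) key in the AddrModes table below
// selects the gather opcode: scaling picks the *_SCALED form and, when a
// 32-bit index must be widened, the signedness picks the UXTW/SXTW variant.
// Indices that are already 64 bits use the plain form regardless of sign,
// which is why two keys map to the same opcode.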
  std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
      {std::make_tuple(false, false, false), AArch64ISD::GLD1_MERGE_ZERO},
      {std::make_tuple(false, false, true), AArch64ISD::GLD1_UXTW_MERGE_ZERO},
      {std::make_tuple(false, true, false), AArch64ISD::GLD1_MERGE_ZERO},
      {std::make_tuple(false, true, true), AArch64ISD::GLD1_SXTW_MERGE_ZERO},
      {std::make_tuple(true, false, false), AArch64ISD::GLD1_SCALED_MERGE_ZERO},
      {std::make_tuple(true, false, true),
       AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO},
      {std::make_tuple(true, true, false), AArch64ISD::GLD1_SCALED_MERGE_ZERO},
      {std::make_tuple(true, true, true),
       AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO},
  };
  auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
  return AddrModes.find(Key)->second;
  case AArch64ISD::GLD1_MERGE_ZERO:
    return AArch64ISD::GLD1S_MERGE_ZERO;
  case AArch64ISD::GLD1_IMM_MERGE_ZERO:
    return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
  case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
    return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
  case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
    return AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
  case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
    return AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
  case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
    return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
  case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
    return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
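  // Each zero-extending gather opcode above has a sign-extending counterpart
  // (GLD1 -> GLD1S) used when the loaded elements must be sign-extended to
  // the result element type.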
7155 EVT VT =
Op.getValueType();
7179 EVT IndexVT =
Index.getValueType();
7191 assert(Subtarget->useSVEForFixedLengthVectors() &&
7192 "Cannot lower when not using SVE for fixed vectors!");
7201 Index.getValueType().getVectorElementType() == MVT::i64 ||
7202 Mask.getValueType().getVectorElementType() == MVT::i64)
7268 EVT IndexVT =
Index.getValueType();
7280 assert(Subtarget->useSVEForFixedLengthVectors() &&
7281 "Cannot lower when not using SVE for fixed vectors!");
7293 Index.getValueType().getVectorElementType() == MVT::i64 ||
7294 Mask.getValueType().getVectorElementType() == MVT::i64)
7304 if (PromotedVT != VT)
7329 assert(LoadNode &&
"Expected custom lowering of a masked load node");
7330 EVT VT =
Op->getValueType(0);
7333 return LowerFixedLengthVectorMLoadToSVE(
Op, DAG);
7353 if ((!Subtarget->isSVEAvailable() || !Subtarget->hasSVE2p2()) &&
7354 (!Subtarget->isSVEorStreamingSVEAvailable() || !Subtarget->hasSME2p2()))
7393 assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
7416 return DAG.
getStore(ST->getChain(),
DL, ExtractTrunc,
7417 ST->getBasePtr(), ST->getMemOperand());
7423 MVT DestVT =
Op.getSimpleValueType();
7427 unsigned SrcAS =
N->getSrcAddressSpace();
7428 unsigned DestAS =
N->getDestAddressSpace();
7429 assert(SrcAS != DestAS &&
7430 "addrspacecast must be between different address spaces");
7433 "addrspacecast must be between different ptr sizes");
7458 if (!
DL.isLittleEndian())
7462 if (DataType->isIntegerTy(64))
7470 unsigned NumElements = DataTypeTy->getNumElements();
7471 unsigned EltSizeBits = DataTypeTy->getElementType()->getScalarSizeInBits();
7477 unsigned TotalSizeBits = DataTypeTy->getPrimitiveSizeInBits().getFixedValue();
7481 if (TotalSizeBits == 64u || TotalSizeBits == 128u)
7485 if (TotalSizeBits == 256u && (EltSizeBits == 8u || EltSizeBits == 16u ||
7486 EltSizeBits == 32u || EltSizeBits == 64u))
7499 assert(StoreNode &&
"Expected a store operation");
7532 {StoreNode->getChain(), DAG.getBitcast(MVT::v2i64, Lo),
7533 DAG.getBitcast(MVT::v2i64, Hi), StoreNode->getBasePtr()},
7537 "Lowering should be consistent with legality");
7551 assert (StoreNode &&
"Can only custom lower store nodes");
7555 EVT VT =
Value.getValueType();
7559 if (
auto MaybeSTNP =
LowerNTStore(StoreNode, VT, MemVT, Dl, DAG))
7566 Subtarget->useSVEForFixedLengthVectors()))
7567 return LowerFixedLengthVectorStoreToSVE(
Op, DAG);
7579 MemVT == MVT::v4i8) {
7582 }
else if (MemVT == MVT::i128 && StoreNode->
isVolatile()) {
7583 return LowerStore128(
Op, DAG);
7584 }
else if (MemVT == MVT::i64x8) {
7589 EVT PtrVT =
Base.getValueType();
7590 for (
unsigned i = 0; i < 8; i++) {
7611 bool IsStoreRelease =
7614 assert((Subtarget->hasFeature(AArch64::FeatureLSE2) &&
7615 Subtarget->hasFeature(AArch64::FeatureRCPC3) && IsStoreRelease) ||
7625 unsigned Opcode = IsStoreRelease ? AArch64ISD::STILP : AArch64ISD::STP;
7627 std::swap(StoreValue.first, StoreValue.second);
7630 {StoreNode->getChain(), StoreValue.first, StoreValue.second,
7631 StoreNode->getBasePtr()},
7643 EVT MemVT = Load->getMemoryVT();
7644 EVT ResVT = Load->getValueType(0);
7650 switch (Load->getExtensionType()) {
7663 SDValue Chain = Load->getChain();
7664 SDValue BasePtr = Load->getBasePtr();
7666 Align Alignment = Load->getAlign();
7672 DAG.
getLoad(ScalarLoadType,
DL, Chain, BasePtr, PtrInfo, Alignment);
7684 while (CurrentEltBits < DstEltBits) {
7686 CurrentNumElts = CurrentNumElts / 2;
7692 CurrentEltBits = CurrentEltBits * 2;
7695 Res = DAG.
getNode(ExtOpcode,
DL, ExtVT, Res);
7698 if (CurrentNumElts != NumElts) {
7711 assert(LoadNode &&
"Expected custom lowering of a load node");
7720 EVT PtrVT =
Base.getValueType();
7721 for (
unsigned i = 0; i < 8; i++) {
7727 Ops.push_back(Part);
7737SDValue AArch64TargetLowering::LowerFixedLengthVectorCompressToSVE(
7740 EVT VT =
Op.getValueType();
7755 EVT VT =
Op.getValueType();
7756 if (!Subtarget->isSVEAvailable())
7760 return LowerFixedLengthVectorCompressToSVE(
Op, DAG);
7766 EVT MaskVT =
Mask.getValueType();
7793 MVT VT =
Op.getSimpleValueType();
7796 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
7804 return DAG.
getNode(AArch64ISD::CSEL,
DL, VT,
Op.getOperand(0), Neg,
7817 return DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, Chain, Dest, CCVal,
7832 MVT VT =
Op.getSimpleValueType();
7836 if (NewShiftNo == 0)
7837 return Op.getOperand(0);
7846 if (NewShiftNo == 0)
7847 return Op.getOperand(1);
7849 if (ShiftNo->getZExtValue() == NewShiftNo)
7864 EVT XScalarTy =
X.getValueType();
7869 switch (
Op.getSimpleValueType().SimpleTy) {
7878 ExpVT = MVT::nxv4i32;
7882 ExpVT = MVT::nxv2i64;
7900 if (
X.getValueType() != XScalarTy)
7908 return Op.getOperand(0);
7943 const char FptrReg = 0x11;
7949 Chain,
DL, DAG.
getConstant(0x58000080u | NestReg,
DL, MVT::i32), Addr,
7950 MachinePointerInfo(TrmpAddr));
7955 Chain,
DL, DAG.
getConstant(0x580000b0u | FptrReg,
DL, MVT::i32), Addr,
7956 MachinePointerInfo(TrmpAddr, 4));
7962 MachinePointerInfo(TrmpAddr, 8));
7967 DAG.
getStore(Chain,
DL, Nest, Addr, MachinePointerInfo(TrmpAddr, 16));
7972 DAG.
getStore(Chain,
DL, FPtr, Addr, MachinePointerInfo(TrmpAddr, 24));
  EVT VT = Op.getValueType();
  if (/* ... */
      (Subtarget->hasSVEB16B16() &&
       Subtarget->isNonStreamingSVEorSME2Available()))
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED);

  assert(Subtarget->hasBF16() && "Expected +bf16 for custom FMUL lowering");
  assert((VT == MVT::nxv4bf16 || VT == MVT::nxv8bf16 || VT == MVT::v8bf16) &&
         "Unexpected FMUL VT");

  // MakeGetIntrinsic(IID) yields a callable that emits the given intrinsic:
  // ...
    return [&, IID](EVT VT, auto... Ops) {
      // ...
    };
  // ...
  EVT SrcVT = Value.getValueType();
  // ...
  auto FCVT = MakeGetIntrinsic(Intrinsic::aarch64_sve_fcvt_bf16f32_v2);
  auto FCVTNT = MakeGetIntrinsic(Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2);
  // ...
  auto BFMLALB =
      MakeGetIntrinsic(UseSVEBFMLAL ? Intrinsic::aarch64_sve_bfmlalb
                                    : Intrinsic::aarch64_neon_bfmlalb);
  auto BFMLALT =
      MakeGetIntrinsic(UseSVEBFMLAL ? Intrinsic::aarch64_sve_bfmlalt
                                    : Intrinsic::aarch64_neon_bfmlalt);
  EVT AccVT = UseSVEBFMLAL ? MVT::nxv4f32 : MVT::v4f32;
  // ...
  LHS = Reinterpret(LHS, MVT::nxv8bf16);
  RHS = Reinterpret(RHS, MVT::nxv8bf16);
  // ...
  SDValue BottomF32 =
      Reinterpret(BFMLALB(AccVT, Zero, LHS, RHS), MVT::nxv4f32);
  SDValue BottomBF16 =
      FCVT(MVT::nxv8bf16, DAG.getPOISON(MVT::nxv8bf16), Pg, BottomF32);
  if (VT == MVT::nxv4bf16)
    return Reinterpret(BottomBF16, VT);
  // ...
  SDValue TopF32 = Reinterpret(BFMLALT(AccVT, Zero, LHS, RHS), MVT::nxv4f32);
  SDValue TopBF16 = FCVTNT(MVT::nxv8bf16, BottomBF16, Pg, TopF32);
  return Reinterpret(TopBF16, VT);
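  // In summary: the bf16 multiply is built from widening multiply-accumulates
  // against a zero accumulator (BFMLALB for the bottom lanes, BFMLALT for the
  // top lanes), and the two f32 halves are then narrowed and interleaved back
  // into bf16 with FCVT and FCVTNT.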
  EVT VT = Op.getValueType();
  // ...
  assert(VT.isVector() && "Scalar fma lowering should be handled by patterns");
  // ...
  if (VT != MVT::v8f16 && VT != MVT::v4f32 && VT != MVT::v2f64)
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
  // ...
             ? LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED)
  // ...
  auto ConvertToScalableFnegMt = [&](SDValue Op) {
    // ...
    Op = LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
    // ...
  };
  OpA = ConvertToScalableFnegMt(OpA);
  OpB = ConvertToScalableFnegMt(OpB);
  OpC = ConvertToScalableFnegMt(OpC);
  // ...
      DAG.getNode(AArch64ISD::FMA_PRED, DL, ContainerVT, Pg, OpA, OpB, OpC);
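  // The ConvertToScalableFnegMt helper above rewrites fneg operands through
  // the predicated FNEG_MERGE_PASSTHRU form so the whole fused multiply-add
  // can be emitted as a single predicated FMA_PRED node on the scalable
  // container type.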
  EVT VT = Op.getValueType();
  // ...
      (VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) &&
  // ...
  EVT CLMULTy = VT == MVT::i8 ? MVT::v8i8 : MVT::v1i64;
  EVT ExtractTy = VT == MVT::i64 ? MVT::i64 : MVT::i32;
  // ...
  if (VecVT != CLMULTy) {
    // ...
  }
  // ...
  if (ExtractTy == MVT::i32)
    /* ... */;
  // ...
  if (ExtractTy != VT)
    /* ... */;
  return ExtractVecElt;
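  // The scalar operands are moved into a small vector register (v8i8 for i8,
  // v1i64 otherwise), the operation is performed there, and the scalar result
  // is extracted back out, truncating when the original type is narrower than
  // the extract type.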
8130 switch (
Op.getOpcode()) {
8136 return LowerLOOP_DEPENDENCE_MASK(
Op, DAG);
8138 return LowerBITCAST(
Op, DAG);
8140 return LowerGlobalAddress(
Op, DAG);
8142 return LowerGlobalTLSAddress(
Op, DAG);
8144 return LowerPtrAuthGlobalAddress(
Op, DAG);
8146 return LowerADJUST_TRAMPOLINE(
Op, DAG);
8148 return LowerINIT_TRAMPOLINE(
Op, DAG);
8152 return LowerSETCC(
Op, DAG);
8154 return LowerSETCCCARRY(
Op, DAG);
8158 return LowerBR_CC(
Op, DAG);
8160 return LowerSELECT(
Op, DAG);
8162 return LowerSELECT_CC(
Op, DAG);
8164 return LowerJumpTable(
Op, DAG);
8166 return LowerBR_JT(
Op, DAG);
8168 return LowerBRIND(
Op, DAG);
8170 return LowerConstantPool(
Op, DAG);
8172 return LowerBlockAddress(
Op, DAG);
8174 return LowerVASTART(
Op, DAG);
8176 return LowerVACOPY(
Op, DAG);
8178 return LowerVAARG(
Op, DAG);
8195 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FADD_PRED);
8197 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FSUB_PRED);
8199 return LowerFMUL(
Op, DAG);
8201 return LowerFMA(
Op, DAG);
8203 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FDIV_PRED);
8205 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
8207 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
8209 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU);
8211 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU);
8213 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU);
8215 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU);
8217 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
8219 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
8221 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU);
8223 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU);
8226 return LowerFP_ROUND(
Op, DAG);
8229 return LowerFP_EXTEND(
Op, DAG);
8231 return LowerFRAMEADDR(
Op, DAG);
8233 return LowerSPONENTRY(
Op, DAG);
8235 return LowerRETURNADDR(
Op, DAG);
8237 return LowerADDROFRETURNADDR(
Op, DAG);
8239 return LowerCONCAT_VECTORS(
Op, DAG);
8241 return LowerINSERT_VECTOR_ELT(
Op, DAG);
8243 return LowerEXTRACT_VECTOR_ELT(
Op, DAG);
8245 return LowerBUILD_VECTOR(
Op, DAG);
8248 return LowerEXTEND_VECTOR_INREG(
Op, DAG);
8250 return LowerZERO_EXTEND_VECTOR_INREG(
Op, DAG);
8252 return LowerVECTOR_SHUFFLE(
Op, DAG);
8254 return LowerSPLAT_VECTOR(
Op, DAG);
8256 return LowerEXTRACT_SUBVECTOR(
Op, DAG);
8258 return LowerINSERT_SUBVECTOR(
Op, DAG);
8263 return LowerDIV(
Op, DAG);
8268 return LowerMinMax(
Op, DAG);
8272 return LowerVectorSRA_SRL_SHL(
Op, DAG);
8276 return LowerShiftParts(
Op, DAG);
8279 return LowerCTPOP_PARITY(
Op, DAG);
8281 return LowerFCOPYSIGN(
Op, DAG);
8283 return LowerVectorOR(
Op, DAG);
8285 return LowerXOR(
Op, DAG);
8292 return LowerINT_TO_FP(
Op, DAG);
8297 return LowerFP_TO_INT(
Op, DAG);
8300 return LowerFP_TO_INT_SAT(
Op, DAG);
8302 return LowerGET_ROUNDING(
Op, DAG);
8304 return LowerSET_ROUNDING(
Op, DAG);
8306 return LowerGET_FPMODE(
Op, DAG);
8308 return LowerSET_FPMODE(
Op, DAG);
8310 return LowerRESET_FPMODE(
Op, DAG);
8312 return LowerMUL(
Op, DAG);
8314 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::MULHS_PRED);
8316 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::MULHU_PRED);
8318 return LowerINTRINSIC_W_CHAIN(
Op, DAG);
8320 return LowerINTRINSIC_WO_CHAIN(
Op, DAG);
8322 return LowerINTRINSIC_VOID(
Op, DAG);
8325 assert(Subtarget->hasLSE2() || Subtarget->hasRCPC3());
8326 return LowerStore128(
Op, DAG);
8330 return LowerSTORE(
Op, DAG);
8332 return LowerMSTORE(
Op, DAG);
8334 return LowerMGATHER(
Op, DAG);
8336 return LowerMSCATTER(
Op, DAG);
8338 return LowerVECREDUCE_SEQ_FADD(
Op, DAG);
8352 return LowerVECREDUCE(
Op, DAG);
8355 return LowerVECREDUCE_MUL(
Op, DAG);
8357 return LowerATOMIC_LOAD_AND(
Op, DAG);
8359 return LowerDYNAMIC_STACKALLOC(
Op, DAG);
8361 return LowerVSCALE(
Op, DAG);
8363 return LowerVECTOR_COMPRESS(
Op, DAG);
8367 return LowerFixedLengthVectorIntExtendToSVE(
Op, DAG);
8374 if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
8375 (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
8378 return LowerToPredicatedOp(
Op, DAG,
8379 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU);
8382 return LowerTRUNCATE(
Op, DAG);
8384 return LowerMLOAD(
Op, DAG);
8387 !Subtarget->isNeonAvailable()))
8388 return LowerFixedLengthVectorLoadToSVE(
Op, DAG);
8389 return LowerLOAD(
Op, DAG);
8393 return LowerToScalableOp(
Op, DAG);
8395 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMAX_PRED);
8398 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMAXNM_PRED);
8400 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMIN_PRED);
8403 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMINNM_PRED);
8405 return LowerFixedLengthVectorSelectToSVE(
Op, DAG);
8407 return LowerABS(
Op, DAG);
8409 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::ABDS_PRED);
8411 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::ABDU_PRED);
8413 return LowerAVG(
Op, DAG, AArch64ISD::HADDS_PRED);
8415 return LowerAVG(
Op, DAG, AArch64ISD::HADDU_PRED);
8417 return LowerAVG(
Op, DAG, AArch64ISD::RHADDS_PRED);
8419 return LowerAVG(
Op, DAG, AArch64ISD::RHADDU_PRED);
8421 return LowerBitreverse(
Op, DAG);
8423 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
8425 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU);
8427 return LowerCTTZ(
Op, DAG);
8430 return LowerVECTOR_SPLICE(
Op, DAG);
8432 return LowerVECTOR_DEINTERLEAVE(
Op, DAG);
8434 return LowerVECTOR_INTERLEAVE(
Op, DAG);
8436 return LowerGET_ACTIVE_LANE_MASK(
Op, DAG);
8439 if (
Op.getValueType().isVector())
8440 return LowerVectorXRINT(
Op, DAG);
8444 assert((
Op.getOperand(0).getValueType() == MVT::f16 ||
8445 Op.getOperand(0).getValueType() == MVT::bf16) &&
8446 "Expected custom lowering of rounding operations only for f16");
8449 return DAG.
getNode(
Op.getOpcode(),
DL,
Op.getValueType(), Ext);
8455 assert((
Op.getOperand(1).getValueType() == MVT::f16 ||
8456 Op.getOperand(1).getValueType() == MVT::bf16) &&
8457 "Expected custom lowering of rounding operations only for f16");
8460 {
Op.getOperand(0),
Op.getOperand(1)});
8461 return DAG.
getNode(
Op.getOpcode(),
DL, {Op.getValueType(), MVT::Other},
8462 {Ext.getValue(1), Ext.getValue(0)});
8465 assert(
Op.getOperand(2).getValueType() == MVT::i128 &&
8466 "WRITE_REGISTER custom lowering is only for 128-bit sysregs");
8471 std::pair<SDValue, SDValue> Pair =
8476 SysRegName, Pair.first, Pair.second);
8486 return LowerVECTOR_HISTOGRAM(
Op, DAG);
8491 return LowerPARTIAL_REDUCE_MLA(
Op, DAG);
8495 return LowerFCANONICALIZE(
Op, DAG);
8513 DAG.
getNode(AArch64ISD::CTTZ_ELTS,
DL, MVT::i64, Pg, CttzOp);
8520 return !Subtarget->useSVEForFixedLengthVectors();
8524 EVT VT,
bool OverrideNEON)
const {
8547 return Subtarget->isSVEorStreamingSVEAvailable();
8554 if (!Subtarget->useSVEForFixedLengthVectors())
8574 unsigned Opcode =
N->getOpcode();
8579 unsigned IID =
N->getConstantOperandVal(0);
8580 if (IID < Intrinsic::num_intrinsics)
8594 if (IID == Intrinsic::aarch64_neon_umull ||
8596 IID == Intrinsic::aarch64_neon_smull ||
8605 bool IsVarArg)
const {
8628 if (Subtarget->isTargetWindows()) {
8630 if (Subtarget->isWindowsArm64EC())
8636 if (!Subtarget->isTargetDarwin())
8644 if (Subtarget->isWindowsArm64EC())
8650 if (Subtarget->isWindowsArm64EC())
8674 if (Subtarget->isWindowsArm64EC())
8691 auto &FuncInfo = *MF.
getInfo<AArch64FunctionInfo>();
8693 SMEAttrs SMEFnAttrs = FuncInfo.getSMEFnAttrs();
8712 if (SMEFnAttrs.hasStreamingInterfaceOrBody())
8715 else if (SMEFnAttrs.hasStreamingCompatibleInterface())
8721SDValue AArch64TargetLowering::LowerFormalArguments(
8729 Subtarget->isCallingConvWin64(
F.getCallingConv(),
F.isVarArg());
8731 (isVarArg && Subtarget->isWindowsArm64EC());
8732 AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
8742 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.
getContext());
8750 unsigned NumArgs = Ins.
size();
8752 unsigned CurArgIdx = 0;
8753 bool UseVarArgCC =
false;
8755 UseVarArgCC = isVarArg;
8759 for (
unsigned i = 0; i != NumArgs; ++i) {
8760 MVT ValVT = Ins[i].VT;
8761 if (Ins[i].isOrigArg()) {
8762 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
8763 CurArgIdx = Ins[i].getOrigArgIndex();
8770 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
8772 else if (ActualMVT == MVT::i16)
8776 Ins[i].OrigTy, CCInfo);
8777 assert(!Res &&
"Call operand has unhandled type");
8782 bool IsLocallyStreaming =
8783 !
Attrs.hasStreamingInterface() &&
Attrs.hasStreamingBody();
8787 unsigned ExtraArgLocs = 0;
8788 for (
unsigned i = 0, e = Ins.
size(); i != e; ++i) {
8789 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
8791 if (Ins[i].
Flags.isByVal()) {
8795 int Size = Ins[i].Flags.getByValSize();
8796 unsigned NumRegs = (
Size + 7) / 8;
8808 if (Ins[i].
Flags.isSwiftAsync())
8809 MF.
getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(
true);
8815 const TargetRegisterClass *RC;
8817 if (RegVT == MVT::i32)
8818 RC = &AArch64::GPR32RegClass;
8819 else if (RegVT == MVT::i64)
8820 RC = &AArch64::GPR64RegClass;
8821 else if (RegVT == MVT::f16 || RegVT == MVT::bf16)
8822 RC = &AArch64::FPR16RegClass;
8823 else if (RegVT == MVT::f32)
8824 RC = &AArch64::FPR32RegClass;
8826 RC = &AArch64::FPR64RegClass;
8828 RC = &AArch64::FPR128RegClass;
8832 RC = &AArch64::PPRRegClass;
8833 }
else if (RegVT == MVT::aarch64svcount) {
8835 RC = &AArch64::PPRRegClass;
8838 RC = &AArch64::ZPRRegClass;
8845 if (IsLocallyStreaming) {
8860 DAG.
getNode(AArch64ISD::COALESCER_BARRIER,
DL,
8879 "Indirect arguments should be scalable on most subtargets");
      uint32_t BEAlign = 0;
      if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
          !Ins[i].Flags.isInConsecutiveRegs())
        BEAlign = 8 - ArgSize;
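      // On big-endian targets a sub-8-byte argument lives in the most
      // significant bytes of its stack slot, so the load address is advanced
      // by BEAlign = 8 - ArgSize.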
8907 MachinePointerInfo PtrInfo;
8913 unsigned ObjOffset = ArgOffset + BEAlign;
8943 "Indirect arguments should be scalable on most subtargets");
8963 Subtarget->isWindowsArm64EC()) &&
8964 "Indirect arguments should be scalable on most subtargets");
8967 unsigned NumParts = 1;
8968 if (Ins[i].
Flags.isInConsecutiveRegs()) {
8969 while (!Ins[i + NumParts - 1].
Flags.isInConsecutiveRegsLast())
8978 while (NumParts > 0) {
8979 ArgValue = DAG.
getLoad(PartLoad,
DL, Chain, Ptr, MachinePointerInfo());
8992 if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
8998 if (Ins[i].isOrigArg()) {
8999 Argument *OrigArg =
F.getArg(Ins[i].getOrigArgIndex());
9001 if (!Ins[i].
Flags.isZExt()) {
9002 ArgValue = DAG.
getNode(AArch64ISD::ASSERT_ZEXT_BOOL,
DL,
9013 if (
Attrs.hasStreamingCompatibleInterface()) {
9015 DAG.
getNode(AArch64ISD::ENTRY_PSTATE_SM,
DL,
9016 DAG.
getVTList(MVT::i64, MVT::Other), {Chain});
9028 if (IsLocallyStreaming) {
9029 if (
Attrs.hasStreamingCompatibleInterface())
9038 for (
unsigned I=0;
I<InVals.
size(); ++
I) {
9050 if (!Subtarget->isTargetDarwin() || IsWin64) {
9056 saveVarArgRegisters(CCInfo, DAG,
DL, Chain);
9060 unsigned VarArgsOffset = CCInfo.getStackSize();
9063 alignTo(VarArgsOffset, Subtarget->isTargetILP32() ? 4 : 8);
9074 SmallVectorImpl<ForwardedRegister> &Forwards =
9076 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
9080 if (!CCInfo.isAllocated(AArch64::X8)) {
9082 Forwards.
push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
9091 for (
unsigned I = 0,
E = Ins.
size();
I !=
E; ++
I) {
9093 Ins[
I].Flags.isInReg()) &&
9094 Ins[
I].Flags.isSRet()) {
  unsigned StackArgSize = CCInfo.getStackSize();
  if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
    // ...
    StackArgSize = alignTo(StackArgSize, 16);
  }
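  // When the callee is responsible for popping its own arguments (tail-call
  // optimization enabled), the argument area is rounded up to a 16-byte
  // multiple so the stack pointer stays aligned across the call.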
9129 if (Subtarget->hasCustomCallingConv())
9130 Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
9134 if (
Attrs.hasZAState()) {
9138 }
else if (
Attrs.hasAgnosticZAInterface()) {
9139 RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE_SIZE;
9144 auto *RetTy = EVT(MVT::i64).getTypeForEVT(*DAG.
getContext());
9145 TargetLowering::CallLoweringInfo CLI(DAG);
9146 CLI.setDebugLoc(
DL).setChain(Chain).setLibCallee(
9154 {Chain, Size, DAG.getConstant(1, DL, MVT::i64)});
9160 Chain = DAG.
getNode(AArch64ISD::SME_STATE_ALLOC,
DL,
9168 for (
const ISD::InputArg &
I : Ins) {
9169 if (
I.Flags.isSwiftSelf() ||
I.Flags.isSwiftError() ||
9170 I.Flags.isSwiftAsync()) {
9174 "Swift attributes can't be used with preserve_none",
9184void AArch64TargetLowering::saveVarArgRegisters(
CCState &CCInfo,
9190 AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
9194 Subtarget->isCallingConvWin64(
F.getCallingConv(),
F.isVarArg());
9200 if (Subtarget->isWindowsArm64EC()) {
9207 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
9209 if (GPRSaveSize != 0) {
9212 if (GPRSaveSize & 15)
9219 if (Subtarget->isWindowsArm64EC()) {
9232 for (
unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
9238 MF, GPRIdx, (i - FirstVariadicGPR) * 8)
9248 if (Subtarget->hasFPARMv8() && !IsWin64) {
9250 const unsigned NumFPRArgRegs =
FPRArgRegs.size();
9253 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
9255 if (FPRSaveSize != 0) {
9260 for (
unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
9275 if (!MemOps.
empty()) {
9282SDValue AArch64TargetLowering::LowerCallResult(
9286 SDValue ThisVal,
bool RequiresSMChange)
const {
9287 DenseMap<unsigned, SDValue> CopiedRegs;
9289 for (
unsigned i = 0; i != RVLocs.
size(); ++i) {
9290 CCValAssign VA = RVLocs[i];
9294 if (i == 0 && isThisReturn) {
9296 "unexpected return calling convention register assignment");
9332 Val = DAG.
getNode(AArch64ISD::COALESCER_BARRIER,
DL,
9399 unsigned NumArgs = Outs.
size();
9400 for (
unsigned i = 0; i != NumArgs; ++i) {
9401 MVT ArgVT = Outs[i].VT;
9404 bool UseVarArgCC =
false;
9408 if (IsCalleeWin64) {
9422 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
9424 else if (ActualMVT == MVT::i16)
9432 Outs[i].OrigTy, CCInfo);
9433 assert(!Res &&
"Call operand has unhandled type");
9448bool AArch64TargetLowering::isEligibleForTailCallOptimization(
9449 const CallLoweringInfo &CLI)
const {
9455 bool IsVarArg = CLI.IsVarArg;
9459 const SelectionDAG &DAG = CLI.DAG;
9466 SMECallAttrs CallAttrs =
9480 MF.
getInfo<AArch64FunctionInfo>()->isSVECC())
9483 bool CCMatch = CallerCC == CalleeCC;
9498 if (i->hasByValAttr())
9507 if (i->hasInRegAttr()) {
9508 unsigned ArgIdx = i - CallerF.
arg_begin();
9509 if (!CLI.CB || CLI.CB->arg_size() <= ArgIdx)
9511 AttributeSet
Attrs = CLI.CB->getParamAttributes(ArgIdx);
9512 if (!
Attrs.hasAttribute(Attribute::InReg) ||
9513 !
Attrs.hasAttribute(Attribute::StructRet) || !i->hasStructRetAttr() ||
9514 CLI.CB->getArgOperand(ArgIdx) != i) {
9531 const GlobalValue *GV =
G->getGlobal();
9534 (!
TT.isOSWindows() ||
TT.isOSBinFormatELF() ||
TT.isOSBinFormatMachO()))
9554 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
9555 const uint32_t *CallerPreserved =
TRI->getCallPreservedMask(MF, CallerCC);
9557 const uint32_t *CalleePreserved =
TRI->getCallPreservedMask(MF, CalleeCC);
9558 if (Subtarget->hasCustomCallingConv()) {
9559 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
9560 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
9562 if (!
TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
9571 CCState CCInfo(CalleeCC, IsVarArg, MF, ArgLocs,
C);
9575 if (IsVarArg && !(CLI.CB && CLI.CB->isMustTailCall())) {
9583 for (
const CCValAssign &ArgLoc : ArgLocs)
9584 if (!ArgLoc.isRegLoc())
9588 const AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
9596 A.getValVT().isScalableVector() ||
9597 Subtarget->isWindowsArm64EC()) &&
9598 "Expected value to be scalable");
9608 const MachineRegisterInfo &MRI = MF.
getRegInfo();
9618 int ClobberedFI)
const {
9621 int64_t LastByte = FirstByte + MFI.
getObjectSize(ClobberedFI) - 1;
9632 if (FI->getIndex() < 0) {
9634 int64_t InLastByte = InFirstByte;
9637 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
9638 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
9646bool AArch64TargetLowering::DoesCalleeRestoreStack(
CallingConv::ID CallCC,
9647 bool TailCallOpt)
const {
9658 APInt RequiredZero(SizeInBits, 0xFE);
9660 bool ZExtBool = (Bits.Zero & RequiredZero) == RequiredZero;
9664void AArch64TargetLowering::AdjustInstrPostInstrSelection(
MachineInstr &
MI,
9670 if (
MI.getOpcode() == AArch64::MSRpstatesvcrImm1 ||
9671 MI.getOpcode() == AArch64::MSRpstatePseudo) {
9672 for (
unsigned I =
MI.getNumOperands() - 1;
I > 0; --
I)
9673 if (MachineOperand &MO =
MI.getOperand(
I);
9674 MO.isReg() && MO.isImplicit() && MO.isDef() &&
9675 (AArch64::GPR32RegClass.contains(MO.getReg()) ||
9676 AArch64::GPR64RegClass.contains(MO.getReg())))
9677 MI.removeOperand(
I);
9681 if (
MI.getOperand(0).getImm() == AArch64SVCR::SVCRSM ||
9682 MI.getOperand(0).getImm() == AArch64SVCR::SVCRSMZA) {
9697 const MachineFunction &MF = *
MI.getMF();
9698 if (MF.
getInfo<AArch64FunctionInfo>()->hasStreamingModeChanges() &&
9699 (
MI.getOpcode() == AArch64::ADDXri ||
9700 MI.getOpcode() == AArch64::SUBXri)) {
9701 const MachineOperand &MO =
MI.getOperand(1);
9710 unsigned Condition,
bool InsertVectorLengthCheck)
const {
9718 Ops.push_back(InGlue);
9719 return DAG.
getNode(AArch64ISD::CHECK_MATCHING_VL,
DL,
9723 if (InsertVectorLengthCheck &&
Enable) {
9726 SDValue CheckVL = GetCheckVL(Chain, InGlue);
9739 assert(PStateReg.
isValid() &&
"PStateSM Register is invalid");
9746 Opcode =
Enable ? AArch64ISD::COND_SMSTART : AArch64ISD::COND_SMSTOP;
9747 Ops.push_back(ConditionOp);
9748 Ops.push_back(PStateSM);
9750 Opcode =
Enable ? AArch64ISD::SMSTART : AArch64ISD::SMSTOP;
9752 Ops.push_back(RegMask);
9755 Ops.push_back(InGlue);
9760 if (!InsertVectorLengthCheck ||
Enable)
9787 if (Flags.isZExt() || Flags.isSExt())
9794 Arg->
isAssert() ||
Op == AArch64ISD::ASSERT_ZEXT_BOOL) {
9806 int FI = FINode->getIndex();
9824AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
9826 SelectionDAG &DAG = CLI.DAG;
9833 bool &IsTailCall = CLI.IsTailCall;
9835 bool IsVarArg = CLI.IsVarArg;
9836 const CallBase *CB = CLI.CB;
9839 MachineFunction::CallSiteInfo CSInfo;
9840 bool IsThisReturn =
false;
9842 AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
9844 bool IsCFICall = CLI.CB && CLI.CB->isIndirectCall() && CLI.CFIType;
9845 bool IsSibCall =
false;
9846 bool GuardWithBTI =
false;
9848 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
9849 !Subtarget->noBTIAtReturnTwice()) {
9855 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.
getContext());
9858 unsigned NumArgs = Outs.
size();
9860 for (
unsigned i = 0; i != NumArgs; ++i) {
9861 if (Outs[i].
Flags.isVarArg() && Outs[i].VT.isScalableVector())
9863 "currently not supported");
9874 RetCCInfo.AnalyzeCallResult(Ins, RetCC);
9882 auto HasSVERegLoc = [](CCValAssign &Loc) {
9883 if (!Loc.isRegLoc())
9885 return AArch64::ZPRRegClass.contains(Loc.getLocReg()) ||
9886 AArch64::PPRRegClass.contains(Loc.getLocReg());
9888 if (
any_of(RVLocs, HasSVERegLoc) ||
any_of(ArgLocs, HasSVERegLoc))
9893 SMECallAttrs CallAttrs =
9896 std::optional<unsigned> ZAMarkerNode;
9898 ZAMarkerNode = AArch64ISD::REQUIRES_ZA_SAVE;
9900 ZAMarkerNode = AArch64ISD::REQUIRES_ZT0_SAVE;
9902 ZAMarkerNode = AArch64ISD::INOUT_ZA_USE;
9906 IsTailCall = isEligibleForTailCallOptimization(CLI);
9910 if (!ZAMarkerNode && !TailCallOpt && IsTailCall &&
9918 if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
9920 "site marked musttail");
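  // For a true tail call the caller's incoming argument area is reused:
  // FPDiff is the (16-byte aligned) difference between the space already
  // reserved and the space this call needs, so a negative value means the
  // callee requires more argument space than the caller currently has.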
  if (IsTailCall && !IsSibCall) {
    // ...
    NumBytes = alignTo(NumBytes, 16);
    // ...
    FPDiff = NumReusableBytes - NumBytes;
    // ...
    if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
      /* ... */;
    // ...
    assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
  }
9963 auto DescribeCallsite =
9964 [&](OptimizationRemarkAnalysis &
R) -> OptimizationRemarkAnalysis & {
9967 R <<
ore::NV(
"Callee", ES->getSymbol());
9968 else if (CLI.CB && CLI.CB->getCalledFunction())
9969 R <<
ore::NV(
"Callee", CLI.CB->getCalledFunction()->getName());
9971 R <<
"unknown callee";
9977 if (RequiresSMChange) {
9980 auto R = CLI.CB ? OptimizationRemarkAnalysis(
"sme",
"SMETransition",
9982 : OptimizationRemarkAnalysis(
"sme",
"SMETransition",
9984 DescribeCallsite(R) <<
" requires a streaming mode transition";
9991 assert((!IsSibCall || !ZAMarkerNode) &&
"ZA markers require CALLSEQ_START");
10001 {Chain, Chain.getValue(1)});
10009 SmallSet<unsigned, 8> RegsUsed;
10013 if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
10015 for (
const auto &
F : Forwards) {
10022 unsigned ExtraArgLocs = 0;
10023 for (
unsigned i = 0, e = Outs.
size(); i != e; ++i) {
10024 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
10026 ISD::ArgFlagsTy
Flags = Outs[i].Flags;
10041 if (Outs[i].ArgVT == MVT::i1) {
10063 assert(VA.
getValVT() == MVT::i32 &&
"only expect 32 -> 64 upper bits");
10079 assert((isScalable || Subtarget->isWindowsArm64EC()) &&
10080 "Indirect arguments should be scalable on most subtargets");
10083 TypeSize PartSize = StoreSize;
10084 unsigned NumParts = 1;
10085 if (Outs[i].
Flags.isInConsecutiveRegs()) {
10086 while (!Outs[i + NumParts - 1].
Flags.isInConsecutiveRegsLast())
10088 StoreSize *= NumParts;
10097 bool IsPred = VA.
getValVT() == MVT::aarch64svcount ||
10115 if (NumParts > 0) {
10131 if (i == 0 &&
Flags.isReturned() && !
Flags.isSwiftSelf() &&
10132 Outs[0].VT == MVT::i64) {
10134 "unexpected calling convention register assignment");
10136 "unexpected use of 'returned'");
10137 IsThisReturn =
true;
10146 [=](
const std::pair<unsigned, SDValue> &Elt) {
10155 [&VA](MachineFunction::ArgRegPair ArgReg) {
10156 return ArgReg.Reg == VA.getLocReg();
10163 Arg = DAG.
getNode(AArch64ISD::COALESCER_BARRIER,
DL,
10168 if (
Options.EmitCallSiteInfo)
10175 MachinePointerInfo DstInfo;
10179 uint32_t BEAlign = 0;
10185 OpSize =
Flags.isByVal() ?
Flags.getByValSize() * 8
10187 OpSize = (OpSize + 7) / 8;
10188 if (!Subtarget->isLittleEndian() && !
Flags.isByVal() &&
10189 !
Flags.isInConsecutiveRegs()) {
10191 BEAlign = 8 - OpSize;
10194 int32_t
Offset = LocMemOffset + BEAlign;
10211 Chain = addTokenForArgument(Chain, DAG, MF.
getFrameInfo(), FI);
10219 if (Outs[i].
Flags.isByVal()) {
10223 Chain,
DL, DstAddr, Arg, SizeNode,
10224 Outs[i].
Flags.getNonZeroByValAlign(),
10226 nullptr, std::nullopt, DstInfo, MachinePointerInfo());
10243 if (IsVarArg && Subtarget->isWindowsArm64EC() &&
10244 !(CLI.CB && CLI.CB->isMustTailCall())) {
10262 if (!MemOpChains.
empty())
10266 if (RequiresSMChange) {
10267 bool InsertVectorLengthCheck =
10277 for (
auto &RegToPass : RegsToPass) {
10279 RegToPass.second, InGlue);
10286 const GlobalValue *CalledGlobal =
nullptr;
10287 unsigned OpFlags = 0;
10289 CalledGlobal =
G->getGlobal();
10290 OpFlags = Subtarget->classifyGlobalFunctionReference(CalledGlobal,
10295 }
else if (!CLI.PAI || !IsTailCall) {
10296 const GlobalValue *GV =
G->getGlobal();
10301 Subtarget->isTargetMachO()) ||
10303 const char *Sym = S->getSymbol();
10316 if (IsTailCall && !IsSibCall) {
10321 unsigned Opc = IsTailCall ? AArch64ISD::TC_RETURN : AArch64ISD::CALL;
10323 std::vector<SDValue>
Ops;
10324 Ops.push_back(Chain);
10325 Ops.push_back(Callee);
10332 "tail calls cannot be marked with clang.arc.attachedcall");
10333 Opc = AArch64ISD::CALL_RVMARKER;
10339 Ops.insert(
Ops.begin() + 1, GA);
10346 Ops.insert(
Ops.begin() + 2, DoEmitMarker);
10348 Opc = AArch64ISD::CALL_ARM64EC_TO_X64;
10349 }
else if (GuardWithBTI) {
10350 Opc = AArch64ISD::CALL_BTI;
10361 const uint64_t
Key = CLI.PAI->Key;
10363 "Invalid auth call key");
10367 std::tie(IntDisc, AddrDisc) =
10370 if (
Opc == AArch64ISD::CALL_RVMARKER)
10371 Opc = AArch64ISD::AUTH_CALL_RVMARKER;
10373 Opc = IsTailCall ? AArch64ISD::AUTH_TC_RETURN : AArch64ISD::AUTH_CALL;
10375 Ops.push_back(IntDisc);
10376 Ops.push_back(AddrDisc);
10381 for (
auto &RegToPass : RegsToPass)
10383 RegToPass.second.getValueType()));
10386 const uint32_t *
Mask;
10387 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
10388 if (IsThisReturn) {
10390 Mask =
TRI->getThisReturnPreservedMask(MF, CallConv);
10392 IsThisReturn =
false;
10393 Mask =
TRI->getCallPreservedMask(MF, CallConv);
10396 Mask =
TRI->getCallPreservedMask(MF, CallConv);
10398 if (Subtarget->hasCustomCallingConv())
10399 TRI->UpdateCustomCallPreservedMask(MF, &Mask);
10401 if (
TRI->isAnyArgRegReserved(MF))
10402 TRI->emitReservedArgRegCallError(MF);
10404 assert(Mask &&
"Missing call preserved mask for calling convention");
10408 Ops.push_back(InGlue);
10410 if (CLI.DeactivationSymbol)
10423 if (CalledGlobal &&
10437 if (CalledGlobal &&
10441 uint64_t CalleePopBytes =
10442 DoesCalleeRestoreStack(CallConv, TailCallOpt) ?
alignTo(NumBytes, 16) : 0;
10450 Chain, InGlue, CallConv, IsVarArg, RVLocs,
DL, DAG, InVals, IsThisReturn,
10451 IsThisReturn ? OutVals[0] :
SDValue(), RequiresSMChange);
10456 if (RequiresSMChange) {
10462 if (RequiresSMChange) {
10463 for (
unsigned I = 0;
I < InVals.
size(); ++
I) {
10478 for (
const ISD::OutputArg &O : Outs) {
10479 if (
O.Flags.isSwiftSelf() ||
O.Flags.isSwiftError() ||
10480 O.Flags.isSwiftAsync()) {
10484 "Swift attributes can't be used with preserve_none",
10485 DL.getDebugLoc()));
10494bool AArch64TargetLowering::CanLowerReturn(
10497 const Type *RetTy)
const {
10500 CCState CCInfo(CallConv, isVarArg, MF, RVLocs,
Context);
10511 auto *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
10515 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.
getContext());
10521 SmallSet<unsigned, 4> RegsUsed;
10522 for (
unsigned i = 0, realRVLocIdx = 0; i != RVLocs.
size();
10523 ++i, ++realRVLocIdx) {
10524 CCValAssign &VA = RVLocs[i];
10526 SDValue Arg = OutVals[realRVLocIdx];
10532 if (Outs[i].ArgVT == MVT::i1) {
10548 assert(VA.
getValVT() == MVT::i32 &&
"only expect 32 -> 64 upper bits");
10557 llvm::find_if(RetVals, [=](
const std::pair<unsigned, SDValue> &Elt) {
10567 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
10583 for (
auto &RetVal : RetVals) {
10587 DAG.
getNode(AArch64ISD::COALESCER_BARRIER,
DL,
10588 DAG.
getVTList(RetVal.second.getValueType(), MVT::Glue),
10590 Chain = DAG.
getCopyToReg(Chain,
DL, RetVal.first, RetVal.second, Glue);
10593 DAG.
getRegister(RetVal.first, RetVal.second.getValueType()));
10604 unsigned RetValReg = AArch64::X0;
10606 RetValReg = AArch64::X8;
10617 if (AArch64::GPR64RegClass.
contains(*
I))
10619 else if (AArch64::FPR64RegClass.
contains(*
I))
10630 RetOps.push_back(Glue);
10641 MachinePointerInfo());
10642 RetOps.insert(RetOps.begin() + 1, Arm64ECRetDest);
10644 return DAG.
getNode(AArch64ISD::TC_RETURN,
DL, MVT::Other, RetOps);
10647 return DAG.
getNode(AArch64ISD::RET_GLUE,
DL, MVT::Other, RetOps);
10656 unsigned Flag)
const {
10658 N->getOffset(), Flag);
10663 unsigned Flag)
const {
10669 unsigned Flag)
const {
10671 N->getOffset(), Flag);
10676 unsigned Flag)
const {
10682 unsigned Flag)
const {
10687template <
class NodeTy>
10689 unsigned Flags)
const {
10697 .
getInfo<AArch64FunctionInfo>()
10698 ->hasELFSignedGOT())
10701 return DAG.
getNode(AArch64ISD::LOADgot,
DL, Ty, GotAddr);
10705template <
class NodeTy>
10707 unsigned Flags)
const {
10713 AArch64ISD::WrapperLarge,
DL, Ty,
10721template <
class NodeTy>
10723 unsigned Flags)
const {
10731 return DAG.
getNode(AArch64ISD::ADDlow,
DL, Ty, ADRP,
Lo);
10735template <
class NodeTy>
10737 unsigned Flags)
const {
10741 SDValue Sym = getTargetNode(
N, Ty, DAG, Flags);
10742 return DAG.
getNode(AArch64ISD::ADR,
DL, Ty, Sym);
10748 const GlobalValue *GV = GN->
getGlobal();
10749 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV,
getTargetMachine());
10753 "unexpected offset in global node");
10758 return getGOT(GN, DAG, OpFlags);
10764 Result = getAddrLarge(GN, DAG, OpFlags);
10766 Result = getAddrTiny(GN, DAG, OpFlags);
10768 Result = getAddr(GN, DAG, OpFlags);
10807AArch64TargetLowering::LowerDarwinGlobalTLSAddress(
SDValue Op,
10809 assert(Subtarget->isTargetDarwin() &&
10810 "This function expects a Darwin target");
10825 PtrMemVT,
DL, Chain, DescAddr,
10840 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
10841 const uint32_t *
Mask =
TRI->getTLSCallPreservedMask();
10842 if (Subtarget->hasCustomCallingConv())
10850 unsigned Opcode = AArch64ISD::CALL;
10852 Ops.push_back(Chain);
10853 Ops.push_back(FuncTLVGet);
10857 Opcode = AArch64ISD::AUTH_CALL;
10979SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(
SDValue SymAddr,
10984 auto *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
10988 SDVTList NodeTys = DAG.
getVTList(MVT::Other, MVT::Glue);
10991 bool RequiresSMChange = TLSCallAttrs.requiresSMChange();
10993 auto ChainAndGlue = [](
SDValue Chain) -> std::pair<SDValue, SDValue> {
10994 return {Chain, Chain.
getValue(1)};
10997 if (RequiresSMChange)
10998 std::tie(Chain, Glue) =
11004 ? AArch64ISD::TLSDESC_AUTH_CALLSEQ
11005 : AArch64ISD::TLSDESC_CALLSEQ;
11007 std::tie(Chain, Glue) = ChainAndGlue(DAG.
getNode(
11010 if (TLSCallAttrs.requiresLazySave())
11011 std::tie(Chain, Glue) = ChainAndGlue(DAG.
getNode(
11012 AArch64ISD::REQUIRES_ZA_SAVE,
DL, NodeTys, {Chain, Chain.getValue(1)}));
11014 if (RequiresSMChange)
11015 std::tie(Chain, Glue) =
11023AArch64TargetLowering::LowerELFGlobalTLSAddress(
SDValue Op,
11025 assert(Subtarget->isTargetELF() &&
"This function expects an ELF target");
11028 AArch64FunctionInfo *MFI =
11043 "in local exec TLS model");
11054 const GlobalValue *GV = GA->
getGlobal();
11059 return LowerELFTLSLocalExec(GV, ThreadBase,
DL, DAG);
11062 TPOff = DAG.
getNode(AArch64ISD::LOADgot,
DL, PtrVT, TPOff);
11080 TPOff = LowerELFTLSDescCallSeq(SymAddr,
DL, DAG);
11087 GV,
DL, MVT::i64, 0,
11104 TPOff = LowerELFTLSDescCallSeq(SymAddr,
DL, DAG);
11112AArch64TargetLowering::LowerWindowsGlobalTLSAddress(
SDValue Op,
11114 assert(Subtarget->isTargetWindows() &&
"Windows specific TLS lowering");
11126 TLSArray = DAG.
getLoad(PtrVT,
DL, Chain, TLSArray, MachinePointerInfo());
11139 DAG.
getNode(AArch64ISD::ADDlow,
DL, PtrVT, ADRP, TLSIndexLo);
11140 TLSIndex = DAG.
getLoad(MVT::i32,
DL, Chain, TLSIndex, MachinePointerInfo());
11150 MachinePointerInfo());
11151 Chain =
TLS.getValue(1);
11154 const GlobalValue *GV = GA->
getGlobal();
11166 Addr = DAG.
getNode(AArch64ISD::ADDlow,
DL, PtrVT, Addr, TGALo);
11176 if (Subtarget->isTargetDarwin())
11177 return LowerDarwinGlobalTLSAddress(
Op, DAG);
11178 if (Subtarget->isTargetELF())
11179 return LowerELFGlobalTLSAddress(
Op, DAG);
11180 if (Subtarget->isTargetWindows())
11181 return LowerWindowsGlobalTLSAddress(
Op, DAG);
11219 assert(TGN->getGlobal()->hasExternalWeakLinkage());
11225 if (TGN->getOffset() != 0)
11227 "unsupported non-zero offset in weak ptrauth global reference");
11234 {TGA, Key, Discriminator}),
11239AArch64TargetLowering::LowerPtrAuthGlobalAddress(
SDValue Op,
11242 uint64_t KeyC =
Op.getConstantOperandVal(1);
11243 SDValue AddrDiscriminator =
Op.getOperand(2);
11244 uint64_t DiscriminatorC =
Op.getConstantOperandVal(3);
11245 EVT VT =
Op.getValueType();
11255 "constant discriminator in ptrauth global out of range [0, 0xffff]");
11258 if (!Subtarget->isTargetELF() && !Subtarget->isTargetMachO())
11261 int64_t PtrOffsetC = 0;
11267 const GlobalValue *PtrGV = PtrN->getGlobal();
11270 const unsigned OpFlags =
11274 "unsupported non-GOT op flags on ptrauth global reference");
11277 PtrOffsetC += PtrN->getOffset();
11280 assert(PtrN->getTargetFlags() == 0 &&
11281 "unsupported target flags on ptrauth global");
11286 ? AddrDiscriminator
11290 if (!NeedsGOTLoad) {
11294 {TPtr, Key, TAddrDiscriminator, Discriminator}),
11303 {TPtr, Key, TAddrDiscriminator, Discriminator}),
11331 SDValue Dest,
unsigned Opcode,
11343 uint64_t Mask =
Op.getConstantOperandVal(1);
11348 if (
Op.getOperand(0).getOpcode() ==
ISD::SHL) {
11349 auto Op00 =
Op.getOperand(0).getOperand(0);
11352 Op.getOperand(1),
Op.getOperand(0).getOperand(1));
11353 return DAG.
getNode(Opcode,
DL, MVT::Other, Chain, Shr,
11373 bool ProduceNonFlagSettingCondBr =
11379 if (
LHS.getValueType() == MVT::f128) {
11384 if (!
RHS.getNode()) {
11404 OFCC = getInvertedCondCode(OFCC);
11407 return DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, Chain, Dest, CCVal,
11411 if (
LHS.getValueType().isInteger()) {
11413 (
LHS.getValueType() == MVT::i32 ||
LHS.getValueType() == MVT::i64));
11418 if (RHSC && RHSC->
getZExtValue() == 1 && ProduceNonFlagSettingCondBr &&
11422 LHS.getResNo() == 0 &&
11426 bool CanNegate, MustBeFirst, PreferFirst;
11459 if (RHSC && RHSC->
getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
11465 return DAG.
getNode(AArch64ISD::CBZ,
DL, MVT::Other, Chain,
LHS, Dest);
11471 return DAG.
getNode(AArch64ISD::CBNZ,
DL, MVT::Other, Chain,
LHS, Dest);
11476 uint64_t SignBitPos;
11478 return DAG.
getNode(AArch64ISD::TBNZ,
DL, MVT::Other, Chain,
LHS,
11483 LHS.getOpcode() !=
ISD::AND && ProduceNonFlagSettingCondBr) {
11487 uint64_t SignBitPos;
11489 return DAG.
getNode(AArch64ISD::TBZ,
DL, MVT::Other, Chain,
LHS,
11495 if (Subtarget->hasCMPBR() &&
11497 ProduceNonFlagSettingCondBr) {
11506 return DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, Chain, Dest, CCVal,
11510 assert(
LHS.getValueType() == MVT::f16 ||
LHS.getValueType() == MVT::bf16 ||
11511 LHS.getValueType() == MVT::f32 ||
LHS.getValueType() == MVT::f64);
11520 DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, Chain, Dest, CC1Val, Cmp);
11523 return DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, BR1, Dest, CC2Val,
11532 if (!Subtarget->isNeonAvailable() &&
11533 !Subtarget->useSVEForFixedLengthVectors())
11536 EVT VT =
Op.getValueType();
11564 if (!VT.
isVector() && !Subtarget->isNeonAvailable() &&
11565 Subtarget->isSVEorStreamingSVEAvailable()) {
11566 if (VT != MVT::f16 && VT != MVT::f32 && VT != MVT::f64 && VT != MVT::bf16)
11580 auto BitCast = [
this](EVT VT,
SDValue Op, SelectionDAG &DAG) {
11582 return getSVESafeBitCast(VT,
Op, DAG);
11589 auto SetVecVal = [&](
int Idx = -1) {
11595 VecVal1 = BitCast(VecVT, In1, DAG);
11596 VecVal2 = BitCast(VecVT, In2, DAG);
11602 }
else if (VT == MVT::f64) {
11603 VecVT = MVT::v2i64;
11604 SetVecVal(AArch64::dsub);
11605 }
else if (VT == MVT::f32) {
11606 VecVT = MVT::v4i32;
11607 SetVecVal(AArch64::ssub);
11608 }
else if (VT == MVT::f16 || VT == MVT::bf16) {
11609 VecVT = MVT::v8i16;
11610 SetVecVal(AArch64::hsub);
11621 if (VT == MVT::f64 || VT == MVT::v2f64) {
11629 DAG.
getNode(AArch64ISD::BSP,
DL, VecVT, SignMaskV, VecVal1, VecVal2);
11630 if (VT == MVT::f16 || VT == MVT::bf16)
11632 if (VT == MVT::f32)
11634 if (VT == MVT::f64)
11637 return BitCast(VT, BSP, DAG);
11643 Attribute::NoImplicitFloat))
11646 EVT VT =
Op.getValueType();
11649 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);
11657 if (VT == MVT::i32 && IsParity)
11660 if (Subtarget->isSVEorStreamingSVEAvailable()) {
11661 if (VT == MVT::i32 || VT == MVT::i64) {
11662 EVT ContainerVT = VT == MVT::i32 ? MVT::nxv4i32 : MVT::nxv2i64;
11674 if (VT == MVT::i128) {
11687 if (!Subtarget->isNeonAvailable())
11698 if (VT == MVT::i32 || VT == MVT::i64) {
11699 if (VT == MVT::i32)
11705 AddV = DAG.
getNode(AArch64ISD::NVCAST,
DL,
11706 VT == MVT::i32 ? MVT::v2i32 : MVT::v1i64, AddV);
11712 }
else if (VT == MVT::i128) {
11718 DAG.
getNode(AArch64ISD::NVCAST,
DL, MVT::v2i64, AddV),
11726 assert(!IsParity &&
"ISD::PARITY of vector types not supported");
11728 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
11729 VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
11730 "Unexpected type for custom ctpop lowering");
  EVT DT = VT == MVT::v2i64 ? MVT::v4i32 : VT;
  // ...
  if (VT == MVT::v2i64) {
    Val = DAG.getNode(AArch64ISD::UDOT, DL, DT, Zeros, Ones, Val);
    Val = DAG.getNode(AArch64ISD::UADDLP, DL, VT, Val);
  } else if (VT == MVT::v2i32) {
    Val = DAG.getNode(AArch64ISD::UDOT, DL, DT, Zeros, Ones, Val);
  } else if (VT == MVT::v4i32) {
    Val = DAG.getNode(AArch64ISD::UDOT, DL, DT, Zeros, Ones, Val);
  }
  // ...
  unsigned EltSize = 8;
  // ...
  Val = DAG.getNode(AArch64ISD::UADDLP, DL, WidenVT, Val);
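  // The dot-product forms above (v2i64/v2i32/v4i32) sum the per-byte popcounts
  // with UDOT against an all-ones vector; the remaining vector types instead
  // widen the byte counts step by step with pairwise UADDLP additions until
  // the element size matches the result type.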
11770 EVT VT =
Op.getValueType();
11779 EVT VT =
Op.getValueType();
11781 unsigned Opcode =
Op.getOpcode();
11808 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::SMAX_PRED);
11810 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::SMIN_PRED);
11812 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::UMAX_PRED);
11814 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::UMIN_PRED);
11826 EVT VT =
Op.getValueType();
11830 VT, Subtarget->useSVEForFixedLengthVectors()))
11831 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU);
11843 REVB = DAG.
getNode(AArch64ISD::REV32,
DL, VST,
Op.getOperand(0));
11850 REVB = DAG.
getNode(AArch64ISD::REV32,
DL, VST,
Op.getOperand(0));
11857 REVB = DAG.
getNode(AArch64ISD::REV64,
DL, VST,
Op.getOperand(0));
11864 REVB = DAG.
getNode(AArch64ISD::REV64,
DL, VST,
Op.getOperand(0));
11870 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT,
11877 SmallVector<std::pair<SDValue, SDValue>, 16> &WorkList) {
11883 N =
N->getOperand(0);
11887 WorkList.push_back(std::make_pair(
N->getOperand(0),
N->getOperand(1)));
11893 if (
N->getOpcode() !=
ISD::OR || !
N->hasOneUse())
11907 EVT VT =
N->getValueType(0);
11917 unsigned NumXors = 0;
11922 std::tie(XOR0, XOR1) = WorkList[0];
11925 for (
unsigned I = 1;
I < WorkList.
size();
I++) {
11926 std::tie(XOR0, XOR1) = WorkList[
I];
11928 Cmp = DAG.
getNode(LogicOp,
DL, VT, Cmp, CmpChain);
11940 if (
Op.getValueType().isVector())
11941 return LowerVSETCC(
Op, DAG);
11943 bool IsStrict =
Op->isStrictFPOpcode();
11945 unsigned OpNo = IsStrict ? 1 : 0;
11948 Chain =
Op.getOperand(0);
11955 EVT VT =
Op.getValueType();
11961 if (
LHS.getValueType() == MVT::f128) {
11966 if (!
RHS.getNode()) {
11967 assert(
LHS.getValueType() ==
Op.getValueType() &&
11968 "Unexpected setcc expansion!");
11973 if (
LHS.getValueType().isInteger()) {
11989 SDValue Res = DAG.
getNode(AArch64ISD::CSEL,
DL, VT, FVal, TVal, CCVal, Cmp);
11994 assert(
LHS.getValueType() == MVT::bf16 ||
LHS.getValueType() == MVT::f16 ||
11995 LHS.getValueType() == MVT::f32 ||
LHS.getValueType() == MVT::f64);
12016 Res = DAG.
getNode(AArch64ISD::CSEL,
DL, VT, FVal, TVal, CC1Val, Cmp);
12026 DAG.
getNode(AArch64ISD::CSEL,
DL, VT, TVal, FVal, CC1Val, Cmp);
12029 Res = DAG.
getNode(AArch64ISD::CSEL,
DL, VT, TVal, CS1, CC2Val, Cmp);
12039 EVT VT =
LHS.getValueType();
12040 if (VT != MVT::i32 && VT != MVT::i64)
12050 EVT OpVT =
Op.getValueType();
12059 return DAG.
getNode(AArch64ISD::CSEL,
DL, OpVT, FVal, TVal, CCVal,
12068 "function only supposed to emit natural comparisons");
12077 if (!
LHS.getValueType().isVector()) {
12116 assert(!
LHS.getValueType().isVector());
12117 assert(!
RHS.getValueType().isVector());
12121 if (!CTVal || !CFVal)
12135 bool OneNaN =
false;
12151 bool ShouldInvert =
false;
12160 if (!Cmp2 && !ShouldInvert)
12177SDValue AArch64TargetLowering::LowerSELECT_CC(
12183 if (
LHS.getValueType() == MVT::f128) {
12188 if (!
RHS.getNode()) {
12195 if ((
LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
12196 LHS.getValueType() == MVT::bf16) {
12202 if (
LHS.getValueType().isInteger()) {
12204 (
LHS.getValueType() == MVT::i32 ||
LHS.getValueType() == MVT::i64));
12216 LHS.getValueType() ==
RHS.getValueType()) {
12217 EVT VT =
LHS.getValueType();
12223 Shift = DAG.
getNOT(
DL, Shift, VT);
12237 uint64_t SignBitPos;
12239 EVT TestVT =
LHS.getValueType();
12243 LHS, SignBitConst);
    unsigned Opcode = AArch64ISD::CSEL;
    // ...
    } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isZero()) {
      // ...
    } else if (CTVal && CFVal) {
      // ...
      if (TrueVal == ~FalseVal) {
        Opcode = AArch64ISD::CSINV;
      } else if (FalseVal > std::numeric_limits<int64_t>::min() &&
                 TrueVal == -FalseVal) {
        Opcode = AArch64ISD::CSNEG;
      } else /* ... 32-bit values ... */ {
        if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
          Opcode = AArch64ISD::CSINC;
          // ...
          if (TrueVal32 > FalseVal32) {
            // ...
          }
        }
      }
      // ... (64-bit values)
      const uint64_t TrueVal64 = TrueVal;
      const uint64_t FalseVal64 = FalseVal;
      if ((TrueVal64 == FalseVal64 + 1) || (TrueVal64 + 1 == FalseVal64)) {
        Opcode = AArch64ISD::CSINC;
        // ...
        if (TrueVal > FalseVal) {
          // ...
        }
      }
      // ...
      if (Opcode != AArch64ISD::CSEL) {
        // ...
      }
    }
    // ...
    if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
        /* ... */) {
      // ...
      if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
        /* ... */;
      else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
        /* ... */;
    } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
      assert(CTVal && CFVal && "Expected constant operands for CSNEG.");
      // ...
      Opcode = AArch64ISD::CSINV;
      // ...
    }
    // ...
    return DAG.getNode(Opcode, DL, VT, TVal, FVal, CCVal, Cmp);
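    // For integer selects between two constants the plain CSEL is strengthened
    // where possible: CSINV when the values are bitwise complements, CSNEG
    // when they are negations of each other, and CSINC when they differ by
    // one (swapping operands and inverting the condition as needed).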
12390 assert(
LHS.getValueType() == MVT::f16 ||
LHS.getValueType() == MVT::f32 ||
12391 LHS.getValueType() == MVT::f64);
12398 if (Subtarget->isNeonAvailable() &&
all_of(
Users, [](
const SDNode *U) {
12399 switch (
U->getOpcode()) {
12404 case AArch64ISD::DUP:
12422 if (
Flags.hasNoSignedZeros()) {
12426 if (RHSVal && RHSVal->
isZero()) {
12434 CFVal && CFVal->
isZero() &&
12442 SDValue CS1 = DAG.
getNode(AArch64ISD::CSEL,
DL, VT, TVal, FVal, CC1Val, Cmp);
12448 return DAG.
getNode(AArch64ISD::CSEL,
DL, VT, TVal, CS1, CC2Val, Cmp);
12457 EVT Ty =
Op.getValueType();
12460 auto Idx =
Op.getConstantOperandAPInt(2);
12461 int64_t IdxVal = Idx.getSExtValue();
12463 "Only expect scalable vectors for custom lowering of VECTOR_SPLICE");
12472 std::optional<unsigned> PredPattern;
12484 return DAG.
getNode(AArch64ISD::SPLICE,
DL, Ty, Pred,
Op.getOperand(0),
12504 SDNodeFlags
Flags =
Op->getFlags();
12506 return LowerSELECT_CC(CC,
LHS,
RHS, TVal, FVal,
Op->users(), Flags,
DL, DAG);
12516 EVT Ty =
Op.getValueType();
12517 if (Ty == MVT::aarch64svcount) {
12554 return DAG.
getNode(AArch64ISD::CSEL,
DL,
Op.getValueType(), TVal, FVal,
12573 if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
12580 Op->getFlags(),
DL, DAG);
12582 if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
12597 !Subtarget->isTargetMachO())
12598 return getAddrLarge(JT, DAG);
12600 return getAddrTiny(JT, DAG);
12601 return getAddr(JT, DAG);
12614 AFI->setJumpTableEntryInfo(JTI, 4,
nullptr);
12619 "aarch64-jump-table-hardening")) {
12621 if (Subtarget->isTargetMachO()) {
12626 assert(Subtarget->isTargetELF() &&
12627 "jump table hardening only supported on MachO/ELF");
12658 std::optional<uint16_t> BADisc =
12659 Subtarget->getPtrAuthBlockAddressDiscriminatorIfEnabled(MF.
getFunction());
12670 {Dest,
Key, Disc, AddrDisc, Chain});
12680 if (Subtarget->isTargetMachO()) {
12681 return getGOT(CP, DAG);
12684 return getAddrLarge(CP, DAG);
12686 return getAddrTiny(CP, DAG);
12688 return getAddr(CP, DAG);
12696 if (std::optional<uint16_t> BADisc =
12697 Subtarget->getPtrAuthBlockAddressDiscriminatorIfEnabled(
12711 {TargetBA,
Key, AddrDisc, Disc});
12719 return getAddrLarge(BAN, DAG);
12721 return getAddrTiny(BAN, DAG);
12723 return getAddr(BAN, DAG);
12728 AArch64FunctionInfo *FuncInfo =
12737 MachinePointerInfo(SV));
12743 AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
12747 if (Subtarget->isWindowsArm64EC()) {
12753 uint64_t StackOffset;
12768 MachinePointerInfo(SV));
12776 AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
12777 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
12792 MachinePointerInfo(SV),
Align(PtrSize)));
12809 MachinePointerInfo(SV,
Offset),
12827 MachinePointerInfo(SV,
Offset),
12837 GROffsAddr, MachinePointerInfo(SV,
Offset),
Align(4)));
12845 VROffsAddr, MachinePointerInfo(SV,
Offset),
Align(4)));
12855 if (Subtarget->isCallingConvWin64(
F.getCallingConv(),
F.isVarArg()))
12856 return LowerWin64_VASTART(
Op, DAG);
12857 else if (Subtarget->isTargetDarwin())
12858 return LowerDarwin_VASTART(
Op, DAG);
12860 return LowerAAPCS_VASTART(
Op, DAG);
12868 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
12869 unsigned VaListSize =
12870 (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
12872 : Subtarget->isTargetILP32() ? 20 : 32;
12878 Align(PtrSize),
false,
false,
nullptr,
12879 std::nullopt, MachinePointerInfo(DestSV),
12880 MachinePointerInfo(SrcSV));
12884 assert(Subtarget->isTargetDarwin() &&
12885 "automatic va_arg instruction only works on Darwin");
12888 EVT VT =
Op.getValueType();
12892 MaybeAlign
Align(
Op.getConstantOperandVal(3));
12893 unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8;
12897 DAG.
getLoad(PtrMemVT,
DL, Chain, Addr, MachinePointerInfo(V));
12903 "currently not supported");
12905 if (Align && *Align > MinSlotSize) {
12921 ArgSize = std::max(ArgSize, MinSlotSize);
12922 bool NeedFPTrunc =
false;
12925 NeedFPTrunc =
true;
12935 DAG.
getStore(Chain,
DL, VANext, Addr, MachinePointerInfo(V));
12941 DAG.
getLoad(MVT::f64,
DL, APStore, VAList, MachinePointerInfo());
12951 return DAG.
getLoad(VT,
DL, APStore, VAList, MachinePointerInfo());
12959 EVT VT =
Op.getValueType();
12961 unsigned Depth =
Op.getConstantOperandVal(0);
12966 MachinePointerInfo());
12968 if (Subtarget->isTargetILP32())
12984#define GET_REGISTER_MATCHER
12985#include "AArch64GenAsmMatcher.inc"
12992 if (AArch64::X1 <=
Reg &&
Reg <= AArch64::X28) {
12994 unsigned DwarfRegNum = MRI->getDwarfRegNum(
Reg,
false);
13006 EVT VT =
Op.getValueType();
13022 EVT VT =
Op.getValueType();
13024 unsigned Depth =
Op.getConstantOperandVal(0);
13027 SDValue FrameAddr = LowerFRAMEADDR(
Op, DAG);
13044 if (Subtarget->hasPAuth()) {
const APInt ImmInt = Imm.bitcastToAPInt();
if (VT == MVT::f64)
if (VT == MVT::f32)
if (VT == MVT::f16 || VT == MVT::bf16)
                                              bool OptForSize) const {
const APInt ImmInt = Imm.bitcastToAPInt();
if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
         "Should be able to build any value with at most 4 moves");
  unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 4 : 2));
  IsLegal = Insn.size() <= Limit;
             << " imm value: "; Imm.dump(););
13134 if ((ST->hasNEON() &&
13135 (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
13136 VT == MVT::f32 || VT == MVT::v1f32 || VT == MVT::v2f32 ||
13137 VT == MVT::v4f32)) ||
13139 (VT == MVT::nxv8f16 || VT == MVT::nxv4f32 || VT == MVT::nxv2f64))) {
13146 constexpr unsigned AccurateBits = 8;
13148 ExtraSteps = DesiredBits <= AccurateBits
13153 return DAG.
getNode(Opcode,
SDLoc(Operand), VT, Operand);
13163 EVT VT =
Op.getValueType();
13171AArch64TargetLowering::getSqrtResultForDenormInput(
SDValue Op,
13180 bool Reciprocal)
const {
13184 DAG, ExtraSteps)) {
13189 SDNodeFlags
Flags =
13194 for (
int i = ExtraSteps; i > 0; --i) {
13197 Step = DAG.
getNode(AArch64ISD::FRSQRTS,
DL, VT, Operand, Step, Flags);
13212 int &ExtraSteps)
const {
13215 DAG, ExtraSteps)) {
13223 for (
int i = ExtraSteps; i > 0; --i) {
13263const char *AArch64TargetLowering::LowerXConstraint(
EVT ConstraintVT)
const {
13271 if (!Subtarget->hasFPARMv8())
13296static std::optional<std::pair<unsigned, const TargetRegisterClass *>>
13299 (Constraint[1] !=
'p' && Constraint[1] !=
'z'))
13300 return std::nullopt;
13302 bool IsPredicate = Constraint[1] ==
'p';
13303 Constraint = Constraint.
substr(2, Constraint.
size() - 3);
13304 bool IsPredicateAsCount = IsPredicate && Constraint.
starts_with(
"n");
13305 if (IsPredicateAsCount)
13310 return std::nullopt;
13312 if (IsPredicateAsCount)
13313 return std::make_pair(AArch64::PN0 + V, &AArch64::PNRRegClass);
13315 return std::make_pair(AArch64::P0 + V, &AArch64::PPRRegClass);
13316 return std::make_pair(AArch64::Z0 + V, &AArch64::ZPRRegClass);
13319static std::optional<PredicateConstraint>
13330 if (VT != MVT::aarch64svcount &&
13334 switch (Constraint) {
13336 return VT == MVT::aarch64svcount ? &AArch64::PNR_p8to15RegClass
13337 : &AArch64::PPR_p8to15RegClass;
13339 return VT == MVT::aarch64svcount ? &AArch64::PNR_3bRegClass
13340 : &AArch64::PPR_3bRegClass;
13342 return VT == MVT::aarch64svcount ? &AArch64::PNRRegClass
13343 : &AArch64::PPRRegClass;
13351static std::optional<ReducedGprConstraint>
13364 switch (Constraint) {
13366 return &AArch64::MatrixIndexGPR32_8_11RegClass;
13368 return &AArch64::MatrixIndexGPR32_12_15RegClass;
13402 return DAG.
getNode(AArch64ISD::CSINC,
DL, MVT::i32,
13405 getCondCode(DAG, getInvertedCondCode(CC)), NZCV);
13409SDValue AArch64TargetLowering::LowerAsmOutputForConstraint(
13411 const AsmOperandInfo &OpInfo,
SelectionDAG &DAG)
const {
13416 if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
13417 OpInfo.ConstraintVT.getSizeInBits() < 8)
13432 if (OpInfo.ConstraintVT.getSizeInBits() <= 32)
13443AArch64TargetLowering::getConstraintType(
StringRef Constraint)
const {
13444 if (Constraint.
size() == 1) {
13445 switch (Constraint[0]) {
13482AArch64TargetLowering::getSingleConstraintMatchWeight(
13483 AsmOperandInfo &
info,
const char *constraint)
const {
13485 Value *CallOperandVal =
info.CallOperandVal;
13488 if (!CallOperandVal)
13492 switch (*constraint) {
13514std::pair<unsigned, const TargetRegisterClass *>
13515AArch64TargetLowering::getRegForInlineAsmConstraint(
13517 if (Constraint.
size() == 1) {
13518 switch (Constraint[0]) {
13521 return std::make_pair(0U,
nullptr);
13523 return std::make_pair(0U, &AArch64::GPR64x8ClassRegClass);
13525 return std::make_pair(0U, &AArch64::GPR64commonRegClass);
13526 return std::make_pair(0U, &AArch64::GPR32commonRegClass);
13528 if (!Subtarget->hasFPARMv8())
13532 return std::make_pair(0U, &AArch64::ZPRRegClass);
13533 return std::make_pair(0U,
nullptr);
13535 if (VT == MVT::Other)
13539 return std::make_pair(0U, &AArch64::FPR16RegClass);
13541 return std::make_pair(0U, &AArch64::FPR32RegClass);
13543 return std::make_pair(0U, &AArch64::FPR64RegClass);
13545 return std::make_pair(0U, &AArch64::FPR128RegClass);
13551 if (!Subtarget->hasFPARMv8())
13554 return std::make_pair(0U, &AArch64::ZPR_4bRegClass);
13556 return std::make_pair(0U, &AArch64::FPR128_loRegClass);
13559 if (!Subtarget->hasFPARMv8())
13562 return std::make_pair(0U, &AArch64::ZPR_3bRegClass);
13570 if (AArch64::ZPRRegClass.hasSubClassEq(
P->second) &&
13571 !Subtarget->isSVEorStreamingSVEAvailable())
13572 return std::make_pair(
TRI->getSubReg(
P->first, AArch64::zsub),
13573 &AArch64::FPR128RegClass);
13578 return std::make_pair(0U, RegClass);
13582 return std::make_pair(0U, RegClass);
13584 if (StringRef(
"{cc}").equals_insensitive(Constraint) ||
13586 return std::make_pair(
unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
13588 if (Constraint ==
"{za}") {
13589 return std::make_pair(
unsigned(AArch64::ZA), &AArch64::MPRRegClass);
13592 if (Constraint ==
"{zt0}") {
13593 return std::make_pair(
unsigned(AArch64::ZT0), &AArch64::ZTRRegClass);
13598 std::pair<unsigned, const TargetRegisterClass *> Res;
13603 unsigned Size = Constraint.
size();
13604 if ((
Size == 4 ||
Size == 5) && Constraint[0] ==
'{' &&
13605 tolower(Constraint[1]) ==
'v' && Constraint[
Size - 1] ==
'}') {
13608 if (!
Failed && RegNo >= 0 && RegNo <= 31) {
13612 if (VT != MVT::Other) {
13615 Res.first = AArch64::FPR16RegClass.getRegister(RegNo);
13616 Res.second = &AArch64::FPR16RegClass;
13619 Res.first = AArch64::FPR32RegClass.getRegister(RegNo);
13620 Res.second = &AArch64::FPR32RegClass;
13623 Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
13624 Res.second = &AArch64::FPR64RegClass;
13627 Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
13628 Res.second = &AArch64::FPR128RegClass;
13631 return std::make_pair(0U,
nullptr);
13634 Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
13635 Res.second = &AArch64::FPR128RegClass;
13641 if (Res.second && !Subtarget->hasFPARMv8() &&
13642 !AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
13643 !AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
13644 return std::make_pair(0U,
nullptr);
13651 bool AllowUnknown)
const {
13652 if (Subtarget->hasLS64() && Ty->isIntegerTy(512))
13653 return EVT(MVT::i64x8);
13660void AArch64TargetLowering::LowerAsmOperandForConstraint(
13666 if (Constraint.
size() != 1)
13669 char ConstraintLetter = Constraint[0];
13670 switch (ConstraintLetter) {
13681 if (
Op.getValueType() == MVT::i64)
13682 Result = DAG.
getRegister(AArch64::XZR, MVT::i64);
13684 Result = DAG.
getRegister(AArch64::WZR, MVT::i32);
13706 switch (ConstraintLetter) {
13720 CVal =
C->getSExtValue();
      if ((CVal & 0xFFFF) == CVal)
      if ((CVal & 0xFFFF0000ULL) == CVal)
      uint64_t NCVal = ~(uint32_t)CVal;
      if ((NCVal & 0xFFFFULL) == NCVal)
      if ((NCVal & 0xFFFF0000ULL) == NCVal)
      if ((CVal & 0xFFFFULL) == CVal)
      if ((CVal & 0xFFFF0000ULL) == CVal)
      if ((CVal & 0xFFFF00000000ULL) == CVal)
      if ((CVal & 0xFFFF000000000000ULL) == CVal)
      uint64_t NCVal = ~CVal;
      if ((NCVal & 0xFFFFULL) == NCVal)
      if ((NCVal & 0xFFFF0000ULL) == NCVal)
      if ((NCVal & 0xFFFF00000000ULL) == NCVal)
      if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
  Ops.push_back(Result);
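The chain of masks above implements the inline-asm immediate constraints: a constant is acceptable when it (for MOVZ) or its bitwise complement (for MOVN) occupies a single 16-bit chunk at shift 0, 16, 32 or 48. A compact standalone sketch of the same test (not from the original source; the function name is invented for illustration):

#include <cstdint>

bool isMovzOrMovnImm64(uint64_t CVal) {
  for (unsigned Shift = 0; Shift < 64; Shift += 16) {
    uint64_t Mask = 0xFFFFULL << Shift;
    if ((CVal & Mask) == CVal)   // MOVZ: one 16-bit chunk, rest zero
      return true;
    if ((~CVal & Mask) == ~CVal) // MOVN: complement is one 16-bit chunk
      return true;
  }
  return false;
}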
13831 EVT VT =
Op.getValueType();
13833 "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
13837 if (VT != MVT::v16i8 && VT != MVT::v8i8)
13841 assert((NumElts == 8 || NumElts == 16) &&
13842 "Need to have exactly 8 or 16 elements in vector.");
13848 for (
unsigned i = 0; i < NumElts; ++i) {
13855 SourceVec = OperandSourceVec;
13856 else if (SourceVec != OperandSourceVec)
13869 }
else if (!AndMaskConstants.
empty()) {
13889 if (!MaskSourceVec) {
13893 }
else if (MaskSourceVec != MaskSource->
getOperand(0)) {
13907 if (!AndMaskConstants.
empty())
13914 SourceVec, MaskSourceVec);
13922 LLVM_DEBUG(
dbgs() <<
"AArch64TargetLowering::ReconstructShuffle\n");
13924 EVT VT =
Op.getValueType();
13926 "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
13929 struct ShuffleSourceInfo {
13944 ShuffleSourceInfo(
SDValue Vec)
13945 : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
13946 ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
13954 for (
unsigned i = 0; i < NumElts; ++i) {
13960 V.getOperand(0).getValueType().isScalableVector()) {
13962 dbgs() <<
"Reshuffle failed: "
13963 "a shuffle can only come from building a vector from "
13964 "various elements of other fixed-width vectors, provided "
13965 "their indices are constant\n");
13971 auto Source =
find(Sources, SourceVec);
13972 if (Source == Sources.
end())
13973 Source = Sources.
insert(Sources.
end(), ShuffleSourceInfo(SourceVec));
13976 unsigned EltNo = V.getConstantOperandVal(1);
13977 Source->MinElt = std::min(Source->MinElt, EltNo);
13978 Source->MaxElt = std::max(Source->MaxElt, EltNo);
13983 if ((Sources.
size() == 3 || Sources.
size() == 4) && NumElts > 4) {
13988 for (
unsigned I = 0;
I < NumElts; ++
I) {
13991 for (
unsigned OF = 0; OF < OutputFactor; OF++)
13992 Mask.push_back(-1);
13998 unsigned Lane = V.getConstantOperandVal(1);
13999 for (
unsigned S = 0; S < Sources.
size(); S++) {
14000 if (V.getOperand(0) == Sources[S].Vec) {
14001 unsigned InputSize = Sources[S].Vec.getScalarValueSizeInBits();
14002 unsigned InputBase = 16 * S + Lane * InputSize / 8;
14003 for (
unsigned OF = 0; OF < OutputFactor; OF++)
14004 Mask.push_back(InputBase + OF);
14014 ? Intrinsic::aarch64_neon_tbl3
14015 : Intrinsic::aarch64_neon_tbl4,
14017 for (
unsigned i = 0; i < Sources.
size(); i++) {
14018 SDValue Src = Sources[i].Vec;
14019 EVT SrcVT = Src.getValueType();
14022 "Expected a legally typed vector");
14030 for (
unsigned i = 0; i < Mask.size(); i++)
14032 assert((Mask.size() == 8 || Mask.size() == 16) &&
14033 "Expected a v8i8 or v16i8 Mask");
14035 Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8,
DL, TBLMask));
14039 Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, TBLOperands);
14043 if (Sources.
size() > 2) {
14044 LLVM_DEBUG(
dbgs() <<
"Reshuffle failed: currently only do something "
14045 <<
"sensible when at most two source vectors are "
14053 for (
auto &Source : Sources) {
14054 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
14055 if (SrcEltTy.
bitsLT(SmallestEltTy)) {
14056 SmallestEltTy = SrcEltTy;
14059 unsigned ResMultiplier =
14068 for (
auto &Src : Sources) {
14069 EVT SrcVT = Src.ShuffleVec.getValueType();
14082 assert(2 * SrcVTSize == VTSize);
14087 DAG.
getPOISON(Src.ShuffleVec.getValueType()));
14093 dbgs() <<
"Reshuffle failed: result vector too small to extract\n");
14097 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
14099 dbgs() <<
"Reshuffle failed: span too large for a VEXT to cope\n");
14103 if (Src.MinElt >= NumSrcElts) {
14108 Src.WindowBase = -NumSrcElts;
14109 }
else if (Src.MaxElt < NumSrcElts) {
14126 dbgs() <<
"Reshuffle failed: don't know how to lower AArch64ISD::EXT "
14127 "for SVE vectors.");
14132 DAG.
getNode(AArch64ISD::EXT,
DL, DestVT, VEXTSrc1, VEXTSrc2,
14134 Src.WindowBase = -Src.MinElt;
14141 for (
auto &Src : Sources) {
14143 if (SrcEltTy == SmallestEltTy)
14148 DAG.
getNode(AArch64ISD::NVCAST,
DL, ShuffleVT, Src.ShuffleVec);
14154 Src.WindowBase *= Src.WindowScale;
14159 for (
auto Src : Sources)
14160 assert(Src.ShuffleVec.getValueType() == ShuffleVT);
14168 if (Entry.isUndef())
14171 auto Src =
find(Sources, Entry.getOperand(0));
14180 int LanesDefined = BitsDefined / BitsPerShuffleLane;
14184 int *LaneMask = &Mask[i * ResMultiplier];
14186 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
14187 ExtractBase += NumElts * (Src - Sources.
begin());
14188 for (
int j = 0; j < LanesDefined; ++j)
14189 LaneMask[j] = ExtractBase + j;
14194 LLVM_DEBUG(
dbgs() <<
"Reshuffle failed: illegal shuffle mask\n");
14200 for (
unsigned i = 0; i < Sources.
size(); ++i)
14207 V = DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Shuffle);
14213 dbgs() <<
"Reshuffle, creating node: "; V.dump(););
  unsigned ExpectedElt = Imm;
  for (unsigned i = 1; i < NumElts; ++i) {
    if (ExpectedElt == NumElts)
    if (ExpectedElt != static_cast<unsigned>(M[i]))
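This loop recognizes a single-register EXT: the mask must be a rotation Imm, Imm+1, ... that wraps back to 0 at NumElts, with undef lanes allowed anywhere. A minimal standalone sketch of the same idea (not from the original source; the name and signature are invented for illustration):

#include <vector>

bool isSingletonExtMask(const std::vector<int> &M, unsigned NumElts,
                        unsigned &Imm) {
  if (M.empty() || M[0] < 0)
    return false;
  Imm = static_cast<unsigned>(M[0]);
  unsigned ExpectedElt = Imm;
  for (unsigned i = 1; i < NumElts; ++i) {
    if (++ExpectedElt == NumElts)
      ExpectedElt = 0;                       // wrap around the register
    if (M[i] >= 0 && ExpectedElt != static_cast<unsigned>(M[i]))
      return false;                          // undef lanes match anything
  }
  return true;
}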
14253 if (V.getValueType() != MVT::v16i8)
14255 assert(V.getNumOperands() == 16 &&
"Expected 16 operands on the BUILDVECTOR");
14257 for (
unsigned X = 0;
X < 4;
X++) {
14269 for (
unsigned Y = 1;
Y < 4;
Y++) {
14285 V.getOperand(0).getOperand(0), V.getOperand(4).getOperand(0),
14286 V.getOperand(8).getOperand(0), V.getOperand(12).getOperand(0)};
14288 if (V.getValueType() == MVT::v4i32)
                            unsigned &DupLaneOp) {
         "Only possible block sizes for wide DUP are: 16, 32, 64");
  for (size_t BlockIndex = 0; BlockIndex < NumBlocks; BlockIndex++)
    for (size_t I = 0; I < NumEltsPerBlock; I++) {
      int Elt = M[BlockIndex * NumEltsPerBlock + I];
      if ((unsigned)Elt >= SingleVecNumElements)
      if (BlockElts[I] < 0)
        BlockElts[I] = Elt;
      else if (BlockElts[I] != Elt)
  auto FirstRealEltIter = find_if(BlockElts, [](int Elt) { return Elt >= 0; });
  assert(FirstRealEltIter != BlockElts.end() &&
         "Shuffle with all-undefs must have been caught by previous cases, "
  if (FirstRealEltIter == BlockElts.end()) {
  size_t FirstRealIndex = FirstRealEltIter - BlockElts.begin();
  if ((unsigned)*FirstRealEltIter < FirstRealIndex)
  size_t Elt0 = *FirstRealEltIter - FirstRealIndex;
  if (Elt0 % NumEltsPerBlock != 0)
  for (size_t I = 0; I < NumEltsPerBlock; I++)
    if (BlockElts[I] >= 0 && (unsigned)BlockElts[I] != Elt0 + I)
  DupLaneOp = Elt0 / NumEltsPerBlock;
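The wide-DUP check treats the shuffle mask as a sequence of blocks (16, 32 or 64 bits of lanes) and accepts it when every block repeats the same consecutive, block-aligned run of source elements. A self-contained sketch of that logic under those assumptions (not from the original source; names are invented for illustration):

#include <vector>

bool isWideDupMask(const std::vector<int> &M, unsigned NumElts,
                   unsigned EltsPerBlock, unsigned &DupLane) {
  if (EltsPerBlock == 0 || NumElts % EltsPerBlock != 0)
    return false;
  // Merge all blocks into one reference block, letting undef (-1) lanes match.
  std::vector<int> Block(EltsPerBlock, -1);
  for (unsigned B = 0; B < NumElts / EltsPerBlock; ++B)
    for (unsigned I = 0; I < EltsPerBlock; ++I) {
      int Elt = M[B * EltsPerBlock + I];
      if (Elt < 0)
        continue;
      if (Block[I] < 0)
        Block[I] = Elt;
      else if (Block[I] != Elt)
        return false;                        // blocks disagree: not a wide DUP
    }
  // The defined lanes must form a consecutive, block-aligned run.
  for (unsigned I = 0; I < EltsPerBlock; ++I)
    if (Block[I] >= 0) {
      if ((unsigned)Block[I] < I)
        return false;
      unsigned Elt0 = (unsigned)Block[I] - I;
      if (Elt0 % EltsPerBlock != 0)
        return false;
      for (unsigned J = 0; J < EltsPerBlock; ++J)
        if (Block[J] >= 0 && (unsigned)Block[J] != Elt0 + J)
          return false;
      DupLane = Elt0 / EltsPerBlock;
      return true;
    }
  return false;                              // all lanes undef; caller decides
}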
14379 const int *FirstRealElt =
find_if(M, [](
int Elt) {
return Elt >= 0; });
14384 APInt ExpectedElt =
APInt(MaskBits, *FirstRealElt + 1,
false,
14388 bool FoundWrongElt = std::any_of(FirstRealElt + 1, M.end(), [&](
int Elt) {
14389 return Elt != ExpectedElt++ && Elt >= 0;
14421 bool &ReverseEXT,
unsigned &Imm) {
14423 unsigned OtherBase = SplatOperand == 0 ? NumElts : 0;
14424 auto IsSplatElt = [=](
int Elt) {
14426 (SplatOperand == 0 ? Elt < (int)NumElts : Elt >= (int)NumElts);
14429 unsigned PrefixSplatElts = 0;
14430 while (PrefixSplatElts != NumElts && IsSplatElt(M[PrefixSplatElts]))
14433 if (PrefixSplatElts > 0 && PrefixSplatElts < NumElts) {
14435 for (
unsigned I = PrefixSplatElts;
I != NumElts; ++
I) {
14436 int Expected = OtherBase +
I - PrefixSplatElts;
14444 ReverseEXT = SplatOperand == 1;
14445 Imm = NumElts - PrefixSplatElts;
14450 unsigned SuffixSplatElts = 0;
14451 while (SuffixSplatElts != NumElts &&
14452 IsSplatElt(M[NumElts - 1 - SuffixSplatElts]))
14455 if (0 < SuffixSplatElts && SuffixSplatElts < NumElts) {
14457 for (
unsigned I = 0;
I != NumElts - SuffixSplatElts; ++
I) {
14458 int Expected = OtherBase +
I + SuffixSplatElts;
14466 ReverseEXT = SplatOperand == 0;
14467 Imm = SuffixSplatElts;
  if (NumElts % 2 != 0)
  WhichResult = (M[0] == 0 ? 0 : 1);
  unsigned Idx = WhichResult * NumElts / 2;
  for (unsigned i = 0; i != NumElts; i += 2) {
    if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
        (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx))
  WhichResult = (M[0] == 0 ? 0 : 1);
  for (unsigned j = 0; j != 2; ++j) {
    unsigned Idx = WhichResult;
    for (unsigned i = 0; i != Half; ++i) {
      int MIdx = M[i + j * Half];
      if (MIdx >= 0 && (unsigned)MIdx != Idx)
  if (NumElts % 2 != 0)
  WhichResult = (M[0] == 0 ? 0 : 1);
  for (unsigned i = 0; i < NumElts; i += 2) {
    if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
        (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + WhichResult))
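These checks recognize the single-input (undef second operand) forms of ZIP, UZP and TRN. For reference, the two-input patterns they are derived from can be expressed as a short standalone sketch (not from the original source; names are invented, and undef handling is omitted for brevity):

#include <vector>

// ZIP1/ZIP2 interleave lane k of each input; Which selects low or high half.
bool isZipMask(const std::vector<int> &M, unsigned NumElts, unsigned &Which) {
  Which = (M[0] == 0) ? 0 : 1;
  unsigned Idx = Which * NumElts / 2;
  for (unsigned i = 0; i < NumElts; i += 2, ++Idx)
    if (M[i] != (int)Idx || M[i + 1] != (int)(Idx + NumElts))
      return false;
  return true;
}

// TRN1/TRN2 pick even (or odd) lanes pairwise from both inputs.
bool isTrnMask(const std::vector<int> &M, unsigned NumElts, unsigned &Which) {
  Which = (M[0] == 0) ? 0 : 1;
  for (unsigned i = 0; i < NumElts; i += 2)
    if (M[i] != (int)(i + Which) || M[i + 1] != (int)(i + NumElts + Which))
      return false;
  return true;
}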
14530 bool &DstIsLeft,
int &Anomaly) {
14531 if (M.size() !=
static_cast<size_t>(NumInputElements))
14534 int NumLHSMatch = 0, NumRHSMatch = 0;
14535 int LastLHSMismatch = -1, LastRHSMismatch = -1;
14537 for (
int i = 0; i < NumInputElements; ++i) {
14547 LastLHSMismatch = i;
14549 if (M[i] == i + NumInputElements)
14552 LastRHSMismatch = i;
14555 if (NumLHSMatch == NumInputElements - 1) {
14557 Anomaly = LastLHSMismatch;
14559 }
else if (NumRHSMatch == NumInputElements - 1) {
14561 Anomaly = LastRHSMismatch;
14574 for (
int I = 0,
E = NumElts / 2;
I !=
E;
I++) {
14579 int Offset = NumElts / 2;
14580 for (
int I = NumElts / 2,
E = NumElts;
I !=
E;
I++) {
14581 if (Mask[
I] !=
I + SplitLHS *
Offset)
14590 EVT VT =
Op.getValueType();
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
  unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
  if (LHSID == (1 * 9 + 2) * 9 + 3)
  assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!");
14655 if (OpNum == OP_MOVLANE) {
14657 auto getPFIDLane = [](
unsigned ID,
int Elt) ->
int {
14658 assert(Elt < 4 &&
"Expected Perfect Lanes to be less than 4");
14664 return (
ID % 9 == 8) ? -1 :
ID % 9;
14673 assert(RHSID < 8 &&
"Expected a lane index for RHSID!");
14674 unsigned ExtLane = 0;
14680 int MaskElt = getPFIDLane(
ID, (RHSID & 0x01) << 1) >> 1;
14682 MaskElt = (getPFIDLane(
ID, ((RHSID & 0x01) << 1) + 1) - 1) >> 1;
14683 assert(MaskElt >= 0 &&
"Didn't expect an undef movlane index!");
14684 ExtLane = MaskElt < 2 ? MaskElt : (MaskElt - 2);
14685 Input = MaskElt < 2 ? V1 : V2;
14691 "Expected 16 or 32 bit shuffle elements");
14696 int MaskElt = getPFIDLane(
ID, RHSID);
14697 assert(MaskElt >= 0 &&
"Didn't expect an undef movlane index!");
14698 ExtLane = MaskElt < 4 ? MaskElt : (MaskElt - 4);
14699 Input = MaskElt < 4 ? V1 : V2;
14701 if (VT == MVT::v4i16) {
14707 Input.getValueType().getVectorElementType(),
14729 return DAG.
getNode(AArch64ISD::REV64,
DL, VT, OpLHS);
14734 return DAG.
getNode(AArch64ISD::REV32,
DL, VT, OpLHS);
14736 assert(VT == MVT::v8i8 || VT == MVT::v16i8);
14737 EVT BSVT = VT == MVT::v8i8 ? MVT::v4i16 : MVT::v8i16;
14739 AArch64ISD::NVCAST,
DL, VT,
14741 DAG.
getNode(AArch64ISD::NVCAST,
DL, BSVT, OpLHS)));
14749 if (EltTy == MVT::i8)
14750 Opcode = AArch64ISD::DUPLANE8;
14751 else if (EltTy == MVT::i16 || EltTy == MVT::f16 || EltTy == MVT::bf16)
14752 Opcode = AArch64ISD::DUPLANE16;
14753 else if (EltTy == MVT::i32 || EltTy == MVT::f32)
14754 Opcode = AArch64ISD::DUPLANE32;
14755 else if (EltTy == MVT::i64 || EltTy == MVT::f64)
14756 Opcode = AArch64ISD::DUPLANE64;
14763 return DAG.
getNode(Opcode,
DL, VT, OpLHS, Lane);
14769 return DAG.
getNode(AArch64ISD::EXT,
DL, VT, OpLHS, OpRHS,
14773 return DAG.
getNode(AArch64ISD::UZP1,
DL, VT, OpLHS, OpRHS);
14775 return DAG.
getNode(AArch64ISD::UZP2,
DL, VT, OpLHS, OpRHS);
14777 return DAG.
getNode(AArch64ISD::ZIP1,
DL, VT, OpLHS, OpRHS);
14779 return DAG.
getNode(AArch64ISD::ZIP2,
DL, VT, OpLHS, OpRHS);
14781 return DAG.
getNode(AArch64ISD::TRN1,
DL, VT, OpLHS, OpRHS);
14783 return DAG.
getNode(AArch64ISD::TRN2,
DL, VT, OpLHS, OpRHS);
14794 EVT EltVT =
Op.getValueType().getVectorElementType();
  MVT IndexVT = MVT::v8i8;
  unsigned IndexLen = 8;
  if (Op.getValueSizeInBits() == 128) {
    IndexVT = MVT::v16i8;
  for (int Val : ShuffleMask) {
    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
      unsigned Offset = Byte + Val * BytesPerElt;
      if (IsUndefOrZero && Offset >= IndexLen)
  if (IsUndefOrZero) {
  if (IndexLen == 8) {
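The loop above expands an element-level shuffle mask into the byte-level index vector that a TBL instruction consumes; indices past the table are set out of range so TBL writes zero into those bytes. A minimal standalone sketch of that expansion (not from the original source; the name and signature are invented for illustration):

#include <cstdint>
#include <vector>

std::vector<uint8_t> buildTblByteMask(const std::vector<int> &ShuffleMask,
                                      unsigned BytesPerElt, unsigned IndexLen,
                                      bool SecondOpIsZero) {
  std::vector<uint8_t> Bytes;
  for (int Val : ShuffleMask)
    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
      if (Val < 0) {                       // undef lane: any byte will do
        Bytes.push_back(255);
        continue;
      }
      unsigned Offset = Byte + (unsigned)Val * BytesPerElt;
      // TBL yields 0 for out-of-range indices, which is exactly what we want
      // when the second operand is undef or known to be zero.
      Bytes.push_back(SecondOpIsZero && Offset >= IndexLen ? 255
                                                           : (uint8_t)Offset);
    }
  return Bytes;
}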
14864 if (EltType == MVT::i8)
14865 return AArch64ISD::DUPLANE8;
14866 if (EltType == MVT::i16 || EltType == MVT::f16 || EltType == MVT::bf16)
14867 return AArch64ISD::DUPLANE16;
14868 if (EltType == MVT::i32 || EltType == MVT::f32)
14869 return AArch64ISD::DUPLANE32;
14870 if (EltType == MVT::i64 || EltType == MVT::f64)
14871 return AArch64ISD::DUPLANE64;
14879 auto getScaledOffsetDup = [](
SDValue BitCast,
int &LaneC,
MVT &CastVT) {
14890 unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
14892 if (ExtIdxInBits % CastedEltBitWidth != 0)
14900 LaneC += ExtIdxInBits / CastedEltBitWidth;
14907 unsigned SrcVecNumElts =
14914 if (getScaledOffsetDup(V, Lane, CastVT)) {
14915 V = DAG.
getBitcast(CastVT, V.getOperand(0).getOperand(0));
14917 V.getOperand(0).getValueType().is128BitVector()) {
14920 Lane += V.getConstantOperandVal(1);
14921 V = V.getOperand(0);
14947 EVT VT =
Op.getValueType();
14957 if (ElementSize > 32 || ElementSize == 1)
14987 EVT VT =
Op.getValueType();
15004 for (
unsigned I = 0;
I < 16;
I++) {
15005 if (ShuffleMask[
I] < 16)
15011 TBLMaskParts[
I] = DAG.
getConstant(
C->getSExtValue() + 32,
DL, MVT::i32);
15025AArch64TargetLowering::LowerEXTEND_VECTOR_INREG(
SDValue Op,
15028 EVT VT =
Op.getValueType();
15032 unsigned UnpackOpcode =
Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
15040 Val = DAG.
getNode(UnpackOpcode,
DL, MVT::nxv8i16, Val);
15041 if (VT == MVT::nxv8i16)
15045 Val = DAG.
getNode(UnpackOpcode,
DL, MVT::nxv4i32, Val);
15046 if (VT == MVT::nxv4i32)
15050 Val = DAG.
getNode(UnpackOpcode,
DL, MVT::nxv2i64, Val);
15051 assert(VT == MVT::nxv2i64 &&
"Unexpected result type!");
15062AArch64TargetLowering::LowerZERO_EXTEND_VECTOR_INREG(
SDValue Op,
15065 EVT VT =
Op.getValueType();
15068 return LowerEXTEND_VECTOR_INREG(
Op, DAG);
15073 "Unexpected extension factor.");
15080 DAG.
getNode(AArch64ISD::ZIP1,
DL, SrcVT, SrcOp, Zeros));
15086 EVT VT =
Op.getValueType();
15091 return LowerFixedLengthVECTOR_SHUFFLEToSVE(
Op, DAG);
15097 ArrayRef<int> ShuffleMask = SVN->
getMask();
15104 "Unexpected VECTOR_SHUFFLE mask size!");
15130 for (
unsigned LaneSize : {64U, 32U, 16U}) {
15133 unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64
15134 : LaneSize == 32 ? AArch64ISD::DUPLANE32
15135 : AArch64ISD::DUPLANE16;
15150 if (
isREVMask(ShuffleMask, EltSize, NumElts, 64))
15152 if (
isREVMask(ShuffleMask, EltSize, NumElts, 32))
15154 if (
isREVMask(ShuffleMask, EltSize, NumElts, 16)) {
15156 assert(VT == MVT::v8i8 || VT == MVT::v16i8);
15157 EVT BSVT = VT == MVT::v8i8 ? MVT::v4i16 : MVT::v8i16;
15159 AArch64ISD::NVCAST,
DL, VT,
15161 DAG.
getNode(AArch64ISD::NVCAST,
DL, BSVT, V1)));
15164 if (((NumElts == 8 && EltSize == 16) || (NumElts == 16 && EltSize == 8)) &&
15167 return DAG.
getNode(AArch64ISD::EXT,
DL, VT, Rev, Rev,
15175 for (
unsigned SplatOperand : {0
U, 1U}) {
15176 if ((SplatOperand == 0 && !IsSplat1) || (SplatOperand == 1 && !IsSplat2))
15179 bool ReverseSplatEXT =
false;
15185 if (ReverseSplatEXT)
15188 return DAG.
getNode(AArch64ISD::EXT,
DL, VT, ExtOp1, ExtOp2,
15193 bool ReverseEXT =
false;
15195 if (
isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
15207 unsigned WhichResult;
15208 unsigned OperandOrder;
15209 if (
isZIPMask(ShuffleMask, NumElts, WhichResult, OperandOrder)) {
15210 unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
15212 OperandOrder == 0 ? V2 : V1);
15214 if (
isUZPMask(ShuffleMask, NumElts, WhichResult)) {
15215 unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
15218 if (
isTRNMask(ShuffleMask, NumElts, WhichResult, OperandOrder)) {
15219 unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
15221 OperandOrder == 0 ? V2 : V1);
15225 unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
15229 unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
15233 unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
15243 if (
isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
15244 SDValue DstVec = DstIsLeft ? V1 : V2;
15248 int SrcLane = ShuffleMask[Anomaly];
15249 if (SrcLane >= NumInputElements) {
15251 SrcLane -= NumElts;
15258 ScalarVT = MVT::i32;
15271 if (NumElts == 4) {
15272 unsigned PFIndexes[4];
15273 for (
unsigned i = 0; i != 4; ++i) {
15274 if (ShuffleMask[i] < 0)
15277 PFIndexes[i] = ShuffleMask[i];
15281 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
15282 PFIndexes[2] * 9 + PFIndexes[3];
15292 "Expected larger vector element sizes to be handled already");
15294 for (
int M : ShuffleMask)
15296 M >=
static_cast<int>(NumElts) ? 0 : 0xffffffff,
DL, MVT::i32));
15310 EVT VT =
Op.getValueType();
15313 return LowerToScalableOp(
Op, DAG);
15316 "Unexpected vector type!");
15331 if (VT == MVT::nxv1i1)
15343 EVT VT =
Op.getValueType();
15356 if (CIdx && (CIdx->getZExtValue() <= 3)) {
15358 return DAG.
getNode(AArch64ISD::DUPLANE128,
DL, VT,
Op.getOperand(1), CI);
15380 SDValue TBL = DAG.
getNode(AArch64ISD::TBL,
DL, MVT::nxv2i64, V, ShuffleMask);
15386 APInt &UndefBits) {
15388 APInt SplatBits, SplatUndef;
15389 unsigned SplatBitSize;
15391 if (BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
15394 for (
unsigned i = 0; i < NumSplats; ++i) {
15395 CnstBits <<= SplatBitSize;
15396 UndefBits <<= SplatBitSize;
15398 UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.
getSizeInBits());
15409 const APInt &Bits) {
  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
    EVT VT = Op.getValueType();
    return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Mov);
  EVT VT = Op.getValueType();
  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
    bool isAdvSIMDModImm = false;
    if (isAdvSIMDModImm) {
      return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Mov);
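These helpers all start by requiring the two 64-bit halves of the splat pattern to match, then try the AdvSIMD modified-immediate encodings. As one concrete example of such a check, here is a standalone sketch (not from the original source; the name is invented) of the 32-bit shifted-byte MOVI form, i.e. "MOVI Vd.4s, #imm8, LSL #0/8/16/24":

#include <cstdint>

bool isMovi32ShiftedImm(uint64_t Splat64, uint8_t &Imm8, unsigned &Shift) {
  if ((Splat64 >> 32) != (Splat64 & 0xFFFFFFFFULL))
    return false;                        // both 32-bit halves must match
  uint32_t V = (uint32_t)Splat64;
  for (Shift = 0; Shift < 32; Shift += 8)
    if ((V & ~(0xFFu << Shift)) == 0) {  // a single byte, everything else zero
      Imm8 = (uint8_t)(V >> Shift);
      return true;
    }
  return false;
}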
15485 EVT VT =
Op.getValueType();
15490 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15493 bool isAdvSIMDModImm =
false;
15505 if (isAdvSIMDModImm) {
15519 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Mov);
15529 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15531 EVT VT =
Op.getValueType();
15533 bool isAdvSIMDModImm =
false;
15545 if (isAdvSIMDModImm) {
15550 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Mov);
15559 const APInt &Bits) {
15560 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15562 EVT VT =
Op.getValueType();
15571 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Mov);
15580 const APInt &Bits) {
15581 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15583 EVT VT =
Op.getValueType();
15586 bool isAdvSIMDModImm =
false;
15590 MovTy = isWide ? MVT::v4f32 : MVT::v2f32;
15595 MovTy = MVT::v2f64;
15598 if (isAdvSIMDModImm) {
15602 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Mov);
15622 for (
unsigned i = 1; i < NumElts; ++i)
15631 while (
N.getOpcode() == AArch64ISD::REINTERPRET_CAST)
15632 N =
N.getOperand(0);
15638 unsigned NumElts =
N.getValueType().getVectorMinNumElements();
15641 while (
N.getOpcode() == AArch64ISD::REINTERPRET_CAST) {
15642 N =
N.getOperand(0);
15645 if (
N.getValueType().getVectorMinNumElements() < NumElts)
15655 if (
N.getOpcode() == AArch64ISD::PTRUE &&
15656 N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
15657 return N.getValueType().getVectorMinNumElements() >= NumElts;
15669 EVT VT =
N->getValueType(0);
15679 SDValue FirstOp =
N->getOperand(0);
15680 unsigned FirstOpc = FirstOp.
getOpcode();
15681 SDValue SecondOp =
N->getOperand(1);
15682 unsigned SecondOpc = SecondOp.
getOpcode();
15689 if ((FirstOpc ==
ISD::AND || FirstOpc == AArch64ISD::BICi) &&
15690 (SecondOpc == AArch64ISD::VSHL || SecondOpc == AArch64ISD::VLSHR ||
15691 SecondOpc == AArch64ISD::SHL_PRED ||
15692 SecondOpc == AArch64ISD::SRL_PRED)) {
15696 }
else if ((SecondOpc ==
ISD::AND || SecondOpc == AArch64ISD::BICi) &&
15697 (FirstOpc == AArch64ISD::VSHL || FirstOpc == AArch64ISD::VLSHR ||
15698 FirstOpc == AArch64ISD::SHL_PRED ||
15699 FirstOpc == AArch64ISD::SRL_PRED)) {
15706 bool IsShiftRight = Shift.
getOpcode() == AArch64ISD::VLSHR ||
15707 Shift.
getOpcode() == AArch64ISD::SRL_PRED;
15708 bool ShiftHasPredOp = Shift.
getOpcode() == AArch64ISD::SHL_PRED ||
15709 Shift.
getOpcode() == AArch64ISD::SRL_PRED;
15713 if (ShiftHasPredOp) {
15719 C2 =
C.getZExtValue();
15722 C2 = C2node->getZExtValue();
15736 assert(C1nodeImm && C1nodeShift);
15738 C1AsAPInt = C1AsAPInt.
zextOrTrunc(ElemSizeInBits);
15744 if (C2 > ElemSizeInBits)
15749 if (C1AsAPInt != RequiredC1)
15757 unsigned Inst = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
15762 EVT VT =
N->getValueType(0);
15763 assert(VT.
isVector() &&
"Expected vector type in tryLowerToBSL\n");
15781 for (
int i = 1; i >= 0; --i) {
15782 for (
int j = 1; j >= 0; --j) {
15808 if (
Sub.getOperand(1) !=
Add.getOperand(0))
15811 return DAG.
getNode(AArch64ISD::BSP,
DL, VT,
Sub, SubSibling, AddSibling);
15819 for (
int i = 1; i >= 0; --i)
15820 for (
int j = 1; j >= 0; --j) {
15831 if (!BVN0 || !BVN1)
15834 bool FoundMatch =
true;
15838 if (!CN0 || !CN1 ||
15841 FoundMatch =
false;
15856 !Subtarget->isNeonAvailable()))
15857 return LowerToScalableOp(
Op, DAG);
15866 EVT VT =
Op.getValueType();
15871 BuildVectorSDNode *BVN =
15875 LHS =
Op.getOperand(1);
15893 UndefBits, &
LHS)) ||
15909 EVT VT =
Op.getValueType();
15923 CstLane->getAPIntValue().trunc(EltTy.
getSizeInBits()).getZExtValue(),
15927 }
else if (Lane.getOpcode() ==
ISD::UNDEF) {
15930 assert(Lane.getValueType() == MVT::i32 &&
15931 "Unexpected BUILD_VECTOR operand type");
15933 Ops.push_back(Lane);
15940 EVT VT =
Op.getValueType();
15948 int32_t ImmVal, ShiftVal;
15957 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, SplatVal);
15962 EVT VT =
Op.getValueType();
15964 "Expected a legal NEON vector");
15970 auto TryMOVIWithBits = [&](
APInt DefBits) {
15984 APInt NotDefBits = ~DefBits;
15994 if (
SDValue R = TryMOVIWithBits(DefBits))
15996 if (
SDValue R = TryMOVIWithBits(UndefBits))
16004 auto TryWithFNeg = [&](
APInt DefBits,
MVT FVT) {
16010 unsigned NumElts = VT.
getSizeInBits() / FVT.getScalarSizeInBits();
16011 for (
unsigned i = 0; i < NumElts; i++)
16012 NegBits |= Neg << (FVT.getScalarSizeInBits() * i);
16013 NegBits = DefBits ^ NegBits;
16017 if (
SDValue NewOp = TryMOVIWithBits(NegBits)) {
16021 AArch64ISD::NVCAST,
DL, VT,
16023 DAG.
getNode(AArch64ISD::NVCAST,
DL, VFVT, NewOp)));
16028 if ((R = TryWithFNeg(DefBits, MVT::f32)) ||
16029 (R = TryWithFNeg(DefBits, MVT::f64)) ||
16030 (ST->hasFullFP16() && (R = TryWithFNeg(DefBits, MVT::f16))))
16037SDValue AArch64TargetLowering::LowerFixedLengthBuildVectorToSVE(
16039 EVT VT =
Op.getValueType();
16063 NumElems -
count_if(
Op->op_values(), IsExtractElt) > 4)
16070 return Op.isUndef() ? Poison
16071 : DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
16072 ContainerVT, Poison, Op, ZeroI64);
16076 while (Intermediates.
size() > 1) {
16079 for (
unsigned I = 0;
I < Intermediates.
size();
I += 2) {
16082 Intermediates[
I / 2] =
16084 : DAG.
getNode(AArch64ISD::ZIP1,
DL, ZipVT, Op0, Op1);
16087 Intermediates.
resize(Intermediates.
size() / 2);
16098 EVT VT =
Op.getValueType();
16100 bool OverrideNEON = !Subtarget->isNeonAvailable() ||
16103 return LowerFixedLengthBuildVectorToSVE(
Op, DAG);
16121 Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
16122 if (Val.isZero() || (VT.
isInteger() && Val.isAllOnes()))
16126 if (
Const->isZero() && !
Const->isNegative())
16147 bool isOnlyLowElement =
true;
16148 bool usesOnlyOneValue =
true;
16149 bool usesOnlyOneConstantValue =
true;
16151 bool AllLanesExtractElt =
true;
16152 unsigned NumConstantLanes = 0;
16153 unsigned NumDifferentLanes = 0;
16154 unsigned NumUndefLanes = 0;
16157 SmallMapVector<SDValue, unsigned, 16> DifferentValueMap;
16158 unsigned ConsecutiveValCount = 0;
16163 bool MaybeLowHalfZeroHigh =
16165 unsigned HalfElts = MaybeLowHalfZeroHigh ? (NumElts >> 1) : 0;
16166 SDValue LowHalfFirstVal = MaybeLowHalfZeroHigh ?
Op.getOperand(0) :
SDValue();
16167 for (
unsigned i = 0; i < NumElts; ++i) {
16170 AllLanesExtractElt =
false;
16173 MaybeLowHalfZeroHigh =
false;
16177 isOnlyLowElement =
false;
16182 ++NumConstantLanes;
16183 if (!ConstantValue.
getNode())
16185 else if (ConstantValue != V)
16186 usesOnlyOneConstantValue =
false;
16189 if (!
Value.getNode())
16191 else if (V !=
Value) {
16192 usesOnlyOneValue =
false;
16193 ++NumDifferentLanes;
16196 if (PrevVal != V) {
16197 ConsecutiveValCount = 0;
16200 if (MaybeLowHalfZeroHigh) {
16201 if (i < HalfElts) {
16202 if (V != LowHalfFirstVal)
16203 MaybeLowHalfZeroHigh =
false;
16204 }
else if (!IsZero(V)) {
16205 MaybeLowHalfZeroHigh =
false;
16220 DifferentValueMap[
V] = ++ConsecutiveValCount;
16223 if (!
Value.getNode()) {
16225 dbgs() <<
"LowerBUILD_VECTOR: value undefined, creating undef node\n");
16233 LLVM_DEBUG(
dbgs() <<
"LowerBUILD_VECTOR: only low element used, creating 1 "
16234 "SCALAR_TO_VECTOR node\n");
16238 if (MaybeLowHalfZeroHigh && LowHalfFirstVal.
getNode() &&
16249 : DAG.
getNode(AArch64ISD::DUP,
DL, HalfVT, LowHalfFirstVal);
16254 if (AllLanesExtractElt) {
16255 SDNode *
Vector =
nullptr;
16260 for (
unsigned i = 0; i < NumElts; ++i) {
16262 const SDNode *
N =
V.getNode();
16287 if (Val == 2 * i) {
16291 if (Val - 1 == 2 * i) {
16318 if (usesOnlyOneValue) {
16321 Value.getValueType() != VT) {
16323 dbgs() <<
"LowerBUILD_VECTOR: use DUP for non-constant splats\n");
16331 if (
Value.getValueSizeInBits() == 64) {
16333 dbgs() <<
"LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
16345 assert ((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 ||
16346 EltTy == MVT::f64) &&
"Unsupported floating-point vector type");
16348 dbgs() <<
"LowerBUILD_VECTOR: float constant splats, creating int "
16349 "BITCASTS, and try again\n");
16351 for (
unsigned i = 0; i < NumElts; ++i)
16355 LLVM_DEBUG(
dbgs() <<
"LowerBUILD_VECTOR: trying to lower new vector: ";
16357 Val = LowerBUILD_VECTOR(Val, DAG);
16367 bool PreferDUPAndInsert =
16369 NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
16370 NumDifferentLanes >= NumConstantLanes;
16376 if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
16380 APInt ConstantValueAPInt(1, 0);
16382 ConstantValueAPInt =
C->getAPIntValue().zextOrTrunc(BitSize);
16384 !ConstantValueAPInt.isAllOnes()) {
16388 Val = DAG.
getNode(AArch64ISD::DUP,
DL, VT, ConstantValue);
16392 for (
unsigned i = 0; i < NumElts; ++i) {
16409 const SDLoc
DL(
Op);
16410 APInt PackedVal(64, 0);
16411 unsigned BitPos = 0;
16418 LaneBits = APInt(EltSizeInBits, 0);
16420 LaneBits =
C->getAPIntValue();
16422 LaneBits = CFP->getValueAPF().bitcastToAPInt();
16427 BitPos += EltSizeInBits;
16434 if (Insns.
size() > 2)
16445 dbgs() <<
"LowerBUILD_VECTOR: all elements are constant, use default "
16457 if (NumElts >= 4) {
16465 if (PreferDUPAndInsert) {
16470 for (
unsigned I = 0;
I < NumElts; ++
I)
16481 if (DifferentValueMap.
size() == 2 && NumUndefLanes == 0) {
16493 bool canUseVECTOR_CONCAT =
true;
16494 for (
auto Pair : DifferentValueMap) {
16496 if (Pair.second != NumElts / 2)
16497 canUseVECTOR_CONCAT =
false;
16510 if (canUseVECTOR_CONCAT) {
16533 if (NumElts >= 8) {
16534 SmallVector<int, 16> MaskVec;
16536 SDValue FirstLaneVal =
Op.getOperand(0);
16537 for (
unsigned i = 0; i < NumElts; ++i) {
16539 if (FirstLaneVal == Val)
16563 dbgs() <<
"LowerBUILD_VECTOR: alternatives failed, creating sequence "
16564 "of INSERT_VECTOR_ELT\n");
16581 LLVM_DEBUG(
dbgs() <<
"Creating node for op0, it is not undefined:\n");
16587 dbgs() <<
"Creating nodes for the other vector elements:\n";
16589 for (; i < NumElts; ++i) {
16600 dbgs() <<
"LowerBUILD_VECTOR: use default expansion, failed to find "
16601 "better alternative\n");
16608 !Subtarget->isNeonAvailable()))
16609 return LowerFixedLengthConcatVectorsToSVE(
Op, DAG);
16611 assert(
Op.getValueType().isScalableVector() &&
16613 "Expected legal scalable vector type!");
16618 "Unexpected number of operands in CONCAT_VECTORS");
16620 if (NumOperands == 2)
16625 while (ConcatOps.size() > 1) {
16626 for (
unsigned I = 0,
E = ConcatOps.size();
I !=
E;
I += 2) {
16634 ConcatOps.resize(ConcatOps.size() / 2);
16636 return ConcatOps[0];
16647 !Subtarget->isNeonAvailable()))
16648 return LowerFixedLengthInsertVectorElt(
Op, DAG);
16650 EVT VT =
Op.getValueType();
16659 if (VT == MVT::nxv1i1) {
16663 WidenVec, Elt, Idx);
16672 PromoteVec, Elt, Idx);
16685AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(
SDValue Op,
16688 EVT VT =
Op.getOperand(0).getValueType();
16694 if (VT == MVT::nxv1i1) {
16698 WidenedPred,
Op.getOperand(1));
16705 MVT ExtractTy = VectorVT == MVT::nxv2i64 ? MVT::i64 : MVT::i32;
16707 Extend,
Op.getOperand(1));
16712 return LowerFixedLengthExtractVectorElt(
Op, DAG);
16720 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
16721 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
16722 VT == MVT::v8f16 || VT == MVT::v8bf16)
16725 if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
16726 VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
16737 if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
16747 EVT VT =
Op.getValueType();
16749 "Only cases that extract a fixed length vector are supported!");
16750 EVT InVT =
Op.getOperand(0).getValueType();
16758 unsigned Idx =
Op.getConstantOperandVal(1);
16777 if (PackedVT != InVT) {
16801 assert(
Op.getValueType().isScalableVector() &&
16802 "Only expect to lower inserts into scalable vectors!");
16804 EVT InVT =
Op.getOperand(1).getValueType();
16805 unsigned Idx =
Op.getConstantOperandVal(2);
16810 EVT VT =
Op.getValueType();
16826 if (Idx < (NumElts / 2))
16852 Vec0 = getSVESafeBitCast(NarrowVT, Vec0, DAG);
16853 Vec1 = getSVESafeBitCast(NarrowVT, Vec1, DAG);
16857 Vec1 = DAG.
getNode(AArch64ISD::NVCAST,
DL, NarrowVT, Vec1);
16866 HiVec0 = DAG.
getNode(AArch64ISD::NVCAST,
DL, NarrowVT, HiVec0);
16867 Narrow = DAG.
getNode(AArch64ISD::UZP1,
DL, NarrowVT, Vec1, HiVec0);
16870 "Invalid subvector index!");
16872 LoVec0 = DAG.
getNode(AArch64ISD::NVCAST,
DL, NarrowVT, LoVec0);
16873 Narrow = DAG.
getNode(AArch64ISD::UZP1,
DL, NarrowVT, LoVec0, Vec1);
16876 return getSVESafeBitCast(VT, Narrow, DAG);
16884 std::optional<unsigned> PredPattern =
16896 if (
Op.getOpcode() != AArch64ISD::DUP &&
16909 SplatVal =
Op->getConstantOperandVal(0);
16910 if (
Op.getValueType().getVectorElementType() != MVT::i64)
16911 SplatVal = (int32_t)SplatVal;
16919 SplatVal = -SplatVal;
16927 EVT VT =
Op.getValueType();
16931 return LowerFixedLengthVectorIntDivideToSVE(
Op, DAG);
16933 unsigned Opc =
Op.getOpcode();
16945 DAG.
getNode(AArch64ISD::ASRD_MERGE_OP1,
DL, VT, Pg,
Op->getOperand(0),
16953 if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64) {
16955 return DAG.
getNode(MaskedOpcode,
DL, VT,
Op.getOperand(0),
Op.getOperand(1),
16962 if (VT == MVT::nxv16i8)
16963 WidenedVT = MVT::nxv8i16;
16964 else if (VT == MVT::nxv8i16)
16965 WidenedVT = MVT::nxv4i32;
16969 unsigned UnpkLo =
Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
16970 unsigned UnpkHi =
Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
16979 return DAG.
getNode(AArch64ISD::UZP1,
DL, VT, ResultLoCast, ResultHiCast);
16982bool AArch64TargetLowering::shouldExpandBuildVectorWithShuffles(
16983 EVT VT,
unsigned DefinedValues)
const {
16984 if (!Subtarget->isNeonAvailable())
17003 unsigned DummyUnsigned;
17011 isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
17013 isTRNMask(M, NumElts, DummyUnsigned, DummyUnsigned) ||
17014 isUZPMask(M, NumElts, DummyUnsigned) ||
17015 isZIPMask(M, NumElts, DummyUnsigned, DummyUnsigned) ||
17019 isINSMask(M, NumElts, DummyBool, DummyInt) ||
17035 Op =
Op.getOperand(0);
17037 APInt SplatBits, SplatUndef;
17038 unsigned SplatBitSize;
17040 if (!BVN || !BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
17041 HasAnyUndefs, ElementBits) ||
17042 SplatBitSize > ElementBits)
  assert(VT.isVector() && "vector shift count is not a vector type");
  return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
  assert(VT.isVector() && "vector shift count is not a vector type");
  return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
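These two returns encode the legal immediate ranges for AArch64 vector shifts: left shifts take 0..ElementBits-1 (the "long" form also accepts a shift equal to the element size), and right shifts take 1..ElementBits (halved for the "narrow" form). A standalone restatement of the same checks (not from the original source; names are invented for illustration):

#include <cstdint>

bool isValidVShiftLImm(int64_t Cnt, int64_t ElementBits, bool IsLong) {
  return Cnt >= 0 && (IsLong ? Cnt - 1 : Cnt) < ElementBits;
}

bool isValidVShiftRImm(int64_t Cnt, int64_t ElementBits, bool IsNarrow) {
  return Cnt >= 1 && Cnt <= (IsNarrow ? ElementBits / 2 : ElementBits);
}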
17073 EVT VT =
Op.getValueType();
17078 EVT OpVT =
Op.getOperand(0).getValueType();
17089 !Subtarget->isNeonAvailable()))
17090 return LowerFixedLengthVectorTruncateToSVE(
Op, DAG);
17093 if (VT.
is64BitVector() &&
Op.getOperand(0).getValueType().is128BitVector())
17104 unsigned &ShiftValue,
17117 ShiftValue = ShiftOp1->getZExtValue();
17126 "ResVT must be truncated or same type as the shift.");
17129 if (ShiftValue > ExtraBits && !
Add->getFlags().hasNoUnsignedWrap())
17136 uint64_t AddValue = AddOp1->getZExtValue();
17137 if (AddValue != 1ULL << (ShiftValue - 1))
17140 RShOperand =
Add->getOperand(0);
17146 EVT VT =
Op.getValueType();
17150 if (!
Op.getOperand(1).getValueType().isVector())
17154 switch (
Op.getOpcode()) {
17158 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::SHL_PRED);
17160 if (
isVShiftLImm(
Op.getOperand(1), VT,
false, Cnt) && Cnt < EltSize)
17161 return DAG.
getNode(AArch64ISD::VSHL,
DL, VT,
Op.getOperand(0),
17166 Op.getOperand(0),
Op.getOperand(1));
17170 (Subtarget->hasSVE2() ||
17171 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
17173 unsigned ShiftValue;
17175 return DAG.
getNode(AArch64ISD::URSHR_I_PRED,
DL, VT,
17182 unsigned Opc =
Op.getOpcode() ==
ISD::SRA ? AArch64ISD::SRA_PRED
17183 : AArch64ISD::SRL_PRED;
17184 return LowerToPredicatedOp(
Op, DAG,
Opc);
17188 if (
isVShiftRImm(
Op.getOperand(1), VT,
false, Cnt) && Cnt < EltSize) {
17190 (
Op.getOpcode() ==
ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
17199 unsigned Opc = (
Op.getOpcode() ==
ISD::SRA) ? Intrinsic::aarch64_neon_sshl
17200 : Intrinsic::aarch64_neon_ushl;
17208 return NegShiftLeft;
17216 if (
Op.getValueType().isScalableVector())
17217 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);
17220 !Subtarget->isNeonAvailable()))
17221 return LowerFixedLengthVectorSetccToSVE(
Op, DAG);
17226 EVT CmpVT =
LHS.getValueType().changeVectorElementTypeToInteger();
17229 if (
LHS.getValueType().getVectorElementType().isInteger())
17232 assert(((!Subtarget->hasFullFP16() &&
17233 LHS.getValueType().getVectorElementType() != MVT::f16) ||
17234 LHS.getValueType().getVectorElementType() != MVT::bf16 ||
17235 LHS.getValueType().getVectorElementType() != MVT::f128) &&
17236 "Unexpected type!");
17241 bool OneNaN =
false;
17262 bool NoNaNs =
Op->getFlags().hasNoNaNs();
17264 if (!
Cmp.getNode())
17293 unsigned ScalarOpcode;
17311 "Expected power-of-2 length vector");
17319 if (ElemVT == MVT::i1) {
17321 if (NumElems > 16) {
17324 EVT HalfVT =
Lo.getValueType();
17335 unsigned ExtendedWidth = 64;
17338 ExtendedWidth = 128;
17343 unsigned ExtendOp =
17352 NumElems == 2 && ExtendedWidth == 128) {
17353 Extended = DAG.
getBitcast(MVT::v4i32, Extended);
17354 ExtendedVT = MVT::i32;
17356 switch (ScalarOpcode) {
17377 VecVT =
Lo.getValueType();
17393 for (
unsigned Shift = NumElems / 2; Shift > 0; Shift /= 2) {
17398 Scalar = DAG.
getNode(ScalarOpcode,
DL, ScalarVT, Scalar, Shifted);
17411 EVT SrcVT = Src.getValueType();
17416 SrcVT == MVT::v2f16) {
17424 if (
SDValue Result = LowerReductionToSVE(
Op, DAG))
17428 switch (
Op.getOpcode()) {
17433 Op.getValueType(),
DL, DAG);
17453 EVT SrcVT = Src.getValueType();
17456 SDVTList SrcVTs = DAG.
getVTList(SrcVT, SrcVT);
17468 for (
unsigned I = 0;
I < Stages; ++
I) {
17470 Src = DAG.
getNode(BaseOpc,
DL, SrcVT, Src.getValue(0), Src.getValue(1));
17478 auto &Subtarget = DAG.
getSubtarget<AArch64Subtarget>();
17480 if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
17485 MVT VT =
Op.getSimpleValueType();
17486 assert(VT != MVT::i128 &&
"Handled elsewhere, code replicated.");
17491 Op.getOperand(0),
Op.getOperand(1),
RHS,
17496AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
SDValue Op,
17501 SDNode *
Node =
Op.getNode();
17506 EVT VT =
Node->getValueType(0);
17509 "no-stack-arg-probe")) {
17511 Chain =
SP.getValue(1);
17521 RTLIB::LibcallImpl ChkStkImpl =
getLibcallImpl(RTLIB::STACK_PROBE);
17522 if (ChkStkImpl == RTLIB::Unsupported)
17531 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
17532 const uint32_t *
Mask =
TRI->getWindowsStackProbePreservedMask();
17533 if (Subtarget->hasCustomCallingConv())
17541 Chain, Callee, DAG.
getRegister(AArch64::X15, MVT::i64),
17552 Chain =
SP.getValue(1);
17566AArch64TargetLowering::LowerInlineDYNAMIC_STACKALLOC(
SDValue Op,
17569 SDNode *
Node =
Op.getNode();
17576 EVT VT =
Node->getValueType(0);
17580 Chain =
SP.getValue(1);
17587 Chain = DAG.
getNode(AArch64ISD::PROBED_ALLOCA,
DL, MVT::Other, Chain, SP);
17593AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(
SDValue Op,
17597 if (Subtarget->isTargetWindows())
17598 return LowerWindowsDYNAMIC_STACKALLOC(
Op, DAG);
17600 return LowerInlineDYNAMIC_STACKALLOC(
Op, DAG);
17606 unsigned NewOp)
const {
17607 if (Subtarget->hasSVE2())
17608 return LowerToPredicatedOp(
Op, DAG, NewOp);
17616 EVT VT =
Op.getValueType();
17617 assert(VT != MVT::i64 &&
"Expected illegal VSCALE node");
17620 APInt MulImm =
Op.getConstantOperandAPInt(0);
17626template <
unsigned NumVecs>
17636 for (
unsigned I = 0;
I < NumVecs; ++
I)
17645 Info.align.reset();
17656 auto &
DL =
I.getDataLayout();
17658 case Intrinsic::aarch64_sve_st2:
17662 case Intrinsic::aarch64_sve_st3:
17666 case Intrinsic::aarch64_sve_st4:
17670 case Intrinsic::aarch64_neon_ld2:
17671 case Intrinsic::aarch64_neon_ld3:
17672 case Intrinsic::aarch64_neon_ld4:
17673 case Intrinsic::aarch64_neon_ld1x2:
17674 case Intrinsic::aarch64_neon_ld1x3:
17675 case Intrinsic::aarch64_neon_ld1x4: {
17677 uint64_t NumElts =
DL.getTypeSizeInBits(
I.getType()) / 64;
17679 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
17681 Info.align.reset();
17687 case Intrinsic::aarch64_neon_ld2lane:
17688 case Intrinsic::aarch64_neon_ld3lane:
17689 case Intrinsic::aarch64_neon_ld4lane:
17690 case Intrinsic::aarch64_neon_ld2r:
17691 case Intrinsic::aarch64_neon_ld3r:
17692 case Intrinsic::aarch64_neon_ld4r: {
17695 Type *RetTy =
I.getType();
17697 unsigned NumElts = StructTy->getNumElements();
17698 Type *VecTy = StructTy->getElementType(0);
17701 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
17703 Info.align.reset();
17709 case Intrinsic::aarch64_neon_st2:
17710 case Intrinsic::aarch64_neon_st3:
17711 case Intrinsic::aarch64_neon_st4:
17712 case Intrinsic::aarch64_neon_st1x2:
17713 case Intrinsic::aarch64_neon_st1x3:
17714 case Intrinsic::aarch64_neon_st1x4: {
17716 unsigned NumElts = 0;
17717 for (
const Value *Arg :
I.args()) {
17718 Type *ArgTy = Arg->getType();
17721 NumElts +=
DL.getTypeSizeInBits(ArgTy) / 64;
17724 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
17726 Info.align.reset();
17732 case Intrinsic::aarch64_neon_st2lane:
17733 case Intrinsic::aarch64_neon_st3lane:
17734 case Intrinsic::aarch64_neon_st4lane: {
17736 unsigned NumElts = 0;
17738 Type *VecTy =
I.getArgOperand(0)->getType();
17741 for (
const Value *Arg :
I.args()) {
17742 Type *ArgTy = Arg->getType();
17749 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
17751 Info.align.reset();
17757 case Intrinsic::aarch64_ldaxr:
17758 case Intrinsic::aarch64_ldxr: {
17759 Type *ValTy =
I.getParamElementType(0);
17762 Info.ptrVal =
I.getArgOperand(0);
17764 Info.align =
DL.getABITypeAlign(ValTy);
17769 case Intrinsic::aarch64_stlxr:
17770 case Intrinsic::aarch64_stxr: {
17771 Type *ValTy =
I.getParamElementType(1);
17774 Info.ptrVal =
I.getArgOperand(1);
17776 Info.align =
DL.getABITypeAlign(ValTy);
17781 case Intrinsic::aarch64_ldaxp:
17782 case Intrinsic::aarch64_ldxp:
17784 Info.memVT = MVT::i128;
17785 Info.ptrVal =
I.getArgOperand(0);
17787 Info.align =
Align(16);
17791 case Intrinsic::aarch64_stlxp:
17792 case Intrinsic::aarch64_stxp:
17794 Info.memVT = MVT::i128;
17795 Info.ptrVal =
I.getArgOperand(2);
17797 Info.align =
Align(16);
17801 case Intrinsic::aarch64_sve_ldnt1: {
17805 Info.ptrVal =
I.getArgOperand(1);
17807 Info.align =
DL.getABITypeAlign(ElTy);
17812 case Intrinsic::aarch64_sve_stnt1: {
17816 Info.memVT =
MVT::getVT(
I.getOperand(0)->getType());
17817 Info.ptrVal =
I.getArgOperand(2);
17819 Info.align =
DL.getABITypeAlign(ElTy);
17824 case Intrinsic::aarch64_mops_memset_tag: {
17825 Value *Dst =
I.getArgOperand(0);
17826 Value *Val =
I.getArgOperand(1);
17831 Info.align =
I.getParamAlign(0).valueOrOne();
17845 std::optional<unsigned> ByteOffset)
const {
17862 Base.getOperand(1).hasOneUse() &&
17869 uint64_t ShiftAmount =
Base.getOperand(1).getConstantOperandVal(1);
17871 if (ShiftAmount ==
Log2_32(LoadBytes))
17881 if ((VT == MVT::i64 || VT == MVT::i32) && Extend->
use_size()) {
17900 return NumBits1 > NumBits2;
17907 return NumBits1 > NumBits2;
17914 if (
I->getOpcode() != Instruction::FMul)
17917 if (!
I->hasOneUse())
17922 if (!(
User->getOpcode() == Instruction::FSub ||
17923 User->getOpcode() == Instruction::FAdd))
17934 I->getFastMathFlags().allowContract()));
17944 return NumBits1 == 32 && NumBits2 == 64;
17951 return NumBits1 == 32 && NumBits2 == 64;
17969bool AArch64TargetLowering::isExtFreeImpl(
const Instruction *Ext)
const {
17977 for (
const Use &U : Ext->
uses()) {
17985 switch (Instr->getOpcode()) {
17986 case Instruction::Shl:
17990 case Instruction::GetElementPtr: {
17993 std::advance(GTI, U.getOperandNo()-1);
18006 if (ShiftAmt == 0 || ShiftAmt > 4)
18010 case Instruction::Trunc:
                                   unsigned NumElts, bool IsLittleEndian,
  if (DstWidth % 8 != 0 || DstWidth <= 16 || DstWidth > 64)
  assert(DstWidth % SrcWidth == 0 &&
         "TBL lowering is not supported for a conversion instruction with this "
         "source and destination element type.");
  unsigned Factor = DstWidth / SrcWidth;
  unsigned MaskLen = NumElts * Factor;
  Mask.resize(MaskLen, NumElts);
  unsigned SrcIndex = 0;
  for (unsigned I = IsLittleEndian ? 0 : Factor - 1; I < MaskLen; I += Factor)
    Mask[I] = SrcIndex++;
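The mask built here spreads each source byte Factor positions apart and fills every other slot with index NumElts, which points at a zero lane in the shuffle's second operand, so the shuffle performs the zero-extension. A self-contained sketch of the construction (not from the original source; the name is invented for illustration):

#include <vector>

std::vector<int> buildZExtTblMask(unsigned NumElts, unsigned SrcWidth,
                                  unsigned DstWidth, bool IsLittleEndian) {
  unsigned Factor = DstWidth / SrcWidth;           // e.g. i8 -> i32 gives 4
  std::vector<int> Mask(NumElts * Factor, (int)NumElts);  // default: zero lane
  unsigned SrcIndex = 0;
  for (unsigned I = IsLittleEndian ? 0 : Factor - 1; I < Mask.size();
       I += Factor)
    Mask[I] = (int)SrcIndex++;                     // place each source byte
  return Mask;
}

// E.g. NumElts=4, i8->i32, little-endian:
// {0,4,4,4, 1,4,4,4, 2,4,4,4, 3,4,4,4}.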
18052 bool IsLittleEndian) {
18054 unsigned NumElts = SrcTy->getNumElements();
18062 auto *FirstEltZero = Builder.CreateInsertElement(
18064 Value *Result = Builder.CreateShuffleVector(
Op, FirstEltZero, Mask);
18065 Result = Builder.CreateBitCast(Result, DstTy);
18066 if (DstTy != ZExtTy)
18067 Result = Builder.CreateZExt(Result, ZExtTy);
18073 bool IsLittleEndian) {
18080 !IsLittleEndian, Mask))
18083 auto *FirstEltZero = Builder.CreateInsertElement(
18086 return Builder.CreateShuffleVector(
Op, FirstEltZero, Mask);
18095 assert(SrcTy->getElementType()->isIntegerTy() &&
18096 "Non-integer type source vector element is not supported");
18097 assert(DstTy->getElementType()->isIntegerTy(8) &&
18098 "Unsupported destination vector element type");
18099 unsigned SrcElemTySz =
18101 unsigned DstElemTySz =
18103 assert((SrcElemTySz % DstElemTySz == 0) &&
18104 "Cannot lower truncate to tbl instructions for a source element size "
18105 "that is not divisible by the destination element size");
  unsigned TruncFactor = SrcElemTySz / DstElemTySz;
  assert((SrcElemTySz == 16 || SrcElemTySz == 32 || SrcElemTySz == 64) &&
         "Unsupported source vector element type size");
  for (int Itr = 0; Itr < 16; Itr++) {
    if (Itr < NumElements)
                          IsLittleEndian ? Itr * TruncFactor
                                         : Itr * TruncFactor + (TruncFactor - 1)));
    MaskConst.push_back(Builder.getInt8(255));
  int MaxTblSz = 128 * 4;
  int MaxSrcSz = SrcElemTySz * NumElements;
      (MaxTblSz > MaxSrcSz) ? NumElements : (MaxTblSz / SrcElemTySz);
  assert(ElemsPerTbl <= 16 &&
         "Maximum elements selected using TBL instruction cannot exceed 16!");
18132 int ShuffleCount = 128 / SrcElemTySz;
18134 for (
int i = 0; i < ShuffleCount; ++i)
18141 while (ShuffleLanes.
back() < NumElements) {
18143 Builder.CreateShuffleVector(TI->
getOperand(0), ShuffleLanes), VecTy));
18145 if (Parts.
size() == 4) {
18148 Builder.CreateIntrinsic(Intrinsic::aarch64_neon_tbl4, VecTy, Parts));
18152 for (
int i = 0; i < ShuffleCount; ++i)
18153 ShuffleLanes[i] += ShuffleCount;
18157 "Lowering trunc for vectors requiring different TBL instructions is "
18161 if (!Parts.
empty()) {
18163 switch (Parts.
size()) {
18165 TblID = Intrinsic::aarch64_neon_tbl1;
18168 TblID = Intrinsic::aarch64_neon_tbl2;
18171 TblID = Intrinsic::aarch64_neon_tbl3;
18176 Results.push_back(Builder.CreateIntrinsic(TblID, VecTy, Parts));
18181 assert(
Results.size() <= 2 &&
"Trunc lowering does not support generation of "
18182 "more than 2 tbl instructions!");
18185 if (ElemsPerTbl < 16) {
18187 std::iota(FinalMask.
begin(), FinalMask.
end(), 0);
18188 FinalResult = Builder.CreateShuffleVector(
Results[0], FinalMask);
18192 if (ElemsPerTbl < 16) {
18193 std::iota(FinalMask.
begin(), FinalMask.
begin() + ElemsPerTbl, 0);
18194 std::iota(FinalMask.
begin() + ElemsPerTbl, FinalMask.
end(), 16);
18196 std::iota(FinalMask.
begin(), FinalMask.
end(), 0);
18210 if (!
EnableExtToTBL || Subtarget->useSVEForFixedLengthVectors())
18218 if (!L || L->getHeader() !=
I->getParent() ||
F->hasOptSize())
18223 if (!SrcTy || !DstTy)
18230 if (ZExt && SrcTy->getElementType()->isIntegerTy(8)) {
18231 auto DstWidth = DstTy->getElementType()->getScalarSizeInBits();
18232 if (DstWidth % 8 != 0)
18235 auto *TruncDstType =
18239 auto SrcWidth = SrcTy->getElementType()->getScalarSizeInBits();
18240 if (
TTI.getCastInstrCost(
I->getOpcode(), DstTy, TruncDstType,
18243 if (SrcWidth * 2 >= TruncDstType->getElementType()->getScalarSizeInBits())
18246 DstTy = TruncDstType;
18254 if (SrcWidth * 4 <= DstWidth) {
18255 if (
all_of(
I->users(), [&](
auto *U) {
18256 using namespace llvm::PatternMatch;
18257 auto *SingleUser = cast<Instruction>(&*U);
18258 if (match(SingleUser, m_c_Mul(m_Specific(I), m_SExt(m_Value()))))
18260 if (match(SingleUser,
18261 m_Intrinsic<Intrinsic::vector_partial_reduce_add>(
18262 m_Value(), m_Specific(I))))
18269 if (DstTy->getScalarSizeInBits() >= 64)
18275 DstTy, Subtarget->isLittleEndian());
18278 ZExt->replaceAllUsesWith(Result);
18279 ZExt->eraseFromParent();
18284 if (UIToFP && ((SrcTy->getElementType()->isIntegerTy(8) &&
18285 DstTy->getElementType()->isFloatTy()) ||
18286 (SrcTy->getElementType()->isIntegerTy(16) &&
18287 DstTy->getElementType()->isDoubleTy()))) {
18292 assert(ZExt &&
"Cannot fail for the i8 to float conversion");
18293 auto *UI = Builder.CreateUIToFP(ZExt, DstTy);
18294 I->replaceAllUsesWith(UI);
18295 I->eraseFromParent();
18300 if (SIToFP && SrcTy->getElementType()->isIntegerTy(8) &&
18301 DstTy->getElementType()->isFloatTy()) {
18305 Subtarget->isLittleEndian());
18306 assert(Shuffle &&
"Cannot fail for the i8 to float conversion");
18308 auto *AShr = Builder.CreateAShr(Cast, 24,
"",
true);
18309 auto *
SI = Builder.CreateSIToFP(AShr, DstTy);
18310 I->replaceAllUsesWith(
SI);
18311 I->eraseFromParent();
18319 (SrcTy->getNumElements() == 8 || SrcTy->getNumElements() == 16) &&
18320 SrcTy->getElementType()->isFloatTy() &&
18321 DstTy->getElementType()->isIntegerTy(8)) {
18323 auto *WideConv = Builder.CreateFPToUI(FPToUI->getOperand(0),
18325 auto *TruncI = Builder.CreateTrunc(WideConv, DstTy);
18326 I->replaceAllUsesWith(TruncI);
18327 I->eraseFromParent();
18337 if (TI && DstTy->getElementType()->isIntegerTy(8) &&
18338 ((SrcTy->getElementType()->isIntegerTy(32) ||
18339 SrcTy->getElementType()->isIntegerTy(64)) &&
18340 (SrcTy->getNumElements() == 16 || SrcTy->getNumElements() == 8))) {
18349 Align &RequiredAlignment)
const {
18354 RequiredAlignment =
Align(1);
18356 return NumBits == 32 || NumBits == 64;
  unsigned VecSize = 128;
    VecSize = std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
  return std::max<unsigned>(1, (MinElts * ElSize + 127) / VecSize);
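This computes how many ldN/stN operations an interleaved access must be split into so that each covers at most one native vector register (128 bits for NEON, the minimum configured vector length for SVE). A standalone restatement as a plain ceiling division (not from the original source; the in-tree code adds 127 because the NEON register width is 128 bits):

#include <algorithm>

unsigned numInterleavedAccesses(unsigned MinElts, unsigned ElSizeInBits,
                                unsigned VecSizeInBits /* e.g. 128 */) {
  // Ceil-divide the total access width by the vector register width.
  return std::max(1u, (MinElts * ElSizeInBits + VecSizeInBits - 1) /
                          VecSizeInBits);
}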
18373 if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
18383 unsigned MinElts = EC.getKnownMinValue();
18385 UseScalable = false;
18388 (!Subtarget->useSVEForFixedLengthVectors() ||
18393 !Subtarget->isSVEorStreamingSVEAvailable())
18401 if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
18404 if (EC.isScalable()) {
18405 UseScalable = true;
18406 return isPowerOf2_32(MinElts) && (MinElts * ElSize) % 128 == 0;
18409 unsigned VecSize = DL.getTypeSizeInBits(VecTy);
18410 if (Subtarget->useSVEForFixedLengthVectors()) {
18411 unsigned MinSVEVectorSize =
18412 std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
18413 if (VecSize % MinSVEVectorSize == 0 ||
18415 (!Subtarget->isNeonAvailable() || VecSize > 128))) {
18416 UseScalable = true;
18423 return Subtarget->isNeonAvailable() && (VecSize == 64 || VecSize % 128 == 0);
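The checks above boil down to a small legality predicate for fixed-width groups: the element size must be 8, 16, 32 or 64 bits, and the whole vector must be exactly 64 bits or a multiple of 128 bits so it maps onto NEON structure loads/stores. A simplified sketch under those assumptions (it ignores the Falkor, streaming-SVE and SVE-for-fixed-length paths shown above):

// Simplified sketch of the fixed-width legality check (assumes NEON is
// available and the SVE-for-fixed-length paths are not taken).
bool isLegalFixedInterleavedAccess(unsigned NumElts, unsigned ElSizeInBits) {
  if (ElSizeInBits != 8 && ElSizeInBits != 16 && ElSizeInBits != 32 &&
      ElSizeInBits != 64)
    return false;
  unsigned VecSize = NumElts * ElSizeInBits;
  // The group is lowered with 64-bit or 128-bit NEON structure loads/stores.
  return VecSize == 64 || VecSize % 128 == 0;
}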
18455 bool Scalable, Type *LDVTy,
18457 assert(Factor >= 2 && Factor <= 4 && "Invalid interleave factor");
18458 static const Intrinsic::ID SVELoads[3] = {Intrinsic::aarch64_sve_ld2_sret,
18459 Intrinsic::aarch64_sve_ld3_sret,
18460 Intrinsic::aarch64_sve_ld4_sret};
18461 static const Intrinsic::ID NEONLoads[3] = {Intrinsic::aarch64_neon_ld2,
18462 Intrinsic::aarch64_neon_ld3,
18463 Intrinsic::aarch64_neon_ld4};
18473 bool Scalable, Type *STVTy,
18475 assert(Factor >= 2 && Factor <= 4 && "Invalid interleave factor");
18476 static const Intrinsic::ID SVEStores[3] = {Intrinsic::aarch64_sve_st2,
18477 Intrinsic::aarch64_sve_st3,
18478 Intrinsic::aarch64_sve_st4};
18479 static const Intrinsic::ID NEONStores[3] = {Intrinsic::aarch64_neon_st2,
18480 Intrinsic::aarch64_neon_st3,
18481 Intrinsic::aarch64_neon_st4};
18505 "Invalid interleave factor");
18506 assert(!Shuffles.
empty() &&
"Empty shufflevector input");
18508 "Unmatched number of shufflevectors and indices");
18513 assert(!Mask && GapMask.
popcount() == Factor &&
"Unexpected mask on a load");
18532 SI->getType()->getScalarSizeInBits() * 4 ==
18533 SI->user_back()->getType()->getScalarSizeInBits();
18543 Type *EltTy = FVTy->getElementType();
18551 FVTy->getNumElements() / NumLoads);
18559 Value *BaseAddr = LI->getPointerOperand();
18561 Type *PtrTy = LI->getPointerOperandType();
18563 LDVTy->getElementCount());
18566 UseScalable, LDVTy, PtrTy);
18573 Value *PTrue =
nullptr;
18575 std::optional<unsigned> PgPattern =
18577 if (Subtarget->getMinSVEVectorSizeInBits() ==
18578 Subtarget->getMaxSVEVectorSizeInBits() &&
18579 Subtarget->getMinSVEVectorSizeInBits() ==
DL.getTypeSizeInBits(FVTy))
18580 PgPattern = AArch64SVEPredPattern::all;
18584 PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
18588 for (
unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
18593 BaseAddr = Builder.CreateConstGEP1_32(LDVTy->getElementType(), BaseAddr,
18594 FVTy->getNumElements() * Factor);
18598 LdN = Builder.CreateCall(LdNFunc, {PTrue, BaseAddr},
"ldN");
18600 LdN = Builder.CreateCall(LdNFunc, BaseAddr,
"ldN");
18603 for (
unsigned i = 0; i < Shuffles.
size(); i++) {
18605 unsigned Index = Indices[i];
18607 Value *SubVec = Builder.CreateExtractValue(LdN, Index);
18610 SubVec = Builder.CreateExtractVector(FVTy, SubVec,
uint64_t(0));
18614 SubVec = Builder.CreateIntToPtr(
18616 FVTy->getNumElements()));
18618 SubVecs[SVI].push_back(SubVec);
18627 auto &SubVec = SubVecs[SVI];
18630 SVI->replaceAllUsesWith(WideVec);
18636template <typename Iter>
18638 int MaxLookupDist = 20;
18639 unsigned IdxWidth = DL.getIndexSizeInBits(0);
18640 APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
18641 const Value *PtrA1 =
18644 while (++It != End) {
18645 if (It->isDebugOrPseudoInst())
18647 if (MaxLookupDist-- == 0)
18650 const Value *PtrB1 =
18651 SI->getPointerOperand()->stripAndAccumulateInBoundsConstantOffsets(
18653 if (PtrA1 == PtrB1 &&
18654 (OffsetA.sextOrTrunc(IdxWidth) - OffsetB.sextOrTrunc(IdxWidth))
18693 const APInt &GapMask) const {
18696 "Invalid interleave factor");
18701 "Unexpected mask on store");
18704 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
18706 unsigned LaneLen = VecTy->getNumElements() / Factor;
18707 Type *EltTy = VecTy->getElementType();
18728 Type *IntTy = DL.getIntPtrType(EltTy);
18729 unsigned NumOpElts =
18734 Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
18735 Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
18742 LaneLen /= NumStores;
18749 Value *BaseAddr = SI->getPointerOperand();
18763 if (Factor == 2 && SubVecTy->getPrimitiveSizeInBits() == 64 &&
18782 if (Factor == 2 && SI->hasMetadata(LLVMContext::MD_nontemporal) &&
18783 !F->hasOptSize() && !F->hasMinSize() &&
18787 Type *PtrTy = SI->getPointerOperandType();
18789 STVTy->getElementCount());
18792 UseScalable, STVTy, PtrTy);
18794 Value *PTrue = nullptr;
18796 std::optional<unsigned> PgPattern =
18798 if (Subtarget->getMinSVEVectorSizeInBits() ==
18799 Subtarget->getMaxSVEVectorSizeInBits() &&
18800 Subtarget->getMinSVEVectorSizeInBits() ==
18801 DL.getTypeSizeInBits(SubVecTy))
18802 PgPattern = AArch64SVEPredPattern::all;
18806 PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
18810 for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
18815 for (unsigned i = 0; i < Factor; i++) {
18817 unsigned IdxI = StoreCount * LaneLen * Factor + i;
18818 if (Mask[IdxI] >= 0) {
18819 Shuffle = Builder.CreateShuffleVector(
18822 unsigned StartMask = 0;
18823 for (unsigned j = 1; j < LaneLen; j++) {
18824 unsigned IdxJ = StoreCount * LaneLen * Factor + j * Factor + i;
18825 if (Mask[IdxJ] >= 0) {
18826 StartMask = Mask[IdxJ] - j;
18835 Shuffle = Builder.CreateShuffleVector(
18843 Ops.push_back(Shuffle);
18847 Ops.push_back(PTrue);
18851 if (StoreCount > 0)
18852 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
18853 BaseAddr, LaneLen * Factor);
18855 Ops.push_back(BaseAddr);
18856 Builder.CreateCall(StNFunc, Ops);
18864 if (Factor != 2 && Factor != 3 && Factor != 4) {
18865 LLVM_DEBUG(dbgs() << "Matching ld2, ld3 and ld4 patterns failed\n");
18871 assert(!Mask && "Unexpected mask on a load\n");
18875 const DataLayout &DL = LI->getModule()->getDataLayout();
18890 Type *PtrTy = LI->getPointerOperandType();
18892 UseScalable, LdTy, PtrTy);
18895 Value *Pred = nullptr;
18898 Builder.CreateVectorSplat(LdTy->getElementCount(), Builder.getTrue());
18900 Value *BaseAddr = LI->getPointerOperand();
18901 Value *Result = nullptr;
18902 if (NumLoads > 1) {
18905 for (unsigned I = 0; I < NumLoads; ++I) {
18909 Value *LdN = nullptr;
18911 LdN = Builder.CreateCall(LdNFunc, {Pred, Address}, "ldN");
18913 LdN = Builder.CreateCall(LdNFunc, Address, "ldN");
18916 for (unsigned J = 0; J < Factor; ++J) {
18917 ExtractedLdValues[J] = Builder.CreateInsertVector(
18918 VTy, ExtractedLdValues[J], Builder.CreateExtractValue(LdN, J), Idx);
18925 for (unsigned J = 0; J < Factor; ++J)
18926 Result = Builder.CreateInsertValue(Result, ExtractedLdValues[J], J);
18929 Result = Builder.CreateCall(LdNFunc, {Pred, BaseAddr}, "ldN");
18931 Result = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
18942 unsigned Factor = InterleavedValues.size();
18943 if (Factor != 2 && Factor != 3 && Factor != 4) {
18944 LLVM_DEBUG(dbgs() << "Matching st2, st3 and st4 patterns failed\n");
18950 assert(!Mask && "Unexpected mask on plain store");
18970 Type *PtrTy = SI->getPointerOperandType();
18972 UseScalable, StTy, PtrTy);
18976 Value *BaseAddr = SI->getPointerOperand();
18977 Value *Pred = nullptr;
18981 Builder.CreateVectorSplat(StTy->getElementCount(), Builder.getTrue());
18983 auto ExtractedValues = InterleavedValues;
18988 for (unsigned I = 0; I < NumStores; ++I) {
18990 if (NumStores > 1) {
18995 for (unsigned J = 0; J < Factor; J++) {
18997 Builder.CreateExtractVector(StTy, ExtractedValues[J], Idx);
19000 StoreOperands[StoreOperands.size() - 1] = Address;
19002 Builder.CreateCall(StNFunc, StoreOperands);
19009 const AttributeList &FuncAttributes) const {
19010 bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
19011 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
19012 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
19017 bool IsSmallZeroMemset = Op.isMemset() && Op.size() < 32 && Op.isZeroMemset();
19018 auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
19019 if (Op.isAligned(AlignCheck))
19029 if (CanUseNEON && Op.isMemset() && !IsSmallZeroMemset &&
19030 AlignmentIsAcceptable(MVT::v16i8, Align(1)))
19032 if (CanUseFP && !IsSmallZeroMemset &&
19033 AlignmentIsAcceptable(MVT::f128, Align(16)))
19035 if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
19037 if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
19043 LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
19044 const MemOp &Op, unsigned DstAS, unsigned SrcAS,
19045 const AttributeList &FuncAttributes, EVT *LargestVT) const {
19049 if (VT == MVT::v16i8 && Op.isMemset() && !Op.isZeroMemset() &&
19051 unsigned Size = Op.size();
19052 unsigned RemainingSize = Size;
19058 while (RemainingSize > 0) {
19062 if (RemainingSize >= 8) {
19063 TargetVT = MVT::i64;
19064 RemainingSize -= 8;
19065 } else if (RemainingSize >= 4) {
19066 TargetVT = MVT::i32;
19067 RemainingSize -= 4;
19068 } else if (RemainingSize >= 2) {
19069 TargetVT = MVT::i16;
19070 RemainingSize -= 2;
19071 } else if (RemainingSize >= 1) {
19072 TargetVT = MVT::i8;
19073 RemainingSize -= 1;
19079 MemOps.push_back(TargetVT);
19085 if (RemainingSize == 0 && !MemOps.empty()) {
19096 Context, MemOps, Limit, Op, DstAS, SrcAS, FuncAttributes, LargestVT);
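Lines 19058-19073 cover the remaining memset bytes greedily with the widest scalar type that still fits (i64, then i32, i16, i8). A standalone sketch of that chunking, with a hypothetical helper name:

// Standalone sketch of the greedy chunking above: cover a byte count with the
// widest scalar stores first (hypothetical helper, not the LLVM API).
#include <cstdio>
#include <vector>

std::vector<unsigned> splitIntoScalarChunks(unsigned Size) {
  std::vector<unsigned> ChunkBytes;
  while (Size > 0) {
    unsigned C = Size >= 8 ? 8 : Size >= 4 ? 4 : Size >= 2 ? 2 : 1;
    ChunkBytes.push_back(C);
    Size -= C;
  }
  return ChunkBytes;
}

int main() {
  // 13 bytes -> one 8-byte store, one 4-byte store, one 1-byte store.
  for (unsigned C : splitIntoScalarChunks(13))
    std::printf("%u-byte chunk\n", C);
  return 0;
}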
19100 const MemOp &Op, const AttributeList &FuncAttributes) const {
19101 bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
19102 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
19103 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
19108 bool IsSmallZeroMemset = Op.isMemset() && Op.size() < 32 && Op.isZeroMemset();
19109 auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
19110 if (Op.isAligned(AlignCheck))
19120 if (CanUseNEON && Op.isMemset() && !IsSmallZeroMemset &&
19121 AlignmentIsAcceptable(MVT::v16i8, Align(1)))
19123 if (CanUseFP && !IsSmallZeroMemset &&
19124 AlignmentIsAcceptable(MVT::f128, Align(16)))
19126 if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
19128 if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
19135 if (Immed == std::numeric_limits<int64_t>::min()) {
19144 if (!Subtarget->hasSVE2())
19163 return std::abs(Imm / 8) <= 16;
19166 return std::abs(Imm / 4) <= 16;
19169 return std::abs(Imm / 2) <= 16;
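The returns at 19163-19169 bound a scaled offset: the byte immediate divided by the access granule (8, 4 or 2 bytes) has to stay within +/-16. A tiny sketch of that check, assuming the multiple-of-granule tests elided from this listing are done elsewhere:

// Sketch of the scaled-offset range check above (simplified; the surrounding
// divisibility checks are elided in this listing).
#include <cstdlib>

bool offsetWithinScaledRange(long long Imm, unsigned GranuleBytes) {
  return std::llabs(Imm / (long long)GranuleBytes) <= 16;
}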
19196 if (Insn.size() > 1)
19233 if (AM.Scale == 1) {
19236 } else if (AM.Scale == 2) {
19248 if (Ty->isScalableTy()) {
19254 uint64_t VecNumBytes = DL.getTypeSizeInBits(Ty).getKnownMinValue() / 8;
19276 if (Ty->isSized()) {
19277 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
19278 NumBytes = NumBits / 8;
19283 return Subtarget->getInstrInfo()->isLegalAddressingMode(NumBytes, AM.BaseOffs,
19291 int64_t MaxOffset) const {
19292 int64_t HighPart = MinOffset & ~0xfffULL;
19315 return Subtarget->hasFullFP16();
19321 Subtarget->isNonStreamingSVEorSME2Available();
19331 switch (Ty->getScalarType()->getTypeID()) {
19351 static const MCPhysReg ScratchRegs[] = {
19352 AArch64::X16, AArch64::X17, AArch64::LR, 0
19354 return ScratchRegs;
19358 static const MCPhysReg RCRegs[] = {AArch64::FPCR};
19367 "Expected shift op");
19369 SDValue ShiftLHS = N->getOperand(0);
19370 EVT VT = N->getValueType(0);
19391 return SRLC->getZExtValue() == SHLC->getZExtValue();
19403 (N->getOperand(0).getOpcode() == ISD::SHL ||
19404 N->getOperand(0).getOpcode() == ISD::SRL) &&
19405 "Expected XOR(SHIFT) pattern");
19410 if (XorC && ShiftC) {
19411 unsigned MaskIdx, MaskLen;
19412 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
19413 unsigned ShiftAmt = ShiftC->getZExtValue();
19414 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
19415 if (N->getOperand(0).getOpcode() == ISD::SHL)
19416 return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
19417 return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
19427 N->getOperand(0).getOpcode() == ISD::SRL) ||
19429 N->getOperand(0).getOpcode() == ISD::SHL)) &&
19430 "Expected shift-shift mask");
19432 if (!N->getOperand(0)->hasOneUse())
19436 EVT VT = N->getValueType(0);
19437 if (N->getOpcode() == ISD::SRL && (VT == MVT::i32 || VT == MVT::i64)) {
19440 return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue());
19445 if (N->getOpcode() == ISD::SHL && N->hasOneUse()) {
19447 unsigned ShlAmt = C2->getZExtValue();
19448 if (auto ShouldADD = *N->user_begin();
19449 ShouldADD->getOpcode() == ISD::ADD && ShouldADD->hasOneUse()) {
19451 EVT MemVT = Load->getMemoryVT();
19453 if (Load->getValueType(0).isScalableVector())
19451 EVT MemVT = Load->getMemoryVT();
19453 if (Load->getValueType(0).isScalableVector())
19467 unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X,
19475 assert(Ty->isIntegerTy());
19477 unsigned BitSize = Ty->getPrimitiveSizeInBits();
19481 int64_t Val = Imm.getSExtValue();
19488 Val &= (1LL << 32) - 1;
19496 unsigned Index) const {
19518 EVT VT = N->getValueType(0);
19519 if (!Subtarget->hasNEON() || !VT.isVector())
19533 if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
19568 if (N->getValueType(0) != MVT::i32)
19571 SDValue VecReduceOp0 = N->getOperand(0);
19572 bool SawTrailingZext = false;
19578 SawTrailingZext = true;
19583 MVT AbsInputVT = SawTrailingZext ? MVT::v16i16 : MVT::v16i32;
19585 unsigned Opcode = VecReduceOp0.getOpcode();
19591 if (ABS->getOperand(0)->getOpcode() != ISD::SUB ||
19592 ABS->getOperand(0)->getValueType(0) != AbsInputVT)
19595 SDValue SUB = ABS->getOperand(0);
19596 unsigned Opcode0 = SUB->getOperand(0).getOpcode();
19597 unsigned Opcode1 = SUB->getOperand(1).getOpcode();
19599 if (SUB->getOperand(0)->getValueType(0) != AbsInputVT ||
19600 SUB->getOperand(1)->getValueType(0) != AbsInputVT)
19604 bool IsZExt = false;
19612 SDValue EXT0 = SUB->getOperand(0);
19613 SDValue EXT1 = SUB->getOperand(1);
19630 UABDHigh8Op0, UABDHigh8Op1);
19641 UABDLo8Op0, UABDLo8Op1);
19665 if (!N->getValueType(0).isScalableVector() ||
19666 !ST->isSVEorStreamingSVEAvailable() ||
19667 !(ST->hasSVE2p1() || ST->hasSME2()))
19672 return Use->getOpcode() == ISD::EXTRACT_SUBVECTOR;
19675 auto MaskEC = N->getValueType(0).getVectorElementCount();
19676 if (!MaskEC.isKnownMultipleOf(NumExts))
19690 if (Use->getValueType(0).getVectorElementCount() != ExtMinEC)
19694 unsigned Offset = Use->getConstantOperandVal(1);
19696 if (Extracts[Part] != nullptr)
19699 Extracts[Part] = Use;
19715 EVT ExtVT = Extracts[0]->getValueType(0);
19719 DCI.CombineTo(Extracts[0], R.getValue(0));
19720 DCI.CombineTo(Extracts[1], R.getValue(1));
19724 if (NumExts == 2) {
19725 assert(N->getValueType(0) == DoubleExtVT);
19731 for (unsigned I = 2; I < NumExts; I += 2) {
19736 DCI.CombineTo(Extracts[I + 1], R.getValue(1));
19738 R.getValue(0), R.getValue(1)));
19770 if (!ST->isNeonAvailable())
19773 if (!ST->hasDotProd())
19791 unsigned DotOpcode;
19795 if (A.getOperand(0).getValueType() != B.getOperand(0).getValueType())
19797 auto OpCodeA = A.getOpcode();
19801 auto OpCodeB = B.getOpcode();
19805 if (OpCodeA == OpCodeB) {
19810 if (!ST->hasMatMulInt8())
19812 DotOpcode = AArch64ISD::USDOT;
19817 DotOpcode = AArch64ISD::UDOT;
19819 DotOpcode = AArch64ISD::SDOT;
19824 EVT Op0VT = A.getOperand(0).getValueType();
19827 if (!IsValidElementCount || !IsValidSize)
19836 B = B.getOperand(0);
19839 unsigned NumOfVecReduce;
19841 if (IsMultipleOf16) {
19843 TargetType = MVT::v4i32;
19846 TargetType = MVT::v2i32;
19849 if (NumOfVecReduce == 1) {
19852 A.getOperand(0), B);
19859 for (; I < VecReduce16Num; I += 1) {
19878 if (VecReduce8Num == 0)
19879 return VecReduceAdd16;
19901 auto DetectAddExtract = [&](SDValue A) {
19905 EVT VT = A.getValueType();
19930 : AArch64ISD::SADDLP;
19934 if (SDValue R = DetectAddExtract(A))
19937 if (A.getOperand(0).getOpcode() == ISD::ADD && A.getOperand(0).hasOneUse())
19941 if (A.getOperand(1).getOpcode() == ISD::ADD && A.getOperand(1).hasOneUse())
19954 EVT VT = A.getValueType();
19955 if (VT != MVT::v8i16 && VT != MVT::v4i32 && VT != MVT::v2i64)
19966 if (ExtVT0 != ExtVT1 ||
19981 return DAG.getNode(AArch64ISD::NVCAST, SDLoc(A), MVT::v8i16, Uaddlv);
19998 MVT OpVT = A.getSimpleValueType();
19999 assert(N->getSimpleValueType(0) == OpVT &&
20000 "The operand type should be consistent with the result type of UADDV");
20004 if (KnownLeadingLanes.isZero())
20014 APInt DemandedElts =
20033AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
20040 EVT VT = N->getValueType(0);
20045 if (VT.isVector() && Subtarget->isSVEorStreamingSVEAvailable())
20049 if ((VT != MVT::i32 && VT != MVT::i64) ||
20055 if (Divisor == 2 ||
20056 Divisor == APInt(Divisor.getBitWidth(), -2, true))
20063AArch64TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
20070 EVT VT = N->getValueType(0);
20078 if ((VT != MVT::i32 && VT != MVT::i64) ||
20094 CSNeg = DAG.getNode(AArch64ISD::CSNEG, DL, VT, And, And, CCVal, Cmp);
20105 CSNeg = DAG.getNode(AArch64ISD::CSNEG, DL, VT, AndPos, AndNeg, CCVal,
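BuildSREMPow2 ends by feeding two masked values into a CSNEG, which picks the positive or negated remainder based on the sign of the input. A scalar sketch of roughly that computation, assuming a power-of-two divisor 1 << K and truncating division semantics:

// Roughly the scalar computation the CSNEG sequence above selects between
// (a sketch, assuming divisor 1 << K and C++'s truncating remainder).
#include <cassert>
#include <cstdint>

int64_t sremPow2(int64_t X, unsigned K) {
  int64_t Mask = (int64_t(1) << K) - 1;
  int64_t Pos = X & Mask;          // remainder when X >= 0
  int64_t Neg = -((-X) & Mask);    // remainder when X < 0
  return X < 0 ? Neg : Pos;        // csneg picks one based on the sign flags
}

int main() {
  assert(sremPow2(7, 2) == 7 % 4);
  assert(sremPow2(-7, 2) == -7 % 4);
  assert(sremPow2(-8, 3) == -8 % 8);
  return 0;
}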
20120 case Intrinsic::aarch64_sve_cntb:
20121 case Intrinsic::aarch64_sve_cnth:
20122 case Intrinsic::aarch64_sve_cntw:
20123 case Intrinsic::aarch64_sve_cntd:
20133 if (IID == Intrinsic::aarch64_sve_cntp)
20134 return Op.getOperand(1).getValueType().getVectorElementCount();
20136 case Intrinsic::aarch64_sve_cntd:
20138 case Intrinsic::aarch64_sve_cntw:
20140 case Intrinsic::aarch64_sve_cnth:
20142 case Intrinsic::aarch64_sve_cntb:
20145 return std::nullopt;
20172 return TypeNode->getVT();
20182 if (Mask == UCHAR_MAX)
20184 else if (Mask == USHRT_MAX)
20186 else if (Mask == UINT_MAX)
20208 unsigned ExtendOpcode = Extend.getOpcode();
20225 if (PreExtendType == MVT::Other ||
20230 bool SeenZExtOrSExt = !IsAnyExt;
20238 unsigned Opc = Op.getOpcode();
20249 if (SeenZExtOrSExt && OpcIsSExt != IsSExt)
20252 IsSExt = OpcIsSExt;
20253 SeenZExtOrSExt = true;
20261 EVT PreExtendLegalType =
20267 PreExtendLegalType));
20278 unsigned ExtOpc = !SeenZExtOrSExt
20281 return DAG.getNode(ExtOpc, DL, VT, NBV);
20288 EVT VT = Mul->getValueType(0);
20289 if (VT != MVT::v8i16 && VT != MVT::v4i32 && VT != MVT::v2i64)
20300 return DAG.getNode(Mul->getOpcode(), DL, VT, Op0 ? Op0 : Mul->getOperand(0),
20301 Op1 ? Op1 : Mul->getOperand(1));
20316 EVT VT = Mul->getValueType(0);
20318 int ConstMultiplier =
20324 unsigned AbsConstValue = abs(ConstMultiplier);
20325 unsigned OperandShift =
20334 unsigned B = ConstMultiplier < 0 ? 32 : 31;
20335 unsigned CeilAxOverB = (AbsConstValue + (B - 1)) / B;
20339 if (LowerBound > UpperBound)
20344 int Shift = std::min(std::max(0, LowerBound), UpperBound);
20347 int32_t RdsvlMul = (AbsConstValue >> (OperandShift + Shift)) *
20348 (ConstMultiplier < 0 ? -1 : 1);
20349 auto Rdsvl = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
20362 EVT VT = N->getValueType(0);
20363 if (VT != MVT::v2i64 && VT != MVT::v1i64 && VT != MVT::v2i32 &&
20364 VT != MVT::v4i32 && VT != MVT::v4i16 && VT != MVT::v8i16)
20366 if (N->getOperand(0).getOpcode() != ISD::AND ||
20367 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
20380 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
20381 V3 != (HalfSize - 1))
20392 return DAG.getNode(AArch64ISD::NVCAST, DL, VT, CM);
20400 EVT VT = N->getValueType(0);
20406 N->getOperand(0).getOperand(0).getValueType() !=
20407 N->getOperand(1).getOperand(0).getValueType())
20411 N->getOperand(0).getOpcode() != N->getOperand(1).getOpcode())
20414 SDValue N0 = N->getOperand(0).getOperand(0);
20415 SDValue N1 = N->getOperand(1).getOperand(0);
20420 if ((S2 == MVT::i32 && S1 == MVT::i8) ||
20421 (S2 == MVT::i64 && (S1 == MVT::i8 || S1 == MVT::i16))) {
20453 EVT VT = N->getValueType(0);
20457 unsigned AddSubOpc;
20459 auto IsAddSubWith1 = [&](SDValue V) -> bool {
20460 AddSubOpc = V->getOpcode();
20472 if (IsAddSubWith1(N0)) {
20474 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
20477 if (IsAddSubWith1(N1)) {
20479 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
20490 const APInt &ConstValue = C->getAPIntValue();
20497 if (ConstValue.sge(1) && ConstValue.sle(16))
20512 unsigned TrailingZeroes = ConstValue.countr_zero();
20513 if (TrailingZeroes) {
20521 if (N->hasOneUse() && (N->user_begin()->getOpcode() == ISD::ADD ||
20522 N->user_begin()->getOpcode() == ISD::SUB))
20527 APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
20530 auto Shl = [&](SDValue N0, unsigned N1) {
20561 for (unsigned i = 1; i < BitWidth / 2; i++) {
20581 unsigned TrailingZeroes = CVMinus1.countr_zero();
20582 APInt SCVMinus1 = CVMinus1.ashr(TrailingZeroes) - 1;
20598 unsigned TrailingZeroes = CVMinus1.countr_zero();
20599 APInt CVPlus1 = CVMinus1.ashr(TrailingZeroes) + 1;
20619 APInt SCVMinus1 = ShiftedConstValue - 1;
20620 APInt SCVPlus1 = ShiftedConstValue + 1;
20621 APInt CVPlus1 = ConstValue + 1;
20625 return Shl(Add(Shl(N0, ShiftAmt), N0), TrailingZeroes);
20628 return Sub(Shl(N0, ShiftAmt), N0);
20630 ShiftAmt = SCVPlus1.logBase2() + TrailingZeroes;
20631 return Sub(Shl(N0, ShiftAmt), Shl(N0, TrailingZeroes));
20633 if (Subtarget->hasALULSLFast() &&
20634 isPowPlusPlusConst(ConstValue, CVM, CVN)) {
20635 APInt CVMMinus1 = CVM - 1;
20636 APInt CVNMinus1 = CVN - 1;
20637 unsigned ShiftM1 = CVMMinus1.logBase2();
20638 unsigned ShiftN1 = CVNMinus1.logBase2();
20640 if (ShiftM1 <= 4 && ShiftN1 <= 4) {
20642 return Add(Shl(MVal, ShiftN1), MVal);
20645 if (Subtarget->hasALULSLFast() &&
20646 isPowPlusPlusOneConst(ConstValue, CVM, CVN)) {
20650 if (ShiftM <= 4 && ShiftN <= 4) {
20656 if (Subtarget->hasALULSLFast() &&
20657 isPowMinusMinusOneConst(ConstValue, CVM, CVN)) {
20661 if (ShiftM <= 4 && ShiftN <= 4) {
20670 APInt SCVPlus1 = -ShiftedConstValue + 1;
20671 APInt CVNegPlus1 = -ConstValue + 1;
20672 APInt CVNegMinus1 = -ConstValue - 1;
20675 return Sub(N0, Shl(N0, ShiftAmt));
20677 ShiftAmt = CVNegMinus1.logBase2();
20678 return Negate(Add(Shl(N0, ShiftAmt), N0));
20680 ShiftAmt = SCVPlus1.logBase2() + TrailingZeroes;
20681 return Sub(Shl(N0, TrailingZeroes), Shl(N0, ShiftAmt));
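The combine above rewrites a multiply by constants of the form 2^N + 1, 2^N - 1, or products of such terms into shift/add/sub chains. A quick arithmetic check of those shapes in plain C++ (a sketch of the identities, not of the DAG construction):

// Quick arithmetic check of the shift/add/sub shapes built above, e.g.
// C = 2^N + 1 -> (x << N) + x and C = 2^N - 1 -> (x << N) - x.
#include <cassert>
#include <cstdint>

int main() {
  int64_t X = 12345;
  assert(X * 9 == (X << 3) + X);                             // 9 = 2^3 + 1
  assert(X * 7 == (X << 3) - X);                             // 7 = 2^3 - 1
  assert(X * 45 == (((X << 3) + X) << 2) + ((X << 3) + X));  // 45 = 9 * 5
  return 0;
}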
20701 EVT VT = N->getValueType(0);
20703 N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
20704 VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
20714 if (!BV->isConstant())
20719 EVT IntVT = BV->getValueType(0);
20726 N->getOperand(0)->getOperand(0), MaskConst);
20740 if (N->isStrictFPOpcode())
20746 if (Subtarget->hasFPRCVT())
20754 return !VT.isVector() && VT != MVT::bf16 && VT != MVT::f128;
20757 SDValue SrcVal = N->getOperand(0);
20759 EVT DestTy = N->getValueType(0);
20766 if (DestTy.bitsGT(SrcTy)) {
20775 if (SrcVecTy == MVT::nxv2i32 || DestVecTy == MVT::nxv2i32)
20781 DAG.getPOISON(SrcVecTy), SrcVal, ZeroIdx);
20804 EVT VT = N->getValueType(0);
20805 if (VT != MVT::f16 && VT != MVT::f32 && VT != MVT::f64)
20807 if (VT == MVT::f16 && !Subtarget->hasFullFP16())
20811 if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
20832 (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF;
20862 EVT VT = N->getValueType(0);
20866 if (CSel0.getOpcode() != AArch64ISD::CSEL ||
20885 if (Cmp1.getOpcode() != AArch64ISD::SUBS &&
20886 Cmp0.getOpcode() == AArch64ISD::SUBS) {
20891 if (Cmp1.getOpcode() != AArch64ISD::SUBS)
20898 if (N->getOpcode() == ISD::AND || N->getOpcode() == AArch64ISD::ANDS) {
20911 if (Op1 && Op1->getAPIntValue().isNegative() &&
20912 Op1->getAPIntValue().sgt(-32)) {
20919 AbsOp1, NZCVOp, Condition, Cmp0);
20922 Cmp1.getOperand(1), NZCVOp, Condition, Cmp0);
20946 EVT VT = N->getValueType(0);
20951 if (EltSize != 16 && EltSize != 32 && EltSize != 64)
20959 if (N0.getOpcode() == AArch64ISD::SRL_PRED)
20961 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
20962 N1.getOpcode() != AArch64ISD::SRL_PRED)
20973 EltSize / 2 != ShAmt)
20978 RevOp = AArch64ISD::BSWAP_MERGE_PASSTHRU;
20979 else if (EltSize == 32)
20980 RevOp = AArch64ISD::REVH_MERGE_PASSTHRU;
20982 RevOp = AArch64ISD::REVW_MERGE_PASSTHRU;
20998 if (N0.getOpcode() == AArch64ISD::VLSHR)
21013 if (EltSize == 32) {
21014 RevOp = AArch64ISD::REV32;
21017 RevOp = AArch64ISD::REV64;
21021 return DAG.getNode(AArch64ISD::NVCAST, DL, VT,
21028 unsigned Opc = N->getOpcode();
21038 EVT VT = N->getValueType(0);
21067 MaskForTy = 0xffull;
21070 MaskForTy = 0xffffull;
21073 MaskForTy = 0xffffffffull;
21082 return Op0->getAPIntValue().getLimitedValue() == MaskForTy;
21090 while (Op.getOpcode() == AArch64ISD::REINTERPRET_CAST &&
21092 Op = Op->getOperand(0);
21102 unsigned Opc = Src->getOpcode();
21105 if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {
21119 auto MaskAndTypeMatch = [ExtVal](EVT VT) -> bool {
21120 return ((ExtVal == 0xFF && VT == MVT::i8) ||
21121 (ExtVal == 0xFFFF && VT == MVT::i16) ||
21122 (ExtVal == 0xFFFFFFFF && VT == MVT::i32));
21128 if (MaskAndTypeMatch(EltTy))
21134 if (MaskedLoadOp && (MaskedLoadOp->getExtensionType() == ISD::ZEXTLOAD ||
21137 if (MaskAndTypeMatch(EltTy))
21161 return N->getOperand(1);
21163 return N->getOperand(0);
21170 if (!Src.hasOneUse())
21178 case AArch64ISD::LD1_MERGE_ZERO:
21179 case AArch64ISD::LDNF1_MERGE_ZERO:
21180 case AArch64ISD::LDFF1_MERGE_ZERO:
21183 case AArch64ISD::GLD1_MERGE_ZERO:
21184 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
21185 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
21186 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
21187 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
21188 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
21189 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
21190 case AArch64ISD::GLDFF1_MERGE_ZERO:
21191 case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
21192 case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
21193 case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
21194 case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
21195 case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
21196 case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
21197 case AArch64ISD::GLDNT1_MERGE_ZERO:
21218 EVT VT = N->getValueType(0);
21224 for (auto U : N->users())
21255 EVT VT = N->getValueType(0);
21298 DefBits = ~(DefBits | ZeroSplat);
21305 UndefBits = ~(UndefBits | ZeroSplat);
21307 UndefBits, &LHS)) ||
21321 EVT VT = N->getValueType(0);
21324 if (!N->getFlags().hasAllowReassociation())
21331 unsigned Opc = A.getConstantOperandVal(0);
21332 if (Opc != Intrinsic::aarch64_neon_vcmla_rot0 &&
21333 Opc != Intrinsic::aarch64_neon_vcmla_rot90 &&
21334 Opc != Intrinsic::aarch64_neon_vcmla_rot180 &&
21335 Opc != Intrinsic::aarch64_neon_vcmla_rot270)
21340 A.getOperand(2), A.getOperand(3));
21356 return (FullFP16 && VT == MVT::f16) || VT == MVT::f32 || VT == MVT::f64;
21358 return VT == MVT::i64;
21372 (N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilege ||
21373 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilege_x2 ||
21374 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilegt ||
21375 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilegt_x2 ||
21376 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehi ||
21377 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehi_x2 ||
21378 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehs ||
21379 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehs_x2 ||
21380 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele ||
21381 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele_x2 ||
21382 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo ||
21383 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo_x2 ||
21384 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels ||
21385 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels_x2 ||
21386 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt ||
21387 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt_x2)))
21467 MVT::bf16, MVT::f32, MVT::f64}),
21476 return DAG.getNode(AArch64ISD::LASTB, SDLoc(N), N->getValueType(0), Mask,
21492 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
21494 EVT VT = N->getValueType(0);
21495 const bool FullFP16 = Subtarget->hasFullFP16();
21527 if (Shuffle && Shuffle->getMaskElt(0) == 1 &&
21542 {N0->getOperand(0), Extract1, Extract2});
21561 unsigned OffsetElts = 0;
21577 Load->getMemoryVT().isByteSized() &&
21579 return U.getResNo() != N0.getResNo() ||
21580 (U.getUser()->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21581 !any_of(U.getUser()->uses(), [](const SDUse &U2) {
21582 return U2.getUser()->getOpcode() ==
21583 ISD::INSERT_VECTOR_ELT ||
21584 U2.getUser()->getOpcode() == ISD::BUILD_VECTOR ||
21585 U2.getUser()->getOpcode() == ISD::SCALAR_TO_VECTOR;
21592 unsigned Offset = (OffsetElts + N->getConstantOperandVal(1)) *
21593 Load->getValueType(0).getScalarSizeInBits() / 8;
21602 DAG.getExtLoad(ExtType, DL, VT, Load->getChain(), BasePtr,
21603 Load->getPointerInfo().getWithOffset(Offset),
21604 Load->getValueType(0).getScalarType(),
21606 Load->getMemOperand()->getFlags(), Load->getAAInfo());
21619 EVT VT = N->getValueType(0);
21620 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
21626 N->getNumOperands() == 2 && N0Opc == AArch64ISD::ZIP1 &&
21636 return DAG.getNode(AArch64ISD::TRN1, DL, VT, Op0MoreElems, Op1MoreElems);
21661 (N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
21663 MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
21665 for (size_t i = 0; i < Mask.size(); ++i)
21687 if (N00Opc == AArch64ISD::VLSHR && N10Opc == AArch64ISD::VLSHR &&
21693 NScalarSize = N->getValueType(0).getScalarSizeInBits();
21695 if (N001ConstVal == N101ConstVal && N001ConstVal > NScalarSize) {
21696 N000 = DAG.getNode(AArch64ISD::NVCAST, DL, VT, N000);
21697 N100 = DAG.getNode(AArch64ISD::NVCAST, DL, VT, N100);
21702 return DAG.getNode(AArch64ISD::VLSHR, DL, VT, Uzp, NewShiftConstant);
21707 if (N->getOperand(0).getValueType() == MVT::v4i8 ||
21708 N->getOperand(0).getValueType() == MVT::v2i16 ||
21709 N->getOperand(0).getValueType() == MVT::v2i8) {
21710 EVT SrcVT = N->getOperand(0).getValueType();
21714 if (N->getNumOperands() % 2 == 0 &&
21716 if (V.getValueType() != SrcVT)
21720 LoadSDNode *LD = dyn_cast<LoadSDNode>(V);
21721 return LD && V.hasOneUse() && LD->isSimple() && !LD->isIndexed() &&
21722 LD->getExtensionType() == ISD::NON_EXTLOAD;
21724 EVT FVT = SrcVT == MVT::v2i8 ? MVT::f16 : MVT::f32;
21728 for (unsigned i = 0; i < N->getNumOperands(); i++) {
21735 LD->getBasePtr(), LD->getMemOperand());
21737 Ops.push_back(NewLoad);
21756 auto isBitwiseVectorNegate = [](SDValue V) {
21757 return V->getOpcode() == ISD::XOR &&
21783 if (N->getNumOperands() == 2 && N0Opc == N1Opc && VT.is128BitVector() &&
21795 return DAG.getNode(N0Opc, DL, VT, Concat0, Concat1);
21799 auto IsRSHRN = [](SDValue Shr) {
21800 if (Shr.getOpcode() != AArch64ISD::VLSHR)
21803 EVT VT = Op.getValueType();
21804 unsigned ShtAmt = Shr.getConstantOperandVal(1);
21809 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
21811 Op.getOperand(1).getConstantOperandVal(0)
21812 << Op.getOperand(1).getConstantOperandVal(1));
21813 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
21816 Op.getOperand(1).getConstantOperandVal(0));
21820 if (Imm != 1ULL << (ShtAmt - 1))
21826 if (N->getNumOperands() == 2 && IsRSHRN(N0) &&
21834 X.getValueType().getDoubleNumVectorElementsVT(*DCI.DAG.getContext());
21845 if (N->getNumOperands() == 2 && N0Opc == AArch64ISD::ZIP1 &&
21852 return DAG.getNode(AArch64ISD::ZIP1, DL, VT, E0, E1);
21876 MVT RHSTy = RHS.getValueType().getSimpleVT();
21882 dbgs() << "aarch64-lower: concat_vectors bitcast simplification\n");
21898 EVT VT = N->getValueType(0);
21920 SDValue SubVec = N->getOperand(1);
21921 uint64_t IdxVal = N->getConstantOperandVal(2);
21943 if (IdxVal == 0 && Vec.isUndef())
21949 (IdxVal != 0 && IdxVal != NumSubElts))
21994 EVT ResTy = N->getValueType(0);
22005 VecResTy = MVT::v4f32;
22007 VecResTy = MVT::v2f64;
22032 MVT VT = N.getSimpleValueType();
22034 N.getConstantOperandVal(1) == 0)
22035 N = N.getOperand(0);
22037 switch (N.getOpcode()) {
22038 case AArch64ISD::DUP:
22039 case AArch64ISD::DUPLANE8:
22040 case AArch64ISD::DUPLANE16:
22041 case AArch64ISD::DUPLANE32:
22042 case AArch64ISD::DUPLANE64:
22043 case AArch64ISD::MOVI:
22044 case AArch64ISD::MOVIshift:
22045 case AArch64ISD::MOVIedit:
22046 case AArch64ISD::MOVImsl:
22047 case AArch64ISD::MVNIshift:
22048 case AArch64ISD::MVNImsl:
22062 if (N.getValueType().is64BitVector()) {
22074 N = N.getOperand(0);
22077 if (N.getOperand(0).getValueType().isScalableVector())
22079 return N.getConstantOperandAPInt(1) ==
22080 N.getOperand(0).getValueType().getVectorNumElements() / 2;
22129 if (Op.getOpcode() != AArch64ISD::CSEL)
22145 if (!TValue || !FValue)
22149 if (!TValue->isOne()) {
22196 if (CmpVT != MVT::i32 && CmpVT != MVT::i64)
22213 EVT VT = Op->getValueType(0);
22220 EVT VT = N->getValueType(0);
22233 if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isZero())
22240 if (Op1.getOpcode() != AArch64ISD::UADDV || OpVT1 != OpVT2 ||
22241 Op2.getOpcode() != AArch64ISD::UADDV ||
22251 DAG.getNode(AArch64ISD::UADDV, DL, ValVT, AddVal),
22259 EVT VT = N->getValueType(0);
22267 if (LHS.getOpcode() != AArch64ISD::CSEL &&
22268 LHS.getOpcode() != AArch64ISD::CSNEG) {
22270 if (LHS.getOpcode() != AArch64ISD::CSEL &&
22271 LHS.getOpcode() != AArch64ISD::CSNEG) {
22276 if (!LHS.hasOneUse())
22286 if (!CTVal || !CFVal)
22289 if (!(LHS.getOpcode() == AArch64ISD::CSEL &&
22291 !(LHS.getOpcode() == AArch64ISD::CSNEG &&
22296 if (LHS.getOpcode() == AArch64ISD::CSEL && CTVal->isOne() &&
22304 if (LHS.getOpcode() == AArch64ISD::CSNEG && CTVal->isOne() &&
22319 assert(((LHS.getOpcode() == AArch64ISD::CSEL && CFVal->isOne()) ||
22320 (LHS.getOpcode() == AArch64ISD::CSNEG && CFVal->isAllOnes())) &&
22321 "Unexpected constant value");
22327 return DAG.getNode(AArch64ISD::CSINC, DL, VT, NewNode, RHS, CCVal, Cmp);
22332 EVT VT = N->getValueType(0);
22339 auto isZeroDot = [](SDValue Dot) {
22340 return (Dot.getOpcode() == AArch64ISD::UDOT ||
22342 Dot.getOpcode() == AArch64ISD::USDOT) &&
22345 if (!isZeroDot(Dot))
22347 if (!isZeroDot(Dot))
22408 MVT VT = N->getSimpleValueType(0);
22420 LHS.getOpcode() != RHS.getOpcode())
22423 unsigned ExtType = LHS.getOpcode();
22429 if (!RHS.getNode())
22435 if (!LHS.getNode())
22445 return Op.getOpcode() == AArch64ISD::SUBS &&
22446 !Op.getNode()->hasAnyUseOfValue(0);
22452 if (Op.getOpcode() != AArch64ISD::CSEL)
22453 return std::nullopt;
22456 return std::nullopt;
22462 return getInvertedCondCode(CC);
22464 return std::nullopt;
22488 Op->getOperand(0), Op->getOperand(1),
22501 EVT VT = N->getValueType(0);
22513 EVT VT = N->getValueType(0);
22516 (VT == MVT::v4f16 || VT == MVT::v4bf16)) {
22517 SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1),
22518 Elt2 = N->getOperand(2), Elt3 = N->getOperand(3);
22532 Elt1->getOperand(0)->getConstantOperandVal(1) == 1) {
22536 if (Elt2->isUndef() && Elt3->isUndef()) {
22542 Elt2->getConstantOperandVal(1) ==
22543 Elt3->getConstantOperandVal(1) &&
22544 Elt2->getOperand(0)->getOpcode() ==
22546 Elt3->getOperand(0)->getOpcode() ==
22551 Elt2->getOperand(0)->getOperand(0) ==
22552 Elt3->getOperand(0)->getOperand(0) &&
22553 Elt2->getOperand(0)->getConstantOperandVal(1) == 0 &&
22554 Elt3->getOperand(0)->getConstantOperandVal(1) == 1) {
22557 DAG.getNode(AArch64ISD::FCVTXN, DL, MVT::v2f32, HighLanesSrcVec);
22560 SDValue DoubleToSingleSticky =
22561 DAG.getNode(AArch64ISD::FCVTXN, DL, MVT::v2f32, LowLanesSrcVec);
22563 DoubleToSingleSticky, HighLanes);
22571 if (VT == MVT::v2f64) {
22572 SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1);
22583 Elt1->getOperand(0)->getConstantOperandVal(1) &&
22598 HalfToSingle, SubvectorIdx);
22613 if (VT != MVT::v2i32)
22616 SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1);
22654 EVT DestVT =
N->getValueType(0);
22666 unsigned ShiftAmt = 0;
22668 case (1ULL << 15) - 1:
22669 ScalarType = MVT::i16;
22672 case (1ULL << 31) - 1:
22673 ScalarType = MVT::i32;
22685 if (!RightShiftVec)
22689 if (SExtValue != (ShiftAmt - 1))
22706 if (SExt0Type != SExt1Type || SExt0Type.
getScalarType() != ScalarType ||
22733 EVT VT =
N->getValueType(0);
22756 "Unexpected legalisation result!");
22758 EVT SrcVectorType =
Op.getValueType();
22761 assert((SrcVectorType == MVT::v2i64 || SrcVectorType == MVT::nxv2i64) &&
22762 "Unexpected legalisation result!");
22764 unsigned ExtractIndex =
22778 unsigned Opcode =
N.getOpcode();
22784 SrcVT =
N.getOperand(0).getValueType();
22786 return SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8;
22792 return AndMask == 0xff || AndMask == 0xffff || AndMask == 0xffffffff;
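The test at 22792 accepts exactly the AND masks that behave as zero-extensions from i8, i16 or i32. A one-line sketch with a hypothetical helper name:

// Sketch of the mask test above: an AND with one of these constants acts as a
// zero-extension from i8, i16 or i32.
#include <cstdint>

bool isZeroExtendMask(uint64_t AndMask) {
  return AndMask == 0xff || AndMask == 0xffff || AndMask == 0xffffffff;
}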
22804 auto IsOneUseExtend = [](
SDValue N) {
22815 if (SUB.getOpcode() !=
ISD::SUB || !SUB.hasOneUse())
22818 SDValue Shift = SUB.getOperand(0);
22819 if (!IsOneUseExtend(Shift))
22823 EVT VT =
N->getValueType(0);
22839 EVT VT =
N->getValueType(0);
22840 if (VT != MVT::i32 && VT != MVT::i64)
22863 RHSImm > 4 &&
LHS.hasOneUse())
22880 if (!
Add.hasOneUse())
22887 if (
M1.getOpcode() !=
ISD::MUL &&
M1.getOpcode() != AArch64ISD::SMULL &&
22888 M1.getOpcode() != AArch64ISD::UMULL)
22894 EVT VT =
N->getValueType(0);
22919 if (!
N->getValueType(0).isFixedLengthVector())
22930 if (MulValue.
getOpcode() != AArch64ISD::MUL_PRED)
22942 DAG.
getNode(
N->getOpcode(),
SDLoc(
N), ScalableVT, {ScaledOp, MulValue});
22946 if (
SDValue res = performOpt(
N->getOperand(0),
N->getOperand(1)))
22949 return performOpt(
N->getOperand(1),
N->getOperand(0));
22957 EVT VT =
N->getValueType(0);
22958 if (VT != MVT::i64 ||
22986 DAG.
getNode(
N->getOpcode(),
DL, MVT::v1i64, Op0, Op1),
22995 if (!Ld || !Ld->isSimple())
23026 B.getOperand(1).getNumOperands() != 4)
23030 int NumElts =
B.getValueType().getVectorNumElements();
23031 int NumSubElts = NumElts / 4;
23032 for (
int I = 0;
I < NumSubElts;
I++) {
23034 if (SV1->getMaskElt(
I) !=
I ||
23035 SV1->getMaskElt(
I + NumSubElts) !=
I + NumSubElts ||
23036 SV1->getMaskElt(
I + NumSubElts * 2) !=
I + NumSubElts * 2 ||
23037 SV1->getMaskElt(
I + NumSubElts * 3) !=
I + NumElts)
23040 if (SV2->getMaskElt(
I) !=
I ||
23041 SV2->getMaskElt(
I + NumSubElts) !=
I + NumSubElts ||
23042 SV2->getMaskElt(
I + NumSubElts * 2) !=
I + NumElts)
23049 if (!Ld0 || !Ld1 || !Ld2 || !Ld3 || !Ld0->isSimple() || !Ld1->isSimple() ||
23050 !Ld2->isSimple() || !Ld3->isSimple())
23063 unsigned &NumSubLoads) {
23070 if (NumSubLoads && Loads0.
size() != NumSubLoads)
23072 NumSubLoads = Loads0.
size();
23073 return Loads0.
size() == Loads1.
size() &&
23074 all_of(
zip(Loads0, Loads1), [&DAG](
auto L) {
23075 unsigned Size =
get<0>(L)->getValueType(0).getSizeInBits();
23076 return Size ==
get<1>(L)->getValueType(0).getSizeInBits() &&
23089 DAG, NumSubLoads) &&
23117 EVT VT =
N->getValueType(0);
23135 Other.getOperand(0).getValueType() ||
23142 unsigned NumSubLoads = 0;
23151 unsigned NumSubElts = NumElts / NumSubLoads;
23173 for (
const auto &[L0, L1] :
zip(Loads0, Loads1)) {
23175 L0->getBasePtr(), L0->getPointerInfo(),
23176 L0->getBaseAlign());
23186 Ops.push_back(GenCombinedTree(O0, O1, DAG));
23189 SDValue NewOp = GenCombinedTree(Op0, Op1, DAG);
23192 int Hi = NumSubElts,
Lo = 0;
23193 for (
unsigned i = 0; i < NumSubLoads; i++) {
23194 for (
unsigned j = 0; j < NumSubElts; j++) {
23195 LowMask[i * NumSubElts + j] =
Lo++;
23196 HighMask[i * NumSubElts + j] =
Hi++;
23231 return DAG.
getNode(
N->getOpcode(),
DL, VT, Ext0, NShift);
23243 EVT VT =
N->getValueType(0);
23244 if (VT != MVT::i32 && VT != MVT::i64)
23255 if (Flags.getOpcode() != AArch64ISD::SUBS)
23263 if (!Flags.hasOneUse())
23267 if ((!CanFoldSub || !N0.
hasOneUse()) && RHSC &&
23270 Flags = DAG.
getNode(AArch64ISD::SUBS,
SDLoc(Flags), Flags->getVTList(),
23271 Flags.getOperand(1), Flags.getOperand(0))
23291 EVT VT =
N->getValueType(0);
23292 if (VT != MVT::v2i32 && VT != MVT::v4i16 && VT != MVT::v8i8)
23298 if (AShr.
getOpcode() != AArch64ISD::VASHR)
23300 if (AShr.
getOpcode() != AArch64ISD::VASHR ||
23301 LShr.
getOpcode() != AArch64ISD::VLSHR ||
23310 AArch64ISD::VLSHR,
DL, VT, Trunc,
23319 EVT VT =
N->getValueType(0);
23339 EVT VT =
N->getValueType(0);
23340 if (VT != MVT::i32 && VT != MVT::i64)
23346 if (SBC.
getOpcode() != AArch64ISD::SBC)
23408 assert(
LHS.getValueType().is64BitVector() &&
23409 RHS.getValueType().is64BitVector() &&
23410 "unexpected shape for long operation");
23417 if (!
RHS.getNode())
23421 if (!
LHS.getNode())
23434 MVT ElemTy =
N->getSimpleValueType(0).getScalarType();
23435 unsigned ElemBits = ElemTy.getSizeInBits();
23437 int64_t ShiftAmount;
23439 APInt SplatValue, SplatUndef;
23440 unsigned SplatBitSize;
23443 HasAnyUndefs, ElemBits) ||
23444 SplatBitSize != ElemBits)
23449 ShiftAmount = CVN->getSExtValue();
23454 if (ShiftAmount == 0 && IID != Intrinsic::aarch64_neon_sqshlu)
23455 return N->getOperand(1);
23462 case Intrinsic::aarch64_neon_sqshl:
23463 Opcode = AArch64ISD::SQSHL_I;
23464 IsRightShift =
false;
23466 case Intrinsic::aarch64_neon_uqshl:
23467 Opcode = AArch64ISD::UQSHL_I;
23468 IsRightShift =
false;
23470 case Intrinsic::aarch64_neon_srshl:
23471 Opcode = AArch64ISD::SRSHR_I;
23472 IsRightShift =
true;
23474 case Intrinsic::aarch64_neon_urshl:
23475 Opcode = AArch64ISD::URSHR_I;
23476 IsRightShift =
true;
23478 case Intrinsic::aarch64_neon_sqshlu:
23479 Opcode = AArch64ISD::SQSHLU_I;
23480 IsRightShift =
false;
23482 case Intrinsic::aarch64_neon_sshl:
23483 case Intrinsic::aarch64_neon_ushl:
23487 if (ShiftAmount < 0) {
23488 Opcode = IID == Intrinsic::aarch64_neon_sshl ? AArch64ISD::VASHR
23489 : AArch64ISD::VLSHR;
23490 ShiftAmount = -ShiftAmount;
23492 Opcode = AArch64ISD::VSHL;
23493 IsRightShift =
false;
23497 EVT VT =
N->getValueType(0);
23500 if (VT == MVT::i64) {
23505 if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(
int)ElemBits) {
23508 if (
N->getValueType(0) == MVT::i64)
23512 }
else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
23515 if (
N->getValueType(0) == MVT::i64)
23537 N->getOperand(0),
N->getOperand(1), AndN.
getOperand(0));
23544 DAG.
getNode(
Opc,
DL,
N->getOperand(1).getSimpleValueType(),
23554 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
23555 ScalarTy = MVT::i32;
23567 SDValue Scalar =
N->getOperand(3);
23568 EVT ScalarTy = Scalar.getValueType();
23570 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
23573 SDValue Passthru =
N->getOperand(1);
23575 return DAG.
getNode(AArch64ISD::DUP_MERGE_PASSTHRU,
DL,
N->getValueType(0),
23576 Pred, Scalar, Passthru);
23582 EVT VT =
N->getValueType(0);
23611 SDValue Comparator =
N->getOperand(3);
23612 if (Comparator.
getOpcode() == AArch64ISD::DUP ||
23615 EVT VT =
N->getValueType(0);
23616 EVT CmpVT =
N->getOperand(2).getValueType();
23627 case Intrinsic::aarch64_sve_cmpeq_wide:
23628 case Intrinsic::aarch64_sve_cmpne_wide:
23629 case Intrinsic::aarch64_sve_cmpge_wide:
23630 case Intrinsic::aarch64_sve_cmpgt_wide:
23631 case Intrinsic::aarch64_sve_cmplt_wide:
23632 case Intrinsic::aarch64_sve_cmple_wide: {
23634 int64_t ImmVal = CN->getSExtValue();
23635 if (ImmVal >= -16 && ImmVal <= 15)
23643 case Intrinsic::aarch64_sve_cmphs_wide:
23644 case Intrinsic::aarch64_sve_cmphi_wide:
23645 case Intrinsic::aarch64_sve_cmplo_wide:
23646 case Intrinsic::aarch64_sve_cmpls_wide: {
23648 uint64_t ImmVal = CN->getZExtValue();
23662 return DAG.
getNode(AArch64ISD::SETCC_MERGE_ZERO,
DL, VT, Pred,
23674 assert(
Op.getValueType().isScalableVector() &&
23676 "Expected legal scalable vector type!");
23678 "Expected same type for PTEST operands");
23686 if (
Op.getValueType() != MVT::nxv16i1) {
23689 Pg = DAG.
getNode(AArch64ISD::REINTERPRET_CAST,
DL, MVT::nxv16i1, Pg);
23692 Op = DAG.
getNode(AArch64ISD::REINTERPRET_CAST,
DL, MVT::nxv16i1,
Op);
23695 unsigned PTest = AArch64ISD::PTEST;
23697 PTest = AArch64ISD::PTEST_ANY;
23699 PTest = AArch64ISD::PTEST_FIRST;
23716 SDValue VecToReduce =
N->getOperand(2);
23735 SDValue VecToReduce =
N->getOperand(2);
23752 SDValue InitVal =
N->getOperand(2);
23753 SDValue VecToReduce =
N->getOperand(3);
23760 DAG.
getPOISON(ReduceVT), InitVal, Zero);
23772 if (
N->getValueType(0) != MVT::i16)
23786 bool SwapOperands =
false) {
23788 assert(
N->getNumOperands() == 4 &&
"Expected 3 operand intrinsic!");
23790 SDValue Op1 =
N->getOperand(SwapOperands ? 3 : 2);
23791 SDValue Op2 =
N->getOperand(SwapOperands ? 2 : 3);
23807 EVT VT =
N->getValueType(0);
23815 case Intrinsic::aarch64_sve_bsl:
23816 return DAG.
getNode(AArch64ISD::BSP,
DL, VT, Op3, Op1, Op2);
23817 case Intrinsic::aarch64_sve_bsl1n:
23820 case Intrinsic::aarch64_sve_bsl2n:
23821 return DAG.
getNode(AArch64ISD::BSP,
DL, VT, Op3, Op1,
23823 case Intrinsic::aarch64_sve_nbsl:
23842 if (EltVT != MVT::f32 && EltVT != MVT::f64 &&
23843 !(EltVT == MVT::f16 && Subtarget->hasFullFP16()))
23856 bool IsSignedZeroSafe =
23866 for (
unsigned I = 0;
I < NumElts; ++
I) {
23872 unsigned NumZeroElts = KnownZeroElts.
popcount();
23874 if (NumZeroElts == NumElts) {
23877 if (IsSignedZeroSafe)
23885 unsigned MinZeroElts;
23895 MinZeroElts = NumElts / 2;
23898 if (NumZeroElts < MinZeroElts)
23901 if (!IsSignedZeroSafe)
23907 for (
unsigned I = 0;
I < NumElts;
I++) {
23908 if (KnownZeroElts[
I])
23920 for (
unsigned I = 0;
I < NumElts;
I++) {
23924 while (Elts.
size() > 1) {
23927 for (
unsigned I = 0;
I < Elts.
size();
I += 2) {
23928 bool ZeroI = KnownZeroElts[
I];
23929 bool ZeroI1 = KnownZeroElts[
I + 1];
23930 if (ZeroI && ZeroI1) {
23933 NewKnownZeroElts.
setBit(
I / 2);
23934 }
else if (ZeroI) {
23936 }
else if (ZeroI1) {
23943 Elts = std::move(NewElts);
23944 KnownZeroElts = NewKnownZeroElts;
23957 case Intrinsic::aarch64_neon_vcvtfxs2fp:
23958 case Intrinsic::aarch64_neon_vcvtfxu2fp:
23960 case Intrinsic::aarch64_neon_saddv:
23962 case Intrinsic::aarch64_neon_uaddv:
23964 case Intrinsic::aarch64_neon_sminv:
23966 case Intrinsic::aarch64_neon_uminv:
23968 case Intrinsic::aarch64_neon_smaxv:
23970 case Intrinsic::aarch64_neon_umaxv:
23972 case Intrinsic::aarch64_neon_faddv:
23974 case Intrinsic::aarch64_neon_fmax:
23976 N->getOperand(1), N->getOperand(2));
23977 case Intrinsic::aarch64_neon_fmin:
23979 N->getOperand(1), N->getOperand(2));
23980 case Intrinsic::aarch64_neon_fmaxnm:
23982 N->getOperand(1), N->getOperand(2));
23983 case Intrinsic::aarch64_neon_fminnm:
23985 N->getOperand(1), N->getOperand(2));
23986 case Intrinsic::aarch64_neon_smull:
23987 return DAG.getNode(AArch64ISD::SMULL, SDLoc(N), N->getValueType(0),
23988 N->getOperand(1), N->getOperand(2));
23989 case Intrinsic::aarch64_neon_umull:
23990 return DAG.getNode(AArch64ISD::UMULL, SDLoc(N), N->getValueType(0),
23991 N->getOperand(1), N->getOperand(2));
23992 case Intrinsic::aarch64_neon_pmull:
23993 return DAG.getNode(AArch64ISD::PMULL, SDLoc(N), N->getValueType(0),
23994 N->getOperand(1), N->getOperand(2));
23995 case Intrinsic::aarch64_neon_sqdmull:
23997 case Intrinsic::aarch64_neon_sqshl:
23998 case Intrinsic::aarch64_neon_uqshl:
23999 case Intrinsic::aarch64_neon_sqshlu:
24000 case Intrinsic::aarch64_neon_srshl:
24001 case Intrinsic::aarch64_neon_urshl:
24002 case Intrinsic::aarch64_neon_sshl:
24003 case Intrinsic::aarch64_neon_ushl:
24005 case Intrinsic::aarch64_neon_sabd:
24007 N->getOperand(1), N->getOperand(2));
24008 case Intrinsic::aarch64_neon_uabd:
24010 N->getOperand(1), N->getOperand(2));
24011 case Intrinsic::aarch64_neon_fcvtzs:
24013 case Intrinsic::aarch64_neon_fcvtzu:
24015 case Intrinsic::aarch64_neon_fcvtas:
24017 case Intrinsic::aarch64_neon_fcvtau:
24019 case Intrinsic::aarch64_neon_fcvtms:
24021 case Intrinsic::aarch64_neon_fcvtmu:
24023 case Intrinsic::aarch64_neon_fcvtns:
24025 case Intrinsic::aarch64_neon_fcvtnu:
24027 case Intrinsic::aarch64_neon_fcvtps:
24029 case Intrinsic::aarch64_neon_fcvtpu:
24031 case Intrinsic::aarch64_crc32b:
24032 case Intrinsic::aarch64_crc32cb:
24034 case Intrinsic::aarch64_crc32h:
24035 case Intrinsic::aarch64_crc32ch:
24037 case Intrinsic::aarch64_sve_saddv:
24039 if (N->getOperand(2).getValueType().getVectorElementType() == MVT::i64)
24043 case Intrinsic::aarch64_sve_uaddv:
24045 case Intrinsic::aarch64_sve_smaxv:
24047 case Intrinsic::aarch64_sve_umaxv:
24049 case Intrinsic::aarch64_sve_sminv:
24051 case Intrinsic::aarch64_sve_uminv:
24053 case Intrinsic::aarch64_sve_orv:
24055 case Intrinsic::aarch64_sve_eorv:
24057 case Intrinsic::aarch64_sve_andv:
24059 case Intrinsic::aarch64_sve_index:
24061 case Intrinsic::aarch64_sve_dup:
24063 case Intrinsic::aarch64_sve_dup_x:
24066 case Intrinsic::aarch64_sve_ext:
24068 case Intrinsic::aarch64_sve_mul_u:
24069 return DAG.getNode(AArch64ISD::MUL_PRED, SDLoc(N), N->getValueType(0),
24070 N->getOperand(1), N->getOperand(2), N->getOperand(3));
24071 case Intrinsic::aarch64_sve_smulh_u:
24072 return DAG.getNode(AArch64ISD::MULHS_PRED, SDLoc(N), N->getValueType(0),
24073 N->getOperand(1), N->getOperand(2), N->getOperand(3));
24074 case Intrinsic::aarch64_sve_umulh_u:
24075 return DAG.getNode(AArch64ISD::MULHU_PRED, SDLoc(N), N->getValueType(0),
24076 N->getOperand(1), N->getOperand(2), N->getOperand(3));
24077 case Intrinsic::aarch64_sve_smin_u:
24078 return DAG.getNode(AArch64ISD::SMIN_PRED, SDLoc(N), N->getValueType(0),
24079 N->getOperand(1), N->getOperand(2), N->getOperand(3));
24080 case Intrinsic::aarch64_sve_umin_u:
24081 return DAG.getNode(AArch64ISD::UMIN_PRED, SDLoc(N), N->getValueType(0),
24082 N->getOperand(1), N->getOperand(2), N->getOperand(3));
24083 case Intrinsic::aarch64_sve_smax_u:
24084 return DAG.getNode(AArch64ISD::SMAX_PRED, SDLoc(N), N->getValueType(0),
24085 N->getOperand(1), N->getOperand(2), N->getOperand(3));
24086 case Intrinsic::aarch64_sve_umax_u:
24087 return DAG.getNode(AArch64ISD::UMAX_PRED, SDLoc(N), N->getValueType(0),
24088 N->getOperand(1), N->getOperand(2), N->getOperand(3));
24089 case Intrinsic::aarch64_sve_lsl_u:
24090 return DAG.getNode(AArch64ISD::SHL_PRED, SDLoc(N), N->getValueType(0),
24091 N->getOperand(1), N->getOperand(2), N->getOperand(3));
24092 case Intrinsic::aarch64_sve_lsr_u:
24093 return DAG.getNode(AArch64ISD::SRL_PRED, SDLoc(N), N->getValueType(0),
24094 N->getOperand(1), N->getOperand(2), N->getOperand(3));
24095 case Intrinsic::aarch64_sve_asr_u:
24096 return DAG.getNode(AArch64ISD::SRA_PRED, SDLoc(N), N->getValueType(0),
24097 N->getOperand(1), N->getOperand(2), N->getOperand(3));
24098 case Intrinsic::aarch64_sve_fadd_u:
24099 return DAG.getNode(AArch64ISD::FADD_PRED, SDLoc(N), N->getValueType(0),
24100 N->getOperand(1), N->getOperand(2), N->getOperand(3));
24101 case Intrinsic::aarch64_sve_fdiv_u:
24102 return DAG.getNode(AArch64ISD::FDIV_PRED, SDLoc(N), N->getValueType(0),
24103 N->getOperand(1), N->getOperand(2), N->getOperand(3));
24104 case Intrinsic::aarch64_sve_fmax_u:
24105 return DAG.getNode(AArch64ISD::FMAX_PRED, SDLoc(N), N->getValueType(0),
24106 N->getOperand(1), N->getOperand(2), N->getOperand(3));
24107 case Intrinsic::aarch64_sve_fmaxnm_u:
24108 return DAG.getNode(AArch64ISD::FMAXNM_PRED, SDLoc(N), N->getValueType(0),
24109 N->getOperand(1), N->getOperand(2), N->getOperand(3));
24110 case Intrinsic::aarch64_sve_fmla_u:
24111 return DAG.getNode(AArch64ISD::FMA_PRED, SDLoc(N), N->getValueType(0),
24112 N->getOperand(1), N->getOperand(3), N->getOperand(4),
24114 case Intrinsic::aarch64_sve_fmin_u:
24115 return DAG.getNode(AArch64ISD::FMIN_PRED, SDLoc(N), N->getValueType(0),
24116 N->getOperand(1), N->getOperand(2), N->getOperand(3));
24117 case Intrinsic::aarch64_sve_fminnm_u:
24118 return DAG.getNode(AArch64ISD::FMINNM_PRED, SDLoc(N), N->getValueType(0),
24119 N->getOperand(1), N->getOperand(2), N->getOperand(3));
24120 case Intrinsic::aarch64_sve_fmul_u:
24121 return DAG.getNode(AArch64ISD::FMUL_PRED, SDLoc(N), N->getValueType(0),
24122 N->getOperand(1), N->getOperand(2), N->getOperand(3));
24123 case Intrinsic::aarch64_sve_fsub_u:
24124 return DAG.getNode(AArch64ISD::FSUB_PRED, SDLoc(N), N->getValueType(0),
24125 N->getOperand(1), N->getOperand(2), N->getOperand(3));
24126 case Intrinsic::aarch64_sve_add_u:
24129 case Intrinsic::aarch64_sve_sub_u:
24132 case Intrinsic::aarch64_sve_subr:
24134 case Intrinsic::aarch64_sve_and_u:
24137 case Intrinsic::aarch64_sve_bic_u:
24138 return DAG.getNode(AArch64ISD::BIC, SDLoc(N), N->getValueType(0),
24139 N->getOperand(2), N->getOperand(3));
24140 case Intrinsic::aarch64_sve_saddwb:
24141 return DAG.getNode(AArch64ISD::SADDWB, SDLoc(N), N->getValueType(0),
24142 N->getOperand(1), N->getOperand(2));
24143 case Intrinsic::aarch64_sve_saddwt:
24144 return DAG.getNode(AArch64ISD::SADDWT, SDLoc(N), N->getValueType(0),
24145 N->getOperand(1), N->getOperand(2));
24146 case Intrinsic::aarch64_sve_uaddwb:
24147 return DAG.getNode(AArch64ISD::UADDWB, SDLoc(N), N->getValueType(0),
24148 N->getOperand(1), N->getOperand(2));
24149 case Intrinsic::aarch64_sve_uaddwt:
24150 return DAG.getNode(AArch64ISD::UADDWT, SDLoc(N), N->getValueType(0),
24151 N->getOperand(1), N->getOperand(2));
24152 case Intrinsic::aarch64_sve_eor_u:
24155 case Intrinsic::aarch64_sve_orr_u:
24158 case Intrinsic::aarch64_sve_sabd_u:
24161 return DAG.getNode(AArch64ISD::ABDS_PRED, SDLoc(N), N->getValueType(0),
24162 N->getOperand(1), N->getOperand(2), N->getOperand(3));
24163 case Intrinsic::aarch64_sve_uabd_u:
24166 return DAG.getNode(AArch64ISD::ABDU_PRED, SDLoc(N), N->getValueType(0),
24167 N->getOperand(1), N->getOperand(2), N->getOperand(3));
24168 case Intrinsic::aarch64_sve_sqadd:
24170 case Intrinsic::aarch64_sve_sqsub_u:
24172 N->getOperand(2), N->getOperand(3));
24173 case Intrinsic::aarch64_sve_uqadd:
24175 case Intrinsic::aarch64_sve_uqsub_u:
24177 N->getOperand(2), N->getOperand(3));
24178 case Intrinsic::aarch64_sve_sqadd_x:
24180 N->getOperand(1), N->getOperand(2));
24181 case Intrinsic::aarch64_sve_sqsub_x:
24183 N->getOperand(1), N->getOperand(2));
24184 case Intrinsic::aarch64_sve_uqadd_x:
24186 N->getOperand(1), N->getOperand(2));
24187 case Intrinsic::aarch64_sve_uqsub_x:
24189 N->getOperand(1), N->getOperand(2));
24190 case Intrinsic::aarch64_sve_asrd:
24191 return DAG.getNode(AArch64ISD::ASRD_MERGE_OP1, SDLoc(N), N->getValueType(0),
24192 N->getOperand(1), N->getOperand(2), N->getOperand(3));
24193 case Intrinsic::aarch64_sve_cmphs:
24194 if (!N->getOperand(2).getValueType().isFloatingPoint())
24196 N->getValueType(0), N->getOperand(1), N->getOperand(2),
24199 case Intrinsic::aarch64_sve_cmphi:
24200 if (!N->getOperand(2).getValueType().isFloatingPoint())
24202 N->getValueType(0), N->getOperand(1), N->getOperand(2),
24205 case Intrinsic::aarch64_sve_fcmpge:
24206 case Intrinsic::aarch64_sve_cmpge:
24208 N->getValueType(0), N->getOperand(1), N->getOperand(2),
24211 case Intrinsic::aarch64_sve_fcmpgt:
24212 case Intrinsic::aarch64_sve_cmpgt:
24214 N->getValueType(0), N->getOperand(1), N->getOperand(2),
24217 case Intrinsic::aarch64_sve_fcmpeq:
24218 case Intrinsic::aarch64_sve_cmpeq:
24220 N->getValueType(0), N->getOperand(1), N->getOperand(2),
24223 case Intrinsic::aarch64_sve_fcmpne:
24224 case Intrinsic::aarch64_sve_cmpne:
24226 N->getValueType(0), N->getOperand(1), N->getOperand(2),
24229 case Intrinsic::aarch64_sve_fcmpuo:
24231 N->getValueType(0), N->getOperand(1), N->getOperand(2),
24234 case Intrinsic::aarch64_sve_fadda:
24236 case Intrinsic::aarch64_sve_faddv:
24238 case Intrinsic::aarch64_sve_fmaxnmv:
24240 case Intrinsic::aarch64_sve_fmaxv:
24242 case Intrinsic::aarch64_sve_fminnmv:
24244 case Intrinsic::aarch64_sve_fminv:
24246 case Intrinsic::aarch64_sve_sel:
24248 N->getOperand(1), N->getOperand(2), N->getOperand(3));
24249 case Intrinsic::aarch64_sve_cmpeq_wide:
24251 case Intrinsic::aarch64_sve_cmpne_wide:
24253 case Intrinsic::aarch64_sve_cmpge_wide:
24255 case Intrinsic::aarch64_sve_cmpgt_wide:
24257 case Intrinsic::aarch64_sve_cmplt_wide:
24259 case Intrinsic::aarch64_sve_cmple_wide:
24261 case Intrinsic::aarch64_sve_cmphs_wide:
24263 case Intrinsic::aarch64_sve_cmphi_wide:
24265 case Intrinsic::aarch64_sve_cmplo_wide:
24267 case Intrinsic::aarch64_sve_cmpls_wide:
24269 case Intrinsic::aarch64_sve_ptest_any:
24270 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
24272 case Intrinsic::aarch64_sve_ptest_first:
24273 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
24275 case Intrinsic::aarch64_sve_ptest_last:
24276 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
24278 case Intrinsic::aarch64_sve_whilelo:
24280 N->getOperand(1), N->getOperand(2));
24281 case Intrinsic::aarch64_sve_bsl:
24282 case Intrinsic::aarch64_sve_bsl1n:
24283 case Intrinsic::aarch64_sve_bsl2n:
24284 case Intrinsic::aarch64_sve_nbsl:
24291 unsigned OC =
N->getOpcode();
24307 const SDValue SetCC =
N->getOperand(0);
24329 SDLoc(SetCC),
N->getValueType(0), Ext1, Ext2,
24341 EVT VT =
N->getValueType(0);
24342 if ((VT != MVT::v4i32 && VT != MVT::v8i16) ||
24347 unsigned ExtOffset =
N->getOperand(0).getConstantOperandVal(1);
24358 EVT InVT =
N->getOperand(0).getOperand(0).getValueType();
24370 bool IsUndefDeInterleave =
false;
24371 if (!IsDeInterleave)
24372 IsUndefDeInterleave =
24373 Shuffle->getOperand(1).isUndef() &&
24376 [](
int M) { return M < 0; }) &&
24381 if ((!IsDeInterleave && !IsUndefDeInterleave) || Idx >= 4)
24385 Shuffle->getOperand(IsUndefDeInterleave ? 1 : 0));
24387 Shuffle->getOperand(IsUndefDeInterleave ? 0 : 1));
24388 SDValue UZP = DAG.
getNode(Idx < 2 ? AArch64ISD::UZP1 : AArch64ISD::UZP2,
DL,
24390 if ((Idx & 1) == 1)
24406 EVT VT =
N->getValueType(0);
24408 (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16))
24412 unsigned ExtOffset = (
unsigned)-1;
24414 ExtOffset =
Op.getConstantOperandVal(1);
24415 Op =
Op.getOperand(0);
24417 if (
Op.getValueType().isScalableVector())
24421 unsigned Shift = 0;
24423 Op.getValueType().getScalarSizeInBits());
24425 if (
Op.getOpcode() == AArch64ISD::VLSHR) {
24426 Shift =
Op.getConstantOperandVal(1);
24427 Op =
Op.getOperand(0);
24428 Mask = Mask.lshr(Shift);
24432 Op =
Op.getOperand(0);
24434 }
else if (
Op.getOpcode() == AArch64ISD::BICi) {
24435 Mask =
~APInt(
Op.getValueType().getScalarSizeInBits(),
24436 Op.getConstantOperandVal(1) <<
Op.getConstantOperandVal(2));
24438 Op =
Op.getOperand(0);
24441 if (ExtOffset == (
unsigned)-1) {
24443 ExtOffset =
Op.getConstantOperandVal(1);
24444 Op =
Op.getOperand(0);
24446 if (
Op.getValueType().isScalableVector())
24454 if (
Op.getOpcode() != AArch64ISD::UZP1 &&
Op.getOpcode() != AArch64ISD::UZP2)
24456 if (
Op.getOpcode() == AArch64ISD::UZP2)
24461 Op.getOperand(ExtOffset == 0 ? 0 : 1));
24463 BC = DAG.
getNode(AArch64ISD::VLSHR,
DL, VT, BC,
24477 if (!Subtarget.hasNEON())
24488 if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16 && SVT != MVT::i8)
  if (NumElts > EltSizeInBits) {
    assert((NumElts % EltSizeInBits) == 0 && "Unexpected integer scale");
    unsigned Scale = NumElts / EltSizeInBits;
    for (unsigned I = 0; I != Scale; ++I)
      ShuffleMask.append(EltSizeInBits, (int)I);
  for (unsigned I = 0; I != NumElts; ++I) {
    unsigned ScalarBit = IsBE ? (NumElts - 1 - I) : I;
    int BitIdx = ScalarBit % EltSizeInBits;
24560 (DupOpc != AArch64ISD::DUPLANE8 && DupOpc != AArch64ISD::DUPLANE16 &&
24561 DupOpc != AArch64ISD::DUPLANE32))
24566 !Insert.getOperand(0).isUndef() || !
isNullConstant(Insert.getOperand(2)))
24569 SDValue Trunc = Insert.getOperand(1);
24574 EVT SrcVT = Src.getValueType();
24575 EVT DstVT =
N->getValueType(0);
24595 unsigned NewDupOpc;
24598 NewDupOpc = AArch64ISD::DUPLANE16;
24601 NewDupOpc = AArch64ISD::DUPLANE32;
24604 NewDupOpc = AArch64ISD::DUPLANE64;
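  // Extends of a 64-bit ABDU/ABDS result: the widening can be folded into the
  // absolute-difference node itself so a single long-form ABD is emitted.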
24622 N->getOperand(0).getValueType().is64BitVector() &&
24623 (
N->getOperand(0).getOpcode() ==
ISD::ABDU ||
24624 N->getOperand(0).getOpcode() ==
ISD::ABDS)) {
24625 SDNode *ABDNode =
N->getOperand(0).getNode();
24641 EVT VT =
N->getValueType(0);
24643 DAG, DCI, *Subtarget))
24646 if (
N->getValueType(0).isFixedLengthVector() &&
24666 (
N->getValueType(0) == MVT::i32 ||
N->getValueType(0) == MVT::i64)) {
    return DAG.getNode(AArch64ISD::REV16, SDLoc(N), N->getValueType(0),
                                  SDValue SplatVal, unsigned NumVecElts) {
24700 if (BasePtr->getOpcode() ==
ISD::ADD &&
24703 BasePtr = BasePtr->getOperand(0);
24706 unsigned Offset = EltOffset;
24707 while (--NumVecElts) {
  assert(ContentTy.isSimple() && "No SVE containers for extended types");
    return MVT::nxv2i64;
    return MVT::nxv4i32;
  case MVT::nxv8bf16:
    return MVT::nxv8i16;
    return MVT::nxv16i8;
  EVT VT = N->getValueType(0);
  EVT ContainerVT = VT;
  if (ContainerVT.isInteger() && (VT != ContainerVT))
  EVT VT = N->getValueType(0);
  EVT PtrTy = N->getOperand(3).getValueType();
      DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(), MINode->getOperand(3),
                        MINode->getMemoryVT(), MINode->getMemOperand(),
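// LD1RQ/LD1RO replicate a loaded 128-bit (resp. 256-bit) block across the
// whole SVE vector; both are modelled here with merge-zero (zeroing) semantics.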
template <unsigned Opcode>
  static_assert(Opcode == AArch64ISD::LD1RQ_MERGE_ZERO ||
                    Opcode == AArch64ISD::LD1RO_MERGE_ZERO,
                "Unsupported opcode.");
  EVT VT = N->getValueType(0);
  SDValue Ops[] = {N->getOperand(0), N->getOperand(2), N->getOperand(3)};
  EVT DataVT = Data.getValueType();
  if (Data.getValueType().isFloatingPoint())
  return DAG.getNode(AArch64ISD::ST1_PRED, DL, N->getValueType(0), Ops);
  EVT DataVT = Data.getValueType();
  EVT PtrTy = N->getOperand(4).getValueType();
      MINode->getMemoryVT(), MINode->getMemOperand(),
24892 if (!(((NumVecElts == 2 || NumVecElts == 3) &&
24894 ((NumVecElts == 2 || NumVecElts == 3 || NumVecElts == 4) &&
  for (int I = 0; I < NumVecElts; ++I) {
24932 ZeroReg = AArch64::WZR;
24935 ZeroReg = AArch64::XZR;
24959 if (NumVecElts != 4 && NumVecElts != 2)
24970 std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
  for (unsigned I = 0; I < NumVecElts; ++I) {
24988 if (IndexVal >= NumVecElts)
24990 IndexNotInserted.reset(IndexVal);
24995 if (IndexNotInserted.any())
25019 return ReplacedZeroSplat;
25025 if (!Subtarget->isMisaligned128StoreSlow())
25050 return ReplacedSplat;
  assert(N->getOpcode() == AArch64ISD::SPLICE && "Unexpected Opcode!");
  if (N->getOperand(2).isUndef())
    return N->getOperand(1);
  assert((N->getOpcode() == AArch64ISD::UUNPKHI ||
          N->getOpcode() == AArch64ISD::UUNPKLO) &&
         "Unexpected Opcode!");
  if (N->getOperand(0).isUndef())
    return DAG.getUNDEF(N->getValueType(0));
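  // UUNPKLO fed directly by a masked load: when the governing predicate is a
  // PTRUE with a known pattern, the unpack can potentially be folded into a
  // wider extending masked load.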
  if (N->getOperand(0).getOpcode() == ISD::MLOAD &&
      N->getOpcode() == AArch64ISD::UUNPKLO) {
      SDValue(MLD, 0).hasOneUse() && Mask->getOpcode() == AArch64ISD::PTRUE &&
      unsigned PgPattern = Mask->getConstantOperandVal(0);
      EVT VT = N->getValueType(0);
  if (N->getOpcode() != AArch64ISD::UZP1)
    return false;
  EVT DstVT = N->getValueType(0);
  return (SrcVT == MVT::nxv8i16 && DstVT == MVT::nxv16i8) ||
         (SrcVT == MVT::nxv4i32 && DstVT == MVT::nxv8i16) ||
         (SrcVT == MVT::nxv2i64 && DstVT == MVT::nxv4i32);
  assert(N->getOpcode() == AArch64ISD::UZP1 && "Only UZP1 expected.");
  EVT ResVT = N->getValueType(0);
  if (RshOpc != AArch64ISD::RSHRNB_I)
  if (Lo.getOpcode() != AArch64ISD::UUNPKLO &&
      Hi.getOpcode() != AArch64ISD::UUNPKHI)
  if (OrigArg != Hi.getOperand(0))
  return DAG.getNode(AArch64ISD::URSHR_I_PRED, DL, ResVT,
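  // Form a rounding shift-right-narrow: the result type is halved (nxv8i16 ->
  // nxv16i8, etc.), the shift feeds an RSHRNB_I node, and an NVCAST restores
  // the original type.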
  if (VT == MVT::nxv8i16)
    ResVT = MVT::nxv16i8;
  else if (VT == MVT::nxv4i32)
    ResVT = MVT::nxv8i16;
  else if (VT == MVT::nxv2i64)
    ResVT = MVT::nxv4i32;
  unsigned ShiftValue;
      AArch64ISD::RSHRNB_I, DL, ResVT,
  return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Rshrnb);
  if (V.getOpcode() != AArch64ISD::NVCAST)
  if (!Op.getValueType().isVector() ||
      V.getValueType().getVectorElementCount() !=
          Op.getValueType().getVectorElementCount() * 2)
  EVT ResVT = N->getValueType(0);
  if (ExtIdx0 == 0 && ExtIdx1 == NumElements / 2) {
  if (N->getOpcode() == AArch64ISD::UZP2)
    Rshrnb = DAG.getNode(AArch64ISD::NVCAST, DL, ResVT, Rshrnb);
    return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Rshrnb, Op1);
    Rshrnb = DAG.getNode(AArch64ISD::NVCAST, DL, ResVT, Rshrnb);
    return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Op0, Rshrnb);
  if (PreCast.getOpcode() == AArch64ISD::UUNPKLO) {
    if (PreCast.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
      SDValue X = PreCast.getOperand(0).getOperand(0);
      return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, X, Op1);
  if (PreCast.getOpcode() == AArch64ISD::UUNPKHI) {
    if (PreCast.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
      SDValue Z = PreCast.getOperand(0).getOperand(1);
      return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Op0, Z);
25299 EVT BCVT = MVT::Other, HalfVT = MVT::Other;
25305 HalfVT = MVT::v8i8;
25309 HalfVT = MVT::v4i16;
25313 HalfVT = MVT::v2i32;
25316 if (BCVT != MVT::Other) {
25337 if (ResVT != MVT::v2i32 && ResVT != MVT::v4i16 && ResVT != MVT::v8i8)
25346 if ((ResVT == MVT::v4i16 && Op0Ty == MVT::v2i32) ||
25347 (ResVT == MVT::v8i8 && Op0Ty == MVT::v4i16)) {
25351 SourceOp0, SourceOp1);
25371 ResultTy = MVT::v4i32;
25374 ResultTy = MVT::v8i16;
25377 ResultTy = MVT::v16i8;
25388 EVT BitcastResultTy;
25392 BitcastResultTy = MVT::v2i64;
25395 BitcastResultTy = MVT::v4i32;
25398 BitcastResultTy = MVT::v8i16;
  unsigned Opc = N->getOpcode();
  const bool Scaled = Opc == AArch64ISD::GLD1_SCALED_MERGE_ZERO ||
                      Opc == AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
  const bool Signed = Opc == AArch64ISD::GLD1S_MERGE_ZERO ||
                      Opc == AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
  const bool Extended = Opc == AArch64ISD::GLD1_SXTW_MERGE_ZERO ||
                        Opc == AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO ||
                        Opc == AArch64ISD::GLD1_UXTW_MERGE_ZERO ||
                        Opc == AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO;
  EVT ResVT = N->getValueType(0);
  const auto OffsetOpc = Offset.getOpcode();
  const bool OffsetIsZExt =
      OffsetOpc == AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU;
  const bool OffsetIsSExt =
      OffsetOpc == AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU;
  if (!Extended && (OffsetIsSExt || OffsetIsZExt)) {
    if (ExtPg == Pg && ExtFromEVT == MVT::i32) {
      return DAG.getNode(NewOpc, DL, {ResVT, MVT::Other},
                         {Chain, Pg, Base, UnextendedOffset, Ty});
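// Immediate vector shifts (VASHR/VLSHR): an arithmetic shift right that exactly
// undoes a preceding VSHL by the same amount is folded away, and otherwise the
// shifted-out bits feed a demanded-bits simplification.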
  assert(N->getOpcode() == AArch64ISD::VASHR ||
         N->getOpcode() == AArch64ISD::VLSHR);
  unsigned OpScalarSize = Op.getScalarValueSizeInBits();
  unsigned ShiftImm = N->getConstantOperandVal(1);
  assert(OpScalarSize > ShiftImm && "Invalid shift imm");
  if (N->getOpcode() == AArch64ISD::VASHR &&
      Op.getOpcode() == AArch64ISD::VSHL &&
      N->getOperand(1) == Op.getOperand(1))
    return Op.getOperand(0);
  if (N->getFlags().hasExact())
  APInt DemandedMask = ~ShiftedOutBits;
25498 N->getOperand(0)->getOperand(0)->getValueType(0).getScalarType() ==
25500 SDValue CC =
N->getOperand(0)->getOperand(0);
  EVT VT = N->getValueType(0);
  unsigned LoadIdx = IsLaneOp ? 1 : 0;
  if (LD->isIndexed())
    Lane = N->getOperand(2);
  for (SDUse &U : LD->uses()) {
    if (U.getResNo() == 1)
    if (U.getUser() != N)
  if (N->hasOneUse()) {
    unsigned UseOpc = N->user_begin()->getOpcode();
  SDValue Addr = LD->getOperand(1);
  uint32_t IncVal = CInc->getZExtValue();
  if (IncVal != NumBytes)
  Ops.push_back(LD->getOperand(0));
  Ops.push_back(Lane);
  Ops.push_back(Addr);
  Ops.push_back(Inc);
  EVT Tys[3] = { VT, MVT::i64, MVT::Other };
  unsigned NewOp = IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost;
static bool performTBISimplification(SDValue Addr,
  unsigned NumIgnoreBits =
         "Expected STORE dag node in input!");
  if (!Store->isTruncatingStore() || Store->isIndexed())
      Store->getBasePtr(), Store->getMemOperand());
  EVT MemVT = LD->getMemoryVT();
      LD->getBaseAlign() >= 4)
  assert(LD->getOffset().isUndef() && "undef offset expected");
    performTBISimplification(N->getOperand(1), DCI, DAG);
  EVT RegVT = LD->getValueType(0);
  EVT MemVT = LD->getMemoryVT();
  unsigned AddrSpace = LD->getAddressSpace();
  if (PtrVT != LD->getBasePtr().getSimpleValueType()) {
        Cast, LD->getPointerInfo(), MemVT,
        LD->getBaseAlign(), LD->getMemOperand()->getFlags());
  if (SDValue Res = combineV3I8LoadExt(LD, DAG))
  if (!LD->isNonTemporal())
  for (unsigned I = 0; I < Num256Loads; I++) {
    unsigned PtrOffset = I * 32;
        NewVT, DL, Chain, NewPtr, LD->getPointerInfo().getWithOffset(PtrOffset),
        NewAlign, LD->getMemOperand()->getFlags(), LD->getAAInfo());
  unsigned PtrOffset = (MemVT.getSizeInBits() - BitsRemaining) / 8;
      DAG.getLoad(RemainingVT, DL, Chain, NewPtr,
                  LD->getPointerInfo().getWithOffset(PtrOffset), NewAlign,
                  LD->getMemOperand()->getFlags(), LD->getAAInfo());
  SDValue ExtendedRemainingLoad =
      {PoisonVector, RemainingLoad, InsertIdx});
  LoadOps.push_back(ExtendedRemainingLoad);
25837 EVT VecVT =
Op.getValueType();
25839 "Need boolean vector type.");
25846 return Op.getOperand(0).getValueType();
25850 for (
SDValue Operand :
Op->op_values()) {
25854 EVT OperandVT = tryGetOriginalBoolVectorType(Operand,
Depth + 1);
25856 BaseVT = OperandVT;
25857 else if (OperandVT != BaseVT)
25874 "Expected a fixed-length bool vector");
25877 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
25880 auto getCanonicalCompareVecVT = [&]() {
25881 unsigned BitsPerElement = std::max(64 / NumElts, 8u);
25885 EVT CompareVecVT = tryGetOriginalBoolVectorType(Vec);
25887 CompareVecVT = getCanonicalCompareVecVT();
25900 if (CompareBitsSize != 64 && CompareBitsSize != 128) {
25901 CompareVecVT = getCanonicalCompareVecVT();
25906 if (CompareBitsSize != 64 && CompareBitsSize != 128)
25910 if (CompareBitsSize == 64) {
25911 CompareLHS = DAG.
getBitcast(MVT::i64, CompareBits);
25933 EVT VecVT = ComparisonResult.getValueType();
25937 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
25947 VecVT = tryGetOriginalBoolVectorType(ComparisonResult);
25949 unsigned BitsPerElement = std::max(64 / NumElts, 8u);
25967 VecVT == MVT::v16i8) {
25971 for (
unsigned Half = 0; Half < 2; ++Half) {
25972 for (
unsigned I = 0;
I < 8; ++
I) {
25975 unsigned MaskBit = IsLE ? (1u <<
I) : (1u << (7 -
I));
25983 SDValue UpperRepresentativeBits =
25984 DAG.
getNode(AArch64ISD::EXT,
DL, VecVT, RepresentativeBits,
25987 RepresentativeBits, UpperRepresentativeBits);
25994 for (
unsigned I = 0;
I < NumEl; ++
I) {
25995 unsigned MaskBit = IsLE ? (1u <<
I) : (1u << (NumEl - 1 -
I));
26009 if (!
Store->isTruncatingStore())
26035 Store->getMemOperand());
26049 if (
Value.getValueType().isVector())
26053 while (
Value->isAssert())
26064 if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
26068 EVT VT =
Value.getSimpleValueType();
26085 DCI.
CombineTo(
ST->getValue().getNode(), Extracted);
bool isHalvingTruncateOfLegalScalableType(EVT SrcVT, EVT DstVT) {
  return (SrcVT == MVT::nxv8i16 && DstVT == MVT::nxv8i8) ||
         (SrcVT == MVT::nxv4i32 && DstVT == MVT::nxv4i16) ||
         (SrcVT == MVT::nxv2i64 && DstVT == MVT::nxv2i32);
}
26099 EVT ValueVT =
Value.getValueType();
26106 assert(
ST->getOffset().isUndef() &&
"undef offset expected");
26110 Value->getOperand(0).getValueType().getVectorElementType(), 4);
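// Map a scalar floating-point type to the FPR subregister index used to access
// it inside a vector register: bsub/hsub/ssub/dsub for 8/16/32/64-bit values.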
static unsigned getFPSubregForVT(EVT VT) {
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::aarch64mfp8:
    return AArch64::bsub;
  case MVT::f16:
    return AArch64::hsub;
  case MVT::f32:
    return AArch64::ssub;
  case MVT::f64:
    return AArch64::dsub;
26166 EVT ValueVT =
Value.getValueType();
26167 EVT MemVT =
ST->getMemoryVT();
26171 if (
SDValue Res = combineStoreValueFPToInt(ST, DCI, DAG, Subtarget))
26174 auto hasValidElementTypeForFPTruncStore = [](
EVT VT) {
26176 return EltVT == MVT::f32 || EltVT == MVT::f64;
26180 unsigned AddrSpace =
ST->getAddressSpace();
26187 ST->getBaseAlign(),
ST->getMemOperand()->getFlags(),
26192 if (
SDValue Res = combineI8TruncStore(ST, DAG, Subtarget))
26204 hasValidElementTypeForFPTruncStore(
Value.getOperand(0).getValueType()))
26206 ST->getMemOperand());
26212 performTBISimplification(
N->getOperand(2), DCI, DAG))
26215 if (
SDValue Store = foldTruncStoreOfExt(DAG,
N))
26218 if (
SDValue Store = combineBoolVectorAndTruncateStore(DAG, ST))
26221 if (
ST->isTruncatingStore() &&
26222 isHalvingTruncateOfLegalScalableType(ValueVT, MemVT)) {
26226 MemVT,
ST->getMemOperand());
26250 if (ValueVT != MemVT && !
ST->isTruncatingStore())
26263 !ExtCst->isZero() &&
ST->getBasePtr().getOpcode() !=
ISD::ADD)
26266 if (MemVT == MVT::i64 || MemVT == MVT::i32) {
26270 for (
const auto &
Use :
Vector->uses()) {
26271 if (
Use.getResNo() !=
Vector.getResNo())
26282 if (!ExtCst || !ExtCst->isZero()) {
26291 EVT FPMemVT = MemVT == MVT::i8
26295 FPMemVT, ExtVector);
26297 return DAG.
getStore(
ST->getChain(),
DL, FPSubreg,
ST->getBasePtr(),
26298 ST->getMemOperand());
26309 unsigned NumParts =
N->getNumOperands();
26318 for (
unsigned I = 0;
I < NumParts;
I++)
26319 if (
N->getOperand(
I) !=
SDValue(InterleaveOp,
I))
26328 unsigned RequiredNumParts) {
26331 if (!isSequentialConcatOfVectorInterleave(WideMask.
getNode(),
26332 MaskInterleaveOps))
26335 if (MaskInterleaveOps.
size() != RequiredNumParts)
26342 return MaskInterleaveOps[0];
26349 assert(
EC.isKnownMultipleOf(RequiredNumParts) &&
26350 "Expected element count divisible by number of parts");
26351 EC =
EC.divideCoefficientBy(RequiredNumParts);
26356static SDValue performInterleavedMaskedStoreCombine(
26372 if (!isSequentialConcatOfVectorInterleave(WideValue.
getNode(),
26373 ValueInterleaveOps))
26376 unsigned NumParts = ValueInterleaveOps.
size();
26377 if (NumParts != 2 && NumParts != 4)
26382 EVT SubVecTy = ValueInterleaveOps[0].getValueType();
26390 getNarrowMaskForInterleavedOps(DAG,
DL, MST->
getMask(), NumParts);
26395 NumParts == 2 ? Intrinsic::aarch64_sve_st2 : Intrinsic::aarch64_sve_st4;
26398 NewStOps.
append(ValueInterleaveOps);
26412 if (
SDValue Res = performInterleavedMaskedStoreCombine(
N, DCI, DAG))
26420 Value.getValueType().isInteger()) {
26425 EVT InVT =
Value.getOperand(0).getValueType();
26429 unsigned PgPattern =
Mask->getConstantOperandVal(0);
26449 EVT ValueVT =
Value->getValueType(0);
26451 if (!isHalvingTruncateOfLegalScalableType(ValueVT, MemVT))
26468 EVT IndexVT = Index.getValueType();
26478 if (Index.getOpcode() ==
ISD::ADD) {
26493 if (Index.getOpcode() ==
ISD::SHL &&
26494 Index.getOperand(0).getOpcode() ==
ISD::ADD) {
26504 Add.getOperand(0), ShiftOp);
26524 EVT IndexVT = Index.getValueType();
26525 EVT DataVT =
N->getOperand(1).getValueType();
26545 int64_t Stride = 0;
26554 if (Stride < std::numeric_limits<int32_t>::min() ||
26555 Stride > std::numeric_limits<int32_t>::max())
26559 unsigned MaxVScale =
26561 int64_t LastElementOffset =
26564 if (LastElementOffset < std::numeric_limits<int32_t>::min() ||
26565 LastElementOffset > std::numeric_limits<int32_t>::max())
26596 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
26614 if (HG->getInc().getValueType().getScalarSizeInBits() >
26615 Index.getValueType().getScalarSizeInBits())
26620 SDValue Ops[] = {Chain, HG->getInc(), Mask, BasePtr,
26621 Index, Scale, HG->getIntID()};
26623 DL,
Ops, HG->getMemOperand(), IndexType);
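// Post-increment combining for NEON structured loads/stores: when the address
// used by a ldN/stN intrinsic is also advanced by the size of the access, the
// intrinsic is rewritten to its post-indexed (LDxpost/STxpost) form.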
  unsigned AddrOpIdx = N->getNumOperands() - 1;
  SDValue Addr = N->getOperand(AddrOpIdx);
  bool IsStore = false;
  bool IsLaneOp = false;
  bool IsDupOp = false;
  unsigned NewOpc = 0;
  unsigned NumVecs = 0;
  unsigned IntNo = N->getConstantOperandVal(1);
  case Intrinsic::aarch64_neon_ld2:       NewOpc = AArch64ISD::LD2post;
    NumVecs = 2; break;
  case Intrinsic::aarch64_neon_ld3:       NewOpc = AArch64ISD::LD3post;
    NumVecs = 3; break;
  case Intrinsic::aarch64_neon_ld4:       NewOpc = AArch64ISD::LD4post;
    NumVecs = 4; break;
  case Intrinsic::aarch64_neon_st2:       NewOpc = AArch64ISD::ST2post;
    NumVecs = 2; IsStore = true; break;
  case Intrinsic::aarch64_neon_st3:       NewOpc = AArch64ISD::ST3post;
    NumVecs = 3; IsStore = true; break;
  case Intrinsic::aarch64_neon_st4:       NewOpc = AArch64ISD::ST4post;
    NumVecs = 4; IsStore = true; break;
  case Intrinsic::aarch64_neon_ld1x2:     NewOpc = AArch64ISD::LD1x2post;
    NumVecs = 2; break;
  case Intrinsic::aarch64_neon_ld1x3:     NewOpc = AArch64ISD::LD1x3post;
    NumVecs = 3; break;
  case Intrinsic::aarch64_neon_ld1x4:     NewOpc = AArch64ISD::LD1x4post;
    NumVecs = 4; break;
  case Intrinsic::aarch64_neon_st1x2:     NewOpc = AArch64ISD::ST1x2post;
    NumVecs = 2; IsStore = true; break;
  case Intrinsic::aarch64_neon_st1x3:     NewOpc = AArch64ISD::ST1x3post;
    NumVecs = 3; IsStore = true; break;
  case Intrinsic::aarch64_neon_st1x4:     NewOpc = AArch64ISD::ST1x4post;
    NumVecs = 4; IsStore = true; break;
  case Intrinsic::aarch64_neon_ld2r:      NewOpc = AArch64ISD::LD2DUPpost;
    NumVecs = 2; IsDupOp = true; break;
  case Intrinsic::aarch64_neon_ld3r:      NewOpc = AArch64ISD::LD3DUPpost;
    NumVecs = 3; IsDupOp = true; break;
  case Intrinsic::aarch64_neon_ld4r:      NewOpc = AArch64ISD::LD4DUPpost;
    NumVecs = 4; IsDupOp = true; break;
  case Intrinsic::aarch64_neon_ld2lane:   NewOpc = AArch64ISD::LD2LANEpost;
    NumVecs = 2; IsLaneOp = true; break;
  case Intrinsic::aarch64_neon_ld3lane:   NewOpc = AArch64ISD::LD3LANEpost;
    NumVecs = 3; IsLaneOp = true; break;
  case Intrinsic::aarch64_neon_ld4lane:   NewOpc = AArch64ISD::LD4LANEpost;
    NumVecs = 4; IsLaneOp = true; break;
  case Intrinsic::aarch64_neon_st2lane:   NewOpc = AArch64ISD::ST2LANEpost;
    NumVecs = 2; IsStore = true; IsLaneOp = true; break;
  case Intrinsic::aarch64_neon_st3lane:   NewOpc = AArch64ISD::ST3LANEpost;
    NumVecs = 3; IsStore = true; IsLaneOp = true; break;
  case Intrinsic::aarch64_neon_st4lane:   NewOpc = AArch64ISD::ST4LANEpost;
    NumVecs = 4; IsStore = true; IsLaneOp = true; break;
    VecTy = N->getOperand(2).getValueType();
    VecTy = N->getValueType(0);
  uint32_t IncVal = CInc->getZExtValue();
  if (IsLaneOp || IsDupOp)
  if (IncVal != NumBytes)
  Ops.push_back(N->getOperand(0));
  if (IsLaneOp || IsStore)
    for (unsigned i = 2; i < AddrOpIdx; ++i)
      Ops.push_back(N->getOperand(i));
  Ops.push_back(Addr);
  Ops.push_back(Inc);
  unsigned NumResultVecs = (IsStore ? 0 : NumVecs);
  for (n = 0; n < NumResultVecs; ++n)
  Tys[n++] = MVT::i64;
  Tys[n] = MVT::Other;
  std::vector<SDValue> NewResults;
  for (unsigned i = 0; i < NumResultVecs; ++i) {
    NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1));
  switch(V.getNode()->getOpcode()) {
    if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8) ||
        (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) {
    if ((TypeNode->getVT() == MVT::i8 && width == 8) ||
        (TypeNode->getVT() == MVT::i16 && width == 16)) {
    if ((TypeNode->getVT() == MVT::i8 && width == 8) ||
        (TypeNode->getVT() == MVT::i16 && width == 16)) {
    1LL << (width - 1);
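// For each condition code, a small case analysis on the signs and magnitudes
// of AddConstant and CompConstant decides whether the comparison result is
// unaffected by narrowing the value to 'width' bits.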
26870 int CompConstant) {
26874 int MaxUInt = (1 << width);
26882 AddConstant -= (1 << (width-1));
26887 if ((AddConstant == 0) ||
26888 (CompConstant == MaxUInt - 1 && AddConstant < 0) ||
26889 (AddConstant >= 0 && CompConstant < 0) ||
26890 (AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant))
26895 if ((AddConstant == 0) ||
26896 (AddConstant >= 0 && CompConstant <= 0) ||
26897 (AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant))
26902 if ((AddConstant >= 0 && CompConstant < 0) ||
26903 (AddConstant <= 0 && CompConstant >= -1 &&
26904 CompConstant < AddConstant + MaxUInt))
26909 if ((AddConstant == 0) ||
26910 (AddConstant > 0 && CompConstant <= 0) ||
26911 (AddConstant < 0 && CompConstant <= AddConstant))
26916 if ((AddConstant >= 0 && CompConstant <= 0) ||
26917 (AddConstant <= 0 && CompConstant >= 0 &&
26918 CompConstant <= AddConstant + MaxUInt))
26923 if ((AddConstant > 0 && CompConstant < 0) ||
26924 (AddConstant < 0 && CompConstant >= 0 &&
26925 CompConstant < AddConstant + MaxUInt) ||
26926 (AddConstant >= 0 && CompConstant >= 0 &&
26927 CompConstant >= AddConstant) ||
26928 (AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant))
26947 unsigned CCIndex,
unsigned CmpIndex,
26976 N->getOperand(CCIndex)->getValueType(0));
26984 assert((CCIndex == 2 && CmpIndex == 3) &&
26985 "Expected CCIndex to be 2 and CmpIndex to be 3.");
26986 SDValue Ops[] = {
N->getOperand(0),
N->getOperand(1), AArch64_CC,
26988 return DAG.
getNode(
N->getOpcode(),
N,
N->getVTList(),
Ops);
26995 unsigned CmpIndex) {
26997 SDNode *SubsNode =
N->getOperand(CmpIndex).getNode();
26998 unsigned CondOpcode = SubsNode->
getOpcode();
27008 unsigned MaskBits = 0;
27032 unsigned ShiftAmt = M.countl_zero();
27037 ShiftedC, ShiftedX);
27044 uint32_t CNV = CN->getZExtValue();
27047 else if (CNV == 65535)
27121 SDValue CSel = Cmp.getOperand(0);
27125 return DAG.
getNode(
N->getOpcode(),
DL,
N->getVTList(), Chain, Dest,
27131 unsigned CmpOpc = Cmp.getOpcode();
27132 if (CmpOpc != AArch64ISD::ADDS && CmpOpc != AArch64ISD::SUBS)
27137 if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1))
27144 "Expected the value type to be the same for both operands!");
27145 if (
LHS.getValueType() != MVT::i32 &&
LHS.getValueType() != MVT::i64)
27161 BR = DAG.
getNode(AArch64ISD::CBZ,
SDLoc(
N), MVT::Other, Chain,
LHS, Dest);
27163 BR = DAG.
getNode(AArch64ISD::CBNZ,
SDLoc(
N), MVT::Other, Chain,
LHS, Dest);
27172 unsigned CC =
N->getConstantOperandVal(2);
27177 Zero =
N->getOperand(0);
27178 CTTZ =
N->getOperand(1);
27180 Zero =
N->getOperand(1);
27181 CTTZ =
N->getOperand(0);
27187 CTTZ.getOperand(0).getOpcode() !=
ISD::CTTZ))
27190 assert((CTTZ.getValueType() == MVT::i32 || CTTZ.getValueType() == MVT::i64) &&
27191 "Illegal type in CTTZ folding");
27197 ? CTTZ.getOperand(0).getOperand(0)
27198 : CTTZ.getOperand(0);
27204 ? CTTZ.getOperand(0).getValueSizeInBits()
27205 : CTTZ.getValueSizeInBits();
27232 if (CmpRHS.
getOpcode() == AArch64ISD::CSEL)
27234 else if (CmpLHS.
getOpcode() != AArch64ISD::CSEL)
27257 else if (CmpRHS !=
X)
27266 EVT VT =
Op->getValueType(0);
27269 return DAG.
getNode(AArch64ISD::CSEL,
DL, VT, L, R, CCValue,
Cond);
27278 SDValue SubsNode =
N->getOperand(3);
27284 EVT VT =
N->getValueType(0);
27286 unsigned ExpectedOpcode;
27294 CmpOpConst->getValueType(0));
27296 CmpOpConst->getValueType(0));
27299 ExpectedOp = CmpOpToMatch;
27300 SubsOp = CmpOpToMatch;
27305 if (
Op.getOpcode() != ExpectedOpcode)
27307 if (
Op.getOperand(0).getOpcode() !=
ISD::ADD ||
27308 !
Op.getOperand(0).hasOneUse())
27312 if (
X != CmpOpOther)
27314 if (
X != CmpOpOther)
27316 if (ExpectedOp !=
Op.getOperand(1))
27324 SDValue TReassocOp = GetReassociationOp(
N->getOperand(0), ExpectedOp);
27325 SDValue FReassocOp = GetReassociationOp(
N->getOperand(1), ExpectedOp);
27326 if (!TReassocOp && !FReassocOp)
27333 auto Reassociate = [&](
SDValue ReassocOp,
unsigned OpNum) {
27335 return N->getOperand(OpNum);
27342 SDValue TValReassoc = Reassociate(TReassocOp, 0);
27343 SDValue FValReassoc = Reassociate(FReassocOp, 1);
27344 return DAG.
getNode(AArch64ISD::CSEL,
SDLoc(
N), VT, TValReassoc, FValReassoc,
27352 if (
SDValue R = Fold(CC, ExpectedOp, SubsOp))
27374 auto CheckedFold = [&](
bool Check,
APInt NewCmpConst,
27377 CmpOpConst->getValueType(0));
27379 CmpOpConst->getValueType(0));
27380 return Check ? Fold(NewCC, ExpectedOp, SubsOp) :
SDValue();
27385 return CheckedFold(!CmpOpConst->getAPIntValue().isMaxValue(),
27389 return CheckedFold(!CmpOpConst->getAPIntValue().isMaxValue(),
27392 return CheckedFold(!CmpOpConst->getAPIntValue().isZero(),
27395 return CheckedFold(!CmpOpConst->getAPIntValue().isZero(),
27398 return CheckedFold(!CmpOpConst->getAPIntValue().isMinSignedValue(),
27401 return CheckedFold(!CmpOpConst->getAPIntValue().isMaxSignedValue(),
27404 return CheckedFold(!CmpOpConst->getAPIntValue().isMaxSignedValue(),
27407 return CheckedFold(!CmpOpConst->getAPIntValue().isMinSignedValue(),
27422 if (PTest.
getOpcode() != AArch64ISD::PTEST_ANY)
27428 if (TruePred.
getOpcode() == AArch64ISD::REINTERPRET_CAST)
27431 if (AnyPred.
getOpcode() == AArch64ISD::REINTERPRET_CAST)
27452 if (
N->getOperand(0) ==
N->getOperand(1))
27453 return N->getOperand(0);
27472 Cond.hasOneUse() &&
Cond->hasNUsesOfValue(0, 0) &&
27474 {Cond.getOperand(1), Cond.getOperand(0)}) &&
27476 {Cond.getOperand(0), Cond.getOperand(1)}) &&
27484 Cond.getOperand(1),
Cond.getOperand(0));
27485 return DAG.
getNode(AArch64ISD::CSEL,
DL,
N->getVTList(),
N->getOperand(0),
27502 EVT Op0MVT =
Op->getOperand(0).getValueType();
27508 SDNode *FirstUse = *
Op->user_begin();
27515 return N->getOpcode() != ISD::VSELECT || N->getValueType(0) != UseMVT;
27530 Op->getOperand(0));
27532 Op->getOperand(0));
27533 if (Op0SExt && (isSignedIntSetCC(CC) || isIntEqualitySetCC(CC))) {
27534 Op0ExtV =
SDValue(Op0SExt, 0);
27536 }
else if (Op0ZExt && (isUnsignedIntSetCC(CC) || isIntEqualitySetCC(CC))) {
27537 Op0ExtV =
SDValue(Op0ZExt, 0);
27544 Op0ExtV, Op1ExtV,
Op->getOperand(2));
27571 EVT VT =
N->getValueType(0);
27578 LHS->getOpcode() == AArch64ISD::CSEL &&
27580 LHS->hasOneUse()) {
27584 auto NewCond = getInvertedCondCode(OldCond);
27588 LHS.getOperand(0),
LHS.getOperand(1),
27596 LHS->hasOneUse()) {
27597 EVT TstVT =
LHS->getValueType(0);
27601 uint64_t TstImm = -1ULL <<
LHS->getConstantOperandVal(1);
27615 EVT ToVT =
LHS->getValueType(0);
27616 EVT FromVT =
LHS->getOperand(0).getValueType();
27619 SDValue CompareLHS, CompareRHS;
27620 if (getBoolVectorBitcastCompare(
LHS.getOperand(0),
RHS,
DL, DAG,
27621 CompareLHS, CompareRHS))
27626 DL, MVT::i1,
LHS->getOperand(0));
27637 EVT CmpVT =
LHS.getValueType();
27644 SplatLHSVal.
isOne())
27672 EVT FromVT =
LHS.getOperand(0).getValueType();
27677 SDValue CompareLHS, CompareRHS;
27678 if (!getBoolVectorBitcastCompare(
LHS.getOperand(0),
RHS,
DL, DAG, CompareLHS,
27683 CompareRHS,
N->getOperand(2),
N->getOperand(3),
27691 unsigned GenericOpcode) {
27695 EVT VT =
N->getValueType(0);
27698 if (!
N->hasAnyUseOfValue(1)) {
27722 if (!
N->hasAnyUseOfValue(0))
27724 return DAG.
getNode(AArch64ISD::SUBS,
SDLoc(
N),
N->getVTList(), R,
27750 if (InnerSetCC->
getOpcode() != AArch64ISD::SETCC_MERGE_ZERO)
27758 if (Pred.getOpcode() == AArch64ISD::PTRUE &&
27759 InnerPred.
getOpcode() == AArch64ISD::PTRUE &&
27761 Pred->getConstantOperandVal(0) >= AArch64SVEPredPattern::vl1 &&
27762 Pred->getConstantOperandVal(0) <= AArch64SVEPredPattern::vl256)
27769 if (V.getOpcode() != AArch64ISD::VASHR ||
27770 V.getOperand(0).getOpcode() != AArch64ISD::VSHL)
27773 unsigned BitWidth = V->getValueType(0).getScalarSizeInBits();
27774 unsigned ShiftAmtR = V.getConstantOperandVal(1);
27775 unsigned ShiftAmtL = V.getOperand(0).getConstantOperandVal(1);
27776 return (ShiftAmtR == ShiftAmtL && ShiftAmtR == (
BitWidth - 1));
  assert(N->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
         "Unexpected opcode!");
      LHS->getOperand(0)->getValueType(0) == N->getValueType(0)) {
    if (LHS->getOperand(0)->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
        LHS->getOperand(0)->getOperand(0) == Pred)
      return LHS->getOperand(0);
      return LHS->getOperand(0);
        LHS->getOperand(0), Pred);
        LHS.getValueType(), L0, ExtVal, L2);
    return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, N->getValueType(0),
                       Pred, NewLHS, RHS, N->getOperand(3));
27857 if (!
Op->hasOneUse())
27873 Bit < Op->getOperand(0).getValueSizeInBits()) {
27877 if (
Op->getNumOperands() != 2)
27884 switch (
Op->getOpcode()) {
27890 if ((
C->getZExtValue() >> Bit) & 1)
27896 if (
C->getZExtValue() <= Bit &&
27897 (Bit -
C->getZExtValue()) <
Op->getValueType(0).getSizeInBits()) {
27898 Bit = Bit -
C->getZExtValue();
27905 Bit = Bit +
C->getZExtValue();
27906 if (Bit >=
Op->getValueType(0).getSizeInBits())
27907 Bit =
Op->getValueType(0).getSizeInBits() - 1;
27912 if ((Bit +
C->getZExtValue()) <
Op->getValueType(0).getSizeInBits()) {
27913 Bit = Bit +
C->getZExtValue();
27920 if ((
C->getZExtValue() >> Bit) & 1)
  unsigned Bit = N->getConstantOperandVal(2);
  bool Invert = false;
  SDValue TestSrc = N->getOperand(1);
  if (TestSrc == NewTestSrc)
  unsigned NewOpc = N->getOpcode();
    if (NewOpc == AArch64ISD::TBZ)
      NewOpc = AArch64ISD::TBNZ;
      assert(NewOpc == AArch64ISD::TBNZ);
      NewOpc = AArch64ISD::TBZ;
  return DAG.getNode(NewOpc, DL, MVT::Other, N->getOperand(0), NewTestSrc,
27959 auto SelectA =
N->getOperand(1);
27960 auto SelectB =
N->getOperand(2);
27961 auto NTy =
N->getValueType(0);
27963 if (!NTy.isScalableVector())
27969 switch (SelectB.getOpcode()) {
27977 if (SelectA != SelectB.getOperand(0))
27983 auto InverseSetCC =
27988 {InverseSetCC, SelectB, SelectA});
  SDValue TrueVal = N->getOperand(1);
  SDValue FalseVal = N->getOperand(2);
  bool TrueValIsPow = TrueVal.getOpcode() == ISD::FPOW;
  bool FalseValIsPow = FalseVal.getOpcode() == ISD::FPOW;
  if (TrueValIsPow == FalseValIsPow)
  if ((TrueValIsPow && !TrueVal.hasOneUse()) ||
      (FalseValIsPow && !FalseVal.hasOneUse()))
  EVT VT = N->getValueType(0);
  SDValue OldPow = TrueValIsPow ? TrueVal : FalseVal;
28057 SDValue IfTrue =
N->getOperand(1);
28058 SDValue IfFalse =
N->getOperand(2);
28059 EVT ResVT =
N->getValueType(0);
28063 return N->getOperand(1);
28066 return N->getOperand(2);
28096 return DAG.
getSelect(
DL, ResVT, ExtCond, IfTrue, IfFalse);
28130 EVT ResVT =
N->getValueType(0);
28142 "Scalar-SETCC feeding SELECT has unexpected result type!");
28148 if (SrcVT == MVT::i1 ||
28156 if (!ResVT.
isVector() || NumMaskElts == 0)
28191 return DAG.
getSelect(
DL, ResVT, Mask,
N->getOperand(1),
N->getOperand(2));
28196 EVT VT =
N->getValueType(0);
28210 if (
N->getOpcode() == AArch64ISD::DUP) {
28221 EVT MemVT = LD->getMemoryVT();
28224 (MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) &&
28225 ElemVT != MemVT && LD->hasOneUse()) {
28241 if (
Op.getOpcode() == AArch64ISD::FCMEQ ||
28242 Op.getOpcode() == AArch64ISD::FCMGE ||
28243 Op.getOpcode() == AArch64ISD::FCMGT) {
28245 EVT ExpandedVT = VT;
28264 SDValue EXTRACT_VEC_ELT =
N->getOperand(0);
28282 if (
N->getValueType(0) ==
N->getOperand(0).getValueType())
28283 return N->getOperand(0);
28284 if (
N->getOperand(0).getOpcode() == AArch64ISD::NVCAST)
28285 return DAG.
getNode(AArch64ISD::NVCAST,
SDLoc(
N),
N->getValueType(0),
28286 N->getOperand(0).getOperand(0));
28311 MinOffset = std::min(MinOffset,
C->getZExtValue());
28330 if (
Offset >= (1 << 20))
28335 if (!
T->isSized() ||
28349 !BR.getValueType().isScalarInteger())
28361 "This method is only for scalable vectors of offsets");
28377 unsigned ScalarSizeInBytes) {
28379 if (OffsetInBytes % ScalarSizeInBytes)
28383 if (OffsetInBytes / ScalarSizeInBytes > 31)
28397 unsigned ScalarSizeInBytes) {
28405 bool OnlyPackedOffsets =
true) {
28406 const SDValue Src =
N->getOperand(2);
28407 const EVT SrcVT = Src->getValueType(0);
28409 "Scatter stores are only possible for SVE vectors");
28421 if ((SrcVT != MVT::nxv4f32) && (SrcVT != MVT::nxv2f64) &&
28422 ((Opcode != AArch64ISD::SST1Q_PRED &&
28423 Opcode != AArch64ISD::SST1Q_INDEX_PRED) ||
28424 ((SrcVT != MVT::nxv8f16) && (SrcVT != MVT::nxv8bf16))))
28437 if (Opcode == AArch64ISD::SSTNT1_INDEX_PRED) {
28440 Opcode = AArch64ISD::SSTNT1_PRED;
28441 }
else if (Opcode == AArch64ISD::SST1Q_INDEX_PRED) {
28444 Opcode = AArch64ISD::SST1Q_PRED;
28452 if ((Opcode == AArch64ISD::SSTNT1_PRED || Opcode == AArch64ISD::SST1Q_PRED) &&
28453 Offset.getValueType().isVector())
28462 if (Opcode == AArch64ISD::SST1_IMM_PRED) {
28465 if (MVT::nxv4i32 ==
Base.getValueType().getSimpleVT().SimpleTy)
28466 Opcode = AArch64ISD::SST1_UXTW_PRED;
28468 Opcode = AArch64ISD::SST1_PRED;
28481 if (!OnlyPackedOffsets &&
28482 Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
28501 if (Src.getValueType().isFloatingPoint())
28518 bool OnlyPackedOffsets =
true) {
28519 const EVT RetVT =
N->getValueType(0);
28521 "Gather loads are only possible for SVE vectors");
28539 if (Opcode == AArch64ISD::GLDNT1_INDEX_MERGE_ZERO) {
28542 Opcode = AArch64ISD::GLDNT1_MERGE_ZERO;
28543 }
else if (Opcode == AArch64ISD::GLD1Q_INDEX_MERGE_ZERO) {
28546 Opcode = AArch64ISD::GLD1Q_MERGE_ZERO;
28554 if ((Opcode == AArch64ISD::GLDNT1_MERGE_ZERO ||
28555 Opcode == AArch64ISD::GLD1Q_MERGE_ZERO) &&
28556 Offset.getValueType().isVector())
28565 if (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO ||
28566 Opcode == AArch64ISD::GLDFF1_IMM_MERGE_ZERO) {
28569 if (MVT::nxv4i32 ==
Base.getValueType().getSimpleVT().SimpleTy)
28570 Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
28571 ? AArch64ISD::GLD1_UXTW_MERGE_ZERO
28572 : AArch64ISD::GLDFF1_UXTW_MERGE_ZERO;
28574 Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
28575 ? AArch64ISD::GLD1_MERGE_ZERO
28576 : AArch64ISD::GLDFF1_MERGE_ZERO;
28589 if (!OnlyPackedOffsets &&
28590 Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
28611 if (RetVT.
isInteger() && (RetVT != HwRetVt))
28627 unsigned Opc = Src->getOpcode();
28630 if (
Opc == AArch64ISD::UUNPKHI ||
Opc == AArch64ISD::UUNPKLO) {
28632 unsigned SOpc =
Opc == AArch64ISD::UUNPKHI ? AArch64ISD::SUNPKHI
28633 : AArch64ISD::SUNPKLO;
28656 return DAG.
getNode(SOpc,
DL,
N->getValueType(0), Ext);
28660 if (
Opc == AArch64ISD::CSEL &&
28662 EVT VT =
N->getValueType(0);
28668 return DAG.
getNode(AArch64ISD::CSEL,
DL, VT, TVal,
28674 return DAG.
getNode(AArch64ISD::CSEL,
DL, VT,
28688 unsigned MemVTOpNum = 4;
28690 case AArch64ISD::LD1_MERGE_ZERO:
28691 NewOpc = AArch64ISD::LD1S_MERGE_ZERO;
28694 case AArch64ISD::LDNF1_MERGE_ZERO:
28695 NewOpc = AArch64ISD::LDNF1S_MERGE_ZERO;
28698 case AArch64ISD::LDFF1_MERGE_ZERO:
28699 NewOpc = AArch64ISD::LDFF1S_MERGE_ZERO;
28702 case AArch64ISD::GLD1_MERGE_ZERO:
28703 NewOpc = AArch64ISD::GLD1S_MERGE_ZERO;
28705 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
28706 NewOpc = AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
28708 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
28709 NewOpc = AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
28711 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
28712 NewOpc = AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
28714 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
28715 NewOpc = AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
28717 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
28718 NewOpc = AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
28720 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
28721 NewOpc = AArch64ISD::GLD1S_IMM_MERGE_ZERO;
28723 case AArch64ISD::GLDFF1_MERGE_ZERO:
28724 NewOpc = AArch64ISD::GLDFF1S_MERGE_ZERO;
28726 case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
28727 NewOpc = AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO;
28729 case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
28730 NewOpc = AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO;
28732 case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
28733 NewOpc = AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO;
28735 case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
28736 NewOpc = AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO;
28738 case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
28739 NewOpc = AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO;
28741 case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
28742 NewOpc = AArch64ISD::GLDFF1S_IMM_MERGE_ZERO;
28744 case AArch64ISD::GLDNT1_MERGE_ZERO:
28745 NewOpc = AArch64ISD::GLDNT1S_MERGE_ZERO;
28754 if ((SignExtSrcVT != SrcMemVT) || !Src.hasOneUse())
28757 EVT DstVT =
N->getValueType(0);
28761 for (
unsigned I = 0;
I < Src->getNumOperands(); ++
I)
28762 Ops.push_back(Src->getOperand(
I));
28776 const unsigned OffsetPos = 4;
28780 if (
Offset.getValueType().getSimpleVT().SimpleTy != MVT::nxv2i32)
28799 unsigned ScalarSizeInBytes) {
28800 const unsigned ImmPos = 4, OffsetPos = 3;
28820 switch (
Op.getOpcode()) {
28823 case AArch64ISD::ANDV_PRED:
28824 case AArch64ISD::EORV_PRED:
28825 case AArch64ISD::FADDA_PRED:
28826 case AArch64ISD::FADDV_PRED:
28827 case AArch64ISD::FMAXNMV_PRED:
28828 case AArch64ISD::FMAXV_PRED:
28829 case AArch64ISD::FMINNMV_PRED:
28830 case AArch64ISD::FMINV_PRED:
28831 case AArch64ISD::ORV_PRED:
28832 case AArch64ISD::SADDV_PRED:
28833 case AArch64ISD::SMAXV_PRED:
28834 case AArch64ISD::SMINV_PRED:
28835 case AArch64ISD::UADDV_PRED:
28836 case AArch64ISD::UMAXV_PRED:
28837 case AArch64ISD::UMINV_PRED:
28845 switch (
Op.getOpcode()) {
28848 case AArch64ISD::REINTERPRET_CAST:
28852 case AArch64ISD::PTRUE:
28853 return Op.getConstantOperandVal(0) == AArch64SVEPredPattern::all;
28859 SDValue InsertVec =
N->getOperand(0);
28860 SDValue InsertElt =
N->getOperand(1);
28861 SDValue InsertIdx =
N->getOperand(2);
28916 EVT VT =
N->getValueType(0);
28922 auto hasValidElementTypeForFPExtLoad = [](
EVT VT) {
28924 return EltVT == MVT::f32 || EltVT == MVT::f64;
28952 EVT VT =
N->getValueType(0);
28955 if (!VT.
isScalableVector() || Subtarget->hasSVE2() || Subtarget->hasSME())
28971 EVT VT =
N->getValueType(0);
28973 SDValue Insert =
N->getOperand(0);
28977 if (!Insert.getOperand(0).isUndef())
28980 uint64_t IdxInsert = Insert.getConstantOperandVal(2);
28981 uint64_t IdxDupLane =
N->getConstantOperandVal(1);
28982 if (IdxInsert != 0 || IdxDupLane != 0)
28985 SDValue Bitcast = Insert.getOperand(1);
28989 SDValue Subvec = Bitcast.getOperand(0);
28999 DAG.
getPOISON(NewSubvecVT), Subvec, Insert->getOperand(2));
29000 SDValue NewDuplane128 = DAG.
getNode(AArch64ISD::DUPLANE128,
DL, NewSubvecVT,
29001 NewInsert,
N->getOperand(1));
29026 ExtractHigh =
LHS.getOperand(0);
29033 ExtractHigh =
RHS.getOperand(0);
29044 if (TruncHighOp.
getOpcode() == AArch64ISD::DUP ||
29062 bool HasFoundMULLow =
true;
29064 if (ExtractHighSrcVec->
use_size() != 2)
29065 HasFoundMULLow =
false;
29074 HasFoundMULLow =
false;
29081 if (!ExtractLow || !ExtractLow->
hasOneUse())
29082 HasFoundMULLow =
false;
29085 if (HasFoundMULLow) {
29087 if (ExtractLowUser->
getOpcode() !=
N->getOpcode()) {
29088 HasFoundMULLow =
false;
29090 if (ExtractLowUser->
getOperand(0) == ExtractLow) {
29094 HasFoundMULLow =
false;
29099 HasFoundMULLow =
false;
29112 if (HasFoundMULLow && (TruncLowOp.
getOpcode() == AArch64ISD::DUP ||
29117 if (TruncHighOpVT != UZP1VT)
29119 if (TruncLowOpVT != UZP1VT)
29123 DAG.
getNode(AArch64ISD::UZP1,
DL, UZP1VT, TruncLowOp, TruncHighOp);
29130 if (HasFoundMULLow) {
29160 auto Mask =
N->getOperand(0);
29161 auto Pred =
N->getOperand(1);
29166 if (Pred->getOpcode() == AArch64ISD::REINTERPRET_CAST)
29167 Pred = Pred->getOperand(0);
29170 Pred = Pred->getOperand(0);
29171 Pred = DAG.
getNode(AArch64ISD::REINTERPRET_CAST,
DL, MVT::nxv16i1, Pred);
29172 return DAG.
getNode(AArch64ISD::PTEST_FIRST,
DL,
N->getValueType(0), Mask,
29204 EVT VT =
N->getValueType(0);
29205 if (VT != MVT::v1i64)
29221 if (UADDLV.
getOpcode() != AArch64ISD::UADDLV ||
29231 DAG.
getNode(AArch64ISD::NVCAST,
DL, MVT::v1i64, EXTRACT_SUBVEC);
29241 unsigned NumParts =
N->getNumOperands();
29242 if (NumParts != 2 && NumParts != 4)
29245 EVT SubVecTy =
N->getValueType(0);
29258 for (
unsigned I = 0;
I < NumParts;
I++) {
29274 if (!MaskedLoad || !MaskedLoad->hasNUsesOfValue(NumParts, 0) ||
29276 !MaskedLoad->getOffset().isUndef() ||
29277 (!MaskedLoad->getPassThru()->isUndef() &&
29284 getNarrowMaskForInterleavedOps(DAG,
DL, MaskedLoad->getMask(), NumParts);
29288 const Intrinsic::ID IID = NumParts == 2 ? Intrinsic::aarch64_sve_ld2_sret
29289 : Intrinsic::aarch64_sve_ld4_sret;
29290 SDValue NewLdOps[] = {MaskedLoad->getChain(),
29292 MaskedLoad->getBasePtr()};
29296 {SubVecTy, SubVecTy, MVT::Other}, NewLdOps);
29299 {SubVecTy, SubVecTy, SubVecTy, SubVecTy, MVT::Other},
29304 for (
unsigned Idx = 0; Idx < NumParts; Idx++)
29335 if (
N->hasOneUse()) {
29336 unsigned UseOpc =
N->user_begin()->getOpcode();
29338 UseOpc == AArch64ISD::ADDS || UseOpc == AArch64ISD::SUBS)
29343 EVT VT =
N->getValueType(0);
29358 unsigned IntrinsicID =
N->getConstantOperandVal(1);
29360 (IntrinsicID == Intrinsic::aarch64_rndr ? AArch64SysReg::RNDR
29361 : AArch64SysReg::RNDRRS);
29388 EVT VT =
N->getValueType(0);
29389 EVT MaskVT = Mask.getValueType();
29403 ReduceInVT = CmpVT;
29454 return DAG.
getNode(ReductionOpcode,
SDLoc(
N),
N->getValueType(0), Vec);
29460 switch (
N->getOpcode()) {
29482 case AArch64ISD::ANDS:
29484 case AArch64ISD::ADC:
29488 case AArch64ISD::SBC:
29490 case AArch64ISD::ADCS:
29494 case AArch64ISD::SBCS:
29498 case AArch64ISD::ADDS:
29500 case AArch64ISD::SUBS:
29502 case AArch64ISD::BICi:
29545 return performLOADCombine(
N, DCI, DAG, Subtarget);
29547 return performSTORECombine(
N, DCI, DAG, Subtarget);
29549 return performMSTORECombine(
N, DCI, DAG, Subtarget);
29556 case AArch64ISD::BRCOND:
29558 case AArch64ISD::TBNZ:
29559 case AArch64ISD::TBZ:
29561 case AArch64ISD::CSEL:
29563 case AArch64ISD::DUP:
29564 case AArch64ISD::DUPLANE8:
29565 case AArch64ISD::DUPLANE16:
29566 case AArch64ISD::DUPLANE32:
29567 case AArch64ISD::DUPLANE64:
29569 case AArch64ISD::DUPLANE128:
29571 case AArch64ISD::NVCAST:
29573 case AArch64ISD::SPLICE:
29575 case AArch64ISD::UUNPKLO:
29576 case AArch64ISD::UUNPKHI:
29578 case AArch64ISD::UZP1:
29579 case AArch64ISD::UZP2:
29581 case AArch64ISD::SETCC_MERGE_ZERO:
29583 case AArch64ISD::REINTERPRET_CAST:
29585 case AArch64ISD::GLD1_MERGE_ZERO:
29586 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
29587 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
29588 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
29589 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
29590 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
29591 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
29592 case AArch64ISD::GLD1S_MERGE_ZERO:
29593 case AArch64ISD::GLD1S_SCALED_MERGE_ZERO:
29594 case AArch64ISD::GLD1S_UXTW_MERGE_ZERO:
29595 case AArch64ISD::GLD1S_SXTW_MERGE_ZERO:
29596 case AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO:
29597 case AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO:
29598 case AArch64ISD::GLD1S_IMM_MERGE_ZERO:
29600 case AArch64ISD::VASHR:
29601 case AArch64ISD::VLSHR:
29603 case AArch64ISD::SUNPKLO:
29605 case AArch64ISD::BSP:
29618 N->getOperand(1),
N->getOperand(0));
29621 case AArch64ISD::UADDV:
29623 case AArch64ISD::SMULL:
29624 case AArch64ISD::UMULL:
29625 case AArch64ISD::PMULL:
29627 case AArch64ISD::PTEST_FIRST:
29631 switch (
N->getConstantOperandVal(1)) {
29632 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
29634 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
29636 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
29638 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
29640 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
29641 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
29642 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
29643 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
29644 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
29645 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
29646 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
29647 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
29649 case Intrinsic::aarch64_neon_ld2:
29650 case Intrinsic::aarch64_neon_ld3:
29651 case Intrinsic::aarch64_neon_ld4:
29652 case Intrinsic::aarch64_neon_ld1x2:
29653 case Intrinsic::aarch64_neon_ld1x3:
29654 case Intrinsic::aarch64_neon_ld1x4:
29655 case Intrinsic::aarch64_neon_ld2lane:
29656 case Intrinsic::aarch64_neon_ld3lane:
29657 case Intrinsic::aarch64_neon_ld4lane:
29658 case Intrinsic::aarch64_neon_ld2r:
29659 case Intrinsic::aarch64_neon_ld3r:
29660 case Intrinsic::aarch64_neon_ld4r:
29661 case Intrinsic::aarch64_neon_st2:
29662 case Intrinsic::aarch64_neon_st3:
29663 case Intrinsic::aarch64_neon_st4:
29664 case Intrinsic::aarch64_neon_st1x2:
29665 case Intrinsic::aarch64_neon_st1x3:
29666 case Intrinsic::aarch64_neon_st1x4:
29667 case Intrinsic::aarch64_neon_st2lane:
29668 case Intrinsic::aarch64_neon_st3lane:
29669 case Intrinsic::aarch64_neon_st4lane:
29671 case Intrinsic::aarch64_sve_ldnt1:
29673 case Intrinsic::aarch64_sve_ld1rq:
29675 case Intrinsic::aarch64_sve_ld1ro:
29677 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
29679 case Intrinsic::aarch64_sve_ldnt1_gather:
29681 case Intrinsic::aarch64_sve_ldnt1_gather_index:
29683 AArch64ISD::GLDNT1_INDEX_MERGE_ZERO);
29684 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
29686 case Intrinsic::aarch64_sve_ld1:
29688 case Intrinsic::aarch64_sve_ldnf1:
29690 case Intrinsic::aarch64_sve_ldff1:
29692 case Intrinsic::aarch64_sve_st1:
29694 case Intrinsic::aarch64_sve_stnt1:
29696 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
29698 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
29700 case Intrinsic::aarch64_sve_stnt1_scatter:
29702 case Intrinsic::aarch64_sve_stnt1_scatter_index:
29704 case Intrinsic::aarch64_sve_ld1_gather:
29706 case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
29707 case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
29709 case Intrinsic::aarch64_sve_ld1q_gather_index:
29711 AArch64ISD::GLD1Q_INDEX_MERGE_ZERO);
29712 case Intrinsic::aarch64_sve_ld1_gather_index:
29714 AArch64ISD::GLD1_SCALED_MERGE_ZERO);
29715 case Intrinsic::aarch64_sve_ld1_gather_sxtw:
29718 case Intrinsic::aarch64_sve_ld1_gather_uxtw:
29721 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
29723 AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO,
29725 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
29727 AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO,
29729 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
29731 case Intrinsic::aarch64_sve_ldff1_gather:
29733 case Intrinsic::aarch64_sve_ldff1_gather_index:
29735 AArch64ISD::GLDFF1_SCALED_MERGE_ZERO);
29736 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
29738 AArch64ISD::GLDFF1_SXTW_MERGE_ZERO,
29740 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
29742 AArch64ISD::GLDFF1_UXTW_MERGE_ZERO,
29744 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
29746 AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO,
29748 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
29750 AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO,
29752 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
29754 AArch64ISD::GLDFF1_IMM_MERGE_ZERO);
29755 case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
29756 case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
29758 case Intrinsic::aarch64_sve_st1q_scatter_index:
29760 case Intrinsic::aarch64_sve_st1_scatter:
29762 case Intrinsic::aarch64_sve_st1_scatter_index:
29764 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
29767 case Intrinsic::aarch64_sve_st1_scatter_uxtw:
29770 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
29772 AArch64ISD::SST1_SXTW_SCALED_PRED,
29774 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
29776 AArch64ISD::SST1_UXTW_SCALED_PRED,
29778 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
29780 case Intrinsic::aarch64_rndr:
29781 case Intrinsic::aarch64_rndrrs:
29783 case Intrinsic::aarch64_sme_ldr_zt:
29785 DAG.
getVTList(MVT::Other),
N->getOperand(0),
29786 N->getOperand(2),
N->getOperand(3));
29787 case Intrinsic::aarch64_sme_str_zt:
29789 DAG.
getVTList(MVT::Other),
N->getOperand(0),
29790 N->getOperand(2),
N->getOperand(3));
bool AArch64TargetLowering::isUsedByReturnOnly(SDNode *N,
  if (N->getNumValues() != 1)
  if (!N->hasNUsesOfValue(1, 0))
  SDNode *Copy = *N->user_begin();
29825 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() ==
29828 TCChain = Copy->getOperand(0);
29832 bool HasRet =
false;
29834 if (
Node->getOpcode() != AArch64ISD::RET_GLUE)
29850bool AArch64TargetLowering::mayBeEmittedAsTailCall(
const CallInst *CI)
const {
29858 if (!CstOffset || CstOffset->isZero())
29864 return isInt<9>(CstOffset->getSExtValue());
29867bool AArch64TargetLowering::getIndexedAddressParts(
SDNode *
N,
SDNode *
Op,
29875 SDNode *ValOnlyUser =
nullptr;
29876 for (SDUse &U :
N->uses()) {
29877 if (
U.getResNo() == 1)
29879 if (ValOnlyUser ==
nullptr)
29880 ValOnlyUser =
U.getUser();
29882 ValOnlyUser =
nullptr;
29887 auto IsUndefOrZero = [](
SDValue V) {
29895 (ValOnlyUser->
getOpcode() == AArch64ISD::DUP_MERGE_PASSTHRU &&
29896 IsUndefOrZero(ValOnlyUser->
getOperand(2)))))
29899 Base =
Op->getOperand(0);
29903 int64_t RHSC =
RHS->getSExtValue();
29905 RHSC = -(uint64_t)RHSC;
29911 if (!Subtarget->isLittleEndian() && MemType.
isVector() &&
29929 VT =
LD->getMemoryVT();
29930 Ptr =
LD->getBasePtr();
29932 VT =
ST->getMemoryVT();
29933 Ptr =
ST->getBasePtr();
29943bool AArch64TargetLowering::getPostIndexedAddressParts(
29949 VT =
LD->getMemoryVT();
29950 Ptr =
LD->getBasePtr();
29952 VT =
ST->getMemoryVT();
29953 Ptr =
ST->getBasePtr();
29972 EVT VT =
N->getValueType(0);
29973 [[maybe_unused]]
EVT SrcVT =
Op.getValueType();
29975 "Must be bool vector.");
29981 bool AllUndef =
true;
29982 for (
unsigned I = 1;
I <
Op.getNumOperands(); ++
I)
29983 AllUndef &=
Op.getOperand(
I).isUndef();
29986 Op =
Op.getOperand(0);
29989 SDValue VectorBits = vectorToScalarBitmask(
Op.getNode(), DAG);
30000 EVT VT =
N->getValueType(0);
30010void AArch64TargetLowering::ReplaceBITCASTResults(
30014 EVT VT =
N->getValueType(0);
30015 EVT SrcVT =
Op.getValueType();
30017 if (VT == MVT::v2i16 && SrcVT == MVT::i32) {
30022 if (VT == MVT::v4i8 && SrcVT == MVT::i32) {
30027 if (VT == MVT::v2i8 && SrcVT == MVT::i16) {
30034 "Expected fp->int bitcast!");
30053 if (VT != MVT::i16 || (SrcVT != MVT::f16 && SrcVT != MVT::bf16))
30065 EVT VT =
N->getValueType(0);
30068 !
N->getFlags().hasAllowReassociation()) ||
30069 (VT.
getScalarType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
30077 X =
N->getOperand(1);
30082 if (Shuf->getOperand(0) !=
X || !Shuf->getOperand(1)->isUndef())
30087 for (
int I = 0,
E = Mask.size();
I <
E;
I++)
30088 if (Mask[
I] != (
I % 2 == 0 ?
I + 1 :
I - 1))
30093 assert(LoHi.first.getValueType() == LoHi.second.getValueType());
30094 SDValue Addp = DAG.
getNode(AArch64ISD::ADDP,
N, LoHi.first.getValueType(),
30095 LoHi.first, LoHi.second);
30106 DAG.
getPOISON(LoHi.first.getValueType())),
30113 unsigned AcrossOp) {
30124void AArch64TargetLowering::ReplaceExtractSubVectorResults(
30127 EVT InVT =
In.getValueType();
30134 EVT VT =
N->getValueType(0);
30147 unsigned Index = CIndex->getZExtValue();
30152 : (unsigned)AArch64ISD::UUNPKHI;
30159void AArch64TargetLowering::ReplaceGetActiveLaneMaskResults(
30161 assert((Subtarget->isSVEorStreamingSVEAvailable() &&
30162 (Subtarget->hasSVE2p1() || Subtarget->hasSME2())) &&
30163 "Custom lower of get.active.lane.mask missing required feature.");
30165 assert(
N->getValueType(0) == MVT::nxv32i1 &&
30166 "Unexpected result type for get.active.lane.mask");
30173 "Unexpected operand type for get.active.lane.mask");
30187 {WideMask.getValue(0), WideMask.getValue(1)}));
30193 auto [VLo, VHi] = DAG.
SplitScalar(V,
DL, MVT::i64, MVT::i64);
30200 const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
30209 assert(
N->getValueType(0) == MVT::i128 &&
30210 "AtomicCmpSwap on types less than 128 should be legal");
30213 if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) {
30224 switch (
MemOp->getMergedOrdering()) {
30226 Opcode = AArch64::CASPX;
30229 Opcode = AArch64::CASPAX;
30232 Opcode = AArch64::CASPLX;
30236 Opcode = AArch64::CASPALX;
30246 unsigned SubReg1 = AArch64::sube64, SubReg2 = AArch64::subo64;
30260 switch (
MemOp->getMergedOrdering()) {
30262 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
30265 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
30268 Opcode = AArch64::CMP_SWAP_128_RELEASE;
30272 Opcode = AArch64::CMP_SWAP_128;
30279 auto Desired = DAG.
SplitScalar(
N->getOperand(2),
DL, MVT::i64, MVT::i64);
30280 auto New = DAG.
SplitScalar(
N->getOperand(3),
DL, MVT::i64, MVT::i64);
30281 SDValue Ops[] = {
N->getOperand(1), Desired.first, Desired.second,
30282 New.first, New.second,
N->getOperand(0)};
30284 Opcode,
SDLoc(
N), DAG.
getVTList(MVT::i64, MVT::i64, MVT::i32, MVT::Other),
30300 "ATOMIC_LOAD_AND should be lowered to LDCLRP directly");
30306 switch (Ordering) {
30308 return AArch64::LDCLRP;
30311 return AArch64::LDCLRPA;
30314 return AArch64::LDCLRPL;
30318 return AArch64::LDCLRPAL;
30326 switch (Ordering) {
30328 return AArch64::LDSETP;
30331 return AArch64::LDSETPA;
30334 return AArch64::LDSETPL;
30338 return AArch64::LDSETPAL;
30346 switch (Ordering) {
30348 return AArch64::SWPP;
30351 return AArch64::SWPPA;
30354 return AArch64::SWPPL;
30358 return AArch64::SWPPAL;
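// 128-bit atomic RMW operations map onto the LSE128 paired instructions; the
// merged memory ordering picks the plain, acquire, release or acq_rel encoding
// (LDCLRP/LDSETP/SWPP and their A/L/AL variants).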
30380 assert(
N->getValueType(0) == MVT::i128 &&
30381 "AtomicLoadXXX on types less than 128 should be legal");
30383 if (!Subtarget->hasLSE128())
30387 const SDValue &Chain =
N->getOperand(0);
30388 const SDValue &Ptr =
N->getOperand(1);
30389 const SDValue &Val128 =
N->getOperand(2);
30390 std::pair<SDValue, SDValue> Val2x64 =
30393 const unsigned ISDOpcode =
N->getOpcode();
30394 const unsigned MachineOpcode =
30407 SDValue Ops[] = {Val2x64.first, Val2x64.second, Ptr, Chain};
30425void AArch64TargetLowering::ReplaceNodeResults(
30427 switch (
N->getOpcode()) {
30431 ReplaceBITCASTResults(
N,
Results, DAG);
30453 case AArch64ISD::SADDV:
30456 case AArch64ISD::UADDV:
30459 case AArch64ISD::SMINV:
30462 case AArch64ISD::UMINV:
30465 case AArch64ISD::SMAXV:
30468 case AArch64ISD::UMAXV:
30474 LowerToPredicatedOp(
SDValue(
N, 0), DAG, AArch64ISD::MULHS_PRED));
30479 LowerToPredicatedOp(
SDValue(
N, 0), DAG, AArch64ISD::MULHU_PRED));
30485 assert(
N->getValueType(0) == MVT::i128 &&
"unexpected illegal conversion");
30492 assert(
N->getValueType(0) != MVT::i128 &&
30493 "128-bit ATOMIC_LOAD_AND should be lowered directly to LDCLRP");
30499 "Expected 128-bit atomicrmw.");
30521 if (LoadNode->
isNonTemporal() && Subtarget->isLittleEndian() &&
30530 AArch64ISD::LDNP, SDLoc(
N),
30531 DAG.
getVTList({MVT::v2i64, MVT::v2i64, MVT::Other}),
30532 {LoadNode->getChain(), LoadNode->getBasePtr()},
30551 bool isLoadAcquire =
30553 unsigned Opcode = isLoadAcquire ? AArch64ISD::LDIAPP : AArch64ISD::LDP;
30556 assert(Subtarget->hasFeature(AArch64::FeatureRCPC3));
30559 Opcode, SDLoc(
N), DAG.
getVTList({MVT::i64, MVT::i64, MVT::Other}),
30560 {LoadNode->getChain(), LoadNode->getBasePtr()},
30567 Result.getValue(FirstRes),
Result.getValue(1 - FirstRes));
30573 ReplaceExtractSubVectorResults(
N,
Results, DAG);
30582 ReplaceGetActiveLaneMaskResults(
N,
Results, DAG);
30585 EVT VT =
N->getValueType(0);
case Intrinsic::aarch64_sve_clasta_n: {
  assert((VT == MVT::i8 || VT == MVT::i16) &&
         "custom lowering for unexpected type");
  auto V = DAG.getNode(AArch64ISD::CLASTA_N, DL, MVT::i32,
                       N->getOperand(1), Op2, N->getOperand(3));
  ...
}
case Intrinsic::aarch64_sve_clastb_n: {
  assert((VT == MVT::i8 || VT == MVT::i16) &&
         "custom lowering for unexpected type");
  auto V = DAG.getNode(AArch64ISD::CLASTB_N, DL, MVT::i32,
                       N->getOperand(1), Op2, N->getOperand(3));
  ...
}
case Intrinsic::aarch64_sve_lasta: {
  assert((VT == MVT::i8 || VT == MVT::i16) &&
         "custom lowering for unexpected type");
  auto V = DAG.getNode(AArch64ISD::LASTA, DL, MVT::i32,
                       N->getOperand(1), N->getOperand(2));
  ...
}
case Intrinsic::aarch64_sve_lastb: {
  assert((VT == MVT::i8 || VT == MVT::i16) &&
         "custom lowering for unexpected type");
  auto V = DAG.getNode(AArch64ISD::LASTB, DL, MVT::i32,
                       N->getOperand(1), N->getOperand(2));
  ...
}
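// --- Hedged scalar model (not SVE code) of the LASTB semantics lowered above:
// return the last element whose predicate lane is active. The i8/i16 cases
// above compute the value as i32 (the scalar lands in a 32-bit register) and
// then truncate. Names below are illustrative.
#include <cstdint>
#include <vector>

int16_t lastActive(const std::vector<bool> &Pred,
                   const std::vector<int16_t> &Data) {
  int16_t Result = 0;
  for (size_t I = 0; I < Data.size(); ++I)
    if (Pred[I])
      Result = Data[I]; // the final overwrite is the last active element
  return Result;
}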
30630 case Intrinsic::aarch64_sme_in_streaming_mode: {
30635 getRuntimePStateSM(DAG, Chain,
DL,
N->getValueType(0));
30640 case Intrinsic::experimental_vector_match: {
30658 assert(
N->getValueType(0) == MVT::i128 &&
30659 "READ_REGISTER custom lowering is only for 128-bit sysregs");
30664 AArch64ISD::MRRS,
DL, DAG.
getVTList({MVT::i64, MVT::i64, MVT::Other}),
30665 Chain, SysRegName);
30679 if (Subtarget->isTargetAndroid() || Subtarget->isTargetFuchsia())
30694 if (VT == MVT::v1i8 || VT == MVT::v1i16 || VT == MVT::v1i32 ||
30704 if (!Subtarget->hasLSE2())
30708 return LI->getType()->getPrimitiveSizeInBits() == 128 &&
30709 LI->getAlign() >=
Align(16);
30712 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
30719 if (!Subtarget->hasLSE128())
30725 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
30726 SI->getAlign() >=
Align(16) &&
30731 return RMW->getValOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
30732 RMW->getAlign() >=
Align(16) &&
30741 if (!Subtarget->hasLSE2() || !Subtarget->hasRCPC3())
30745 return LI->getType()->getPrimitiveSizeInBits() == 128 &&
30746 LI->getAlign() >=
Align(16) &&
30750 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
30751 SI->getAlign() >=
Align(16) &&
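// --- Hedged illustration of what the predicates above accept: a 128-bit
// object with at least 16-byte alignment. Those checks (size == 128 bits,
// align >= 16) are what allow single LDP/STP-style accesses under LSE2 (and
// the RCPC3/LSE128 forms) instead of a CAS or LL/SC loop. The type below is
// an assumption for illustration only.
#include <atomic>
#include <cstdint>

struct alignas(16) Pair64 {
  uint64_t Lo;
  uint64_t Hi;
};

std::atomic<Pair64> GPair; // 128 bits, 16-byte aligned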
30774 if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
30794 return !Subtarget->hasLSE();
30802 unsigned Size =
SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
30855 return !Subtarget.hasFPARMv8();
30872 unsigned Size = Ty->getPrimitiveSizeInBits();
30873 assert(
Size <= 128 &&
"AtomicExpandPass should've handled larger sizes.");
30875 bool CanUseLSE128 = Subtarget->hasLSE128() &&
Size == 128 &&
30892 if (Subtarget->hasLSE()) {
30910 if (Subtarget->outlineAtomics()) {
30948 if (Subtarget->hasLSE() || Subtarget->outlineAtomics())
30970 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
if (ValueTy->getPrimitiveSizeInBits() == 128) {
  Intrinsic::ID Int =
      IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp;
  ...
  Value *LoHi = Builder.CreateIntrinsic(Int, Addr, nullptr, "lohi");
  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
  Lo = Builder.CreateZExt(Lo, Int128Ty, "lo64");
  Hi = Builder.CreateZExt(Hi, Int128Ty, "hi64");
  Value *Or = Builder.CreateOr(
      Lo, Builder.CreateShl(Hi, ConstantInt::get(Int128Ty, 64)), "val64");
  return Builder.CreateBitCast(Or, ValueTy);
}
30997 IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
31001 CallInst *CI = Builder.CreateIntrinsic(
Int, Tys, Addr);
31003 Attribute::ElementType, IntEltTy));
31004 Value *Trunc = Builder.CreateTrunc(CI, IntEltTy);
31006 return Builder.CreateBitCast(Trunc, ValueTy);
31011 Builder.CreateIntrinsic(Intrinsic::aarch64_clrex, {});
31017 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Intrinsic::ID Int =
    IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
...
Value *CastVal = Builder.CreateBitCast(Val, Int128Ty);
Value *Lo = Builder.CreateTrunc(CastVal, Int64Ty, "lo");
Value *Hi =
    Builder.CreateTrunc(Builder.CreateLShr(CastVal, 64), Int64Ty, "hi");
return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
31039 IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr;
31045 Val = Builder.CreateBitCast(Val, IntValTy);
31047 CallInst *CI = Builder.CreateCall(
31048 Stxr, {Builder.CreateZExtOrBitCast(
31052 Attribute::ElementType, Val->
getType()));
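// --- Hedged standalone sketch of the i128 glue logic used around the
// exclusive-pair intrinsics above: LDAXP/LDXP results are joined with
// zext + shl(64) + or, and the value passed to STLXP/STXP is split back into
// low/high halves with trunc and lshr(64). Names are illustrative.
#include <cstdint>
#include <utility>

unsigned __int128 join128(uint64_t Lo, uint64_t Hi) {
  return (static_cast<unsigned __int128>(Hi) << 64) | Lo;
}

std::pair<uint64_t, uint64_t> split128(unsigned __int128 V) {
  return {static_cast<uint64_t>(V), static_cast<uint64_t>(V >> 64)};
}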
31059 if (!Ty->isArrayTy()) {
31060 const TypeSize &TySize = Ty->getPrimitiveSizeInBits();
31070bool AArch64TargetLowering::shouldNormalizeToSelectSequence(
LLVMContext &,
31078 M, Intrinsic::thread_pointer, IRB.
getPtrTy());
31090 if (Subtarget->isTargetAndroid())
31095 if (Subtarget->isTargetFuchsia())
31104 RTLIB::LibcallImpl SecurityCheckCookieLibcall =
31105 Libcalls.getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE);
31107 RTLIB::LibcallImpl SecurityCookieVar =
31108 Libcalls.getLibcallImpl(RTLIB::STACK_CHECK_GUARD);
31109 if (SecurityCheckCookieLibcall != RTLIB::Unsupported &&
31110 SecurityCookieVar != RTLIB::Unsupported) {
31122 F->addParamAttr(0, Attribute::AttrKind::InReg);
31134 if (Subtarget->isTargetAndroid())
31143 const Constant *PersonalityFn)
const {
31145 return AArch64::X0;
31151 const Constant *PersonalityFn)
const {
31153 return AArch64::X1;
31166 return Mask->getValue().isPowerOf2();
31172 unsigned OldShiftOpcode,
unsigned NewShiftOpcode,
31176 X, XC, CC,
Y, OldShiftOpcode, NewShiftOpcode, DAG))
31179 return X.getValueType().isScalarInteger() || NewShiftOpcode ==
ISD::SHL;
31186 !Subtarget->isTargetWindows() && !Subtarget->isTargetDarwin())
31202 const MCPhysReg *IStart =
TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
31211 if (AArch64::GPR64RegClass.
contains(*
I))
31212 RC = &AArch64::GPR64RegClass;
31213 else if (AArch64::FPR64RegClass.
contains(*
I))
31214 RC = &AArch64::FPR64RegClass;
31224 assert(Entry->getParent()->getFunction().hasFnAttribute(
31225 Attribute::NoUnwind) &&
31226 "Function should be nounwind in insertCopiesSplitCSR!");
31227 Entry->addLiveIn(*
I);
31232 for (
auto *Exit : Exits)
31234 TII->get(TargetOpcode::COPY), *
I)
31247 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
31275 if (FPVT == MVT::v8f16 && !Subtarget->hasFullFP16())
31277 if (FPVT == MVT::v8bf16)
31293 "Invalid call instruction for a KCFI check");
31295 switch (
MBBI->getOpcode()) {
31297 case AArch64::BLRNoIP:
31298 case AArch64::TCRETURNri:
31299 case AArch64::TCRETURNrix16x17:
31300 case AArch64::TCRETURNrix17:
31301 case AArch64::TCRETURNrinotx16:
31308 assert(
Target.isReg() &&
"Invalid target operand for an indirect call");
31309 Target.setIsRenamable(
false);
31318 Type *VectorTy,
unsigned ElemSizeInBits,
unsigned &Index)
const {
31327 if ((VTy->getNumElements() == 16 || VTy->getNumElements() == 8) &&
31328 VTy->getElementType()->isIntegerTy(8)) {
31331 if (ElemSizeInBits == 8 || ElemSizeInBits == 16 || ElemSizeInBits == 32 ||
31332 ElemSizeInBits == 64) {
31347 if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
31353void AArch64TargetLowering::finalizeLowering(
MachineFunction &MF)
const {
31379bool AArch64TargetLowering::shouldLocalize(
31381 auto &MF = *
MI.getMF();
31383 auto maxUses = [](
unsigned RematCost) {
31385 if (RematCost == 1)
31386 return std::numeric_limits<unsigned>::max();
31387 if (RematCost == 2)
31396 unsigned Opc =
MI.getOpcode();
31398 case TargetOpcode::G_GLOBAL_VALUE: {
31407 case TargetOpcode::G_FCONSTANT:
31408 case TargetOpcode::G_CONSTANT: {
31409 const ConstantInt *CI;
31410 unsigned AdditionalCost = 0;
31412 if (
Opc == TargetOpcode::G_CONSTANT)
31413 CI =
MI.getOperand(1).getCImm();
31415 LLT Ty = MRI.
getType(
MI.getOperand(0).getReg());
31420 auto APF =
MI.getOperand(1).getFPImm()->getValueAPF();
31428 AdditionalCost = 1;
31436 RematCost += AdditionalCost;
31438 unsigned MaxUses = maxUses(RematCost);
31440 if (MaxUses == std::numeric_limits<unsigned>::max())
31446 case AArch64::ADRP:
31447 case AArch64::G_ADD_LOW:
31449 case TargetOpcode::G_PTR_ADD:
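// --- Hedged sketch of the localization heuristic above: a constant that
// costs a single instruction to rematerialize may be localized regardless of
// its number of uses, while more expensive constants get a small cap. Only
// the cost-1 behaviour is visible in this extract; the other thresholds below
// are illustrative placeholders, not the backend's actual values.
#include <limits>

unsigned maxUsesForRematCost(unsigned RematCost) {
  if (RematCost == 1)
    return std::numeric_limits<unsigned>::max();
  if (RematCost == 2)
    return 8; // placeholder threshold
  return 2;   // placeholder threshold
}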
31471 if (AI->getAllocatedType()->isScalableTy())
31483 if (Subtarget->hasSME() ||
31484 Caller->hasFnAttribute(
"aarch64_pstate_sm_compatible") ||
31485 Caller->hasFnAttribute(
"aarch64_za_state_agnostic")) {
31500 "Expected legal fixed length vector!");
31505 return EVT(MVT::nxv16i8);
31507 return EVT(MVT::nxv8i16);
31509 return EVT(MVT::nxv4i32);
31511 return EVT(MVT::nxv2i64);
31513 return EVT(MVT::nxv8bf16);
31515 return EVT(MVT::nxv8f16);
31517 return EVT(MVT::nxv4f32);
31519 return EVT(MVT::nxv2f64);
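// --- Hedged standalone sketch of the mapping above: a legal fixed-length
// vector is operated on inside the smallest *packed* scalable container with
// the same element type (one full 128-bit granule per element kind). The
// string-based helper below only mirrors the returns listed above.
#include <string>

std::string sveContainerFor(const std::string &EltTy) {
  if (EltTy == "i8")   return "nxv16i8";
  if (EltTy == "i16")  return "nxv8i16";
  if (EltTy == "i32")  return "nxv4i32";
  if (EltTy == "i64")  return "nxv2i64";
  if (EltTy == "f16")  return "nxv8f16";
  if (EltTy == "bf16") return "nxv8bf16";
  if (EltTy == "f32")  return "nxv4f32";
  if (EltTy == "f64")  return "nxv2f64";
  return "unknown";
}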
31528 "Expected legal fixed length vector!");
31530 std::optional<unsigned> PgPattern =
31532 assert(PgPattern &&
"Unexpected element count for SVE predicate");
31539 MaskVT = MVT::nxv16i1;
31544 MaskVT = MVT::nxv8i1;
31548 MaskVT = MVT::nxv4i1;
31552 MaskVT = MVT::nxv2i1;
31556 return getPTrue(DAG,
DL, MaskVT, *PgPattern);
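// --- Hedged sketch: the predicate built above activates exactly the lanes of
// the fixed-length vector by choosing a PTRUE vector-length pattern whose
// count matches the element count. The pattern names below follow the
// architectural SVE VL patterns and are illustrative.
#include <optional>
#include <string>

std::optional<std::string> ptruePatternFor(unsigned NumElts) {
  switch (NumElts) {
  case 1: return "VL1";   case 2: return "VL2";   case 3: return "VL3";
  case 4: return "VL4";   case 5: return "VL5";   case 6: return "VL6";
  case 7: return "VL7";   case 8: return "VL8";   case 16: return "VL16";
  case 32: return "VL32"; case 64: return "VL64"; case 128: return "VL128";
  case 256: return "VL256";
  default: return std::nullopt; // no single PTRUE pattern for this count
  }
}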
31562 "Expected legal scalable vector!");
31577 "Expected to convert into a scalable vector!");
31578 assert(V.getValueType().isFixedLengthVector() &&
31579 "Expected a fixed length vector operand!");
31588 "Expected to convert into a fixed length vector!");
31589 assert(V.getValueType().isScalableVector() &&
31590 "Expected a scalable vector operand!");
31597SDValue AArch64TargetLowering::LowerFixedLengthVectorLoadToSVE(
31602 EVT VT =
Op.getValueType();
31604 EVT LoadVT = ContainerVT;
31605 EVT MemVT =
Load->getMemoryVT();
31615 LoadVT,
DL,
Load->getChain(),
Load->getBasePtr(),
Load->getOffset(), Pg,
31617 Load->getAddressingMode(),
Load->getExtensionType());
31624 Result = getSVESafeBitCast(ExtendVT, Result, DAG);
31625 Result = DAG.
getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU,
DL, ContainerVT,
31626 Pg, Result, DAG.
getPOISON(ContainerVT));
31639 EVT InVT = Mask.getValueType();
31646 bool InvertCond =
false;
31649 Mask = Mask.getOperand(0);
31670 {Pg, Op1, Op2, DAG.getCondCode(CC)});
31674SDValue AArch64TargetLowering::LowerFixedLengthVectorMLoadToSVE(
31679 EVT VT =
Op.getValueType();
31687 "Incorrect mask type");
31693 bool IsPassThruZeroOrUndef =
false;
31695 if (
Load->getPassThru()->isUndef()) {
31696 PassThru = DAG.
getUNDEF(ContainerVT);
31697 IsPassThruZeroOrUndef =
true;
31704 IsPassThruZeroOrUndef =
true;
31707 if (!
Load->isExpandingLoad()) {
31710 Load->getOffset(), Mask, PassThru,
31711 Load->getMemoryVT(),
Load->getMemOperand(),
31712 Load->getAddressingMode(),
Load->getExtensionType());
assert(((Subtarget->isSVEAvailable() && Subtarget->hasSVE2p2()) ||
        (Subtarget->isSVEorStreamingSVEAvailable() &&
         Subtarget->hasSME2p2())) &&
       "Expected SVE2p2 or SME2p2");
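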
31732 ContainerVT,
DL,
Load->getChain(),
Load->getBasePtr(),
31733 Load->getOffset(), ActiveMask, DAG.
getUNDEF(ContainerVT),
31734 Load->getMemoryVT(),
Load->getMemOperand(),
Load->getAddressingMode(),
31735 Load->getExtensionType());
31743 if (!IsPassThruZeroOrUndef) {
31755SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
31760 EVT VT =
Store->getValue().getValueType();
31762 EVT MemVT =
Store->getMemoryVT();
31771 NewValue = DAG.
getNode(AArch64ISD::FP_ROUND_MERGE_PASSTHRU,
DL, TruncVT, Pg,
31783 Store->getBasePtr(),
Store->getOffset(), Pg, MemVT,
31784 Store->getMemOperand(),
Store->getAddressingMode(),
31785 Store->isTruncatingStore());
31792 EVT VT =
Store->getValue().getValueType();
31794 return LowerFixedLengthVectorMStoreToSVE(
Op, DAG);
31796 if (!
Store->isCompressingStore())
31799 EVT MaskVT =
Store->getMask().getValueType();
31808 if (MaskReduceVT != MVT::i64)
31819 CompressedMask,
Store->getMemoryVT(),
31820 Store->getMemOperand(),
Store->getAddressingMode(),
31821 Store->isTruncatingStore(),
31825SDValue AArch64TargetLowering::LowerFixedLengthVectorMStoreToSVE(
31830 EVT VT =
Store->getValue().getValueType();
31838 Mask,
Store->getMemoryVT(),
Store->getMemOperand(),
31839 Store->getAddressingMode(),
Store->isTruncatingStore(),
31840 Store->isCompressingStore());
31843SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
31846 EVT VT =
Op.getValueType();
31848 unsigned Opc =
Op.getOpcode();
31865 DAG.
getNode(AArch64ISD::ASRD_MERGE_OP1,
DL, ContainerVT, Pg, Op1, Op2);
31874 if (EltVT == MVT::i32 || EltVT == MVT::i64) {
31906 auto HalveAndExtendVector = [&DAG, &
DL, &HalfVT, &PromVT,
31910 DAG.
getConstant(HalfVT.getVectorNumElements(),
DL, MVT::i64);
31913 return std::pair<SDValue, SDValue>(
31919 auto [Op0LoExt, Op0HiExt] = HalveAndExtendVector(
Op.getOperand(0));
31920 auto [Op1LoExt, Op1HiExt] = HalveAndExtendVector(
Op.getOperand(1));
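// --- Hedged scalar model of the halve-and-extend strategy above: SVE only
// provides element-wise SDIV/UDIV for 32- and 64-bit elements, so narrower
// divides are split into halves, promoted, divided, and narrowed back. The
// helper below only models the promotion for i16 lanes; names are
// illustrative.
#include <cstdint>
#include <vector>

std::vector<int16_t> divideViaI32(const std::vector<int16_t> &A,
                                  const std::vector<int16_t> &B) {
  std::vector<int16_t> Out(A.size());
  for (size_t I = 0; I < A.size(); ++I)
    Out[I] = static_cast<int16_t>(static_cast<int32_t>(A[I]) /
                                  static_cast<int32_t>(B[I]));
  return Out;
}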
31928SDValue AArch64TargetLowering::LowerFixedLengthVectorIntExtendToSVE(
31930 EVT VT =
Op.getValueType();
31939 unsigned ExtendOpc =
Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
31946 Val = DAG.
getNode(ExtendOpc,
DL, MVT::nxv8i16, Val);
31951 Val = DAG.
getNode(ExtendOpc,
DL, MVT::nxv4i32, Val);
31956 Val = DAG.
getNode(ExtendOpc,
DL, MVT::nxv2i64, Val);
31964SDValue AArch64TargetLowering::LowerFixedLengthVectorTruncateToSVE(
31966 EVT VT =
Op.getValueType();
31980 Val = DAG.
getNode(AArch64ISD::UZP1,
DL, MVT::nxv4i32, Val, Val);
31986 Val = DAG.
getNode(AArch64ISD::UZP1,
DL, MVT::nxv8i16, Val, Val);
31992 Val = DAG.
getNode(AArch64ISD::UZP1,
DL, MVT::nxv16i8, Val, Val);
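// --- Hedged model of one step of the UZP1-based truncation chain above: each
// narrowing step keeps the low half of every wide element (little-endian), so
// chaining nxv2i64 -> nxv4i32 -> nxv8i16 -> nxv16i8 truncates the lanes. The
// helper below only models the per-element effect, not register layout.
#include <cstdint>
#include <vector>

std::vector<uint16_t> narrowStep(const std::vector<uint32_t> &Wide) {
  std::vector<uint16_t> Out;
  Out.reserve(Wide.size());
  for (uint32_t W : Wide)
    Out.push_back(static_cast<uint16_t>(W)); // keep the low half of each lane
  return Out;
}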
32000SDValue AArch64TargetLowering::LowerFixedLengthExtractVectorElt(
32002 EVT VT =
Op.getValueType();
32003 EVT InVT =
Op.getOperand(0).getValueType();
32013SDValue AArch64TargetLowering::LowerFixedLengthInsertVectorElt(
32015 EVT VT =
Op.getValueType();
32019 EVT InVT =
Op.getOperand(0).getValueType();
32024 Op.getOperand(1),
Op.getOperand(2));
32034 unsigned NewOp)
const {
32035 EVT VT =
Op.getValueType();
32045 for (
const SDValue &V :
Op->op_values()) {
32052 EVT VTArg = VTNode->getVT().getVectorElementType();
32060 "Expected only legal fixed-width types");
32067 auto ScalableRes = DAG.
getNode(NewOp,
DL, ContainerVT, Operands);
32074 for (
const SDValue &V :
Op->op_values()) {
32075 assert((!
V.getValueType().isVector() ||
32076 V.getValueType().isScalableVector()) &&
32077 "Only scalable vectors are supported!");
32084 return DAG.
getNode(NewOp,
DL, VT, Operands,
Op->getFlags());
32092 EVT VT =
Op.getValueType();
32094 "Only expected to lower fixed length vector operation!");
32099 for (
const SDValue &V :
Op->op_values()) {
32103 if (!
V.getValueType().isVector()) {
32109 assert(
V.getValueType().isFixedLengthVector() &&
32111 "Only fixed length vectors are supported!");
32115 auto ScalableRes = DAG.
getNode(
Op.getOpcode(), SDLoc(
Op), ContainerVT,
Ops);
32119SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(
SDValue ScalarOp,
32121 SDLoc
DL(ScalarOp);
32127 EVT ContainerVT = SrcVT;
32138 DAG.
getPOISON(ContainerVT), AccOp, Zero);
32147SDValue AArch64TargetLowering::LowerPredReductionToSVE(
SDValue ReduceOp,
32149 SDLoc
DL(ReduceOp);
32151 EVT OpVT =
Op.getValueType();
32176 if (OpVT == MVT::nxv1i1) {
32178 Pg = DAG.
getNode(AArch64ISD::REINTERPRET_CAST,
DL, MVT::nxv2i1, Pg);
32179 Op = DAG.
getNode(AArch64ISD::REINTERPRET_CAST,
DL, MVT::nxv2i1,
Op);
32194 return Intrinsic::aarch64_sve_sminp;
32196 return Intrinsic::aarch64_sve_smaxp;
32198 return Intrinsic::aarch64_sve_uminp;
32200 return Intrinsic::aarch64_sve_umaxp;
32202 return std::nullopt;
32210 return AArch64ISD::UADDV_PRED;
32212 return AArch64ISD::ANDV_PRED;
32214 return AArch64ISD::ORV_PRED;
32216 return AArch64ISD::SMAXV_PRED;
32218 return AArch64ISD::SMINV_PRED;
32220 return AArch64ISD::UMAXV_PRED;
32222 return AArch64ISD::UMINV_PRED;
32224 return AArch64ISD::EORV_PRED;
32226 return AArch64ISD::FADDV_PRED;
32228 return AArch64ISD::FMAXNMV_PRED;
32230 return AArch64ISD::FMINNMV_PRED;
32232 return AArch64ISD::FMAXV_PRED;
32234 return AArch64ISD::FMINV_PRED;
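// --- Hedged sketch of the mapping implied by the returns above: generic
// vector reductions are lowered to predicated SVE reduction nodes. The
// left-hand names below are assumptions (the source opcodes are not visible
// in this extract); only the returned *_PRED node names come from the code
// above.
#include <string>

std::string predicatedReductionFor(const std::string &Reduce) {
  if (Reduce == "vecreduce_add")  return "UADDV_PRED";
  if (Reduce == "vecreduce_and")  return "ANDV_PRED";
  if (Reduce == "vecreduce_or")   return "ORV_PRED";
  if (Reduce == "vecreduce_xor")  return "EORV_PRED";
  if (Reduce == "vecreduce_smax") return "SMAXV_PRED";
  if (Reduce == "vecreduce_smin") return "SMINV_PRED";
  if (Reduce == "vecreduce_umax") return "UMAXV_PRED";
  if (Reduce == "vecreduce_umin") return "UMINV_PRED";
  if (Reduce == "vecreduce_fadd") return "FADDV_PRED";
  if (Reduce == "vecreduce_fmax") return "FMAXNMV_PRED";
  if (Reduce == "vecreduce_fmin") return "FMINNMV_PRED";
  return "unknown";
}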
32241 SDValue RdxOp, std::optional<Intrinsic::ID> &PairwiseOpIID)
const {
32246 bool OverrideNEON = !Subtarget->isNeonAvailable() ||
32255 SrcVT, OverrideNEON && Subtarget->useSVEForFixedLengthVectors());
32261 if (Subtarget->hasSVE2() || Subtarget->isStreamingSVEAvailable())
32262 if (SrcVT == MVT::v2i64 || (UseSVE && SrcVT == MVT::v2i32))
32290 return LowerPredReductionToSVE(
Op, DAG);
32292 std::optional<Intrinsic::ID> PairwiseOpIID;
32305 if (PairwiseOpIID) {
32312 if (RdxOpcode == AArch64ISD::UADDV_PRED)
32317 Rdx = DAG.
getNode(RdxOpcode,
DL, RdxVT, Pg, VecOp);
32324 if (ResVT !=
Op.getValueType())
32331AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(
SDValue Op,
32333 EVT VT =
Op.getValueType();
32336 EVT InVT =
Op.getOperand(1).getValueType();
32343 EVT MaskVT =
Op.getOperand(0).getValueType();
32357SDValue AArch64TargetLowering::LowerFixedLengthVectorSetccToSVE(
32360 EVT InVT =
Op.getOperand(0).getValueType();
32364 "Only expected to lower fixed length vector operation!");
32366 "Expected integer result of the same bit length as the inputs!");
32373 auto Cmp = DAG.
getNode(AArch64ISD::SETCC_MERGE_ZERO,
DL, CmpVT,
32374 {Pg, Op1, Op2,
Op.getOperand(2)});
32382AArch64TargetLowering::LowerFixedLengthBitcastToSVE(
SDValue Op,
32385 auto SrcOp =
Op.getOperand(0);
32386 EVT VT =
Op.getValueType();
32388 EVT ContainerSrcVT =
32396SDValue AArch64TargetLowering::LowerFixedLengthConcatVectorsToSVE(
32402 "Unexpected number of operands in CONCAT_VECTORS");
32404 auto SrcOp1 =
Op.getOperand(0);
32405 auto SrcOp2 =
Op.getOperand(1);
32406 EVT VT =
Op.getValueType();
32407 EVT SrcVT = SrcOp1.getValueType();
32413 DAG.
getNode(AArch64ISD::DUPLANE128,
DL, ContainerVT,
32419 if (NumOperands > 2) {
32422 for (
unsigned I = 0;
I < NumOperands;
I += 2)
32424 Op->getOperand(
I),
Op->getOperand(
I + 1)));
32435 Op = DAG.
getNode(AArch64ISD::SPLICE,
DL, ContainerVT, Pg, SrcOp1, SrcOp2);
32441AArch64TargetLowering::LowerFixedLengthFPExtendToSVE(
SDValue Op,
32443 EVT VT =
Op.getValueType();
32458 Val = getSVESafeBitCast(ExtendVT, Val, DAG);
32459 Val = DAG.
getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU,
DL, ContainerVT, Pg,
32466AArch64TargetLowering::LowerFixedLengthFPRoundToSVE(
SDValue Op,
32468 EVT VT =
Op.getValueType();
32480 Val = DAG.
getNode(AArch64ISD::FP_ROUND_MERGE_PASSTHRU,
DL, RoundVT, Pg, Val,
32490AArch64TargetLowering::LowerFixedLengthIntToFPToSVE(
SDValue Op,
32492 EVT VT =
Op.getValueType();
32496 unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
32497 : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
32515 Val = DAG.
getNode(Opcode,
DL, ContainerDstVT, Pg, Val,
32525 Val = getSVESafeBitCast(ContainerSrcVT, Val, DAG);
32534AArch64TargetLowering::LowerVECTOR_DEINTERLEAVE(
SDValue Op,
32537 EVT OpVT =
Op.getValueType();
32545 for (
unsigned I = 0;
I < 3; ++
I) {
32549 Ptr, MachinePointerInfo()));
32556 Ops.push_back(StackPtr);
32561 SDVTList VTs = DAG.
getVTList(OpVT, OpVT, OpVT, MVT::Other);
32563 TripleOpVT, MachinePointerInfo(),
32579 for (
unsigned I = 0;
I < 3; ++
I) {
32582 SDValue V = getSVESafeBitCast(PackedVT,
Op.getOperand(
I), DAG);
32594 Ops.push_back(StackPtr);
32597 SDVTList VTs = DAG.
getVTList(PackedVT, PackedVT, PackedVT, MVT::Other);
32608 if (Subtarget->hasSME2() && Subtarget->isStreaming() &&
32615 IntID = Intrinsic::aarch64_sve_uzp_x2;
32618 if (Subtarget->getMinSVEVectorSizeInBits() < 256 &&
32621 IntID = Intrinsic::aarch64_sve_uzp_x4;
32627 Ops.append(
Op->op_values().begin(),
Op->op_values().end());
32634 if (OpVT == MVT::v1i64 || OpVT == MVT::v1f64)
32647 EVT OpVT =
Op.getValueType();
32660 Ops.push_back(StackPtr);
32670 for (
unsigned I = 0;
I < 3; ++
I) {
32674 DAG.
getLoad(OpVT,
DL, Chain, Ptr, MachinePointerInfo()));
32684 InVecs.
push_back(getSVESafeBitCast(PackedVT, V, DAG));
32696 Ops.append(InVecs);
32698 Ops.push_back(StackPtr);
32705 for (
unsigned I = 0;
I < 3; ++
I) {
32709 Results.push_back(getSVESafeBitCast(OpVT, L, DAG));
32718 if (Subtarget->hasSME2() && Subtarget->isStreaming() &&
32725 IntID = Intrinsic::aarch64_sve_zip_x2;
32728 if (Subtarget->getMinSVEVectorSizeInBits() < 256 &&
32731 IntID = Intrinsic::aarch64_sve_zip_x4;
32737 Ops.append(
Op->op_values().begin(),
Op->op_values().end());
32744 if (OpVT == MVT::v1i64 || OpVT == MVT::v1f64)
32771 "Unexpected histogram update operation");
32773 EVT IndexVT =
Index.getValueType();
32780 bool ExtTrunc = IncSplatVT != MemVT;
32795 DAG.
getVTList(IncSplatVT, MVT::Other), MemVT,
DL,
Ops, GMMO, IndexType,
32815 ScatterOps, SMMO, IndexType, ExtTrunc);
32828AArch64TargetLowering::LowerPARTIAL_REDUCE_MLA(
SDValue Op,
32835 EVT ResultVT =
Op.getValueType();
32836 EVT OrigResultVT = ResultVT;
32837 EVT OpVT =
LHS.getValueType();
32841 if (ResultVT == MVT::v2i32 && OpVT == MVT::v16i8) {
32846 SDValue Reduced = DAG.
getNode(AArch64ISD::ADDP,
DL, MVT::v4i32, Wide, Wide);
32850 bool ConvertToScalable =
32854 if (ConvertToScalable) {
32860 Op = DAG.
getNode(
Op.getOpcode(),
DL, ResultVT, {Acc, LHS, RHS});
32875 if (Subtarget->hasSVE2() || Subtarget->isStreamingSVEAvailable()) {
32876 unsigned LoOpcode = IsUnsigned ? AArch64ISD::UADDWB : AArch64ISD::SADDWB;
32877 unsigned HiOpcode = IsUnsigned ? AArch64ISD::UADDWT : AArch64ISD::SADDWT;
32879 Res = DAG.
getNode(HiOpcode,
DL, ResultVT,
Lo, DotNode);
32882 auto [DotNodeLo, DotNodeHi] = DAG.
SplitVector(DotNode,
DL);
32899AArch64TargetLowering::LowerGET_ACTIVE_LANE_MASK(
SDValue Op,
32901 EVT VT =
Op.getValueType();
32904 assert(Subtarget->isSVEorStreamingSVEAvailable() &&
32905 "Lowering fixed length get_active_lane_mask requires SVE!");
32915 Op.getOperand(0),
Op.getOperand(1));
32922AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(
SDValue Op,
32924 EVT VT =
Op.getValueType();
32928 unsigned Opcode = IsSigned ? AArch64ISD::FCVTZS_MERGE_PASSTHRU
32929 : AArch64ISD::FCVTZU_MERGE_PASSTHRU;
32946 Val = getSVESafeBitCast(CvtVT, Val, DAG);
32947 Val = DAG.
getNode(Opcode,
DL, ContainerDstVT, Pg, Val,
32979 if (!IsSingleOp && !Subtarget.hasSVE2())
32982 EVT VTOp1 =
Op.getOperand(0).getValueType();
32984 unsigned IndexLen = MinSVESize / BitsPerElt;
32989 bool MinMaxEqual = (MinSVESize == MaxSVESize);
32990 assert(ElementsPerVectorReg <= IndexLen && ShuffleMask.size() <= IndexLen &&
32991 "Incorrectly legalised shuffle operation");
33000 if (!IsSingleOp && !MinMaxEqual && BitsPerElt == 8)
33003 for (
int Index : ShuffleMask) {
33011 if ((
unsigned)Index >= ElementsPerVectorReg) {
33013 Index += IndexLen - ElementsPerVectorReg;
33015 Index = Index - ElementsPerVectorReg;
33018 }
else if (!MinMaxEqual)
33023 if ((
unsigned)Index >= MaxOffset)
33032 for (
unsigned i = 0; i < IndexLen - ElementsPerVectorReg; ++i) {
33049 else if (Subtarget.hasSVE2()) {
33050 if (!MinMaxEqual) {
33052 SDValue VScale = (BitsPerElt == 64)
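// --- Hedged standalone sketch of the index remapping in the TBL path above:
// for a two-operand shuffle the fixed vectors live in the low lanes of two
// SVE registers of IndexLen elements each, so a mask index that refers to the
// second operand is shifted up so it lands in the second register. This
// models only the MinSVESize == MaxSVESize case visible above.
#include <vector>

std::vector<int> remapForTbl(const std::vector<int> &Mask,
                             unsigned ElementsPerVectorReg,
                             unsigned IndexLen) {
  std::vector<int> Out;
  Out.reserve(Mask.size());
  for (int Idx : Mask) {
    if (Idx >= 0 && static_cast<unsigned>(Idx) >= ElementsPerVectorReg)
      Idx += IndexLen - ElementsPerVectorReg; // points into the second register
    Out.push_back(Idx);
  }
  return Out;
}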
33076SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
33078 EVT VT =
Op.getValueType();
33082 auto ShuffleMask = SVN->
getMask();
33092 auto MinLegalExtractEltScalarTy = [](EVT ScalarTy) -> EVT {
33093 if (ScalarTy == MVT::i8 || ScalarTy == MVT::i16)
33107 bool ReverseEXT =
false;
33109 if (
isEXTMask(ShuffleMask, VT, ReverseEXT, Imm) &&
33117 Op = DAG.
getNode(AArch64ISD::INSR,
DL, ContainerVT, Op2, Scalar);
33122 for (
unsigned BlockSize : {64U, 32U, 16U}) {
33126 RevOp = AArch64ISD::BSWAP_MERGE_PASSTHRU;
33127 else if (EltSize == 16)
33128 RevOp = AArch64ISD::REVH_MERGE_PASSTHRU;
33130 RevOp = AArch64ISD::REVW_MERGE_PASSTHRU;
33143 if (Subtarget->hasSVE2p1() && EltSize == 64 &&
33146 SDValue Revd = DAG.
getNode(AArch64ISD::REVD_MERGE_PASSTHRU,
DL, ContainerVT,
33151 unsigned WhichResult;
33152 unsigned OperandOrder;
33155 WhichResult == 0) {
33157 OperandOrder == 0 ? Op1 : Op2,
33158 OperandOrder == 0 ? Op2 : Op1);
33164 unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
33166 DAG.
getNode(
Opc,
DL, ContainerVT, OperandOrder == 0 ? Op1 : Op2,
33167 OperandOrder == 0 ? Op2 : Op1);
33173 DAG, VT, DAG.
getNode(AArch64ISD::ZIP1,
DL, ContainerVT, Op1, Op1));
33176 unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
33178 DAG, VT, DAG.
getNode(
Opc,
DL, ContainerVT, Op1, Op1));
33199 unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
33200 unsigned MaxSVESize = Subtarget->getMaxSVEVectorSizeInBits();
33201 if (MinSVESize == MaxSVESize && MaxSVESize == VT.
getSizeInBits()) {
33210 WhichResult != 0) {
33212 OperandOrder == 0 ? Op1 : Op2,
33213 OperandOrder == 0 ? Op2 : Op1);
33218 unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
33220 DAG, VT, DAG.
getNode(
Opc,
DL, ContainerVT, Op1, Op2));
33225 DAG, VT, DAG.
getNode(AArch64ISD::ZIP2,
DL, ContainerVT, Op1, Op1));
33228 unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
33230 DAG, VT, DAG.
getNode(
Opc,
DL, ContainerVT, Op1, Op1));
33233 if ((Subtarget->hasSVE2p1() || Subtarget->hasSME2p1()) &&
33234 Subtarget->isSVEorStreamingSVEAvailable()) {
33236 "Unsupported SVE vector size");
33240 if (std::optional<unsigned> Lane =
33241 isDUPQMask(ShuffleMask, Segments, SegmentElts)) {
33248 DAG.getConstant(*Lane, DL, MVT::i64,
33262 if (MinSVESize || !Subtarget->isNeonAvailable())
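// --- Hedged sketch of the "reverse within a block" masks matched above and
// lowered to the BSWAP/REVH/REVW merge-passthru nodes: with BlockElts == 2
// the mask 1,0,3,2,... reverses element pairs. The check below is a
// standalone model; -1 lanes are treated as "don't care".
#include <vector>

bool isBlockReverseMask(const std::vector<int> &Mask, unsigned BlockElts) {
  if (BlockElts < 2 || Mask.size() % BlockElts != 0)
    return false;
  for (size_t I = 0; I < Mask.size(); ++I) {
    int Expected = static_cast<int>((I / BlockElts) * BlockElts +
                                    (BlockElts - 1 - I % BlockElts));
    if (Mask[I] != -1 && Mask[I] != Expected)
      return false;
  }
  return true;
}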
33272 EVT InVT =
Op.getValueType();
33276 "Only expect to cast between legal scalable vector types!");
33279 "For predicate bitcasts, use getSVEPredicateBitCast");
33295 VT == PackedVT || InVT == PackedInVT) &&
33296 "Unexpected bitcast!");
33299 if (InVT != PackedInVT)
33300 Op = DAG.
getNode(AArch64ISD::REINTERPRET_CAST,
DL, PackedInVT,
Op);
33302 if (Subtarget->isLittleEndian() ||
33313 Op = DAG.
getNode(AArch64ISD::NVCAST,
DL, PackedVTAsInt,
Op);
33320 if (VT != PackedVT)
33321 Op = DAG.
getNode(AArch64ISD::REINTERPRET_CAST,
DL, VT,
Op);
33328 return ::isAllActivePredicate(DAG,
N);
33332 return ::getPromotedVTForPredicate(VT);
33335bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
33337 const APInt &OriginalDemandedElts,
KnownBits &Known, TargetLoweringOpt &TLO,
33338 unsigned Depth)
const {
33340 unsigned Opc =
Op.getOpcode();
33342 case AArch64ISD::VSHL: {
33346 if (ShiftR->
getOpcode() != AArch64ISD::VLSHR)
33357 if (ShiftRBits != ShiftLBits)
33360 unsigned ScalarSize =
Op.getScalarValueSizeInBits();
33361 assert(ScalarSize > ShiftLBits &&
"Invalid shift imm");
33364 APInt UnusedBits = ~OriginalDemandedBits;
33366 if ((ZeroBits & UnusedBits) != ZeroBits)
33373 case AArch64ISD::BICi: {
33377 TLO.DAG.computeKnownBits(Op0, OriginalDemandedElts,
Depth + 1);
33379 APInt BitsToClear =
33380 (
Op->getConstantOperandAPInt(1) <<
Op->getConstantOperandAPInt(2))
33382 APInt AlreadyZeroedBitsToClear = BitsToClear & KnownOp0.
Zero;
33383 if (BitsToClear.
isSubsetOf(AlreadyZeroedBitsToClear))
33384 return TLO.CombineTo(
Op, Op0);
33393 unsigned MaxSVEVectorSizeInBits = Subtarget->getMaxSVEVectorSizeInBits();
33394 if (!MaxSVEVectorSizeInBits)
33396 unsigned VscaleMax = MaxSVEVectorSizeInBits / 128;
33397 unsigned MaxValue = MaxCount->getKnownMinValue() * VscaleMax;
33412 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO,
Depth);
33415bool AArch64TargetLowering::canCreateUndefOrPoisonForTargetNode(
33420 switch (
Op.getOpcode()) {
33421 case AArch64ISD::MOVI:
33422 case AArch64ISD::MOVIedit:
33423 case AArch64ISD::MOVImsl:
33424 case AArch64ISD::MOVIshift:
33425 case AArch64ISD::MVNImsl:
33426 case AArch64ISD::MVNIshift:
33427 case AArch64ISD::VASHR:
33428 case AArch64ISD::VLSHR:
33429 case AArch64ISD::VSHL:
33433 Op, DemandedElts, DAG, Kind, ConsiderFlags,
Depth);
33436bool AArch64TargetLowering::isTargetCanonicalConstantNode(
SDValue Op)
const {
return Op.getOpcode() == AArch64ISD::DUP ||
       Op.getOpcode() == AArch64ISD::MOVI ||
       Op.getOpcode() == AArch64ISD::MOVIshift ||
       Op.getOpcode() == AArch64ISD::MOVImsl ||
       Op.getOpcode() == AArch64ISD::MOVIedit ||
       Op.getOpcode() == AArch64ISD::MVNIshift ||
       Op.getOpcode() == AArch64ISD::MVNImsl ||
       ...
       (... Op.getOperand(0).getOpcode() == AArch64ISD::MOVIedit &&
        Op.getOperand(0).getConstantOperandVal(0) == 0) ||
       (... Op.getOperand(0).getOpcode() == AArch64ISD::DUP) ||
33456 return Subtarget->hasSVE() || Subtarget->hasSVE2() ||
33457 Subtarget->hasComplxNum();
33468 if (!VTy->isScalableTy() && !Subtarget->hasComplxNum())
33472 unsigned NumElements = VTy->getElementCount().getKnownMinValue();
33478 unsigned VTyWidth = VTy->getScalarSizeInBits() * NumElements;
33479 if ((VTyWidth < 128 && (VTy->isScalableTy() || VTyWidth != 64)) ||
33483 if (ScalarTy->isIntegerTy() && Subtarget->hasSVE2() && VTy->isScalableTy()) {
33487 return ScalarWidth == 32 || ScalarWidth == 64;
33488 return 8 <= ScalarWidth && ScalarWidth <= 64;
33495 return (ScalarTy->isHalfTy() && Subtarget->hasFullFP16()) ||
33496 ScalarTy->isFloatTy() || ScalarTy->isDoubleTy();
33506 bool IsScalable = Ty->isScalableTy();
33507 bool IsInt = Ty->getElementType()->isIntegerTy();
33510 Ty->getScalarSizeInBits() * Ty->getElementCount().getKnownMinValue();
33513 "Vector type must be either 64 or a power of 2 that is at least 128");
33515 if (TyWidth > 128) {
33516 int Stride = Ty->getElementCount().getKnownMinValue() / 2;
33518 ->getElementCount()
33519 .getKnownMinValue() /
33522 auto *LowerSplitA =
B.CreateExtractVector(HalfTy, InputA,
uint64_t(0));
33523 auto *LowerSplitB =
B.CreateExtractVector(HalfTy, InputB,
uint64_t(0));
33524 auto *UpperSplitA =
B.CreateExtractVector(HalfTy, InputA, Stride);
33525 auto *UpperSplitB =
B.CreateExtractVector(HalfTy, InputB, Stride);
33526 Value *LowerSplitAcc =
nullptr;
33527 Value *UpperSplitAcc =
nullptr;
33533 UpperSplitAcc =
B.CreateExtractVector(HalfAccTy,
Accumulator, AccStride);
33535 B, OperationType, Rotation, LowerSplitA, LowerSplitB, LowerSplitAcc);
33537 B, OperationType, Rotation, UpperSplitA, UpperSplitB, UpperSplitAcc);
33541 return B.CreateInsertVector(FullTy, Result, UpperSplitInt, AccStride);
33547 return B.CreateIntrinsic(
33548 Intrinsic::aarch64_sve_cmla_x, Ty,
33549 {
Accumulator, InputA, InputB,
B.getInt32((
int)Rotation * 90)});
33551 auto *Mask =
B.getAllOnesMask(Ty->getElementCount());
33552 return B.CreateIntrinsic(
33553 Intrinsic::aarch64_sve_fcmla, Ty,
33554 {Mask,
Accumulator, InputA, InputB,
B.getInt32((
int)Rotation * 90)});
33557 Intrinsic::ID IdMap[4] = {Intrinsic::aarch64_neon_vcmla_rot0,
33558 Intrinsic::aarch64_neon_vcmla_rot90,
33559 Intrinsic::aarch64_neon_vcmla_rot180,
33560 Intrinsic::aarch64_neon_vcmla_rot270};
33563 return B.CreateIntrinsic(IdMap[(
int)Rotation], Ty,
33572 return B.CreateIntrinsic(
33573 Intrinsic::aarch64_sve_cadd_x, Ty,
33574 {InputA, InputB,
B.getInt32((
int)Rotation * 90)});
33576 auto *Mask =
B.getAllOnesMask(Ty->getElementCount());
33577 return B.CreateIntrinsic(
33578 Intrinsic::aarch64_sve_fcadd, Ty,
33579 {Mask, InputA, InputB,
B.getInt32((
int)Rotation * 90)});
33586 IntId = Intrinsic::aarch64_neon_vcadd_rot90;
33588 IntId = Intrinsic::aarch64_neon_vcadd_rot270;
33593 return B.CreateIntrinsic(IntId, Ty, {InputA, InputB});
33598 return B.CreateIntrinsic(
33599 Intrinsic::aarch64_sve_cdot,
Accumulator->getType(),
33600 {Accumulator, InputA, InputB, B.getInt32((int)Rotation * 90)});
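// --- Hedged sketch of the rotation-indexed selection above for the NEON
// complex multiply-accumulate path: rotations of 0/90/180/270 degrees pick
// the matching vcmla intrinsic (the SVE paths use cmla/fcmla/cdot with the
// rotation passed as an immediate). Rotation below is a quarter-turn count.
#include <string>

std::string neonCmlaFor(unsigned QuarterTurns) {
  static const char *Table[4] = {
      "llvm.aarch64.neon.vcmla.rot0",   "llvm.aarch64.neon.vcmla.rot90",
      "llvm.aarch64.neon.vcmla.rot180", "llvm.aarch64.neon.vcmla.rot270"};
  return QuarterTurns < 4 ? Table[QuarterTurns] : "invalid";
}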
33606bool AArch64TargetLowering::preferScalarizeSplat(
SDNode *
N)
const {
33607 unsigned Opc =
N->getOpcode();
33610 [&](
SDNode *
Use) { return Use->getOpcode() == ISD::MUL; }))
33617 return Subtarget->getMinimumJumpTableEntries();
33623 bool NonUnitFixedLengthVector =
33625 if (!NonUnitFixedLengthVector || !Subtarget->useSVEForFixedLengthVectors())
33630 unsigned NumIntermediates;
33638 bool NonUnitFixedLengthVector =
33640 if (!NonUnitFixedLengthVector || !Subtarget->useSVEForFixedLengthVectors())
33645 unsigned NumIntermediates;
33647 NumIntermediates, VT2);
33652 unsigned &NumIntermediates,
MVT &RegisterVT)
const {
33654 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
33659 assert(Subtarget->useSVEForFixedLengthVectors() &&
"Unexpected mode!");
33660 assert(IntermediateVT == RegisterVT &&
"Unexpected VT mismatch!");
33671 IntermediateVT = NewVT;
33674 return NumIntermediates;
33681 NumIntermediates *= NumSubRegs;
33682 NumRegs *= NumSubRegs;
33688 IntermediateVT = RegisterVT = MVT::v16i8;
33691 IntermediateVT = RegisterVT = MVT::v8i16;
33694 IntermediateVT = RegisterVT = MVT::v4i32;
33697 IntermediateVT = RegisterVT = MVT::v2i64;
33700 IntermediateVT = RegisterVT = MVT::v8f16;
33703 IntermediateVT = RegisterVT = MVT::v4f32;
33706 IntermediateVT = RegisterVT = MVT::v2f64;
33709 IntermediateVT = RegisterVT = MVT::v8bf16;
33718 return !Subtarget->isTargetWindows() &&
33727 if (VT == MVT::v8i8 || VT == MVT::v4i16 || VT == MVT::v2i32)
33742 EVT VT =
Op.getValueType();
33751 return DAG.
getNode(AArch64ISD::FMINNM_PRED,
DL, VT, Pg, In, In);
33754 "Expected to lower to SVE!");
33757 In = DAG.
getNode(AArch64ISD::FMINNM_PRED,
DL, ContainerVT, Pg, In, In);
static MCRegister MatchRegisterName(StringRef Name)
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static SDValue trySVESplat64(SDValue Op, SelectionDAG &DAG, const AArch64Subtarget *ST, APInt &DefBits)
static SDValue tryLowerSmallVectorExtLoad(LoadSDNode *Load, SelectionDAG &DAG)
Helper function to optimize loads of extended small vectors.
static void CustomNonLegalBITCASTResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, EVT ExtendVT, EVT CastVT)
static bool isConcatMask(ArrayRef< int > Mask, EVT VT, bool SplitLHS)
static bool isAddSubSExt(SDValue N, SelectionDAG &DAG)
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, const SDLoc &DL, SelectionDAG &DAG)
can be transformed to: not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) (and (not (setCA (cmp A))...
static void changeVectorFPCCToAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2, bool &Invert)
changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC usable with the vector...
static SDValue performZExtDeinterleaveShuffleCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt)
isVShiftRImm - Check if this is a valid build_vector for the immediate operand of a vector shift righ...
static bool isSingletonEXTMask(ArrayRef< int > M, EVT VT, unsigned &Imm)
static SDValue foldCSELofCTTZ(SDNode *N, SelectionDAG &DAG)
static SDValue performCONDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, unsigned CCIndex, unsigned CmpIndex)
static SDValue tryConvertSVEWideCompare(SDNode *N, ISD::CondCode CC, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue NormalizeBuildVector(SDValue Op, SelectionDAG &DAG)
static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St)
Replace a splat of zeros to a vector store by scalar stores of WZR/XZR.
static SDValue tryToWidenSetCCOperands(SDNode *Op, SelectionDAG &DAG)
static SDValue performLastTrueTestVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue convertMulToShlAdd(SDNode *N, SelectionDAG &DAG)
static SDValue GenerateTBL(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue performDUPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG)
static std::optional< PredicateConstraint > parsePredicateConstraint(StringRef Constraint)
static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static void analyzeCallOperands(const AArch64TargetLowering &TLI, const AArch64Subtarget *Subtarget, const TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo)
static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo)
Check whether or not Op is a SET_CC operation, either a generic or an AArch64 lowered one.
static bool isLegalArithImmed(uint64_t C)
static std::optional< Intrinsic::ID > getPairwiseOpForReduction(unsigned Op)
Returns the pairwise SVE2 op that could be used for a v2<ty> reduction.
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performVectorDeinterleaveCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static ScalableVectorType * getSVEContainerIRType(FixedVectorType *VTy)
static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG)
unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend)
static SDValue performMulVectorCmpZeroCombine(SDNode *N, SelectionDAG &DAG)
static SDValue convertFixedMaskToScalableVector(SDValue Mask, SelectionDAG &DAG)
static void setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL, AArch64TargetLowering::IntrinsicInfo &Info, const CallBase &CI)
Set the IntrinsicInfo for the aarch64_sve_st<N> intrinsics.
static bool isZeroingInactiveLanes(SDValue Op)
static SDValue performPTestFirstCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue trySwapVSelectOperands(SDNode *N, SelectionDAG &DAG)
static SDValue tryCombineMULLWithUZP1(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue combineToExtendBoolVectorInReg(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N0, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget &Subtarget)
static bool isExtendedBUILD_VECTOR(SDValue N, SelectionDAG &DAG, bool isSigned)
static SDValue getSVEPredicateBitCast(EVT VT, SDValue Op, SelectionDAG &DAG)
static bool isZerosVector(const SDNode *N)
isZerosVector - Check whether SDNode N is a zero-filled vector.
static SDValue performGLD1Combine(SDNode *N, SelectionDAG &DAG)
static SDValue performNVCASTCombine(SDNode *N, SelectionDAG &DAG)
Get rid of unnecessary NVCASTs (that don't change the type).
static const TargetRegisterClass * getReducedGprRegisterClass(ReducedGprConstraint Constraint, EVT VT)
static const MachineInstr * stripVRegCopies(const MachineRegisterInfo &MRI, Register Reg)
static SDValue carryFlagToValue(SDValue Glue, EVT VT, SelectionDAG &DAG, bool Invert)
static SDValue getScaledOffsetForBitWidth(SelectionDAG &DAG, SDValue Offset, SDLoc DL, unsigned BitWidth)
static bool isPredicateCCSettingOp(SDValue N)
static SDValue performSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
If the operand is a bitwise AND with a constant RHS, and the shift has a constant RHS and is the only...
static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performSVEAndCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
bool isVectorizedBinOp(unsigned Opcode)
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, const SDLoc &DL, SelectionDAG &DAG)
static SDValue overflowFlagToValue(SDValue Glue, EVT VT, SelectionDAG &DAG)
static SDValue GenerateFixedLengthSVETBL(SDValue Op, SDValue Op1, SDValue Op2, ArrayRef< int > ShuffleMask, EVT VT, EVT ContainerVT, SelectionDAG &DAG)
static SDValue performBRCONDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static MVT getSVEContainerType(EVT ContentTy)
static SDValue performVecReduceAddCntpCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *ST)
static bool isMergePassthruOpcode(unsigned Opc)
static unsigned selectUmullSmull(SDValue &N0, SDValue &N1, SelectionDAG &DAG, SDLoc DL, bool &IsMLA)
static SDValue performFADDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performNEONPostLDSTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Target-specific DAG combine function for NEON load/store intrinsics to merge base address updates.
static SDValue emitVectorComparison(SDValue LHS, SDValue RHS, AArch64CC::CondCode CC, bool NoNans, EVT VT, const SDLoc &DL, SelectionDAG &DAG)
Emit vector comparison for floating-point values, producing a mask.
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performVectorExtCombine(SDNode *N, SelectionDAG &DAG)
static void ReplaceCMP_SWAP_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N)
static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp, SelectionDAG &DAG)
static SDValue performMINMAXCombine(SDNode *N, SelectionDAG &DAG, const AArch64TargetLowering &TLI)
static bool isZeroExtended(SDValue N, SelectionDAG &DAG)
static SDValue performSelectCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with the compare-mask instruct...
static bool isCheapToExtend(const SDValue &N)
static cl::opt< bool > EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden, cl::desc("Enable AArch64 logical imm instruction " "optimization"), cl::init(true))
static SDValue performExtractLastActiveCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG)
static bool isValidImmForSVEVecImmAddrMode(unsigned OffsetInBytes, unsigned ScalarSizeInBytes)
Check if the value of OffsetInBytes can be used as an immediate for the gather load/prefetch and scat...
static bool isUZP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of "vector_shuffle v,...
static bool shouldLowerTailCallStackArg(const MachineFunction &MF, const CCValAssign &VA, SDValue Arg, ISD::ArgFlagsTy Flags, int CallOffset)
Check whether a stack argument requires lowering in a tail call.
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static std::optional< ElementCount > getMaxValueForSVECntIntrinsic(SDValue Op)
static unsigned getDUPLANEOp(EVT EltType)
static void changeFPCCToAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget, const TargetMachine &TM)
static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST, EVT VT, EVT MemVT, SelectionDAG &DAG)
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static bool canLowerSRLToRoundingShiftForVT(SDValue Shift, EVT ResVT, SelectionDAG &DAG, unsigned &ShiftValue, SDValue &RShOperand)
static bool isExtendOrShiftOperand(SDValue N)
static bool isLanes1toNKnownZero(SDValue Op)
static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG)
static SDValue performVecReduceAddCombineWithUADDLP(SDNode *N, SelectionDAG &DAG)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static EVT getPackedSVEVectorVT(EVT VT)
static SDValue performANDORCSELCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performUnpackCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static SDValue performTruncateCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerPtrAuthGlobalAddressStatically(SDValue TGA, SDLoc DL, EVT VT, AArch64PACKey::ID KeyC, SDValue Discriminator, SDValue AddrDiscriminator, SelectionDAG &DAG)
static SDValue performVecReduceBitwiseCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue performFlagSettingCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, unsigned GenericOpcode)
static SDValue performSpliceCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performCSELCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static void ReplaceReductionResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, unsigned InterOp, unsigned AcrossOp)
static bool isEquivalentMaskless(unsigned CC, unsigned width, ISD::LoadExtType ExtType, int AddConstant, int CompConstant)
static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG)
static SDValue constructDup(SDValue V, int Lane, SDLoc DL, EVT VT, unsigned Opcode, SelectionDAG &DAG)
static bool isCMP(SDValue Op)
static SDValue LowerSVEIntrinsicIndex(SDNode *N, SelectionDAG &DAG)
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static bool rmwOpMayLowerToLibcall(const AArch64Subtarget &Subtarget, const AtomicRMWInst *RMW)
static Function * getStructuredLoadFunction(Module *M, unsigned Factor, bool Scalable, Type *LDVTy, Type *PtrTy)
unsigned numberOfInstrToLoadImm(APInt C)
static bool isCMN(SDValue Op, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue foldCSELOfCSEL(SDNode *Op, SelectionDAG &DAG)
static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc, SelectionDAG &DAG, bool UnpredOp=false, bool SwapOperands=false)
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
SDValue LowerSMELdrStr(SDValue N, SelectionDAG &DAG, bool IsLoad)
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, AArch64CC::CondCode Predicate)
Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain of CCMP/CFCMP ops.
static SDValue performScalarToVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue performExtendDuplaneTruncCombine(SDNode *N, SelectionDAG &DAG)
static bool isPow2Splat(SDValue Op, uint64_t &SplatVal, bool &Negated)
static void createTblForTrunc(TruncInst *TI, bool IsLittleEndian)
static SDValue performANDSCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N, SelectionDAG &DAG)
static AArch64CC::CondCode parseConstraintCode(llvm::StringRef Constraint)
static bool isINSMask(ArrayRef< int > M, int NumInputElements, bool &DstIsLeft, int &Anomaly)
static SDValue performExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static bool callConvSupportsVarArgs(CallingConv::ID CC)
Return true if the call convention supports varargs Currently only those that pass varargs like the C...
static SDValue performBICiCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static const MCPhysReg GPRArgRegs[]
static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits, APInt &UndefBits)
static SDValue LowerSVEIntrinsicDUP(SDNode *N, SelectionDAG &DAG)
static SDValue LowerCLMUL(SDValue Op, SelectionDAG &DAG)
static SDValue performSignExtendSetCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static bool isPassedInFPR(EVT VT)
static unsigned getIntrinsicID(const SDNode *N)
static SDValue valueToCarryFlag(SDValue Value, SelectionDAG &DAG, bool Invert)
static SDValue performAddUADDVCombine(SDNode *N, SelectionDAG &DAG)
static bool IsSVECntIntrinsic(SDValue S)
static SDValue performExtBinopLoadFold(SDNode *N, SelectionDAG &DAG)
static bool findMoreOptimalIndexType(const MaskedGatherScatterSDNode *N, SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue tryCombineFADDReductionWithZero(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget, SDValue Vec, SDValue Start=SDValue())
Optimize patterns where we insert zeros into vector lanes before a floating-point add reduction.
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static SDValue performVSelectCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static bool isWideDUPMask(ArrayRef< int > M, EVT VT, unsigned BlockSize, unsigned &DupLaneOp)
Check if a vector shuffle corresponds to a DUP instructions with a larger element width than the vect...
constexpr MVT FlagsVT
Value type used for NZCV flags.
static SDValue performSELECT_CCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL, EVT VT)
static cl::opt< bool > EnableExtToTBL("aarch64-enable-ext-to-tbl", cl::Hidden, cl::desc("Combine ext and trunc to TBL"), cl::init(true))
static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St, SDValue SplatVal, unsigned NumVecElts)
static SDValue performNegCSelCombine(SDNode *N, SelectionDAG &DAG)
static std::optional< std::pair< unsigned, const TargetRegisterClass * > > parseSVERegAsConstraint(StringRef Constraint)
static SDValue performST1Combine(SDNode *N, SelectionDAG &DAG)
static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *ST)
static SDValue tryLowerToBSL(SDValue N, SelectionDAG &DAG)
static SDValue performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue removeRedundantInsertVectorElt(SDNode *N)
static std::optional< AArch64CC::CondCode > getCSETCondCode(SDValue Op)
static bool isLane0KnownActive(SDValue Op)
static SDValue combineSVEReductionOrderedFP(SDNode *N, unsigned Opc, SelectionDAG &DAG)
static SDValue trySQDMULHCombine(SDNode *N, SelectionDAG &DAG)
static SDValue legalizeSVEGatherPrefetchOffsVec(SDNode *N, SelectionDAG &DAG)
Legalize the gather prefetch (scalar + vector addressing mode) when the offset vector is an unpacked ...
static bool isNegatedInteger(SDValue Op)
static SDValue performFirstTrueTestVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
constexpr MVT CondCodeVT
Value type used for condition codes.
static bool isLoadOrMultipleLoads(SDValue B, SmallVector< LoadSDNode * > &Loads)
static SDValue performSubAddMULCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performLD1Combine(SDNode *N, SelectionDAG &DAG, unsigned Opc)
static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16)
SDValue LowerVectorMatch(SDValue Op, SelectionDAG &DAG)
static Function * getStructuredStoreFunction(Module *M, unsigned Factor, bool Scalable, Type *STVTy, Type *PtrTy)
static SDValue performZExtUZPCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performVectorShiftCombine(SDNode *N, const AArch64TargetLowering &TLI, TargetLowering::DAGCombinerInfo &DCI)
Optimize a vector shift instruction and its operand if shifted out bits are not used.
static SDValue performUADDVAddCombine(SDValue A, SelectionDAG &DAG)
static SDValue combineSVEPrefetchVecBaseImmOff(SDNode *N, SelectionDAG &DAG, unsigned ScalarSizeInBytes)
Combines a node carrying the intrinsic aarch64_sve_prf<T>_gather_scalar_offset into a node that uses ...
static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St)
Replace a splat of a scalar to a vector store by scalar stores of the scalar value.
unsigned getSignExtendedGatherOpcode(unsigned Opcode)
static bool isOrXorChain(SDValue N, unsigned &Num, SmallVector< std::pair< SDValue, SDValue >, 16 > &WorkList)
static SDValue tryCombineToREV(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt)
getVShiftImm - Check if this is a valid build_vector for the immediate operand of a vector shift oper...
static SDValue performGatherLoadCombine(SDNode *N, SelectionDAG &DAG, unsigned Opcode, bool OnlyPackedOffsets=true)
static SDValue foldOverflowCheck(SDNode *Op, SelectionDAG &DAG, bool IsAdd)
static bool isEXTMaskWithSplat(ArrayRef< int > M, EVT VT, unsigned SplatOperand, bool &ReverseEXT, unsigned &Imm)
static SDValue combineSVEReductionFP(SDNode *N, unsigned Opc, SelectionDAG &DAG)
static SDValue performDupLane128Combine(SDNode *N, SelectionDAG &DAG)
static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm, const APInt &Demanded, TargetLowering::TargetLoweringOpt &TLO, unsigned NewOpc)
bool isLegalCmpImmed(APInt C)
static bool isSafeSignedCMN(SDValue Op, SelectionDAG &DAG)
static unsigned getCmpOperandFoldingProfit(SDValue Op)
Returns how profitable it is to fold a comparison's operand's shift and/or extension operations.
static SDValue lowerIntNeonIntrinsic(SDValue Op, unsigned Opcode, SelectionDAG &DAG, bool LastOperandIsImm=false)
static SDValue performFPExtendCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static SDValue performCTPOPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG)
static SDValue performConcatVectorsCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue performSVEMulAddSubCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue foldCSELofLASTB(SDNode *Op, SelectionDAG &DAG)
static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N, SelectionDAG &DAG)
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AArch64cc, SelectionDAG &DAG, const SDLoc &DL)
static SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG)
static SDValue performBuildShuffleExtendCombine(SDValue BV, SelectionDAG &DAG)
Combines a buildvector(sext/zext) or shuffle(sext/zext, undef) node pattern into sext/zext(buildvecto...
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static Value * createTblShuffleForZExt(IRBuilderBase &Builder, Value *Op, FixedVectorType *ZExtTy, FixedVectorType *DstTy, bool IsLittleEndian)
static SDValue performAddSubIntoVectorOp(SDNode *N, SelectionDAG &DAG)
static SDValue performANDORDUPNOTCombine(SDNode *N, SelectionDAG &DAG)
static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL, EVT VT)
static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG)
static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC, SDValue RHS={})
changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64 CC
static const MCPhysReg FPRArgRegs[]
static SDValue getSETCC(AArch64CC::CondCode CC, SDValue NZCV, const SDLoc &DL, SelectionDAG &DAG)
Helper function to create 'CSET', which is equivalent to 'CSINC <Wd>, WZR, WZR, invert(<cond>)'.
static SDValue performAddTruncShiftCombine(SDNode *N, SelectionDAG &DAG)
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
static SDValue tryCombineNeonFcvtFP16ToI16(SDNode *N, unsigned Opcode, SelectionDAG &DAG)
static void replaceBoolVectorBitcast(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG)
static bool isOneVector(SDValue V)
static SDValue performActiveLaneMaskCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *ST)
static SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT, int Pattern)
static bool isEXTMask(ArrayRef< int > M, EVT VT, bool &ReverseEXT, unsigned &Imm)
static std::optional< ReducedGprConstraint > parseReducedGprConstraint(StringRef Constraint)
static SDValue tryCombineFixedPointConvert(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue getPredicateForVector(SelectionDAG &DAG, SDLoc &DL, EVT VT)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SMECallAttrs getSMECallAttrs(const Function &Caller, const RTLIB::RuntimeLibcallsInfo &RTLCI, const TargetLowering::CallLoweringInfo &CLI)
static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG)
Combines a mul(dup(sext/zext)) node pattern into mul(sext/zext(dup)) making use of the vector SExt/ZE...
static SDValue performAddSubLongCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG)
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
Fold a floating-point multiply by power of two into floating-point to fixed-point conversion.
static EVT calculatePreExtendType(SDValue Extend)
Calculates what the pre-extend type is, based on the extension operation node provided by Extend.
static SDValue performSetCCPunpkCombine(SDNode *N, SelectionDAG &DAG)
static bool isSignExtInReg(const SDValue &V)
static EVT getPromotedVTForPredicate(EVT VT)
static void changeFPCCToANDAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Convert a DAG fp condition code to an AArch64 CC.
static SDValue performAddWithSBCCombine(SDNode *N, SelectionDAG &DAG)
static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
Turn vector tests of the signbit in the form of: xor (sra X, elt_size(X)-1), -1 into: cmge X,...
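As a sanity check of the identity this combine relies on, a small standalone per-element sketch (illustrative only, not LLVM code; assumes arithmetic right shift of signed integers, as on AArch64): xor(sra(X, elt_size-1), -1) yields all-ones exactly when X is non-negative, which is what cmge X, #0 produces.

// Standalone sketch of: xor (sra X, 31), -1  ==>  cmge X, #0  (per i32 element).
#include <cassert>
#include <cstdint>
#include <limits>

static int32_t xor_sra(int32_t x) { return (x >> 31) ^ -1; }      // left-hand form
static int32_t cmge_zero(int32_t x) { return x >= 0 ? -1 : 0; }   // all-ones mask if x >= 0

int main() {
  for (int32_t x : {0, 1, -1, 42, -42,
                    std::numeric_limits<int32_t>::max(),
                    std::numeric_limits<int32_t>::min()})
    assert(xor_sra(x) == cmge_zero(x));
  return 0;
}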
static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG)
static bool isAllConstantBuildVector(const SDValue &PotentialBVec, uint64_t &ConstVal)
static SDValue performExtractSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue tryToReplaceScalarFPConversionWithSVE(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
Tries to replace scalar FP <-> INT conversions with SVE in streaming functions, this can help to redu...
static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG)
static Value * UseTlsOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG)
WidenVector - Given a value in the V64 register class, produce the equivalent value in the V128 regis...
static SDValue performLD1ReplicateCombine(SDNode *N, SelectionDAG &DAG)
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG, const AArch64Subtarget *ST)
static bool isUnpackedType(EVT VT, SelectionDAG &DAG)
Returns true if the conceptual representation for VT does not map directly to its physical register r...
static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op, AArch64CC::CondCode Cond)
static bool isSetCCOrZExtSetCC(const SDValue &Op, SetCCInfoAndKind &Info)
cl::opt< bool > EnableAArch64ELFLocalDynamicTLSGeneration("aarch64-elf-ldtls-generation", cl::Hidden, cl::desc("Allow AArch64 Local Dynamic TLS code generation"), cl::init(false))
static SDValue ReconstructTruncateFromBuildVector(SDValue V, SelectionDAG &DAG)
static SDValue performVselectPowCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performBSPExpandForSVE(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static SDValue getCondCode(SelectionDAG &DAG, AArch64CC::CondCode CC)
Like SelectionDAG::getCondCode(), but for AArch64 condition codes.
static SDValue foldADCToCINC(SDNode *N, SelectionDAG &DAG)
static bool checkZExtBool(SDValue Arg, const SelectionDAG &DAG)
static SDValue optimizeIncrementingWhile(SDNode *N, SelectionDAG &DAG, bool IsSigned, bool IsEqual)
static SDValue performSunpkloCombine(SDNode *N, SelectionDAG &DAG)
static unsigned getPredicatedReductionOpcode(unsigned Op)
Returns the corresponding predicated SVE reduction opcode for a VECREDUCE_*.
static SDValue tryToConvertShuffleOfTbl2ToTbl4(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static unsigned getAtomicLoad128Opcode(unsigned ISDOpcode, AtomicOrdering Ordering)
static void ReplaceAddWithADDP(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
cl::opt< bool > EnableSVEGISel("aarch64-enable-gisel-sve", cl::Hidden, cl::desc("Enable / disable SVE scalable vectors in Global ISel"), cl::init(false))
static SDValue performSetccMergeZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performPostLD1Combine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, bool IsLaneOp)
Target-specific DAG combine function for post-increment LD1 (lane) and post-increment LD1R.
std::pair< SDValue, uint64_t > lookThroughSignExtension(SDValue Val)
static SDValue performSubWithBorrowCombine(SDNode *N, SelectionDAG &DAG)
bool hasNearbyPairedStore(Iter It, Iter End, Value *Ptr, const DataLayout &DL)
static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG)
static bool canEmitConjunction(SelectionDAG &DAG, const SDValue Val, bool &CanNegate, bool &MustBeFirst, bool &PreferFirst, bool WillNegate, unsigned Depth=0)
Returns true if Val is a tree of AND/OR/SETCC operations that can be expressed as a conjunction.
static bool foldIndexIntoBase(SDValue &BasePtr, SDValue &Index, SDValue Scale, SDLoc DL, SelectionDAG &DAG)
static SDValue emitFloatCompareMask(SDValue LHS, SDValue RHS, SDValue TVal, SDValue FVal, ISD::CondCode CC, bool NoNaNs, const SDLoc &DL, SelectionDAG &DAG)
For SELECT_CC, when the true/false values are (-1, 0) and the compared values are scalars,...
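A tiny standalone sketch of why this case reduces to a bare compare (illustrative only, not LLVM code): with true/false values of (-1, 0), the select result is simply the comparison result sign-extended into a full-width mask.

// Standalone sketch: select_cc(a, b, -1, 0, lt) == -(int)(a < b).
#include <cassert>
#include <cstdint>
#include <utility>

static int32_t select_cc_lt(float a, float b) { return a < b ? -1 : 0; }
static int32_t compare_mask_lt(float a, float b) {
  return -static_cast<int32_t>(a < b); // false -> 0, true -> all-ones
}

int main() {
  const std::pair<float, float> tests[] = {{1.0f, 2.0f}, {2.0f, 1.0f}, {3.0f, 3.0f}};
  for (auto [a, b] : tests)
    assert(select_cc_lt(a, b) == compare_mask_lt(a, b));
  return 0;
}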
static SDValue performRNDRCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue skipExtensionForVectorMULL(SDValue N, SelectionDAG &DAG)
static SDValue performOrXorChainCombine(SDNode *N, SelectionDAG &DAG)
static unsigned getReductionForOpcode(unsigned Op)
static SDValue performAddCombineForShiftedOperands(SDNode *N, SelectionDAG &DAG)
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V)
static bool shouldBeAdjustedToZero(SDValue LHS, APInt C, ISD::CondCode &CC)
static bool isPackedPredicateType(EVT VT, SelectionDAG &DAG)
static SDValue combineSVEBitSel(unsigned IID, SDNode *N, SelectionDAG &DAG)
static SDValue lowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG, unsigned Opcode, bool IsSigned)
static bool isPackedVectorType(EVT VT, SelectionDAG &DAG)
Returns true if VT's elements occupy the lowest bit positions of its associated register class withou...
static bool isTRN_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of "vector_shuffle v,...
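For reference, a short standalone sketch (illustrative only, not LLVM code) of how the single-operand canonical mask arises: the two-operand TRN1 mask for a 4-element shuffle is <0, 4, 2, 6>, and when the second operand is the same vector (the "v, undef" canonical form), the indices that pointed into the second operand fold back onto the first, giving <0, 0, 2, 2>.

// Standalone sketch: build the two-operand TRN1 mask and its
// single-operand ("v, v" / "v, undef") canonical counterpart.
#include <cassert>
#include <vector>

int main() {
  const int NumElts = 4;
  std::vector<int> TwoOp, OneOp;
  for (int i = 0; i < NumElts; i += 2) {
    TwoOp.push_back(i);           // even element of the first operand
    TwoOp.push_back(i + NumElts); // matching element of the second operand
    OneOp.push_back(i);
    OneOp.push_back(i);           // second operand aliases the first
  }
  assert((TwoOp == std::vector<int>{0, 4, 2, 6}));
  assert((OneOp == std::vector<int>{0, 0, 2, 2}));
  return 0;
}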
static AArch64SME::ToggleCondition getSMToggleCondition(const SMECallAttrs &CallAttrs)
static bool isAddSubZExt(SDValue N, SelectionDAG &DAG)
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt)
isVShiftLImm - Check if this is a valid build_vector for the immediate operand of a vector shift left...
static SDValue optimizeBitTest(SDLoc DL, SDValue Op, SDValue Chain, SDValue Dest, unsigned Opcode, SelectionDAG &DAG)
static SDValue performMaskedGatherScatterCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert, SelectionDAG &DAG)
static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performBuildVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG, unsigned Opcode, bool OnlyPackedOffsets=true)
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls)
Return true if the calling convention is one that we can guarantee TCO for.
static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue LowerFLDEXP(SDValue Op, SelectionDAG &DAG)
static SDValue combineSVEReductionInt(SDNode *N, unsigned Opc, SelectionDAG &DAG)
static SDValue isNVCastToHalfWidthElements(SDValue V)
static bool isHalvingTruncateAndConcatOfLegalIntScalableType(SDNode *N)
static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode, SDValue Operand, SelectionDAG &DAG, int &ExtraSteps)
static SDValue performUADDVZextCombine(SDValue A, SelectionDAG &DAG)
static SDValue performAddCSelIntoCSinc(SDNode *N, SelectionDAG &DAG)
Perform the scalar expression combine in the form of: CSEL(c, 1, cc) + b => CSINC(b+c,...
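A standalone scalar check of the identity behind this combine (illustrative only, not LLVM code; the full rewritten form CSINC(b+c, b, cc) is assumed from the truncated comment): CSEL(c, 1, cc) + b selects between b+c and b+1, which is exactly what CSINC(b+c, b, cc) computes.

// Standalone sketch: CSEL(c, 1, cc) + b == CSINC(b + c, b, cc) in modular arithmetic.
#include <cassert>
#include <cstdint>

static uint32_t csel(uint32_t a, uint32_t b, bool cc) { return cc ? a : b; }
static uint32_t csinc(uint32_t a, uint32_t b, bool cc) { return cc ? a : b + 1; }

int main() {
  for (uint32_t c : {0u, 1u, 7u, 0xFFFFFFFFu})
    for (uint32_t b : {0u, 5u, 0xFFFFFFF0u})
      for (bool cc : {false, true})
        assert(csel(c, 1, cc) + b == csinc(b + c, b, cc));
  return 0;
}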
static SDValue performCTLZCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static bool isEligibleForSmallVectorLoadOpt(LoadSDNode *LD, const AArch64Subtarget &Subtarget)
Helper function to check if a small vector load can be optimized.
static std::optional< uint64_t > getConstantLaneNumOfExtractHalfOperand(SDValue &Op)
static void ReplaceATOMIC_LOAD_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &DL, SelectionDAG &DAG, SDValue Chain, bool IsSignaling)
static bool areLoadedOffsetButOtherwiseSame(SDValue Op0, SDValue Op1, SelectionDAG &DAG, unsigned &NumSubLoads)
static SDValue performMulRdsvlCombine(SDNode *Mul, SelectionDAG &DAG)
static bool isEssentiallyExtractHighSubvector(SDValue N)
static bool mayTailCallThisCC(CallingConv::ID CC)
Return true if we might ever do TCO for calls with this calling convention.
static Value * createTblShuffleForSExt(IRBuilderBase &Builder, Value *Op, FixedVectorType *DstTy, bool IsLittleEndian)
static unsigned getExtFactor(SDValue &V)
getExtFactor - Determine the adjustment factor for the position when generating an "extract from vect...
static cl::opt< unsigned > MaxXors("aarch64-max-xors", cl::init(16), cl::Hidden, cl::desc("Maximum of xors"))
static SDValue performInsertVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue performMULLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue performSubNegAndOneCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG)
static SDValue trySimplifySrlAddToRshrnb(SDValue Srl, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static bool isLegalNTStore(Type *DataType, Align Alignment, const DataLayout &DL)
static SDValue performAddDotCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performExtractVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue performReinterpretCastCombine(SDNode *N)
SDValue ReconstructShuffleWithRuntimeMask(SDValue Op, SelectionDAG &DAG)
static SDValue performTBZCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC)
Emit expression as a conjunction (a series of CCMP/CFCMP ops).
static void simplifySetCCIntoEq(ISD::CondCode &CC, SDValue &LHS, SDValue &RHS, SelectionDAG &DAG, const SDLoc DL)
static SDValue tryCombineExtendRShTrunc(SDNode *N, SelectionDAG &DAG)
static bool isAllInactivePredicate(SDValue N)
static SDValue LowerNTStore(StoreSDNode *StoreNode, EVT VT, EVT MemVT, const SDLoc &DL, SelectionDAG &DAG)
static SDValue getVectorBitwiseReduce(unsigned Opcode, SDValue Vec, EVT VT, SDLoc DL, SelectionDAG &DAG)
static SDValue performIntrinsicCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static cl::opt< bool > EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden, cl::desc("Combine extends of AArch64 masked " "gather intrinsics"), cl::init(true))
static bool isZIP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of "vector_shuffle v,...
static SDValue performInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static cl::opt< bool > UseFEATCPACodegen("aarch64-use-featcpa-codegen", cl::Hidden, cl::desc("Generate ISD::PTRADD nodes for pointer arithmetic in " "SelectionDAG for FEAT_CPA"), cl::init(false))
static bool createTblShuffleMask(unsigned SrcWidth, unsigned DstWidth, unsigned NumElts, bool IsLittleEndian, SmallVectorImpl< int > &Mask)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue performAddCombineSubShift(SDNode *N, SDValue SUB, SDValue Z, SelectionDAG &DAG)
static SDValue performANDSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static const TargetRegisterClass * getPredicateRegisterClass(PredicateConstraint Constraint, EVT VT)
static SDValue performAddSubCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue optimizeBrk(SDNode *N, SelectionDAG &DAG)
static SDValue performSubsToAndsCombine(SDNode *N, SDNode *SubsNode, SDNode *AndNode, SelectionDAG &DAG, unsigned CCIndex, unsigned CmpIndex, unsigned CC)
static std::pair< SDValue, SDValue > getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG)
#define FALKOR_STRIDED_ACCESS_MD
assert(UImm && (UImm != ~static_cast< T >(0)) && "Invalid immediate!")
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Module.h This file contains the declarations for the Module class.
This defines the Use class.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
This file provides utility analysis objects describing memory locations.
This file defines ARC utility functions which are used by various parts of the compiler.
Contains matchers for matching SelectionDAG nodes and values.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static constexpr int Concat[]
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
bool branchTargetEnforcement() const
unsigned getVarArgsFPRSize() const
void setVarArgsStackOffset(unsigned Offset)
void setVarArgsStackIndex(int Index)
void setEarlyAllocSMESaveBuffer(Register Ptr)
void setTailCallReservedStack(unsigned bytes)
bool hasELFSignedGOT() const
SmallVectorImpl< ForwardedRegister > & getForwardedMustTailRegParms()
void setIsSplitCSR(bool s)
int getVarArgsFPRIndex() const
void incNumLocalDynamicTLSAccesses()
void setBytesInStackArgArea(unsigned bytes)
int getVarArgsStackIndex() const
void setVarArgsGPRIndex(int Index)
int getVarArgsGPRIndex() const
void setPStateSMReg(Register Reg)
void setVarArgsFPRSize(unsigned Size)
unsigned getVarArgsStackOffset() const
SMEAttrs getSMEFnAttrs() const
unsigned getVarArgsGPRSize() const
unsigned getSRetReturnReg() const
Register getPStateSMReg() const
void setSRetReturnReg(unsigned Reg)
unsigned getBytesInStackArgArea() const
void setVarArgsFPRIndex(int Index)
void setVarArgsGPRSize(unsigned Size)
void setArgumentStackToRestore(unsigned bytes)
void setHasStreamingModeChanges(bool HasChanges)
bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const
const AArch64RegisterInfo * getRegisterInfo() const override
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
unsigned getMaximumJumpTableSize() const
bool isTargetDarwin() const
Align getPrefLoopAlignment() const
Align getPrefFunctionAlignment() const
bool isTargetMachO() const
unsigned getMaxBytesForLoopAlignment() const
bool supportsAddressTopByteIgnored() const
CPU has TBI (top byte of addresses is ignored during HW address translation) and OS enables it.
bool isStreamingCompatible() const
Returns true if the function has a streaming-compatible body.
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
bool useSVEForFixedLengthVectors() const
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
bool isLittleEndian() const
bool isStreaming() const
Returns true if the function has a streaming body.
unsigned getSVEVectorSizeInBits() const
bool isXRegisterReserved(size_t i) const
unsigned getMaxSVEVectorSizeInBits() const
bool isCallingConvWin64(CallingConv::ID CC, bool IsVarArg) const
unsigned getMinSVEVectorSizeInBits() const
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable, SDValue Chain, SDValue InGlue, unsigned Condition, bool InsertVectorLengthCheck=false) const
If a change in streaming mode is required on entry to/return from a function call it emits and return...
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset) const override
Return true if it is profitable to reduce a load to a smaller type.
Value * getIRStackGuard(IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
Value * getSafeStackPointerLocation(IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const override
If the target has a standard location for the unsafe stack pointer, returns the address of that locat...
void initializeSplitCSR(MachineBasicBlock *Entry) const override
Perform necessary initialization to handle a subset of CSRs explicitly via copies.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
void insertSSPDeclarations(Module &M, const LibcallLoweringInfo &Libcalls) const override
Inserts necessary declarations for SSP (stack protection) purpose.
EVT getPromotedVTForPredicate(EVT VT) const
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
unsigned getVaListSizeInBits(const DataLayout &DL) const override
Returns the size of the platform's va_list object.
MachineBasicBlock * EmitZAInstr(unsigned Opc, unsigned BaseReg, MachineInstr &MI, MachineBasicBlock *BB) const
void insertCopiesSplitCSR(MachineBasicBlock *Entry, const SmallVectorImpl< MachineBasicBlock * > &Exits) const override
Insert explicit copies in entry and exit blocks.
int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) const override
Return the prefered common base offset.
bool shouldLowerReductionToSVE(SDValue RdxOp, std::optional< Intrinsic::ID > &PairwiseOpIID) const
Returns true if RdxOp should be lowered to a SVE reduction.
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool lowerInterleavedStore(Instruction *Store, Value *Mask, ShuffleVectorInst *SVI, unsigned Factor, const APInt &GapMask) const override
Lower an interleaved store into a stN intrinsic.
MachineBasicBlock * EmitTileLoad(unsigned Opc, unsigned BaseReg, MachineInstr &MI, MachineBasicBlock *BB) const
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL, bool UseScalable) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
bool shouldFoldConstantShiftPairToMask(const SDNode *N) const override
Return true if it is profitable to fold a pair of shifts into a mask.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
bool preferSelectsOverBooleanArithmetic(EVT VT) const override
Should we prefer selects to doing arithmetic on boolean types.
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain targets require unusual breakdowns of certain types.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
Provide custom lowering hooks for some operations.
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, const MachineFunction &MF) const override
Returns true if it's reasonable to merge stores to MemVT size.
bool shouldOptimizeMulOverflowWithZeroHighBits(LLVMContext &Context, EVT VT) const override
bool shouldInsertTrailingSeqCstFenceForAtomicStore(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert a seq_cst trailing fence without reducing the or...
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool shouldRemoveRedundantExtend(SDValue Op) const override
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC) const
Selects the correct CCAssignFn for a given CallingConvention value.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ISD::SETCC ValueType.
bool optimizeExtendOrTruncateConversion(Instruction *I, Loop *L, const TargetTransformInfo &TTI) const override
Try to optimize extending or truncating conversion instructions (like zext, trunc,...
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const
Selects the correct CCAssignFn for a given CallingConvention value.
bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask, IntrinsicInst *DI) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, EVT *LargestVT=nullptr) const override
Determines the optimal series of memory ops to replace the memset / memcpy.
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this functions.
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X, SDValue Y) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool isLegalICmpImmediate(int64_t) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
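A small standalone check of the stated equivalence (illustrative only, not LLVM code; 32-bit modular arithmetic): since xor x, -1 is ~x and ~x == -x - 1 in two's complement, sub y, (xor x, -1) equals add (add x, 1), y.

// Standalone sketch: y - (x ^ -1) == (x + 1) + y for 32-bit values.
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x : {0u, 1u, 41u, 0x80000000u, 0xFFFFFFFFu})
    for (uint32_t y : {0u, 7u, 0xDEADBEEFu})
      assert(y - (x ^ 0xFFFFFFFFu) == (x + 1) + y);
  return 0;
}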
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const override
bool isOpSuitableForLSE128(const Instruction *I) const
void fixupPtrauthDiscriminator(MachineInstr &MI, MachineBasicBlock *BB, MachineOperand &IntDiscOp, MachineOperand &AddrDiscOp, const TargetRegisterClass *AddrDiscRC) const
Replace (0, vreg) discriminator components with the operands of blend or with (immediate,...
bool lowerInterleavedLoad(Instruction *Load, Value *Mask, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor, const APInt &GapMask) const override
Lower an interleaved load into a ldN intrinsic.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool fallBackToDAGISel(const Instruction &Inst) const override
bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override
Return true if the target has native support for the specified value type and it is 'desirable' to us...
bool isLegalAddScalableImmediate(int64_t) const override
Return true if adding the specified scalable immediate is legal, that is the target has add instructi...
Value * createComplexDeinterleavingIR(IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, Value *Accumulator=nullptr) const override
Create the IR node for the given complex deinterleaving operation.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
MachineBasicBlock * EmitCheckMatchingVL(MachineInstr &MI, MachineBasicBlock *MBB) const
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL, bool &UseScalable) const
Returns true if VecTy is a legal interleaved access type.
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg, const DataLayout &DL) const override
For some targets, an LLVM struct type must be broken down into multiple simple types,...
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
MachineBasicBlock * EmitLoweredCatchRet(MachineInstr &MI, MachineBasicBlock *BB) const
bool isComplexDeinterleavingSupported() const override
Does this target support complex deinterleaving.
bool isZExtFree(Type *Ty1, Type *Ty2) const override
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const override
SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const
MachineBasicBlock * EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isProfitableToHoist(Instruction *I) const override
Check if it is profitable to hoist instruction in then/else to if.
bool isOpSuitableForRCPC3(const Instruction *I) const
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const override
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
MachineBasicBlock * EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode, bool Op0IsDef) const
MachineBasicBlock * EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const
bool shouldExpandVectorMatch(EVT VT, unsigned SearchSize) const override
Return true if the @llvm.experimental.vector.match intrinsic should be expanded for vector type ‘VT’ ...
MachineBasicBlock * EmitEntryPStateSM(MachineInstr &MI, MachineBasicBlock *BB) const
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const override
Control the following reassociation of operands: (op (op x, c1), y) -> (op (op x, y),...
bool shouldPreservePtrArith(const Function &F, EVT PtrVT) const override
In AArch64, true if FEAT_CPA is present.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass into.
MachineBasicBlock * EmitF128CSEL(MachineInstr &MI, MachineBasicBlock *BB) const
LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &FuncAttributes) const override
LLT returning variant.
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
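A brute-force standalone check of the rewrite this hook gates (illustrative only, not LLVM code; the rewritten form ((X shifted the other way by Y) & C) ==/!= 0 is assumed here from the truncated comment): as a zero/non-zero test the two forms agree, because the X bits shifted out correspond to C bits the opposite shift already discarded. The check below covers the C l>> Y direction for 8-bit values; the << direction is symmetric.

// Standalone sketch: (X & (C >> Y)) != 0  iff  ((X << Y) & C) != 0, in 8 bits.
#include <cassert>

int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned C = 0; C < 256; ++C)
      for (unsigned Y = 0; Y < 8; ++Y) {
        bool orig  = (X & (C >> Y)) != 0;
        bool xform = (((X << Y) & 0xFFu) & C) != 0;
        assert(orig == xform);
      }
  return 0;
}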
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
bool needsFixedCatchObjects() const override
Used for exception handling on Win64.
const AArch64TargetMachine & getTM() const
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool generateFMAsInMachineCombiner(EVT VT, CodeGenOptLevel OptLevel) const override
bool isComplexDeinterleavingOperationSupported(ComplexDeinterleavingOperation Operation, Type *Ty) const override
Does this target support complex deinterleaving with the given operation and type.
bool isOpSuitableForLDPSTP(const Instruction *I) const
AArch64TargetLowering(const TargetMachine &TM, const AArch64Subtarget &STI)
bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
bool isLegalAddImmediate(int64_t) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool shouldConsiderGEPOffsetSplit() const override
bool isVectorClearMaskLegal(ArrayRef< int > M, EVT VT) const override
Similar to isShuffleMaskLegal.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo, const LibcallLoweringInfo *libcallLowering) const override
This method returns a target specific FastISel object, or null if the target does not support "fast" ...
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool useLoadStackGuardNode(const Module &M) const override
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
void getTgtMemIntrinsic(SmallVectorImpl< IntrinsicInfo > &Infos, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
getTgtMemIntrinsic - Represent NEON load and store intrinsics as MemIntrinsicNodes.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for this result type with this index.
ArrayRef< MCPhysReg > getRoundingControlRegisters() const override
Returns a 0 terminated array of rounding control registers that can be attached into strict FP call.
bool isFPImmLegalAsFMov(const APFloat &Imm, EVT VT) const
bool lowerInterleaveIntrinsicToStore(Instruction *Store, Value *Mask, ArrayRef< Value * > InterleaveValues) const override
Lower an interleave intrinsic to a target specific store intrinsic.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
bool isDesirableToCommuteXorWithShift(const SDNode *N) const override
Returns false if N is a bit extraction pattern of (X >> C) & Mask.
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Returns false if N is a bit extraction pattern of (X >> C) & Mask.
bool enableAggressiveFMAFusion(EVT VT) const override
Enable aggressive FMA fusion on targets that want it.
MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override
Return the type to use for a scalar shift opcode, given the shifted amount type.
MachineBasicBlock * EmitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override
Return true if the @llvm.get.active.lane.mask intrinsic should be expanded using generic code in Sele...
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
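A quick standalone check of the algebra this hook evaluates (illustrative only, not LLVM code; the completed right-hand side (add (mul x, c2), c1*c2) is assumed from the truncated comment and is plain distributivity in modular arithmetic):

// Standalone sketch: (x + c1) * c2 == x * c2 + c1 * c2 for 32-bit values.
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x : {0u, 1u, 123u, 0xFFFFFFF0u})
    for (uint32_t c1 : {1u, 3u, 0x80000000u})
      for (uint32_t c2 : {2u, 5u, 0xFFFFu})
        assert((x + c1) * c2 == x * c2 + c1 * c2);
  return 0;
}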
bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON=false) const
bool mergeStoresAfterLegalization(EVT VT) const override
SVE code generation for fixed length vectors does not custom lower BUILD_VECTOR.
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
LLVM_ABI APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
bool isMinSignedValue() const
Determine if this is the smallest signed value.
uint64_t getZExtValue() const
Get zero extended value.
static LLVM_ABI void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
unsigned popcount() const
Count the number of bits set.
LLVM_ABI APInt getHiBits(unsigned numBits) const
Compute an APInt containing numBits highbits from this APInt.
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
APInt abs() const
Get the absolute value.
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
unsigned getBitWidth() const
Return the number of bits in the APInt.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
bool isNegative() const
Determine sign of this APInt.
LLVM_ABI APInt sadd_ov(const APInt &RHS, bool &Overflow) const
bool sle(const APInt &RHS) const
Signed less or equal comparison.
LLVM_ABI APInt uadd_ov(const APInt &RHS, bool &Overflow) const
unsigned countr_zero() const
Count the number of trailing zero bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool getBoolValue() const
Convert APInt to a boolean value.
bool isMask(unsigned numBits) const
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
bool sge(const APInt &RHS) const
Signed greater or equal comparison.
bool isOne() const
Determine if this is a value of 1.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
int64_t getSExtValue() const
Get sign extended value.
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
an instruction to allocate memory on the stack
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
bool empty() const
Check if the array is empty.
An instruction that atomically checks whether a specified value is in a memory location,...
Value * getCompareOperand()
an instruction that atomically reads a memory location, combines it with another value,...
@ FMinimum
*p = minimum(old, v); minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ FMaximum
*p = maximum(old, v); maximum matches the behavior of llvm.maximum.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v); minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v); maxnum matches the behavior of llvm.maxnum.
bool isFloatingPointOperation() const
BinOp getOperation() const
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
const BlockAddress * getBlockAddress() const
Function * getFunction() const
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI ConstantFPSDNode * getConstantFPSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant FP or null if this is not a constant FP splat.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
LLVM_ABI ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
LLVM_ABI std::optional< std::pair< APInt, APInt > > isArithmeticSequence() const
If this BuildVector is constant and represents an arithmetic sequence "<a, a+n, a+2n,...
LLVM_ABI bool isConstant() const
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
static LLVM_ABI bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
LLVM_ABI bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
LLVM_ABI void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
int64_t getLocMemOffset() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
This class represents a function call, abstracting a target machine's calling convention.
bool isZero() const
Return true if the value is positive or negative zero.
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
uint64_t getNumOperands() const
A parsed version of the target data layout string in and methods for querying it.
bool isLittleEndian() const
Layout endianness...
LLVM_ABI TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
constexpr bool isScalar() const
Exactly one element.
Tagged union holding either a T or a Error.
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Class to represent fixed width SIMD vectors.
static FixedVectorType * getInteger(FixedVectorType *VTy)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Type * getParamType(unsigned i) const
Parameter type accessors.
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
FunctionType * getFunctionType() const
Returns the FunctionType for me.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Constant * getPersonalityFn() const
Get the personality function associated with this function.
AttributeList getAttributes() const
Return the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
const Argument * const_arg_iterator
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
int64_t getOffset() const
const GlobalValue * getGlobal() const
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
bool hasExternalWeakLinkage() const
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Type * getValueType() const
Common base class shared among various IRBuilders.
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
BasicBlock * GetInsertBlock() const
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
constexpr unsigned getScalarSizeInBits() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
static LLT integer(unsigned SizeInBits)
static LLT floatIEEE(unsigned SizeInBits)
This is an important class for using LLVM in a threaded context.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
Tracks which library functions to use for a particular subtarget.
LLVM_ABI CallingConv::ID getLibcallImplCallingConv(RTLIB::LibcallImpl Call) const
Get the CallingConv that should be used for the specified libcall.
LLVM_ABI RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Return the lowering's selection of implementation call for Call.
An instruction for reading from memory.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Represents a single loop in the control flow graph.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
@ INVALID_SIMPLE_VALUE_TYPE
static auto integer_fixedlen_vector_valuetypes()
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
bool isScalableVT() const
Return true if the type is a scalable type.
static auto all_valuetypes()
SimpleValueType Iteration.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto scalable_vector_valuetypes()
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool isFixedLengthVector() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
bool is64BitVector() const
Return true if this is a 64-bit vector type.
static auto fp_fixedlen_vector_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
SSPLayoutKind getObjectSSPLayout(int ObjectIdx) const
LLVM_ABI void computeMaxCallFrameSize(MachineFunction &MF, std::vector< MachineBasicBlock::iterator > *FrameSDOps=nullptr)
Computes the maximum size of a callframe.
void setAdjustsStack(bool V)
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
@ SSPLK_None
Did not trigger a stack protector.
void setFrameAddressIsTaken(bool T)
bool hasScalableStackID(int ObjectIdx) const
bool isImmutableObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to an immutable object.
int getStackProtectorIndex() const
Return the index for the stack protector object.
void setStackID(int ObjectIdx, uint8_t ID)
void setHasTailCall(bool V=true)
bool hasMustTailInVarArgFunc() const
Returns true if the function is variadic and contains a musttail call.
void setReturnAddressIsTaken(bool s)
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
LLVM_ABI int CreateVariableSizedObject(Align Alignment, const AllocaInst *Alloca)
Notify the MachineFrameInfo object that a variable sized object has been created.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
bool hasStackProtectorIndex() const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
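A hedged sketch of how the MachineFunction accessors above are typically combined when a lowering hook needs a fresh block and a live-in copy of a physical register (the helper name is invented):
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
static MachineBasicBlock *addHelperBlock(MachineFunction &MF, MCRegister PhysReg,
                                         const TargetRegisterClass *RC) {
  // Make PhysReg a function live-in and obtain a virtual register holding it.
  Register VReg = MF.addLiveIn(PhysReg, RC);
  (void)VReg;
  // Allocate a new block (not tied to any IR basic block) and append it.
  MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock();
  MF.insert(MF.end(), NewMBB);
  return NewMBB;
}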
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
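The MachineInstrBuilder methods above are almost always used through the chained BuildMI pattern. A sketch, assuming the block, insertion point, TII and registers come from the surrounding code; ADDXri is just an illustrative AArch64 opcode and would need the target's generated instruction enum in scope:
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
using namespace llvm;
static void emitAddImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                             const DebugLoc &DL, const TargetInstrInfo *TII,
                             Register DestReg, Register SrcReg) {
  BuildMI(MBB, MI, DL, TII->get(AArch64::ADDXri), DestReg)
      .addReg(SrcReg) // register source operand
      .addImm(42)     // immediate to add (illustrative value)
      .addImm(0);     // shift amount
}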
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value.
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
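A short sketch of creating a MachineMemOperand with the flags above for a 64-bit stack load; MF and FI are assumed to exist, and the LLT/alignment choices are illustrative:
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGenTypes/LowLevelType.h"
using namespace llvm;
static MachineMemOperand *makeLoadMMO(MachineFunction &MF, int FI) {
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  return MF.getMachineMemOperand(PtrInfo,
                                 MachineMemOperand::MOLoad |
                                     MachineMemOperand::MODereferenceable,
                                 LLT::scalar(64), Align(8));
}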
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
LLVM_ABI bool hasAtMostUserInstrs(Register Reg, unsigned MaxUsers) const
hasAtMostUserInstrs - Return true if the given register has at most MaxUsers non-debug user instructions.
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
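Sketch of the MachineRegisterInfo queries above as they appear in a typical virtual-register rewrite (the helper is hypothetical):
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
static void inspectVReg(MachineRegisterInfo &MRI, Register Reg) {
  const TargetRegisterClass *RC = MRI.getRegClass(Reg); // class of the vreg
  MachineInstr *Def = MRI.getVRegDef(Reg);              // unique def in SSA form
  if (Def && MRI.use_empty(Reg)) {
    // Dead definition: a replacement vreg of the same class could be created.
    Register NewReg = MRI.createVirtualRegister(RC);
    (void)NewReg;
  }
}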
An SDNode that represents everything that will be needed to construct a MachineInstr.
const SDValue & getPassThru() const
ISD::LoadExtType getExtensionType() const
This is a base class used to represent MGATHER and MSCATTER nodes.
const SDValue & getIndex() const
bool isIndexScaled() const
const SDValue & getScale() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
bool isIndexSigned() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getInc() const
const SDValue & getScale() const
const SDValue & getMask() const
const SDValue & getIntID() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
ISD::MemIndexType getIndexType() const
This class is used to represent an MLOAD node.
const SDValue & getBasePtr() const
bool isExpandingLoad() const
ISD::LoadExtType getExtensionType() const
const SDValue & getMask() const
const SDValue & getPassThru() const
const SDValue & getOffset() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This class is used to represent an MSTORE node.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
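The masked load/store accessors above are usually exercised from a DAG combine. A hedged sketch of checking that an ISD::MLOAD node is the simple, unindexed, non-extending form:
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;
static bool isSimpleMaskedLoad(SDNode *N) {
  auto *MLD = cast<MaskedLoadSDNode>(N); // N is assumed to be an ISD::MLOAD
  // Once these shape checks pass, the interesting operands are reachable via
  // MLD->getBasePtr(), MLD->getMask() and MLD->getPassThru().
  return MLD->isUnindexed() &&                          // no pre/post increment
         MLD->getExtensionType() == ISD::NON_EXTLOAD && // not an extending load
         !MLD->isExpandingLoad();                       // not an expanding load
}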
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getBaseAlign() const
Returns alignment and volatility of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
AtomicOrdering getMergedOrdering() const
Return a single atomic ordering that is at least as strong as both the success and failure orderings ...
const SDValue & getChain() const
bool isNonTemporal() const
bool isAtomic() const
Return true if the memory operation ordering is Unordered or higher.
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
bool getRtLibUseGOT() const
Returns true if PLT should be avoided for RTLib calls.
Metadata * getModuleFlag(StringRef Key) const
Return the corresponding value if Key appears in module flags, otherwise return null.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isStrictFPOpcode()
Test if this node is a strict floating point pseudo-op.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< value_op_iterator > op_values() const
void dropFlags(unsigned Mask)
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
const APInt & getConstantOperandAPInt(unsigned Num) const
Helper method returns the APInt of a ConstantSDNode operand.
LLVM_ABI bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
iterator_range< user_iterator > users()
void setFlags(SDNodeFlags NewFlags)
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
op_iterator op_end() const
bool isAssert() const
Test if this node is an assert operation.
op_iterator op_begin() const
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
void setNode(SDNode *N)
set the SDNode
unsigned getOpcode() const
unsigned getNumOperands() const
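A minimal sketch of the SDValue/SDNode traversal style these accessors support: scan a node's operands for a single-use constant.
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;
static bool hasOneUseConstantOperand(SDValue Op) {
  for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I) {
    SDValue Operand = Op.getOperand(I);
    if (Operand.getOpcode() == ISD::Constant && Operand.hasOneUse())
      return true; // constant operand whose only user is Op's node
  }
  return false;
}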
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasStreamingInterface() const
bool hasStreamingCompatibleInterface() const
bool hasNonStreamingInterface() const
bool hasStreamingBody() const
SMECallAttrs is a utility class to hold the SMEAttrs for a callsite.
bool requiresPreservingZT0() const
bool requiresSMChange() const
bool requiresLazySave() const
bool requiresPreservingAllZAState() const
Class to represent scalable SIMD vectors.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC)
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
LLVM_ABI SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
LLVM_ABI SDValue getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, unsigned SrcAS, unsigned DestAS)
Return an AddrSpaceCastSDNode.
bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags, bool AllowCommute=false)
Get the specified node if it's already available, or else return NULL.
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI bool canIgnoreSignBitOfZero(const SDUse &Use) const
Check if a use of a float value is insensitive to signed zeros.
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
LLVM_ABI SDValue getIdentityElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) identity element for the given opcode, if it exists.
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
const SelectionDAGTargetInfo & getSelectionDAGInfo() const
LLVM_ABI bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
LLVM_ABI SDValue getMaskedHistogram(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns the sum of the base pointer and offset.
LLVM_ABI SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getTypeSize(const SDLoc &DL, EVT VT, TypeSize TS)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, which starts a new call frame in which InSize bytes are set up inside ...
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
const LibcallLoweringInfo & getLibcalls() const
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
void addCalledGlobal(const SDNode *Node, const GlobalValue *GV, unsigned OpFlags)
Set CalledGlobal to be associated with Node.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getPOISON(EVT VT)
Return a POISON node. POISON does not have a useful SDLoc.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getDeactivationSymbol(const GlobalValue *GV)
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
LLVM_ABI SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
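A sketch of how the SelectionDAG factory methods above compose inside a custom lowering hook. The pattern built here, (x != 0) ? x : 1, is purely illustrative, and real code would query the target's setcc result type instead of hard-coding MVT::i1:
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
static SDValue lowerExample(SDValue X, SelectionDAG &DAG, const SDLoc &DL) {
  EVT VT = X.getValueType();
  SDValue Zero = DAG.getConstant(0, DL, VT);                 // constant 0 of type VT
  SDValue One = DAG.getConstant(1, DL, VT);                  // constant 1 of type VT
  SDValue Cond = DAG.getSetCC(DL, MVT::i1, X, Zero, ISD::SETNE);
  return DAG.getSelect(DL, VT, Cond, X, One);                // select(cond, x, 1)
}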
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI bool isSelectMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
VectorType * getType() const
Overload to return most specific vector type.
static LLVM_ABI bool isSingleSourceMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
int getSplatIndex() const
ArrayRef< int > getMask() const
static LLVM_ABI bool isSplatMask(ArrayRef< int > Mask)
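The shuffle-mask helpers above classify an explicit mask. A sketch for a fixed-width IR shufflevector (scalable shuffles would not take this path):
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;
static bool isSingleSourceReverse(const ShuffleVectorInst *SVI) {
  SmallVector<int, 16> Mask;
  SVI->getShuffleMask(Mask); // expand the constant mask into integers
  int NumSrcElts =
      cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
  return ShuffleVectorInst::isSingleSourceMask(Mask, NumSrcElts) &&
         ShuffleVectorInst::isReverseMask(Mask, NumSrcElts);
}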
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
Represent a constant reference to a string, i.e.
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
constexpr size_t size() const
Get the string size.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
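A small sketch of the StringRef parsing and StringSwitch dispatch shown above, in the style used for register-name and constraint handling (the helper names are invented):
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
using namespace llvm;
// Map "x0".."x30" to its register number, -1 otherwise.
static int parseXRegName(StringRef Name) {
  unsigned RegNo;
  if (!Name.starts_with("x") || Name.drop_front(1).getAsInteger(10, RegNo) ||
      RegNo > 30)
    return -1; // getAsInteger returns true on parse failure
  return static_cast<int>(RegNo);
}
// Literal-string dispatch without a chain of if/else comparisons.
static bool isRoundingName(StringRef Name) {
  return StringSwitch<bool>(Name).Case("ceil", true).Case("floor", true).Default(false);
}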
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
EVT getMemValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
void setMaxDivRemBitWidthSupported(unsigned SizeInBits)
Set the size in bits of the maximum div/rem the backend supports.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset=std::nullopt) const
Return true if it is profitable to reduce a load to a smaller type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
LegalizeAction
This enum indicates whether operations are valid for a target, and if not, what action should be used...
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
ShiftLegalizationStrategy
Return the preferred strategy to legalize this SHIFT instruction, with ExpansionFactor being the recu...
virtual Value * getIRStackGuard(IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
virtual bool shouldLocalize(const MachineInstr &MI, const TargetTransformInfo *TTI) const
Check whether or not MI needs to be moved close to its uses.
void setMaximumJumpTableSize(unsigned)
Indicate the maximum number of entries in jump tables.
virtual unsigned getMinimumJumpTableEntries() const
Return lower limit for number of blocks in a jump table.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to make them valid.
virtual void insertSSPDeclarations(Module &M, const LibcallLoweringInfo &Libcalls) const
Inserts necessary declarations for SSP (stack protection) purpose.
void setMaxBytesForAlignment(unsigned MaxBytes)
void setHasExtractBitsInsn(bool hasExtractInsn=true)
Tells the code generator that the target has BitExtract instructions.
const LibcallLoweringInfo & getLibcallLoweringInfo() const
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
unsigned getMaximumJumpTableSize() const
Return upper limit for number of entries in a jump table.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
bool EnableExtLdPromotion
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is a legal add immediate, that is, the target has add instructions...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
void setPartialReduceMLAAction(unsigned Opc, MVT AccVT, MVT InputVT, LegalizeAction Action)
Indicate how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treate...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
@ ZeroOrOneBooleanContent
@ ZeroOrNegativeOneBooleanContent
virtual ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Get the libcall impl routine name for the specified libcall.
static StringRef getLibcallImplName(RTLIB::LibcallImpl Call)
Get the libcall routine name for the specified libcall implementation.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
virtual Value * getSafeStackPointerLocation(IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const
Returns the target-specific address of the unsafe stack pointer.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
MVT getFrameIndexTy(const DataLayout &DL) const
Return the type for frame index, which is determined by the alloca address space specified through th...
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
bool isLoadLegalOrCustom(EVT ValVT, EVT MemVT, Align Alignment, unsigned AddrSpace, unsigned ExtType, bool Atomic) const
Return true if the specified load with extension is legal or custom on this target.
const RTLIB::RuntimeLibcallsInfo & getRuntimeLibcallsInfo() const
virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
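The TargetLoweringBase setters above are protected members and are normally called from a target's TargetLowering constructor. A hedged fragment of what such a constructor body might contain; the types, actions and register-class names are illustrative, not the actual AArch64 configuration:
// Inside a hypothetical MyTargetTargetLowering constructor:
addRegisterClass(MVT::i64, &MyTarget::GPR64RegClass);    // i64 lives in 64-bit GPRs (hypothetical class)
setOperationAction(ISD::CTPOP, MVT::i64, Expand);        // expand popcount for i64
setTruncStoreAction(MVT::i64, MVT::i8, Legal);           // truncating i64->i8 store is fine
setBooleanContents(ZeroOrOneBooleanContent);             // i1 results are 0 or 1
computeRegisterProperties(Subtarget.getRegisterInfo());  // derive everything else last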
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
virtual bool isTargetCanonicalConstantNode(SDValue Op) const
Returns true if the given Opc is considered a canonical constant for the target, which should not be ...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, EVT *LargestVT=nullptr) const
Determines the optimal series of memory ops to replace the memset / memcpy.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, UndefPoisonKind Kind, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
virtual bool useLoadStackGuardNode(const Module &M) const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
void setTypeIdForCallsiteInfo(const CallBase *CB, MachineFunction &MF, MachineFunction::CallSiteInfo &CSInfo) const
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
unsigned getPointerSize(unsigned AS) const
Get the pointer size for this target.
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned TLSSize
Bit size of immediate TLS offsets (0 == use the default).
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Target - Wrapper for Target specific information.
This class represents a truncation of integer types.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
static LLVM_ABI IntegerType * getInt128Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
@ HalfTyID
16-bit floating point type
@ FloatTyID
32-bit floating point type
@ BFloatTyID
16-bit floating point type (7-bit significand)
@ DoubleTyID
64-bit floating point type
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeID getTypeID() const
Return the type id for the type.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
User * getUser() const
Returns the User that contains this Use.
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< use_iterator > uses()
LLVM_ABI void dump() const
Support for debugging, callable in GDB: V->dump()
Base class of all SIMD vector types.
static VectorType * getHalfElementsVectorType(VectorType *VTy)
This static method returns a VectorType with half as many elements as the input type and the same ele...
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static VectorType * getTruncatedElementVectorType(VectorType *VTy)
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
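A short sketch tying the Type, VectorType and ElementCount entries above together: build a fixed <4 x i32> and a scalable <vscale x 4 x i32> type and query their element counts.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
using namespace llvm;
static void makeVectorTypes(LLVMContext &Ctx) {
  Type *I32 = Type::getInt32Ty(Ctx);
  VectorType *Fixed = VectorType::get(I32, ElementCount::getFixed(4));
  VectorType *Scalable = VectorType::get(I32, ElementCount::getScalable(4));
  (void)Fixed;
  ElementCount EC = Scalable->getElementCount();
  (void)EC.getKnownMinValue(); // 4: minimum number of elements
  (void)EC.isScalable();       // true: actual count is 4 * vscale
}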
Type * getIndexedType() const
A range adaptor for a pair of iterators.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isValidCBCond(AArch64CC::CondCode Code)
True if a given condition code can be used in a fused compare-and-branch instruction,...
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_HI12
MO_HI12 - This flag indicates that a symbol operand represents the bits 13-24 of a 64-bit address,...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint8_t encodeAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType9(uint64_t Imm)
static bool isAdvSIMDModImmType4(uint64_t Imm)
static bool isAdvSIMDModImmType5(uint64_t Imm)
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType10(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType9(uint64_t Imm)
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static bool isAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType5(uint64_t Imm)
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static bool isAdvSIMDModImmType10(uint64_t Imm)
static int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
static uint64_t decodeAdvSIMDModImmType10(uint8_t Imm)
static uint8_t encodeAdvSIMDModImmType8(uint64_t Imm)
static bool isAdvSIMDModImmType12(uint64_t Imm)
static bool isSVELogicalImm(unsigned SizeInBits, uint64_t ImmVal, uint64_t &Encoding)
static uint8_t encodeAdvSIMDModImmType11(uint64_t Imm)
static bool isSVECpyDupImm(int SizeInBits, int64_t Val, int32_t &Imm, int32_t &Shift)
static bool isAdvSIMDModImmType11(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType6(uint64_t Imm)
static bool isAdvSIMDModImmType8(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType4(uint64_t Imm)
static bool isAdvSIMDModImmType6(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType1(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType1(uint64_t Imm)
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
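Sketch of the logical-immediate helpers above (declared in the target-internal MCTargetDesc/AArch64AddressingModes.h header): test whether a 64-bit constant is encodable as an AND/ORR/EOR immediate and obtain its encoding.
#include "MCTargetDesc/AArch64AddressingModes.h"
using namespace llvm;
static bool tryEncodeLogicalImm(uint64_t Imm, uint64_t &Encoding) {
  if (!AArch64_AM::isLogicalImmediate(Imm, /*regSize=*/64))
    return false; // e.g. 0 and ~0ULL are never valid logical immediates
  Encoding = AArch64_AM::encodeLogicalImmediate(Imm, /*regSize=*/64);
  return true;
}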
ArrayRef< MCPhysReg > getFPRArgRegs()
static constexpr unsigned SVEMaxBitsPerVector
const unsigned RoundingBitsPos
const uint64_t ReservedFPControlBits
static constexpr unsigned SVEBitsPerBlock
int32_t getSMEPseudoMap(uint32_t Opcode)
ArrayRef< MCPhysReg > getGPRArgRegs()
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo, const LibcallLoweringInfo *libcallLowering)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ ARM64EC_Thunk_Native
Calling convention used in the ARM64EC ABI to implement calls between ARM64 code and thunks.
@ AArch64_VectorCall
Used between AArch64 Advanced SIMD functions.
@ Swift
Calling convention for Swift.
@ AArch64_SVE_VectorCall
Used between AArch64 SVE functions.
@ CFGuard_Check
Special calling convention on Windows for calling the Control Guard Check ICall function.
@ PreserveMost
Used for runtime calls that preserve most registers.
@ AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2
Preserve X2-X15, X19-X29, SP, Z0-Z31, P0-P15.
@ CXX_FAST_TLS
Used for access functions.
@ AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0
Preserve X0-X13, X19-X29, SP, Z0-Z31, P0-P15.
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
@ AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1
Preserve X1-X15, X19-X29, SP, Z0-Z31, P0-P15.
@ PreserveAll
Used for runtime calls that preserve (almost) all registers.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ PreserveNone
Used for runtime calls that preserve no general registers.
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
@ Win64
The C convention as implemented on Windows/x86-64 and AArch64.
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
@ ARM64EC_Thunk_X64
Calling convention used in the ARM64EC ABI to implement calls between x64 code and thunks.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNormalMaskedLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed masked load.
bool isNormalMaskedStore(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed masked store.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
@ POISON
POISON - A poison node.
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ LOOP_DEPENDENCE_RAW_MASK
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
@ BSWAP
Byte Swap and Counting operators.
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ SET_FPMODE
Sets the current dynamic floating-point control modes.
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
@ CTTZ_ELTS
Returns the number of trailing (least significant) zero elements in a vector.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ VECTOR_FIND_LAST_ACTIVE
Finds the index of the last active mask element Operands: Mask.
@ FMODF
FMODF - Decomposes the operand into integral and fractional parts, each having the same type and sign...
@ FATAN2
FATAN2 - atan2, inspired by libm.
@ FSINCOSPI
FSINCOSPI - Compute both the sine and cosine times pi more accurately than FSINCOS(pi*x),...
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
@ FADD
Simple binary floating point operators.
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
@ RESET_FPMODE
Sets default dynamic floating-point control modes.
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
@ CLMUL
Carry-less multiplication operations.
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ SET_ROUNDING
Set rounding mode.
@ SIGN_EXTEND
Conversion operators.
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
@ ADDROFRETURNADDR
ADDROFRETURNADDR - Represents the llvm.addressofreturnaddress intrinsic.
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ SSUBO
Same for subtraction.
@ BRIND
BRIND - Indirect branch.
@ BR_JT
BR_JT - Jumptable branch.
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor to...
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ GET_ACTIVE_LANE_MASK
GET_ACTIVE_LANE_MASK - this corresponds to the llvm.get.active.lane.mask intrinsic.
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
@ CTLS
Count leading redundant sign bits.
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
@ GET_FPMODE
Reads the current dynamic floating-point control modes.
@ SHL
Shift and rotation operations.
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ PtrAuthGlobalAddress
A ptrauth constant.
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ UBSANTRAP
UBSANTRAP - Trap with an immediate describing the kind of sanitizer failure.
@ SMULO
Same for multiplication.
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ VECTOR_SPLICE_LEFT
VECTOR_SPLICE_LEFT(VEC1, VEC2, OFFSET) - Shifts CONCAT_VECTORS(VEC1, VEC2) left by OFFSET elements an...
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ MASKED_UDIV
Masked vector arithmetic that returns poison on disabled lanes.
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
@ VECTOR_SPLICE_RIGHT
VECTOR_SPLICE_RIGHT(VEC1, VEC2, OFFSET) - Shifts CONCAT_VECTORS(VEC1,VEC2) right by OFFSET elements a...
@ STRICT_FADD
Constrained versions of the binary floating point operators.
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively places vector elements based on mask e....
@ SPONENTRY
SPONENTRY - Represents the llvm.sponentry intrinsic.
@ CLEAR_CACHE
llvm.clear_cache intrinsic Operands: Input Chain, Start Address, End Address Outputs: Output Chain
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
@ EXPERIMENTAL_VECTOR_HISTOGRAM
Experimental vector histogram intrinsic Operands: Input Chain, Inc, Mask, Base, Index,...
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor ...
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum, the same as FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
@ LOOP_DEPENDENCE_WAR_MASK
The llvm.loop.dependence.
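These opcodes are consumed by SelectionDAG::getNode when a DAG is built or rewritten; a representative fragment from a custom lowering hook might read as follows (a sketch, with Op and DAG coming from the surrounding LowerOperation callback):

  // Inside some LowerFoo(SDValue Op, SelectionDAG &DAG):
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // Re-express the operation with target-independent ISD nodes.
  SDValue Sum  = DAG.getNode(ISD::ADD, DL, VT, LHS, RHS);
  SDValue Sext = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Sum,
                             DAG.getValueType(MVT::i8));
  return Sext;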
bool isOverflowIntrOpRes(SDValue Op)
Returns true if the specified value is the overflow result from one of the overflow intrinsic nodes.
bool isExtOpcode(unsigned Opcode)
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI bool isVectorShrinkable(const SDNode *N, unsigned NewEltSize, bool Signed)
Returns true if the specified node is a vector where all elements can be truncated to the specified e...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
static const int LAST_INDEXED_MODE
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
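For overloaded intrinsics the concrete overload types are passed explicitly. A small usage sketch, assuming M is a Module* and Ctx its LLVMContext:

  // llvm.ctpop is overloaded on its operand type, so supply the type.
  Type *I32Ty = Type::getInt32Ty(Ctx);
  Function *Ctpop =
      Intrinsic::getOrInsertDeclaration(M, Intrinsic::ctpop, {I32Ty});
  // Ctpop can now be called through an IRBuilder:
  //   Builder.CreateCall(Ctpop, {SomeI32Value});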
@ Bitcast
Perform the operation on a different, but equivalently sized type.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
auto m_Poison()
Match an arbitrary poison constant.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match only the specific value V.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
auto m_Value()
Match an arbitrary value and ignore it.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
CastInst_match< OpTy, UIToFPInst > m_UIToFP(const OpTy &Op)
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
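These llvm/IR/PatternMatch.h combinators compose into declarative shape checks over IR. For example, recognizing "(x + C) truncated to a narrower type" could be written as (a usage sketch; V is some Value*):

  using namespace llvm::PatternMatch;

  Value *X = nullptr;
  ConstantInt *C = nullptr;
  if (match(V, m_Trunc(m_Add(m_Value(X), m_ConstantInt(C))))) {
    // X and C are now bound to the add's operands feeding the trunc.
  }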
LLVM_ABI Libcall getPOW(EVT RetVT)
getPOW - Return the POW_* value for the given types, or UNKNOWN_LIBCALL if there is none.
auto m_SpecificVT(EVT RefVT, const Pattern &P)
Match a specific ValueType.
TernaryOpc_match< T0_P, T1_P, T2_P > m_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
auto m_VT(EVT &VT)
Retrieve the ValueType of the current SDValue.
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
ConstantInt_match m_ConstInt()
Match any integer constants or splat of an integer constant.
const unsigned VectorBits
@ ScalablePredicateVector
initializer< Ty > init(const Ty &Val)
std::optional< Function * > getAttachedARCFunction(const CallBase *CB)
This function returns operand bundle clang_arc_attachedcall's argument, which is the address of the A...
bool attachedCallOpBundleNeedsMarker(const CallBase *CB)
This function determines whether the clang_arc_attachedcall should be emitted with or without the mar...
bool hasAttachedCallOpBundle(const CallBase *CB)
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< NodeBase * > Node
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool isPackedVectorType(EVT SomeVT)
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
std::optional< unsigned > isDUPQMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPQMask - matches a splat of equivalent lanes within segments of a given number of elements.
bool CC_AArch64_Arm64EC_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> (WhichResultOut = 0,...
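A zip1 mask interleaves the low halves of the two sources and zip2 the high halves. A standalone recognizer for that shape (an illustrative sketch of the documented form; it ignores undef lanes and the operand-order output of the real helper):

  // M has NumElts entries indexing into the 2*NumElts concatenation of the sources.
  // zip1: <0, N, 1, N+1, ...>      zip2: <N/2, N+N/2, N/2+1, N+N/2+1, ...>
  static bool looksLikeZipMask(const int *M, unsigned NumElts, unsigned &WhichResult) {
    for (unsigned Which = 0; Which < 2; ++Which) {
      unsigned Base = Which ? NumElts / 2 : 0;        // low or high half
      bool Match = true;
      for (unsigned I = 0; I < NumElts && Match; ++I) {
        unsigned Expected = Base + I / 2 + ((I & 1) ? NumElts : 0);
        Match = (unsigned)M[I] == Expected;
      }
      if (Match) { WhichResult = Which; return true; }
    }
    return false;
  }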
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
bool CC_AArch64_Win64PCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs=nullptr, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
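A CCAssignFn is normally not called directly; it is handed to a CCState, which drives it over every argument. A representative fragment (a sketch, with MF, Ins, IsVarArg and CallConv assumed to come from the surrounding LowerFormalArguments):

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, MF.getFunction().getContext());
  CCInfo.AnalyzeFormalArguments(Ins, CC_AArch64_AAPCS);
  for (const CCValAssign &VA : ArgLocs) {
    // VA says whether each argument lives in a register or a stack slot.
  }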
APFloat abs(APFloat X)
Returns the absolute value of the argument.
bool CC_AArch64_DarwinPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
testing::Matcher< const detail::ErrorHolder & > Failed()
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
auto map_to_vector(ContainerTy &&C, FuncTy &&F)
Map a range to a SmallVector with element types deduced from the mapping.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
LLVM_ABI Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
std::optional< unsigned > getSVEPredPatternFromNumElements(unsigned MinNumElts)
Return specific VL predicate pattern based on the number of elements.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
bool CC_AArch64_Arm64EC_Thunk(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant, stopping at the first 1.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
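One way to express this predicate in terms of the neighbouring helpers: strip the trailing zeros and check that what remains is a plain low-bit mask (a small sketch, not the library's implementation):

  #include <cstdint>
  static bool isMask64(uint64_t V)        { return V && ((V & (V + 1)) == 0); }
  static bool isShiftedMask64(uint64_t V) {
    if (V == 0)
      return false;
    while ((V & 1) == 0)                  // strip trailing zeros
      V >>= 1;
    return isMask64(V);                   // the rest must be contiguous ones
  }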
unsigned M1(unsigned Val)
bool isReleaseOrStronger(AtomicOrdering AO)
auto dyn_cast_or_null(const Y &Val)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool RetCC_AArch64_Arm64EC_Thunk(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
bool CC_AArch64_Arm64EC_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool CC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
ComplexDeinterleavingOperation
LLVM_ABI unsigned getDeinterleaveIntrinsicFactor(Intrinsic::ID ID)
Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
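For example, alignTo(10, Align(8)) rounds 10 up to the next multiple of 8 and returns 16, while alignTo(16, Align(8)) returns 16 unchanged.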
generic_gep_type_iterator<> gep_type_iterator
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
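For instance, a REV64 of 32-bit elements in a four-element vector reverses each 64-bit block, giving the shuffle mask <1, 0, 3, 2>; a REV32 of 16-bit elements in an eight-element vector gives <1, 0, 3, 2, 5, 4, 7, 6>.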
constexpr RegState getDefRegState(bool B)
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
LLVM_ABI SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
LLVM_ABI EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
constexpr int PoisonMaskElem
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool CC_AArch64_DarwinPCS_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
ComplexDeinterleavingRotation
bool CC_AArch64_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool CC_AArch64_Win64_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
LLVM_ABI VectorType * getDeinterleavedVectorType(IntrinsicInst *DI)
Given a deinterleaveN intrinsic, return the (narrow) vector type of each factor.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Mul
Product of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool isAsynchronousEHPersonality(EHPersonality Pers)
Returns true if this personality function catches asynchronous exceptions.
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool RetCC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool isZeroOrZeroSplat(SDValue N, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
gep_type_iterator gep_type_begin(const User *GEP)
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
UndefPoisonKind
Enumeration to track whether we are interested in Undef, Poison, or both.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
static const MachineMemOperand::Flags MOStridedAccess
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
bool CC_AArch64_DarwinPCS_ILP32_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
LLVM_ABI llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
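The resulting mask is Start, Start+1, ..., Start+NumInts-1 followed by NumUndefs undef (-1) entries; e.g. createSequentialMask(2, 4, 2) yields <2, 3, 4, 5, -1, -1>.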
@ Default
The result value is uniform if and only if all operands are uniform.
@ Custom
The result value requires a custom uniformity check.
constexpr bool isShiftedUInt(uint64_t x)
Checks if a unsigned integer is an N bit number shifted left by S.
bool CC_AArch64_Arm64EC_Thunk_Native(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool CC_AArch64_Win64_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool RetCC_AArch64_Arm64EC_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
bool CC_AArch64_Preserve_None(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static const unsigned PerfectShuffleTable[6561+1]
bool isTRNMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for trn1 or trn2 masks of the form: <0, 8, 2, 10, 4, 12, 6, 14> (WhichResultOut = 0,...
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Helper structure to keep track of a SET_CC lowered into AArch64 code.
Helper structure to keep track of ISD::SET_CC operands.
Helper structure to be able to read SetCC information.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Represent subnormal handling kind for floating point instruction inputs and outputs.
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
uint64_t getScalarStoreSize() const
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
ElementCount getVectorElementCount() const
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
EVT widenIntegerVectorElementType(LLVMContext &Context) const
Return a VT for an integer vector type with the size of the elements doubled.
bool isScalableVT() const
Return true if the type is a scalable type.
bool isFixedLengthVector() const
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
bool bitsGE(EVT VT) const
Return true if this has no fewer bits than VT.
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
EVT changeElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a type whose attributes match ourselves with the exception of the element type that i...
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool is64BitVector() const
Return true if this is a 64-bit vector type.
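Putting a few of these EVT queries together, a small usage sketch (Ctx is assumed to be an LLVMContext from the surrounding code):

  EVT VT = EVT::getVectorVT(Ctx, MVT::i32, /*NumElements=*/4);   // v4i32
  EVT EltVT = VT.getVectorElementType();                         // i32
  unsigned NumElts = VT.getVectorNumElements();                  // 4
  TypeSize Bits = VT.getSizeInBits();                            // 128 fixed bits
  EVT HalfVT = VT.getHalfNumVectorElementsVT(Ctx);               // v2i32
  // VT.isVector() and VT.isInteger() both hold for v4i32.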
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
bool isZero() const
Returns true if value is all zero.
static LLVM_ABI KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for ashr(LHS, RHS).
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
unsigned getBitWidth() const
Get the bit width of this value.
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false, bool SelfAdd=false)
Compute knownbits resulting from addition of LHS and RHS.
static LLVM_ABI KnownBits lshr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for lshr(LHS, RHS).
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
static LLVM_ABI KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
APInt getSignedMinValue() const
Return the minimal signed value possible given these KnownBits.
static LLVM_ABI KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
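A quick illustration of how these combine (a sketch; Zero/One are the public APInt members of KnownBits):

  // "x is a multiple of 4 that fits in 6 bits": bits 0-1 and 6-7 are known zero.
  KnownBits X(8);
  X.Zero = APInt(8, 0b11000011);
  KnownBits Four = KnownBits::makeConstant(APInt(8, 4));
  KnownBits Sum = KnownBits::add(X, Four);       // known bits of x + 4
  APInt MaxSum = Sum.getMaxValue();              // largest value still possible for x + 4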
SmallVector< ArgRegPair, 1 > ArgRegPairs
Vector of call argument and its forwarding register.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Constraint for a predicate of the form "cmp Pred Op, OtherOp", where Op is the value the constraint a...
A simple container for information about the supported runtime calls.
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::OutputArg, 32 > Outs
bool isBeforeLegalizeOps() const
bool isAfterLegalizeDAG() const
bool isCalledByLegalizer() const
bool isBeforeLegalize() const
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)
Helper structure to keep track of SetCC information.