74#include "llvm/IR/IntrinsicsAArch64.h"
109#define DEBUG_TYPE "aarch64-lower"
112STATISTIC(NumOptimizedImms,
"Number of times immediates were optimized");
119 cl::desc(
"Allow AArch64 Local Dynamic TLS code generation"),
124 cl::desc(
"Enable AArch64 logical imm instruction "
134 cl::desc(
"Combine extends of AArch64 masked "
135 "gather intrinsics"),
139 cl::desc(
"Combine ext and trunc to TBL"),
154 cl::desc(
"Enable / disable SVE scalable vectors in Global ISel"),
161 cl::desc(
"Generate ISD::PTRADD nodes for pointer arithmetic in "
162 "SelectionDAG for FEAT_CPA"),
172 AArch64::X3, AArch64::X4, AArch64::X5,
173 AArch64::X6, AArch64::X7};
175 AArch64::Q3, AArch64::Q4, AArch64::Q5,
176 AArch64::Q6, AArch64::Q7};
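// Most likely the tails of the AAPCS64 argument-register tables (an inference
// from the register names): integer arguments are passed in X0-X7 and FP/SIMD
// arguments in Q0-Q7.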
201 return MVT::nxv8bf16;
208 switch (EC.getKnownMinValue()) {
224 "Expected scalable predicate vector type!");
246 "Expected legal vector type!");
257 case AArch64ISD::BITREVERSE_MERGE_PASSTHRU:
258 case AArch64ISD::BSWAP_MERGE_PASSTHRU:
259 case AArch64ISD::REVH_MERGE_PASSTHRU:
260 case AArch64ISD::REVW_MERGE_PASSTHRU:
261 case AArch64ISD::REVD_MERGE_PASSTHRU:
262 case AArch64ISD::CTLZ_MERGE_PASSTHRU:
263 case AArch64ISD::CTPOP_MERGE_PASSTHRU:
264 case AArch64ISD::DUP_MERGE_PASSTHRU:
265 case AArch64ISD::ABS_MERGE_PASSTHRU:
266 case AArch64ISD::NEG_MERGE_PASSTHRU:
267 case AArch64ISD::FNEG_MERGE_PASSTHRU:
268 case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
269 case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
270 case AArch64ISD::FCEIL_MERGE_PASSTHRU:
271 case AArch64ISD::FFLOOR_MERGE_PASSTHRU:
272 case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU:
273 case AArch64ISD::FRINT_MERGE_PASSTHRU:
274 case AArch64ISD::FROUND_MERGE_PASSTHRU:
275 case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU:
276 case AArch64ISD::FTRUNC_MERGE_PASSTHRU:
277 case AArch64ISD::FP_ROUND_MERGE_PASSTHRU:
278 case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU:
279 case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU:
280 case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU:
281 case AArch64ISD::FCVTX_MERGE_PASSTHRU:
282 case AArch64ISD::FCVTZU_MERGE_PASSTHRU:
283 case AArch64ISD::FCVTZS_MERGE_PASSTHRU:
284 case AArch64ISD::FSQRT_MERGE_PASSTHRU:
285 case AArch64ISD::FRECPX_MERGE_PASSTHRU:
286 case AArch64ISD::FABS_MERGE_PASSTHRU:
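// The *_MERGE_PASSTHRU opcodes listed above are the SVE merging unary
// operations: each takes a governing predicate, a source vector and a
// passthru vector, with inactive lanes taking their value from the passthru
// operand.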
  switch (Op.getOpcode()) {

  case ISD::GET_ACTIVE_LANE_MASK:
  case AArch64ISD::PTRUE:
  case AArch64ISD::SETCC_MERGE_ZERO:

    switch (Op.getConstantOperandVal(0)) {
306 case Intrinsic::aarch64_sve_ptrue:
307 case Intrinsic::aarch64_sve_pnext:
308 case Intrinsic::aarch64_sve_cmpeq:
309 case Intrinsic::aarch64_sve_cmpne:
310 case Intrinsic::aarch64_sve_cmpge:
311 case Intrinsic::aarch64_sve_cmpgt:
312 case Intrinsic::aarch64_sve_cmphs:
313 case Intrinsic::aarch64_sve_cmphi:
314 case Intrinsic::aarch64_sve_cmpeq_wide:
315 case Intrinsic::aarch64_sve_cmpne_wide:
316 case Intrinsic::aarch64_sve_cmpge_wide:
317 case Intrinsic::aarch64_sve_cmpgt_wide:
318 case Intrinsic::aarch64_sve_cmplt_wide:
319 case Intrinsic::aarch64_sve_cmple_wide:
320 case Intrinsic::aarch64_sve_cmphs_wide:
321 case Intrinsic::aarch64_sve_cmphi_wide:
322 case Intrinsic::aarch64_sve_cmplo_wide:
323 case Intrinsic::aarch64_sve_cmpls_wide:
324 case Intrinsic::aarch64_sve_fcmpeq:
325 case Intrinsic::aarch64_sve_fcmpne:
326 case Intrinsic::aarch64_sve_fcmpge:
327 case Intrinsic::aarch64_sve_fcmpgt:
328 case Intrinsic::aarch64_sve_fcmpuo:
329 case Intrinsic::aarch64_sve_facgt:
330 case Intrinsic::aarch64_sve_facge:
331 case Intrinsic::aarch64_sve_whilege:
332 case Intrinsic::aarch64_sve_whilegt:
333 case Intrinsic::aarch64_sve_whilehi:
334 case Intrinsic::aarch64_sve_whilehs:
335 case Intrinsic::aarch64_sve_whilele:
336 case Intrinsic::aarch64_sve_whilelo:
337 case Intrinsic::aarch64_sve_whilels:
338 case Intrinsic::aarch64_sve_whilelt:
339 case Intrinsic::aarch64_sve_match:
340 case Intrinsic::aarch64_sve_nmatch:
341 case Intrinsic::aarch64_sve_whilege_x2:
342 case Intrinsic::aarch64_sve_whilegt_x2:
343 case Intrinsic::aarch64_sve_whilehi_x2:
344 case Intrinsic::aarch64_sve_whilehs_x2:
345 case Intrinsic::aarch64_sve_whilele_x2:
346 case Intrinsic::aarch64_sve_whilelo_x2:
347 case Intrinsic::aarch64_sve_whilels_x2:
348 case Intrinsic::aarch64_sve_whilelt_x2:
static std::tuple<SDValue, SDValue>

  if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))

    AddrDisc = DAG->getRegister(AArch64::NoRegister, MVT::i64);

  return std::make_tuple(
402 if (Subtarget->hasLS64()) {
408 if (Subtarget->hasFPARMv8()) {
417 if (Subtarget->hasNEON()) {
421 addDRType(MVT::v2f32);
422 addDRType(MVT::v8i8);
423 addDRType(MVT::v4i16);
424 addDRType(MVT::v2i32);
425 addDRType(MVT::v1i64);
426 addDRType(MVT::v1f64);
427 addDRType(MVT::v4f16);
428 addDRType(MVT::v4bf16);
430 addQRType(MVT::v4f32);
431 addQRType(MVT::v2f64);
432 addQRType(MVT::v16i8);
433 addQRType(MVT::v8i16);
434 addQRType(MVT::v4i32);
435 addQRType(MVT::v2i64);
436 addQRType(MVT::v8f16);
437 addQRType(MVT::v8bf16);
440 if (Subtarget->isSVEorStreamingSVEAvailable()) {
468 if (Subtarget->useSVEForFixedLengthVectors()) {
508 if (Subtarget->hasFPARMv8()) {
600 if (Subtarget->hasFPARMv8()) {
606 if (Subtarget->hasFPARMv8()) {
660 if (Subtarget->hasCSSC()) {
739 if (Subtarget->hasFullFP16()) {
748 ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
749 ISD::FSINCOSPI, ISD::FMODF, ISD::FACOS,
750 ISD::FASIN, ISD::FATAN, ISD::FATAN2,
751 ISD::FCOSH, ISD::FSINH, ISD::FTANH,
752 ISD::FTAN, ISD::FEXP, ISD::FEXP2,
753 ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
771 if (Subtarget->hasFullFP16()) {
  auto LegalizeNarrowFP = [this](MVT ScalarVT) {

    for (auto Op : {ISD::FNEG, ISD::FABS})

    for (auto Op : {ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT,
890 if (!Subtarget->hasFullFP16()) {
891 LegalizeNarrowFP(MVT::f16);
893 LegalizeNarrowFP(MVT::bf16);
900 {ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL,
901 ISD::FRINT, ISD::FTRUNC, ISD::FROUND,
902 ISD::FROUNDEVEN, ISD::FMINNUM, ISD::FMAXNUM,
903 ISD::FMINIMUM, ISD::FMAXIMUM, ISD::LROUND,
904 ISD::LLROUND, ISD::LRINT, ISD::LLRINT,
905 ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE,
  for (MVT Ty : {MVT::f32, MVT::f64})

  if (Subtarget->hasFullFP16())

  for (MVT Ty : {MVT::f32, MVT::f64})

  if (Subtarget->hasFullFP16())
936 if (!Subtarget->hasLSE() && !Subtarget->outlineAtomics()) {
948 if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
976 if (Subtarget->outlineAtomics() && !Subtarget->hasLSFE()) {
1003 if (Subtarget->hasLSE128()) {
1017 if (Subtarget->hasLSE2()) {
1074 if (WideVT.getScalarSizeInBits() > NarrowVT.getScalarSizeInBits()) {
1080 if (Subtarget->hasFPARMv8()) {
1153 {ISD::MGATHER, ISD::MSCATTER, ISD::EXPERIMENTAL_VECTOR_HISTOGRAM});
1204 if (!Subtarget->isTargetWindows())
1220 if (Subtarget->hasSME())
1223 if (Subtarget->isNeonAvailable()) {
1231 ISD::FNEG, ISD::FABS, ISD::FCEIL,
1232 ISD::FSQRT, ISD::FFLOOR, ISD::FNEARBYINT,
1233 ISD::FSIN, ISD::FCOS, ISD::FTAN,
1234 ISD::FASIN, ISD::FACOS, ISD::FATAN,
1235 ISD::FSINH, ISD::FCOSH, ISD::FTANH,
1236 ISD::FPOW, ISD::FLOG, ISD::FLOG2,
1237 ISD::FLOG10, ISD::FEXP, ISD::FEXP2,
1238 ISD::FEXP10, ISD::FRINT, ISD::FROUND,
1239 ISD::FROUNDEVEN, ISD::FTRUNC, ISD::FMINNUM,
1240 ISD::FMAXNUM, ISD::FMINIMUM, ISD::FMAXIMUM,
1241 ISD::FMAXNUM_IEEE, ISD::FMINNUM_IEEE,
1268 for (
auto VT : {MVT::v2i32, MVT::v2i64, MVT::v4i32})
1271 if (Subtarget->hasFullFP16()) {
1304 for (
auto VT : {MVT::v1i64, MVT::v2i64}) {
1320 for (
MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v1i64,
1321 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1328 for (
MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
1339 for (
MVT VT : { MVT::v4f16, MVT::v2f32,
1340 MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
1341 if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
1350 if (Subtarget->hasFullFP16())
1353 for (
MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
1354 MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
1376 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
1398 {ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC,
1399 ISD::FROUND, ISD::FROUNDEVEN, ISD::FMAXNUM_IEEE, ISD::FMINNUM_IEEE,
1403 for (
MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})
1405 if (Subtarget->hasFullFP16())
1406 for (
MVT Ty : {MVT::v4f16, MVT::v8f16})
1411 for (
auto Op : {ISD::LRINT, ISD::LLRINT}) {
1412 for (
MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})
1414 if (Subtarget->hasFullFP16())
1415 for (
MVT Ty : {MVT::v4f16, MVT::v8f16})
1438 for (
MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1441 for (
MVT VT : { MVT::v16f16, MVT::v8f32, MVT::v4f64 })
1444 if (Subtarget->hasDotProd()) {
1445 static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
1446 ISD::PARTIAL_REDUCE_UMLA};
1453 if (Subtarget->hasMatMulInt8()) {
1469 if (VT.is128BitVector() || VT.is64BitVector()) {
1484 for (
MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
1490 if (Subtarget->hasSME()) {
1496 if (Subtarget->isSVEorStreamingSVEAvailable()) {
1498 {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
1504 for (
auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {
1509 if (Subtarget->hasSVE2p1() ||
1510 (Subtarget->hasSME2() && Subtarget->isStreaming()))
1513 for (
auto VT : {MVT::v16i8, MVT::v8i8, MVT::v4i16, MVT::v2i32})
1517 if (Subtarget->isSVEorStreamingSVEAvailable()) {
1518 for (
auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
1582 if (!Subtarget->isLittleEndian())
1585 if (Subtarget->hasSVE2() ||
1586 (Subtarget->hasSME() && Subtarget->isStreaming()))
1592 for (
auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
1598 for (
auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32})
1602 { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
1603 MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
1615 {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
1629 if (VT != MVT::nxv16i1) {
1639 {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
1640 MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
1641 MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
1680 for (
auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
1681 MVT::nxv4f32, MVT::nxv2f64}) {
1758 for (
auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
1775 if (Subtarget->hasSVEB16B16() &&
1776 Subtarget->isNonStreamingSVEorSME2Available()) {
1789 {ISD::FCEIL,
ISD::FDIV, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FRINT,
1790 ISD::FROUND, ISD::FROUNDEVEN, ISD::FSQRT, ISD::FTRUNC,
ISD::SETCC,
1791 ISD::VECREDUCE_FADD, ISD::VECREDUCE_FMAX, ISD::VECREDUCE_FMAXIMUM,
1792 ISD::VECREDUCE_FMIN, ISD::VECREDUCE_FMINIMUM}) {
1798 if (!Subtarget->hasSVEB16B16() ||
1799 !Subtarget->isNonStreamingSVEorSME2Available()) {
1812 for (
auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
1813 MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
1824 if (Subtarget->useSVEForFixedLengthVectors()) {
1827 VT, !Subtarget->isNeonAvailable()))
1828 addTypeForFixedLengthSVE(VT);
1832 VT, !Subtarget->isNeonAvailable()))
1833 addTypeForFixedLengthSVE(VT);
1837 for (
auto VT : {MVT::v8i8, MVT::v4i16})
1842 for (
auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
1844 for (
auto VT : {MVT::v8f16, MVT::v4f32})
1870 for (
auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
1871 MVT::v2i32, MVT::v4i32, MVT::v2i64}) {
1882 for (
auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
1893 for (
auto VT : {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1})
1898 if (Subtarget->isSVEorStreamingSVEAvailable()) {
1901 static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
1902 ISD::PARTIAL_REDUCE_UMLA};
1908 if (Subtarget->hasMatMulInt8()) {
1910 MVT::nxv16i8,
Legal);
1916 if (Subtarget->hasSVE2() || Subtarget->hasSME()) {
1923 if (Subtarget->hasSVE2p1() || Subtarget->hasSME2()) {
1925 MVT::nxv8f16,
Legal);
1933 if (Subtarget->hasSVE2() ||
1934 (Subtarget->hasSME() && Subtarget->isStreaming())) {
1936 for (
auto VT : {MVT::v2i32, MVT::v4i16, MVT::v8i8, MVT::v16i8}) {
1940 for (
auto VT : {MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1, MVT::nxv16i1}) {
1947 if (Subtarget->isSVEAvailable()) {
1948 for (
auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64,
1949 MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
1950 MVT::nxv4f32, MVT::nxv2f64, MVT::nxv2bf16, MVT::nxv4bf16,
1951 MVT::nxv8bf16, MVT::v4f16, MVT::v8f16, MVT::v2f32,
1952 MVT::v4f32, MVT::v1f64, MVT::v2f64, MVT::v8i8,
1953 MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
1954 MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
1959 for (
auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
1960 MVT::nxv4f32, MVT::nxv2f64, MVT::v4f16, MVT::v8f16,
1961 MVT::v2f32, MVT::v4f32, MVT::v2f64})
1966 {MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv2f32,
1967 MVT::nxv2f64, MVT::nxv4i8, MVT::nxv4i16, MVT::nxv4i32, MVT::nxv4f32})
1972 for (
auto VT : {MVT::v2i8, MVT::v2i16, MVT::v2i32, MVT::v2i64, MVT::v2f32,
1973 MVT::v2f64, MVT::v4i8, MVT::v4i16, MVT::v4i32, MVT::v4f32})
1977 if (Subtarget->hasSVE2()) {
1983 static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
1984 ISD::PARTIAL_REDUCE_UMLA};
1995 if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
2002 if (Subtarget->hasSVE()) {
2016 if (Subtarget->isTargetWindows()) {
2033void AArch64TargetLowering::addTypeForNEON(
MVT VT) {
2043 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
2064 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
2065 ((VT == MVT::v4bf16 || VT == MVT::v8bf16 || VT == MVT::v4f16 ||
2066 VT == MVT::v8f16) &&
2067 Subtarget->hasFullFP16()))
2090 if (VT != MVT::v8i8 && VT != MVT::v16i8)
2099 for (
unsigned Opcode :
2117 for (
unsigned Opcode :
2118 {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM,
2148 if (Subtarget->isLittleEndian()) {
2159 if (Subtarget->hasD128()) {
  if (!Subtarget->isSVEorStreamingSVEAvailable() ||
        (OpVT != MVT::i32 && OpVT != MVT::i64))))

  if (!Subtarget->isSVEorStreamingSVEAvailable())

  return VT != MVT::nxv16i1 && VT != MVT::nxv8i1 && VT != MVT::nxv4i1 &&
         VT != MVT::nxv2i1 && VT != MVT::v16i1 && VT != MVT::v8i1 &&
         VT != MVT::v4i1 && VT != MVT::v2i1;

                                                  unsigned SearchSize) const {
  if (!Subtarget->hasSVE2() || !Subtarget->isSVEAvailable())

  if (VT == MVT::nxv8i16 || VT == MVT::v8i16)
    return SearchSize != 8;
  if (VT == MVT::nxv16i8 || VT == MVT::v16i8 || VT == MVT::v8i8)
    return SearchSize != 8 && SearchSize != 16;
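// The SVE2 MATCH instruction compares each element against one 128-bit
// segment of the search vector, i.e. at most 16 byte or 8 halfword
// candidates, which is why only those search sizes avoid expansion here.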
void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {

  while (InnerVT != VT) {

  while (InnerVT != VT) {

  bool PreferSVE = !PreferNEON && Subtarget->isSVEAvailable();

  static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
                                    ISD::PARTIAL_REDUCE_UMLA};

  if (Subtarget->hasMatMulInt8()) {

void AArch64TargetLowering::addDRType(MVT VT) {
  if (Subtarget->isNeonAvailable())

void AArch64TargetLowering::addQRType(MVT VT) {
  if (Subtarget->isNeonAvailable())
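// addDRType/addQRType associate a vector type with the 64-bit "D" or 128-bit
// "Q" NEON register file (the FPR64/FPR128 register classes); the
// isNeonAvailable() guard separates plain NEON from streaming-SVE-only
// configurations.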
    Imm = C->getZExtValue();

  case AArch64ISD::SQDMULH:

  return N->getOpcode() == Opc &&

                                 const APInt &Demanded,

  uint64_t OldImm = Imm, NewImm, Enc;

  if (Imm == 0 || Imm == Mask ||

  unsigned EltSize = Size;

      ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &

  uint64_t Sum = RotatedImm + NonDemandedBits;
  bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
  uint64_t Ones = (Sum + Carry) & NonDemandedBits;
  NewImm = (Imm | Ones) & Mask;

  while (EltSize < Size) {
    NewImm |= NewImm << EltSize;

         "demanded bits should never be altered");
  assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
2520 EVT VT =
Op.getValueType();
2526 if (NewImm == 0 || NewImm == OrigMask) {
2551 EVT VT =
Op.getValueType();
2565 switch (
Op.getOpcode()) {
2569 NewOpc =
Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
2572 NewOpc =
Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
2575 NewOpc =
Size == 32 ? AArch64::EORWri : AArch64::EORXri;
2590 switch (
Op.getOpcode()) {
2593 case AArch64ISD::DUP: {
2596 if (
SrcOp.getValueSizeInBits() !=
Op.getScalarValueSizeInBits()) {
2597 assert(
SrcOp.getValueSizeInBits() >
Op.getScalarValueSizeInBits() &&
2598 "Expected DUP implicit truncation");
2599 Known = Known.
trunc(
Op.getScalarValueSizeInBits());
2603 case AArch64ISD::CSEL: {
2610 case AArch64ISD::CSNEG:
2611 case AArch64ISD::CSINC:
2612 case AArch64ISD::CSINV: {
2620 if (
Op.getOpcode() == AArch64ISD::CSINC)
2624 else if (
Op.getOpcode() == AArch64ISD::CSINV)
2626 else if (
Op.getOpcode() == AArch64ISD::CSNEG)
2629 Op.getScalarValueSizeInBits())));
2634 case AArch64ISD::BICi: {
2637 ~(
Op->getConstantOperandAPInt(1) <<
Op->getConstantOperandAPInt(2))
2643 case AArch64ISD::VLSHR: {
2650 case AArch64ISD::VASHR: {
2657 case AArch64ISD::VSHL: {
2664 case AArch64ISD::MOVI: {
2669 case AArch64ISD::MOVIshift: {
2672 <<
Op->getConstantOperandVal(1)));
2675 case AArch64ISD::MOVImsl: {
2678 Known.
getBitWidth(), ~(~
Op->getConstantOperandVal(0) << ShiftAmt)));
2681 case AArch64ISD::MOVIedit: {
2687 case AArch64ISD::MVNIshift: {
2690 ~(
Op->getConstantOperandVal(0) <<
Op->getConstantOperandVal(1)),
2694 case AArch64ISD::MVNImsl: {
2701 case AArch64ISD::LOADgot:
2702 case AArch64ISD::ADDlow: {
2703 if (!Subtarget->isTargetILP32())
2709 case AArch64ISD::ASSERT_ZEXT_BOOL: {
2719 case Intrinsic::aarch64_ldaxr:
2720 case Intrinsic::aarch64_ldxr: {
2732 unsigned IntNo =
Op.getConstantOperandVal(0);
2736 case Intrinsic::aarch64_neon_uaddlv: {
2737 MVT VT =
Op.getOperand(1).getValueType().getSimpleVT();
2739 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
2740 unsigned Bound = (VT == MVT::v8i8) ? 11 : 12;
2747 case Intrinsic::aarch64_neon_umaxv:
2748 case Intrinsic::aarch64_neon_uminv: {
2753 MVT VT =
Op.getOperand(1).getValueType().getSimpleVT();
2755 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
2759 }
else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
                                                      unsigned Depth) const {
  EVT VT = Op.getValueType();

  unsigned Opcode = Op.getOpcode();

  case AArch64ISD::FCMEQ:
  case AArch64ISD::FCMGE:
  case AArch64ISD::FCMGT:

  case AArch64ISD::VASHR: {

    return std::min<uint64_t>(Tmp + Op.getConstantOperandVal(1), VTBits);

                                                       unsigned *Fast) const {
  if (ElementSizeBits % 8 == 0 && Alignment >= Align(ElementSizeBits / 8))

  if (Subtarget->requiresStrictAlign())

    *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||

                                                       unsigned *Fast) const {
  if (Subtarget->requiresStrictAlign())

    *Fast = !Subtarget->isMisaligned128StoreSlow() ||
            Ty.getSizeInBytes() != 16 ||
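// Misaligned accesses are generally cheap on AArch64 once strict alignment is
// ruled out; the main case worth modelling is that some cores split
// misaligned 128-bit stores, which isMisaligned128StoreSlow() reports, hence
// the special-casing of 16-byte accesses above.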
2888 Register DestReg =
MI.getOperand(0).getReg();
2889 Register IfTrueReg =
MI.getOperand(1).getReg();
2890 Register IfFalseReg =
MI.getOperand(2).getReg();
2891 unsigned CondCode =
MI.getOperand(3).getImm();
2892 bool NZCVKilled =
MI.getOperand(4).isKill();
2906 MBB->addSuccessor(TrueBB);
2907 MBB->addSuccessor(EndBB);
2923 MI.eraseFromParent();
2931 "SEH does not use catchret!");
2942 Register TargetReg =
MI.getOperand(0).getReg();
2944 TII.probedStackAlloc(
MBBI, TargetReg,
false);
2946 MI.eraseFromParent();
2947 return NextInst->getParent();
2959 Register RegVL_GPR =
MRI.createVirtualRegister(RC_GPR);
2960 Register RegVL_GPRsp =
MRI.createVirtualRegister(RC_GPRsp);
2961 Register RegSVL_GPR =
MRI.createVirtualRegister(RC_GPR);
2962 Register RegSVL_GPRsp =
MRI.createVirtualRegister(RC_GPRsp);
3000 MBB->addSuccessor(TrapBB);
3001 MBB->addSuccessor(PassBB);
3003 MI.eraseFromParent();
3015 MIB.
add(
MI.getOperand(1));
3016 MIB.
add(
MI.getOperand(2));
3017 MIB.
add(
MI.getOperand(3));
3018 MIB.
add(
MI.getOperand(4));
3019 MIB.
add(
MI.getOperand(5));
3021 MI.eraseFromParent();
3032 MIB.
add(
MI.getOperand(0));
3033 MIB.
add(
MI.getOperand(1));
3034 MIB.
add(
MI.getOperand(2));
3035 MIB.
add(
MI.getOperand(1));
3037 MI.eraseFromParent();
3044 bool Op0IsDef)
const {
3050 for (
unsigned I = 1;
I <
MI.getNumOperands(); ++
I)
3051 MIB.
add(
MI.getOperand(
I));
3053 MI.eraseFromParent();
  unsigned StartIdx = 0;

  bool HasTile = BaseReg != AArch64::ZA;
  bool HasZPROut = HasTile && MI.getOperand(0).isReg();

    MIB.add(MI.getOperand(StartIdx));

    MIB.addReg(BaseReg + MI.getOperand(StartIdx).getImm(),

    MIB.addReg(BaseReg + MI.getOperand(StartIdx).getImm());

  if (MI.getOperand(0).isReg() && !MI.getOperand(1).isImm()) {
    MIB.add(MI.getOperand(StartIdx));

  for (unsigned I = StartIdx; I < MI.getNumOperands(); ++I)
    MIB.add(MI.getOperand(I));

  MI.eraseFromParent();

  MIB.add(MI.getOperand(0));

  unsigned Mask = MI.getOperand(0).getImm();
  for (unsigned I = 0; I < 8; I++) {
    if (Mask & (1 << I))

  MI.eraseFromParent();
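// For the SME ZERO pseudo, each of the eight mask bits appears to select one
// 64-bit ZA tile (ZAD0-ZAD7) to zero, so the loop adds one tile operand per
// set bit (an interpretation of the mask test above).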
3115 if (TPIDR2.Uses > 0) {
3118 if (!Subtarget->isLittleEndian())
3120 "TPIDR2 block initialization is not supported on big-endian targets");
3148 "Lazy ZA save is not yet supported on Windows");
3152 if (TPIDR2.
Uses > 0) {
3158 Register SP =
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3159 BuildMI(*BB,
MI,
MI.getDebugLoc(),
TII->get(TargetOpcode::COPY), SP)
3163 auto Size =
MI.getOperand(1).getReg();
3164 auto Dest =
MI.getOperand(0).getReg();
3165 BuildMI(*BB,
MI,
MI.getDebugLoc(),
TII->get(AArch64::MSUBXrrr), Dest)
3189 "Lazy ZA save is not yet supported on Windows");
3194 auto Size =
MI.getOperand(1).getReg();
3195 auto Dest =
MI.getOperand(0).getReg();
3196 BuildMI(*BB,
MI,
MI.getDebugLoc(),
TII->get(AArch64::SUBXrx64), AArch64::SP)
3200 BuildMI(*BB,
MI,
MI.getDebugLoc(),
TII->get(TargetOpcode::COPY), Dest)
3206 BuildMI(*BB,
MI,
MI.getDebugLoc(),
TII->get(TargetOpcode::IMPLICIT_DEF),
3207 MI.getOperand(0).getReg());
3221 RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE_SIZE;
3228 MI.getOperand(0).getReg())
3232 MI.getOperand(0).getReg())
3244 Register ResultReg =
MI.getOperand(0).getReg();
3247 }
else if (Subtarget->hasSME()) {
3249 .
addImm(AArch64SysReg::SVCR)
3252 RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE;
3261 MI.eraseFromParent();
  while (Reg.isVirtual()) {
    assert(DefMI && "Virtual register definition not found");
    unsigned Opcode = DefMI->getOpcode();

    if (Opcode == AArch64::COPY) {
      Reg = DefMI->getOperand(1).getReg();

      if (Reg.isPhysical())

    if (Opcode == AArch64::SUBREG_TO_REG) {
      Reg = DefMI->getOperand(2).getReg();

  int64_t IntDisc = IntDiscOp.getImm();
  assert(IntDisc == 0 && "Blend components are already expanded");

  case AArch64::MOVKXi:

  case AArch64::MOVi32imm:
  case AArch64::MOVi64imm:

    AddrDisc = AArch64::NoRegister;

  if (AddrDisc == AArch64::XZR)
    AddrDisc = AArch64::NoRegister;

  if (AddrDisc && MRI.getRegClass(AddrDisc) != AddrDiscRC) {
    Register TmpReg = MRI.createVirtualRegister(AddrDiscRC);

  AddrDiscOp.setReg(AddrDisc);
  IntDiscOp.setImm(IntDisc);
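// This is the expansion of a pointer-authentication "blend" discriminator:
// the walk through COPY/SUBREG_TO_REG above looks for a MOVKXi that inserts a
// 16-bit integer discriminator into an address discriminator, then splits the
// pair into the separate address-register and immediate operands that the
// signing/authentication pseudos expect.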
  if (SMEOrigInstr != -1) {
    switch (SMEMatrixType) {

  switch (MI.getOpcode()) {

  case AArch64::InitTPIDR2Obj:

  case AArch64::AllocateZABuffer:

  case AArch64::AllocateSMESaveBuffer:

  case AArch64::GetSMESaveSize:

  case AArch64::EntryPStateSM:

  case AArch64::F128CSEL:

  case TargetOpcode::STATEPOINT:
    MI.addOperand(*MI.getMF(),

  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:

  case TargetOpcode::PATCHABLE_EVENT_CALL:
  case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:

  case AArch64::CATCHRET:

  case AArch64::PROBED_STACKALLOC_DYN:

  case AArch64::CHECK_MATCHING_VL_PSEUDO:

  case AArch64::LD1_MXIPXX_H_PSEUDO_B:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_B, AArch64::ZAB0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_H:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_H, AArch64::ZAH0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_S:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_S, AArch64::ZAS0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_D:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_D, AArch64::ZAD0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_Q:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_Q, AArch64::ZAQ0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_B:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_B, AArch64::ZAB0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_H:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_H, AArch64::ZAH0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_S:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_S, AArch64::ZAS0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_D:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_D, AArch64::ZAD0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_Q:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_Q, AArch64::ZAQ0, MI, BB);
  case AArch64::LDR_ZA_PSEUDO:

  case AArch64::LDR_TX_PSEUDO:

  case AArch64::STR_TX_PSEUDO:

  case AArch64::ZERO_M_PSEUDO:

  case AArch64::ZERO_T_PSEUDO:

  case AArch64::MOVT_TIZ_PSEUDO:
                                &AArch64::GPR64noipRegClass);
  while (N->getOpcode() == ISD::BITCAST)
    N = N->getOperand(0).getNode();

  if (N->getOpcode() != AArch64ISD::DUP)

  auto Opnd0 = N->getOperand(0);

                              CondCode, CondCode2);

  bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);

                    << " legal: " << (IsLegal ? "yes\n" : "no\n"));
  if (Op->getFlags().hasNoSignedWrap())

      (isIntEqualitySetCC(CC) ||

  EVT VT = LHS.getValueType();

  if ((VT == MVT::f16 && !FullFP16) || VT == MVT::bf16) {

    Chain = RHS.getValue(1);

      IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;

  EVT VT = LHS.getValueType();

  if ((VT == MVT::f16 && !FullFP16) || VT == MVT::bf16) {

  unsigned Opcode = AArch64ISD::SUBS;

    Opcode = AArch64ISD::ADDS;

             isIntEqualitySetCC(CC)) {

    Opcode = AArch64ISD::ADDS;

                      LHS.getOperand(0), LHS.getOperand(1));

  } else if (LHS.getOpcode() == AArch64ISD::ANDS) {

    return LHS.getValue(1);
  unsigned Opcode = 0;

  if (LHS.getValueType().isFloatingPoint()) {
    assert(LHS.getValueType() != MVT::f128);
    if ((LHS.getValueType() == MVT::f16 && !FullFP16) ||
        LHS.getValueType() == MVT::bf16) {

    Opcode = AArch64ISD::FCCMP;

    APInt Imm = Const->getAPIntValue();
    if (Imm.isNegative() && Imm.sgt(-32)) {
      Opcode = AArch64ISD::CCMN;

    Opcode = AArch64ISD::CCMN;

             isIntEqualitySetCC(CC)) {

    Opcode = AArch64ISD::CCMN;

    Opcode = AArch64ISD::CCMP;

                                bool &MustBeFirst, bool WillNegate,
                                unsigned Depth = 0) {

  MustBeFirst = false;

  bool IsOR = Opcode == ISD::OR;

  if (MustBeFirstL && MustBeFirstR)

  if (!CanNegateL && !CanNegateR)

    CanNegate = WillNegate && CanNegateL && CanNegateR;

    MustBeFirst = !CanNegate;

    MustBeFirst = MustBeFirstL || MustBeFirstR;
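// canEmitConjunction decides whether a chain of comparisons joined by AND/OR
// can be emitted as one CMP followed by conditional compares (CCMP/CCMN/
// FCCMP). CanNegate records that a subtree's result can be inverted for free
// by flipping condition codes, and MustBeFirst marks subtrees that can only
// be emitted as the leading (unpredicated) compare.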
3953 bool isInteger =
LHS.getValueType().isInteger();
3955 CC = getSetCCInverse(CC,
LHS.getValueType());
3961 assert(
LHS.getValueType().isFloatingPoint());
3987 bool IsOR = Opcode ==
ISD::OR;
3993 assert(ValidL &&
"Valid conjunction/disjunction tree");
4000 assert(ValidR &&
"Valid conjunction/disjunction tree");
4005 assert(!MustBeFirstR &&
"Valid conjunction/disjunction tree");
4014 bool NegateAfterAll;
4018 assert(CanNegateR &&
"at least one side must be negatable");
4019 assert(!MustBeFirstR &&
"invalid conjunction/disjunction tree");
4023 NegateAfterR =
true;
4026 NegateR = CanNegateR;
4027 NegateAfterR = !CanNegateR;
4030 NegateAfterAll = !Negate;
4032 assert(Opcode ==
ISD::AND &&
"Valid conjunction/disjunction tree");
4033 assert(!Negate &&
"Valid conjunction/disjunction tree");
4037 NegateAfterR =
false;
4038 NegateAfterAll =
false;
4058 bool DummyCanNegate;
4059 bool DummyMustBeFirst;
  auto isSupportedExtend = [&](SDValue V) {

      uint64_t Mask = MaskCst->getZExtValue();
      return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);

  if (!Op.hasOneUse())

  if (isSupportedExtend(Op))

  unsigned Opc = Op.getOpcode();

    uint64_t Shift = ShiftCst->getZExtValue();
    if (isSupportedExtend(Op.getOperand(0)))
      return (Shift <= 4) ? 2 : 1;

    EVT VT = Op.getValueType();
    if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
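// A compare operand is cheap when it folds into the CMP itself: an AND with
// 0xFF/0xFFFF/0xFFFFFFFF matches the UXTB/UXTH/UXTW extended-register forms,
// and a small shift (<= 4) of such an extend still fits the extended-register
// encoding, so those shapes are scored higher than a plain shifted register.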
  if (LHS.getOpcode() != ISD::AND && LHS.getOpcode() != AArch64ISD::ANDS)

  EVT VT = RHS.getValueType();
  APInt C = RHSC->getAPIntValue();

    if (!C.isMinSignedValue()) {

      assert(!C.isZero() && "C should not be zero here");

    if (!C.isMaxSignedValue()) {

    if (!C.isAllOnes()) {

  bool LHSIsCMN = isCMN(LHS, CC, DAG);
  bool RHSIsCMN = isCMN(RHS, CC, DAG);

      LHS.getNode()->hasNUsesOfValue(1, 0)) {
    int16_t ValueofRHS = RHS->getAsZExtVal();
4269static std::pair<SDValue, SDValue>
4271 assert((
Op.getValueType() == MVT::i32 ||
Op.getValueType() == MVT::i64) &&
4272 "Unsupported value type");
4278 switch (
Op.getOpcode()) {
4282 Opc = AArch64ISD::ADDS;
4286 Opc = AArch64ISD::ADDS;
4290 Opc = AArch64ISD::SUBS;
4294 Opc = AArch64ISD::SUBS;
4302 if (
Op.getValueType() == MVT::i32) {
4325 assert(
Op.getValueType() == MVT::i64 &&
"Expected an i64 value type");
4335 Overflow = DAG.
getNode(AArch64ISD::SUBS,
DL, VTs, UpperBits, LowerBits)
4354 Overflow =
Value.getValue(1);
4356 return std::make_pair(
Value, Overflow);
4361 !Subtarget->isNeonAvailable()))
4362 return LowerToScalableOp(
Op, DAG);
4386 return DAG.
getNode(AArch64ISD::CSEL,
DL,
Op.getValueType(), TVal, FVal,
4409 if (
LHS.getValueType() != MVT::i32 &&
LHS.getValueType() != MVT::i64)
4416 if (!CFVal || !CTVal)
4453 return Cmp.getValue(1);
4466 return DAG.
getNode(AArch64ISD::CSEL,
DL, VT, One, Zero, CC, Glue);
4476 return DAG.
getNode(AArch64ISD::CSEL,
DL, VT, One, Zero, CC, Glue);
4482 unsigned Opcode,
bool IsSigned) {
4483 EVT VT0 =
Op.getValue(0).getValueType();
4484 EVT VT1 =
Op.getValue(1).getValueType();
4486 if (VT0 != MVT::i32 && VT0 != MVT::i64)
4489 bool InvertCarry = Opcode == AArch64ISD::SBCS;
4526 DAG.
getNode(AArch64ISD::CSEL,
DL, MVT::i32, FVal, TVal, CCVal, Overflow);
4538 unsigned IsWrite =
Op.getConstantOperandVal(2);
4539 unsigned Locality =
Op.getConstantOperandVal(3);
4540 unsigned IsData =
Op.getConstantOperandVal(4);
4542 bool IsStream = !Locality;
4546 assert(Locality <= 3 &&
"Prefetch locality out-of-range");
4550 Locality = 3 - Locality;
  unsigned PrfOp = (IsWrite << 4) |

  return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
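// The 5-bit PRFM immediate is assembled from these fields; in the full
// expression the remaining bits encode !IsData (instruction vs. data cache),
// the target cache level derived from the inverted locality, and the
// stream/keep policy.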
  if (LHSConstOp && RHSConst) {
    uint64_t NewMaskValue = LHSConstValue & ~(RHSConstant - 1);

  EVT VT = Op.getValueType();

  if (VT == MVT::nxv2f64 && SrcVal.getValueType() == MVT::nxv2bf16) {

    return DAG.getNode(ISD::FP_EXTEND, DL, VT,
                       DAG.getNode(ISD::FP_EXTEND, DL, MVT::nxv2f32, SrcVal));

    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);

    return LowerFixedLengthFPExtendToSVE(Op, DAG);

  bool IsStrict = Op->isStrictFPOpcode();
  SDValue Op0 = Op.getOperand(IsStrict ? 1 : 0);

  if (VT == MVT::f64) {

    if (Op0VT == MVT::f32 || Op0VT == MVT::f16)

    if (Op0VT == MVT::bf16 && IsStrict) {
                        {Op0, Op.getOperand(0)});

    if (Op0VT == MVT::bf16)
      return DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), VT,
                         DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Op0));

  assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
  EVT VT = Op.getValueType();
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);

  bool Trunc = Op.getConstantOperandVal(IsStrict ? 2 : 1) == 1;

    if (SrcVT == MVT::nxv8f32)

      return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU);

    constexpr EVT I32 = MVT::nxv4i32;

    if (SrcVT == MVT::nxv2f32 || SrcVT == MVT::nxv4f32) {
      if (Subtarget->hasBF16())
        return LowerToPredicatedOp(Op, DAG,
                                   AArch64ISD::FP_ROUND_MERGE_PASSTHRU);

      Narrow = getSVESafeBitCast(I32, SrcVal, DAG);
    } else if (SrcVT == MVT::nxv2f64 &&
               (Subtarget->hasSVE2() || Subtarget->isStreamingSVEAvailable())) {

      Narrow = DAG.getNode(AArch64ISD::FCVTX_MERGE_PASSTHRU, DL, MVT::nxv2f32,
                           Pg, SrcVal, DAG.getUNDEF(MVT::nxv2f32));

      NewOps.push_back(Op.getOperand(IsStrict ? 2 : 1));
      return DAG.getNode(Op.getOpcode(), DL, VT, NewOps, Op->getFlags());

      EVT I1 = I32.changeElementType(MVT::i1);

      IsNaN = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, I1, IsNaN);
      Narrow = DAG.getSelect(DL, I32, IsNaN, NaN, Narrow);

    return getSVESafeBitCast(VT, Narrow, DAG);

    return LowerFixedLengthFPRoundToSVE(Op, DAG);

      !((Subtarget->hasNEON() || Subtarget->hasSME()) &&
        Subtarget->hasBF16())) {

    Narrow = DAG.getNode(ISD::BITCAST, DL, I32, Narrow);

      Narrow = DAG.getNode(AArch64ISD::FCVTXN, DL, F32, Narrow);
      Narrow = DAG.getNode(ISD::BITCAST, DL, I32, Narrow);

      Narrow = DAG.getSelect(DL, I32, IsNaN, NaN, Narrow);

    EVT I16 = I32.changeVectorElementType(MVT::i16);

    return DAG.getNode(ISD::BITCAST, DL, VT, Narrow);
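// Without a hardware BFCVT, f32 -> bf16 is rounded in integer arithmetic on
// the f32 bit pattern: add 0x7FFF plus the lsb of the truncated result for
// round-to-nearest-even, force NaNs to a quiet NaN via the select, then keep
// the top 16 bits. f64 sources are first narrowed with FCVTX/FCVTXN
// (round-to-odd) so that the final rounding step cannot double-round.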
4762 if (SrcVT != MVT::f128) {
4779 bool IsStrict =
Op->isStrictFPOpcode();
4780 EVT InVT =
Op.getOperand(IsStrict ? 1 : 0).getValueType();
4781 EVT VT =
Op.getValueType();
4784 "Unimplemented SVE support for STRICT_FP_to_INT!");
4793 {
Op.getOperand(0),
Op.getOperand(1)});
4794 return DAG.
getNode(
Op.getOpcode(),
DL, {VT, MVT::Other},
4795 {Ext.getValue(1), Ext.getValue(0)});
4798 Op.getOpcode(),
DL,
Op.getValueType(),
4799 DAG.
getNode(ISD::FP_EXTEND,
DL, NewVT,
Op.getOperand(0)));
4812 if (InVT == MVT::nxv8f32)
4816 ? AArch64ISD::FCVTZU_MERGE_PASSTHRU
4817 : AArch64ISD::FCVTZS_MERGE_PASSTHRU;
4818 return LowerToPredicatedOp(
Op, DAG, Opcode);
4823 return LowerFixedLengthFPToIntToSVE(
Op, DAG);
4827 if (VTSize < InVTSize) {
4832 {Op.getOperand(0), Op.getOperand(1)});
4842 if (VTSize > InVTSize) {
4849 {
Op.getOperand(0),
Op.getOperand(1)});
4850 return DAG.
getNode(
Op.getOpcode(),
DL, {VT, MVT::Other},
4851 {Ext.getValue(1), Ext.getValue(0)});
4866 return DAG.
getNode(
Op.getOpcode(),
DL, {ScalarVT, MVT::Other},
4867 {Op.getOperand(0), Extract});
4868 return DAG.
getNode(
Op.getOpcode(),
DL, ScalarVT, Extract);
4877 bool IsStrict =
Op->isStrictFPOpcode();
4878 SDValue SrcVal =
Op.getOperand(IsStrict ? 1 : 0);
4881 return LowerVectorFP_TO_INT(
Op, DAG);
4884 if ((SrcVal.
getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
4890 {
Op.getOperand(0), SrcVal});
4891 return DAG.
getNode(
Op.getOpcode(),
DL, {Op.getValueType(), MVT::Other},
4892 {Ext.getValue(1), Ext.getValue(0)});
4895 DAG.
getNode(ISD::FP_EXTEND,
DL, MVT::f32, SrcVal));
4907AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(
SDValue Op,
4913 EVT DstVT =
Op.getValueType();
4919 assert(SatWidth <= DstElementWidth &&
4920 "Saturation width cannot exceed result width");
4933 if ((SrcElementVT == MVT::f16 &&
4934 (!Subtarget->hasFullFP16() || DstElementWidth > 16)) ||
4935 SrcElementVT == MVT::bf16) {
4937 SrcVal = DAG.
getNode(ISD::FP_EXTEND,
DL, F32VT, SrcVal);
4945 SrcElementVT = MVT::f32;
4946 SrcElementWidth = 32;
4947 }
else if (SrcElementVT != MVT::f64 && SrcElementVT != MVT::f32 &&
4948 SrcElementVT != MVT::f16 && SrcElementVT != MVT::bf16)
4953 if (SatWidth == 64 && SrcElementWidth < 64) {
4955 SrcVal = DAG.
getNode(ISD::FP_EXTEND,
DL, F64VT, SrcVal);
4957 SrcElementVT = MVT::f64;
4958 SrcElementWidth = 64;
4961 if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth) {
4976 if (SrcElementWidth < SatWidth || SrcElementVT == MVT::f64)
4983 SrcVal2 ? DAG.
getNode(
Op.getOpcode(),
DL, IntVT, SrcVal2,
5019 return LowerVectorFP_TO_INT_SAT(
Op, DAG);
  EVT DstVT = Op.getValueType();

  assert(SatWidth <= DstWidth && "Saturation width cannot exceed result width");

  if ((SrcVT == MVT::f16 && !Subtarget->hasFullFP16()) || SrcVT == MVT::bf16) {
    SrcVal = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, SrcVal);

  } else if (SrcVT != MVT::f64 && SrcVT != MVT::f32 && SrcVT != MVT::f16 &&

  if ((SrcVT == MVT::f64 || SrcVT == MVT::f32 ||
       (SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
      DstVT == SatVT && (DstVT == MVT::i64 || DstVT == MVT::i32))
    return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,

  if (DstWidth < SatWidth)

  if (SrcVT == MVT::f16 && SatVT == MVT::i16 && DstVT == MVT::i32) {
        DAG.getNode(AArch64ISD::FCVTZS_HALF, DL, MVT::f32, SrcVal);

    SDValue CVTf32 = DAG.getNode(AArch64ISD::FCVTZU_HALF, DL, MVT::f32, SrcVal);
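// FCVTZS/FCVTZU already saturate to the destination width and convert NaN to
// zero, matching the fptosi/fptoui.sat semantics, so when the source,
// destination and saturation widths line up the node is selected directly;
// otherwise the source is first extended so the saturation bounds are
// representable.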
5082 EVT VT =
Op.getValueType();
5105 bool IsStrict =
Op->isStrictFPOpcode();
5106 EVT VT =
Op.getValueType();
5109 EVT InVT =
In.getValueType();
5110 unsigned Opc =
Op.getOpcode();
5114 "Unimplemented SVE support for ISD:::STRICT_INT_TO_FP!");
5129 {Op.getOperand(0), In});
5131 {
Op.getValueType(), MVT::Other},
5142 if (VT == MVT::nxv8f32)
5145 unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
5146 : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
5147 return LowerToPredicatedOp(
Op, DAG, Opcode);
5152 return LowerFixedLengthIntToFPToSVE(
Op, DAG);
5156 if (VTSize < InVTSize) {
5162 bool IsTargetf16 =
false;
5163 if (
Op.hasOneUse() &&
5168 SDNode *
U = *
Op->user_begin();
5169 if (
U->hasOneUse() &&
U->user_begin()->getOpcode() ==
ISD::FP_ROUND) {
5170 EVT TmpVT =
U->user_begin()->getValueType(0);
5176 if (IsTargetf32 && !IsTargetf16) {
5186 {
In.getValue(1),
In.getValue(0),
5194 if (VTSize > InVTSize) {
5211 return DAG.
getNode(
Op.getOpcode(),
DL, {ScalarVT, MVT::Other},
5212 {Op.getOperand(0), Extract});
5213 return DAG.
getNode(
Op.getOpcode(),
DL, ScalarVT, Extract);
  if (Op.getValueType().isVector())
    return LowerVectorINT_TO_FP(Op, DAG);

  bool IsStrict = Op->isStrictFPOpcode();
  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);

  auto IntToFpViaPromotion = [&](EVT PromoteVT) {
                        {Op.getOperand(0), SrcVal});
                       {Op.getValueType(), MVT::Other},
        DAG.getNode(Op.getOpcode(), DL, PromoteVT, SrcVal),

  if (Op.getValueType() == MVT::bf16) {
    unsigned MaxWidth = IsSigned

    if (MaxWidth <= 24) {
      return IntToFpViaPromotion(MVT::f32);

    if (MaxWidth <= 53) {
      return IntToFpViaPromotion(MVT::f64);
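// f32 has a 24-bit significand and f64 a 53-bit one, so integers of at most
// that many bits convert exactly; promoting through the wider type and then
// rounding to bf16 once therefore yields a correctly rounded result with no
// double rounding.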
5307 IsStrict ? DAG.
getNode(
Op.getOpcode(),
DL, {MVT::f64, MVT::Other},
5308 {Op.getOperand(0), ToRound})
5309 : DAG.
getNode(
Op.getOpcode(),
DL, MVT::f64, ToRound);
5336 {
Op.getValueType(), MVT::Other},
5340 DAG.getIntPtrConstant(0,
DL,
true));
5345 if (
Op.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
5346 return IntToFpViaPromotion(MVT::f32);
5355 if (
Op.getValueType() != MVT::f128)
5363AArch64TargetLowering::LowerLOOP_DEPENDENCE_MASK(
SDValue Op,
5366 uint64_t EltSize =
Op.getConstantOperandVal(2);
5367 EVT VT =
Op.getValueType();
5370 if (VT != MVT::v16i8 && VT != MVT::nxv16i1)
5374 if (VT != MVT::v8i8 && VT != MVT::nxv8i1)
5378 if (VT != MVT::v4i16 && VT != MVT::nxv4i1)
5382 if (VT != MVT::v2i32 && VT != MVT::nxv2i1)
5394 return DAG.
getNode(
Op.getOpcode(),
DL, VT, PtrA, PtrB,
Op.getOperand(2));
5406 DAG.
getNode(
Op.getOpcode(),
DL, WhileVT, PtrA, PtrB,
Op.getOperand(2));
5414 EVT OpVT =
Op.getValueType();
5415 EVT ArgVT =
Op.getOperand(0).getValueType();
5418 return LowerFixedLengthBitcastToSVE(
Op, DAG);
5426 "Expected int->fp bitcast!");
5439 return getSVESafeBitCast(OpVT, ExtResult, DAG);
5450 return getSVESafeBitCast(OpVT,
Op.getOperand(0), DAG);
5453 if (OpVT != MVT::f16 && OpVT != MVT::bf16)
5457 if (ArgVT == MVT::f16 || ArgVT == MVT::bf16)
5460 assert(ArgVT == MVT::i16);
5470static std::optional<uint64_t>
5474 return std::nullopt;
5479 return std::nullopt;
5481 return C->getZExtValue();
5486 EVT VT =
N.getValueType();
5491 for (
const SDValue &Elt :
N->op_values()) {
5494 unsigned HalfSize = EltSize / 2;
5496 if (!
isIntN(HalfSize,
C->getSExtValue()))
5499 if (!
isUIntN(HalfSize,
C->getZExtValue()))
5511 EVT VT =
N.getValueType();
5533 unsigned Opcode =
N.getOpcode();
5544 unsigned Opcode =
N.getOpcode();
  if (IsN0SExt && IsN1SExt)
    return AArch64ISD::SMULL;

  if (IsN0ZExt && IsN1ZExt)
    return AArch64ISD::UMULL;

  if (IsN0ZExt || IsN1ZExt) {
      return AArch64ISD::UMULL;

      return AArch64ISD::UMULL;

  if (IsN0SExt || IsN1SExt) {
      return AArch64ISD::SMULL;

      return AArch64ISD::SMULL;

  if (!IsN1SExt && !IsN1ZExt)

    return AArch64ISD::SMULL;

    return AArch64ISD::UMULL;

    return AArch64ISD::UMULL;
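// A widening multiply is formed when both operands are extensions of
// half-width values: sext*sext selects SMULL and zext*zext selects UMULL. If
// only one operand is an explicit extend, the other may still qualify when
// its elements are known to fit in the narrow type (e.g. a BUILD_VECTOR of
// small constants).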
5739 EVT VT =
Op.getValueType();
5741 bool OverrideNEON = !Subtarget->isNeonAvailable();
5743 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::MUL_PRED);
5748 "unexpected type for custom-lowering ISD::MUL");
5762 if (VT == MVT::v1i64) {
5763 if (Subtarget->hasSVE())
5764 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::MUL_PRED);
5780 if (Subtarget->hasSVE())
5781 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::MUL_PRED);
5796 "unexpected types for extended operands to VMULL");
5811 DAG.
getNode(ISD::BITCAST,
DL, Op1VT, N00), Op1),
5813 DAG.
getNode(ISD::BITCAST,
DL, Op1VT, N01), Op1)),
5819 if (
Pattern == AArch64SVEPredPattern::all)
5821 return DAG.
getNode(AArch64ISD::PTRUE,
DL, VT,
                                           bool IsSigned, bool IsEqual) {

  if (!N->getValueType(0).isScalableVector() ||

  APInt Y = N->getConstantOperandAPInt(Op1);

  if (IsSigned ? Y.isMaxSignedValue() : Y.isMaxValue())

  APInt X = N->getConstantOperandAPInt(Op0);

  APInt NumActiveElems =
      IsSigned ? Y.ssub_ov(X, Overflow) : Y.usub_ov(X, Overflow);

    NumActiveElems = IsSigned ? NumActiveElems.sadd_ov(One, Overflow)
                              : NumActiveElems.uadd_ov(One, Overflow);

  std::optional<unsigned> PredPattern =

  unsigned MinSVEVectorSize = std::max(

  unsigned ElementSize = 128 / N->getValueType(0).getVectorMinNumElements();
  if (PredPattern != std::nullopt &&
      NumActiveElems.getZExtValue() <= (MinSVEVectorSize / ElementSize))
    return getPTrue(DAG, DL, N->getValueType(0), *PredPattern);
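// For a WHILE comparison with constant bounds the number of active lanes is
// known at compile time; when it matches one of the fixed SVE predicate
// patterns (VL1, VL2, ..., VL256) and fits in the minimum vector length, a
// PTRUE with that pattern is emitted instead of the WHILE instruction.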
5879 EVT InVT =
Op.getValueType();
5883 "Expected a predicate-to-predicate bitcast");
5887 "Only expect to cast between legal scalable predicate types!");
5897 Op.getConstantOperandVal(0) == Intrinsic::aarch64_sve_convert_to_svbool &&
5898 Op.getOperand(1).getValueType().bitsGT(VT))
5899 Op =
Op.getOperand(1);
5917 Mask = DAG.
getNode(AArch64ISD::REINTERPRET_CAST,
DL, VT, Mask);
5924 RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE;
5929 TargetLowering::CallLoweringInfo CLI(DAG);
5931 CLI.setDebugLoc(
DL).setChain(Chain).setLibCallee(
5933 std::pair<SDValue, SDValue> CallResult =
LowerCallTo(CLI);
  SDValue TileSlice = N->getOperand(2);

  int32_t ConstAddend = 0;

    ConstAddend = ImmNode->getSExtValue();

  int32_t ImmAddend = ConstAddend % 16;
  if (int32_t C = (ConstAddend - ImmAddend)) {

    VarAddend = VarAddend

  auto SVL = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,

  return DAG.getNode(IsLoad ? AArch64ISD::SME_ZA_LDR : AArch64ISD::SME_ZA_STR,
                     {N.getOperand(0), TileSlice, Base,
6028 auto Op1 =
Op.getOperand(1);
6029 auto Op2 =
Op.getOperand(2);
6030 auto Mask =
Op.getOperand(3);
6033 EVT Op2VT = Op2.getValueType();
6034 EVT ResVT =
Op.getValueType();
6038 "Expected 8-bit or 16-bit characters.");
6052 Op2 = DAG.
getNode(AArch64ISD::DUPLANE128,
DL, OpContainerVT, Op2,
6080 ID, Mask, Op1, Op2);
6091 unsigned IntNo =
Op.getConstantOperandVal(1);
6096 case Intrinsic::aarch64_prefetch: {
6100 unsigned IsWrite =
Op.getConstantOperandVal(3);
6101 unsigned Locality =
Op.getConstantOperandVal(4);
6102 unsigned IsStream =
Op.getConstantOperandVal(5);
6103 unsigned IsData =
Op.getConstantOperandVal(6);
6104 unsigned PrfOp = (IsWrite << 4) |
6109 return DAG.
getNode(AArch64ISD::PREFETCH,
DL, MVT::Other, Chain,
6112 case Intrinsic::aarch64_sme_str:
6113 case Intrinsic::aarch64_sme_ldr: {
6116 case Intrinsic::aarch64_sme_za_enable:
6118 AArch64ISD::SMSTART,
DL, DAG.
getVTList(MVT::Other, MVT::Glue),
6121 case Intrinsic::aarch64_sme_za_disable:
6123 AArch64ISD::SMSTOP,
DL, DAG.
getVTList(MVT::Other, MVT::Glue),
6131 unsigned IntNo =
Op.getConstantOperandVal(1);
6136 case Intrinsic::aarch64_mops_memset_tag: {
6143 auto Alignment =
Node->getMemOperand()->getAlign();
6144 bool IsVol =
Node->isVolatile();
6145 auto DstPtrInfo =
Node->getPointerInfo();
6149 SDValue MS = SDI.EmitMOPS(AArch64::MOPSMemorySetTaggingPseudo, DAG,
DL,
6150 Chain, Dst, Val,
Size, Alignment, IsVol,
6151 DstPtrInfo, MachinePointerInfo{});
6164 unsigned IntNo =
Op.getConstantOperandVal(0);
6168 case Intrinsic::thread_pointer: {
6170 return DAG.
getNode(AArch64ISD::THREAD_POINTER,
DL, PtrVT);
6172 case Intrinsic::aarch64_sve_whilewr_b:
6174 Op.getOperand(1),
Op.getOperand(2),
6176 case Intrinsic::aarch64_sve_whilewr_h:
6178 Op.getOperand(1),
Op.getOperand(2),
6180 case Intrinsic::aarch64_sve_whilewr_s:
6182 Op.getOperand(1),
Op.getOperand(2),
6184 case Intrinsic::aarch64_sve_whilewr_d:
6186 Op.getOperand(1),
Op.getOperand(2),
6188 case Intrinsic::aarch64_sve_whilerw_b:
6190 Op.getOperand(1),
Op.getOperand(2),
6192 case Intrinsic::aarch64_sve_whilerw_h:
6194 Op.getOperand(1),
Op.getOperand(2),
6196 case Intrinsic::aarch64_sve_whilerw_s:
6198 Op.getOperand(1),
Op.getOperand(2),
6200 case Intrinsic::aarch64_sve_whilerw_d:
6202 Op.getOperand(1),
Op.getOperand(2),
6204 case Intrinsic::aarch64_neon_abs: {
6205 EVT Ty =
Op.getValueType();
6206 if (Ty == MVT::i64) {
6208 DAG.
getNode(ISD::BITCAST,
DL, MVT::v1i64,
Op.getOperand(1));
6210 return DAG.
getNode(ISD::BITCAST,
DL, MVT::i64, Result);
6217 case Intrinsic::aarch64_neon_pmull64: {
6221 std::optional<uint64_t> LHSLane =
6223 std::optional<uint64_t> RHSLane =
6226 assert((!LHSLane || *LHSLane < 2) &&
"Expect lane to be None or 0 or 1");
6227 assert((!RHSLane || *RHSLane < 2) &&
"Expect lane to be None or 0 or 1");
    auto TryVectorizeOperand = [](SDValue N, std::optional<uint64_t> NLane,
                                  std::optional<uint64_t> OtherLane,
                                  SelectionDAG &DAG) -> SDValue {

      if (OtherLane == 1) {
            DAG.getNode(AArch64ISD::DUPLANE64, DL, MVT::v2i64,

        return DAG.getNode(AArch64ISD::DUP, DL, MVT::v1i64, N);

      assert(N.getValueType() == MVT::i64 &&
             "Intrinsic aarch64_neon_pmull64 requires i64 parameters");

    LHS = TryVectorizeOperand(LHS, LHSLane, RHSLane, DL, DAG);
    RHS = TryVectorizeOperand(RHS, RHSLane, LHSLane, DL, DAG);
6275 case Intrinsic::aarch64_neon_smax:
6278 case Intrinsic::aarch64_neon_umax:
6281 case Intrinsic::aarch64_neon_smin:
6284 case Intrinsic::aarch64_neon_umin:
6287 case Intrinsic::aarch64_neon_scalar_sqxtn:
6288 case Intrinsic::aarch64_neon_scalar_sqxtun:
6289 case Intrinsic::aarch64_neon_scalar_uqxtn: {
6290 assert(
Op.getValueType() == MVT::i32 ||
Op.getValueType() == MVT::f32);
6291 if (
Op.getValueType() == MVT::i32)
6292 return DAG.
getNode(ISD::BITCAST,
DL, MVT::i32,
6296 Op.getOperand(1))));
6299 case Intrinsic::aarch64_neon_sqxtn:
6302 case Intrinsic::aarch64_neon_sqxtun:
6305 case Intrinsic::aarch64_neon_uqxtn:
6308 case Intrinsic::aarch64_neon_sqshrn:
6309 if (
Op.getValueType().isVector())
6312 Op.getOperand(1).getValueType(),
6313 Op.getOperand(1),
Op.getOperand(2)));
6315 case Intrinsic::aarch64_neon_sqshrun:
6316 if (
Op.getValueType().isVector())
6319 Op.getOperand(1).getValueType(),
6320 Op.getOperand(1),
Op.getOperand(2)));
6322 case Intrinsic::aarch64_neon_uqshrn:
6323 if (
Op.getValueType().isVector())
6326 Op.getOperand(1).getValueType(),
6327 Op.getOperand(1),
Op.getOperand(2)));
6329 case Intrinsic::aarch64_neon_sqrshrn:
6330 if (
Op.getValueType().isVector())
6333 Op.getOperand(1).getValueType(),
6334 Op.getOperand(1),
Op.getOperand(2)));
6336 case Intrinsic::aarch64_neon_sqrshrun:
6337 if (
Op.getValueType().isVector())
6340 Op.getOperand(1).getValueType(),
6341 Op.getOperand(1),
Op.getOperand(2)));
6343 case Intrinsic::aarch64_neon_uqrshrn:
6344 if (
Op.getValueType().isVector())
6347 Op.getOperand(1).getValueType(),
6348 Op.getOperand(1),
Op.getOperand(2)));
6350 case Intrinsic::aarch64_neon_sqadd:
6351 if (
Op.getValueType().isVector())
6355 case Intrinsic::aarch64_neon_sqsub:
6356 if (
Op.getValueType().isVector())
6360 case Intrinsic::aarch64_neon_uqadd:
6361 if (
Op.getValueType().isVector())
6365 case Intrinsic::aarch64_neon_uqsub:
6366 if (
Op.getValueType().isVector())
6370 case Intrinsic::aarch64_sve_whilelt:
6373 case Intrinsic::aarch64_sve_whilels:
6376 case Intrinsic::aarch64_sve_whilele:
6379 case Intrinsic::aarch64_sve_sunpkhi:
6380 return DAG.
getNode(AArch64ISD::SUNPKHI,
DL,
Op.getValueType(),
6382 case Intrinsic::aarch64_sve_sunpklo:
6383 return DAG.
getNode(AArch64ISD::SUNPKLO,
DL,
Op.getValueType(),
6385 case Intrinsic::aarch64_sve_uunpkhi:
6386 return DAG.
getNode(AArch64ISD::UUNPKHI,
DL,
Op.getValueType(),
6388 case Intrinsic::aarch64_sve_uunpklo:
6389 return DAG.
getNode(AArch64ISD::UUNPKLO,
DL,
Op.getValueType(),
6391 case Intrinsic::aarch64_sve_clasta_n:
6392 return DAG.
getNode(AArch64ISD::CLASTA_N,
DL,
Op.getValueType(),
6393 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
6394 case Intrinsic::aarch64_sve_clastb_n:
6395 return DAG.
getNode(AArch64ISD::CLASTB_N,
DL,
Op.getValueType(),
6396 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
6397 case Intrinsic::aarch64_sve_lasta:
6398 return DAG.
getNode(AArch64ISD::LASTA,
DL,
Op.getValueType(),
6399 Op.getOperand(1),
Op.getOperand(2));
6400 case Intrinsic::aarch64_sve_lastb:
6401 return DAG.
getNode(AArch64ISD::LASTB,
DL,
Op.getValueType(),
6402 Op.getOperand(1),
Op.getOperand(2));
6403 case Intrinsic::aarch64_sve_rev:
6406 case Intrinsic::aarch64_sve_tbl:
6407 return DAG.
getNode(AArch64ISD::TBL,
DL,
Op.getValueType(),
Op.getOperand(1),
6409 case Intrinsic::aarch64_sve_trn1:
6410 return DAG.
getNode(AArch64ISD::TRN1,
DL,
Op.getValueType(),
6411 Op.getOperand(1),
Op.getOperand(2));
6412 case Intrinsic::aarch64_sve_trn2:
6413 return DAG.
getNode(AArch64ISD::TRN2,
DL,
Op.getValueType(),
6414 Op.getOperand(1),
Op.getOperand(2));
6415 case Intrinsic::aarch64_sve_uzp1:
6416 return DAG.
getNode(AArch64ISD::UZP1,
DL,
Op.getValueType(),
6417 Op.getOperand(1),
Op.getOperand(2));
6418 case Intrinsic::aarch64_sve_uzp2:
6419 return DAG.
getNode(AArch64ISD::UZP2,
DL,
Op.getValueType(),
6420 Op.getOperand(1),
Op.getOperand(2));
6421 case Intrinsic::aarch64_sve_zip1:
6422 return DAG.
getNode(AArch64ISD::ZIP1,
DL,
Op.getValueType(),
6423 Op.getOperand(1),
Op.getOperand(2));
6424 case Intrinsic::aarch64_sve_zip2:
6425 return DAG.
getNode(AArch64ISD::ZIP2,
DL,
Op.getValueType(),
6426 Op.getOperand(1),
Op.getOperand(2));
6427 case Intrinsic::aarch64_sve_splice:
6428 return DAG.
getNode(AArch64ISD::SPLICE,
DL,
Op.getValueType(),
6429 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
6430 case Intrinsic::aarch64_sve_ptrue:
6431 return getPTrue(DAG,
DL,
Op.getValueType(),
Op.getConstantOperandVal(1));
6432 case Intrinsic::aarch64_sve_clz:
6433 return DAG.
getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6434 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6435 case Intrinsic::aarch64_sme_cntsd: {
6441 case Intrinsic::aarch64_sve_cnt: {
6444 if (
Data.getValueType().isFloatingPoint())
6446 return DAG.
getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6447 Op.getOperand(2),
Data,
Op.getOperand(1));
6449 case Intrinsic::aarch64_sve_dupq_lane:
6450 return LowerDUPQLane(
Op, DAG);
6451 case Intrinsic::aarch64_sve_convert_from_svbool:
6452 if (
Op.getValueType() == MVT::aarch64svcount)
6453 return DAG.
getNode(ISD::BITCAST,
DL,
Op.getValueType(),
Op.getOperand(1));
6455 case Intrinsic::aarch64_sve_convert_to_svbool:
6456 if (
Op.getOperand(1).getValueType() == MVT::aarch64svcount)
6457 return DAG.
getNode(ISD::BITCAST,
DL, MVT::nxv16i1,
Op.getOperand(1));
6459 case Intrinsic::aarch64_sve_fneg:
6460 return DAG.
getNode(AArch64ISD::FNEG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6461 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6462 case Intrinsic::aarch64_sve_frintp:
6463 return DAG.
getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6464 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6465 case Intrinsic::aarch64_sve_frintm:
6466 return DAG.
getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6467 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6468 case Intrinsic::aarch64_sve_frinti:
6469 return DAG.
getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU,
DL,
6470 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6472 case Intrinsic::aarch64_sve_frintx:
6473 return DAG.
getNode(AArch64ISD::FRINT_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6474 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6475 case Intrinsic::aarch64_sve_frinta:
6476 return DAG.
getNode(AArch64ISD::FROUND_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6477 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6478 case Intrinsic::aarch64_sve_frintn:
6479 return DAG.
getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU,
DL,
6480 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6482 case Intrinsic::aarch64_sve_frintz:
6483 return DAG.
getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6484 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6485 case Intrinsic::aarch64_sve_ucvtf:
6486 return DAG.
getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU,
DL,
6487 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6489 case Intrinsic::aarch64_sve_scvtf:
6490 return DAG.
getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU,
DL,
6491 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6493 case Intrinsic::aarch64_sve_fcvtzu:
6494 return DAG.
getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6495 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6496 case Intrinsic::aarch64_sve_fcvtzs:
6497 return DAG.
getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6498 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6499 case Intrinsic::aarch64_sve_fsqrt:
6500 return DAG.
getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6501 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6502 case Intrinsic::aarch64_sve_frecpx:
6503 return DAG.
getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6504 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6505 case Intrinsic::aarch64_sve_frecpe_x:
6506 return DAG.
getNode(AArch64ISD::FRECPE,
DL,
Op.getValueType(),
6508 case Intrinsic::aarch64_sve_frecps_x:
6509 return DAG.
getNode(AArch64ISD::FRECPS,
DL,
Op.getValueType(),
6510 Op.getOperand(1),
Op.getOperand(2));
6511 case Intrinsic::aarch64_sve_frsqrte_x:
6512 return DAG.
getNode(AArch64ISD::FRSQRTE,
DL,
Op.getValueType(),
6514 case Intrinsic::aarch64_sve_frsqrts_x:
6515 return DAG.
getNode(AArch64ISD::FRSQRTS,
DL,
Op.getValueType(),
6516 Op.getOperand(1),
Op.getOperand(2));
6517 case Intrinsic::aarch64_sve_fabs:
6518 return DAG.
getNode(AArch64ISD::FABS_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6519 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6520 case Intrinsic::aarch64_sve_abs:
6521 return DAG.
getNode(AArch64ISD::ABS_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6522 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6523 case Intrinsic::aarch64_sve_neg:
6524 return DAG.
getNode(AArch64ISD::NEG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6525 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6526 case Intrinsic::aarch64_sve_insr: {
6528 EVT ScalarTy =
Scalar.getValueType();
6529 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
6532 return DAG.
getNode(AArch64ISD::INSR,
DL,
Op.getValueType(),
6533 Op.getOperand(1), Scalar);
6535 case Intrinsic::aarch64_sve_rbit:
6536 return DAG.
getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU,
DL,
6537 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6539 case Intrinsic::aarch64_sve_revb:
6540 return DAG.
getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6541 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6542 case Intrinsic::aarch64_sve_revh:
6543 return DAG.
getNode(AArch64ISD::REVH_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6544 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6545 case Intrinsic::aarch64_sve_revw:
6546 return DAG.
getNode(AArch64ISD::REVW_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6547 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6548 case Intrinsic::aarch64_sve_revd:
6549 return DAG.
getNode(AArch64ISD::REVD_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6550 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
  case Intrinsic::aarch64_sve_sxtb:
    return DAG.getNode(
        AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
        Op.getOperand(2), Op.getOperand(3),
        DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
        Op.getOperand(1));
  case Intrinsic::aarch64_sve_sxth:
    return DAG.getNode(
        AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
        Op.getOperand(2), Op.getOperand(3),
        DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
        Op.getOperand(1));
  case Intrinsic::aarch64_sve_sxtw:
    return DAG.getNode(
        AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
        Op.getOperand(2), Op.getOperand(3),
        DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
        Op.getOperand(1));
  case Intrinsic::aarch64_sve_uxtb:
    return DAG.getNode(
        AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
        Op.getOperand(2), Op.getOperand(3),
        DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
        Op.getOperand(1));
  case Intrinsic::aarch64_sve_uxth:
    return DAG.getNode(
        AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
        Op.getOperand(2), Op.getOperand(3),
        DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
        Op.getOperand(1));
  case Intrinsic::aarch64_sve_uxtw:
    return DAG.getNode(
        AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
        Op.getOperand(2), Op.getOperand(3),
        DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
        Op.getOperand(1));
  case Intrinsic::localaddress: {
    const auto &MF = DAG.getMachineFunction();
    const auto *RegInfo = Subtarget->getRegisterInfo();
    unsigned Reg = RegInfo->getLocalAddressRegister(MF);
    return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg,
                              Op.getSimpleValueType());
  }

  case Intrinsic::eh_recoverfp: {
    // Simply return the incoming FP after checking that the first argument is
    // in fact a function.
    SDValue FnOp = Op.getOperand(1);
    SDValue IncomingFPOp = Op.getOperand(2);
    GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
    auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
    if (!Fn)
      report_fatal_error(
          "llvm.eh.recoverfp must take a function as the first argument");
    return IncomingFPOp;
  }
  case Intrinsic::aarch64_neon_vsri:
  case Intrinsic::aarch64_neon_vsli:
  case Intrinsic::aarch64_sve_sri:
  case Intrinsic::aarch64_sve_sli: {
    EVT Ty = Op.getValueType();
    bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri ||
                        IntNo == Intrinsic::aarch64_sve_sri;
    unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
    return DAG.getNode(Opcode, DL, Ty, Op.getOperand(1), Op.getOperand(2),
                       Op.getOperand(3));
  }
  case Intrinsic::aarch64_neon_srhadd:
  case Intrinsic::aarch64_neon_urhadd:
  case Intrinsic::aarch64_neon_shadd:
  case Intrinsic::aarch64_neon_uhadd: {
    bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
                        IntNo == Intrinsic::aarch64_neon_shadd);
    bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
                          IntNo == Intrinsic::aarch64_neon_urhadd);
    unsigned Opcode = IsSignedAdd
                          ? (IsRoundingAdd ? ISD::AVGCEILS : ISD::AVGFLOORS)
                          : (IsRoundingAdd ? ISD::AVGCEILU : ISD::AVGFLOORU);
    return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(1),
                       Op.getOperand(2));
  }
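  // The case above reduces the four averaging intrinsics to two flags
  // (signedness, rounding). A minimal standalone sketch of that decision and
  // of the overflow-free scalar semantics, in plain C++; the names AvgOp,
  // selectAvgOp, uhadd and urhadd are illustrative, not LLVM APIs:
  //
  //   #include <cstdint>
  //   #include <cstdio>
  //
  //   enum class AvgOp { AvgFloorS, AvgFloorU, AvgCeilS, AvgCeilU };
  //
  //   // Mirrors the IsSignedAdd / IsRoundingAdd selection above.
  //   AvgOp selectAvgOp(bool IsSigned, bool IsRounding) {
  //     if (IsSigned)
  //       return IsRounding ? AvgOp::AvgCeilS : AvgOp::AvgFloorS;
  //     return IsRounding ? AvgOp::AvgCeilU : AvgOp::AvgFloorU;
  //   }
  //
  //   // Reference scalar semantics: the halving add truncates, the rounding
  //   // halving add rounds up, both without widening the intermediate sum.
  //   uint8_t uhadd(uint8_t A, uint8_t B) { return (A & B) + ((A ^ B) >> 1); }
  //   uint8_t urhadd(uint8_t A, uint8_t B) { return (A | B) - ((A ^ B) >> 1); }
  //
  //   int main() {
  //     std::printf("%d %d\n", uhadd(7, 8), urhadd(7, 8)); // prints "7 8"
  //     return selectAvgOp(false, true) == AvgOp::AvgCeilU ? 0 : 1;
  //   }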
  case Intrinsic::aarch64_neon_saddlp:
  case Intrinsic::aarch64_neon_uaddlp: {
    unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uaddlp
                          ? AArch64ISD::UADDLP
                          : AArch64ISD::SADDLP;
    return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(1));
  }
  case Intrinsic::aarch64_neon_sdot:
  case Intrinsic::aarch64_neon_udot:
  case Intrinsic::aarch64_sve_sdot:
  case Intrinsic::aarch64_sve_udot: {
    unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot ||
                       IntNo == Intrinsic::aarch64_sve_udot)
                          ? AArch64ISD::UDOT
                          : AArch64ISD::SDOT;
    return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(1),
                       Op.getOperand(2), Op.getOperand(3));
  }
  case Intrinsic::aarch64_neon_usdot:
  case Intrinsic::aarch64_sve_usdot: {
    return DAG.getNode(AArch64ISD::USDOT, DL, Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
  }
  case Intrinsic::aarch64_neon_saddlv:
  case Intrinsic::aarch64_neon_uaddlv: {
    EVT OpVT = Op.getOperand(1).getValueType();
    EVT ResVT = Op.getValueType();
    assert(
        ((ResVT == MVT::i32 && (OpVT == MVT::v8i8 || OpVT == MVT::v16i8 ||
                                OpVT == MVT::v8i16 || OpVT == MVT::v4i16)) ||
         (ResVT == MVT::i64 && (OpVT == MVT::v4i32 || OpVT == MVT::v2i32))) &&
        "Unexpected aarch64_neon_u/saddlv type");
    // The across-lanes add is produced in a wider vector register and lane 0
    // is then extracted as the scalar result.
    SDValue ADDLV = DAG.getNode(
        IntNo == Intrinsic::aarch64_neon_uaddlv ? AArch64ISD::UADDLV
                                                : AArch64ISD::SADDLV,
        DL, ResVT == MVT::i32 ? MVT::v4i32 : MVT::v2i64, Op.getOperand(1));
    SDValue EXTRACT_VEC_ELT = DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, DL, ResVT == MVT::i32 ? MVT::i32 : MVT::i64,
        ADDLV, DAG.getConstant(0, DL, MVT::i64));
    return EXTRACT_VEC_ELT;
  }
  case Intrinsic::experimental_cttz_elts: {
    // ... (setup of CttzOp elided in this listing)
    SDValue NewCttzElts =
        DAG.getNode(AArch64ISD::CTTZ_ELTS, DL, MVT::i64, CttzOp);
    // ... (the i64 count is then adjusted to the intrinsic's result type)
  }
  case Intrinsic::experimental_vector_match: {
    // ... (remainder of this case, and of the intrinsic switch, elided in this
    // listing)
bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const {
  // ... (body elided in this listing)
}

bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
                                                          EVT DataVT) const {
  // ... (body elided in this listing)
}

bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
  EVT ExtVT = ExtVal.getValueType();
  if (!ExtVT.isScalableVector() && !Subtarget->useSVEForFixedLengthVectors())
    return false;
  // ... (Ld is the masked load feeding ExtVal; the count below checks how many
  // masked loads share its mask)
  unsigned NumExtMaskedLoads = 0;
  for (auto *U : Ld->getMask()->users())
    if (isa<MaskedLoadSDNode>(U))
      NumExtMaskedLoads++;

  if (NumExtMaskedLoads <= 1)
    /* ... result for the single-user case elided in this listing ... */;

  // ... (PreExtScalarVT is the element type before extension)
  return PreExtScalarVT == MVT::i8 || PreExtScalarVT == MVT::i16 ||
         PreExtScalarVT == MVT::i32 || PreExtScalarVT == MVT::i64;
}
  std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
      {std::make_tuple(false, false, false),
       AArch64ISD::GLD1_MERGE_ZERO},
      {std::make_tuple(false, false, true),
       AArch64ISD::GLD1_UXTW_MERGE_ZERO},
      {std::make_tuple(false, true, false),
       AArch64ISD::GLD1_MERGE_ZERO},
      {std::make_tuple(false, true, true),
       AArch64ISD::GLD1_SXTW_MERGE_ZERO},
      {std::make_tuple(true, false, false),
       AArch64ISD::GLD1_SCALED_MERGE_ZERO},
      {std::make_tuple(true, false, true),
       AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO},
      {std::make_tuple(true, true, false),
       AArch64ISD::GLD1_SCALED_MERGE_ZERO},
      {std::make_tuple(true, true, true),
       AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO},
  };
  auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
  return AddrModes.find(Key)->second;
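  // The table above keys the gather opcode on (IsScaled, IsSigned,
  // NeedsExtend). A minimal standalone model of that lookup in plain C++,
  // with illustrative string names standing in for the AArch64ISD opcodes
  // (selectGatherOpcode is not an LLVM API):
  //
  //   #include <iostream>
  //   #include <map>
  //   #include <string>
  //   #include <tuple>
  //
  //   std::string selectGatherOpcode(bool IsScaled, bool IsSigned,
  //                                  bool NeedsExtend) {
  //     static const std::map<std::tuple<bool, bool, bool>, std::string>
  //         AddrModes = {
  //             {{false, false, false}, "GLD1"},
  //             {{false, false, true}, "GLD1_UXTW"},
  //             {{false, true, false}, "GLD1"},
  //             {{false, true, true}, "GLD1_SXTW"},
  //             {{true, false, false}, "GLD1_SCALED"},
  //             {{true, false, true}, "GLD1_UXTW_SCALED"},
  //             {{true, true, false}, "GLD1_SCALED"},
  //             {{true, true, true}, "GLD1_SXTW_SCALED"},
  //         };
  //     return AddrModes.at({IsScaled, IsSigned, NeedsExtend});
  //   }
  //
  //   int main() {
  //     // A scaled gather whose indices need sign extension:
  //     std::cout << selectGatherOpcode(true, true, true) << "\n";
  //   }
  //
  // Note that the signed/unsigned distinction only matters when an index
  // extension is needed at all, which is why the two no-extend rows for a
  // given scaling map to the same opcode.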
6791 case AArch64ISD::GLD1_MERGE_ZERO:
6792 return AArch64ISD::GLD1S_MERGE_ZERO;
6793 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
6794 return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
6795 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
6796 return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
6797 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
6798 return AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
6799 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
6800 return AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
6801 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
6802 return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
6803 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
6804 return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
6819 EVT VT =
Op.getValueType();
6843 EVT IndexVT =
Index.getValueType();
6855 assert(Subtarget->useSVEForFixedLengthVectors() &&
6856 "Cannot lower when not using SVE for fixed vectors!");
6865 Index.getValueType().getVectorElementType() == MVT::i64 ||
6866 Mask.getValueType().getVectorElementType() == MVT::i64)
6931 EVT IndexVT =
Index.getValueType();
6943 assert(Subtarget->useSVEForFixedLengthVectors() &&
6944 "Cannot lower when not using SVE for fixed vectors!");
6950 StoreVal = DAG.
getNode(ISD::BITCAST,
DL, VT, StoreVal);
6956 Index.getValueType().getVectorElementType() == MVT::i64 ||
6957 Mask.getValueType().getVectorElementType() == MVT::i64)
6967 if (PromotedVT != VT)
6991 assert(LoadNode &&
"Expected custom lowering of a masked load node");
6992 EVT VT =
Op->getValueType(0);
6995 return LowerFixedLengthVectorMLoadToSVE(
Op, DAG);
7019 assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
7032 {Undef, Undef, Undef, Undef});
  Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
  // ...
  return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
                      ST->getBasePtr(), ST->getMemOperand());
7049 MVT DestVT =
Op.getSimpleValueType();
7053 unsigned SrcAS =
N->getSrcAddressSpace();
7054 unsigned DestAS =
N->getDestAddressSpace();
7055 assert(SrcAS != DestAS &&
7056 "addrspacecast must be between different address spaces");
7059 "addrspacecast must be between different ptr sizes");
7085 assert (StoreNode &&
"Can only custom lower store nodes");
7089 EVT VT =
Value.getValueType();
7095 Subtarget->useSVEForFixedLengthVectors()))
7096 return LowerFixedLengthVectorStoreToSVE(
Op, DAG);
7108 MemVT == MVT::v4i8) {
7131 AArch64ISD::STNP, Dl, DAG.
getVTList(MVT::Other),
7132 {StoreNode->getChain(), DAG.getBitcast(MVT::v2i64, Lo),
7133 DAG.getBitcast(MVT::v2i64, Hi), StoreNode->getBasePtr()},
7137 }
else if (MemVT == MVT::i128 && StoreNode->
isVolatile()) {
7138 return LowerStore128(
Op, DAG);
7139 }
else if (MemVT == MVT::i64x8) {
7144 EVT PtrVT =
Base.getValueType();
7145 for (
unsigned i = 0; i < 8; i++) {
7146 SDValue Part = DAG.
getNode(AArch64ISD::LS64_EXTRACT, Dl, MVT::i64,
7166 bool IsStoreRelease =
7169 assert((Subtarget->hasFeature(AArch64::FeatureLSE2) &&
7170 Subtarget->hasFeature(AArch64::FeatureRCPC3) && IsStoreRelease) ||
7175 StoreNode->
getOpcode() == ISD::ATOMIC_STORE)
7180 unsigned Opcode = IsStoreRelease ? AArch64ISD::STILP : AArch64ISD::STP;
7182 std::swap(StoreValue.first, StoreValue.second);
7185 {StoreNode->getChain(), StoreValue.first, StoreValue.second,
7186 StoreNode->getBasePtr()},
7195 assert(LoadNode &&
"Expected custom lowering of a load node");
7201 EVT PtrVT =
Base.getValueType();
7202 for (
unsigned i = 0; i < 8; i++) {
7208 Ops.push_back(Part);
7216 EVT VT =
Op->getValueType(0);
7217 assert((VT == MVT::v4i16 || VT == MVT::v4i32) &&
"Expected v4i16 or v4i32");
7223 if (Subtarget->requiresStrictAlign() && LoadNode->
getAlign() <
Align(4))
7236 LoadNode->
getBasePtr(), MachinePointerInfo());
7243 if (VT == MVT::v4i32)
7255 EVT MaskVT =
Mask.getValueType();
7258 const bool HasPassthru = !Passthru.
isUndef();
7262 assert(VecVT.
isVector() &&
"Input to VECTOR_COMPRESS must be vector.");
7264 if (!Subtarget->isSVEAvailable())
7271 if (MinElmts != 2 && MinElmts != 4)
7275 if (IsFixedLength) {
7285 DAG.
getUNDEF(ScalableMaskVT), Mask,
7290 DAG.
getUNDEF(ScalableVecVT), Passthru,
7294 MaskVT =
Mask.getValueType();
7303 if (ContainerVT != VecVT) {
7330 if (IsFixedLength) {
7340 if (ContainerVT != VecVT) {
7342 Compressed = DAG.
getBitcast(VecVT, Compressed);
7350 MVT VT =
Op.getSimpleValueType();
7353 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
7361 return DAG.
getNode(AArch64ISD::CSEL,
DL, VT,
Op.getOperand(0), Neg,
7374 return DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, Chain, Dest, CCVal,
7389 MVT VT =
Op.getSimpleValueType();
7393 if (NewShiftNo == 0)
7394 return Op.getOperand(0);
7403 if (NewShiftNo == 0)
7404 return Op.getOperand(1);
7406 if (ShiftNo->getZExtValue() == NewShiftNo)
7421 EVT XScalarTy =
X.getValueType();
7426 switch (
Op.getSimpleValueType().SimpleTy) {
7435 ExpVT = MVT::nxv4i32;
7439 ExpVT = MVT::nxv2i64;
7450 AArch64SVEPredPattern::all);
7457 if (
X.getValueType() != XScalarTy)
7465 return Op.getOperand(0);
7500 const char FptrReg = 0x11;
7506 Chain,
DL, DAG.
getConstant(0x58000080u | NestReg,
DL, MVT::i32), Addr,
7507 MachinePointerInfo(TrmpAddr));
7512 Chain,
DL, DAG.
getConstant(0x580000b0u | FptrReg,
DL, MVT::i32), Addr,
7513 MachinePointerInfo(TrmpAddr, 4));
7519 MachinePointerInfo(TrmpAddr, 8));
7524 DAG.
getStore(Chain,
DL, Nest, Addr, MachinePointerInfo(TrmpAddr, 16));
7529 DAG.
getStore(Chain,
DL, FPtr, Addr, MachinePointerInfo(TrmpAddr, 24));
7546 switch (
Op.getOpcode()) {
7552 return LowerLOOP_DEPENDENCE_MASK(
Op, DAG);
7554 return LowerBITCAST(
Op, DAG);
7556 return LowerGlobalAddress(
Op, DAG);
7558 return LowerGlobalTLSAddress(
Op, DAG);
7560 return LowerPtrAuthGlobalAddress(
Op, DAG);
7561 case ISD::ADJUST_TRAMPOLINE:
7562 return LowerADJUST_TRAMPOLINE(
Op, DAG);
7563 case ISD::INIT_TRAMPOLINE:
7564 return LowerINIT_TRAMPOLINE(
Op, DAG);
7568 return LowerSETCC(
Op, DAG);
7570 return LowerSETCCCARRY(
Op, DAG);
7574 return LowerBR_CC(
Op, DAG);
7576 return LowerSELECT(
Op, DAG);
7578 return LowerSELECT_CC(
Op, DAG);
7580 return LowerJumpTable(
Op, DAG);
7582 return LowerBR_JT(
Op, DAG);
7584 return LowerBRIND(
Op, DAG);
7586 return LowerConstantPool(
Op, DAG);
7588 return LowerBlockAddress(
Op, DAG);
7590 return LowerVASTART(
Op, DAG);
7592 return LowerVACOPY(
Op, DAG);
7594 return LowerVAARG(
Op, DAG);
7611 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FADD_PRED);
7613 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FSUB_PRED);
7615 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMUL_PRED);
7617 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMA_PRED);
7619 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FDIV_PRED);
7621 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
7623 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
7625 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU);
7626 case ISD::FNEARBYINT:
7627 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU);
7629 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU);
7631 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU);
7632 case ISD::FROUNDEVEN:
7633 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
7635 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
7637 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU);
7639 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU);
7642 return LowerFP_ROUND(
Op, DAG);
7643 case ISD::FP_EXTEND:
7645 return LowerFP_EXTEND(
Op, DAG);
7647 return LowerFRAMEADDR(
Op, DAG);
7649 return LowerSPONENTRY(
Op, DAG);
7651 return LowerRETURNADDR(
Op, DAG);
7653 return LowerADDROFRETURNADDR(
Op, DAG);
7655 return LowerCONCAT_VECTORS(
Op, DAG);
7657 return LowerINSERT_VECTOR_ELT(
Op, DAG);
7659 return LowerEXTRACT_VECTOR_ELT(
Op, DAG);
7661 return LowerBUILD_VECTOR(
Op, DAG);
7663 return LowerZERO_EXTEND_VECTOR_INREG(
Op, DAG);
7665 return LowerVECTOR_SHUFFLE(
Op, DAG);
7667 return LowerSPLAT_VECTOR(
Op, DAG);
7669 return LowerEXTRACT_SUBVECTOR(
Op, DAG);
7671 return LowerINSERT_SUBVECTOR(
Op, DAG);
7674 return LowerDIV(
Op, DAG);
7679 return LowerMinMax(
Op, DAG);
7683 return LowerVectorSRA_SRL_SHL(
Op, DAG);
7687 return LowerShiftParts(
Op, DAG);
7690 return LowerCTPOP_PARITY(
Op, DAG);
7692 return LowerFCOPYSIGN(
Op, DAG);
7694 return LowerVectorOR(
Op, DAG);
7696 return LowerXOR(
Op, DAG);
7703 return LowerINT_TO_FP(
Op, DAG);
7708 return LowerFP_TO_INT(
Op, DAG);
7711 return LowerFP_TO_INT_SAT(
Op, DAG);
7713 return LowerGET_ROUNDING(
Op, DAG);
7714 case ISD::SET_ROUNDING:
7715 return LowerSET_ROUNDING(
Op, DAG);
7716 case ISD::GET_FPMODE:
7717 return LowerGET_FPMODE(
Op, DAG);
7718 case ISD::SET_FPMODE:
7719 return LowerSET_FPMODE(
Op, DAG);
7720 case ISD::RESET_FPMODE:
7721 return LowerRESET_FPMODE(
Op, DAG);
7723 return LowerMUL(
Op, DAG);
7725 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::MULHS_PRED);
7727 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::MULHU_PRED);
7729 return LowerINTRINSIC_W_CHAIN(
Op, DAG);
7731 return LowerINTRINSIC_WO_CHAIN(
Op, DAG);
7733 return LowerINTRINSIC_VOID(
Op, DAG);
7734 case ISD::ATOMIC_STORE:
7736 assert(Subtarget->hasLSE2() || Subtarget->hasRCPC3());
7737 return LowerStore128(
Op, DAG);
7741 return LowerSTORE(
Op, DAG);
7743 return LowerFixedLengthVectorMStoreToSVE(
Op, DAG);
7745 return LowerMGATHER(
Op, DAG);
7747 return LowerMSCATTER(
Op, DAG);
7748 case ISD::VECREDUCE_SEQ_FADD:
7749 return LowerVECREDUCE_SEQ_FADD(
Op, DAG);
7750 case ISD::VECREDUCE_ADD:
7751 case ISD::VECREDUCE_AND:
7752 case ISD::VECREDUCE_OR:
7753 case ISD::VECREDUCE_XOR:
7754 case ISD::VECREDUCE_SMAX:
7755 case ISD::VECREDUCE_SMIN:
7756 case ISD::VECREDUCE_UMAX:
7757 case ISD::VECREDUCE_UMIN:
7758 case ISD::VECREDUCE_FADD:
7759 case ISD::VECREDUCE_FMAX:
7760 case ISD::VECREDUCE_FMIN:
7761 case ISD::VECREDUCE_FMAXIMUM:
7762 case ISD::VECREDUCE_FMINIMUM:
7763 return LowerVECREDUCE(
Op, DAG);
7764 case ISD::VECREDUCE_MUL:
7765 case ISD::VECREDUCE_FMUL:
7766 return LowerVECREDUCE_MUL(
Op, DAG);
7767 case ISD::ATOMIC_LOAD_AND:
7768 return LowerATOMIC_LOAD_AND(
Op, DAG);
7769 case ISD::DYNAMIC_STACKALLOC:
7770 return LowerDYNAMIC_STACKALLOC(
Op, DAG);
7772 return LowerVSCALE(
Op, DAG);
7774 return LowerVECTOR_COMPRESS(
Op, DAG);
7778 return LowerFixedLengthVectorIntExtendToSVE(
Op, DAG);
7779 case ISD::ADDRSPACECAST:
7785 if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
7786 (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
7789 return LowerToPredicatedOp(
Op, DAG,
7790 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU);
7793 return LowerTRUNCATE(
Op, DAG);
7795 return LowerMLOAD(
Op, DAG);
7798 !Subtarget->isNeonAvailable()))
7799 return LowerFixedLengthVectorLoadToSVE(
Op, DAG);
7800 return LowerLOAD(
Op, DAG);
7804 return LowerToScalableOp(
Op, DAG);
7806 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMAX_PRED);
7808 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMAXNM_PRED);
7810 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMIN_PRED);
7812 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMINNM_PRED);
7814 return LowerFixedLengthVectorSelectToSVE(
Op, DAG);
7816 return LowerABS(
Op, DAG);
7818 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::ABDS_PRED);
7820 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::ABDU_PRED);
7822 return LowerAVG(
Op, DAG, AArch64ISD::HADDS_PRED);
7824 return LowerAVG(
Op, DAG, AArch64ISD::HADDU_PRED);
7826 return LowerAVG(
Op, DAG, AArch64ISD::RHADDS_PRED);
7828 return LowerAVG(
Op, DAG, AArch64ISD::RHADDU_PRED);
7830 return LowerBitreverse(
Op, DAG);
7832 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
7834 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU);
7836 return LowerCTTZ(
Op, DAG);
7838 return LowerVECTOR_SPLICE(
Op, DAG);
7840 return LowerVECTOR_DEINTERLEAVE(
Op, DAG);
7842 return LowerVECTOR_INTERLEAVE(
Op, DAG);
7843 case ISD::GET_ACTIVE_LANE_MASK:
7844 return LowerGET_ACTIVE_LANE_MASK(
Op, DAG);
7847 if (
Op.getValueType().isVector())
7848 return LowerVectorXRINT(
Op, DAG);
7851 case ISD::LLROUND: {
7852 assert((
Op.getOperand(0).getValueType() == MVT::f16 ||
7853 Op.getOperand(0).getValueType() == MVT::bf16) &&
7854 "Expected custom lowering of rounding operations only for f16");
7857 return DAG.
getNode(
Op.getOpcode(),
DL,
Op.getValueType(), Ext);
7863 assert((
Op.getOperand(1).getValueType() == MVT::f16 ||
7864 Op.getOperand(1).getValueType() == MVT::bf16) &&
7865 "Expected custom lowering of rounding operations only for f16");
7868 {
Op.getOperand(0),
Op.getOperand(1)});
7869 return DAG.
getNode(
Op.getOpcode(),
DL, {Op.getValueType(), MVT::Other},
7870 {Ext.getValue(1), Ext.getValue(0)});
7873 assert(
Op.getOperand(2).getValueType() == MVT::i128 &&
7874 "WRITE_REGISTER custom lowering is only for 128-bit sysregs");
7879 std::pair<SDValue, SDValue> Pair =
7884 SysRegName, Pair.first, Pair.second);
7893 case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM:
7894 return LowerVECTOR_HISTOGRAM(
Op, DAG);
7895 case ISD::PARTIAL_REDUCE_SMLA:
7896 case ISD::PARTIAL_REDUCE_UMLA:
7897 case ISD::PARTIAL_REDUCE_SUMLA:
7898 case ISD::PARTIAL_REDUCE_FMLA:
7899 return LowerPARTIAL_REDUCE_MLA(
Op, DAG);
7904 return !Subtarget->useSVEForFixedLengthVectors();
7908 EVT VT,
bool OverrideNEON)
const {
7931 return Subtarget->isSVEorStreamingSVEAvailable();
7938 if (!Subtarget->useSVEForFixedLengthVectors())
7958 unsigned Opcode =
N->getOpcode();
7963 unsigned IID =
N->getConstantOperandVal(0);
7964 if (IID < Intrinsic::num_intrinsics)
7978 if (IID == Intrinsic::aarch64_neon_umull ||
7980 IID == Intrinsic::aarch64_neon_smull ||
7989 bool IsVarArg)
const {
8012 if (Subtarget->isTargetWindows()) {
8014 if (Subtarget->isWindowsArm64EC())
8020 if (!Subtarget->isTargetDarwin())
8028 if (Subtarget->isWindowsArm64EC())
8034 if (Subtarget->isWindowsArm64EC())
8058 if (Subtarget->isWindowsArm64EC())
8094 IsSave ? RTLIB::SMEABI_SME_SAVE : RTLIB::SMEABI_SME_RESTORE;
8110 RTLIB::Libcall LC = RTLIB::SMEABI_TPIDR2_RESTORE;
8125 Chain = DAG.
getCopyToReg(Chain,
DL, AArch64::X0, TPIDR2Block, Glue);
8127 DAG.
getNode(AArch64ISD::RESTORE_ZA,
DL, MVT::Other,
8128 {Chain, TPIDR2_EL0, DAG.
getRegister(AArch64::X0, MVT::i64),
8129 RestoreRoutine, RegMask, Chain.
getValue(1)});
8145 auto &FuncInfo = *MF.
getInfo<AArch64FunctionInfo>();
8146 auto &Subtarget = DAG.
getSubtarget<AArch64Subtarget>();
8147 const AArch64RegisterInfo &
TRI = *Subtarget.getRegisterInfo();
8149 SMEAttrs SMEFnAttrs = FuncInfo.getSMEFnAttrs();
8175 if (
getTM().useNewSMEABILowering())
8185 AArch64ISD::SMSTART,
DL, DAG.
getVTList(MVT::Other, MVT::Glue), Chain,
8194 {Chain, DAG.getConstant(0, DL, MVT::i32), ZT0FrameIndex});
8205SDValue AArch64TargetLowering::LowerFormalArguments(
8213 Subtarget->isCallingConvWin64(
F.getCallingConv(),
F.isVarArg());
8215 (isVarArg && Subtarget->isWindowsArm64EC());
8216 AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
8226 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.
getContext());
8234 unsigned NumArgs =
Ins.size();
8236 unsigned CurArgIdx = 0;
8237 bool UseVarArgCC =
false;
8239 UseVarArgCC = isVarArg;
8243 for (
unsigned i = 0; i != NumArgs; ++i) {
8244 MVT ValVT =
Ins[i].VT;
8245 if (Ins[i].isOrigArg()) {
8246 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
8247 CurArgIdx =
Ins[i].getOrigArgIndex();
8254 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
8256 else if (ActualMVT == MVT::i16)
8260 Ins[i].OrigTy, CCInfo);
8261 assert(!Res &&
"Call operand has unhandled type");
8266 bool IsLocallyStreaming =
8267 !
Attrs.hasStreamingInterface() &&
Attrs.hasStreamingBody();
8271 unsigned ExtraArgLocs = 0;
8272 for (
unsigned i = 0, e =
Ins.size(); i != e; ++i) {
8273 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
8275 if (Ins[i].
Flags.isByVal()) {
8279 int Size =
Ins[i].Flags.getByValSize();
8280 unsigned NumRegs = (
Size + 7) / 8;
8292 if (Ins[i].
Flags.isSwiftAsync())
8293 MF.
getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(
true);
8299 const TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = &AArch64::GPR32RegClass;
      else if (RegVT == MVT::i64)
        RC = &AArch64::GPR64RegClass;
      else if (RegVT == MVT::f16 || RegVT == MVT::bf16)
        RC = &AArch64::FPR16RegClass;
      else if (RegVT == MVT::f32)
        RC = &AArch64::FPR32RegClass;
      else if (RegVT == MVT::f64)
        RC = &AArch64::FPR64RegClass;
      else if (RegVT == MVT::f128 || RegVT.is128BitVector())
        RC = &AArch64::FPR128RegClass;
      else if (RegVT.isScalableVector() &&
               RegVT.getVectorElementType() == MVT::i1) {
        // ... (SVE calling-convention bookkeeping elided in this listing)
        RC = &AArch64::PPRRegClass;
      } else if (RegVT == MVT::aarch64svcount) {
        RC = &AArch64::PPRRegClass;
      } else if (RegVT.isScalableVector()) {
        // ... (SVE calling-convention bookkeeping elided in this listing)
        RC = &AArch64::ZPRRegClass;
      }
8329 if (IsLocallyStreaming) {
8344 DAG.
getNode(AArch64ISD::COALESCER_BARRIER,
DL,
8363 "Indirect arguments should be scalable on most subtargets");
8385 uint32_t BEAlign = 0;
8386 if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
8387 !Ins[i].Flags.isInConsecutiveRegs())
8388 BEAlign = 8 - ArgSize;
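      // The lines above implement the big-endian stack-slot adjustment: an
      // argument smaller than its 8-byte slot occupies the high-addressed
      // bytes, so the load offset is bumped by the unused bytes. A minimal
      // standalone sketch of that arithmetic in plain C++
      // (bigEndianSlotPadding is an illustrative name, not an LLVM API):
      //
      //   #include <cassert>
      //   #include <cstdint>
      //
      //   uint32_t bigEndianSlotPadding(uint32_t ArgSize, bool IsLittleEndian,
      //                                 bool InConsecutiveRegs) {
      //     if (!IsLittleEndian && ArgSize < 8 && !InConsecutiveRegs)
      //       return 8 - ArgSize; // a 4-byte int starts 4 bytes into the slot
      //     return 0;
      //   }
      //
      //   int main() {
      //     assert(bigEndianSlotPadding(4, /*IsLittleEndian=*/false, false) == 4);
      //     assert(bigEndianSlotPadding(4, /*IsLittleEndian=*/true, false) == 0);
      //   }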
8391 MachinePointerInfo PtrInfo;
8397 unsigned ObjOffset = ArgOffset + BEAlign;
8426 Subtarget->isWindowsArm64EC()) &&
8427 "Indirect arguments should be scalable on most subtargets");
8447 Subtarget->isWindowsArm64EC()) &&
8448 "Indirect arguments should be scalable on most subtargets");
8451 unsigned NumParts = 1;
8452 if (Ins[i].
Flags.isInConsecutiveRegs()) {
8453 while (!Ins[i + NumParts - 1].
Flags.isInConsecutiveRegsLast())
8462 while (NumParts > 0) {
8463 ArgValue = DAG.
getLoad(PartLoad,
DL, Chain,
Ptr, MachinePointerInfo());
8470 DL,
Ptr.getValueType(),
8471 APInt(
Ptr.getValueSizeInBits().getFixedValue(), PartSize));
8474 APInt(
Ptr.getValueSizeInBits().getFixedValue(), PartSize),
DL,
8475 Ptr.getValueType());
8484 if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
8490 if (Ins[i].isOrigArg()) {
8491 Argument *OrigArg =
F.getArg(Ins[i].getOrigArgIndex());
8493 if (!Ins[i].
Flags.isZExt()) {
8494 ArgValue = DAG.
getNode(AArch64ISD::ASSERT_ZEXT_BOOL,
DL,
8505 if (
Attrs.hasStreamingCompatibleInterface()) {
8507 DAG.
getNode(AArch64ISD::ENTRY_PSTATE_SM,
DL,
8508 DAG.
getVTList(MVT::i64, MVT::Other), {Chain});
8520 if (IsLocallyStreaming) {
8521 if (
Attrs.hasStreamingCompatibleInterface())
8530 for (
unsigned I=0;
I<InVals.
size(); ++
I) {
8542 if (!Subtarget->isTargetDarwin() || IsWin64) {
8548 saveVarArgRegisters(CCInfo, DAG,
DL, Chain);
8552 unsigned VarArgsOffset = CCInfo.getStackSize();
8555 alignTo(VarArgsOffset, Subtarget->isTargetILP32() ? 4 : 8);
8566 SmallVectorImpl<ForwardedRegister> &Forwards =
8568 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
8572 if (!CCInfo.isAllocated(AArch64::X8)) {
8574 Forwards.
push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
8583 for (
unsigned I = 0,
E =
Ins.size();
I !=
E; ++
I) {
8585 Ins[
I].Flags.isInReg()) &&
8586 Ins[
I].Flags.isSRet()) {
8601 unsigned StackArgSize = CCInfo.getStackSize();
8603 if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
8607 StackArgSize =
alignTo(StackArgSize, 16);
8621 if (Subtarget->hasCustomCallingConv())
8622 Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
8624 if (
getTM().useNewSMEABILowering()) {
8627 if (
Attrs.hasZAState()) {
8631 }
else if (
Attrs.hasAgnosticZAInterface()) {
8632 RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE_SIZE;
8635 auto *RetTy = EVT(MVT::i64).getTypeForEVT(*DAG.
getContext());
8636 TargetLowering::CallLoweringInfo CLI(DAG);
8637 CLI.setDebugLoc(
DL).setChain(Chain).setLibCallee(
8643 ISD::DYNAMIC_STACKALLOC,
DL, DAG.
getVTList(MVT::i64, MVT::Other),
8644 {Chain, Size, DAG.getConstant(1, DL, MVT::i64)});
8650 Chain = DAG.
getNode(AArch64ISD::SME_STATE_ALLOC,
DL,
8661 if (
Attrs.hasZAState()) {
8668 Buffer = DAG.
getNode(AArch64ISD::ALLOCATE_ZA_BUFFER,
DL,
8669 DAG.
getVTList(MVT::i64, MVT::Other), {Chain, SVL});
8672 Buffer = DAG.
getNode(ISD::DYNAMIC_STACKALLOC,
DL,
8674 {Chain, Size, DAG.getConstant(1, DL, MVT::i64)});
8680 AArch64ISD::INIT_TPIDR2OBJ,
DL, DAG.
getVTList(MVT::Other),
8681 { Buffer.getValue(1), Buffer.getValue(0),
8683 }
else if (
Attrs.hasAgnosticZAInterface()) {
8686 DAG.
getNode(AArch64ISD::GET_SME_SAVE_SIZE,
DL,
8687 DAG.
getVTList(MVT::i64, MVT::Other), Chain);
8691 Buffer = DAG.
getNode(AArch64ISD::ALLOC_SME_SAVE_BUFFER,
DL,
8693 {Chain, BufferSize});
8697 ISD::DYNAMIC_STACKALLOC,
DL, DAG.
getVTList(MVT::i64, MVT::Other),
8698 {Chain, BufferSize, DAG.getConstant(1, DL, MVT::i64)});
8710 for (
const ISD::InputArg &
I : Ins) {
8711 if (
I.Flags.isSwiftSelf() ||
I.Flags.isSwiftError() ||
8712 I.Flags.isSwiftAsync()) {
8716 "Swift attributes can't be used with preserve_none",
8723 if (
getTM().useNewSMEABILowering()) {
8725 if (
Attrs.isNewZT0())
8735void AArch64TargetLowering::saveVarArgRegisters(
CCState &CCInfo,
8741 AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
8745 Subtarget->isCallingConvWin64(
F.getCallingConv(),
F.isVarArg());
8751 if (Subtarget->isWindowsArm64EC()) {
8758 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
8760 if (GPRSaveSize != 0) {
8763 if (GPRSaveSize & 15)
8770 if (Subtarget->isWindowsArm64EC()) {
8783 for (
unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
8789 MF, GPRIdx, (i - FirstVariadicGPR) * 8)
8799 if (Subtarget->hasFPARMv8() && !IsWin64) {
8801 const unsigned NumFPRArgRegs =
FPRArgRegs.size();
8804 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
8806 if (FPRSaveSize != 0) {
8811 for (
unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
8826 if (!MemOps.
empty()) {
8833SDValue AArch64TargetLowering::LowerCallResult(
8837 SDValue ThisVal,
bool RequiresSMChange)
const {
8838 DenseMap<unsigned, SDValue> CopiedRegs;
8840 for (
unsigned i = 0; i != RVLocs.
size(); ++i) {
8841 CCValAssign VA = RVLocs[i];
8845 if (i == 0 && isThisReturn) {
8847 "unexpected return calling convention register assignment");
8883 Val = DAG.
getNode(AArch64ISD::COALESCER_BARRIER,
DL,
8950 unsigned NumArgs = Outs.
size();
8951 for (
unsigned i = 0; i != NumArgs; ++i) {
8952 MVT ArgVT = Outs[i].VT;
8955 bool UseVarArgCC =
false;
8959 if (IsCalleeWin64) {
8973 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
8975 else if (ActualMVT == MVT::i16)
8983 Outs[i].OrigTy, CCInfo);
8984 assert(!Res &&
"Call operand has unhandled type");
8999bool AArch64TargetLowering::isEligibleForTailCallOptimization(
9000 const CallLoweringInfo &CLI)
const {
9006 bool IsVarArg = CLI.IsVarArg;
9010 const SelectionDAG &DAG = CLI.DAG;
9017 SMECallAttrs CallAttrs =
9030 MF.
getInfo<AArch64FunctionInfo>()->isSVECC())
9033 bool CCMatch = CallerCC == CalleeCC;
9048 if (i->hasByValAttr())
9057 if (i->hasInRegAttr()) {
9058 unsigned ArgIdx = i - CallerF.
arg_begin();
9059 if (!CLI.CB || CLI.CB->arg_size() <= ArgIdx)
9061 AttributeSet
Attrs = CLI.CB->getParamAttributes(ArgIdx);
9062 if (!
Attrs.hasAttribute(Attribute::InReg) ||
9063 !
Attrs.hasAttribute(Attribute::StructRet) || !i->hasStructRetAttr() ||
9064 CLI.CB->getArgOperand(ArgIdx) != i) {
9081 const GlobalValue *GV =
G->getGlobal();
9084 (!
TT.isOSWindows() ||
TT.isOSBinFormatELF() ||
TT.isOSBinFormatMachO()))
9104 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
9105 const uint32_t *CallerPreserved =
TRI->getCallPreservedMask(MF, CallerCC);
9107 const uint32_t *CalleePreserved =
TRI->getCallPreservedMask(MF, CalleeCC);
9108 if (Subtarget->hasCustomCallingConv()) {
9109 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
9110 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
9112 if (!
TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
9121 CCState CCInfo(CalleeCC, IsVarArg, MF, ArgLocs,
C);
9125 if (IsVarArg && !(CLI.CB && CLI.CB->isMustTailCall())) {
9133 for (
const CCValAssign &ArgLoc : ArgLocs)
9134 if (!ArgLoc.isRegLoc())
9138 const AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
9146 A.getValVT().isScalableVector() ||
9147 Subtarget->isWindowsArm64EC()) &&
9148 "Expected value to be scalable");
9168 int ClobberedFI)
const {
9171 int64_t LastByte = FirstByte + MFI.
getObjectSize(ClobberedFI) - 1;
9182 if (FI->getIndex() < 0) {
9184 int64_t InLastByte = InFirstByte;
9187 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
9188 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
9196bool AArch64TargetLowering::DoesCalleeRestoreStack(
CallingConv::ID CallCC,
9197 bool TailCallOpt)
const {
9208 APInt RequiredZero(SizeInBits, 0xFE);
9210 bool ZExtBool = (Bits.Zero & RequiredZero) == RequiredZero;
9214void AArch64TargetLowering::AdjustInstrPostInstrSelection(
MachineInstr &
MI,
9220 if (
MI.getOpcode() == AArch64::MSRpstatesvcrImm1 ||
9221 MI.getOpcode() == AArch64::MSRpstatePseudo) {
9222 for (
unsigned I =
MI.getNumOperands() - 1;
I > 0; --
I)
9223 if (MachineOperand &MO =
MI.getOperand(
I);
9224 MO.isReg() && MO.isImplicit() && MO.isDef() &&
9225 (AArch64::GPR32RegClass.contains(MO.getReg()) ||
9226 AArch64::GPR64RegClass.contains(MO.getReg())))
9227 MI.removeOperand(
I);
9231 if (
MI.getOperand(0).getImm() == AArch64SVCR::SVCRSM ||
9232 MI.getOperand(0).getImm() == AArch64SVCR::SVCRSMZA) {
9247 const MachineFunction &MF = *
MI.getMF();
9248 if (MF.
getInfo<AArch64FunctionInfo>()->hasStreamingModeChanges() &&
9249 (
MI.getOpcode() == AArch64::ADDXri ||
9250 MI.getOpcode() == AArch64::SUBXri)) {
9251 const MachineOperand &MO =
MI.getOperand(1);
9260 unsigned Condition,
bool InsertVectorLengthCheck)
const {
9268 Ops.push_back(InGlue);
9269 return DAG.
getNode(AArch64ISD::CHECK_MATCHING_VL,
DL,
9273 if (InsertVectorLengthCheck &&
Enable) {
9276 SDValue CheckVL = GetCheckVL(Chain, InGlue);
9289 assert(PStateReg.
isValid() &&
"PStateSM Register is invalid");
9296 Opcode =
Enable ? AArch64ISD::COND_SMSTART : AArch64ISD::COND_SMSTOP;
9297 Ops.push_back(ConditionOp);
9298 Ops.push_back(PStateSM);
9300 Opcode =
Enable ? AArch64ISD::SMSTART : AArch64ISD::SMSTOP;
9302 Ops.push_back(RegMask);
9305 Ops.push_back(InGlue);
9310 if (!InsertVectorLengthCheck ||
Enable)
9337 if (Flags.isZExt() || Flags.isSExt())
9344 Arg->
isAssert() ||
Op == AArch64ISD::ASSERT_ZEXT_BOOL) {
9356 int FI = FINode->getIndex();
9374AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
9376 SelectionDAG &DAG = CLI.DAG;
9383 bool &IsTailCall = CLI.IsTailCall;
9385 bool IsVarArg = CLI.IsVarArg;
9386 const CallBase *CB = CLI.CB;
9389 MachineFunction::CallSiteInfo CSInfo;
9390 bool IsThisReturn =
false;
9392 AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
9394 bool IsCFICall = CLI.CB && CLI.CB->isIndirectCall() && CLI.CFIType;
9395 bool IsSibCall =
false;
9396 bool GuardWithBTI =
false;
9398 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
9399 !Subtarget->noBTIAtReturnTwice()) {
9405 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.
getContext());
9408 unsigned NumArgs = Outs.
size();
9410 for (
unsigned i = 0; i != NumArgs; ++i) {
9411 if (Outs[i].
Flags.isVarArg() && Outs[i].VT.isScalableVector())
9413 "currently not supported");
9424 RetCCInfo.AnalyzeCallResult(Ins, RetCC);
9428 CSInfo = MachineFunction::CallSiteInfo(*CB);
  auto HasSVERegLoc = [](CCValAssign &Loc) {
    if (!Loc.isRegLoc())
      return false;
    return AArch64::ZPRRegClass.contains(Loc.getLocReg()) ||
           AArch64::PPRRegClass.contains(Loc.getLocReg());
  };
  if (any_of(RVLocs, HasSVERegLoc) || any_of(ArgLocs, HasSVERegLoc))
9444 SMECallAttrs CallAttrs =
9447 std::optional<unsigned> ZAMarkerNode;
9450 if (UseNewSMEABILowering) {
9453 ZAMarkerNode = AArch64ISD::REQUIRES_ZA_SAVE;
9456 ZAMarkerNode = AArch64ISD::INOUT_ZA_USE;
9461 IsTailCall = isEligibleForTailCallOptimization(CLI);
9465 if (!ZAMarkerNode && !TailCallOpt && IsTailCall &&
9473 if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
9475 "site marked musttail");
9493 if (IsTailCall && !IsSibCall) {
9498 NumBytes =
alignTo(NumBytes, 16);
9503 FPDiff = NumReusableBytes - NumBytes;
9507 if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (
unsigned)-FPDiff)
9515 assert(FPDiff % 16 == 0 &&
"unaligned stack on tail call");
9518 auto DescribeCallsite =
9519 [&](OptimizationRemarkAnalysis &
R) -> OptimizationRemarkAnalysis & {
9522 R <<
ore::NV(
"Callee", ES->getSymbol());
9523 else if (CLI.CB && CLI.CB->getCalledFunction())
9524 R <<
ore::NV(
"Callee", CLI.CB->getCalledFunction()->getName());
9526 R <<
"unknown callee";
9531 bool RequiresLazySave = !UseNewSMEABILowering && CallAttrs.
requiresLazySave();
9532 bool RequiresSaveAllZA =
9534 if (RequiresLazySave) {
9545 auto R = CLI.CB ? OptimizationRemarkAnalysis(
"sme",
"SMELazySaveZA",
9547 : OptimizationRemarkAnalysis(
"sme",
"SMELazySaveZA",
9549 return DescribeCallsite(R) <<
" sets up a lazy save for ZA";
9551 }
else if (RequiresSaveAllZA) {
9553 "Cannot share state that may not exist");
9559 if (RequiresSMChange) {
9562 auto R = CLI.CB ? OptimizationRemarkAnalysis(
"sme",
"SMETransition",
9564 : OptimizationRemarkAnalysis(
"sme",
"SMETransition",
9566 DescribeCallsite(R) <<
" requires a streaming mode transition";
9577 if (ShouldPreserveZT0) {
9581 {Chain, DAG.getConstant(0, DL, MVT::i32), ZTFrameIdx});
9587 assert((!DisableZA || !RequiresLazySave) &&
9588 "Lazy-save should have PSTATE.SM=1 on entry to the function");
9592 AArch64ISD::SMSTOP,
DL, DAG.
getVTList(MVT::Other, MVT::Glue), Chain,
9597 assert((!IsSibCall || !ZAMarkerNode) &&
"ZA markers require CALLSEQ_START");
9607 {Chain, Chain.getValue(1)});
9615 SmallSet<unsigned, 8> RegsUsed;
9619 if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
9621 for (
const auto &
F : Forwards) {
9628 unsigned ExtraArgLocs = 0;
9629 for (
unsigned i = 0, e = Outs.
size(); i != e; ++i) {
9630 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
9632 ISD::ArgFlagsTy
Flags = Outs[i].Flags;
9647 if (Outs[i].ArgVT == MVT::i1) {
9669 assert(VA.
getValVT() == MVT::i32 &&
"only expect 32 -> 64 upper bits");
9685 assert((isScalable || Subtarget->isWindowsArm64EC()) &&
9686 "Indirect arguments should be scalable on most subtargets");
9689 uint64_t PartSize = StoreSize;
9690 unsigned NumParts = 1;
9691 if (Outs[i].
Flags.isInConsecutiveRegs()) {
9692 while (!Outs[i + NumParts - 1].
Flags.isInConsecutiveRegsLast())
9694 StoreSize *= NumParts;
9702 bool IsPred = VA.
getValVT() == MVT::aarch64svcount ||
9724 DL,
Ptr.getValueType(),
9725 APInt(
Ptr.getValueSizeInBits().getFixedValue(), PartSize));
9728 APInt(
Ptr.getValueSizeInBits().getFixedValue(), PartSize),
DL,
9729 Ptr.getValueType());
9744 if (i == 0 &&
Flags.isReturned() && !
Flags.isSwiftSelf() &&
9745 Outs[0].VT == MVT::i64) {
9747 "unexpected calling convention register assignment");
9748 assert(!
Ins.empty() && Ins[0].VT == MVT::i64 &&
9749 "unexpected use of 'returned'");
9750 IsThisReturn =
true;
9759 [=](
const std::pair<unsigned, SDValue> &Elt) {
9768 [&VA](MachineFunction::ArgRegPair ArgReg) {
9769 return ArgReg.Reg == VA.getLocReg();
9776 Arg = DAG.
getNode(AArch64ISD::COALESCER_BARRIER,
DL,
9788 MachinePointerInfo DstInfo;
9792 uint32_t BEAlign = 0;
9798 OpSize =
Flags.isByVal() ?
Flags.getByValSize() * 8
9800 OpSize = (OpSize + 7) / 8;
9801 if (!Subtarget->isLittleEndian() && !
Flags.isByVal() &&
9802 !
Flags.isInConsecutiveRegs()) {
9804 BEAlign = 8 - OpSize;
9807 int32_t
Offset = LocMemOffset + BEAlign;
9824 Chain = addTokenForArgument(Chain, DAG, MF.
getFrameInfo(), FI);
9832 if (Outs[i].
Flags.isByVal()) {
9836 Chain,
DL, DstAddr, Arg, SizeNode,
9837 Outs[i].
Flags.getNonZeroByValAlign(),
9839 nullptr, std::nullopt, DstInfo, MachinePointerInfo());
9856 if (IsVarArg && Subtarget->isWindowsArm64EC() &&
9857 !(CLI.CB && CLI.CB->isMustTailCall())) {
9875 if (!MemOpChains.
empty())
9879 if (RequiresSMChange) {
9880 bool InsertVectorLengthCheck =
9890 for (
auto &RegToPass : RegsToPass) {
9892 RegToPass.second, InGlue);
9899 const GlobalValue *CalledGlobal =
nullptr;
9900 unsigned OpFlags = 0;
9902 CalledGlobal =
G->getGlobal();
9903 OpFlags = Subtarget->classifyGlobalFunctionReference(CalledGlobal,
9909 const GlobalValue *GV =
G->getGlobal();
9914 Subtarget->isTargetMachO()) ||
9916 const char *Sym = S->getSymbol();
9929 if (IsTailCall && !IsSibCall) {
9934 unsigned Opc = IsTailCall ? AArch64ISD::TC_RETURN : AArch64ISD::CALL;
9936 std::vector<SDValue>
Ops;
9937 Ops.push_back(Chain);
9938 Ops.push_back(Callee);
9945 "tail calls cannot be marked with clang.arc.attachedcall");
9946 Opc = AArch64ISD::CALL_RVMARKER;
9952 Ops.insert(
Ops.begin() + 1, GA);
9959 Ops.insert(
Ops.begin() + 2, DoEmitMarker);
9961 Opc = AArch64ISD::CALL_ARM64EC_TO_X64;
9962 }
else if (GuardWithBTI) {
9963 Opc = AArch64ISD::CALL_BTI;
9974 const uint64_t
Key = CLI.PAI->Key;
9976 "Invalid auth call key");
9980 std::tie(IntDisc, AddrDisc) =
9983 if (
Opc == AArch64ISD::CALL_RVMARKER)
9984 Opc = AArch64ISD::AUTH_CALL_RVMARKER;
9986 Opc = IsTailCall ? AArch64ISD::AUTH_TC_RETURN : AArch64ISD::AUTH_CALL;
9988 Ops.push_back(IntDisc);
9989 Ops.push_back(AddrDisc);
9994 for (
auto &RegToPass : RegsToPass)
9996 RegToPass.second.getValueType()));
9999 const uint32_t *
Mask;
10000 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
10001 if (IsThisReturn) {
10003 Mask =
TRI->getThisReturnPreservedMask(MF, CallConv);
10005 IsThisReturn =
false;
10006 Mask =
TRI->getCallPreservedMask(MF, CallConv);
10009 Mask =
TRI->getCallPreservedMask(MF, CallConv);
10011 if (Subtarget->hasCustomCallingConv())
10012 TRI->UpdateCustomCallPreservedMask(MF, &Mask);
10014 if (
TRI->isAnyArgRegReserved(MF))
10015 TRI->emitReservedArgRegCallError(MF);
10017 assert(Mask &&
"Missing call preserved mask for calling convention");
10021 Ops.push_back(InGlue);
10029 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
10033 if (CalledGlobal &&
10047 if (CalledGlobal &&
10051 uint64_t CalleePopBytes =
10052 DoesCalleeRestoreStack(CallConv, TailCallOpt) ?
alignTo(NumBytes, 16) : 0;
10060 Chain, InGlue, CallConv, IsVarArg, RVLocs,
DL, DAG, InVals, IsThisReturn,
10061 IsThisReturn ? OutVals[0] :
SDValue(), RequiresSMChange);
10066 if (RequiresSMChange) {
10075 AArch64ISD::SMSTART,
DL, DAG.
getVTList(MVT::Other, MVT::Glue), Result,
10078 if (ShouldPreserveZT0)
10081 {Result, DAG.getConstant(0, DL, MVT::i32), ZTFrameIdx});
10083 if (RequiresLazySave) {
10085 }
else if (RequiresSaveAllZA) {
10090 if (RequiresSMChange || RequiresLazySave || ShouldPreserveZT0 ||
10091 RequiresSaveAllZA) {
10092 for (
unsigned I = 0;
I < InVals.
size(); ++
I) {
10107 for (
const ISD::OutputArg &O : Outs) {
10108 if (
O.Flags.isSwiftSelf() ||
O.Flags.isSwiftError() ||
10109 O.Flags.isSwiftAsync()) {
10113 "Swift attributes can't be used with preserve_none",
10114 DL.getDebugLoc()));
10123bool AArch64TargetLowering::CanLowerReturn(
10126 const Type *RetTy)
const {
10129 CCState CCInfo(CallConv, isVarArg, MF, RVLocs,
Context);
10140 auto *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
10144 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.
getContext());
10150 SmallSet<unsigned, 4> RegsUsed;
10151 for (
unsigned i = 0, realRVLocIdx = 0; i != RVLocs.
size();
10152 ++i, ++realRVLocIdx) {
10153 CCValAssign &VA = RVLocs[i];
10155 SDValue Arg = OutVals[realRVLocIdx];
10161 if (Outs[i].ArgVT == MVT::i1) {
10177 assert(VA.
getValVT() == MVT::i32 &&
"only expect 32 -> 64 upper bits");
10186 llvm::find_if(RetVals, [=](
const std::pair<unsigned, SDValue> &Elt) {
10196 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
10212 for (
auto &RetVal : RetVals) {
10216 DAG.
getNode(AArch64ISD::COALESCER_BARRIER,
DL,
10217 DAG.
getVTList(RetVal.second.getValueType(), MVT::Glue),
10219 Chain = DAG.
getCopyToReg(Chain,
DL, RetVal.first, RetVal.second, Glue);
10222 DAG.
getRegister(RetVal.first, RetVal.second.getValueType()));
10233 unsigned RetValReg = AArch64::X0;
10235 RetValReg = AArch64::X8;
10246 if (AArch64::GPR64RegClass.
contains(*
I))
10248 else if (AArch64::FPR64RegClass.
contains(*
I))
10259 RetOps.push_back(Glue);
10270 MachinePointerInfo());
10271 RetOps.insert(RetOps.begin() + 1, Arm64ECRetDest);
10273 return DAG.
getNode(AArch64ISD::TC_RETURN,
DL, MVT::Other, RetOps);
10276 return DAG.
getNode(AArch64ISD::RET_GLUE,
DL, MVT::Other, RetOps);
10285 unsigned Flag)
const {
10287 N->getOffset(), Flag);
10292 unsigned Flag)
const {
10298 unsigned Flag)
const {
10300 N->getOffset(), Flag);
10305 unsigned Flag)
const {
10311 unsigned Flag)
const {
10316template <
class NodeTy>
10318 unsigned Flags)
const {
10326 .
getInfo<AArch64FunctionInfo>()
10327 ->hasELFSignedGOT())
10330 return DAG.
getNode(AArch64ISD::LOADgot,
DL, Ty, GotAddr);
10334template <
class NodeTy>
10336 unsigned Flags)
const {
10342 AArch64ISD::WrapperLarge,
DL, Ty,
10350template <
class NodeTy>
10352 unsigned Flags)
const {
10360 return DAG.
getNode(AArch64ISD::ADDlow,
DL, Ty, ADRP,
Lo);
10364template <
class NodeTy>
10366 unsigned Flags)
const {
10370 SDValue Sym = getTargetNode(
N, Ty, DAG, Flags);
10371 return DAG.
getNode(AArch64ISD::ADR,
DL, Ty, Sym);
10377 const GlobalValue *GV = GN->
getGlobal();
10378 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV,
getTargetMachine());
10382 "unexpected offset in global node");
10387 return getGOT(GN, DAG, OpFlags);
10393 Result = getAddrLarge(GN, DAG, OpFlags);
10395 Result = getAddrTiny(GN, DAG, OpFlags);
10397 Result = getAddr(GN, DAG, OpFlags);
10436AArch64TargetLowering::LowerDarwinGlobalTLSAddress(
SDValue Op,
10438 assert(Subtarget->isTargetDarwin() &&
10439 "This function expects a Darwin target");
10454 PtrMemVT,
DL, Chain, DescAddr,
10469 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
10470 const uint32_t *
Mask =
TRI->getTLSCallPreservedMask();
10471 if (Subtarget->hasCustomCallingConv())
10479 unsigned Opcode = AArch64ISD::CALL;
10481 Ops.push_back(Chain);
10482 Ops.push_back(FuncTLVGet);
10486 Opcode = AArch64ISD::AUTH_CALL;
10608SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(
SDValue SymAddr,
10613 auto *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
10617 SDVTList NodeTys = DAG.
getVTList(MVT::Other, MVT::Glue);
10620 bool RequiresSMChange = TLSCallAttrs.requiresSMChange();
10622 auto ChainAndGlue = [](
SDValue Chain) -> std::pair<SDValue, SDValue> {
10623 return {Chain, Chain.
getValue(1)};
10626 if (RequiresSMChange)
10627 std::tie(Chain, Glue) =
10633 ? AArch64ISD::TLSDESC_AUTH_CALLSEQ
10634 : AArch64ISD::TLSDESC_CALLSEQ;
10636 std::tie(Chain, Glue) = ChainAndGlue(DAG.
getNode(
10639 if (TLSCallAttrs.requiresLazySave())
10640 std::tie(Chain, Glue) = ChainAndGlue(DAG.
getNode(
10641 AArch64ISD::REQUIRES_ZA_SAVE,
DL, NodeTys, {Chain, Chain.getValue(1)}));
10643 if (RequiresSMChange)
10644 std::tie(Chain, Glue) =
10652AArch64TargetLowering::LowerELFGlobalTLSAddress(
SDValue Op,
10654 assert(Subtarget->isTargetELF() &&
"This function expects an ELF target");
10657 AArch64FunctionInfo *MFI =
10672 "in local exec TLS model");
10683 const GlobalValue *GV = GA->
getGlobal();
10688 return LowerELFTLSLocalExec(GV, ThreadBase,
DL, DAG);
10691 TPOff = DAG.
getNode(AArch64ISD::LOADgot,
DL, PtrVT, TPOff);
10709 TPOff = LowerELFTLSDescCallSeq(SymAddr,
DL, DAG);
10716 GV,
DL, MVT::i64, 0,
10733 TPOff = LowerELFTLSDescCallSeq(SymAddr,
DL, DAG);
10741AArch64TargetLowering::LowerWindowsGlobalTLSAddress(
SDValue Op,
10743 assert(Subtarget->isTargetWindows() &&
"Windows specific TLS lowering");
10755 TLSArray = DAG.
getLoad(PtrVT,
DL, Chain, TLSArray, MachinePointerInfo());
10768 DAG.
getNode(AArch64ISD::ADDlow,
DL, PtrVT, ADRP, TLSIndexLo);
10769 TLSIndex = DAG.
getLoad(MVT::i32,
DL, Chain, TLSIndex, MachinePointerInfo());
10779 MachinePointerInfo());
10780 Chain =
TLS.getValue(1);
10783 const GlobalValue *GV = GA->
getGlobal();
10795 Addr = DAG.
getNode(AArch64ISD::ADDlow,
DL, PtrVT, Addr, TGALo);
10805 if (Subtarget->isTargetDarwin())
10806 return LowerDarwinGlobalTLSAddress(
Op, DAG);
10807 if (Subtarget->isTargetELF())
10808 return LowerELFGlobalTLSAddress(
Op, DAG);
10809 if (Subtarget->isTargetWindows())
10810 return LowerWindowsGlobalTLSAddress(
Op, DAG);
10848 assert(TGN->getGlobal()->hasExternalWeakLinkage());
10854 if (TGN->getOffset() != 0)
10856 "unsupported non-zero offset in weak ptrauth global reference");
10863 {TGA, Key, Discriminator}),
10868AArch64TargetLowering::LowerPtrAuthGlobalAddress(
SDValue Op,
10871 uint64_t KeyC =
Op.getConstantOperandVal(1);
10872 SDValue AddrDiscriminator =
Op.getOperand(2);
10873 uint64_t DiscriminatorC =
Op.getConstantOperandVal(3);
10874 EVT VT =
Op.getValueType();
10884 "constant discriminator in ptrauth global out of range [0, 0xffff]");
10887 if (!Subtarget->isTargetELF() && !Subtarget->isTargetMachO())
10890 int64_t PtrOffsetC = 0;
10892 PtrOffsetC =
Ptr.getConstantOperandVal(1);
10893 Ptr =
Ptr.getOperand(0);
10896 const GlobalValue *PtrGV = PtrN->getGlobal();
10899 const unsigned OpFlags =
10903 "unsupported non-GOT op flags on ptrauth global reference");
10906 PtrOffsetC += PtrN->getOffset();
10909 assert(PtrN->getTargetFlags() == 0 &&
10910 "unsupported target flags on ptrauth global");
10915 ? AddrDiscriminator
10919 if (!NeedsGOTLoad) {
10923 {TPtr, Key, TAddrDiscriminator, Discriminator}),
10932 {TPtr, Key, TAddrDiscriminator, Discriminator}),
10969 bool ProduceNonFlagSettingCondBr =
10975 if (
LHS.getValueType() == MVT::f128) {
10980 if (!
RHS.getNode()) {
11000 OFCC = getInvertedCondCode(OFCC);
11003 return DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, Chain, Dest, CCVal,
11007 if (
LHS.getValueType().isInteger()) {
11009 (
LHS.getValueType() == MVT::i32 ||
LHS.getValueType() == MVT::i64));
11014 if (RHSC && RHSC->
getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
11024 uint64_t
Mask =
LHS.getConstantOperandVal(1);
11025 return DAG.
getNode(AArch64ISD::TBZ,
DL, MVT::Other, Chain,
Test,
11030 return DAG.
getNode(AArch64ISD::CBZ,
DL, MVT::Other, Chain,
LHS, Dest);
11040 uint64_t
Mask =
LHS.getConstantOperandVal(1);
11041 return DAG.
getNode(AArch64ISD::TBNZ,
DL, MVT::Other, Chain,
Test,
11046 return DAG.
getNode(AArch64ISD::CBNZ,
DL, MVT::Other, Chain,
LHS, Dest);
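  // The branches above pick between a test-bit branch and a compare-and-branch:
  // an equality test of (X & Mask) against zero with a single-bit Mask becomes
  // TBZ/TBNZ on that bit, while a plain comparison against zero becomes
  // CBZ/CBNZ. A minimal standalone model of that choice in plain C++
  // (pickBranch is an illustrative name, not an LLVM API):
  //
  //   #include <cstdint>
  //   #include <cstdio>
  //   #include <string>
  //
  //   std::string pickBranch(bool IsNotEqual, bool LhsIsAnd, uint64_t Mask) {
  //     bool SingleBit = LhsIsAnd && Mask != 0 && (Mask & (Mask - 1)) == 0;
  //     if (SingleBit)
  //       return IsNotEqual ? "TBNZ" : "TBZ";
  //     return IsNotEqual ? "CBNZ" : "CBZ";
  //   }
  //
  //   int main() {
  //     std::printf("%s\n", pickBranch(false, true, 0x8).c_str()); // TBZ, bit 3
  //     std::printf("%s\n", pickBranch(true, false, 0).c_str());   // CBNZ
  //   }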
11051 uint64_t SignBitPos;
11053 return DAG.
getNode(AArch64ISD::TBNZ,
DL, MVT::Other, Chain,
LHS,
11058 LHS.getOpcode() !=
ISD::AND && ProduceNonFlagSettingCondBr) {
11062 uint64_t SignBitPos;
11064 return DAG.
getNode(AArch64ISD::TBZ,
DL, MVT::Other, Chain,
LHS,
11070 if (Subtarget->hasCMPBR() &&
11072 ProduceNonFlagSettingCondBr) {
11081 return DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, Chain, Dest, CCVal,
11085 assert(
LHS.getValueType() == MVT::f16 ||
LHS.getValueType() == MVT::bf16 ||
11086 LHS.getValueType() == MVT::f32 ||
LHS.getValueType() == MVT::f64);
11095 DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, Chain, Dest, CC1Val, Cmp);
11098 return DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, BR1, Dest, CC2Val,
11107 if (!Subtarget->isNeonAvailable() &&
11108 !Subtarget->useSVEForFixedLengthVectors())
11111 EVT VT =
Op.getValueType();
11139 if (!VT.
isVector() && !Subtarget->isNeonAvailable() &&
11140 Subtarget->isSVEorStreamingSVEAvailable()) {
11141 if (VT != MVT::f16 && VT != MVT::f32 && VT != MVT::f64 && VT != MVT::bf16)
11156 auto BitCast = [
this](EVT VT,
SDValue Op, SelectionDAG &DAG) {
11158 return getSVESafeBitCast(VT,
Op, DAG);
11165 auto SetVecVal = [&](
int Idx = -1) {
11172 VecVal1 = BitCast(VecVT, In1, DAG);
11173 VecVal2 = BitCast(VecVT, In2, DAG);
11179 }
else if (VT == MVT::f64) {
11180 VecVT = MVT::v2i64;
11181 SetVecVal(AArch64::dsub);
11182 }
else if (VT == MVT::f32) {
11183 VecVT = MVT::v4i32;
11184 SetVecVal(AArch64::ssub);
11185 }
else if (VT == MVT::f16 || VT == MVT::bf16) {
11186 VecVT = MVT::v8i16;
11187 SetVecVal(AArch64::hsub);
11198 if (VT == MVT::f64 || VT == MVT::v2f64) {
11200 SignMaskV = DAG.
getNode(ISD::BITCAST,
DL, MVT::v2f64, SignMaskV);
11201 SignMaskV = DAG.
getNode(ISD::FNEG,
DL, MVT::v2f64, SignMaskV);
11202 SignMaskV = DAG.
getNode(ISD::BITCAST,
DL, MVT::v2i64, SignMaskV);
11206 DAG.
getNode(AArch64ISD::BSP,
DL, VecVT, SignMaskV, VecVal1, VecVal2);
11207 if (VT == MVT::f16 || VT == MVT::bf16)
11209 if (VT == MVT::f32)
11211 if (VT == MVT::f64)
11214 return BitCast(VT, BSP, DAG);
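  // The BSP lowering above is a bitwise select: every bit of the result comes
  // from the magnitude operand except the sign bit, which comes from the sign
  // operand. A minimal scalar model of the same idea for f64 in plain C++
  // (copySignViaMask is an illustrative name, not an LLVM API):
  //
  //   #include <cstdint>
  //   #include <cstdio>
  //   #include <cstring>
  //
  //   double copySignViaMask(double Mag, double Sign) {
  //     uint64_t M, S;
  //     std::memcpy(&M, &Mag, sizeof(M));
  //     std::memcpy(&S, &Sign, sizeof(S));
  //     const uint64_t SignMask = 0x8000000000000000ULL;
  //     uint64_t R = (M & ~SignMask) | (S & SignMask);
  //     double Res;
  //     std::memcpy(&Res, &R, sizeof(Res));
  //     return Res;
  //   }
  //
  //   int main() {
  //     // prints "-3.500000 2.000000"
  //     std::printf("%f %f\n", copySignViaMask(3.5, -1.0),
  //                 copySignViaMask(-2.0, 1.0));
  //   }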
11220 Attribute::NoImplicitFloat))
11223 EVT VT =
Op.getValueType();
11226 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);
11234 if (VT == MVT::i32 && IsParity)
11237 if (Subtarget->isSVEorStreamingSVEAvailable()) {
11238 if (VT == MVT::i32 || VT == MVT::i64) {
11239 EVT ContainerVT = VT == MVT::i32 ? MVT::nxv4i32 : MVT::nxv2i64;
11251 if (VT == MVT::i128) {
11252 Val = DAG.
getNode(ISD::BITCAST,
DL, MVT::v2i64, Val);
11256 Val = DAG.
getNode(ISD::VECREDUCE_ADD,
DL, MVT::i64, Val);
11264 if (!Subtarget->isNeonAvailable())
11275 if (VT == MVT::i32 || VT == MVT::i64) {
11276 if (VT == MVT::i32)
11278 Val = DAG.
getNode(ISD::BITCAST,
DL, MVT::v8i8, Val);
11282 AddV = DAG.
getNode(AArch64ISD::NVCAST,
DL,
11283 VT == MVT::i32 ? MVT::v2i32 : MVT::v1i64, AddV);
11289 }
else if (VT == MVT::i128) {
11290 Val = DAG.
getNode(ISD::BITCAST,
DL, MVT::v16i8, Val);
11295 DAG.
getNode(AArch64ISD::NVCAST,
DL, MVT::v2i64, AddV),
11303 assert(!IsParity &&
"ISD::PARITY of vector types not supported");
11305 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
11306 VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
11307 "Unexpected type for custom ctpop lowering");
11315 EVT DT = VT == MVT::v2i64 ? MVT::v4i32 : VT;
11319 if (VT == MVT::v2i64) {
11320 Val = DAG.
getNode(AArch64ISD::UDOT,
DL, DT, Zeros, Ones, Val);
11321 Val = DAG.
getNode(AArch64ISD::UADDLP,
DL, VT, Val);
11322 }
else if (VT == MVT::v2i32) {
11323 Val = DAG.
getNode(AArch64ISD::UDOT,
DL, DT, Zeros, Ones, Val);
11324 }
else if (VT == MVT::v4i32) {
11325 Val = DAG.
getNode(AArch64ISD::UDOT,
DL, DT, Zeros, Ones, Val);
11334 unsigned EltSize = 8;
11340 Val = DAG.
getNode(AArch64ISD::UADDLP,
DL, WidenVT, Val);
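  // The CTPOP lowering above counts bits per byte (CNT) and then folds the
  // byte counts together with widening pairwise adds (UADDLP / UDOT). A
  // minimal scalar model of the same strategy for a 64-bit value in plain C++
  // (the helper names are illustrative, not LLVM APIs):
  //
  //   #include <array>
  //   #include <cstdint>
  //   #include <cstdio>
  //
  //   // Per-byte popcounts, as the vector CNT instruction produces.
  //   std::array<uint8_t, 8> cntBytes(uint64_t X) {
  //     std::array<uint8_t, 8> Out{};
  //     for (int I = 0; I < 8; ++I) {
  //       uint8_t B = static_cast<uint8_t>(X >> (8 * I));
  //       while (B) { Out[I] += B & 1; B >>= 1; }
  //     }
  //     return Out;
  //   }
  //
  //   // Models the widening-add reduction of the byte counts.
  //   uint64_t popcountViaCnt(uint64_t X) {
  //     uint64_t Sum = 0;
  //     for (uint8_t C : cntBytes(X))
  //       Sum += C;
  //     return Sum;
  //   }
  //
  //   int main() {
  //     std::printf("%llu\n", (unsigned long long)popcountViaCnt(
  //                               0xF0F0F0F0F0F0F0F0ULL)); // prints "32"
  //   }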
11347 EVT VT =
Op.getValueType();
11350 VT, Subtarget->useSVEForFixedLengthVectors()));
11360 EVT VT =
Op.getValueType();
11362 unsigned Opcode =
Op.getOpcode();
11389 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::SMAX_PRED);
11391 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::SMIN_PRED);
11393 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::UMAX_PRED);
11395 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::UMIN_PRED);
11407 EVT VT =
Op.getValueType();
11411 VT, Subtarget->useSVEForFixedLengthVectors()))
11412 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU);
11424 REVB = DAG.
getNode(AArch64ISD::REV32,
DL, VST,
Op.getOperand(0));
11431 REVB = DAG.
getNode(AArch64ISD::REV32,
DL, VST,
Op.getOperand(0));
11438 REVB = DAG.
getNode(AArch64ISD::REV64,
DL, VST,
Op.getOperand(0));
11445 REVB = DAG.
getNode(AArch64ISD::REV64,
DL, VST,
Op.getOperand(0));
11451 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT,
11458 SmallVector<std::pair<SDValue, SDValue>, 16> &WorkList) {
11464 N =
N->getOperand(0);
11468 WorkList.push_back(std::make_pair(
N->getOperand(0),
N->getOperand(1)));
11474 if (
N->getOpcode() !=
ISD::OR || !
N->hasOneUse())
11488 EVT VT =
N->getValueType(0);
11498 unsigned NumXors = 0;
11503 std::tie(XOR0, XOR1) = WorkList[0];
11506 for (
unsigned I = 1;
I < WorkList.
size();
I++) {
11507 std::tie(XOR0, XOR1) = WorkList[
I];
11509 Cmp = DAG.
getNode(LogicOp,
DL, VT, Cmp, CmpChain);
11521 if (
Op.getValueType().isVector())
11522 return LowerVSETCC(
Op, DAG);
11524 bool IsStrict =
Op->isStrictFPOpcode();
11526 unsigned OpNo = IsStrict ? 1 : 0;
11529 Chain =
Op.getOperand(0);
11536 EVT VT =
Op.getValueType();
11542 if (
LHS.getValueType() == MVT::f128) {
11547 if (!
RHS.getNode()) {
11548 assert(
LHS.getValueType() ==
Op.getValueType() &&
11549 "Unexpected setcc expansion!");
11554 if (
LHS.getValueType().isInteger()) {
11565 SDValue Res = DAG.
getNode(AArch64ISD::CSEL,
DL, VT, FVal, TVal, CCVal, Cmp);
11570 assert(
LHS.getValueType() == MVT::bf16 ||
LHS.getValueType() == MVT::f16 ||
11571 LHS.getValueType() == MVT::f32 ||
LHS.getValueType() == MVT::f64);
11592 Res = DAG.
getNode(AArch64ISD::CSEL,
DL, VT, FVal, TVal, CC1Val, Cmp);
11602 DAG.
getNode(AArch64ISD::CSEL,
DL, VT, TVal, FVal, CC1Val, Cmp);
11605 Res = DAG.
getNode(AArch64ISD::CSEL,
DL, VT, TVal, CS1, CC2Val, Cmp);
11615 EVT VT =
LHS.getValueType();
11616 if (VT != MVT::i32 && VT != MVT::i64)
11626 EVT OpVT =
Op.getValueType();
11635 return DAG.
getNode(AArch64ISD::CSEL,
DL, OpVT, FVal, TVal, CCVal,
11644 "function only supposed to emit natural comparisons");
11653 if (!
LHS.getValueType().isVector()) {
11658 DAG.
getUNDEF(VecVT), Fcmeq, Zero);
11692 assert(!
LHS.getValueType().isVector());
11693 assert(!
RHS.getValueType().isVector());
11697 if (!CTVal || !CFVal)
11711 bool OneNaN =
false;
11727 bool ShouldInvert =
false;
11736 if (!Cmp2 && !ShouldInvert)
11754SDValue AArch64TargetLowering::LowerSELECT_CC(
11760 if (
LHS.getValueType() == MVT::f128) {
11765 if (!
RHS.getNode()) {
11772 if ((
LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
11773 LHS.getValueType() == MVT::bf16) {
11779 if (
LHS.getValueType().isInteger()) {
11781 (
LHS.getValueType() == MVT::i32 ||
LHS.getValueType() == MVT::i64));
11793 LHS.getValueType() ==
RHS.getValueType()) {
11794 EVT VT =
LHS.getValueType();
11800 Shift = DAG.
getNOT(
DL, Shift, VT);
11814 uint64_t SignBitPos;
11816 EVT TestVT =
LHS.getValueType();
11820 LHS, SignBitConst);
11848 unsigned Opcode = AArch64ISD::CSEL;
11856 }
else if (CTVal && CFVal && CTVal->
isOne() && CFVal->
isZero()) {
11876 }
else if (CTVal && CFVal) {
11884 if (TrueVal == ~FalseVal) {
11885 Opcode = AArch64ISD::CSINV;
11886 }
else if (FalseVal > std::numeric_limits<int64_t>::min() &&
11887 TrueVal == -FalseVal) {
11888 Opcode = AArch64ISD::CSNEG;
11898 if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
11899 Opcode = AArch64ISD::CSINC;
11901 if (TrueVal32 > FalseVal32) {
11907 const uint64_t TrueVal64 =
TrueVal;
11908 const uint64_t FalseVal64 =
FalseVal;
11910 if ((TrueVal64 == FalseVal64 + 1) || (TrueVal64 + 1 == FalseVal64)) {
11911 Opcode = AArch64ISD::CSINC;
11913 if (TrueVal > FalseVal) {
11926 if (Opcode != AArch64ISD::CSEL) {
11939 if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->
isOne() &&
11944 if (CTVal && CTVal == RHSVal && AArch64CC ==
AArch64CC::EQ)
11946 else if (CFVal && CFVal == RHSVal && AArch64CC ==
AArch64CC::NE)
11948 }
else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->
isOne()) {
11949 assert (CTVal && CFVal &&
"Expected constant operands for CSNEG.");
11954 Opcode = AArch64ISD::CSINV;
11963 return DAG.
getNode(Opcode,
DL, VT, TVal, FVal, CCVal, Cmp);
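  // The constant checks above pick the conditional-select flavour that lets
  // one of the two constants be synthesised from the other: CSINV produces
  // ~FalseVal, CSNEG produces -FalseVal, CSINC produces FalseVal + 1. A
  // minimal standalone model of that choice in plain C++ (CondSel and
  // pickCondSelect are illustrative names, not LLVM APIs):
  //
  //   #include <cstdint>
  //   #include <cstdio>
  //
  //   enum class CondSel { CSEL, CSINC, CSINV, CSNEG };
  //
  //   CondSel pickCondSelect(int64_t TrueVal, int64_t FalseVal) {
  //     if (TrueVal == ~FalseVal)
  //       return CondSel::CSINV;
  //     if (FalseVal > INT64_MIN && TrueVal == -FalseVal)
  //       return CondSel::CSNEG;
  //     uint64_t T = static_cast<uint64_t>(TrueVal);
  //     uint64_t F = static_cast<uint64_t>(FalseVal);
  //     if (T == F + 1 || T + 1 == F)
  //       return CondSel::CSINC;
  //     return CondSel::CSEL;
  //   }
  //
  //   int main() {
  //     std::printf("%d\n", pickCondSelect(1, 0) == CondSel::CSINC);  // 1
  //     std::printf("%d\n", pickCondSelect(-1, 0) == CondSel::CSINV); // 1
  //     std::printf("%d\n", pickCondSelect(5, -5) == CondSel::CSNEG); // 1
  //   }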
11967 assert(
LHS.getValueType() == MVT::f16 ||
LHS.getValueType() == MVT::f32 ||
11968 LHS.getValueType() == MVT::f64);
11975 if (Subtarget->isNeonAvailable() &&
all_of(
Users, [](
const SDNode *U) {
11976 switch (
U->getOpcode()) {
11981 case AArch64ISD::DUP:
11999 if (
Flags.hasNoSignedZeros()) {
12003 if (RHSVal && RHSVal->
isZero()) {
12011 CFVal && CFVal->
isZero() &&
12019 SDValue CS1 = DAG.
getNode(AArch64ISD::CSEL,
DL, VT, TVal, FVal, CC1Val, Cmp);
12025 return DAG.
getNode(AArch64ISD::CSEL,
DL, VT, TVal, CS1, CC2Val, Cmp);
12034 EVT Ty =
Op.getValueType();
12035 auto Idx =
Op.getConstantOperandAPInt(2);
12036 int64_t IdxVal = Idx.getSExtValue();
12038 "Only expect scalable vectors for custom lowering of VECTOR_SPLICE");
12047 std::optional<unsigned> PredPattern;
12059 return DAG.
getNode(AArch64ISD::SPLICE,
DL, Ty, Pred,
Op.getOperand(0),
12065 if (IdxVal >= 0 && (IdxVal *
BlockSize / 8) < 256)
12078 SDNodeFlags
Flags =
Op->getFlags();
12080 return LowerSELECT_CC(CC,
LHS,
RHS, TVal, FVal,
Op->users(), Flags,
DL, DAG);
12090 EVT Ty =
Op.getValueType();
12091 if (Ty == MVT::aarch64svcount) {
12092 TVal = DAG.
getNode(ISD::BITCAST,
DL, MVT::nxv16i1, TVal);
12093 FVal = DAG.
getNode(ISD::BITCAST,
DL, MVT::nxv16i1, FVal);
12096 return DAG.
getNode(ISD::BITCAST,
DL, Ty, Sel);
12128 return DAG.
getNode(AArch64ISD::CSEL,
DL,
Op.getValueType(), TVal, FVal,
12147 if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
12155 Op->getFlags(),
DL, DAG);
12157   if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
12172       !Subtarget->isTargetMachO())
12173     return getAddrLarge(JT, DAG);
12175     return getAddrTiny(JT, DAG);
12176   return getAddr(JT, DAG);
12189   AFI->setJumpTableEntryInfo(JTI, 4, nullptr);
12194           "aarch64-jump-table-hardening")) {
12196     if (Subtarget->isTargetMachO()) {
12201       assert(Subtarget->isTargetELF() &&
12202              "jump table hardening only supported on MachO/ELF");
12233   std::optional<uint16_t> BADisc =
12234       Subtarget->getPtrAuthBlockAddressDiscriminatorIfEnabled(MF.getFunction());
12245                      {Dest, Key, Disc, AddrDisc, Chain});
12255   if (Subtarget->isTargetMachO()) {
12256     return getGOT(CP, DAG);
12259     return getAddrLarge(CP, DAG);
12261     return getAddrTiny(CP, DAG);
12263   return getAddr(CP, DAG);
12271   if (std::optional<uint16_t> BADisc =
12272           Subtarget->getPtrAuthBlockAddressDiscriminatorIfEnabled(
12286                        {TargetBA, Key, AddrDisc, Disc});
12294     return getAddrLarge(BAN, DAG);
12296     return getAddrTiny(BAN, DAG);
12298   return getAddr(BAN, DAG);
12303   AArch64FunctionInfo *FuncInfo =
12312                       MachinePointerInfo(SV));
12318   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
12322   if (Subtarget->isWindowsArm64EC()) {
12328     uint64_t StackOffset;
12343                       MachinePointerInfo(SV));
12351   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
12352   unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
12367                                 MachinePointerInfo(SV), Align(PtrSize)));
12384                                 MachinePointerInfo(SV, Offset),
12402                                 MachinePointerInfo(SV, Offset),
12412       GROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
12420       VROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
12430   if (Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg()))
12431     return LowerWin64_VASTART(Op, DAG);
12432   else if (Subtarget->isTargetDarwin())
12433     return LowerDarwin_VASTART(Op, DAG);
12435   return LowerAAPCS_VASTART(Op, DAG);
12443   unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
12444   unsigned VaListSize =
12445       (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
12447           : Subtarget->isTargetILP32() ? 20 : 32;
12453       Align(PtrSize), false, false, nullptr,
12454       std::nullopt, MachinePointerInfo(DestSV),
12455       MachinePointerInfo(SrcSV));
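  // Note (added comment, not in the upstream source): Darwin and Windows model
  // va_list as a single pointer, so only PtrSize bytes are copied, whereas the
  // AAPCS64 va_list is a struct of three pointers plus two 32-bit offsets
  // (32 bytes, or 20 bytes under ILP32), which is what the size selection
  // above reflects.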
12459   assert(Subtarget->isTargetDarwin() &&
12460          "automatic va_arg instruction only works on Darwin");
12463   EVT VT = Op.getValueType();
12467   MaybeAlign Align(Op.getConstantOperandVal(3));
12468   unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8;
12472       DAG.getLoad(PtrMemVT, DL, Chain, Addr, MachinePointerInfo(V));
12478          "currently not supported");
12480   if (Align && *Align > MinSlotSize) {
12496   ArgSize = std::max(ArgSize, MinSlotSize);
12497   bool NeedFPTrunc = false;
12500     NeedFPTrunc = true;
12510       DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));
12516         DAG.getLoad(MVT::f64, DL, APStore, VAList, MachinePointerInfo());
12526   return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo());
12534   EVT VT = Op.getValueType();
12536   unsigned Depth = Op.getConstantOperandVal(0);
12541                       MachinePointerInfo());
12543   if (Subtarget->isTargetILP32())
12559 #define GET_REGISTER_MATCHER
12560 #include "AArch64GenAsmMatcher.inc"
12567   if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
12569     unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false);
12570     if (!Subtarget->isXRegisterReserved(DwarfRegNum) &&
12571         !MRI->isReservedReg(MF, Reg))
12581   EVT VT = Op.getValueType();
12597   EVT VT = Op.getValueType();
12599   unsigned Depth = Op.getConstantOperandVal(0);
12602     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
12619   if (Subtarget->hasPAuth()) {
12647                                           bool OptForSize) const {
12648   bool IsLegal = false;
12657   const APInt ImmInt = Imm.bitcastToAPInt();
12658   if (VT == MVT::f64)
12660   else if (VT == MVT::f32)
12662   else if (VT == MVT::f16 || VT == MVT::bf16)
12672   if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
12681            "Should be able to build any value with at most 4 moves");
12682     unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 4 : 2));
12683     IsLegal = Insn.size() <= Limit;
12687                     << " imm value: "; Imm.dump(););
12699   if ((ST->hasNEON() &&
12700        (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
12701         VT == MVT::f32 || VT == MVT::v1f32 || VT == MVT::v2f32 ||
12702         VT == MVT::v4f32)) ||
12704       (VT == MVT::nxv8f16 || VT == MVT::nxv4f32 || VT == MVT::nxv2f64))) {
12711     constexpr unsigned AccurateBits = 8;
12713     ExtraSteps = DesiredBits <= AccurateBits
12718     return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
12728   EVT VT = Op.getValueType();
12735 AArch64TargetLowering::getSqrtResultForDenormInput(SDValue Op,
12744                                                    bool Reciprocal) const {
12748                                       DAG, ExtraSteps)) {
12753     SDNodeFlags Flags =
12758     for (int i = ExtraSteps; i > 0; --i) {
12761       Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
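      // Note (added comment, not in the upstream source): FRSQRTE only gives
      // an initial reciprocal-square-root estimate accurate to roughly 8 bits
      // (AccurateBits above); each FRSQRTS application is one Newton-Raphson
      // refinement step, computing (3 - a*b)/2, so ExtraSteps is derived from
      // how many accurate bits the caller requested.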
12776                                                  int &ExtraSteps) const {
12779                                       DAG, ExtraSteps)) {
12787     for (int i = ExtraSteps; i > 0; --i) {
12827 const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
12835   if (!Subtarget->hasFPARMv8())
12860 static std::optional<std::pair<unsigned, const TargetRegisterClass *>>
12863       (Constraint[1] != 'p' && Constraint[1] != 'z'))
12864     return std::nullopt;
12866   bool IsPredicate = Constraint[1] == 'p';
12867   Constraint = Constraint.substr(2, Constraint.size() - 3);
12868   bool IsPredicateAsCount = IsPredicate && Constraint.starts_with("n");
12869   if (IsPredicateAsCount)
12874     return std::nullopt;
12876   if (IsPredicateAsCount)
12877     return std::make_pair(AArch64::PN0 + V, &AArch64::PNRRegClass);
12879     return std::make_pair(AArch64::P0 + V, &AArch64::PPRRegClass);
12880   return std::make_pair(AArch64::Z0 + V, &AArch64::ZPRRegClass);
12883 static std::optional<PredicateConstraint>
12894   if (VT != MVT::aarch64svcount &&
12898   switch (Constraint) {
12900     return VT == MVT::aarch64svcount ? &AArch64::PNR_p8to15RegClass
12901                                      : &AArch64::PPR_p8to15RegClass;
12903     return VT == MVT::aarch64svcount ? &AArch64::PNR_3bRegClass
12904                                      : &AArch64::PPR_3bRegClass;
12906     return VT == MVT::aarch64svcount ? &AArch64::PNRRegClass
12907                                      : &AArch64::PPRRegClass;
12915 static std::optional<ReducedGprConstraint>
12928   switch (Constraint) {
12930     return &AArch64::MatrixIndexGPR32_8_11RegClass;
12932     return &AArch64::MatrixIndexGPR32_12_15RegClass;
12966   return DAG.getNode(AArch64ISD::CSINC, DL, MVT::i32,
12969                      getCondCode(DAG, getInvertedCondCode(CC)), NZCV);
12973 SDValue AArch64TargetLowering::LowerAsmOutputForConstraint(
12975     const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
12980   if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
12981       OpInfo.ConstraintVT.getSizeInBits() < 8)
12996   if (OpInfo.ConstraintVT.getSizeInBits() <= 32)
13007 AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
13008   if (Constraint.size() == 1) {
13009     switch (Constraint[0]) {
13046 AArch64TargetLowering::getSingleConstraintMatchWeight(
13047     AsmOperandInfo &info, const char *constraint) const {
13049   Value *CallOperandVal = info.CallOperandVal;
13052   if (!CallOperandVal)
13056   switch (*constraint) {
13078 std::pair<unsigned, const TargetRegisterClass *>
13079 AArch64TargetLowering::getRegForInlineAsmConstraint(
13081   if (Constraint.size() == 1) {
13082     switch (Constraint[0]) {
13085       return std::make_pair(0U, nullptr);
13087       return std::make_pair(0U, &AArch64::GPR64x8ClassRegClass);
13087 return std::make_pair(0U, &AArch64::GPR64x8ClassRegClass);
13089 return std::make_pair(0U, &AArch64::GPR64commonRegClass);
13090 return std::make_pair(0U, &AArch64::GPR32commonRegClass);
13092 if (!Subtarget->hasFPARMv8())
13096 return std::make_pair(0U, &AArch64::ZPRRegClass);
13097 return std::make_pair(0U,
nullptr);
13099 if (VT == MVT::Other)
13103 return std::make_pair(0U, &AArch64::FPR16RegClass);
13105 return std::make_pair(0U, &AArch64::FPR32RegClass);
13107 return std::make_pair(0U, &AArch64::FPR64RegClass);
13109 return std::make_pair(0U, &AArch64::FPR128RegClass);
13115 if (!Subtarget->hasFPARMv8())
13118 return std::make_pair(0U, &AArch64::ZPR_4bRegClass);
13120 return std::make_pair(0U, &AArch64::FPR128_loRegClass);
13123 if (!Subtarget->hasFPARMv8())
13126 return std::make_pair(0U, &AArch64::ZPR_3bRegClass);
13134     if (AArch64::ZPRRegClass.hasSubClassEq(P->second) &&
13135         !Subtarget->isSVEorStreamingSVEAvailable())
13136       return std::make_pair(TRI->getSubReg(P->first, AArch64::zsub),
13137                             &AArch64::FPR128RegClass);
13142     return std::make_pair(0U, RegClass);
13146     return std::make_pair(0U, RegClass);
13148   if (StringRef("{cc}").equals_insensitive(Constraint) ||
13150     return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
13152   if (Constraint == "{za}") {
13153     return std::make_pair(unsigned(AArch64::ZA), &AArch64::MPRRegClass);
13156   if (Constraint == "{zt0}") {
13157     return std::make_pair(unsigned(AArch64::ZT0), &AArch64::ZTRRegClass);
13162   std::pair<unsigned, const TargetRegisterClass *> Res;
13167   unsigned Size = Constraint.size();
13168   if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
13169       tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
13172     if (!Failed && RegNo >= 0 && RegNo <= 31) {
13177         Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
13178         Res.second = &AArch64::FPR64RegClass;
13180         Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
13181         Res.second = &AArch64::FPR128RegClass;
13187   if (Res.second && !Subtarget->hasFPARMv8() &&
13188       !AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
13189       !AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
13190     return std::make_pair(0U, nullptr);
13197                                          bool AllowUnknown) const {
13198   if (Subtarget->hasLS64() && Ty->isIntegerTy(512))
13199     return EVT(MVT::i64x8);
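  // Note (added comment, not in the upstream source): with FEAT_LS64 a 512-bit
  // integer asm operand is modelled as MVT::i64x8, i.e. a group of eight
  // consecutive X registers, matching the LD64B/ST64B 64-byte load/store
  // instructions.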
13206 void AArch64TargetLowering::LowerAsmOperandForConstraint(
13212   if (Constraint.size() != 1)
13215   char ConstraintLetter = Constraint[0];
13216   switch (ConstraintLetter) {
13227     if (Op.getValueType() == MVT::i64)
13228       Result = DAG.getRegister(AArch64::XZR, MVT::i64);
13230       Result = DAG.getRegister(AArch64::WZR, MVT::i32);
13252     switch (ConstraintLetter) {
13266       CVal = C->getSExtValue();
13297       if ((CVal & 0xFFFF) == CVal)
13299       if ((CVal & 0xFFFF0000ULL) == CVal)
13301       uint64_t NCVal = ~(uint32_t)CVal;
13302       if ((NCVal & 0xFFFFULL) == NCVal)
13304       if ((NCVal & 0xFFFF0000ULL) == NCVal)
13311       if ((CVal & 0xFFFFULL) == CVal)
13313       if ((CVal & 0xFFFF0000ULL) == CVal)
13315       if ((CVal & 0xFFFF00000000ULL) == CVal)
13317       if ((CVal & 0xFFFF000000000000ULL) == CVal)
13319       uint64_t NCVal = ~CVal;
13320       if ((NCVal & 0xFFFFULL) == NCVal)
13322       if ((NCVal & 0xFFFF0000ULL) == NCVal)
13324       if ((NCVal & 0xFFFF00000000ULL) == NCVal)
13326       if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
13340   Ops.push_back(Result);
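  // Note (added comment, not in the upstream source): the mask tests above
  // check whether the constant (or its bitwise complement) occupies a single
  // 16-bit halfword at shift 0, 16, 32 or 48, i.e. whether it can be
  // materialized with one MOVZ or MOVN, which is what the corresponding
  // inline-asm immediate constraints require.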
13377   EVT VT = Op.getValueType();
13379          "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
13383   if (VT != MVT::v16i8 && VT != MVT::v8i8)
13387   assert((NumElts == 8 || NumElts == 16) &&
13388          "Need to have exactly 8 or 16 elements in vector.");
13394   for (unsigned i = 0; i < NumElts; ++i) {
13401       SourceVec = OperandSourceVec;
13402     else if (SourceVec != OperandSourceVec)
13415     } else if (!AndMaskConstants.empty()) {
13435     if (!MaskSourceVec) {
13439     } else if (MaskSourceVec != MaskSource->getOperand(0)) {
13453   if (!AndMaskConstants.empty())
13460                      SourceVec, MaskSourceVec);
13468   LLVM_DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n");
13470   EVT VT = Op.getValueType();
13472          "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
13475   struct ShuffleSourceInfo {
13490     ShuffleSourceInfo(SDValue Vec)
13491         : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
13492           ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
13500   for (unsigned i = 0; i < NumElts; ++i) {
13506         V.getOperand(0).getValueType().isScalableVector()) {
13508       dbgs() << "Reshuffle failed: "
13509                 "a shuffle can only come from building a vector from "
13510                 "various elements of other fixed-width vectors, provided "
13511                 "their indices are constant\n");
13517     auto Source = find(Sources, SourceVec);
13518     if (Source == Sources.end())
13519       Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
13522     unsigned EltNo = V.getConstantOperandVal(1);
13523     Source->MinElt = std::min(Source->MinElt, EltNo);
13524     Source->MaxElt = std::max(Source->MaxElt, EltNo);
13529   if ((Sources.size() == 3 || Sources.size() == 4) && NumElts > 4) {
13534     for (unsigned I = 0; I < NumElts; ++I) {
13537         for (unsigned OF = 0; OF < OutputFactor; OF++)
13538           Mask.push_back(-1);
13544       unsigned Lane = V.getConstantOperandVal(1);
13545       for (unsigned S = 0; S < Sources.size(); S++) {
13546         if (V.getOperand(0) == Sources[S].Vec) {
13547           unsigned InputSize = Sources[S].Vec.getScalarValueSizeInBits();
13548           unsigned InputBase = 16 * S + Lane * InputSize / 8;
13549           for (unsigned OF = 0; OF < OutputFactor; OF++)
13550             Mask.push_back(InputBase + OF);
13560                                  ? Intrinsic::aarch64_neon_tbl3
13561                                  : Intrinsic::aarch64_neon_tbl4,
13563     for (unsigned i = 0; i < Sources.size(); i++) {
13564       SDValue Src = Sources[i].Vec;
13565       EVT SrcVT = Src.getValueType();
13568              "Expected a legally typed vector");
13576     for (unsigned i = 0; i < Mask.size(); i++)
13578     assert((Mask.size() == 8 || Mask.size() == 16) &&
13579            "Expected a v8i8 or v16i8 Mask");
13581         Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, DL, TBLMask));
13585                        Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, TBLOperands);
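    // Note (added comment, not in the upstream source): for three or four
    // source vectors the code above builds a byte-level index vector in which
    // source S contributes indices starting at byte 16 * S, and then emits a
    // single aarch64_neon_tbl3 or aarch64_neon_tbl4 table lookup instead of a
    // tree of two-input shuffles.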
13589 if (Sources.
size() > 2) {
13590 LLVM_DEBUG(
dbgs() <<
"Reshuffle failed: currently only do something "
13591 <<
"sensible when at most two source vectors are "
13599 for (
auto &Source : Sources) {
13600 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
13601 if (SrcEltTy.
bitsLT(SmallestEltTy)) {
13602 SmallestEltTy = SrcEltTy;
13605 unsigned ResMultiplier =
13614 for (
auto &Src : Sources) {
13615 EVT SrcVT = Src.ShuffleVec.getValueType();
13628 assert(2 * SrcVTSize == VTSize);
13633 DAG.
getUNDEF(Src.ShuffleVec.getValueType()));
13639 dbgs() <<
"Reshuffle failed: result vector too small to extract\n");
13643 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
13645 dbgs() <<
"Reshuffle failed: span too large for a VEXT to cope\n");
13649 if (Src.MinElt >= NumSrcElts) {
13654 Src.WindowBase = -NumSrcElts;
13655 }
else if (Src.MaxElt < NumSrcElts) {
13672 dbgs() <<
"Reshuffle failed: don't know how to lower AArch64ISD::EXT "
13673 "for SVE vectors.");
13678 DAG.
getNode(AArch64ISD::EXT,
DL, DestVT, VEXTSrc1, VEXTSrc2,
13680 Src.WindowBase = -Src.MinElt;
13687 for (
auto &Src : Sources) {
13689 if (SrcEltTy == SmallestEltTy)
13694 DAG.
getNode(AArch64ISD::NVCAST,
DL, ShuffleVT, Src.ShuffleVec);
13696 Src.ShuffleVec = DAG.
getNode(ISD::BITCAST,
DL, ShuffleVT, Src.ShuffleVec);
13700 Src.WindowBase *= Src.WindowScale;
13705 for (
auto Src : Sources)
13706 assert(Src.ShuffleVec.getValueType() == ShuffleVT);
13714 if (Entry.isUndef())
13717 auto Src =
find(Sources, Entry.getOperand(0));
13726 int LanesDefined = BitsDefined / BitsPerShuffleLane;
13730 int *LaneMask = &Mask[i * ResMultiplier];
13732 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
13733 ExtractBase += NumElts * (Src - Sources.
begin());
13734 for (
int j = 0; j < LanesDefined; ++j)
13735 LaneMask[j] = ExtractBase + j;
13740 LLVM_DEBUG(
dbgs() <<
"Reshuffle failed: illegal shuffle mask\n");
13745 for (
unsigned i = 0; i < Sources.
size(); ++i)
13752 V = DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Shuffle);
13754 V = DAG.
getNode(ISD::BITCAST,
DL, VT, Shuffle);
13758 dbgs() <<
"Reshuffle, creating node: "; V.dump(););
13777 unsigned ExpectedElt = Imm;
13778 for (
unsigned i = 1; i < NumElts; ++i) {
13782 if (ExpectedElt == NumElts)
13787 if (ExpectedElt !=
static_cast<unsigned>(M[i]))
13798 if (V.getValueType() != MVT::v16i8)
13800 assert(V.getNumOperands() == 16 &&
"Expected 16 operands on the BUILDVECTOR");
13802 for (
unsigned X = 0;
X < 4;
X++) {
13814 for (
unsigned Y = 1;
Y < 4;
Y++) {
13817 Ext.getOperand(0) !=
Base ||
13819 Ext.getConstantOperandVal(1) !=
Y)
13830 V.getOperand(0).getOperand(0), V.getOperand(4).getOperand(0),
13831 V.getOperand(8).getOperand(0), V.getOperand(12).getOperand(0)};
13833 if (V.getValueType() == MVT::v4i32)
13849 unsigned &DupLaneOp) {
13851 "Only possible block sizes for wide DUP are: 16, 32, 64");
13870 for (
size_t BlockIndex = 0; BlockIndex < NumBlocks; BlockIndex++)
13871 for (
size_t I = 0;
I < NumEltsPerBlock;
I++) {
13872 int Elt = M[BlockIndex * NumEltsPerBlock +
I];
13876 if ((
unsigned)Elt >= SingleVecNumElements)
13878 if (BlockElts[
I] < 0)
13879 BlockElts[
I] = Elt;
13880 else if (BlockElts[
I] != Elt)
13889 auto FirstRealEltIter =
find_if(BlockElts, [](
int Elt) {
return Elt >= 0; });
13890 assert(FirstRealEltIter != BlockElts.
end() &&
13891 "Shuffle with all-undefs must have been caught by previous cases, "
13893 if (FirstRealEltIter == BlockElts.
end()) {
13899 size_t FirstRealIndex = FirstRealEltIter - BlockElts.
begin();
13901 if ((
unsigned)*FirstRealEltIter < FirstRealIndex)
13904 size_t Elt0 = *FirstRealEltIter - FirstRealIndex;
13907 if (Elt0 % NumEltsPerBlock != 0)
13911 for (
size_t I = 0;
I < NumEltsPerBlock;
I++)
13912 if (BlockElts[
I] >= 0 && (
unsigned)BlockElts[
I] != Elt0 +
I)
13915 DupLaneOp = Elt0 / NumEltsPerBlock;
13924 const int *FirstRealElt =
find_if(M, [](
int Elt) {
return Elt >= 0; });
13929 APInt ExpectedElt =
APInt(MaskBits, *FirstRealElt + 1,
false,
13933 bool FoundWrongElt = std::any_of(FirstRealElt + 1, M.end(), [&](
int Elt) {
13934 return Elt != ExpectedElt++ && Elt >= 0;
13966 if (NumElts % 2 != 0)
13968 WhichResult = (M[0] == 0 ? 0 : 1);
13969 unsigned Idx = WhichResult * NumElts / 2;
13970 for (
unsigned i = 0; i != NumElts; i += 2) {
13971 if ((M[i] >= 0 && (
unsigned)M[i] != Idx) ||
13972 (M[i + 1] >= 0 && (
unsigned)M[i + 1] != Idx))
13985 WhichResult = (M[0] == 0 ? 0 : 1);
13986 for (
unsigned j = 0; j != 2; ++j) {
13987 unsigned Idx = WhichResult;
13988 for (
unsigned i = 0; i != Half; ++i) {
13989 int MIdx = M[i + j * Half];
13990 if (MIdx >= 0 && (
unsigned)MIdx != Idx)
14004 if (NumElts % 2 != 0)
14006 WhichResult = (M[0] == 0 ? 0 : 1);
14007 for (
unsigned i = 0; i < NumElts; i += 2) {
14008 if ((M[i] >= 0 && (
unsigned)M[i] != i + WhichResult) ||
14009 (M[i + 1] >= 0 && (
unsigned)M[i + 1] != i + WhichResult))
14016 bool &DstIsLeft,
int &Anomaly) {
14017 if (M.size() !=
static_cast<size_t>(NumInputElements))
14020 int NumLHSMatch = 0, NumRHSMatch = 0;
14021 int LastLHSMismatch = -1, LastRHSMismatch = -1;
14023 for (
int i = 0; i < NumInputElements; ++i) {
14033 LastLHSMismatch = i;
14035 if (M[i] == i + NumInputElements)
14038 LastRHSMismatch = i;
14041 if (NumLHSMatch == NumInputElements - 1) {
14043 Anomaly = LastLHSMismatch;
14045 }
else if (NumRHSMatch == NumInputElements - 1) {
14047 Anomaly = LastRHSMismatch;
14060 for (
int I = 0,
E = NumElts / 2;
I !=
E;
I++) {
14065 int Offset = NumElts / 2;
14066 for (
int I = NumElts / 2,
E = NumElts;
I !=
E;
I++) {
14067 if (Mask[
I] !=
I + SplitLHS *
Offset)
14076 EVT VT =
Op.getValueType();
14111 unsigned OpNum = (PFEntry >> 26) & 0x0F;
14112 unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
14113 unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
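  // Note (added comment, not in the upstream source): a perfect-shuffle table
  // entry packs the operation in bits [29:26] and two 13-bit operand IDs; each
  // ID encodes up to four lane indices in base 9, with the digit 8 standing
  // for an undef lane (see getPFIDLane below, where ID % 9 == 8 maps to -1).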
14135   if (LHSID == (1 * 9 + 2) * 9 + 3)
14137   assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!");
14141   if (OpNum == OP_MOVLANE) {
14143     auto getPFIDLane = [](unsigned ID, int Elt) -> int {
14144       assert(Elt < 4 && "Expected Perfect Lanes to be less than 4");
14150       return (ID % 9 == 8) ? -1 : ID % 9;
14159     assert(RHSID < 8 && "Expected a lane index for RHSID!");
14160     unsigned ExtLane = 0;
14166       int MaskElt = getPFIDLane(ID, (RHSID & 0x01) << 1) >> 1;
14168         MaskElt = (getPFIDLane(ID, ((RHSID & 0x01) << 1) + 1) - 1) >> 1;
14169       assert(MaskElt >= 0 && "Didn't expect an undef movlane index!");
14170       ExtLane = MaskElt < 2 ? MaskElt : (MaskElt - 2);
14171       Input = MaskElt < 2 ? V1 : V2;
14177              "Expected 16 or 32 bit shuffle elements");
14182       int MaskElt = getPFIDLane(ID, RHSID);
14183       assert(MaskElt >= 0 && "Didn't expect an undef movlane index!");
14184       ExtLane = MaskElt < 4 ? MaskElt : (MaskElt - 4);
14185       Input = MaskElt < 4 ? V1 : V2;
14187     if (VT == MVT::v4i16) {
14193                         Input.getValueType().getVectorElementType(),
14215     return DAG.getNode(AArch64ISD::REV64, DL, VT, OpLHS);
14220     return DAG.getNode(AArch64ISD::REV32, DL, VT, OpLHS);
14223     return DAG.getNode(AArch64ISD::REV16, DL, VT, OpLHS);
14230     if (EltTy == MVT::i8)
14231       Opcode = AArch64ISD::DUPLANE8;
14232     else if (EltTy == MVT::i16 || EltTy == MVT::f16 || EltTy == MVT::bf16)
14233       Opcode = AArch64ISD::DUPLANE16;
14234     else if (EltTy == MVT::i32 || EltTy == MVT::f32)
14235       Opcode = AArch64ISD::DUPLANE32;
14236     else if (EltTy == MVT::i64 || EltTy == MVT::f64)
14237       Opcode = AArch64ISD::DUPLANE64;
14244     return DAG.getNode(Opcode, DL, VT, OpLHS, Lane);
14250     return DAG.getNode(AArch64ISD::EXT, DL, VT, OpLHS, OpRHS,
14254     return DAG.getNode(AArch64ISD::UZP1, DL, VT, OpLHS, OpRHS);
14256     return DAG.getNode(AArch64ISD::UZP2, DL, VT, OpLHS, OpRHS);
14258     return DAG.getNode(AArch64ISD::ZIP1, DL, VT, OpLHS, OpRHS);
14260     return DAG.getNode(AArch64ISD::ZIP2, DL, VT, OpLHS, OpRHS);
14262     return DAG.getNode(AArch64ISD::TRN1, DL, VT, OpLHS, OpRHS);
14264     return DAG.getNode(AArch64ISD::TRN2, DL, VT, OpLHS, OpRHS);
14275   EVT EltVT = Op.getValueType().getVectorElementType();
14288   MVT IndexVT = MVT::v8i8;
14289   unsigned IndexLen = 8;
14290   if (Op.getValueSizeInBits() == 128) {
14291     IndexVT = MVT::v16i8;
14296   for (int Val : ShuffleMask) {
14297     for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
14298       unsigned Offset = Byte + Val * BytesPerElt;
14301       if (IsUndefOrZero && Offset >= IndexLen)
14311   if (IsUndefOrZero) {
14320   if (IndexLen == 8) {
14341   return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle);
14345   if (EltType == MVT::i8)
14346     return AArch64ISD::DUPLANE8;
14347   if (EltType == MVT::i16 || EltType == MVT::f16 || EltType == MVT::bf16)
14348     return AArch64ISD::DUPLANE16;
14349   if (EltType == MVT::i32 || EltType == MVT::f32)
14350     return AArch64ISD::DUPLANE32;
14351   if (EltType == MVT::i64 || EltType == MVT::f64)
14352     return AArch64ISD::DUPLANE64;
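  // Note (added comment, not in the upstream source): the TBL-based shuffle
  // lowering above expands the shuffle mask to byte granularity and emits a
  // NEON TBL lookup; byte indices that fall outside the table produce 0, which
  // is exactly what the "undef or zero" handling of out-of-range offsets
  // relies on.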
14360 auto getScaledOffsetDup = [](
SDValue BitCast,
int &LaneC,
MVT &CastVT) {
14362 if (BitCast.
getOpcode() != ISD::BITCAST ||
14371 unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
14373 if (ExtIdxInBits % CastedEltBitWidth != 0)
14381 LaneC += ExtIdxInBits / CastedEltBitWidth;
14388 unsigned SrcVecNumElts =
14395 if (getScaledOffsetDup(V, Lane, CastVT)) {
14396 V = DAG.
getBitcast(CastVT, V.getOperand(0).getOperand(0));
14398 V.getOperand(0).getValueType().is128BitVector()) {
14401 Lane += V.getConstantOperandVal(1);
14402 V = V.getOperand(0);
14428 EVT VT =
Op.getValueType();
14438 if (ElementSize > 32 || ElementSize == 1)
14468 EVT VT =
Op.getValueType();
14485 for (
unsigned I = 0;
I < 16;
I++) {
14486 if (ShuffleMask[
I] < 16)
14492 TBLMaskParts[
I] = DAG.
getConstant(
C->getSExtValue() + 32,
DL, MVT::i32);
14509AArch64TargetLowering::LowerZERO_EXTEND_VECTOR_INREG(
SDValue Op,
14512 EVT VT =
Op.getValueType();
14516 "Unexpected extension factor.");
14523 DAG.
getNode(AArch64ISD::ZIP1,
DL, SrcVT, SrcOp, Zeros));
14529 EVT VT =
Op.getValueType();
14534 return LowerFixedLengthVECTOR_SHUFFLEToSVE(
Op, DAG);
14540 ArrayRef<int> ShuffleMask = SVN->
getMask();
14547 "Unexpected VECTOR_SHUFFLE mask size!");
14573 for (
unsigned LaneSize : {64U, 32U, 16U}) {
14576 unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64
14577 : LaneSize == 32 ? AArch64ISD::DUPLANE32
14578 : AArch64ISD::DUPLANE16;
14593 if (
isREVMask(ShuffleMask, EltSize, NumElts, 64))
14595 if (
isREVMask(ShuffleMask, EltSize, NumElts, 32))
14597 if (
isREVMask(ShuffleMask, EltSize, NumElts, 16))
14600 if (((NumElts == 8 && EltSize == 16) || (NumElts == 16 && EltSize == 8)) &&
14603 return DAG.
getNode(AArch64ISD::EXT,
DL, VT, Rev, Rev,
14607 bool ReverseEXT =
false;
14609 if (
isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
14621 unsigned WhichResult;
14622 if (
isZIPMask(ShuffleMask, NumElts, WhichResult)) {
14623 unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
14626 if (
isUZPMask(ShuffleMask, NumElts, WhichResult)) {
14627 unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
14630 if (
isTRNMask(ShuffleMask, NumElts, WhichResult)) {
14631 unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
14636 unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
14640 unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
14644 unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
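  // Note (added comment, not in the upstream source): ZIP1/ZIP2 interleave the
  // low/high halves of the two inputs, UZP1/UZP2 concatenate the even/odd
  // numbered elements, and TRN1/TRN2 interleave the even/odd elements
  // pairwise; the mask classifiers (isZIPMask/isUZPMask/isTRNMask) detect
  // exactly these patterns, with WhichResult choosing the "1" or "2" variant.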
14654 if (
isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
14655 SDValue DstVec = DstIsLeft ? V1 : V2;
14659 int SrcLane = ShuffleMask[Anomaly];
14660 if (SrcLane >= NumInputElements) {
14662 SrcLane -= NumElts;
14669 ScalarVT = MVT::i32;
14682 if (NumElts == 4) {
14683 unsigned PFIndexes[4];
14684 for (
unsigned i = 0; i != 4; ++i) {
14685 if (ShuffleMask[i] < 0)
14688 PFIndexes[i] = ShuffleMask[i];
14692 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
14693 PFIndexes[2] * 9 + PFIndexes[3];
14703 "Expected larger vector element sizes to be handled already");
14705 for (
int M : ShuffleMask)
14707 M >=
static_cast<int>(NumElts) ? 0 : 0xffffffff,
DL, MVT::i32));
14721 EVT VT =
Op.getValueType();
14724 return LowerToScalableOp(
Op, DAG);
14727 "Unexpected vector type!");
14742 if (VT == MVT::nxv1i1)
14754 EVT VT =
Op.getValueType();
14767 if (CIdx && (CIdx->getZExtValue() <= 3)) {
14769 return DAG.
getNode(AArch64ISD::DUPLANE128,
DL, VT,
Op.getOperand(1), CI);
14791 SDValue TBL = DAG.
getNode(AArch64ISD::TBL,
DL, MVT::nxv2i64, V, ShuffleMask);
14792 return DAG.
getNode(ISD::BITCAST,
DL, VT, TBL);
14797 APInt &UndefBits) {
14799 APInt SplatBits, SplatUndef;
14800 unsigned SplatBitSize;
14802 if (BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14805 for (
unsigned i = 0; i < NumSplats; ++i) {
14806 CnstBits <<= SplatBitSize;
14807 UndefBits <<= SplatBitSize;
14809 UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.
getSizeInBits());
14820 const APInt &Bits) {
14821 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
14823 EVT VT =
Op.getValueType();
14832 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Mov);
14843 EVT VT =
Op.getValueType();
14848 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
14851 bool isAdvSIMDModImm =
false;
14871 if (isAdvSIMDModImm) {
14885 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Mov);
14896 EVT VT =
Op.getValueType();
14901 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
14904 bool isAdvSIMDModImm =
false;
14916 if (isAdvSIMDModImm) {
14930 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Mov);
14940 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
14942 EVT VT =
Op.getValueType();
14944 bool isAdvSIMDModImm =
false;
14956 if (isAdvSIMDModImm) {
14961 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Mov);
14970 const APInt &Bits) {
14971 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
14973 EVT VT =
Op.getValueType();
14982 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Mov);
14991 const APInt &Bits) {
14992 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
14994 EVT VT =
Op.getValueType();
14997 bool isAdvSIMDModImm =
false;
15001 MovTy = isWide ? MVT::v4f32 : MVT::v2f32;
15006 MovTy = MVT::v2f64;
15009 if (isAdvSIMDModImm) {
15013 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Mov);
15033 for (
unsigned i = 1; i < NumElts; ++i)
15042 while (
N.getOpcode() == AArch64ISD::REINTERPRET_CAST)
15043 N =
N.getOperand(0);
15049 unsigned NumElts =
N.getValueType().getVectorMinNumElements();
15052 while (
N.getOpcode() == AArch64ISD::REINTERPRET_CAST) {
15053 N =
N.getOperand(0);
15056 if (
N.getValueType().getVectorMinNumElements() < NumElts)
15066 if (
N.getOpcode() == AArch64ISD::PTRUE &&
15067 N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
15068 return N.getValueType().getVectorMinNumElements() >= NumElts;
15072 if (
N.getOpcode() == AArch64ISD::PTRUE) {
15074 unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
15075 unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
15076 if (MaxSVESize && MinSVESize == MaxSVESize) {
15078 unsigned PatNumElts =
15080 return PatNumElts == (NumElts * VScale);
15094 EVT VT =
N->getValueType(0);
15104 SDValue FirstOp =
N->getOperand(0);
15105 unsigned FirstOpc = FirstOp.
getOpcode();
15106 SDValue SecondOp =
N->getOperand(1);
15107 unsigned SecondOpc = SecondOp.
getOpcode();
15114 if ((FirstOpc ==
ISD::AND || FirstOpc == AArch64ISD::BICi) &&
15115 (SecondOpc == AArch64ISD::VSHL || SecondOpc == AArch64ISD::VLSHR ||
15116 SecondOpc == AArch64ISD::SHL_PRED ||
15117 SecondOpc == AArch64ISD::SRL_PRED)) {
15121 }
else if ((SecondOpc ==
ISD::AND || SecondOpc == AArch64ISD::BICi) &&
15122 (FirstOpc == AArch64ISD::VSHL || FirstOpc == AArch64ISD::VLSHR ||
15123 FirstOpc == AArch64ISD::SHL_PRED ||
15124 FirstOpc == AArch64ISD::SRL_PRED)) {
15131 bool IsShiftRight = Shift.
getOpcode() == AArch64ISD::VLSHR ||
15132 Shift.
getOpcode() == AArch64ISD::SRL_PRED;
15133 bool ShiftHasPredOp = Shift.
getOpcode() == AArch64ISD::SHL_PRED ||
15134 Shift.
getOpcode() == AArch64ISD::SRL_PRED;
15138 if (ShiftHasPredOp) {
15144 C2 =
C.getZExtValue();
15147 C2 = C2node->getZExtValue();
15161 assert(C1nodeImm && C1nodeShift);
15163 C1AsAPInt = C1AsAPInt.
zextOrTrunc(ElemSizeInBits);
15169 if (C2 > ElemSizeInBits)
15174 if (C1AsAPInt != RequiredC1)
15182 unsigned Inst = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
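  // Note (added comment, not in the upstream source): this combine turns an
  // (or (and X, Mask), (shift Y, C)) pattern into a single SRI/SLI
  // shift-with-insert when the AND mask clears exactly the bits the shifted
  // value writes (the RequiredC1 comparison above), so the remaining bits of X
  // are preserved by the insert semantics.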
15187 EVT VT =
N->getValueType(0);
15188 assert(VT.
isVector() &&
"Expected vector type in tryLowerToBSL\n");
15206 for (
int i = 1; i >= 0; --i) {
15207 for (
int j = 1; j >= 0; --j) {
15233 if (
Sub.getOperand(1) !=
Add.getOperand(0))
15236 return DAG.
getNode(AArch64ISD::BSP,
DL, VT,
Sub, SubSibling, AddSibling);
15244 for (
int i = 1; i >= 0; --i)
15245 for (
int j = 1; j >= 0; --j) {
15256 if (!BVN0 || !BVN1)
15259 bool FoundMatch =
true;
15263 if (!CN0 || !CN1 ||
15266 FoundMatch =
false;
15281 !Subtarget->isNeonAvailable()))
15282 return LowerToScalableOp(
Op, DAG);
15291 EVT VT =
Op.getValueType();
15296 BuildVectorSDNode *BVN =
15300 LHS =
Op.getOperand(1);
15318 UndefBits, &
LHS)) ||
15334 EVT VT =
Op.getValueType();
15348 CstLane->getAPIntValue().trunc(EltTy.
getSizeInBits()).getZExtValue(),
15350 }
else if (Lane.getNode()->isUndef()) {
15353 assert(Lane.getValueType() == MVT::i32 &&
15354 "Unexpected BUILD_VECTOR operand type");
15356 Ops.push_back(Lane);
15363 EVT VT =
Op.getValueType();
15371 int32_t ImmVal, ShiftVal;
15379 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Res);
15384 EVT VT =
Op.getValueType();
15386 "Expected a legal NEON vector");
15392 auto TryMOVIWithBits = [&](
APInt DefBits) {
15406 APInt NotDefBits = ~DefBits;
15416 if (
SDValue R = TryMOVIWithBits(DefBits))
15418 if (
SDValue R = TryMOVIWithBits(UndefBits))
15426 auto TryWithFNeg = [&](
APInt DefBits,
MVT FVT) {
15432 unsigned NumElts = VT.
getSizeInBits() / FVT.getScalarSizeInBits();
15433 for (
unsigned i = 0; i < NumElts; i++)
15434 NegBits |= Neg << (FVT.getScalarSizeInBits() * i);
15435 NegBits = DefBits ^ NegBits;
15439 if (
SDValue NewOp = TryMOVIWithBits(NegBits)) {
15443 AArch64ISD::NVCAST,
DL, VT,
15445 DAG.
getNode(AArch64ISD::NVCAST,
DL, VFVT, NewOp)));
15450 if ((R = TryWithFNeg(DefBits, MVT::f32)) ||
15451 (R = TryWithFNeg(DefBits, MVT::f64)) ||
15452 (ST->hasFullFP16() && (R = TryWithFNeg(DefBits, MVT::f16))))
15459SDValue AArch64TargetLowering::LowerFixedLengthBuildVectorToSVE(
15461 EVT VT =
Op.getValueType();
15485 NumElems -
count_if(
Op->op_values(), IsExtractElt) > 4)
15492 return Op.isUndef() ? Undef
15493 : DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
15494 ContainerVT, Undef, Op, ZeroI64);
15498 while (Intermediates.
size() > 1) {
15501 for (
unsigned I = 0;
I < Intermediates.
size();
I += 2) {
15504 Intermediates[
I / 2] =
15506 : DAG.
getNode(AArch64ISD::ZIP1,
DL, ZipVT, Op0, Op1);
15509 Intermediates.
resize(Intermediates.
size() / 2);
15520 EVT VT =
Op.getValueType();
15522 bool OverrideNEON = !Subtarget->isNeonAvailable() ||
15525 return LowerFixedLengthBuildVectorToSVE(
Op, DAG);
15543 Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
15544 if (Val.isZero() || (VT.
isInteger() && Val.isAllOnes()))
15548 if (
Const->isZero() && !
Const->isNegative())
15569 bool isOnlyLowElement =
true;
15570 bool usesOnlyOneValue =
true;
15571 bool usesOnlyOneConstantValue =
true;
15573 bool AllLanesExtractElt =
true;
15574 unsigned NumConstantLanes = 0;
15575 unsigned NumDifferentLanes = 0;
15576 unsigned NumUndefLanes = 0;
15579 SmallMapVector<SDValue, unsigned, 16> DifferentValueMap;
15580 unsigned ConsecutiveValCount = 0;
15582 for (
unsigned i = 0; i < NumElts; ++i) {
15585 AllLanesExtractElt =
false;
15591 isOnlyLowElement =
false;
15596 ++NumConstantLanes;
15597 if (!ConstantValue.
getNode())
15599 else if (ConstantValue != V)
15600 usesOnlyOneConstantValue =
false;
15603 if (!
Value.getNode())
15605 else if (V !=
Value) {
15606 usesOnlyOneValue =
false;
15607 ++NumDifferentLanes;
15610 if (PrevVal != V) {
15611 ConsecutiveValCount = 0;
15626 DifferentValueMap[
V] = ++ConsecutiveValCount;
15629 if (!
Value.getNode()) {
15631 dbgs() <<
"LowerBUILD_VECTOR: value undefined, creating undef node\n");
15639 LLVM_DEBUG(
dbgs() <<
"LowerBUILD_VECTOR: only low element used, creating 1 "
15640 "SCALAR_TO_VECTOR node\n");
15644 if (AllLanesExtractElt) {
15645 SDNode *
Vector =
nullptr;
15650 for (
unsigned i = 0; i < NumElts; ++i) {
15652 const SDNode *
N =
V.getNode();
15677 if (Val == 2 * i) {
15681 if (Val - 1 == 2 * i) {
15708 if (usesOnlyOneValue) {
15711 Value.getValueType() != VT) {
15713 dbgs() <<
"LowerBUILD_VECTOR: use DUP for non-constant splats\n");
15721 if (
Value.getValueSizeInBits() == 64) {
15723 dbgs() <<
"LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
15735 assert ((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 ||
15736 EltTy == MVT::f64) &&
"Unsupported floating-point vector type");
15738 dbgs() <<
"LowerBUILD_VECTOR: float constant splats, creating int "
15739 "BITCASTS, and try again\n");
15741 for (
unsigned i = 0; i < NumElts; ++i)
15742 Ops.push_back(DAG.
getNode(ISD::BITCAST,
DL, NewType,
Op.getOperand(i)));
15745 LLVM_DEBUG(
dbgs() <<
"LowerBUILD_VECTOR: trying to lower new vector: ";
15747 Val = LowerBUILD_VECTOR(Val, DAG);
15749 return DAG.
getNode(ISD::BITCAST,
DL, VT, Val);
15757 bool PreferDUPAndInsert =
15759 NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
15760 NumDifferentLanes >= NumConstantLanes;
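  // Note (added comment, not in the upstream source): PreferDUPAndInsert fires
  // when one non-constant value dominates the build vector; it is then cheaper
  // to DUP that value across all lanes and patch the few differing lanes with
  // INSERT_VECTOR_ELT than to insert every lane individually.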
15766 if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
15770 APInt ConstantValueAPInt(1, 0);
15772 ConstantValueAPInt =
C->getAPIntValue().zextOrTrunc(BitSize);
15774 !ConstantValueAPInt.isAllOnes()) {
15778 Val = DAG.
getNode(AArch64ISD::DUP,
DL, VT, ConstantValue);
15782 for (
unsigned i = 0; i < NumElts; ++i) {
15796 dbgs() <<
"LowerBUILD_VECTOR: all elements are constant, use default "
15808 if (NumElts >= 4) {
15816 if (PreferDUPAndInsert) {
15821 for (
unsigned I = 0;
I < NumElts; ++
I)
15832 if (DifferentValueMap.
size() == 2 && NumUndefLanes == 0) {
15844 bool canUseVECTOR_CONCAT =
true;
15845 for (
auto Pair : DifferentValueMap) {
15847 if (Pair.second != NumElts / 2)
15848 canUseVECTOR_CONCAT =
false;
15861 if (canUseVECTOR_CONCAT) {
15884 if (NumElts >= 8) {
15885 SmallVector<int, 16> MaskVec;
15887 SDValue FirstLaneVal =
Op.getOperand(0);
15888 for (
unsigned i = 0; i < NumElts; ++i) {
15890 if (FirstLaneVal == Val)
15914 dbgs() <<
"LowerBUILD_VECTOR: alternatives failed, creating sequence "
15915 "of INSERT_VECTOR_ELT\n");
15932 LLVM_DEBUG(
dbgs() <<
"Creating node for op0, it is not undefined:\n");
15938 dbgs() <<
"Creating nodes for the other vector elements:\n";
15940 for (; i < NumElts; ++i) {
15951 dbgs() <<
"LowerBUILD_VECTOR: use default expansion, failed to find "
15952 "better alternative\n");
15959 !Subtarget->isNeonAvailable()))
15960 return LowerFixedLengthConcatVectorsToSVE(
Op, DAG);
15962 assert(
Op.getValueType().isScalableVector() &&
15964 "Expected legal scalable vector type!");
15969 "Unexpected number of operands in CONCAT_VECTORS");
15971 if (NumOperands == 2)
15976 while (ConcatOps.size() > 1) {
15977 for (
unsigned I = 0,
E = ConcatOps.size();
I !=
E;
I += 2) {
15985 ConcatOps.resize(ConcatOps.size() / 2);
15987 return ConcatOps[0];
15998 !Subtarget->isNeonAvailable()))
15999 return LowerFixedLengthInsertVectorElt(
Op, DAG);
16001 EVT VT =
Op.getOperand(0).getValueType();
16015 ExtendedValue,
Op.getOperand(2));
16028AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(
SDValue Op,
16031 EVT VT =
Op.getOperand(0).getValueType();
16040 MVT ExtractTy = VectorVT == MVT::nxv2i64 ? MVT::i64 : MVT::i32;
16042 Extend,
Op.getOperand(1));
16047 return LowerFixedLengthExtractVectorElt(
Op, DAG);
16055 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
16056 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
16057 VT == MVT::v8f16 || VT == MVT::v8bf16)
16060 if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
16061 VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
16072 if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
16082 EVT VT =
Op.getValueType();
16084 "Only cases that extract a fixed length vector are supported!");
16085 EVT InVT =
Op.getOperand(0).getValueType();
16093 unsigned Idx =
Op.getConstantOperandVal(1);
16112 if (PackedVT != InVT) {
16135 assert(
Op.getValueType().isScalableVector() &&
16136 "Only expect to lower inserts into scalable vectors!");
16138 EVT InVT =
Op.getOperand(1).getValueType();
16139 unsigned Idx =
Op.getConstantOperandVal(2);
16144 EVT VT =
Op.getValueType();
16160 if (Idx < (NumElts / 2))
16186 Vec0 = getSVESafeBitCast(NarrowVT, Vec0, DAG);
16187 Vec1 = getSVESafeBitCast(NarrowVT, Vec1, DAG);
16191 Vec1 = DAG.
getNode(AArch64ISD::NVCAST,
DL, NarrowVT, Vec1);
16200 HiVec0 = DAG.
getNode(AArch64ISD::NVCAST,
DL, NarrowVT, HiVec0);
16201 Narrow = DAG.
getNode(AArch64ISD::UZP1,
DL, NarrowVT, Vec1, HiVec0);
16204 "Invalid subvector index!");
16206 LoVec0 = DAG.
getNode(AArch64ISD::NVCAST,
DL, NarrowVT, LoVec0);
16207 Narrow = DAG.
getNode(AArch64ISD::UZP1,
DL, NarrowVT, LoVec0, Vec1);
16210 return getSVESafeBitCast(VT, Narrow, DAG);
16218 std::optional<unsigned> PredPattern =
16230 if (
Op.getOpcode() != AArch64ISD::DUP &&
16243 SplatVal =
Op->getConstantOperandVal(0);
16244 if (
Op.getValueType().getVectorElementType() != MVT::i64)
16245 SplatVal = (int32_t)SplatVal;
16253 SplatVal = -SplatVal;
16261 EVT VT =
Op.getValueType();
16265 return LowerFixedLengthVectorIntDivideToSVE(
Op, DAG);
16270 unsigned PredOpcode =
Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
16279 DAG.
getNode(AArch64ISD::ASRD_MERGE_OP1,
DL, VT, Pg,
Op->getOperand(0),
16287 if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64)
16288 return LowerToPredicatedOp(
Op, DAG, PredOpcode);
16293 if (VT == MVT::nxv16i8)
16294 WidenedVT = MVT::nxv8i16;
16295 else if (VT == MVT::nxv8i16)
16296 WidenedVT = MVT::nxv4i32;
16300 unsigned UnpkLo =
Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
16301 unsigned UnpkHi =
Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
16310 return DAG.
getNode(AArch64ISD::UZP1,
DL, VT, ResultLoCast, ResultHiCast);
16313bool AArch64TargetLowering::shouldExpandBuildVectorWithShuffles(
16314 EVT VT,
unsigned DefinedValues)
const {
16315 if (!Subtarget->isNeonAvailable())
16334 unsigned DummyUnsigned;
16342 isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
16344 isTRNMask(M, NumElts, DummyUnsigned) ||
16345 isUZPMask(M, NumElts, DummyUnsigned) ||
16346 isZIPMask(M, NumElts, DummyUnsigned) ||
16350 isINSMask(M, NumElts, DummyBool, DummyInt) ||
16365 while (
Op.getOpcode() == ISD::BITCAST)
16366 Op =
Op.getOperand(0);
16368 APInt SplatBits, SplatUndef;
16369 unsigned SplatBitSize;
16371 if (!BVN || !BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
16372 HasAnyUndefs, ElementBits) ||
16373 SplatBitSize > ElementBits)
16384 assert(VT.
isVector() &&
"vector shift count is not a vector type");
16388 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
16395 assert(VT.
isVector() &&
"vector shift count is not a vector type");
16399 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
16404 EVT VT =
Op.getValueType();
16409 EVT OpVT =
Op.getOperand(0).getValueType();
16420 !Subtarget->isNeonAvailable()))
16421 return LowerFixedLengthVectorTruncateToSVE(
Op, DAG);
16431 unsigned &ShiftValue,
16444 ShiftValue = ShiftOp1->getZExtValue();
16453 "ResVT must be truncated or same type as the shift.");
16456 if (ShiftValue > ExtraBits && !
Add->getFlags().hasNoUnsignedWrap())
16463 uint64_t AddValue = AddOp1->getZExtValue();
16464 if (AddValue != 1ULL << (ShiftValue - 1))
16467 RShOperand =
Add->getOperand(0);
16473 EVT VT =
Op.getValueType();
16477 if (!
Op.getOperand(1).getValueType().isVector())
16481 switch (
Op.getOpcode()) {
16485 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::SHL_PRED);
16487 if (
isVShiftLImm(
Op.getOperand(1), VT,
false, Cnt) && Cnt < EltSize)
16488 return DAG.
getNode(AArch64ISD::VSHL,
DL, VT,
Op.getOperand(0),
16493 Op.getOperand(0),
Op.getOperand(1));
16497 (Subtarget->hasSVE2() ||
16498 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
16500 unsigned ShiftValue;
16502 return DAG.
getNode(AArch64ISD::URSHR_I_PRED,
DL, VT,
16509 unsigned Opc =
Op.getOpcode() ==
ISD::SRA ? AArch64ISD::SRA_PRED
16510 : AArch64ISD::SRL_PRED;
16511 return LowerToPredicatedOp(
Op, DAG,
Opc);
16515 if (
isVShiftRImm(
Op.getOperand(1), VT,
false, Cnt) && Cnt < EltSize) {
16517 (
Op.getOpcode() ==
ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
16526 unsigned Opc = (
Op.getOpcode() ==
ISD::SRA) ? Intrinsic::aarch64_neon_sshl
16527 : Intrinsic::aarch64_neon_ushl;
16535 return NegShiftLeft;
16543 if (
Op.getValueType().isScalableVector())
16544 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);
16547 !Subtarget->isNeonAvailable()))
16548 return LowerFixedLengthVectorSetccToSVE(
Op, DAG);
16553 EVT CmpVT =
LHS.getValueType().changeVectorElementTypeToInteger();
16556 if (
LHS.getValueType().getVectorElementType().isInteger())
16559 assert(((!Subtarget->hasFullFP16() &&
16560 LHS.getValueType().getVectorElementType() != MVT::f16) ||
16561 LHS.getValueType().getVectorElementType() != MVT::bf16 ||
16562 LHS.getValueType().getVectorElementType() != MVT::f128) &&
16563 "Unexpected type!");
16568 bool OneNaN =
false;
16592 if (!
Cmp.getNode())
16621 unsigned ScalarOpcode;
16623 case ISD::VECREDUCE_AND:
16626 case ISD::VECREDUCE_OR:
16629 case ISD::VECREDUCE_XOR:
16639 "Expected power-of-2 length vector");
16647 if (ElemVT == MVT::i1) {
16649 if (NumElems > 16) {
16652 EVT HalfVT =
Lo.getValueType();
16663 unsigned ExtendedWidth = 64;
16666 ExtendedWidth = 128;
16671 unsigned ExtendOp =
16679 NumElems == 2 && ExtendedWidth == 128) {
16680 Extended = DAG.
getBitcast(MVT::v4i32, Extended);
16681 ExtendedVT = MVT::i32;
16683 switch (ScalarOpcode) {
16685 Result = DAG.
getNode(ISD::VECREDUCE_UMIN,
DL, ExtendedVT, Extended);
16688 Result = DAG.
getNode(ISD::VECREDUCE_UMAX,
DL, ExtendedVT, Extended);
16691 Result = DAG.
getNode(ISD::VECREDUCE_ADD,
DL, ExtendedVT, Extended);
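      // Note (added comment, not in the upstream source): for i1 vectors the
      // logical reductions are mapped onto integer reductions of the extended
      // vector: AND becomes an unsigned-min reduction (any zero lane forces
      // 0), OR becomes an unsigned-max reduction, and XOR becomes an ADD whose
      // low bit carries the parity of the lanes.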
16704 VecVT =
Lo.getValueType();
16720 for (
unsigned Shift = NumElems / 2; Shift > 0; Shift /= 2) {
16725 Scalar = DAG.
getNode(ScalarOpcode,
DL, ScalarVT, Scalar, Shifted);
16737 EVT SrcVT = Src.getValueType();
16741 if (Subtarget->hasFullFP16() &&
Op.getOpcode() == ISD::VECREDUCE_FADD &&
16742 SrcVT == MVT::v2f16) {
16750 bool OverrideNEON = !Subtarget->isNeonAvailable() ||
16751 Op.getOpcode() == ISD::VECREDUCE_AND ||
16752 Op.getOpcode() == ISD::VECREDUCE_OR ||
16753 Op.getOpcode() == ISD::VECREDUCE_XOR ||
16754 Op.getOpcode() == ISD::VECREDUCE_FADD ||
16755 (
Op.getOpcode() != ISD::VECREDUCE_ADD &&
16759 SrcVT, OverrideNEON && Subtarget->useSVEForFixedLengthVectors())) {
16762 return LowerPredReductionToSVE(
Op, DAG);
16764 switch (
Op.getOpcode()) {
16765 case ISD::VECREDUCE_ADD:
16766 return LowerReductionToSVE(AArch64ISD::UADDV_PRED,
Op, DAG);
16767 case ISD::VECREDUCE_AND:
16768 return LowerReductionToSVE(AArch64ISD::ANDV_PRED,
Op, DAG);
16769 case ISD::VECREDUCE_OR:
16770 return LowerReductionToSVE(AArch64ISD::ORV_PRED,
Op, DAG);
16771 case ISD::VECREDUCE_SMAX:
16772 return LowerReductionToSVE(AArch64ISD::SMAXV_PRED,
Op, DAG);
16773 case ISD::VECREDUCE_SMIN:
16774 return LowerReductionToSVE(AArch64ISD::SMINV_PRED,
Op, DAG);
16775 case ISD::VECREDUCE_UMAX:
16776 return LowerReductionToSVE(AArch64ISD::UMAXV_PRED,
Op, DAG);
16777 case ISD::VECREDUCE_UMIN:
16778 return LowerReductionToSVE(AArch64ISD::UMINV_PRED,
Op, DAG);
16779 case ISD::VECREDUCE_XOR:
16780 return LowerReductionToSVE(AArch64ISD::EORV_PRED,
Op, DAG);
16781 case ISD::VECREDUCE_FADD:
16782 return LowerReductionToSVE(AArch64ISD::FADDV_PRED,
Op, DAG);
16783 case ISD::VECREDUCE_FMAX:
16784 return LowerReductionToSVE(AArch64ISD::FMAXNMV_PRED,
Op, DAG);
16785 case ISD::VECREDUCE_FMIN:
16786 return LowerReductionToSVE(AArch64ISD::FMINNMV_PRED,
Op, DAG);
16787 case ISD::VECREDUCE_FMAXIMUM:
16788 return LowerReductionToSVE(AArch64ISD::FMAXV_PRED,
Op, DAG);
16789 case ISD::VECREDUCE_FMINIMUM:
16790 return LowerReductionToSVE(AArch64ISD::FMINV_PRED,
Op, DAG);
16798 switch (
Op.getOpcode()) {
16799 case ISD::VECREDUCE_AND:
16800 case ISD::VECREDUCE_OR:
16801 case ISD::VECREDUCE_XOR:
16803 Op.getValueType(),
DL, DAG);
16804 case ISD::VECREDUCE_ADD:
16806 case ISD::VECREDUCE_SMAX:
16808 case ISD::VECREDUCE_SMIN:
16810 case ISD::VECREDUCE_UMAX:
16812 case ISD::VECREDUCE_UMIN:
16823 EVT SrcVT = Src.getValueType();
16826 SDVTList SrcVTs = DAG.
getVTList(SrcVT, SrcVT);
16838 for (
unsigned I = 0;
I < Stages; ++
I) {
16840 Src = DAG.
getNode(BaseOpc,
DL, SrcVT, Src.getValue(0), Src.getValue(1));
16848 auto &Subtarget = DAG.
getSubtarget<AArch64Subtarget>();
16850 if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
16855 MVT VT =
Op.getSimpleValueType();
16856 assert(VT != MVT::i128 &&
"Handled elsewhere, code replicated.");
16861 Op.getOperand(0),
Op.getOperand(1),
RHS,
16866AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
SDValue Op,
16871 SDNode *
Node =
Op.getNode();
16876 EVT VT =
Node->getValueType(0);
16879 "no-stack-arg-probe")) {
16881 Chain =
SP.getValue(1);
16897 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
16898 const uint32_t *
Mask =
TRI->getWindowsStackProbePreservedMask();
16899 if (Subtarget->hasCustomCallingConv())
16907 Chain, Callee, DAG.
getRegister(AArch64::X15, MVT::i64),
16918 Chain =
SP.getValue(1);
16932AArch64TargetLowering::LowerInlineDYNAMIC_STACKALLOC(
SDValue Op,
16935 SDNode *
Node =
Op.getNode();
16942 EVT VT =
Node->getValueType(0);
16946 Chain =
SP.getValue(1);
16953 Chain = DAG.
getNode(AArch64ISD::PROBED_ALLOCA,
DL, MVT::Other, Chain, SP);
16959AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(
SDValue Op,
16963 if (Subtarget->isTargetWindows())
16964 return LowerWindowsDYNAMIC_STACKALLOC(
Op, DAG);
16966 return LowerInlineDYNAMIC_STACKALLOC(
Op, DAG);
16972 unsigned NewOp)
const {
16973 if (Subtarget->hasSVE2())
16974 return LowerToPredicatedOp(
Op, DAG, NewOp);
16982 EVT VT =
Op.getValueType();
16983 assert(VT != MVT::i64 &&
"Expected illegal VSCALE node");
16986 APInt MulImm =
Op.getConstantOperandAPInt(0);
16992template <
unsigned NumVecs>
17002 for (
unsigned I = 0;
I < NumVecs; ++
I)
17011 Info.align.reset();
17023 auto &
DL =
I.getDataLayout();
17025 case Intrinsic::aarch64_sve_st2:
17027 case Intrinsic::aarch64_sve_st3:
17029 case Intrinsic::aarch64_sve_st4:
17031 case Intrinsic::aarch64_neon_ld2:
17032 case Intrinsic::aarch64_neon_ld3:
17033 case Intrinsic::aarch64_neon_ld4:
17034 case Intrinsic::aarch64_neon_ld1x2:
17035 case Intrinsic::aarch64_neon_ld1x3:
17036 case Intrinsic::aarch64_neon_ld1x4: {
17038 uint64_t NumElts =
DL.getTypeSizeInBits(
I.getType()) / 64;
17040 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
17042 Info.align.reset();
17047 case Intrinsic::aarch64_neon_ld2lane:
17048 case Intrinsic::aarch64_neon_ld3lane:
17049 case Intrinsic::aarch64_neon_ld4lane:
17050 case Intrinsic::aarch64_neon_ld2r:
17051 case Intrinsic::aarch64_neon_ld3r:
17052 case Intrinsic::aarch64_neon_ld4r: {
17055 Type *RetTy =
I.getType();
17057 unsigned NumElts = StructTy->getNumElements();
17058 Type *VecTy = StructTy->getElementType(0);
17061 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
17063 Info.align.reset();
17068 case Intrinsic::aarch64_neon_st2:
17069 case Intrinsic::aarch64_neon_st3:
17070 case Intrinsic::aarch64_neon_st4:
17071 case Intrinsic::aarch64_neon_st1x2:
17072 case Intrinsic::aarch64_neon_st1x3:
17073 case Intrinsic::aarch64_neon_st1x4: {
17075 unsigned NumElts = 0;
17076 for (
const Value *Arg :
I.args()) {
17077 Type *ArgTy = Arg->getType();
17080 NumElts +=
DL.getTypeSizeInBits(ArgTy) / 64;
17083 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
17085 Info.align.reset();
17090 case Intrinsic::aarch64_neon_st2lane:
17091 case Intrinsic::aarch64_neon_st3lane:
17092 case Intrinsic::aarch64_neon_st4lane: {
17094 unsigned NumElts = 0;
17096 Type *VecTy =
I.getArgOperand(0)->getType();
17099 for (
const Value *Arg :
I.args()) {
17100 Type *ArgTy = Arg->getType();
17107 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
17109 Info.align.reset();
17114   case Intrinsic::aarch64_ldaxr:
17115   case Intrinsic::aarch64_ldxr: {
17116     Type *ValTy = I.getParamElementType(0);
17119     Info.ptrVal = I.getArgOperand(0);
17121     Info.align = DL.getABITypeAlign(ValTy);
17125   case Intrinsic::aarch64_stlxr:
17126   case Intrinsic::aarch64_stxr: {
17127     Type *ValTy = I.getParamElementType(1);
17130     Info.ptrVal = I.getArgOperand(1);
17132     Info.align = DL.getABITypeAlign(ValTy);
17136   case Intrinsic::aarch64_ldaxp:
17137   case Intrinsic::aarch64_ldxp:
17139     Info.memVT = MVT::i128;
17140     Info.ptrVal = I.getArgOperand(0);
17142     Info.align = Align(16);
17145   case Intrinsic::aarch64_stlxp:
17146   case Intrinsic::aarch64_stxp:
17148     Info.memVT = MVT::i128;
17149     Info.ptrVal = I.getArgOperand(2);
17151     Info.align = Align(16);
17154   case Intrinsic::aarch64_sve_ldnt1: {
17158     Info.ptrVal = I.getArgOperand(1);
17160     Info.align = DL.getABITypeAlign(ElTy);
17164   case Intrinsic::aarch64_sve_stnt1: {
17168     Info.memVT = MVT::getVT(I.getOperand(0)->getType());
17169     Info.ptrVal = I.getArgOperand(2);
17171     Info.align = DL.getABITypeAlign(ElTy);
17175   case Intrinsic::aarch64_mops_memset_tag: {
17176     Value *Dst = I.getArgOperand(0);
17177     Value *Val = I.getArgOperand(1);
17182     Info.align = I.getParamAlign(0).valueOrOne();
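    // Note (added comment, not in the upstream source): the exclusive-pair
    // intrinsics (ldaxp/ldxp and stlxp/stxp) are described as a single 128-bit
    // memory access with 16-byte alignment so that the memory operand covers
    // both 64-bit halves of the transfer.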
17197 std::optional<unsigned> ByteOffset)
const {
17214 Base.getOperand(1).hasOneUse() &&
17221 uint64_t ShiftAmount =
Base.getOperand(1).getConstantOperandVal(1);
17223 if (ShiftAmount ==
Log2_32(LoadBytes))
17233 if ((VT == MVT::i64 || VT == MVT::i32) && Extend->
use_size()) {
17252 return NumBits1 > NumBits2;
17259 return NumBits1 > NumBits2;
17266 if (
I->getOpcode() != Instruction::FMul)
17269 if (!
I->hasOneUse())
17274 if (!(
User->getOpcode() == Instruction::FSub ||
17275 User->getOpcode() == Instruction::FAdd))
17286 I->getFastMathFlags().allowContract()));
17296 return NumBits1 == 32 && NumBits2 == 64;
17303 return NumBits1 == 32 && NumBits2 == 64;
17321bool AArch64TargetLowering::isExtFreeImpl(
const Instruction *Ext)
const {
17326 if (Ext->getType()->isVectorTy())
17329 for (
const Use &U : Ext->uses()) {
17337 switch (Instr->getOpcode()) {
17338 case Instruction::Shl:
17342 case Instruction::GetElementPtr: {
17344 auto &
DL = Ext->getDataLayout();
17345 std::advance(GTI, U.getOperandNo()-1);
17358 if (ShiftAmt == 0 || ShiftAmt > 4)
17362 case Instruction::Trunc:
17365 if (Instr->getType() == Ext->getOperand(0)->getType())
17379 unsigned NumElts,
bool IsLittleEndian,
17381 if (DstWidth % 8 != 0 || DstWidth <= 16 || DstWidth > 64)
17384 assert(DstWidth % SrcWidth == 0 &&
17385 "TBL lowering is not supported for a conversion instruction with this "
17386 "source and destination element type.");
17388 unsigned Factor = DstWidth / SrcWidth;
17389 unsigned MaskLen = NumElts * Factor;
17392 Mask.resize(MaskLen, NumElts);
17394 unsigned SrcIndex = 0;
17395 for (
unsigned I = IsLittleEndian ? 0 : Factor - 1;
I < MaskLen;
I += Factor)
17396 Mask[
I] = SrcIndex++;
17404 bool IsLittleEndian) {
17406 unsigned NumElts = SrcTy->getNumElements();
17414 auto *FirstEltZero = Builder.CreateInsertElement(
17416 Value *Result = Builder.CreateShuffleVector(
Op, FirstEltZero, Mask);
17417 Result = Builder.CreateBitCast(Result, DstTy);
17418 if (DstTy != ZExtTy)
17419 Result = Builder.CreateZExt(Result, ZExtTy);
17425 bool IsLittleEndian) {
17432 !IsLittleEndian, Mask))
17435 auto *FirstEltZero = Builder.CreateInsertElement(
17438 return Builder.CreateShuffleVector(
Op, FirstEltZero, Mask);
17447 assert(SrcTy->getElementType()->isIntegerTy() &&
17448 "Non-integer type source vector element is not supported");
17449 assert(DstTy->getElementType()->isIntegerTy(8) &&
17450 "Unsupported destination vector element type");
17451 unsigned SrcElemTySz =
17453 unsigned DstElemTySz =
17455 assert((SrcElemTySz % DstElemTySz == 0) &&
17456 "Cannot lower truncate to tbl instructions for a source element size "
17457 "that is not divisible by the destination element size");
17458 unsigned TruncFactor = SrcElemTySz / DstElemTySz;
17459 assert((SrcElemTySz == 16 || SrcElemTySz == 32 || SrcElemTySz == 64) &&
17460 "Unsupported source vector element type size");
17468 for (
int Itr = 0; Itr < 16; Itr++) {
17469 if (Itr < NumElements)
17471 IsLittleEndian ? Itr * TruncFactor
17472 : Itr * TruncFactor + (TruncFactor - 1)));
17474 MaskConst.
push_back(Builder.getInt8(255));
17477 int MaxTblSz = 128 * 4;
17478 int MaxSrcSz = SrcElemTySz * NumElements;
17480 (MaxTblSz > MaxSrcSz) ? NumElements : (MaxTblSz / SrcElemTySz);
17481 assert(ElemsPerTbl <= 16 &&
17482 "Maximum elements selected using TBL instruction cannot exceed 16!");
17484 int ShuffleCount = 128 / SrcElemTySz;
17486 for (
int i = 0; i < ShuffleCount; ++i)
17493 while (ShuffleLanes.
back() < NumElements) {
17495 Builder.CreateShuffleVector(TI->
getOperand(0), ShuffleLanes), VecTy));
17497 if (Parts.
size() == 4) {
17500 Builder.CreateIntrinsic(Intrinsic::aarch64_neon_tbl4, VecTy, Parts));
17504 for (
int i = 0; i < ShuffleCount; ++i)
17505 ShuffleLanes[i] += ShuffleCount;
17509 "Lowering trunc for vectors requiring different TBL instructions is "
17513 if (!Parts.
empty()) {
17515 switch (Parts.
size()) {
17517 TblID = Intrinsic::aarch64_neon_tbl1;
17520 TblID = Intrinsic::aarch64_neon_tbl2;
17523 TblID = Intrinsic::aarch64_neon_tbl3;
17528 Results.push_back(Builder.CreateIntrinsic(TblID, VecTy, Parts));
17533 assert(
Results.size() <= 2 &&
"Trunc lowering does not support generation of "
17534 "more than 2 tbl instructions!");
17537 if (ElemsPerTbl < 16) {
17539 std::iota(FinalMask.
begin(), FinalMask.
end(), 0);
17540 FinalResult = Builder.CreateShuffleVector(
Results[0], FinalMask);
17544 if (ElemsPerTbl < 16) {
17545 std::iota(FinalMask.
begin(), FinalMask.
begin() + ElemsPerTbl, 0);
17546 std::iota(FinalMask.
begin() + ElemsPerTbl, FinalMask.
end(), 16);
17548 std::iota(FinalMask.
begin(), FinalMask.
end(), 0);
17562 if (!
EnableExtToTBL || Subtarget->useSVEForFixedLengthVectors())
17570 if (!L || L->getHeader() !=
I->getParent() ||
F->hasOptSize())
17575 if (!SrcTy || !DstTy)
17582 if (ZExt && SrcTy->getElementType()->isIntegerTy(8)) {
17583 auto DstWidth = DstTy->getElementType()->getScalarSizeInBits();
17584 if (DstWidth % 8 != 0)
17587 auto *TruncDstType =
17591 auto SrcWidth = SrcTy->getElementType()->getScalarSizeInBits();
17592 if (
TTI.getCastInstrCost(
I->getOpcode(), DstTy, TruncDstType,
17595 if (SrcWidth * 2 >= TruncDstType->getElementType()->getScalarSizeInBits())
17598 DstTy = TruncDstType;
17606 if (SrcWidth * 4 <= DstWidth) {
17607 if (
all_of(
I->users(), [&](
auto *U) {
17608 using namespace llvm::PatternMatch;
17609 auto *SingleUser = cast<Instruction>(&*U);
17610 if (match(SingleUser, m_c_Mul(m_Specific(I), m_SExt(m_Value()))))
17612 if (match(SingleUser,
17613 m_Intrinsic<Intrinsic::vector_partial_reduce_add>(
17614 m_Value(), m_Specific(I))))
17621 if (DstTy->getScalarSizeInBits() >= 64)
17627 DstTy, Subtarget->isLittleEndian());
17630 ZExt->replaceAllUsesWith(Result);
17631 ZExt->eraseFromParent();
17636 if (UIToFP && ((SrcTy->getElementType()->isIntegerTy(8) &&
17637 DstTy->getElementType()->isFloatTy()) ||
17638 (SrcTy->getElementType()->isIntegerTy(16) &&
17639 DstTy->getElementType()->isDoubleTy()))) {
17644 assert(ZExt &&
"Cannot fail for the i8 to float conversion");
17645 auto *UI = Builder.CreateUIToFP(ZExt, DstTy);
17646 I->replaceAllUsesWith(UI);
17647 I->eraseFromParent();
17652 if (SIToFP && SrcTy->getElementType()->isIntegerTy(8) &&
17653 DstTy->getElementType()->isFloatTy()) {
17657 Subtarget->isLittleEndian());
17658 assert(Shuffle && "Cannot fail for the i8 to float conversion");
17660 auto *AShr = Builder.CreateAShr(Cast, 24, "", true);
17661 auto *SI = Builder.CreateSIToFP(AShr, DstTy);
17662 I->replaceAllUsesWith(SI);
17663 I->eraseFromParent();
17671 (SrcTy->getNumElements() == 8 || SrcTy->getNumElements() == 16) &&
17672 SrcTy->getElementType()->isFloatTy() &&
17673 DstTy->getElementType()->isIntegerTy(8)) {
17675 auto *WideConv = Builder.CreateFPToUI(FPToUI->getOperand(0),
17677 auto *TruncI = Builder.CreateTrunc(WideConv, DstTy);
17678 I->replaceAllUsesWith(TruncI);
17679 I->eraseFromParent();
17689 if (TI && DstTy->getElementType()->isIntegerTy(8) &&
17690 ((SrcTy->getElementType()->isIntegerTy(32) ||
17691 SrcTy->getElementType()->isIntegerTy(64)) &&
17692 (SrcTy->getNumElements() == 16 || SrcTy->getNumElements() == 8))) {
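// Taken together, the cases above rewrite zext, uitofp, sitofp, fptoui and trunc
// on small fixed-width vectors (only when the instruction sits in a loop header,
// the function is not optimised for size, and the cost model and endianness
// checks allow it) into shuffle/tbl-friendly forms that the TBL-based helpers
// can lower cheaply.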
17701 Align &RequiredAlignment) const {
17706 RequiredAlignment = Align(1);
17708 return NumBits == 32 || NumBits == 64;
17715 unsigned VecSize = 128;
17719 VecSize = std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
17720 return std::max<unsigned>(1, (MinElts * ElSize + 127) / VecSize);
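// Adding 127 before the division rounds the vector's bit size up to a whole
// 128-bit chunk, so the return value is the number of register groups an
// interleaved access needs (never less than one), with VecSize widened to the
// minimum SVE register size when scalable fixed-length codegen is in use.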
17725 if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
17735 unsigned MinElts = EC.getKnownMinValue();
17737 UseScalable = false;
17740 (!Subtarget->useSVEForFixedLengthVectors() ||
17745 !Subtarget->isSVEorStreamingSVEAvailable())
17753 if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
17756 if (EC.isScalable()) {
17757 UseScalable = true;
17758 return isPowerOf2_32(MinElts) && (MinElts * ElSize) % 128 == 0;
17761 unsigned VecSize = DL.getTypeSizeInBits(VecTy);
17762 if (Subtarget->useSVEForFixedLengthVectors()) {
17763 unsigned MinSVEVectorSize =
17764 std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
17765 if (VecSize % MinSVEVectorSize == 0 ||
17767 (!Subtarget->isNeonAvailable() || VecSize > 128))) {
17768 UseScalable = true;
17775 return Subtarget->isNeonAvailable() && (VecSize == 64 || VecSize % 128 == 0);
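// Summary of the legality rules above: element types must be 8/16/32/64 bits
// wide; scalable vectors need a power-of-two lane count totalling a multiple of
// 128 bits; fixed-width vectors are legal for NEON at 64 bits or any multiple of
// 128 bits, and may instead be mapped onto SVE when fixed-length SVE codegen is
// enabled and the size fits the minimum SVE register width.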
17807 bool Scalable, Type *LDVTy,
17809 assert(Factor >= 2 && Factor <= 4 && "Invalid interleave factor");
17810 static const Intrinsic::ID SVELoads[3] = {Intrinsic::aarch64_sve_ld2_sret,
17811 Intrinsic::aarch64_sve_ld3_sret,
17812 Intrinsic::aarch64_sve_ld4_sret};
17813 static const Intrinsic::ID NEONLoads[3] = {Intrinsic::aarch64_neon_ld2,
17814 Intrinsic::aarch64_neon_ld3,
17815 Intrinsic::aarch64_neon_ld4};
17824 bool Scalable, Type *STVTy,
17826 assert(Factor >= 2 && Factor <= 4 && "Invalid interleave factor");
17827 static const Intrinsic::ID SVEStores[3] = {Intrinsic::aarch64_sve_st2,
17828 Intrinsic::aarch64_sve_st3,
17829 Intrinsic::aarch64_sve_st4};
17830 static const Intrinsic::ID NEONStores[3] = {Intrinsic::aarch64_neon_st2,
17831 Intrinsic::aarch64_neon_st3,
17832 Intrinsic::aarch64_neon_st4};
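// Both tables are indexed with Factor - 2, so interleave factors 2, 3 and 4 map
// directly onto the matching ld2/ld3/ld4 (or st2/st3/st4) intrinsic, with the
// SVE "_sret"/predicated forms used when the vector type is scalable.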
17855 "Invalid interleave factor");
17856 assert(!Shuffles.empty() && "Empty shufflevector input");
17858 "Unmatched number of shufflevectors and indices");
17863 assert(!Mask && GapMask.popcount() == Factor && "Unexpected mask on a load");
17882 SI->getType()->getScalarSizeInBits() * 4 ==
17883 SI->user_back()->getType()->getScalarSizeInBits();
17893 Type *EltTy = FVTy->getElementType();
17901 FVTy->getNumElements() / NumLoads);
17909 Value *BaseAddr = LI->getPointerOperand();
17911 Type *PtrTy = LI->getPointerOperandType();
17913 LDVTy->getElementCount());
17916 UseScalable, LDVTy, PtrTy);
17923 Value *PTrue = nullptr;
17925 std::optional<unsigned> PgPattern =
17927 if (Subtarget->getMinSVEVectorSizeInBits() ==
17928 Subtarget->getMaxSVEVectorSizeInBits() &&
17929 Subtarget->getMinSVEVectorSizeInBits() == DL.getTypeSizeInBits(FVTy))
17930 PgPattern = AArch64SVEPredPattern::all;
17934 PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
17938 for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
17943 BaseAddr = Builder.CreateConstGEP1_32(LDVTy->getElementType(), BaseAddr,
17944 FVTy->getNumElements() * Factor);
17948 LdN = Builder.CreateCall(LdNFunc, {PTrue, BaseAddr}, "ldN");
17950 LdN = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
17953 for (unsigned i = 0; i < Shuffles.size(); i++) {
17955 unsigned Index = Indices[i];
17957 Value *SubVec = Builder.CreateExtractValue(LdN, Index);
17960 SubVec = Builder.CreateExtractVector(FVTy, SubVec, uint64_t(0));
17964 SubVec = Builder.CreateIntToPtr(
17966 FVTy->getNumElements()));
17968 SubVecs[SVI].push_back(SubVec);
17977 auto &SubVec = SubVecs[SVI];
17980 SVI->replaceAllUsesWith(WideVec);
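// Shape of the interleaved-load lowering above: the wide load is split into
// NumLoads ldN calls (predicated with an all-true or pattern ptrue for SVE),
// each de-interleaved field is pulled out with extractvalue, and the pieces are
// concatenated back so every original shufflevector can be replaced with its
// matching subvector.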
17986template <typename Iter>
17988 int MaxLookupDist = 20;
17989 unsigned IdxWidth = DL.getIndexSizeInBits(0);
17990 APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
17991 const Value *PtrA1 =
17992 Ptr->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
17994 while (++It != End) {
17995 if (It->isDebugOrPseudoInst())
17997 if (MaxLookupDist-- == 0)
18000 const Value *PtrB1 =
18001 SI->getPointerOperand()->stripAndAccumulateInBoundsConstantOffsets(
18003 if (PtrA1 == PtrB1 &&
18004 (OffsetA.sextOrTrunc(IdxWidth) - OffsetB.sextOrTrunc(IdxWidth))
18043 const APInt &GapMask) const {
18046 "Invalid interleave factor");
18051 "Unexpected mask on store");
18054 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
18056 unsigned LaneLen = VecTy->getNumElements() / Factor;
18057 Type *EltTy = VecTy->getElementType();
18078 Type *IntTy = DL.getIntPtrType(EltTy);
18079 unsigned NumOpElts =
18084 Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
18085 Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
18092 LaneLen /= NumStores;
18099 Value *BaseAddr = SI->getPointerOperand();
18113 if (Factor == 2 && SubVecTy->getPrimitiveSizeInBits() == 64 &&
18121 Type *PtrTy = SI->getPointerOperandType();
18123 STVTy->getElementCount());
18126 UseScalable, STVTy, PtrTy);
18128 Value *PTrue = nullptr;
18130 std::optional<unsigned> PgPattern =
18132 if (Subtarget->getMinSVEVectorSizeInBits() ==
18133 Subtarget->getMaxSVEVectorSizeInBits() &&
18134 Subtarget->getMinSVEVectorSizeInBits() ==
18135 DL.getTypeSizeInBits(SubVecTy))
18136 PgPattern = AArch64SVEPredPattern::all;
18140 PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
18144 for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
18149 for (unsigned i = 0; i < Factor; i++) {
18151 unsigned IdxI = StoreCount * LaneLen * Factor + i;
18152 if (Mask[IdxI] >= 0) {
18153 Shuffle = Builder.CreateShuffleVector(
18156 unsigned StartMask = 0;
18157 for (unsigned j = 1; j < LaneLen; j++) {
18158 unsigned IdxJ = StoreCount * LaneLen * Factor + j * Factor + i;
18159 if (Mask[IdxJ] >= 0) {
18160 StartMask = Mask[IdxJ] - j;
18169 Shuffle = Builder.CreateShuffleVector(
18177 Ops.push_back(Shuffle);
18181 Ops.push_back(PTrue);
18185 if (StoreCount > 0)
18186 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
18187 BaseAddr, LaneLen * Factor);
18189 Ops.push_back(BaseAddr);
18190 Builder.CreateCall(StNFunc, Ops);
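// Store-side mirror of the load path: for each chunk one shuffle per interleaved
// field is built (recovering the start index from the next defined lane when the
// first lane of the mask is undef), the SVE predicate and the per-chunk base
// address are appended, and a single stN call is emitted.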
18198 if (Factor != 2 && Factor != 3 && Factor != 4) {
18199 LLVM_DEBUG(dbgs() << "Matching ld2, ld3 and ld4 patterns failed\n");
18205 assert(!Mask && "Unexpected mask on a load\n");
18209 const DataLayout &DL = LI->getModule()->getDataLayout();
18224 Type *PtrTy = LI->getPointerOperandType();
18226 UseScalable, LdTy, PtrTy);
18229 Value *Pred = nullptr;
18232 Builder.CreateVectorSplat(LdTy->getElementCount(), Builder.getTrue());
18234 Value *BaseAddr = LI->getPointerOperand();
18235 Value *Result = nullptr;
18236 if (NumLoads > 1) {
18239 for (unsigned I = 0; I < NumLoads; ++I) {
18243 Value *LdN = nullptr;
18245 LdN = Builder.CreateCall(LdNFunc, {Pred, Address}, "ldN");
18247 LdN = Builder.CreateCall(LdNFunc, Address, "ldN");
18250 for (unsigned J = 0; J < Factor; ++J) {
18251 ExtractedLdValues[J] = Builder.CreateInsertVector(
18252 VTy, ExtractedLdValues[J], Builder.CreateExtractValue(LdN, J), Idx);
18259 for (unsigned J = 0; J < Factor; ++J)
18260 Result = Builder.CreateInsertValue(Result, ExtractedLdValues[J], J);
18263 Result = Builder.CreateCall(LdNFunc, {Pred, BaseAddr}, "ldN");
18265 Result = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
18276 unsigned Factor = InterleavedValues.size();
18277 if (Factor != 2 && Factor != 3 && Factor != 4) {
18278 LLVM_DEBUG(dbgs() << "Matching st2, st3 and st4 patterns failed\n");
18284 assert(!Mask && "Unexpected mask on plain store");
18304 Type *PtrTy = SI->getPointerOperandType();
18306 UseScalable, StTy, PtrTy);
18310 Value *BaseAddr = SI->getPointerOperand();
18311 Value *Pred = nullptr;
18315 Builder.CreateVectorSplat(StTy->getElementCount(), Builder.getTrue());
18317 auto ExtractedValues = InterleavedValues;
18322 for (unsigned I = 0; I < NumStores; ++I) {
18324 if (NumStores > 1) {
18329 for (unsigned J = 0; J < Factor; J++) {
18331 Builder.CreateExtractVector(StTy, ExtractedValues[J], Idx);
18334 StoreOperands[StoreOperands.size() - 1] = Address;
18336 Builder.CreateCall(StNFunc, StoreOperands);
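// Counterpart for vector.interleave stores: each register-sized slice of every
// interleaved operand is extracted, the trailing address operand in
// StoreOperands is refreshed each iteration, and one stN call is emitted per
// slice.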
18343 const AttributeList &FuncAttributes) const {
18344 bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
18345 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
18346 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
18350 bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
18351 auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
18352 if (Op.isAligned(AlignCheck))
18360 if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
18361 AlignmentIsAcceptable(MVT::v16i8, Align(16)))
18363 if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
18365 if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
18367 if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
18373 const MemOp &Op, const AttributeList &FuncAttributes) const {
18374 bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
18375 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
18376 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
18380 bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
18381 auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
18382 if (Op.isAligned(AlignCheck))
18390 if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
18391 AlignmentIsAcceptable(MVT::v2i64, Align(16)))
18393 if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
18395 if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
18397 if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
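// Both memop-type hooks follow the same ladder: a 128-bit vector type (v2i64
// here, v16i8 in the EVT-based variant above) for large, well-aligned memsets
// when NEON may be used, then f128, then i64, then i32, falling back to narrower
// types when size or alignment rules out the wider ones.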
18404 if (Immed == std::numeric_limits<int64_t>::min()) {
18413 if (!Subtarget->hasSVE2())
18432 return std::abs(Imm / 8) <= 16;
18435 return std::abs(Imm / 4) <= 16;
18438 return std::abs(Imm / 2) <= 16;
18465 if (Insn.size() > 1)
18502 if (AM.Scale == 1) {
18505 } else if (AM.Scale == 2) {
18517 if (Ty->isScalableTy()) {
18523 uint64_t VecNumBytes = DL.getTypeSizeInBits(Ty).getKnownMinValue() / 8;
18545 if (Ty->isSized()) {
18546 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
18547 NumBytes = NumBits / 8;
18552 return Subtarget->getInstrInfo()->isLegalAddressingMode(NumBytes, AM.BaseOffs,
18560 int64_t MaxOffset) const {
18561 int64_t HighPart = MinOffset & ~0xfffULL;
18584 return Subtarget->hasFullFP16();
18590 Subtarget->isNonStreamingSVEorSME2Available();
18600 switch (Ty->getScalarType()->getTypeID()) {
18620 static const MCPhysReg ScratchRegs[] = {
18621 AArch64::X16, AArch64::X17, AArch64::LR, 0
18623 return ScratchRegs;
18627 static const MCPhysReg RCRegs[] = {AArch64::FPCR};
18636 "Expected shift op");
18638 SDValue ShiftLHS = N->getOperand(0);
18639 EVT VT = N->getValueType(0);
18660 return SRLC->getZExtValue() == SHLC->getZExtValue();
18672 (N->getOperand(0).getOpcode() == ISD::SHL ||
18673 N->getOperand(0).getOpcode() == ISD::SRL) &&
18674 "Expected XOR(SHIFT) pattern");
18679 if (XorC && ShiftC) {
18680 unsigned MaskIdx, MaskLen;
18681 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
18682 unsigned ShiftAmt = ShiftC->getZExtValue();
18683 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
18684 if (N->getOperand(0).getOpcode() == ISD::SHL)
18685 return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
18686 return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
18696 N->getOperand(0).getOpcode() == ISD::SRL) ||
18698 N->getOperand(0).getOpcode() == ISD::SHL)) &&
18699 "Expected shift-shift mask");
18701 if (!N->getOperand(0)->hasOneUse())
18705 EVT VT = N->getValueType(0);
18706 if (N->getOpcode() == ISD::SRL && (VT == MVT::i32 || VT == MVT::i64)) {
18709 return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue());
18714 if (N->getOpcode() == ISD::SHL && N->hasOneUse()) {
18716 unsigned ShlAmt = C2->getZExtValue();
18717 if (auto ShouldADD = *N->user_begin();
18718 ShouldADD->getOpcode() == ISD::ADD && ShouldADD->hasOneUse()) {
18720 EVT MemVT = Load->getMemoryVT();
18722 if (Load->getValueType(0).isScalableVector())
18736 unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X,
18744 assert(Ty->isIntegerTy());
18746 unsigned BitSize = Ty->getPrimitiveSizeInBits();
18750 int64_t Val = Imm.getSExtValue();
18757 Val &= (1LL << 32) - 1;
18765 unsigned Index) const {
18778 EVT VT = N->getValueType(0);
18779 if (!Subtarget->hasNEON() || !VT.isVector())
18793 if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
18828 if (N->getValueType(0) != MVT::i32)
18831 SDValue VecReduceOp0 = N->getOperand(0);
18832 bool SawTrailingZext = false;
18838 SawTrailingZext = true;
18843 MVT AbsInputVT = SawTrailingZext ? MVT::v16i16 : MVT::v16i32;
18845 unsigned Opcode = VecReduceOp0.getOpcode();
18851 if (ABS->getOperand(0)->getOpcode() != ISD::SUB ||
18852 ABS->getOperand(0)->getValueType(0) != AbsInputVT)
18855 SDValue SUB = ABS->getOperand(0);
18856 unsigned Opcode0 = SUB->getOperand(0).getOpcode();
18857 unsigned Opcode1 = SUB->getOperand(1).getOpcode();
18859 if (SUB->getOperand(0)->getValueType(0) != AbsInputVT ||
18860 SUB->getOperand(1)->getValueType(0) != AbsInputVT)
18864 bool IsZExt = false;
18872 SDValue EXT0 = SUB->getOperand(0);
18873 SDValue EXT1 = SUB->getOperand(1);
18890 UABDHigh8Op0, UABDHigh8Op1);
18901 UABDLo8Op0, UABDLo8Op1);
18909 return DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, UADDLP);
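// This combine recognises vecreduce_add(abs(sub(ext(a), ext(b)))) over v16i8
// inputs, i.e. a sum-of-absolute-differences, and rebuilds it from absolute-
// difference nodes on the high and low halves followed by UADDLP, leaving only
// a final i32 VECREDUCE_ADD.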
18922 if (!N->getValueType(0).isScalableVector() ||
18923 (!ST->hasSVE2p1() && !(ST->hasSME2() && ST->isStreaming())))
18928 return Use->getOpcode() == ISD::EXTRACT_SUBVECTOR;
18931 auto MaskEC = N->getValueType(0).getVectorElementCount();
18932 if (!MaskEC.isKnownMultipleOf(NumExts))
18946 if (Use->getValueType(0).getVectorElementCount() != ExtMinEC)
18950 unsigned Offset = Use->getConstantOperandVal(1);
18952 if (Extracts[Part] != nullptr)
18955 Extracts[Part] = Use;
18971 EVT ExtVT = Extracts[0]->getValueType(0);
18975 DCI.CombineTo(Extracts[0], R.getValue(0));
18976 DCI.CombineTo(Extracts[1], R.getValue(1));
18980 if (NumExts == 2) {
18981 assert(N->getValueType(0) == DoubleExtVT);
18987 for (unsigned I = 2; I < NumExts; I += 2) {
18992 DCI.CombineTo(Extracts[I + 1], R.getValue(1));
18994 R.getValue(0), R.getValue(1)));
19008 if (!ST->isNeonAvailable())
19011 if (!ST->hasDotProd())
19022 unsigned DotOpcode;
19026 if (A.getOperand(0).getValueType() != B.getOperand(0).getValueType())
19028 auto OpCodeA = A.getOpcode();
19032 auto OpCodeB = B.getOpcode();
19036 if (OpCodeA == OpCodeB) {
19041 if (!ST->hasMatMulInt8())
19043 DotOpcode = AArch64ISD::USDOT;
19048 DotOpcode = AArch64ISD::UDOT;
19050 DotOpcode = AArch64ISD::SDOT;
19055 EVT Op0VT = A.getOperand(0).getValueType();
19058 if (!IsValidElementCount || !IsValidSize)
19067 B = B.getOperand(0);
19070 unsigned NumOfVecReduce;
19072 if (IsMultipleOf16) {
19074 TargetType = MVT::v4i32;
19077 TargetType = MVT::v2i32;
19080 if (NumOfVecReduce == 1) {
19083 A.getOperand(0), B);
19084 return DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
19090 for (; I < VecReduce16Num; I += 1) {
19107 DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), ConcatSDot16);
19109 if (VecReduce8Num == 0)
19110 return VecReduceAdd16;
19122 DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
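// vecreduce_add over extended (and possibly multiplied) i8/i16 operands is
// rewritten here as UDOT/SDOT, or USDOT when the extension kinds differ and the
// int8 matrix-multiply feature is available; inputs whose length is a multiple
// of 16 accumulate into v4i32, shorter ones into v2i32, and the dot-product
// result is finished with an ordinary VECREDUCE_ADD.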
19132 auto DetectAddExtract = [&](SDValue A) {
19136 EVT VT = A.getValueType();
19161 : AArch64ISD::SADDLP;
19165 if (SDValue R = DetectAddExtract(A))
19168 if (A.getOperand(0).getOpcode() == ISD::ADD && A.getOperand(0).hasOneUse())
19172 if (A.getOperand(1).getOpcode() == ISD::ADD && A.getOperand(1).hasOneUse())
19185 EVT VT = A.getValueType();
19186 if (VT != MVT::v8i16 && VT != MVT::v4i32 && VT != MVT::v2i64)
19197 if (ExtVT0 != ExtVT1 ||
19212 return DAG.getNode(AArch64ISD::NVCAST, SDLoc(A), MVT::v8i16, Uaddlv);
19229 MVT OpVT = A.getSimpleValueType();
19230 assert(N->getSimpleValueType(0) == OpVT &&
19231 "The operand type should be consistent with the result type of UADDV");
19235 if (KnownLeadingLanes.isZero())
19251AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
19258 EVT VT = N->getValueType(0);
19263 if (VT.isVector() && Subtarget->isSVEorStreamingSVEAvailable())
19267 if ((VT != MVT::i32 && VT != MVT::i64) ||
19273 if (Divisor == 2 ||
19274 Divisor == APInt(Divisor.getBitWidth(), -2, true))
19281AArch64TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
19288 EVT VT = N->getValueType(0);
19296 if ((VT != MVT::i32 && VT != MVT::i64) ||
19312 CSNeg = DAG.getNode(AArch64ISD::CSNEG, DL, VT, And, And, CCVal, Cmp);
19323 CSNeg = DAG.getNode(AArch64ISD::CSNEG, DL, VT, AndPos, AndNeg, CCVal,
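// BuildSREMPow2 materialises srem by a power of two without a divide: the low
// bits are masked with AND and a CSNEG (conditional select-negate keyed on the
// sign of the dividend) restores the correct sign of the remainder.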
19338 case Intrinsic::aarch64_sve_cntb:
19340 case Intrinsic::aarch64_sve_cnth:
19342 case Intrinsic::aarch64_sve_cntw:
19344 case Intrinsic::aarch64_sve_cntd:
19373 return TypeNode->getVT();
19383 if (Mask == UCHAR_MAX)
19385 else if (Mask == USHRT_MAX)
19387 else if (Mask == UINT_MAX)
19409 unsigned ExtendOpcode = Extend.getOpcode();
19425 if (PreExtendType == MVT::Other ||
19430 bool SeenZExtOrSExt = !IsAnyExt;
19438 unsigned Opc = Op.getOpcode();
19445 if (SeenZExtOrSExt && OpcIsSExt != IsSExt)
19448 IsSExt = OpcIsSExt;
19449 SeenZExtOrSExt = true;
19456 EVT PreExtendLegalType =
19462 PreExtendLegalType));
19472 unsigned ExtOpc = !SeenZExtOrSExt
19475 return DAG.getNode(ExtOpc, DL, VT, NBV);
19482 EVT VT = Mul->getValueType(0);
19483 if (VT != MVT::v8i16 && VT != MVT::v4i32 && VT != MVT::v2i64)
19494 return DAG.getNode(Mul->getOpcode(), DL, VT, Op0 ? Op0 : Mul->getOperand(0),
19495 Op1 ? Op1 : Mul->getOperand(1));
19510 EVT VT = Mul->getValueType(0);
19512 int ConstMultiplier =
19518 unsigned AbsConstValue = abs(ConstMultiplier);
19519 unsigned OperandShift =
19528 unsigned B = ConstMultiplier < 0 ? 32 : 31;
19529 unsigned CeilAxOverB = (AbsConstValue + (B - 1)) / B;
19533 if (LowerBound > UpperBound)
19538 int Shift = std::min(std::max( 0, LowerBound), UpperBound);
19541 int32_t RdsvlMul = (AbsConstValue >> (OperandShift + Shift)) *
19542 (ConstMultiplier < 0 ? -1 : 1);
19543 auto Rdsvl = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
19556 EVT VT = N->getValueType(0);
19557 if (VT != MVT::v2i64 && VT != MVT::v1i64 && VT != MVT::v2i32 &&
19558 VT != MVT::v4i32 && VT != MVT::v4i16 && VT != MVT::v8i16)
19560 if (N->getOperand(0).getOpcode() != ISD::AND ||
19561 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
19574 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
19575 V3 != (HalfSize - 1))
19586 return DAG.getNode(AArch64ISD::NVCAST, DL, VT, CM);
19594 EVT VT = N->getValueType(0);
19600 N->getOperand(0).getOperand(0).getValueType() !=
19601 N->getOperand(1).getOperand(0).getValueType())
19605 N->getOperand(0).getOpcode() != N->getOperand(1).getOpcode())
19608 SDValue N0 = N->getOperand(0).getOperand(0);
19609 SDValue N1 = N->getOperand(1).getOperand(0);
19614 if ((S2 == MVT::i32 && S1 == MVT::i8) ||
19615 (S2 == MVT::i64 && (S1 == MVT::i8 || S1 == MVT::i16))) {
19648 EVT VT = N->getValueType(0);
19652 unsigned AddSubOpc;
19654 auto IsAddSubWith1 = [&](SDValue V) -> bool {
19655 AddSubOpc = V->getOpcode();
19667 if (IsAddSubWith1(N0)) {
19669 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
19672 if (IsAddSubWith1(N1)) {
19674 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
19685 const APInt &ConstValue = C->getAPIntValue();
19692 if (ConstValue.sge(1) && ConstValue.sle(16))
19707 unsigned TrailingZeroes = ConstValue.countr_zero();
19708 if (TrailingZeroes) {
19716 if (N->hasOneUse() && (N->user_begin()->getOpcode() == ISD::ADD ||
19717 N->user_begin()->getOpcode() == ISD::SUB))
19722 APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
19725 auto Shl = [&](SDValue N0, unsigned N1) {
19756 for (unsigned i = 1; i < BitWidth / 2; i++) {
19776 unsigned TrailingZeroes = CVMinus1.countr_zero();
19777 APInt SCVMinus1 = CVMinus1.ashr(TrailingZeroes) - 1;
19793 unsigned TrailingZeroes = CVMinus1.countr_zero();
19794 APInt CVPlus1 = CVMinus1.ashr(TrailingZeroes) + 1;
19814 APInt SCVMinus1 = ShiftedConstValue - 1;
19815 APInt SCVPlus1 = ShiftedConstValue + 1;
19816 APInt CVPlus1 = ConstValue + 1;
19820 return Shl(Add(Shl(N0, ShiftAmt), N0), TrailingZeroes);
19823 return Sub(Shl(N0, ShiftAmt), N0);
19825 ShiftAmt = SCVPlus1.logBase2() + TrailingZeroes;
19826 return Sub(Shl(N0, ShiftAmt), Shl(N0, TrailingZeroes));
19828 if (Subtarget->hasALULSLFast() &&
19829 isPowPlusPlusConst(ConstValue, CVM, CVN)) {
19830 APInt CVMMinus1 = CVM - 1;
19831 APInt CVNMinus1 = CVN - 1;
19832 unsigned ShiftM1 = CVMMinus1.logBase2();
19833 unsigned ShiftN1 = CVNMinus1.logBase2();
19835 if (ShiftM1 <= 4 && ShiftN1 <= 4) {
19837 return Add(Shl(MVal, ShiftN1), MVal);
19840 if (Subtarget->hasALULSLFast() &&
19841 isPowPlusPlusOneConst(ConstValue, CVM, CVN)) {
19845 if (ShiftM <= 4 && ShiftN <= 4) {
19851 if (Subtarget->hasALULSLFast() &&
19852 isPowMinusMinusOneConst(ConstValue, CVM, CVN)) {
19856 if (ShiftM <= 4 && ShiftN <= 4) {
19865 APInt SCVPlus1 = -ShiftedConstValue + 1;
19866 APInt CVNegPlus1 = -ConstValue + 1;
19867 APInt CVNegMinus1 = -ConstValue - 1;
19870 return Sub(N0, Shl(N0, ShiftAmt));
19872 ShiftAmt = CVNegMinus1.logBase2();
19873 return Negate(Add(Shl(N0, ShiftAmt), N0));
19875 ShiftAmt = SCVPlus1.logBase2() + TrailingZeroes;
19876 return Sub(Shl(N0, TrailingZeroes), Shl(N0, ShiftAmt));
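// Worked example of the decomposition above: C = 6 gives TrailingZeroes = 1 and
// a shifted value of 3 = 2^1 + 1, so the result is shl(add(shl(x, 1), x), 1),
// i.e. (x + 2x) << 1 == 6x, using only shift-and-add style operations that the
// AArch64 add/sub-with-shifted-operand forms handle in one instruction each.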
19896 EVT VT = N->getValueType(0);
19898 N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
19899 VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
19909 if (!BV->isConstant())
19914 EVT IntVT = BV->getValueType(0);
19921 N->getOperand(0)->getOperand(0), MaskConst);
19935 if (N->isStrictFPOpcode())
19946 return !VT.isVector() && VT != MVT::bf16 && VT != MVT::f128;
19949 SDValue SrcVal = N->getOperand(0);
19951 EVT DestTy = N->getValueType(0);
19958 if (DestTy.bitsGT(SrcTy)) {
19967 if (SrcVecTy == MVT::nxv2i32 || DestVecTy == MVT::nxv2i32)
19973 DAG.getUNDEF(SrcVecTy), SrcVal, ZeroIdx);
19990 EVT VT = N->getValueType(0);
19991 if (VT != MVT::f32 && VT != MVT::f64)
19995 if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
20016 (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF;
20035 if (!N->getValueType(0).isSimple())
20039 if (!Op.getValueType().isSimple() || Op.getOpcode() != ISD::FMUL)
20042 if (!Op.getValueType().is64BitVector() && !Op.getValueType().is128BitVector())
20049 MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
20051 if (FloatBits != 32 && FloatBits != 64 &&
20052 (FloatBits != 16 || !Subtarget->hasFullFP16()))
20055 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
20056 uint32_t IntBits = IntTy.getSizeInBits();
20057 if (IntBits != 16 && IntBits != 32 && IntBits != 64)
20061 if (IntBits > FloatBits)
20066 int32_t Bits = IntBits == 64 ? 64 : 32;
20068 if (C == -1 || C == 0 || C > Bits)
20071 EVT ResTy = Op.getValueType().changeVectorElementTypeToInteger();
20085 unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
20086 : Intrinsic::aarch64_neon_vcvtfp2fxu;
20092 if (IntBits < FloatBits)
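// The checks above gate a fold of a float-to-int conversion fed by
// (fmul x, 2^C) into the fixed-point convert intrinsics
// aarch64_neon_vcvtfp2fxs / aarch64_neon_vcvtfp2fxu, with C used as the fbits
// operand provided it fits the 32- or 64-bit range and the element widths line
// up.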
20109 EVT VT = N->getValueType(0);
20113 if (CSel0.getOpcode() != AArch64ISD::CSEL ||
20132 if (Cmp1.getOpcode() != AArch64ISD::SUBS &&
20133 Cmp0.getOpcode() == AArch64ISD::SUBS) {
20138 if (Cmp1.getOpcode() != AArch64ISD::SUBS)
20158 if (Op1 && Op1->getAPIntValue().isNegative() &&
20159 Op1->getAPIntValue().sgt(-32)) {
20166 AbsOp1, NZCVOp, Condition, Cmp0);
20169 Cmp1.getOperand(1), NZCVOp, Condition, Cmp0);
20193 MaskForTy = 0xffull;
20196 MaskForTy = 0xffffull;
20199 MaskForTy = 0xffffffffull;
20208 return Op0->getAPIntValue().getLimitedValue() == MaskForTy;
20216 while (Op.getOpcode() == AArch64ISD::REINTERPRET_CAST &&
20218 Op = Op->getOperand(0);
20228 unsigned Opc = Src->getOpcode();
20231 if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {
20245 auto MaskAndTypeMatch = [ExtVal](EVT VT) -> bool {
20246 return ((ExtVal == 0xFF && VT == MVT::i8) ||
20247 (ExtVal == 0xFFFF && VT == MVT::i16) ||
20248 (ExtVal == 0xFFFFFFFF && VT == MVT::i32));
20254 if (MaskAndTypeMatch(EltTy))
20260 if (MaskedLoadOp && (MaskedLoadOp->getExtensionType() == ISD::ZEXTLOAD ||
20263 if (MaskAndTypeMatch(EltTy))
20287 return N->getOperand(1);
20289 return N->getOperand(0);
20296 if (!Src.hasOneUse())
20304 case AArch64ISD::LD1_MERGE_ZERO:
20305 case AArch64ISD::LDNF1_MERGE_ZERO:
20306 case AArch64ISD::LDFF1_MERGE_ZERO:
20309 case AArch64ISD::GLD1_MERGE_ZERO:
20310 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
20311 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
20312 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
20313 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
20314 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
20315 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
20316 case AArch64ISD::GLDFF1_MERGE_ZERO:
20317 case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
20318 case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
20319 case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
20320 case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
20321 case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
20322 case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
20323 case AArch64ISD::GLDNT1_MERGE_ZERO:
20344 EVT VT = N->getValueType(0);
20350 for (auto U : N->users())
20381 EVT VT = N->getValueType(0);
20421 DefBits = ~(DefBits | ZeroSplat);
20428 UndefBits = ~(UndefBits | ZeroSplat);
20430 UndefBits, &LHS)) ||
20444 EVT VT = N->getValueType(0);
20447 if (!N->getFlags().hasAllowReassociation())
20454 unsigned Opc = A.getConstantOperandVal(0);
20455 if (Opc != Intrinsic::aarch64_neon_vcmla_rot0 &&
20456 Opc != Intrinsic::aarch64_neon_vcmla_rot90 &&
20457 Opc != Intrinsic::aarch64_neon_vcmla_rot180 &&
20458 Opc != Intrinsic::aarch64_neon_vcmla_rot270)
20463 A.getOperand(2), A.getOperand(3));
20479 return (FullFP16 && VT == MVT::f16) || VT == MVT::f32 || VT == MVT::f64;
20481 return VT == MVT::i64;
20493 (N.getOpcode() == ISD::GET_ACTIVE_LANE_MASK) ||
20495 (N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilege ||
20496 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilege_x2 ||
20497 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilegt ||
20498 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilegt_x2 ||
20499 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehi ||
20500 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehi_x2 ||
20501 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehs ||
20502 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehs_x2 ||
20503 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele ||
20504 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele_x2 ||
20505 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo ||
20506 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo_x2 ||
20507 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels ||
20508 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels_x2 ||
20509 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt ||
20510 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt_x2)))
20569 if (VS.getOpcode() != ISD::VSCALE)
20573 if (VS.getConstantOperandVal(0) != NumEls)
20596 MVT::bf16, MVT::f32, MVT::f64}),
20602 if (!TLI.isOperationLegal(ISD::VECTOR_FIND_LAST_ACTIVE, Mask.getValueType()))
20605 return DAG.getNode(AArch64ISD::LASTB, SDLoc(N), N->getValueType(0), Mask,
20621 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
20623 EVT VT = N->getValueType(0);
20624 const bool FullFP16 = Subtarget->hasFullFP16();
20656 if (Shuffle && Shuffle->getMaskElt(0) == 1 &&
20671 {N0->getOperand(0), Extract1, Extract2});
20690 unsigned OffsetElts = 0;
20706 Load->getMemoryVT().isByteSized() &&
20708 return U.getResNo() != N0.getResNo() ||
20709 (U.getUser()->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20710 !any_of(U.getUser()->uses(), [](const SDUse &U2) {
20711 return U2.getUser()->getOpcode() ==
20712 ISD::INSERT_VECTOR_ELT ||
20713 U2.getUser()->getOpcode() == ISD::BUILD_VECTOR ||
20714 U2.getUser()->getOpcode() == ISD::SCALAR_TO_VECTOR;
20721 unsigned Offset = (OffsetElts + N->getConstantOperandVal(1)) *
20722 Load->getValueType(0).getScalarSizeInBits() / 8;
20731 DAG.getExtLoad(ExtType, DL, VT, Load->getChain(), BasePtr,
20732 Load->getPointerInfo().getWithOffset(Offset),
20733 Load->getValueType(0).getScalarType(),
20735 Load->getMemOperand()->getFlags(), Load->getAAInfo());
20748 EVT VT = N->getValueType(0);
20749 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
20774 (N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
20776 MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
20778 for (size_t i = 0; i < Mask.size(); ++i)
20783 DAG.getNode(ISD::BITCAST, DL, MidVT, N00),
20784 DAG.getNode(ISD::BITCAST, DL, MidVT, N10), Mask));
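// concat_vectors of two matching truncating sources is rewritten above as a
// single shuffle over bitcasts to the narrower mid type (v4i32 or v8i16),
// effectively an uzp1 of the two operands that the backend can select directly.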
20800 if (N00Opc == AArch64ISD::VLSHR && N10Opc == AArch64ISD::VLSHR &&
20806 NScalarSize = N->getValueType(0).getScalarSizeInBits();
20808 if (N001ConstVal == N101ConstVal && N001ConstVal > NScalarSize) {
20809 N000 = DAG.getNode(AArch64ISD::NVCAST, DL, VT, N000);
20810 N100 = DAG.getNode(AArch64ISD::NVCAST, DL, VT, N100);
20815 return DAG.getNode(AArch64ISD::VLSHR, DL, VT, Uzp, NewShiftConstant);
20820 if (N->getOperand(0).getValueType() == MVT::v4i8 ||
20821 N->getOperand(0).getValueType() == MVT::v2i16 ||
20822 N->getOperand(0).getValueType() == MVT::v2i8) {
20823 EVT SrcVT = N->getOperand(0).getValueType();
20827 if (N->getNumOperands() % 2 == 0 &&
20829 if (V.getValueType() != SrcVT)
20833 LoadSDNode *LD = dyn_cast<LoadSDNode>(V);
20834 return LD && V.hasOneUse() && LD->isSimple() && !LD->isIndexed() &&
20835 LD->getExtensionType() == ISD::NON_EXTLOAD;
20837 EVT FVT = SrcVT == MVT::v2i8 ? MVT::f16 : MVT::f32;
20841 for (unsigned i = 0; i < N->getNumOperands(); i++) {
20848 LD->getBasePtr(), LD->getMemOperand());
20850 Ops.push_back(NewLoad);
20869 auto isBitwiseVectorNegate = [](SDValue V) {
20870 return V->getOpcode() == ISD::XOR &&
20896 if (N->getNumOperands() == 2 && N0Opc == N1Opc && VT.is128BitVector() &&
20908 return DAG.getNode(N0Opc, DL, VT, Concat0, Concat1);
20912 auto IsRSHRN = [](SDValue Shr) {
20913 if (Shr.getOpcode() != AArch64ISD::VLSHR)
20916 EVT VT = Op.getValueType();
20917 unsigned ShtAmt = Shr.getConstantOperandVal(1);
20922 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
20924 Op.getOperand(1).getConstantOperandVal(0)
20925 << Op.getOperand(1).getConstantOperandVal(1));
20926 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
20929 Op.getOperand(1).getConstantOperandVal(0));
20933 if (Imm != 1ULL << (ShtAmt - 1))
20939 if (N->getNumOperands() == 2 && IsRSHRN(N0) &&
20947 X.getValueType().getDoubleNumVectorElementsVT(*DCI.DAG.getContext());
20958 if (N->getNumOperands() == 2 && N0Opc == AArch64ISD::ZIP1 &&
20965 return DAG.getNode(AArch64ISD::ZIP1, DL, VT, E0, E1);