#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64-lower"

STATISTIC(NumShiftInserts, "Number of vector shift inserts");
STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");

    cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
    cl::desc("Enable AArch64 logical imm instruction "
    cl::desc("Combine extends of AArch64 masked "
             "gather intrinsics"),
    cl::desc("Combine ext and trunc to TBL"),
    cl::desc("Enable / disable SVE scalable vectors in Global ISel"),

                                     AArch64::X3, AArch64::X4, AArch64::X5,
                                     AArch64::X6, AArch64::X7};
                                     AArch64::Q3, AArch64::Q4, AArch64::Q5,
                                     AArch64::Q6, AArch64::Q7};
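// GPR (X0-X7) and FPR/SIMD (Q0-Q7) registers available for passing arguments
// under the AArch64 calling conventions.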
    return MVT::nxv8bf16;
  switch (EC.getKnownMinValue()) {
         "Expected scalable predicate vector type!");
         "Expected legal vector type!");
  switch (Op.getOpcode()) {
  switch (Op.getConstantOperandVal(0)) {
  case Intrinsic::aarch64_sve_ptrue:
  case Intrinsic::aarch64_sve_pnext:
  case Intrinsic::aarch64_sve_cmpeq:
  case Intrinsic::aarch64_sve_cmpne:
  case Intrinsic::aarch64_sve_cmpge:
  case Intrinsic::aarch64_sve_cmpgt:
  case Intrinsic::aarch64_sve_cmphs:
  case Intrinsic::aarch64_sve_cmphi:
  case Intrinsic::aarch64_sve_cmpeq_wide:
  case Intrinsic::aarch64_sve_cmpne_wide:
  case Intrinsic::aarch64_sve_cmpge_wide:
  case Intrinsic::aarch64_sve_cmpgt_wide:
  case Intrinsic::aarch64_sve_cmplt_wide:
  case Intrinsic::aarch64_sve_cmple_wide:
  case Intrinsic::aarch64_sve_cmphs_wide:
  case Intrinsic::aarch64_sve_cmphi_wide:
  case Intrinsic::aarch64_sve_cmplo_wide:
  case Intrinsic::aarch64_sve_cmpls_wide:
  case Intrinsic::aarch64_sve_fcmpeq:
  case Intrinsic::aarch64_sve_fcmpne:
  case Intrinsic::aarch64_sve_fcmpge:
  case Intrinsic::aarch64_sve_fcmpgt:
  case Intrinsic::aarch64_sve_fcmpuo:
  case Intrinsic::aarch64_sve_facgt:
  case Intrinsic::aarch64_sve_facge:
  case Intrinsic::aarch64_sve_whilege:
  case Intrinsic::aarch64_sve_whilegt:
  case Intrinsic::aarch64_sve_whilehi:
  case Intrinsic::aarch64_sve_whilehs:
  case Intrinsic::aarch64_sve_whilele:
  case Intrinsic::aarch64_sve_whilelo:
  case Intrinsic::aarch64_sve_whilels:
  case Intrinsic::aarch64_sve_whilelt:
  case Intrinsic::aarch64_sve_match:
  case Intrinsic::aarch64_sve_nmatch:
  case Intrinsic::aarch64_sve_whilege_x2:
  case Intrinsic::aarch64_sve_whilegt_x2:
  case Intrinsic::aarch64_sve_whilehi_x2:
  case Intrinsic::aarch64_sve_whilehs_x2:
  case Intrinsic::aarch64_sve_whilele_x2:
  case Intrinsic::aarch64_sve_whilelo_x2:
  case Intrinsic::aarch64_sve_whilels_x2:
  case Intrinsic::aarch64_sve_whilelt_x2:
static std::tuple<SDValue, SDValue>
  const auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
  if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
    AddrDisc = DAG->getRegister(AArch64::NoRegister, MVT::i64);
  return std::make_tuple(
  if (Subtarget->hasLS64()) {
  if (Subtarget->hasFPARMv8()) {
  if (Subtarget->hasNEON()) {
    addDRType(MVT::v2f32);
    addDRType(MVT::v8i8);
    addDRType(MVT::v4i16);
    addDRType(MVT::v2i32);
    addDRType(MVT::v1i64);
    addDRType(MVT::v1f64);
    addDRType(MVT::v4f16);
    addDRType(MVT::v4bf16);
    addQRType(MVT::v4f32);
    addQRType(MVT::v2f64);
    addQRType(MVT::v16i8);
    addQRType(MVT::v8i16);
    addQRType(MVT::v4i32);
    addQRType(MVT::v2i64);
    addQRType(MVT::v8f16);
    addQRType(MVT::v8bf16);
  if (Subtarget->hasSVE2p1() || Subtarget->hasSME2()) {
  if (Subtarget->hasFPARMv8()) {
  if (Subtarget->hasFPARMv8()) {
  if (Subtarget->hasCSSC()) {
  if (Subtarget->hasFullFP16()) {
  auto LegalizeNarrowFP = [this](MVT ScalarVT) {
  if (!Subtarget->hasFullFP16()) {
    LegalizeNarrowFP(MVT::f16);
  LegalizeNarrowFP(MVT::bf16);
  for (MVT Ty : {MVT::f32, MVT::f64})
  if (Subtarget->hasFullFP16())
  for (MVT Ty : {MVT::f32, MVT::f64})
  if (Subtarget->hasFullFP16())
  if (!Subtarget->hasLSE() && !Subtarget->outlineAtomics()) {
  if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
#define LCALLNAMES(A, B, N)                                                    \
  setLibcallName(A##N##_RELAX, #B #N "_relax");                                \
  setLibcallName(A##N##_ACQ, #B #N "_acq");                                    \
  setLibcallName(A##N##_REL, #B #N "_rel");                                    \
  setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
#define LCALLNAME4(A, B)                                                       \
  LCALLNAMES(A, B, 1)                                                          \
  LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
#define LCALLNAME5(A, B)                                                       \
  LCALLNAMES(A, B, 1)                                                          \
  LCALLNAMES(A, B, 2)                                                          \
  LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
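// For illustration: LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
// registers the outlined-atomic helpers __aarch64_swp1_relax, __aarch64_swp1_acq,
// __aarch64_swp1_rel and __aarch64_swp1_acq_rel, and likewise for the 2-, 4-
// and 8-byte widths.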
    LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
  if (Subtarget->hasLSE128()) {
  if (Subtarget->hasLSE2()) {
  if (WideVT.getScalarSizeInBits() > NarrowVT.getScalarSizeInBits()) {
  if (Subtarget->hasFPARMv8()) {
  if (Subtarget->hasSME())
  for (auto VT : {MVT::v2i32, MVT::v2i64, MVT::v4i32})
  if (Subtarget->hasFullFP16()) {
  for (auto VT : {MVT::v1i64, MVT::v2i64}) {
  for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32,
                 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
  for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
  for (MVT VT : {MVT::v4f16, MVT::v2f32,
                 MVT::v8f16, MVT::v4f32, MVT::v2f64}) {
    if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
  for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32,
                 MVT::v16i8, MVT::v8i16, MVT::v4i32}) {
    if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
  for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})
  if (Subtarget->hasFullFP16())
    for (MVT Ty : {MVT::v4f16, MVT::v8f16})
  for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})
  if (Subtarget->hasFullFP16())
    for (MVT Ty : {MVT::v4f16, MVT::v8f16})
  for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64})
  for (MVT VT : {MVT::v16f16, MVT::v8f32, MVT::v4f64})
    if (VT.is128BitVector() || VT.is64BitVector()) {
  for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
  if (Subtarget->hasSME()) {
       {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
  for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
  if (Subtarget->hasSVE2() ||
      (Subtarget->hasSME() && Subtarget->isStreaming()))
  for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
  for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32})
       {MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
        MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16})
       {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
    if (VT != MVT::nxv16i1) {
       {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
        MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
        MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
  for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
                  MVT::nxv4f32, MVT::nxv2f64}) {
  for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
  if (Subtarget->hasSVEB16B16()) {
  if (!Subtarget->hasSVEB16B16()) {
  for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
                  MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
    addTypeForFixedLengthSVE(VT);
    addTypeForFixedLengthSVE(VT);
  for (auto VT : {MVT::v8i8, MVT::v4i16})
  for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
  for (auto VT : {MVT::v8f16, MVT::v4f32})
  for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
                  MVT::v2i32, MVT::v4i32, MVT::v2i64}) {
  for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
  for (auto VT : {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1})
  for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64,
                  MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
                  MVT::nxv4f32, MVT::nxv2f64, MVT::nxv2bf16, MVT::nxv4bf16,
                  MVT::nxv8bf16, MVT::v4f16, MVT::v8f16, MVT::v2f32,
                  MVT::v4f32, MVT::v1f64, MVT::v2f64, MVT::v8i8,
                  MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
                  MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
  for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
                  MVT::nxv4f32, MVT::nxv2f64, MVT::v4f16, MVT::v8f16,
                  MVT::v2f32, MVT::v4f32, MVT::v2f64})
       {MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv2f32,
        MVT::nxv2f64, MVT::nxv4i8, MVT::nxv4i16, MVT::nxv4i32, MVT::nxv4f32})
  for (auto VT : {MVT::v2i8, MVT::v2i16, MVT::v2i32, MVT::v2i64, MVT::v2f32,
                  MVT::v2f64, MVT::v4i8, MVT::v4i16, MVT::v4i32, MVT::v4f32})
  if (Subtarget->hasSVE2()) {
  if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
  if (Subtarget->hasSVE()) {
  for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
    if ((libcallName != nullptr) && (libcallName[0] != '#')) {
void AArch64TargetLowering::addTypeForNEON(MVT VT) {
  if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
  if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
      ((VT == MVT::v4bf16 || VT == MVT::v8bf16 || VT == MVT::v4f16 ||
        VT == MVT::v8f16) &&
       Subtarget->hasFullFP16()))
  if (VT != MVT::v8i8 && VT != MVT::v16i8)
  for (unsigned Opcode :
  for (unsigned Opcode :
  if (Subtarget->hasD128()) {
  if (!Subtarget->hasSVE())
  if (ResVT != MVT::nxv2i1 && ResVT != MVT::nxv4i1 && ResVT != MVT::nxv8i1 &&
      ResVT != MVT::nxv16i1 && ResVT != MVT::v2i1 && ResVT != MVT::v4i1 &&
      ResVT != MVT::v8i1 && ResVT != MVT::v16i1)
  if (OpVT != MVT::i32 && OpVT != MVT::i64)
  if (I->getIntrinsicID() != Intrinsic::experimental_vector_partial_reduce_add)
  auto Op1 = I->getOperand(1);
  return VT != MVT::nxv16i1 && VT != MVT::nxv8i1 && VT != MVT::nxv4i1 &&
         VT != MVT::nxv2i1 && VT != MVT::v16i1 && VT != MVT::v8i1 &&
         VT != MVT::v4i1 && VT != MVT::v2i1;
                                                 unsigned SearchSize) const {
  if (VT == MVT::nxv8i16 || VT == MVT::v8i16)
    return SearchSize != 8;
  if (VT == MVT::nxv16i8 || VT == MVT::v16i8 || VT == MVT::v8i8)
    return SearchSize != 8 && SearchSize != 16;
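  // The sizes not rejected above (8 x 16-bit elements, and 8 or 16 x 8-bit
  // elements) appear to be the segment shapes handled natively, presumably via
  // the SVE2 MATCH instruction; every other search size is expanded.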
void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
  while (InnerVT != VT) {
  while (InnerVT != VT) {
void AArch64TargetLowering::addDRType(MVT VT) {
void AArch64TargetLowering::addQRType(MVT VT) {
    Imm = C->getZExtValue();
  return N->getOpcode() == Opc &&
                              const APInt &Demanded,
  uint64_t OldImm = Imm, NewImm, Enc;
  if (Imm == 0 || Imm == Mask ||
  unsigned EltSize = Size;
      ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
  uint64_t Sum = RotatedImm + NonDemandedBits;
  bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
  uint64_t Ones = (Sum + Carry) & NonDemandedBits;
  NewImm = (Imm | Ones) & Mask;
  while (EltSize < Size) {
    NewImm |= NewImm << EltSize;
         "demanded bits should never be altered");
  assert(OldImm != NewImm &&
         "the new imm shouldn't be equal to the old imm");
  EVT VT = Op.getValueType();
  if (NewImm == 0 || NewImm == OrigMask) {
  EVT VT = Op.getValueType();
  switch (Op.getOpcode()) {
    NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
    NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
    NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
  switch (Op.getOpcode()) {
    if (SrcOp.getValueSizeInBits() != Op.getScalarValueSizeInBits()) {
      assert(SrcOp.getValueSizeInBits() > Op.getScalarValueSizeInBits() &&
             "Expected DUP implicit truncation");
      Known = Known.trunc(Op.getScalarValueSizeInBits());
        ~(Op->getConstantOperandAPInt(1) << Op->getConstantOperandAPInt(2))
  case Intrinsic::aarch64_ldaxr:
  case Intrinsic::aarch64_ldxr: {
    EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
    unsigned IntNo = Op.getConstantOperandVal(0);
    case Intrinsic::aarch64_neon_uaddlv: {
      MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
      if (VT == MVT::v8i8 || VT == MVT::v16i8) {
        unsigned Bound = (VT == MVT::v8i8) ? 11 : 12;
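        // UADDLV sums at most 8 (v8i8) or 16 (v16i8) byte lanes, each at most
        // 255, so the result needs no more than 11 or 12 significant bits.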
    case Intrinsic::aarch64_neon_umaxv:
    case Intrinsic::aarch64_neon_uminv: {
      MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
      if (VT == MVT::v8i8 || VT == MVT::v16i8) {
      } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
                                                  unsigned Depth) const {
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();
    return std::min<uint64_t>(Tmp + Op.getConstantOperandVal(1), VTBits);
    unsigned *Fast) const {
  if (ElementSizeBits % 8 == 0 && Alignment >= Align(ElementSizeBits / 8))
  if (Subtarget->requiresStrictAlign())
    *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
    unsigned *Fast) const {
  if (Subtarget->requiresStrictAlign())
    *Fast = !Subtarget->isMisaligned128StoreSlow() ||
#define MAKE_CASE(V)                                                           \
  Register DestReg = MI.getOperand(0).getReg();
  Register IfTrueReg = MI.getOperand(1).getReg();
  Register IfFalseReg = MI.getOperand(2).getReg();
  unsigned CondCode = MI.getOperand(3).getImm();
  bool NZCVKilled = MI.getOperand(4).isKill();
  MI.eraseFromParent();
         "SEH does not use catchret!");
  Register TargetReg = MI.getOperand(0).getReg();
      TII.probedStackAlloc(MBBI, TargetReg, false);
  MI.eraseFromParent();
  return NextInst->getParent();
  MIB.add(MI.getOperand(1));
  MIB.add(MI.getOperand(2));
  MIB.add(MI.getOperand(3));
  MIB.add(MI.getOperand(4));
  MIB.add(MI.getOperand(5));
  MI.eraseFromParent();
  MIB.add(MI.getOperand(0));
  MIB.add(MI.getOperand(1));
  MIB.add(MI.getOperand(2));
  MIB.add(MI.getOperand(1));
  MI.eraseFromParent();
                                              bool Op0IsDef) const {
  for (unsigned I = 1; I < MI.getNumOperands(); ++I)
    MIB.add(MI.getOperand(I));
  MI.eraseFromParent();
  unsigned StartIdx = 0;
  bool HasTile = BaseReg != AArch64::ZA;
  bool HasZPROut = HasTile && MI.getOperand(0).isReg();
    MIB.add(MI.getOperand(StartIdx));
    MIB.addReg(BaseReg + MI.getOperand(StartIdx).getImm(),
    MIB.addReg(BaseReg + MI.getOperand(StartIdx).getImm());
    if (MI.getOperand(0).isReg() && !MI.getOperand(1).isImm()) {
      MIB.add(MI.getOperand(StartIdx));
  for (unsigned I = StartIdx; I < MI.getNumOperands(); ++I)
    MIB.add(MI.getOperand(I));
  MI.eraseFromParent();
  MIB.add(MI.getOperand(0));
  unsigned Mask = MI.getOperand(0).getImm();
  for (unsigned I = 0; I < 8; I++) {
    if (Mask & (1 << I))
  MI.eraseFromParent();
  if (TPIDR2.Uses > 0) {
         "Lazy ZA save is not yet supported on Windows");
  if (TPIDR2.Uses > 0) {
    Register SP = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), SP)
    auto Size = MI.getOperand(1).getReg();
    auto Dest = MI.getOperand(0).getReg();
    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::MSUBXrrr), Dest)
         "Lazy ZA save is not yet supported on Windows");
    auto Size = MI.getOperand(1).getReg();
    auto Dest = MI.getOperand(0).getReg();
    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::SUBXrx64), AArch64::SP)
    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), Dest)
    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF),
            MI.getOperand(0).getReg());
          MI.getOperand(0).getReg())
          MI.getOperand(0).getReg())
  if (SMEOrigInstr != -1) {
    switch (SMEMatrixType) {
  switch (MI.getOpcode()) {
  case AArch64::InitTPIDR2Obj:
  case AArch64::AllocateZABuffer:
  case AArch64::AllocateSMESaveBuffer:
  case AArch64::GetSMESaveSize:
  case AArch64::F128CSEL:
  case TargetOpcode::STATEPOINT:
      MI.addOperand(*MI.getMF(),
  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
  case TargetOpcode::PATCHABLE_EVENT_CALL:
  case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
  case AArch64::CATCHRET:
  case AArch64::PROBED_STACKALLOC_DYN:
  case AArch64::LD1_MXIPXX_H_PSEUDO_B:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_B, AArch64::ZAB0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_H:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_H, AArch64::ZAH0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_S:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_S, AArch64::ZAS0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_D:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_D, AArch64::ZAD0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_Q:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_Q, AArch64::ZAQ0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_B:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_B, AArch64::ZAB0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_H:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_H, AArch64::ZAH0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_S:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_S, AArch64::ZAS0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_D:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_D, AArch64::ZAD0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_Q:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_Q, AArch64::ZAQ0, MI, BB);
  case AArch64::LDR_ZA_PSEUDO:
  case AArch64::LDR_TX_PSEUDO:
  case AArch64::STR_TX_PSEUDO:
  case AArch64::ZERO_M_PSEUDO:
  case AArch64::ZERO_T_PSEUDO:
  case AArch64::MOVT_TIZ_PSEUDO:
    N = N->getOperand(0).getNode();
  auto Opnd0 = N->getOperand(0);
                                      CondCode, CondCode2);
  bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
             << " legal: " << (IsLegal ? "yes\n" : "no\n"));
      (isIntEqualitySetCC(CC) ||
  EVT VT = LHS.getValueType();
  if ((VT == MVT::f16 && !FullFP16) || VT == MVT::bf16) {
    Chain = RHS.getValue(1);
    return DAG.getNode(Opcode, dl, {MVT::i32, MVT::Other}, {Chain, LHS, RHS});
  EVT VT = LHS.getValueType();
  if ((VT == MVT::f16 && !FullFP16) || VT == MVT::bf16) {
      isIntEqualitySetCC(CC)) {
  return LHS.getValue(1);
  unsigned Opcode = 0;
  if (LHS.getValueType().isFloatingPoint()) {
    assert(LHS.getValueType() != MVT::f128);
    if ((LHS.getValueType() == MVT::f16 && !FullFP16) ||
        LHS.getValueType() == MVT::bf16) {
      APInt Imm = Const->getAPIntValue();
      if (Imm.isNegative() && Imm.sgt(-32)) {
      isIntEqualitySetCC(CC)) {
                                bool &MustBeFirst, bool WillNegate,
                                unsigned Depth = 0) {
    MustBeFirst = false;
  bool IsOR = Opcode == ISD::OR;
    if (MustBeFirstL && MustBeFirstR)
    if (!CanNegateL && !CanNegateR)
      CanNegate = WillNegate && CanNegateL && CanNegateR;
      MustBeFirst = !CanNegate;
    MustBeFirst = MustBeFirstL || MustBeFirstR;
    bool isInteger = LHS.getValueType().isInteger();
      CC = getSetCCInverse(CC, LHS.getValueType());
      assert(LHS.getValueType().isFloatingPoint());
    Predicate = ExtraCC;
  bool IsOR = Opcode == ISD::OR;
  assert(ValidL && "Valid conjunction/disjunction tree");
  assert(ValidR && "Valid conjunction/disjunction tree");
    assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
  bool NegateAfterAll;
      assert(CanNegateR && "at least one side must be negatable");
      assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
      NegateAfterR = true;
      NegateR = CanNegateR;
      NegateAfterR = !CanNegateR;
    NegateAfterAll = !Negate;
    assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
    assert(!Negate && "Valid conjunction/disjunction tree");
    NegateAfterR = false;
    NegateAfterAll = false;
  bool DummyCanNegate;
  bool DummyMustBeFirst;
  auto isSupportedExtend = [&](SDValue V) {
    if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
      uint64_t Mask = MaskCst->getZExtValue();
      return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
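      // Masks of 0xFF/0xFFFF/0xFFFFFFFF correspond to the UXTB/UXTH/UXTW
      // extended-register operand forms that a compare can fold for free.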
  if (!Op.hasOneUse())
  if (isSupportedExtend(Op))
  unsigned Opc = Op.getOpcode();
  if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    uint64_t Shift = ShiftCst->getZExtValue();
    if (isSupportedExtend(Op.getOperand(0)))
      return (Shift <= 4) ? 2 : 1;
    EVT VT = Op.getValueType();
    if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
  EVT VT = RHS.getValueType();
    if ((VT == MVT::i32 && C != 0x80000000 &&
        (VT == MVT::i64 && C != 0x80000000ULL &&
    if ((VT == MVT::i32 && C != 0 &&
    if ((VT == MVT::i32 && C != INT32_MAX &&
    if ((VT == MVT::i32 && C != UINT32_MAX &&
  if (!isa<ConstantSDNode>(RHS) ||
  if (isIntEqualitySetCC(CC) && isa<ConstantSDNode>(RHS)) {
        cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
        LHS.getNode()->hasNUsesOfValue(1, 0)) {
      int16_t ValueofRHS = RHS->getAsZExtVal();
static std::pair<SDValue, SDValue>
  assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
         "Unsupported value type");
  switch (Op.getOpcode()) {
    if (Op.getValueType() == MVT::i32) {
      assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
    Overflow = Value.getValue(1);
  return std::make_pair(Value, Overflow);
    return LowerToScalableOp(Op, DAG);
  if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
  if (!CFVal || !CTVal)
  return Cmp.getValue(1);
                              unsigned Opcode, bool IsSigned) {
  EVT VT0 = Op.getValue(0).getValueType();
  EVT VT1 = Op.getValue(1).getValueType();
  if (VT0 != MVT::i32 && VT0 != MVT::i64)
  unsigned IsWrite = Op.getConstantOperandVal(2);
  unsigned Locality = Op.getConstantOperandVal(3);
  unsigned IsData = Op.getConstantOperandVal(4);
  bool IsStream = !Locality;
    assert(Locality <= 3 && "Prefetch locality out-of-range");
    Locality = 3 - Locality;
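    // llvm.prefetch counts locality from 0 (none) up to 3 (keep closest), while
    // the PRFM hint numbers cache levels the other way round, so the value is
    // flipped before the operand is assembled below.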
  unsigned PrfOp = (IsWrite << 4) |
  if (LHSConstOp && RHSConst) {
    uint64_t NewMaskValue = LHSConstValue & ~(RHSConstant - 1);
  EVT VT = Op.getValueType();
  if (VT == MVT::nxv2f32 || VT == MVT::nxv4f32)
  if (VT != MVT::nxv2f64)
    return LowerFixedLengthFPExtendToSVE(Op, DAG);
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue Op0 = Op.getOperand(IsStrict ? 1 : 0);
  if (VT == MVT::f64) {
    if (Op0VT == MVT::f32 || Op0VT == MVT::f16)
    if (Op0VT == MVT::bf16 && IsStrict) {
                         {Op0, Op.getOperand(0)});
    if (Op0VT == MVT::bf16)
  assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
  EVT VT = Op.getValueType();
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
  bool Trunc = Op.getConstantOperandVal(IsStrict ? 2 : 1) == 1;
    constexpr EVT I32 = MVT::nxv4i32;
    if (SrcVT == MVT::nxv2f32 || SrcVT == MVT::nxv4f32) {
      if (Subtarget->hasBF16())
        return LowerToPredicatedOp(Op, DAG,
      Narrow = getSVESafeBitCast(I32, SrcVal, DAG);
    } else if (SrcVT == MVT::nxv2f64 &&
                           Pg, SrcVal, DAG.getUNDEF(MVT::nxv2f32));
        NewOps.push_back(Op.getOperand(IsStrict ? 2 : 1));
        return DAG.getNode(Op.getOpcode(), DL, VT, NewOps, Op->getFlags());
      EVT I1 = I32.changeElementType(MVT::i1);
      Narrow = DAG.getSelect(DL, I32, IsNaN, NaN, Narrow);
    return getSVESafeBitCast(VT, Narrow, DAG);
    return LowerFixedLengthFPRoundToSVE(Op, DAG);
      !((Subtarget->hasNEON() || Subtarget->hasSME()) &&
        Subtarget->hasBF16())) {
      Narrow = DAG.getSelect(dl, I32, IsNaN, NaN, Narrow);
    EVT I16 = I32.changeVectorElementType(MVT::i16);
  if (SrcVT != MVT::f128) {
  bool IsStrict = Op->isStrictFPOpcode();
  EVT InVT = Op.getOperand(IsStrict ? 1 : 0).getValueType();
  EVT VT = Op.getValueType();
    return LowerToPredicatedOp(Op, DAG, Opcode);
    return LowerFixedLengthFPToIntToSVE(Op, DAG);
                        {Op.getOperand(0), Op.getOperand(1)});
      return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other},
                         {Ext.getValue(1), Ext.getValue(0)});
        Op.getOpcode(), dl, Op.getValueType(),
  if (VTSize < InVTSize) {
                        {Op.getOperand(0), Op.getOperand(1)});
  if (VTSize > InVTSize) {
                        {Op.getOperand(0), Op.getOperand(1)});
      return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other},
                         {Ext.getValue(1), Ext.getValue(0)});
    return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
        Op.getOperand(IsStrict ? 1 : 0), DAG.getConstant(0, dl, MVT::i64));
      return DAG.getNode(Op.getOpcode(), dl, {ScalarVT, MVT::Other},
                         {Op.getOperand(0), Extract});
    return DAG.getNode(Op.getOpcode(), dl, ScalarVT, Extract);
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
    return LowerVectorFP_TO_INT(Op, DAG);
  if ((SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
                        {Op.getOperand(0), SrcVal});
      return DAG.getNode(Op.getOpcode(), dl, {Op.getValueType(), MVT::Other},
                         {Ext.getValue(1), Ext.getValue(0)});
        Op.getOpcode(), dl, Op.getValueType(),
AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
  EVT DstVT = Op.getValueType();
  EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
  assert(SatWidth <= DstElementWidth &&
         "Saturation width cannot exceed result width");
  if ((SrcElementVT == MVT::f16 &&
       (!Subtarget->hasFullFP16() || DstElementWidth > 16)) ||
      SrcElementVT == MVT::bf16) {
    SrcElementVT = MVT::f32;
    SrcElementWidth = 32;
  } else if (SrcElementVT != MVT::f64 && SrcElementVT != MVT::f32 &&
             SrcElementVT != MVT::f16 && SrcElementVT != MVT::bf16)
  if (SatWidth == 64 && SrcElementWidth < 64) {
    SrcElementVT = MVT::f64;
    SrcElementWidth = 64;
  if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth) {
  if (SrcElementWidth < SatWidth || SrcElementVT == MVT::f64)
      SrcVal2 ? DAG.getNode(Op.getOpcode(), DL, IntVT, SrcVal2,
    return LowerVectorFP_TO_INT_SAT(Op, DAG);
  EVT DstVT = Op.getValueType();
  EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
  assert(SatWidth <= DstWidth && "Saturation width cannot exceed result width");
  if ((SrcVT == MVT::f16 && !Subtarget->hasFullFP16()) || SrcVT == MVT::bf16) {
  } else if (SrcVT != MVT::f64 && SrcVT != MVT::f32 && SrcVT != MVT::f16 &&
  if ((SrcVT == MVT::f64 || SrcVT == MVT::f32 ||
       (SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
      DstVT == SatVT && (DstVT == MVT::i64 || DstVT == MVT::i32))
    return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,
  if (DstWidth < SatWidth)
  EVT VT = Op.getValueType();
  bool IsStrict = Op->isStrictFPOpcode();
  EVT VT = Op.getValueType();
  EVT InVT = In.getValueType();
  unsigned Opc = Op.getOpcode();
    In = DAG.getNode(CastOpc, dl, CastVT, In);
    return DAG.getNode(Opc, dl, VT, In);
    return LowerToPredicatedOp(Op, DAG, Opcode);
    return LowerFixedLengthIntToFPToSVE(Op, DAG);
                       {Op.getOperand(0), In});
                        {Op.getValueType(), MVT::Other},
  if (VTSize < InVTSize) {
      In = DAG.getNode(Opc, dl, {CastVT, MVT::Other},
                       {Op.getOperand(0), In});
                         {In.getValue(1), In.getValue(0),
  if (VTSize > InVTSize) {
    In = DAG.getNode(CastOpc, dl, CastVT, In);
      return DAG.getNode(Opc, dl, {VT, MVT::Other}, {Op.getOperand(0), In});
    return DAG.getNode(Opc, dl, VT, In);
      return DAG.getNode(Op.getOpcode(), dl, {ScalarVT, MVT::Other},
                         {Op.getOperand(0), Extract});
    return DAG.getNode(Op.getOpcode(), dl, ScalarVT, Extract);
  if (Op.getValueType().isVector())
    return LowerVectorINT_TO_FP(Op, DAG);
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
  auto IntToFpViaPromotion = [&](EVT PromoteVT) {
                        {Op.getOperand(0), SrcVal});
                         {Op.getValueType(), MVT::Other},
          DAG.getNode(Op.getOpcode(), dl, PromoteVT, SrcVal),
  if (Op.getValueType() == MVT::bf16) {
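    // An f32 significand holds 24 bits and an f64 significand holds 53, so an
    // integer no wider than that converts to the wider type exactly; a single
    // final rounding to bf16 then gives a correctly rounded result.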
    unsigned MaxWidth = IsSigned
    if (MaxWidth <= 24) {
      return IntToFpViaPromotion(MVT::f32);
    if (MaxWidth <= 53) {
      return IntToFpViaPromotion(MVT::f64);
          IsStrict ? DAG.getNode(Op.getOpcode(), DL, {MVT::f64, MVT::Other},
                                 {Op.getOperand(0), ToRound})
                   : DAG.getNode(Op.getOpcode(), DL, MVT::f64, ToRound);
                         {Op.getValueType(), MVT::Other},
  if (Op.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
    return IntToFpViaPromotion(MVT::f32);
  if (Op.getValueType() != MVT::f128)
    Entry.IsSExt = false;
    Entry.IsZExt = false;
    Args.push_back(Entry);
                                        : RTLIB::SINCOS_STRET_F32;
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  return CallResult.first;
  EVT OpVT = Op.getValueType();
  EVT ArgVT = Op.getOperand(0).getValueType();
    return LowerFixedLengthBitcastToSVE(Op, DAG);
           "Expected int->fp bitcast!");
    return getSVESafeBitCast(OpVT, ExtResult, DAG);
    return getSVESafeBitCast(OpVT, Op.getOperand(0), DAG);
  if (OpVT != MVT::f16 && OpVT != MVT::bf16)
  if (ArgVT == MVT::f16 || ArgVT == MVT::bf16)
  assert(ArgVT == MVT::i16);
static std::optional<uint64_t>
    return std::nullopt;
    return std::nullopt;
  return C->getZExtValue();
  EVT VT = N.getValueType();
  for (const SDValue &Elt : N->op_values()) {
    unsigned HalfSize = EltSize / 2;
      if (!isIntN(HalfSize, C->getSExtValue()))
      if (!isUIntN(HalfSize, C->getZExtValue()))
  EVT VT = N.getValueType();
  unsigned Opcode = N.getOpcode();
  unsigned Opcode = N.getOpcode();
      {Chain, DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)});
      Chain, DAG.getConstant(Intrinsic::aarch64_set_fpcr, DL, MVT::i64), FPCR};
  if (IsN0SExt && IsN1SExt)
  if (IsN0ZExt && IsN1ZExt)
  if (IsN0ZExt || IsN1ZExt) {
  if (IsN0SExt || IsN1SExt) {
  if (!IsN1SExt && !IsN1ZExt)
  EVT VT = Op.getValueType();
         "unexpected type for custom-lowering ISD::MUL");
  if (VT == MVT::v1i64) {
    if (Subtarget->hasSVE())
    if (Subtarget->hasSVE())
         "unexpected types for extended operands to VMULL");
  if (VT == MVT::nxv1i1 && Pattern == AArch64SVEPredPattern::all)
                                bool IsSigned, bool IsEqual) {
  if (!isa<ConstantSDNode>(Op.getOperand(1)) ||
      !isa<ConstantSDNode>(Op.getOperand(2)))
  APInt X = Op.getConstantOperandAPInt(1);
  APInt Y = Op.getConstantOperandAPInt(2);
  if (IsSigned ? Y.isMaxSignedValue() : Y.isMaxValue())
  APInt NumActiveElems =
      IsSigned ? Y.ssub_ov(X, Overflow) : Y.usub_ov(X, Overflow);
    NumActiveElems = IsSigned ? NumActiveElems.sadd_ov(One, Overflow)
                              : NumActiveElems.uadd_ov(One, Overflow);
  std::optional<unsigned> PredPattern =
  unsigned MinSVEVectorSize = std::max(
  unsigned ElementSize = 128 / Op.getValueType().getVectorMinNumElements();
  if (PredPattern != std::nullopt &&
      NumActiveElems.getZExtValue() <= (MinSVEVectorSize / ElementSize))
    return getPTrue(DAG, dl, Op.getValueType(), *PredPattern);
  EVT InVT = Op.getValueType();
         "Expected a predicate-to-predicate bitcast");
         "Only expect to cast between legal scalable predicate types!");
      Op.getConstantOperandVal(0) == Intrinsic::aarch64_sve_convert_to_svbool &&
      Op.getOperand(1).getValueType().bitsGT(VT))
    Op = Op.getOperand(1);
  CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
      RetTy, Callee, std::move(Args));
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  SDValue TileSlice = N->getOperand(2);
  int32_t ConstAddend = 0;
      ConstAddend = cast<ConstantSDNode>(VecNum.getOperand(1))->getSExtValue();
  } else if (auto ImmNode = dyn_cast<ConstantSDNode>(VecNum)) {
    ConstAddend = ImmNode->getSExtValue();
  int32_t ImmAddend = ConstAddend % 16;
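  // The underlying tile-slice load/store forms only accept a small immediate
  // offset (0-15), so the constant addend is split into an in-range immediate
  // and a variable remainder that is added to the slice index separately.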
  if (int32_t C = (ConstAddend - ImmAddend)) {
    VarAddend = VarAddend
      {N.getOperand(0), TileSlice, Base,
  auto Op1 = Op.getOperand(1);
  auto Op2 = Op.getOperand(2);
  auto Mask = Op.getOperand(3);
  EVT Op2VT = Op2.getValueType();
  EVT ResVT = Op.getValueType();
         "Expected 8-bit or 16-bit characters.");
                     ID, Mask, Op1, Op2);
  unsigned IntNo = Op.getConstantOperandVal(1);
  case Intrinsic::aarch64_prefetch: {
    unsigned IsWrite = Op.getConstantOperandVal(3);
    unsigned Locality = Op.getConstantOperandVal(4);
    unsigned IsStream = Op.getConstantOperandVal(5);
    unsigned IsData = Op.getConstantOperandVal(6);
    unsigned PrfOp = (IsWrite << 4) |
  case Intrinsic::aarch64_sme_str:
  case Intrinsic::aarch64_sme_ldr: {
  case Intrinsic::aarch64_sme_za_enable:
  case Intrinsic::aarch64_sme_za_disable:
  unsigned IntNo = Op.getConstantOperandVal(1);
  case Intrinsic::aarch64_mops_memset_tag: {
    auto Node = cast<MemIntrinsicSDNode>(Op.getNode());
    auto Alignment = Node->getMemOperand()->getAlign();
    bool IsVol = Node->isVolatile();
    auto DstPtrInfo = Node->getPointerInfo();
    SDValue MS = SDI.EmitMOPS(AArch64::MOPSMemorySetTaggingPseudo, DAG, DL,
                              Chain, Dst, Val, Size, Alignment, IsVol,
  unsigned IntNo = Op.getConstantOperandVal(0);
  case Intrinsic::thread_pointer: {
  case Intrinsic::aarch64_neon_abs: {
    EVT Ty = Op.getValueType();
    if (Ty == MVT::i64) {
  case Intrinsic::aarch64_neon_pmull64: {
    std::optional<uint64_t> LHSLane =
    std::optional<uint64_t> RHSLane =
    assert((!LHSLane || *LHSLane < 2) && "Expect lane to be None or 0 or 1");
    assert((!RHSLane || *RHSLane < 2) && "Expect lane to be None or 0 or 1");
    auto TryVectorizeOperand = [](SDValue N, std::optional<uint64_t> NLane,
                                  std::optional<uint64_t> OtherLane,
      if (NLane && *NLane == 1)
      if (OtherLane && *OtherLane == 1) {
      if (NLane && *NLane == 0)
      assert(N.getValueType() == MVT::i64 &&
             "Intrinsic aarch64_neon_pmull64 requires i64 parameters");
    LHS = TryVectorizeOperand(LHS, LHSLane, RHSLane, dl, DAG);
    RHS = TryVectorizeOperand(RHS, RHSLane, LHSLane, dl, DAG);
  case Intrinsic::aarch64_neon_smax:
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::aarch64_neon_umax:
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::aarch64_neon_smin:
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::aarch64_neon_umin:
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::aarch64_neon_scalar_sqxtn:
  case Intrinsic::aarch64_neon_scalar_sqxtun:
  case Intrinsic::aarch64_neon_scalar_uqxtn: {
    assert(Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::f32);
    if (Op.getValueType() == MVT::i32)
                                         Op.getOperand(1))));
  case Intrinsic::aarch64_neon_sqxtn:
  case Intrinsic::aarch64_neon_sqxtun:
  case Intrinsic::aarch64_neon_uqxtn:
  case Intrinsic::aarch64_neon_sqshrn:
    if (Op.getValueType().isVector())
                         Op.getOperand(1).getValueType(),
                         Op.getOperand(1), Op.getOperand(2)));
  case Intrinsic::aarch64_neon_sqshrun:
    if (Op.getValueType().isVector())
                         Op.getOperand(1).getValueType(),
                         Op.getOperand(1), Op.getOperand(2)));
  case Intrinsic::aarch64_neon_uqshrn:
    if (Op.getValueType().isVector())
                         Op.getOperand(1).getValueType(),
                         Op.getOperand(1), Op.getOperand(2)));
  case Intrinsic::aarch64_neon_sqrshrn:
    if (Op.getValueType().isVector())
                         Op.getOperand(1), Op.getOperand(2)));
  case Intrinsic::aarch64_neon_sqrshrun:
    if (Op.getValueType().isVector())
                         Op.getOperand(1), Op.getOperand(2)));
  case Intrinsic::aarch64_neon_uqrshrn:
    if (Op.getValueType().isVector())
  case Intrinsic::aarch64_sve_whilelo:
  case Intrinsic::aarch64_sve_whilelt:
  case Intrinsic::aarch64_sve_whilels:
  case Intrinsic::aarch64_sve_whilele:
  case Intrinsic::aarch64_sve_sunpkhi:
  case Intrinsic::aarch64_sve_sunpklo:
  case Intrinsic::aarch64_sve_uunpkhi:
  case Intrinsic::aarch64_sve_uunpklo:
  case Intrinsic::aarch64_sve_clasta_n:
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
  case Intrinsic::aarch64_sve_clastb_n:
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
  case Intrinsic::aarch64_sve_lasta:
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::aarch64_sve_lastb:
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::aarch64_sve_rev:
  case Intrinsic::aarch64_sve_tbl:
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::aarch64_sve_trn1:
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::aarch64_sve_trn2:
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::aarch64_sve_uzp1:
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::aarch64_sve_uzp2:
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::aarch64_sve_zip1:
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::aarch64_sve_zip2:
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::aarch64_sve_splice:
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
  case Intrinsic::aarch64_sve_ptrue:
    return getPTrue(DAG, dl, Op.getValueType(), Op.getConstantOperandVal(1));
  case Intrinsic::aarch64_sve_clz:
                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
  case Intrinsic::aarch64_sme_cntsb:
  case Intrinsic::aarch64_sme_cntsh: {
  case Intrinsic::aarch64_sme_cntsw: {
  case Intrinsic::aarch64_sme_cntsd: {
  case Intrinsic::aarch64_sve_cnt: {
    if (Data.getValueType().isFloatingPoint())
                       Op.getOperand(2), Data, Op.getOperand(1));
  case Intrinsic::aarch64_sve_dupq_lane:
    return LowerDUPQLane(Op, DAG);
  case Intrinsic::aarch64_sve_convert_from_svbool:
    if (Op.getValueType() == MVT::aarch64svcount)
  case Intrinsic::aarch64_sve_convert_to_svbool:
    if (Op.getOperand(1).getValueType() == MVT::aarch64svcount)
  case Intrinsic::aarch64_sve_fneg:
                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
  case Intrinsic::aarch64_sve_frintp:
                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
  case Intrinsic::aarch64_sve_frintm:
                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
  case Intrinsic::aarch64_sve_frinti:
                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
  case Intrinsic::aarch64_sve_frintx:
                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
  case Intrinsic::aarch64_sve_frinta:
                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
  case Intrinsic::aarch64_sve_frintn:
                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
  case Intrinsic::aarch64_sve_frintz:
                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
  case Intrinsic::aarch64_sve_ucvtf:
                       Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
  case Intrinsic::aarch64_sve_scvtf:
                       Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
  case Intrinsic::aarch64_sve_fcvtzu:
                       Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
  case Intrinsic::aarch64_sve_fcvtzs:
                       Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
  case Intrinsic::aarch64_sve_fsqrt:
                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
  case Intrinsic::aarch64_sve_frecpx:
                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
  case Intrinsic::aarch64_sve_frecpe_x:
  case Intrinsic::aarch64_sve_frecps_x:
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::aarch64_sve_frsqrte_x:
  case Intrinsic::aarch64_sve_frsqrts_x:
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::aarch64_sve_fabs:
                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
  case Intrinsic::aarch64_sve_abs:
                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
  case Intrinsic::aarch64_sve_neg:
                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
  case Intrinsic::aarch64_sve_insr: {
    if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
                       Op.getOperand(1), Scalar);
  case Intrinsic::aarch64_sve_rbit:
                       Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
  case Intrinsic::aarch64_sve_revb:
                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
  case Intrinsic::aarch64_sve_revh:
                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
  case Intrinsic::aarch64_sve_revw:
                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
  case Intrinsic::aarch64_sve_revd:
                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
  case Intrinsic::aarch64_sve_sxtb:
                       Op.getOperand(2), Op.getOperand(3),
        DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
  case Intrinsic::aarch64_sve_sxth:
                       Op.getOperand(2), Op.getOperand(3),
        DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
  case Intrinsic::aarch64_sve_sxtw:
                       Op.getOperand(2), Op.getOperand(3),
        DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
  case Intrinsic::aarch64_sve_uxtb:
                       Op.getOperand(2), Op.getOperand(3),
        DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
  case Intrinsic::aarch64_sve_uxth:
                       Op.getOperand(2), Op.getOperand(3),
        DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
  case Intrinsic::aarch64_sve_uxtw:
                       Op.getOperand(2), Op.getOperand(3),
        DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
  case Intrinsic::localaddress: {
    unsigned Reg = RegInfo->getLocalAddressRegister(MF);
                              Op.getSimpleValueType());
  case Intrinsic::eh_recoverfp: {
    SDValue IncomingFPOp = Op.getOperand(2);
    auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
           "llvm.eh.recoverfp must take a function as the first argument");
    return IncomingFPOp;
  case Intrinsic::aarch64_neon_vsri:
  case Intrinsic::aarch64_neon_vsli:
  case Intrinsic::aarch64_sve_sri:
  case Intrinsic::aarch64_sve_sli: {
    EVT Ty = Op.getValueType();
    bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri ||
                        IntNo == Intrinsic::aarch64_sve_sri;
    return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2),
  case Intrinsic::aarch64_neon_srhadd:
  case Intrinsic::aarch64_neon_urhadd:
  case Intrinsic::aarch64_neon_shadd:
  case Intrinsic::aarch64_neon_uhadd: {
    bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
                        IntNo == Intrinsic::aarch64_neon_shadd);
    bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
                          IntNo == Intrinsic::aarch64_neon_urhadd);
    unsigned Opcode = IsSignedAdd
    return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
  case Intrinsic::aarch64_neon_saddlp:
  case Intrinsic::aarch64_neon_uaddlp: {
    unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uaddlp
    return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1));
  case Intrinsic::aarch64_neon_sdot:
  case Intrinsic::aarch64_neon_udot:
  case Intrinsic::aarch64_sve_sdot:
  case Intrinsic::aarch64_sve_udot: {
    unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot ||
                       IntNo == Intrinsic::aarch64_sve_udot)
    return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
                       Op.getOperand(2), Op.getOperand(3));
  case Intrinsic::aarch64_neon_usdot:
  case Intrinsic::aarch64_sve_usdot: {
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
  case Intrinsic::get_active_lane_mask: {
    EVT VT = Op.getValueType();
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::aarch64_neon_saddlv:
  case Intrinsic::aarch64_neon_uaddlv: {
    EVT OpVT = Op.getOperand(1).getValueType();
    EVT ResVT = Op.getValueType();
        ((ResVT == MVT::i32 && (OpVT == MVT::v8i8 || OpVT == MVT::v16i8 ||
                                OpVT == MVT::v8i16 || OpVT == MVT::v4i16)) ||
         (ResVT == MVT::i64 && (OpVT == MVT::v4i32 || OpVT == MVT::v2i32))) &&
        "Unexpected aarch64_neon_u/saddlv type");
        dl, ResVT == MVT::i32 ? MVT::v4i32 : MVT::v2i64, Op.getOperand(1));
    return EXTRACT_VEC_ELT;
  case Intrinsic::experimental_cttz_elts: {
  case Intrinsic::experimental_vector_match: {
bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const {
bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
  if (auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal->getOperand(0))) {
    unsigned NumExtMaskedLoads = 0;
    for (auto *U : Ld->getMask()->users())
      if (isa<MaskedLoadSDNode>(U))
        NumExtMaskedLoads++;
    if (NumExtMaskedLoads <= 1)
  std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
      {std::make_tuple(false, false, false),
      {std::make_tuple(false, false, true),
      {std::make_tuple(false, true, false),
      {std::make_tuple(false, true, true),
      {std::make_tuple(true, false, false),
      {std::make_tuple(true, false, true),
      {std::make_tuple(true, true, false),
      {std::make_tuple(true, true, true),
  auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
  return AddrModes.find(Key)->second;
  EVT VT = Op.getValueType();
  EVT IndexVT = Index.getValueType();
         "Cannot lower when not using SVE for fixed vectors!");
      Index.getValueType().getVectorElementType() == MVT::i64 ||
      Mask.getValueType().getVectorElementType() == MVT::i64)
  EVT IndexVT = Index.getValueType();
         "Cannot lower when not using SVE for fixed vectors!");
      Index.getValueType().getVectorElementType() == MVT::i64 ||
      Mask.getValueType().getVectorElementType() == MVT::i64)
  if (PromotedVT != VT)
  assert(LoadNode && "Expected custom lowering of a masked load node");
  EVT VT = Op->getValueType(0);
    return LowerFixedLengthVectorMLoadToSVE(Op, DAG);
  assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
                               {Undef, Undef, Undef, Undef});
    return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
                        ST->getBasePtr(), ST->getMemOperand());
  assert(StoreNode && "Can only custom lower store nodes");
    return LowerFixedLengthVectorStoreToSVE(Op, DAG);
      MemVT == MVT::v4i8) {
          {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
  } else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
    return LowerStore128(Op, DAG);
  } else if (MemVT == MVT::i64x8) {
    EVT PtrVT = Base.getValueType();
    for (unsigned i = 0; i < 8; i++) {
    bool IsStoreRelease =
    assert((Subtarget->hasFeature(AArch64::FeatureLSE2) &&
            Subtarget->hasFeature(AArch64::FeatureRCPC3) && IsStoreRelease) ||
      std::swap(StoreValue.first, StoreValue.second);
        {StoreNode->getChain(), StoreValue.first, StoreValue.second,
         StoreNode->getBasePtr()},
  assert(LoadNode && "Expected custom lowering of a load node");
    EVT PtrVT = Base.getValueType();
    for (unsigned i = 0; i < 8; i++) {
  EVT VT = Op->getValueType(0);
  assert((VT == MVT::v4i16 || VT == MVT::v4i32) && "Expected v4i16 or v4i32");
  if (Subtarget->requiresStrictAlign() && LoadNode->getAlign() < Align(4))
  if (VT == MVT::v4i32)
  EVT MaskVT = Mask.getValueType();
  const bool HasPassthru = !Passthru.isUndef();
  assert(VecVT.isVector() && "Input to VECTOR_COMPRESS must be vector.");
  if (MinElmts != 2 && MinElmts != 4)
  if (IsFixedLength) {
                       DAG.getUNDEF(ScalableMaskVT), Mask,
                          DAG.getUNDEF(ScalableVecVT), Passthru,
    MaskVT = Mask.getValueType();
  if (ContainerVT != VecVT) {
      DAG.getConstant(Intrinsic::aarch64_sve_compact, DL, MVT::i64), Mask, Vec);
      DAG.getConstant(Intrinsic::aarch64_sve_cntp, DL, MVT::i64), Mask, Mask);
      DAG.getConstant(Intrinsic::aarch64_sve_whilelo, DL, MVT::i64),
  if (IsFixedLength) {
  if (ContainerVT != VecVT) {
    Compressed = DAG.getBitcast(VecVT, Compressed);
  MVT VT = Op.getSimpleValueType();
  if (auto *ShiftNo = dyn_cast<ConstantSDNode>(Shifts)) {
    MVT VT = Op.getSimpleValueType();
    unsigned int NewShiftNo =
  EVT XScalarTy = X.getValueType();
  switch (Op.getSimpleValueType().SimpleTy) {
    ExpVT = MVT::nxv4i32;
    ExpVT = MVT::nxv2i64;
                               AArch64SVEPredPattern::all);
      DAG.getConstant(Intrinsic::aarch64_sve_fscale, DL, MVT::i64),
  if (X.getValueType() != XScalarTy)
         "ADJUST_TRAMPOLINE operation is only supported on Linux.");
  return Op.getOperand(0);
  Entry.Ty = IntPtrTy;
  Args.push_back(Entry);
  if (auto *FI = dyn_cast<FrameIndexSDNode>(Trmp.getNode())) {
  Args.push_back(Entry);
  Args.push_back(Entry);
  Args.push_back(Entry);
  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  return CallResult.second;
  switch (Op.getOpcode()) {
    return LowerBITCAST(Op, DAG);
    return LowerGlobalAddress(Op, DAG);
    return LowerGlobalTLSAddress(Op, DAG);
    return LowerPtrAuthGlobalAddress(Op, DAG);
    return LowerADJUST_TRAMPOLINE(Op, DAG);
    return LowerINIT_TRAMPOLINE(Op, DAG);
    return LowerSETCC(Op, DAG);
    return LowerSETCCCARRY(Op, DAG);
    return LowerBR_CC(Op, DAG);
    return LowerSELECT(Op, DAG);
    return LowerSELECT_CC(Op, DAG);
    return LowerJumpTable(Op, DAG);
    return LowerBR_JT(Op, DAG);
    return LowerBRIND(Op, DAG);
    return LowerConstantPool(Op, DAG);
    return LowerBlockAddress(Op, DAG);
    return LowerVASTART(Op, DAG);
    return LowerVACOPY(Op, DAG);
    return LowerVAARG(Op, DAG);
    return LowerFP_ROUND(Op, DAG);
    return LowerFP_EXTEND(Op, DAG);
    return LowerFRAMEADDR(Op, DAG);
    return LowerSPONENTRY(Op, DAG);
    return LowerRETURNADDR(Op, DAG);
    return LowerADDROFRETURNADDR(Op, DAG);
    return LowerCONCAT_VECTORS(Op, DAG);
    return LowerINSERT_VECTOR_ELT(Op, DAG);
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
    return LowerBUILD_VECTOR(Op, DAG);
    return LowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
    return LowerVECTOR_SHUFFLE(Op, DAG);
    return LowerSPLAT_VECTOR(Op, DAG);
    return LowerEXTRACT_SUBVECTOR(Op, DAG);
    return LowerINSERT_SUBVECTOR(Op, DAG);
    return LowerDIV(Op, DAG);
    return LowerMinMax(Op, DAG);
    return LowerVectorSRA_SRL_SHL(Op, DAG);
    return LowerShiftParts(Op, DAG);
    return LowerCTPOP_PARITY(Op, DAG);
    return LowerFCOPYSIGN(Op, DAG);
    return LowerVectorOR(Op, DAG);
    return LowerXOR(Op, DAG);
    return LowerINT_TO_FP(Op, DAG);
    return LowerFP_TO_INT(Op, DAG);
    return LowerFP_TO_INT_SAT(Op, DAG);
    return LowerFSINCOS(Op, DAG);
    return LowerGET_ROUNDING(Op, DAG);
    return LowerSET_ROUNDING(Op, DAG);
    return LowerGET_FPMODE(Op, DAG);
    return LowerSET_FPMODE(Op, DAG);
    return LowerRESET_FPMODE(Op, DAG);
    return LowerMUL(Op, DAG);
    return LowerINTRINSIC_W_CHAIN(Op, DAG);
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
    return LowerINTRINSIC_VOID(Op, DAG);
    if (cast<MemSDNode>(Op)->getMemoryVT() == MVT::i128) {
      assert(Subtarget->hasLSE2() || Subtarget->hasRCPC3());
      return LowerStore128(Op, DAG);
    return LowerSTORE(Op, DAG);
    return LowerFixedLengthVectorMStoreToSVE(Op, DAG);
    return LowerMGATHER(Op, DAG);
    return LowerMSCATTER(Op, DAG);
    return LowerVECREDUCE_SEQ_FADD(Op, DAG);
    return LowerVECREDUCE(Op, DAG);
    return LowerATOMIC_LOAD_AND(Op, DAG);
    return LowerDYNAMIC_STACKALLOC(Op, DAG);
    return LowerVSCALE(Op, DAG);
    return LowerVECTOR_COMPRESS(Op, DAG);
    return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
    EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
        (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
    return LowerToPredicatedOp(Op, DAG,
    return LowerTRUNCATE(Op, DAG);
    return LowerMLOAD(Op, DAG);
      return LowerFixedLengthVectorLoadToSVE(Op, DAG);
    return LowerLOAD(Op, DAG);
    return LowerToScalableOp(Op, DAG);
    return LowerFixedLengthVectorSelectToSVE(Op, DAG);
    return LowerABS(Op, DAG);
    return LowerBitreverse(Op, DAG);
    return LowerCTTZ(Op, DAG);
    return LowerVECTOR_SPLICE(Op, DAG);
    return LowerVECTOR_DEINTERLEAVE(Op, DAG);
    return LowerVECTOR_INTERLEAVE(Op, DAG);
    if (Op.getValueType().isVector())
      return LowerVectorXRINT(Op, DAG);
    assert((Op.getOperand(0).getValueType() == MVT::f16 ||
            Op.getOperand(0).getValueType() == MVT::bf16) &&
           "Expected custom lowering of rounding operations only for f16");
    return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
    assert((Op.getOperand(1).getValueType() == MVT::f16 ||
            Op.getOperand(1).getValueType() == MVT::bf16) &&
           "Expected custom lowering of rounding operations only for f16");
                      {Op.getOperand(0), Op.getOperand(1)});
    return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
                       {Ext.getValue(1), Ext.getValue(0)});
    assert(Op.getOperand(2).getValueType() == MVT::i128 &&
           "WRITE_REGISTER custom lowering is only for 128-bit sysregs");
    std::pair<SDValue, SDValue> Pair =
                       SysRegName, Pair.first, Pair.second);
    return LowerVECTOR_HISTOGRAM(Op, DAG);
                                            EVT VT, bool OverrideNEON) const {
  unsigned Opcode = N->getOpcode();
    unsigned IID = N->getConstantOperandVal(0);
    if (IID < Intrinsic::num_intrinsics)
    if (IID == Intrinsic::aarch64_neon_umull ||
        IID == Intrinsic::aarch64_neon_smull ||
                                      bool IsVarArg) const {
SDValue AArch64TargetLowering::LowerFormalArguments(
  unsigned NumArgs = Ins.size();
  unsigned CurArgIdx = 0;
  for (unsigned i = 0; i != NumArgs; ++i) {
    if (Ins[i].isOrigArg()) {
      std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[i].getOrigArgIndex();
    if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
    else if (ActualMVT == MVT::i16)
    bool UseVarArgCC = false;
      UseVarArgCC = isVarArg;
    assert(!Res && "Call operand has unhandled type");
  bool IsLocallyStreaming =
      !Attrs.hasStreamingInterface() && Attrs.hasStreamingBody();
  unsigned ExtraArgLocs = 0;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
    if (Ins[i].Flags.isByVal()) {
      int Size = Ins[i].Flags.getByValSize();
      unsigned NumRegs = (Size + 7) / 8;
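      // Byval arguments are forwarded in 8-byte GPR-sized pieces, so round the
      // byte size up to a whole number of registers.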
    if (Ins[i].Flags.isSwiftAsync())
      if (RegVT == MVT::i32)
        RC = &AArch64::GPR32RegClass;
      else if (RegVT == MVT::i64)
        RC = &AArch64::GPR64RegClass;
      else if (RegVT == MVT::f16 || RegVT == MVT::bf16)
        RC = &AArch64::FPR16RegClass;
      else if (RegVT == MVT::f32)
        RC = &AArch64::FPR32RegClass;
        RC = &AArch64::FPR64RegClass;
        RC = &AArch64::FPR128RegClass;
        RC = &AArch64::PPRRegClass;
      } else if (RegVT == MVT::aarch64svcount) {
        RC = &AArch64::PPRRegClass;
        RC = &AArch64::ZPRRegClass;
      if (IsLocallyStreaming) {
             "Indirect arguments should be scalable on most subtargets");
          !Ins[i].Flags.isInConsecutiveRegs())
        BEAlign = 8 - ArgSize;
      unsigned ObjOffset = ArgOffset + BEAlign;
             "Indirect arguments should be scalable on most subtargets");
             "Indirect arguments should be scalable on most subtargets");
      unsigned NumParts = 1;
      if (Ins[i].Flags.isInConsecutiveRegs()) {
        while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
      while (NumParts > 0) {
              DL, Ptr.getValueType(),
              APInt(Ptr.getValueSizeInBits().getFixedValue(), PartSize));
              APInt(Ptr.getValueSizeInBits().getFixedValue(), PartSize), DL,
              Ptr.getValueType());
    if (Ins[i].isOrigArg()) {
      Argument *OrigArg = F.getArg(Ins[i].getOrigArgIndex());
        if (!Ins[i].Flags.isZExt()) {
  if (IsLocallyStreaming) {
    if (Attrs.hasStreamingCompatibleInterface()) {
      PStateSM = getRuntimePStateSM(DAG, Chain, DL, MVT::i64);
    for (unsigned I = 0; I < InVals.size(); ++I) {
      saveVarArgRegisters(CCInfo, DAG, DL, Chain);
    unsigned VarArgsOffset = CCInfo.getStackSize();
    CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
    if (!CCInfo.isAllocated(AArch64::X8)) {
    for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
           Ins[I].Flags.isInReg()) &&
          Ins[I].Flags.isSRet()) {
  unsigned StackArgSize = CCInfo.getStackSize();
  if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
    StackArgSize = alignTo(StackArgSize, 16);
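    // AAPCS64 keeps SP 16-byte aligned, so the amount of argument stack the
    // callee pops on return is rounded up to a multiple of 16.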
8243 DAG.
getVTList(MVT::i64, MVT::Other), {Chain, SVL});
8248 {Chain, Size, DAG.getConstant(1, DL, MVT::i64)});
8253 { Buffer.getValue(1), Buffer.getValue(0)});
8258 DAG.getVTList(MVT::i64, MVT::Other), Chain);
8265 DAG.getVTList(MVT::i64, MVT::Other), {Chain, BufferSize});
8270 {Chain, BufferSize, DAG.getConstant(1, DL, MVT::i64)});
8283 if (I.Flags.isSwiftSelf() || I.Flags.isSwiftError() ||
8284 I.Flags.isSwiftAsync()) {
8288 "Swift attributes can't be used with preserve_none",
8298void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
8321 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
8323 if (GPRSaveSize != 0) {
8326 if (GPRSaveSize & 15)
8346 for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
8352 MF, GPRIdx, (i - FirstVariadicGPR) * 8)
8362 if (Subtarget->hasFPARMv8() && !IsWin64) {
8364 const unsigned NumFPRArgRegs = FPRArgRegs.size();
8367 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
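// --- Editor's illustrative sketch (not part of the original source) ---
// saveVarArgRegisters sizes the register save areas from the first unused
// argument register onward: each remaining X register needs an 8-byte slot
// and each remaining Q register a 16-byte slot; the `GPRSaveSize & 15` check
// above additionally pads the GPR area to 16 bytes where required. A
// standalone model with hypothetical names:
static unsigned gprSaveAreaSize(unsigned numGPRArgRegs, unsigned firstVariadicGPR) {
  return 8 * (numGPRArgRegs - firstVariadicGPR);   // 8 bytes per X register
}

static unsigned fprSaveAreaSize(unsigned numFPRArgRegs, unsigned firstVariadicFPR) {
  return 16 * (numFPRArgRegs - firstVariadicFPR);  // Q registers are 16 bytes
}
// --- end sketch ---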
8369 if (FPRSaveSize != 0) {
8374 for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
8389 if (!MemOps.empty()) {
8396SDValue AArch64TargetLowering::LowerCallResult(
8400 SDValue ThisVal, bool RequiresSMChange) const {
8403 for (unsigned i = 0; i != RVLocs.size(); ++i) {
8408 if (i == 0 && isThisReturn) {
8410 "unexpected return calling convention register assignment");
8509 unsigned NumArgs = Outs.size();
8510 for (unsigned i = 0; i != NumArgs; ++i) {
8511 MVT ArgVT = Outs[i].VT;
8514 bool UseVarArgCC = false;
8518 if (IsCalleeWin64) {
8521 UseVarArgCC = !Outs[i].IsFixed;
8532 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
8534 else if (ActualMVT == MVT::i16)
8540 assert(!Res && "Call operand has unhandled type");
8545bool AArch64TargetLowering::isEligibleForTailCallOptimization(
8546 const CallLoweringInfo &CLI) const {
8552 bool IsVarArg = CLI.IsVarArg;
8565 if (CallerAttrs.requiresSMChange(CalleeAttrs) ||
8566 CallerAttrs.requiresLazySave(CalleeAttrs) ||
8567 CallerAttrs.requiresPreservingAllZAState(CalleeAttrs) ||
8568 CallerAttrs.hasStreamingBody())
8579 bool CCMatch = CallerCC == CalleeCC;
8594 if (i->hasByValAttr())
8603 if (i->hasInRegAttr())
8621 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
8642 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
8644 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
8646 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
8647 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
8649 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
8658 CCState CCInfo(CalleeCC, IsVarArg, MF, ArgLocs, C);
8662 if (IsVarArg && !(CLI.CB && CLI.CB->isMustTailCall())) {
8671 if (!ArgLoc.isRegLoc())
8683 A.getValVT().isScalableVector() ||
8685 "Expected value to be scalable");
8705 int ClobberedFI) const {
8708 int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
8719 if (FI->getIndex() < 0) {
8721 int64_t InLastByte = InFirstByte;
8724 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
8725 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
8733bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
8734 bool TailCallOpt) const {
8745 APInt RequiredZero(SizeInBits, 0xFE);
8747 bool ZExtBool = (Bits.Zero & RequiredZero) == RequiredZero;
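// --- Editor's illustrative sketch (not part of the original source) ---
// The check above asks whether bits [7:1] of the value are known to be zero
// (mask 0xFE): if so, an i1/boolean is already zero-extended and no extra
// AND/UXTB is needed. A standalone model of that test over a known-zero
// bitmask (hypothetical names):
#include <cstdint>

// `knownZeroMask` has a 1 for every bit proven to be zero.
static bool boolAlreadyZeroExtended(uint64_t knownZeroMask) {
  const uint64_t requiredZero = 0xFE; // bits 1..7 must be known zero
  return (knownZeroMask & requiredZero) == requiredZero;
}
// --- end sketch ---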
8767 switch (MI.getOpcode()) {
8768 case AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO:
8769 RegClass = &AArch64::ZPR2StridedOrContiguousRegClass;
8771 case AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO:
8772 RegClass = &AArch64::ZPR4StridedOrContiguousRegClass;
8779 for (unsigned I = 1; I < MI.getNumOperands(); ++I) {
8781 assert(MO.isReg() && "Unexpected operand to FORM_TRANSPOSED_REG_TUPLE");
8784 if (!Def || !Def->getParent()->isCopy())
8788 unsigned OpSubReg = CopySrc.getSubReg();
8793 if (!CopySrcOp || !CopySrcOp->isReg() || OpSubReg != SubReg ||
8794 MRI.getRegClass(CopySrcOp->getReg()) != RegClass)
8801void AArch64TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
8807 if (MI.getOpcode() == AArch64::MSRpstatesvcrImm1 ||
8808 MI.getOpcode() == AArch64::MSRpstatePseudo) {
8809 for (unsigned I = MI.getNumOperands() - 1; I > 0; --I)
8811 MO.isReg() && MO.isImplicit() && MO.isDef() &&
8812 (AArch64::GPR32RegClass.contains(MO.getReg()) ||
8813 AArch64::GPR64RegClass.contains(MO.getReg())))
8814 MI.removeOperand(I);
8817 if (MI.getOperand(0).getImm() == AArch64SVCR::SVCRSM ||
8818 MI.getOperand(0).getImm() == AArch64SVCR::SVCRSMZA) {
8826 if (MI.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO ||
8827 MI.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO) {
8835 TII->get(TargetOpcode::REG_SEQUENCE),
8836 MI.getOperand(0).getReg());
8838 for (unsigned I = 1; I < MI.getNumOperands(); ++I) {
8839 MIB.add(MI.getOperand(I));
8840 MIB.addImm(AArch64::zsub0 + (I - 1));
8843 MI.eraseFromParent();
8854 (MI.getOpcode() == AArch64::ADDXri ||
8855 MI.getOpcode() == AArch64::SUBXri)) {
8880 assert(PStateSM && "PStateSM should be defined");
8906 Args.push_back(Entry);
8915 Callee, std::move(Args));
8935AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
8944 bool &IsTailCall = CLI.IsTailCall;
8946 bool IsVarArg = CLI.IsVarArg;
8950 bool IsThisReturn = false;
8954 bool IsCFICall = CLI.CB && CLI.CB->isIndirectCall() && CLI.CFIType;
8955 bool IsSibCall = false;
8956 bool GuardWithBTI = false;
8958 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
8959 !Subtarget->noBTIAtReturnTwice()) {
8968 unsigned NumArgs = Outs.size();
8970 for (unsigned i = 0; i != NumArgs; ++i) {
8973 "currently not supported");
8984 RetCCInfo.AnalyzeCallResult(Ins, RetCC);
8990 if (!Loc.isRegLoc())
8992 return AArch64::ZPRRegClass.contains(Loc.getLocReg()) ||
8993 AArch64::PPRRegClass.contains(Loc.getLocReg());
8995 if (any_of(RVLocs, HasSVERegLoc) || any_of(ArgLocs, HasSVERegLoc))
9001 IsTailCall = isEligibleForTailCallOptimization(CLI);
9013 if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
9015 "site marked musttail");
9033 if (IsTailCall && !IsSibCall) {
9038 NumBytes = alignTo(NumBytes, 16);
9043 FPDiff = NumReusableBytes - NumBytes;
9047 if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
9055 assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
9062 else if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee))
9063 CalleeAttrs = SMEAttrs(ES->getSymbol());
9065 auto DescribeCallsite =
9068 if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee))
9069 R << ore::NV("Callee", ES->getSymbol());
9070 else if (CLI.CB && CLI.CB->getCalledFunction())
9071 R << ore::NV("Callee", CLI.CB->getCalledFunction()->getName());
9073 R << "unknown callee";
9078 bool RequiresLazySave = CallerAttrs.requiresLazySave(CalleeAttrs);
9079 bool RequiresSaveAllZA =
9080 CallerAttrs.requiresPreservingAllZAState(CalleeAttrs);
9081 if (RequiresLazySave) {
9093 Chain = DAG.getTruncStore(Chain, DL, NumZaSaveSlices, NumZaSaveSlicesAddr,
9097 DAG.getConstant(Intrinsic::aarch64_sme_set_tpidr2, DL, MVT::i32),
9105 return DescribeCallsite(R) << " sets up a lazy save for ZA";
9107 } else if (RequiresSaveAllZA) {
9109 "Cannot share state that may not exist");
9115 bool RequiresSMChange = CallerAttrs.requiresSMChange(CalleeAttrs);
9116 if (RequiresSMChange) {
9117 if (CallerAttrs.hasStreamingInterfaceOrBody())
9119 else if (CallerAttrs.hasNonStreamingInterface())
9122 PStateSM = getRuntimePStateSM(DAG, Chain,
DL, MVT::i64);
9129 DescribeCallsite(R) <<
" requires a streaming mode transition";
9136 bool ShouldPreserveZT0 = CallerAttrs.requiresPreservingZT0(CalleeAttrs);
9140 if (ShouldPreserveZT0) {
9152 bool DisableZA = CallerAttrs.requiresDisablingZABeforeCall(CalleeAttrs);
9153 assert((!DisableZA || !RequiresLazySave) &&
9154 "Lazy-save should have PSTATE.SM=1 on entry to the function");
9175 if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
9177 for (const auto &F : Forwards) {
9184 unsigned ExtraArgLocs = 0;
9185 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
9203 if (Outs[i].ArgVT == MVT::i1) {
9225 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
9242 "Indirect arguments should be scalable on most subtargets");
9246 unsigned NumParts = 1;
9247 if (Outs[i].Flags.isInConsecutiveRegs()) {
9248 while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
9250 StoreSize *= NumParts;
9276 DL, Ptr.getValueType(),
9277 APInt(Ptr.getValueSizeInBits().getFixedValue(), PartSize));
9280 APInt(Ptr.getValueSizeInBits().getFixedValue(), PartSize), DL,
9281 Ptr.getValueType());
9296 if (i == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
9297 Outs[0].VT == MVT::i64) {
9299 "unexpected calling convention register assignment");
9300 assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
9301 "unexpected use of 'returned'");
9302 IsThisReturn = true;
9311 [=](const std::pair<unsigned, SDValue> &Elt) {
9321 return ArgReg.Reg == VA.getLocReg();
9350 OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
9352 OpSize = (OpSize + 7) / 8;
9354 !Flags.isInConsecutiveRegs()) {
9356 BEAlign = 8 - OpSize;
9359 int32_t Offset = LocMemOffset + BEAlign;
9373 Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
9381 if (Outs[i].Flags.isByVal()) {
9385 Chain, DL, DstAddr, Arg, SizeNode,
9386 Outs[i].Flags.getNonZeroByValAlign(),
9423 if (!MemOpChains.empty())
9427 if (RequiresSMChange) {
9430 DAG.getVTList(MVT::Other, MVT::Glue), Chain);
9443 for (auto &RegToPass : RegsToPass) {
9445 RegToPass.second, InGlue);
9453 unsigned OpFlags = 0;
9454 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
9455 CalledGlobal = G->getGlobal();
9465 } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
9469 const char *Sym = S->getSymbol();
9482 if (IsTailCall && !IsSibCall) {
9489 std::vector<SDValue> Ops;
9491 Ops.push_back(Callee);
9498 "tail calls cannot be marked with clang.arc.attachedcall");
9505 Ops.insert(Ops.begin() + 1, GA);
9508 }
else if (GuardWithBTI) {
9522 "Invalid auth call key");
9526 std::tie(IntDisc, AddrDisc) =
9534 Ops.push_back(IntDisc);
9535 Ops.push_back(AddrDisc);
9540 for (auto &RegToPass : RegsToPass)
9542 RegToPass.second.getValueType()));
9549 Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
9551 IsThisReturn = false;
9552 Mask = TRI->getCallPreservedMask(MF, CallConv);
9555 Mask = TRI->getCallPreservedMask(MF, CallConv);
9558 TRI->UpdateCustomCallPreservedMask(MF, &Mask);
9560 if (TRI->isAnyArgRegReserved(MF))
9561 TRI->emitReservedArgRegCallError(MF);
9563 assert(Mask && "Missing call preserved mask for calling convention");
9567 Ops.push_back(InGlue);
9575 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
9585 Chain = DAG.getNode(Opc, DL, {MVT::Other, MVT::Glue}, Ops);
9596 DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
9604 Chain, InGlue, CallConv, IsVarArg, RVLocs, DL, DAG, InVals, IsThisReturn,
9605 IsThisReturn ? OutVals[0] : SDValue(), RequiresSMChange);
9610 if (RequiresSMChange) {
9611 assert(PStateSM && "Expected a PStateSM to be set");
9617 InGlue = Result.getValue(1);
9620 DAG.getVTList(MVT::Other, MVT::Glue), {Result, InGlue});
9624 if (CallerAttrs.requiresEnablingZAAfterCall(CalleeAttrs))
9631 if (ShouldPreserveZT0)
9634 {Result, DAG.getConstant(0, DL, MVT::i32), ZTFrameIdx});
9636 if (RequiresLazySave) {
9640 TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
9645 DAG.getConstant(Intrinsic::aarch64_sme_get_tpidr2, DL, MVT::i32));
9657 RestoreRoutine, RegMask, Result.getValue(1)});
9662 DAG.getConstant(Intrinsic::aarch64_sme_set_tpidr2, DL, MVT::i32),
9665 } else if (RequiresSaveAllZA) {
9670 if (RequiresSMChange || RequiresLazySave || ShouldPreserveZT0 ||
9671 RequiresSaveAllZA) {
9672 for (unsigned I = 0; I < InVals.size(); ++I) {
9688 if (O.Flags.isSwiftSelf() || O.Flags.isSwiftError() ||
9689 O.Flags.isSwiftAsync()) {
9693 "Swift attributes can't be used with preserve_none",
9703bool AArch64TargetLowering::CanLowerReturn(
9708 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
9730 for (
unsigned i = 0, realRVLocIdx = 0; i != RVLocs.
size();
9731 ++i, ++realRVLocIdx) {
9734 SDValue Arg = OutVals[realRVLocIdx];
9740 if (Outs[i].ArgVT == MVT::i1) {
9756 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
9765 llvm::find_if(RetVals, [=](const std::pair<unsigned, SDValue> &Elt) {
9779 if (FuncAttrs.hasStreamingBody() && !FuncAttrs.hasStreamingInterface()) {
9780 if (FuncAttrs.hasStreamingCompatibleInterface()) {
9782 assert(Reg.isValid() && "PStateSM Register is invalid");
9794 for (auto &RetVal : RetVals) {
9795 if (FuncAttrs.hasStreamingBody() && !FuncAttrs.hasStreamingInterface() &&
9798 RetVal.second.getValueType(), RetVal.second);
9799 Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Glue);
9802 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
9813 unsigned RetValReg = AArch64::X0;
9815 RetValReg = AArch64::X8;
9826 if (AArch64::GPR64RegClass.contains(*I))
9828 else if (AArch64::FPR64RegClass.contains(*I))
9839 RetOps.push_back(Glue);
9848 getAddr(cast<ExternalSymbolSDNode>(Arm64ECRetDest), DAG, 0);
9851 RetOps.insert(RetOps.begin() + 1, Arm64ECRetDest);
9865 unsigned Flag) const {
9867 N->getOffset(), Flag);
9872 unsigned Flag) const {
9878 unsigned Flag) const {
9880 N->getOffset(), Flag);
9885 unsigned Flag) const {
9891 unsigned Flag) const {
9896template <class NodeTy>
9898 unsigned Flags) const {
9914template <class NodeTy>
9916 unsigned Flags) const {
9930template <class NodeTy>
9932 unsigned Flags) const {
9944template <class NodeTy>
9946 unsigned Flags) const {
9962 "unexpected offset in global node");
9967 return getGOT(GN, DAG, OpFlags);
9973 Result = getAddrLarge(GN, DAG, OpFlags);
9975 Result = getAddrTiny(GN, DAG, OpFlags);
9977 Result = getAddr(GN, DAG, OpFlags);
10016AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
10019 "This function expects a Darwin target");
10024 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
10034 PtrMemVT, DL, Chain, DescAddr,
10188SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr,
10200 Chain = DAG.getNode(Opcode, DL, NodeTys, {Chain, SymAddr});
10207AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
10227 "in local exec TLS model");
10243 return LowerELFTLSLocalExec(GV, ThreadBase, DL, DAG);
10264 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
10271 GV, DL, MVT::i64, 0,
10288 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
10296AArch64TargetLowering::LowerWindowsGlobalTLSAddress(SDValue Op,
10335 Chain = TLS.getValue(1);
10361 return LowerDarwinGlobalTLSAddress(Op, DAG);
10363 return LowerELFGlobalTLSAddress(Op, DAG);
10365 return LowerWindowsGlobalTLSAddress(Op, DAG);
10402 const auto *TGN = cast<GlobalAddressSDNode>(TGA.getNode());
10403 assert(TGN->getGlobal()->hasExternalWeakLinkage());
10409 if (TGN->getOffset() != 0)
10411 "unsupported non-zero offset in weak ptrauth global reference");
10418 {TGA, Key, Discriminator}),
10423AArch64TargetLowering::LowerPtrAuthGlobalAddress(SDValue Op,
10426 uint64_t KeyC = Op.getConstantOperandVal(1);
10427 SDValue AddrDiscriminator = Op.getOperand(2);
10428 uint64_t DiscriminatorC = Op.getConstantOperandVal(3);
10429 EVT VT = Op.getValueType();
10437 if (!isUInt<16>(DiscriminatorC))
10439 "constant discriminator in ptrauth global out of range [0, 0xffff]");
10445 int64_t PtrOffsetC = 0;
10447 PtrOffsetC = Ptr.getConstantOperandVal(1);
10448 Ptr = Ptr.getOperand(0);
10450 const auto *PtrN = cast<GlobalAddressSDNode>(Ptr.getNode());
10454 const unsigned OpFlags =
10458 "unsupported non-GOT op flags on ptrauth global reference");
10461 PtrOffsetC += PtrN->getOffset();
10464 assert(PtrN->getTargetFlags() == 0 &&
10465 "unsupported target flags on ptrauth global");
10470 ? AddrDiscriminator
10474 if (!NeedsGOTLoad) {
10478 {TPtr, Key, TAddrDiscriminator, Discriminator}),
10487 {TPtr, Key, TAddrDiscriminator, Discriminator}),
10502 cast<VTSDNode>(Val.getOperand(1))->getVT().getFixedSizeInBits() -
10524 bool ProduceNonFlagSettingCondBr =
10530 if (LHS.getValueType() == MVT::f128) {
10535 if (!RHS.getNode()) {
10555 OFCC = getInvertedCondCode(OFCC);
10562 if (LHS.getValueType().isInteger()) {
10564 (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
10569 if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
10576 isa<ConstantSDNode>(LHS.getOperand(1)) &&
10592 isa<ConstantSDNode>(LHS.getOperand(1)) &&
10609 DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
10613 LHS.getOpcode() != ISD::AND && ProduceNonFlagSettingCondBr) {
10620 DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
10629 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::bf16 ||
10630 LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
10655 EVT VT = Op.getValueType();
10683 return getSVESafeBitCast(VT, Op, DAG);
10690 auto SetVecVal = [&](int Idx = -1) {
10697 VecVal1 = BitCast(VecVT, In1, DAG);
10698 VecVal2 = BitCast(VecVT, In2, DAG);
10704 } else if (VT == MVT::f64) {
10705 VecVT = MVT::v2i64;
10706 SetVecVal(AArch64::dsub);
10707 } else if (VT == MVT::f32) {
10708 VecVT = MVT::v4i32;
10709 SetVecVal(AArch64::ssub);
10710 } else if (VT == MVT::f16 || VT == MVT::bf16) {
10711 VecVT = MVT::v8i16;
10712 SetVecVal(AArch64::hsub);
10723 if (VT == MVT::f64 || VT == MVT::v2f64) {
10732 if (VT == MVT::f16 || VT == MVT::bf16)
10734 if (VT == MVT::f32)
10736 if (VT == MVT::f64)
10739 return BitCast(VT, BSP, DAG);
10745 Attribute::NoImplicitFloat))
10748 EVT VT = Op.getValueType();
10762 if (VT == MVT::i32 && IsParity)
10773 if (VT == MVT::i32 || VT == MVT::i64) {
10774 if (VT == MVT::i32)
10780 if (VT == MVT::i32)
10787 } else if (VT == MVT::i128) {
10798 assert(!IsParity && "ISD::PARITY of vector types not supported");
10800 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
10801 VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
10802 "Unexpected type for custom ctpop lowering");
10810 EVT DT = VT == MVT::v2i64 ? MVT::v4i32 : VT;
10814 if (VT == MVT::v2i64) {
10817 } else if (VT == MVT::v2i32) {
10819 } else if (VT == MVT::v4i32) {
10829 unsigned EltSize = 8;
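// --- Editor's illustrative sketch (not part of the original source) ---
// The custom CTPOP path above works by counting bits per byte (NEON CNT only
// exists for the 8-bit element forms) and then summing adjacent byte counts
// back up to the original element width with pairwise widening adds. A
// scalar model of that "count bytes, then accumulate" idea for a 64-bit
// input (hypothetical name):
#include <cstdint>

static unsigned popcount64_bytewise(uint64_t x) {
  unsigned total = 0;
  for (int byte = 0; byte < 8; ++byte) {           // per-byte CNT step
    uint8_t b = static_cast<uint8_t>(x >> (8 * byte));
    unsigned c = 0;
    while (b) { c += b & 1u; b >>= 1; }
    total += c;                                     // pairwise-add/ADDV step
  }
  return total;
}
// --- end sketch ---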
10842 EVT VT =
Op.getValueType();
10855 EVT VT =
Op.getValueType();
10857 unsigned Opcode =
Op.getOpcode();
10901 EVT VT =
Op.getValueType();
10952 SmallVector<std::pair<SDValue, SDValue>, 16> &WorkList) {
10958 N =
N->getOperand(0);
10962 WorkList.push_back(std::make_pair(
N->getOperand(0),
N->getOperand(1)));
10968 if (
N->getOpcode() !=
ISD::OR || !
N->hasOneUse())
10982 EVT VT =
N->getValueType(0);
10992 unsigned NumXors = 0;
10997 std::tie(XOR0, XOR1) = WorkList[0];
11000 for (
unsigned I = 1;
I < WorkList.
size();
I++) {
11001 std::tie(XOR0, XOR1) = WorkList[
I];
11003 Cmp = DAG.
getNode(LogicOp,
DL, VT, Cmp, CmpChain);
11015 if (
Op.getValueType().isVector())
11016 return LowerVSETCC(
Op, DAG);
11018 bool IsStrict =
Op->isStrictFPOpcode();
11020 unsigned OpNo = IsStrict ? 1 : 0;
11023 Chain =
Op.getOperand(0);
11030 EVT VT =
Op.getValueType();
11036 if (
LHS.getValueType() == MVT::f128) {
11041 if (!
RHS.getNode()) {
11042 assert(
LHS.getValueType() ==
Op.getValueType() &&
11043 "Unexpected setcc expansion!");
11048 if (
LHS.getValueType().isInteger()) {
11064 assert(
LHS.getValueType() == MVT::bf16 ||
LHS.getValueType() == MVT::f16 ||
11065 LHS.getValueType() == MVT::f32 ||
LHS.getValueType() == MVT::f64);
11109 EVT VT =
LHS.getValueType();
11110 if (VT != MVT::i32 && VT != MVT::i64)
11118 LHS, RHS, InvCarry);
11120 EVT OpVT =
Op.getValueType();
11140 if (LHS.getValueType() == MVT::f128) {
11145 if (!RHS.getNode()) {
11152 if ((LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
11153 LHS.getValueType() == MVT::bf16) {
11159 if (LHS.getValueType().isInteger()) {
11161 (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
11172 EVT VT = LHS.getValueType();
11185 LHS.getValueType() == RHS.getValueType()) {
11186 EVT VT = LHS.getValueType();
11192 Shift = DAG.getNOT(dl, Shift, VT);
11205 } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isZero()) {
11225 } else if (CTVal && CFVal) {
11233 if (TrueVal == ~FalseVal) {
11235 } else if (FalseVal > std::numeric_limits<int64_t>::min() &&
11236 TrueVal == -FalseVal) {
11247 if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
11250 if (TrueVal32 > FalseVal32) {
11259 if ((TrueVal64 == FalseVal64 + 1) || (TrueVal64 + 1 == FalseVal64)) {
11262 if (TrueVal > FalseVal) {
11293 if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
11295 else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
11298 assert(CTVal && CFVal && "Expected constant operands for CSNEG.");
11312 return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
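// --- Editor's illustrative sketch (not part of the original source) ---
// The constant checks above recognize selects of two related constants so
// they map onto a single conditional-select instruction: CSEL picks one of
// two registers, CSINC picks a or b+1, CSINV picks a or ~b, and CSNEG picks
// a or -b. A scalar model of the pattern classification (hypothetical names):
#include <cstdint>

enum class CondSelKind { CSel, CSInc, CSInv, CSNeg };

// Given the two constants a select would produce, report which single
// conditional-select form can materialize the pair from one register.
static CondSelKind classifyCondSelect(int64_t trueVal, int64_t falseVal) {
  if (trueVal == falseVal + 1)
    return CondSelKind::CSInc;   // csinc: x vs. x + 1
  if (trueVal == ~falseVal)
    return CondSelKind::CSInv;   // csinv: x vs. ~x
  if (trueVal == -falseVal)
    return CondSelKind::CSNeg;   // csneg: x vs. -x
  return CondSelKind::CSel;      // otherwise both constants get materialized
}
// --- end sketch ---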
11316 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
11317 LHS.getValueType() == MVT::f64);
11331 if (RHSVal && RHSVal->isZero()) {
11339 CFVal && CFVal->isZero() &&
11362 EVT Ty = Op.getValueType();
11363 auto Idx = Op.getConstantOperandAPInt(2);
11364 int64_t IdxVal = Idx.getSExtValue();
11366 "Only expect scalable vectors for custom lowering of VECTOR_SPLICE");
11375 std::optional<unsigned> PredPattern;
11393 if (IdxVal >= 0 && (IdxVal * BlockSize / 8) < 256)
11407 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
11417 EVT Ty = Op.getValueType();
11418 if (Ty == MVT::aarch64svcount) {
11465 CC = cast<CondCodeSDNode>(CCVal.getOperand(2))->get();
11474 if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
11481 SDValue Res = LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
11483 if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
11499 return getAddrLarge(JT, DAG);
11501 return getAddrTiny(JT, DAG);
11502 return getAddr(JT, DAG);
11512 int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
11520 "aarch64-jump-table-hardening")) {
11528 "jump table hardening only supported on MachO/ELF");
11559 std::optional<uint16_t> BADisc =
11571 {Dest, Key, Disc, AddrDisc, Chain});
11582 return getGOT(CP, DAG);
11585 return getAddrLarge(CP, DAG);
11587 return getAddrTiny(CP, DAG);
11589 return getAddr(CP, DAG);
11597 if (std::optional<uint16_t> BADisc =
11612 {TargetBA, Key, AddrDisc, Disc});
11620 return getAddrLarge(BAN, DAG);
11622 return getAddrTiny(BAN, DAG);
11624 return getAddr(BAN, DAG);
11636 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
11667 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
11685 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
11757 return LowerWin64_VASTART(Op, DAG);
11759 return LowerDarwin_VASTART(Op, DAG);
11761 return LowerAAPCS_VASTART(Op, DAG);
11770 unsigned VaListSize =
11774 const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
11775 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
11779 Align(PtrSize), false, false, nullptr,
11786 "automatic va_arg instruction only works on Darwin");
11788 const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
11789 EVT VT = Op.getValueType();
11804 "currently not supported");
11821 ArgSize = std::max(ArgSize, MinSlotSize);
11822 bool NeedFPTrunc =
false;
11825 NeedFPTrunc =
true;
11859 EVT VT =
Op.getValueType();
11861 unsigned Depth =
Op.getConstantOperandVal(0);
11885#define GET_REGISTER_MATCHER
11886#include "AArch64GenAsmMatcher.inc"
11893 if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
11895 unsigned DwarfRegNum =
MRI->getDwarfRegNum(Reg,
false);
11897 !
MRI->isReservedReg(MF, Reg))
11910 EVT VT =
Op.getValueType();
11926 EVT VT =
Op.getValueType();
11928 unsigned Depth =
Op.getConstantOperandVal(0);
11931 SDValue FrameAddr = LowerFRAMEADDR(
Op, DAG);
11948 if (Subtarget->hasPAuth()) {
11976 bool OptForSize)
const {
11977 bool IsLegal =
false;
11986 const APInt ImmInt = Imm.bitcastToAPInt();
11987 if (VT == MVT::f64)
11989 else if (VT == MVT::f32)
11991 else if (VT == MVT::f16 || VT == MVT::bf16)
12001 if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
12010 "Should be able to build any value with at most 4 moves");
12011 unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 4 : 2));
12012 IsLegal =
Insn.size() <= Limit;
12016 <<
" imm value: "; Imm.dump(););
12028 if ((ST->hasNEON() &&
12029 (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
12030 VT == MVT::f32 || VT == MVT::v1f32 || VT == MVT::v2f32 ||
12031 VT == MVT::v4f32)) ||
12033 (VT == MVT::nxv8f16 || VT == MVT::nxv4f32 || VT == MVT::nxv2f64))) {
12040 constexpr unsigned AccurateBits = 8;
12042 ExtraSteps = DesiredBits <= AccurateBits
12047 return DAG.
getNode(Opcode,
SDLoc(Operand), VT, Operand);
12057 EVT VT =
Op.getValueType();
12064AArch64TargetLowering::getSqrtResultForDenormInput(
SDValue Op,
12073 bool Reciprocal)
const {
12077 DAG, ExtraSteps)) {
12085 for (
int i = ExtraSteps; i > 0; --i) {
12103 int &ExtraSteps)
const {
12106 DAG, ExtraSteps)) {
12114 for (
int i = ExtraSteps; i > 0; --i) {
12154const char *AArch64TargetLowering::LowerXConstraint(
EVT ConstraintVT)
const {
12162 if (!Subtarget->hasFPARMv8())
12187static std::optional<std::pair<unsigned, const TargetRegisterClass *>>
12190 Constraint[1] != 'p')
12191 return std::nullopt;
12193 Constraint = Constraint.substr(2, Constraint.size() - 3);
12194 bool IsPredicateAsCount = Constraint.starts_with("n");
12195 if (IsPredicateAsCount)
12200 return std::nullopt;
12202 if (IsPredicateAsCount)
12203 return std::make_pair(AArch64::PN0 + V, &AArch64::PNRRegClass);
12205 return std::make_pair(AArch64::P0 + V, &AArch64::PPRRegClass);
12208static std::optional<PredicateConstraint>
12211 .Case("Uph", PredicateConstraint::Uph)
12212 .Case("Upl", PredicateConstraint::Upl)
12213 .Case("Upa", PredicateConstraint::Upa)
12219 if (VT != MVT::aarch64svcount &&
12223 switch (Constraint) {
12224 case PredicateConstraint::Uph:
12225 return VT == MVT::aarch64svcount ? &AArch64::PNR_p8to15RegClass
12226                                  : &AArch64::PPR_p8to15RegClass;
12227 case PredicateConstraint::Upl:
12228 return VT == MVT::aarch64svcount ? &AArch64::PNR_3bRegClass
12229                                  : &AArch64::PPR_3bRegClass;
12230 case PredicateConstraint::Upa:
12231 return VT == MVT::aarch64svcount ? &AArch64::PNRRegClass
12232                                  : &AArch64::PPRRegClass;
12240static std::optional<ReducedGprConstraint>
12243 .Case("Uci", ReducedGprConstraint::Uci)
12244 .Case("Ucj", ReducedGprConstraint::Ucj)
12253 switch (Constraint) {
12254 case ReducedGprConstraint::Uci:
12255 return &AArch64::MatrixIndexGPR32_8_11RegClass;
12256 case ReducedGprConstraint::Ucj:
12257 return &AArch64::MatrixIndexGPR32_12_15RegClass;
12298SDValue AArch64TargetLowering::LowerAsmOutputForConstraint(
12300 const AsmOperandInfo &OpInfo,
SelectionDAG &DAG)
const {
12305 if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
12306 OpInfo.ConstraintVT.getSizeInBits() < 8)
12321 if (OpInfo.ConstraintVT.getSizeInBits() <= 32)
12332AArch64TargetLowering::getConstraintType(
StringRef Constraint)
const {
12333 if (Constraint.
size() == 1) {
12334 switch (Constraint[0]) {
12371AArch64TargetLowering::getSingleConstraintMatchWeight(
12372 AsmOperandInfo &
info,
const char *constraint)
const {
12374 Value *CallOperandVal =
info.CallOperandVal;
12377 if (!CallOperandVal)
12381 switch (*constraint) {
12403std::pair<unsigned, const TargetRegisterClass *>
12404AArch64TargetLowering::getRegForInlineAsmConstraint(
12406 if (Constraint.
size() == 1) {
12407 switch (Constraint[0]) {
12410 return std::make_pair(0U,
nullptr);
12412 return std::make_pair(0U, &AArch64::GPR64x8ClassRegClass);
12414 return std::make_pair(0U, &AArch64::GPR64commonRegClass);
12415 return std::make_pair(0U, &AArch64::GPR32commonRegClass);
12417 if (!Subtarget->hasFPARMv8())
12421 return std::make_pair(0U, &AArch64::ZPRRegClass);
12422 return std::make_pair(0U,
nullptr);
12424 if (VT == MVT::Other)
12428 return std::make_pair(0U, &AArch64::FPR16RegClass);
12430 return std::make_pair(0U, &AArch64::FPR32RegClass);
12432 return std::make_pair(0U, &AArch64::FPR64RegClass);
12434 return std::make_pair(0U, &AArch64::FPR128RegClass);
12440 if (!Subtarget->hasFPARMv8())
12443 return std::make_pair(0U, &AArch64::ZPR_4bRegClass);
12445 return std::make_pair(0U, &AArch64::FPR128_loRegClass);
12448 if (!Subtarget->hasFPARMv8())
12451 return std::make_pair(0U, &AArch64::ZPR_3bRegClass);
12459 return std::make_pair(0U, RegClass);
12463 return std::make_pair(0U, RegClass);
12465 if (StringRef("{cc}").equals_insensitive(Constraint) ||
12467 return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
12469 if (Constraint == "{za}") {
12470 return std::make_pair(unsigned(AArch64::ZA), &AArch64::MPRRegClass);
12473 if (Constraint == "{zt0}") {
12474 return std::make_pair(unsigned(AArch64::ZT0), &AArch64::ZTRRegClass);
12479 std::pair<unsigned, const TargetRegisterClass *> Res;
12484 unsigned Size = Constraint.size();
12485 if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
12486 tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
12489 if (!Failed && RegNo >= 0 && RegNo <= 31) {
12494 Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
12495 Res.second = &AArch64::FPR64RegClass;
12497 Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
12498 Res.second = &AArch64::FPR128RegClass;
12504 if (Res.second && !Subtarget->hasFPARMv8() &&
12505 !AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
12506 !AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
12507 return std::make_pair(0U,
nullptr);
12514 bool AllowUnknown)
const {
12515 if (Subtarget->hasLS64() && Ty->
isIntegerTy(512))
12516 return EVT(MVT::i64x8);
12523void AArch64TargetLowering::LowerAsmOperandForConstraint(
12529 if (Constraint.
size() != 1)
12532 char ConstraintLetter = Constraint[0];
12533 switch (ConstraintLetter) {
12544 if (
Op.getValueType() == MVT::i64)
12545 Result = DAG.
getRegister(AArch64::XZR, MVT::i64);
12547 Result = DAG.
getRegister(AArch64::WZR, MVT::i32);
12569 switch (ConstraintLetter) {
12577 if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal))
12582 if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) {
12583 CVal =
C->getSExtValue();
12610 if (!isUInt<32>(CVal))
12614 if ((CVal & 0xFFFF) == CVal)
12616 if ((CVal & 0xFFFF0000ULL) == CVal)
12619 if ((NCVal & 0xFFFFULL) == NCVal)
12621 if ((NCVal & 0xFFFF0000ULL) == NCVal)
12628 if ((CVal & 0xFFFFULL) == CVal)
12630 if ((CVal & 0xFFFF0000ULL) == CVal)
12632 if ((CVal & 0xFFFF00000000ULL) == CVal)
12634 if ((CVal & 0xFFFF000000000000ULL) == CVal)
12637 if ((NCVal & 0xFFFFULL) == NCVal)
12639 if ((NCVal & 0xFFFF0000ULL) == NCVal)
12641 if ((NCVal & 0xFFFF00000000ULL) == NCVal)
12643 if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
12657 Ops.push_back(Result);
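// --- Editor's illustrative sketch (not part of the original source) ---
// The 64-bit immediate checks above accept a constant (or its bitwise
// complement) only when all of its set bits fall inside a single 16-bit
// field at bit position 0, 16, 32 or 48, i.e. when one MOVZ/MOVN can
// materialize it. A standalone model of that test (hypothetical name):
#include <cstdint>

static bool fitsSingleMovz(uint64_t value) {
  for (unsigned shift = 0; shift < 64; shift += 16)
    if ((value & (0xFFFFull << shift)) == value)
      return true;        // all set bits live in one 16-bit chunk
  return false;
}

// Usage: fitsSingleMovz(0x00AB000000000000) is true; fitsSingleMovz(0x10001)
// is false (that constant needs a MOVZ plus a MOVK).
// --- end sketch ---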
12684 EVT EltType = V.getValueType().getVectorElementType();
12694 EVT VT =
Op.getValueType();
12696 "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
12700 if (VT != MVT::v16i8 && VT != MVT::v8i8)
12704 assert((NumElts == 8 || NumElts == 16) &&
12705 "Need to have exactly 8 or 16 elements in vector.");
12711 for (
unsigned i = 0; i < NumElts; ++i) {
12716 SDValue OperandSourceVec = V.getOperand(0);
12718 SourceVec = OperandSourceVec;
12719 else if (SourceVec != OperandSourceVec)
12725 SDValue MaskSource = V.getOperand(1);
12727 if (!isa<ConstantSDNode>(MaskSource.
getOperand(1)))
12732 }
else if (!AndMaskConstants.
empty()) {
12746 if (!isa<ConstantSDNode>(MaskIdx) ||
12747 !cast<ConstantSDNode>(MaskIdx)->getConstantIntValue()->equalsInt(i))
12752 if (!MaskSourceVec) {
12756 }
else if (MaskSourceVec != MaskSource->
getOperand(0)) {
12770 if (!AndMaskConstants.
empty())
12776 DAG.
getConstant(Intrinsic::aarch64_neon_tbl1, dl, MVT::i32), SourceVec,
12785 LLVM_DEBUG(
dbgs() <<
"AArch64TargetLowering::ReconstructShuffle\n");
12787 EVT VT =
Op.getValueType();
12789 "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
12792 struct ShuffleSourceInfo {
12807 ShuffleSourceInfo(
SDValue Vec)
12808 : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
12809 ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
12811 bool operator ==(
SDValue OtherVec) {
return Vec == OtherVec; }
12817 for (
unsigned i = 0; i < NumElts; ++i) {
12822 !isa<ConstantSDNode>(V.getOperand(1)) ||
12823 V.getOperand(0).getValueType().isScalableVector()) {
12825 dbgs() <<
"Reshuffle failed: "
12826 "a shuffle can only come from building a vector from "
12827 "various elements of other fixed-width vectors, provided "
12828 "their indices are constant\n");
12833 SDValue SourceVec = V.getOperand(0);
12834 auto Source =
find(Sources, SourceVec);
12835 if (Source == Sources.
end())
12836 Source = Sources.
insert(Sources.
end(), ShuffleSourceInfo(SourceVec));
12839 unsigned EltNo = V.getConstantOperandVal(1);
12840 Source->MinElt = std::min(Source->MinElt, EltNo);
12841 Source->MaxElt = std::max(Source->MaxElt, EltNo);
12846 if ((Sources.
size() == 3 || Sources.
size() == 4) && NumElts > 4) {
12851 for (unsigned I = 0; I < NumElts; ++I) {
12854 for (unsigned OF = 0; OF < OutputFactor; OF++)
12855 Mask.push_back(-1);
12861 unsigned Lane = V.getConstantOperandVal(1);
12862 for (unsigned S = 0; S < Sources.size(); S++) {
12863 if (V.getOperand(0) == Sources[S].Vec) {
12864 unsigned InputSize = Sources[S].Vec.getScalarValueSizeInBits();
12865 unsigned InputBase = 16 * S + Lane * InputSize / 8;
12866 for (unsigned OF = 0; OF < OutputFactor; OF++)
12867 Mask.push_back(InputBase + OF);
12877 ? Intrinsic::aarch64_neon_tbl3
12878 : Intrinsic::aarch64_neon_tbl4,
12880 for (unsigned i = 0; i < Sources.size(); i++) {
12881 SDValue Src = Sources[i].Vec;
12882 EVT SrcVT = Src.getValueType();
12885 "Expected a legally typed vector");
12893 for (unsigned i = 0; i < Mask.size(); i++)
12895 assert((Mask.size() == 8 || Mask.size() == 16) &&
12896 "Expected a v8i8 or v16i8 Mask");
12898 DAG.getBuildVector(Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, dl, TBLMask));
12902 Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, TBLOperands);
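// --- Editor's illustrative sketch (not part of the original source) ---
// TBL works on byte indices into the concatenated source registers, so the
// loop above turns "lane L of source S" into a run of byte indices starting
// at 16*S + L*(element bytes). A standalone model of that index computation
// (hypothetical names, assuming 16-byte source registers):
#include <cstdint>
#include <vector>

static std::vector<int> tblByteIndices(unsigned source, unsigned lane,
                                       unsigned elemBytes) {
  std::vector<int> bytes;
  unsigned base = 16 * source + lane * elemBytes; // first byte of that lane
  for (unsigned b = 0; b < elemBytes; ++b)
    bytes.push_back(static_cast<int>(base + b));
  return bytes;
}

// e.g. lane 2 of source 1 with 4-byte elements -> indices {24, 25, 26, 27}.
// --- end sketch ---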
12906 if (Sources.
size() > 2) {
12907 LLVM_DEBUG(
dbgs() <<
"Reshuffle failed: currently only do something "
12908 <<
"sensible when at most two source vectors are "
12916 for (
auto &Source : Sources) {
12917 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
12918 if (SrcEltTy.
bitsLT(SmallestEltTy)) {
12919 SmallestEltTy = SrcEltTy;
12922 unsigned ResMultiplier =
12931 for (
auto &Src : Sources) {
12932 EVT SrcVT = Src.ShuffleVec.getValueType();
12945 assert(2 * SrcVTSize == VTSize);
12950 DAG.
getUNDEF(Src.ShuffleVec.getValueType()));
12956 dbgs() <<
"Reshuffle failed: result vector too small to extract\n");
12960 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
12962 dbgs() <<
"Reshuffle failed: span too large for a VEXT to cope\n");
12966 if (Src.MinElt >= NumSrcElts) {
12971 Src.WindowBase = -NumSrcElts;
12972 }
else if (Src.MaxElt < NumSrcElts) {
12989 dbgs() <<
"Reshuffle failed: don't know how to lower AArch64ISD::EXT "
12990 "for SVE vectors.");
12997 Src.WindowBase = -Src.MinElt;
13004 for (
auto &Src : Sources) {
13005 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
13006 if (SrcEltTy == SmallestEltTy)
13017 Src.WindowBase *= Src.WindowScale;
13022 for (
auto Src : Sources)
13023 assert(Src.ShuffleVec.getValueType() == ShuffleVT);
13031 if (Entry.isUndef())
13034 auto Src =
find(Sources, Entry.getOperand(0));
13035 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
13040 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
13043 int LanesDefined = BitsDefined / BitsPerShuffleLane;
13047 int *LaneMask = &Mask[i * ResMultiplier];
13049 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
13050 ExtractBase += NumElts * (Src - Sources.
begin());
13051 for (
int j = 0; j < LanesDefined; ++j)
13052 LaneMask[j] = ExtractBase + j;
13057 LLVM_DEBUG(
dbgs() <<
"Reshuffle failed: illegal shuffle mask\n");
13062 for (
unsigned i = 0; i < Sources.
size(); ++i)
13075 dbgs() <<
"Reshuffle, creating node: "; V.dump(););
13094 unsigned ExpectedElt = Imm;
13095 for (
unsigned i = 1; i < NumElts; ++i) {
13099 if (ExpectedElt == NumElts)
13104 if (ExpectedElt !=
static_cast<unsigned>(M[i]))
13115 if (V.getValueType() != MVT::v16i8)
13117 assert(V.getNumOperands() == 16 &&
"Expected 16 operands on the BUILDVECTOR");
13119 for (
unsigned X = 0;
X < 4;
X++) {
13122 SDValue BaseExt = V.getOperand(
X * 4);
13126 !isa<ConstantSDNode>(BaseExt.
getOperand(1)) ||
13131 for (
unsigned Y = 1;
Y < 4;
Y++) {
13134 Ext.getOperand(0) !=
Base ||
13135 !isa<ConstantSDNode>(Ext.getOperand(1)) ||
13136 Ext.getConstantOperandVal(1) !=
Y)
13147 V.getOperand(0).getOperand(0), V.getOperand(4).getOperand(0),
13148 V.getOperand(8).getOperand(0), V.getOperand(12).getOperand(0)};
13150 if (V.getValueType() == MVT::v4i32)
13166 unsigned &DupLaneOp) {
13168 "Only possible block sizes for wide DUP are: 16, 32, 64");
13187 for (
size_t BlockIndex = 0; BlockIndex < NumBlocks; BlockIndex++)
13188 for (
size_t I = 0;
I < NumEltsPerBlock;
I++) {
13189 int Elt = M[BlockIndex * NumEltsPerBlock +
I];
13193 if ((
unsigned)Elt >= SingleVecNumElements)
13195 if (BlockElts[
I] < 0)
13196 BlockElts[
I] = Elt;
13197 else if (BlockElts[
I] != Elt)
13206 auto FirstRealEltIter =
find_if(BlockElts, [](
int Elt) {
return Elt >= 0; });
13207 assert(FirstRealEltIter != BlockElts.
end() &&
13208 "Shuffle with all-undefs must have been caught by previous cases, "
13210 if (FirstRealEltIter == BlockElts.
end()) {
13216 size_t FirstRealIndex = FirstRealEltIter - BlockElts.
begin();
13218 if ((
unsigned)*FirstRealEltIter < FirstRealIndex)
13221 size_t Elt0 = *FirstRealEltIter - FirstRealIndex;
13224 if (Elt0 % NumEltsPerBlock != 0)
13228 for (
size_t I = 0;
I < NumEltsPerBlock;
I++)
13229 if (BlockElts[
I] >= 0 && (
unsigned)BlockElts[
I] != Elt0 +
I)
13232 DupLaneOp = Elt0 / NumEltsPerBlock;
13241 const int *FirstRealElt =
find_if(M, [](
int Elt) {
return Elt >= 0; });
13246 APInt ExpectedElt =
APInt(MaskBits, *FirstRealElt + 1,
false,
13250 bool FoundWrongElt = std::any_of(FirstRealElt + 1, M.end(), [&](
int Elt) {
13251 return Elt != ExpectedElt++ && Elt != -1;
13283 if (NumElts % 2 != 0)
13285 WhichResult = (M[0] == 0 ? 0 : 1);
13286 unsigned Idx = WhichResult * NumElts / 2;
13287 for (
unsigned i = 0; i != NumElts; i += 2) {
13288 if ((M[i] >= 0 && (
unsigned)M[i] !=
Idx) ||
13302 WhichResult = (M[0] == 0 ? 0 : 1);
13303 for (
unsigned j = 0; j != 2; ++j) {
13304 unsigned Idx = WhichResult;
13305 for (
unsigned i = 0; i != Half; ++i) {
13306 int MIdx = M[i + j * Half];
13307 if (MIdx >= 0 && (
unsigned)MIdx !=
Idx)
13321 if (NumElts % 2 != 0)
13323 WhichResult = (M[0] == 0 ? 0 : 1);
13324 for (
unsigned i = 0; i < NumElts; i += 2) {
13325 if ((M[i] >= 0 && (
unsigned)M[i] != i + WhichResult) ||
13326 (M[i + 1] >= 0 && (
unsigned)M[i + 1] != i + WhichResult))
13333 bool &DstIsLeft,
int &Anomaly) {
13334 if (M.size() !=
static_cast<size_t>(NumInputElements))
13337 int NumLHSMatch = 0, NumRHSMatch = 0;
13338 int LastLHSMismatch = -1, LastRHSMismatch = -1;
13340 for (
int i = 0; i < NumInputElements; ++i) {
13350 LastLHSMismatch = i;
13352 if (M[i] == i + NumInputElements)
13355 LastRHSMismatch = i;
13358 if (NumLHSMatch == NumInputElements - 1) {
13360 Anomaly = LastLHSMismatch;
13362 }
else if (NumRHSMatch == NumInputElements - 1) {
13364 Anomaly = LastRHSMismatch;
13377 for (
int I = 0, E = NumElts / 2;
I != E;
I++) {
13382 int Offset = NumElts / 2;
13383 for (
int I = NumElts / 2, E = NumElts;
I != E;
I++) {
13384 if (Mask[
I] !=
I + SplitLHS *
Offset)
13393 EVT VT =
Op.getValueType();
13428 unsigned OpNum = (PFEntry >> 26) & 0x0F;
13429 unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
13430 unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
13452 if (LHSID == (1 * 9 + 2) * 9 + 3)
13454 assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 &&
"Illegal OP_COPY!");
13458 if (OpNum == OP_MOVLANE) {
13460 auto getPFIDLane = [](
unsigned ID,
int Elt) ->
int {
13461 assert(Elt < 4 &&
"Expected Perfect Lanes to be less than 4");
13467 return (
ID % 9 == 8) ? -1 :
ID % 9;
13476 assert(RHSID < 8 &&
"Expected a lane index for RHSID!");
13477 unsigned ExtLane = 0;
13483 int MaskElt = getPFIDLane(
ID, (RHSID & 0x01) << 1) >> 1;
13485 MaskElt = (getPFIDLane(
ID, ((RHSID & 0x01) << 1) + 1) - 1) >> 1;
13486 assert(MaskElt >= 0 &&
"Didn't expect an undef movlane index!");
13487 ExtLane = MaskElt < 2 ? MaskElt : (MaskElt - 2);
13488 Input = MaskElt < 2 ? V1 : V2;
13494 "Expected 16 or 32 bit shuffle elemements");
13499 int MaskElt = getPFIDLane(
ID, RHSID);
13500 assert(MaskElt >= 0 &&
"Didn't expect an undef movlane index!");
13501 ExtLane = MaskElt < 4 ? MaskElt : (MaskElt - 4);
13502 Input = MaskElt < 4 ? V1 : V2;
13504 if (VT == MVT::v4i16) {
13547 if (EltTy == MVT::i8)
13549 else if (EltTy == MVT::i16 || EltTy == MVT::f16 || EltTy == MVT::bf16)
13551 else if (EltTy == MVT::i32 || EltTy == MVT::f32)
13553 else if (EltTy == MVT::i64 || EltTy == MVT::f64)
13561 return DAG.
getNode(Opcode, dl, VT, OpLHS, Lane);
13592 EVT EltVT =
Op.getValueType().getVectorElementType();
13604 bool IsUndefOrZero = V2.isUndef() ||
isZerosVector(V2.getNode());
13605 MVT IndexVT = MVT::v8i8;
13606 unsigned IndexLen = 8;
13607 if (
Op.getValueSizeInBits() == 128) {
13608 IndexVT = MVT::v16i8;
13613 for (int Val : ShuffleMask) {
13614 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
13615 unsigned Offset = Byte + Val * BytesPerElt;
13618 if (IsUndefOrZero && Offset >= IndexLen)
13628 if (IsUndefOrZero) {
13633 DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
13636 if (IndexLen == 8) {
13640 DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
13651 DAG.getConstant(Intrinsic::aarch64_neon_tbl2, DL, MVT::i32), V1Cst,
13660 if (EltType == MVT::i8)
13662 if (EltType == MVT::i16 || EltType == MVT::f16 || EltType == MVT::bf16)
13664 if (EltType == MVT::i32 || EltType == MVT::f32)
13666 if (EltType == MVT::i64 || EltType == MVT::f64)
13675 auto getScaledOffsetDup = [](
SDValue BitCast,
int &LaneC,
MVT &CastVT) {
13686 unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
13688 if (ExtIdxInBits % CastedEltBitWidth != 0)
13696 LaneC += ExtIdxInBits / CastedEltBitWidth;
13703 unsigned SrcVecNumElts =
13710 if (getScaledOffsetDup(V, Lane, CastVT)) {
13711 V = DAG.
getBitcast(CastVT, V.getOperand(0).getOperand(0));
13713 V.getOperand(0).getValueType().is128BitVector()) {
13716 Lane += V.getConstantOperandVal(1);
13717 V = V.getOperand(0);
13743 EVT VT =
Op.getValueType();
13753 if (ElementSize > 32 || ElementSize == 1)
13783 EVT VT =
Op.getValueType();
13797 for (
unsigned I = 0;
I < 16;
I++) {
13798 if (ShuffleMask[
I] < 16)
13802 dyn_cast<ConstantSDNode>(Mask2->
getOperand(ShuffleMask[
I] - 16));
13805 TBLMaskParts[
I] = DAG.
getConstant(
C->getSExtValue() + 32, dl, MVT::i32);
13822AArch64TargetLowering::LowerZERO_EXTEND_VECTOR_INREG(
SDValue Op,
13825 EVT VT =
Op.getValueType();
13829 "Unexpected extension factor.");
13842 EVT VT =
Op.getValueType();
13847 return LowerFixedLengthVECTOR_SHUFFLEToSVE(
Op, DAG);
13860 "Unexpected VECTOR_SHUFFLE mask size!");
13886 for (unsigned LaneSize : {64U, 32U, 16U}) {
13898 V1 = constructDup(V1, Lane, dl, NewVecTy, Opcode, DAG);
13906 if (isREVMask(ShuffleMask, EltSize, NumElts, 64))
13908 if (isREVMask(ShuffleMask, EltSize, NumElts, 32))
13910 if (isREVMask(ShuffleMask, EltSize, NumElts, 16))
13913 if (((NumElts == 8 && EltSize == 16) || (NumElts == 16 && EltSize == 8)) &&
13920 bool ReverseEXT = false;
13922 if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
13934 unsigned WhichResult;
13935 if (isZIPMask(ShuffleMask, NumElts, WhichResult)) {
13939 if (isUZPMask(ShuffleMask, NumElts, WhichResult)) {
13943 if (isTRNMask(ShuffleMask, NumElts, WhichResult)) {
13967 if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
13972 int SrcLane = ShuffleMask[Anomaly];
13973 if (SrcLane >= NumInputElements) {
13975 SrcLane -= NumElts;
13982 ScalarVT = MVT::i32;
13995 if (NumElts == 4) {
13996 unsigned PFIndexes[4];
13997 for (unsigned i = 0; i != 4; ++i) {
13998 if (ShuffleMask[i] < 0)
14001 PFIndexes[i] = ShuffleMask[i];
14005 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
14006 PFIndexes[2] * 9 + PFIndexes[3];
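// --- Editor's illustrative sketch (not part of the original source) ---
// The perfect-shuffle table is indexed by encoding the four output lanes in
// base 9: each lane is either a source index 0..7 or 8 for "undef", so the
// index above is ((l0*9 + l1)*9 + l2)*9 + l3. A standalone model of that
// encoding (hypothetical name):
static unsigned perfectShuffleIndex(const unsigned lanes[4]) {
  // lanes[i] in [0, 8]; 8 encodes an undef output lane.
  return lanes[0] * 9 * 9 * 9 + lanes[1] * 9 * 9 + lanes[2] * 9 + lanes[3];
}
// --- end sketch ---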
14016 "Expected larger vector element sizes to be handled already");
14018 for (
int M : ShuffleMask)
14020 M >=
static_cast<int>(NumElts) ? 0 : 0xffffffff, dl, MVT::i32));
14034 EVT VT =
Op.getValueType();
14037 return LowerToScalableOp(
Op, DAG);
14040 "Unexpected vector type!");
14043 if (isa<ConstantSDNode>(
Op.getOperand(0)))
14055 if (VT == MVT::nxv1i1)
14067 EVT VT =
Op.getValueType();
14079 auto *CIdx = dyn_cast<ConstantSDNode>(Idx128);
14080 if (CIdx && (CIdx->getZExtValue() <= 3)) {
14110 APInt &UndefBits) {
14112 APInt SplatBits, SplatUndef;
14113 unsigned SplatBitSize;
14115 if (BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14118 for (
unsigned i = 0; i < NumSplats; ++i) {
14119 CnstBits <<= SplatBitSize;
14120 UndefBits <<= SplatBitSize;
14122 UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.
getSizeInBits());
14133 const APInt &Bits) {
14134 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
14136 EVT VT =
Op.getValueType();
14155 const SDValue *LHS =
nullptr) {
14156 EVT VT =
Op.getValueType();
14161 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
14164 bool isAdvSIMDModImm =
false;
14184 if (isAdvSIMDModImm) {
14189 Mov = DAG.
getNode(NewOp, dl, MovTy,
14194 Mov = DAG.
getNode(NewOp, dl, MovTy,
14208 const SDValue *LHS =
nullptr) {
14209 EVT VT =
Op.getValueType();
14214 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
14217 bool isAdvSIMDModImm =
false;
14229 if (isAdvSIMDModImm) {
14234 Mov = DAG.
getNode(NewOp, dl, MovTy,
14239 Mov = DAG.
getNode(NewOp, dl, MovTy,
14253 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
14255 EVT VT =
Op.getValueType();
14257 bool isAdvSIMDModImm =
false;
14269 if (isAdvSIMDModImm) {
14283 const APInt &Bits) {
14284 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
14286 EVT VT =
Op.getValueType();
14304 const APInt &Bits) {
14305 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
14307 EVT VT =
Op.getValueType();
14310 bool isAdvSIMDModImm =
false;
14314 MovTy = isWide ? MVT::v4f32 : MVT::v2f32;
14319 MovTy = MVT::v2f64;
14322 if (isAdvSIMDModImm) {
14346 for (
unsigned i = 1; i < NumElts; ++i)
14347 if (dyn_cast<ConstantSDNode>(Bvec->
getOperand(i)) != FirstElt)
14356 N =
N.getOperand(0);
14362 unsigned NumElts =
N.getValueType().getVectorMinNumElements();
14366 N =
N.getOperand(0);
14369 if (
N.getValueType().getVectorMinNumElements() < NumElts)
14380 N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
14381 return N.getValueType().getVectorMinNumElements() >= NumElts;
14388 unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
14389 if (MaxSVESize && MinSVESize == MaxSVESize) {
14391 unsigned PatNumElts =
14393 return PatNumElts == (NumElts * VScale);
14407 EVT VT =
N->getValueType(0);
14417 SDValue FirstOp =
N->getOperand(0);
14418 unsigned FirstOpc = FirstOp.
getOpcode();
14419 SDValue SecondOp =
N->getOperand(1);
14420 unsigned SecondOpc = SecondOp.
getOpcode();
14451 if (ShiftHasPredOp) {
14457 C2 =
C.getZExtValue();
14459 dyn_cast<ConstantSDNode>(Shift.
getOperand(1)))
14460 C2 = C2node->getZExtValue();
14474 assert(C1nodeImm && C1nodeShift);
14476 C1AsAPInt = C1AsAPInt.
zextOrTrunc(ElemSizeInBits);
14482 if (C2 > ElemSizeInBits)
14487 if (C1AsAPInt != RequiredC1)
14511 return LowerToScalableOp(
Op, DAG);
14517 EVT VT =
Op.getValueType();
14523 dyn_cast<BuildVectorSDNode>(
Op.getOperand(1).getNode());
14526 LHS =
Op.getOperand(1);
14527 BVN = dyn_cast<BuildVectorSDNode>(
Op.getOperand(0).getNode());
14544 UndefBits, &LHS)) ||
14560 EVT VT =
Op.getValueType();
14572 if (
auto *CstLane = dyn_cast<ConstantSDNode>(Lane)) {
14574 CstLane->getAPIntValue().trunc(EltTy.
getSizeInBits()).getZExtValue(),
14576 }
else if (Lane.getNode()->isUndef()) {
14579 assert(Lane.getValueType() == MVT::i32 &&
14580 "Unexpected BUILD_VECTOR operand type");
14589 EVT VT =
Op.getValueType();
14591 "Expected a legal NEON vector");
14597 auto TryMOVIWithBits = [&](
APInt DefBits) {
14611 APInt NotDefBits = ~DefBits;
14621 if (
SDValue R = TryMOVIWithBits(DefBits))
14623 if (
SDValue R = TryMOVIWithBits(UndefBits))
14627 auto TryWithFNeg = [&](
APInt DefBits,
MVT FVT) {
14633 unsigned NumElts = VT.
getSizeInBits() / FVT.getScalarSizeInBits();
14634 for (
unsigned i = 0; i < NumElts; i++)
14635 NegBits |= Neg << (FVT.getScalarSizeInBits() * i);
14636 NegBits = DefBits ^ NegBits;
14640 if (
SDValue NewOp = TryMOVIWithBits(NegBits)) {
14651 if ((R = TryWithFNeg(DefBits, MVT::f32)) ||
14652 (R = TryWithFNeg(DefBits, MVT::f64)) ||
14653 (ST->hasFullFP16() && (R = TryWithFNeg(DefBits, MVT::f16))))
14660SDValue AArch64TargetLowering::LowerFixedLengthBuildVectorToSVE(
14662 EVT VT =
Op.getValueType();
14665 auto *BVN = cast<BuildVectorSDNode>(
Op);
14686 NumElems -
count_if(
Op->op_values(), IsExtractElt) > 4)
14693 return Op.isUndef() ? Undef
14694 : DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
14695 ContainerVT, Undef, Op, ZeroI64);
14699 while (Intermediates.
size() > 1) {
14702 for (
unsigned I = 0;
I < Intermediates.
size();
I += 2) {
14705 Intermediates[
I / 2] =
14710 Intermediates.
resize(Intermediates.
size() / 2);
14721 EVT VT =
Op.getValueType();
14724 cast<BuildVectorSDNode>(
Op)->isConstantSequence();
14726 return LowerFixedLengthBuildVectorToSVE(
Op, DAG);
14744 Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
14745 if (Val.isZero() || (VT.
isInteger() && Val.isAllOnes()))
14749 if (
Const->isZero() && !
Const->isNegative())
14770 bool isOnlyLowElement =
true;
14771 bool usesOnlyOneValue =
true;
14772 bool usesOnlyOneConstantValue =
true;
14774 bool AllLanesExtractElt =
true;
14775 unsigned NumConstantLanes = 0;
14776 unsigned NumDifferentLanes = 0;
14777 unsigned NumUndefLanes = 0;
14781 unsigned ConsecutiveValCount = 0;
14783 for (unsigned i = 0; i < NumElts; ++i) {
14786 AllLanesExtractElt = false;
14792 isOnlyLowElement = false;
14797 ++NumConstantLanes;
14798 if (!ConstantValue.getNode())
14800 else if (ConstantValue != V)
14801 usesOnlyOneConstantValue = false;
14804 if (!Value.getNode())
14806 else if (V != Value) {
14807 usesOnlyOneValue = false;
14808 ++NumDifferentLanes;
14811 if (PrevVal != V) {
14812 ConsecutiveValCount = 0;
14827 DifferentValueMap[V] = ++ConsecutiveValCount;
14830 if (!Value.getNode()) {
14832 dbgs() <<
"LowerBUILD_VECTOR: value undefined, creating undef node\n");
14840 LLVM_DEBUG(
dbgs() <<
"LowerBUILD_VECTOR: only low element used, creating 1 "
14841 "SCALAR_TO_VECTOR node\n");
14845 if (AllLanesExtractElt) {
14851 for (
unsigned i = 0; i < NumElts; ++i) {
14854 if (!isa<ConstantSDNode>(
N->getOperand(1))) {
14877 uint64_t Val =
N->getConstantOperandVal(1);
14878 if (Val == 2 * i) {
14882 if (Val - 1 == 2 * i) {
14909 if (usesOnlyOneValue) {
14912 Value.getValueType() != VT) {
14914 dbgs() <<
"LowerBUILD_VECTOR: use DUP for non-constant splats\n");
14922 if (
Value.getValueSizeInBits() == 64) {
14924 dbgs() <<
"LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
14936 assert ((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 ||
14937 EltTy == MVT::f64) &&
"Unsupported floating-point vector type");
14939 dbgs() <<
"LowerBUILD_VECTOR: float constant splats, creating int "
14940 "BITCASTS, and try again\n");
14942 for (
unsigned i = 0; i < NumElts; ++i)
14946 LLVM_DEBUG(
dbgs() <<
"LowerBUILD_VECTOR: trying to lower new vector: ";
14948 Val = LowerBUILD_VECTOR(Val, DAG);
14958 bool PreferDUPAndInsert =
14960 NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
14961 NumDifferentLanes >= NumConstantLanes;
14967 if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
14971 APInt ConstantValueAPInt(1, 0);
14972 if (
auto *
C = dyn_cast<ConstantSDNode>(ConstantValue))
14973 ConstantValueAPInt =
C->getAPIntValue().zextOrTrunc(BitSize);
14975 !ConstantValueAPInt.isAllOnes()) {
14983 for (
unsigned i = 0; i < NumElts; ++i) {
14997 dbgs() <<
"LowerBUILD_VECTOR: all elements are constant, use default "
15009 if (NumElts >= 4) {
15017 if (PreferDUPAndInsert) {
15022 for (
unsigned I = 0;
I < NumElts; ++
I)
15033 if (DifferentValueMap.
size() == 2 && NumUndefLanes == 0) {
15045 bool canUseVECTOR_CONCAT =
true;
15046 for (
auto Pair : DifferentValueMap) {
15048 if (Pair.second != NumElts / 2)
15049 canUseVECTOR_CONCAT =
false;
15062 if (canUseVECTOR_CONCAT) {
15085 if (NumElts >= 8) {
15088 SDValue FirstLaneVal =
Op.getOperand(0);
15089 for (
unsigned i = 0; i < NumElts; ++i) {
15091 if (FirstLaneVal == Val)
15115 dbgs() <<
"LowerBUILD_VECTOR: alternatives failed, creating sequence "
15116 "of INSERT_VECTOR_ELT\n");
15133 LLVM_DEBUG(
dbgs() <<
"Creating node for op0, it is not undefined:\n");
15139 dbgs() <<
"Creating nodes for the other vector elements:\n";
15141 for (; i < NumElts; ++i) {
15152 dbgs() <<
"LowerBUILD_VECTOR: use default expansion, failed to find "
15153 "better alternative\n");
15161 return LowerFixedLengthConcatVectorsToSVE(
Op, DAG);
15163 assert(
Op.getValueType().isScalableVector() &&
15165 "Expected legal scalable vector type!");
15170 "Unexpected number of operands in CONCAT_VECTORS");
15172 if (NumOperands == 2)
15177 while (ConcatOps.size() > 1) {
15178 for (
unsigned I = 0, E = ConcatOps.size();
I != E;
I += 2) {
15186 ConcatOps.resize(ConcatOps.size() / 2);
15188 return ConcatOps[0];
15200 return LowerFixedLengthInsertVectorElt(
Op, DAG);
15202 EVT VT =
Op.getOperand(0).getValueType();
15216 ExtendedValue,
Op.getOperand(2));
15229AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(
SDValue Op,
15232 EVT VT =
Op.getOperand(0).getValueType();
15241 MVT ExtractTy = VectorVT == MVT::nxv2i64 ? MVT::i64 : MVT::i32;
15243 Extend,
Op.getOperand(1));
15248 return LowerFixedLengthExtractVectorElt(
Op, DAG);
15256 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
15257 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
15258 VT == MVT::v8f16 || VT == MVT::v8bf16)
15261 if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
15262 VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
15273 if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
15283 EVT VT =
Op.getValueType();
15285 "Only cases that extract a fixed length vector are supported!");
15286 EVT InVT =
Op.getOperand(0).getValueType();
15294 unsigned Idx =
Op.getConstantOperandVal(1);
15313 if (PackedVT != InVT) {
15336 assert(
Op.getValueType().isScalableVector() &&
15337 "Only expect to lower inserts into scalable vectors!");
15339 EVT InVT =
Op.getOperand(1).getValueType();
15340 unsigned Idx =
Op.getConstantOperandVal(2);
15345 EVT VT =
Op.getValueType();
15361 if (
Idx < (NumElts / 2))
15387 Vec0 = getSVESafeBitCast(NarrowVT, Vec0, DAG);
15388 Vec1 = getSVESafeBitCast(NarrowVT, Vec1, DAG);
15405 "Invalid subvector index!");
15411 return getSVESafeBitCast(VT, Narrow, DAG);
15419 std::optional<unsigned> PredPattern =
15441 !isa<ConstantSDNode>(
Op->getOperand(0)))
15444 SplatVal =
Op->getConstantOperandVal(0);
15445 if (
Op.getValueType().getVectorElementType() != MVT::i64)
15446 SplatVal = (int32_t)SplatVal;
15454 SplatVal = -SplatVal;
15462 EVT VT =
Op.getValueType();
15466 return LowerFixedLengthVectorIntDivideToSVE(
Op, DAG);
15486 if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64)
15487 return LowerToPredicatedOp(
Op, DAG, PredOpcode);
15492 if (VT == MVT::nxv16i8)
15493 WidenedVT = MVT::nxv8i16;
15494 else if (VT == MVT::nxv8i16)
15495 WidenedVT = MVT::nxv4i32;
15505 SDValue ResultLo = DAG.
getNode(
Op.getOpcode(), dl, WidenedVT, Op0Lo, Op1Lo);
15506 SDValue ResultHi = DAG.
getNode(
Op.getOpcode(), dl, WidenedVT, Op0Hi, Op1Hi);
15512bool AArch64TargetLowering::shouldExpandBuildVectorWithShuffles(
15513 EVT VT,
unsigned DefinedValues)
const {
15533 unsigned DummyUnsigned;
15541 isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
15542 isTRNMask(M, NumElts, DummyUnsigned) ||
15543 isUZPMask(M, NumElts, DummyUnsigned) ||
15544 isZIPMask(M, NumElts, DummyUnsigned) ||
15548 isINSMask(M, NumElts, DummyBool, DummyInt) ||
15564 Op =
Op.getOperand(0);
15566 APInt SplatBits, SplatUndef;
15567 unsigned SplatBitSize;
15569 if (!BVN || !BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
15570 HasAnyUndefs, ElementBits) ||
15571 SplatBitSize > ElementBits)
15582 assert(VT.
isVector() &&
"vector shift count is not a vector type");
15586 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
15593 assert(VT.
isVector() &&
"vector shift count is not a vector type");
15597 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
15602 EVT VT =
Op.getValueType();
15607 EVT OpVT =
Op.getOperand(0).getValueType();
15619 return LowerFixedLengthVectorTruncateToSVE(
Op, DAG);
15629 unsigned &ShiftValue,
15642 ShiftValue = ShiftOp1->getZExtValue();
15651 "ResVT must be truncated or same type as the shift.");
15654 if (ShiftValue > ExtraBits && !Add->getFlags().hasNoUnsignedWrap())
15661 uint64_t AddValue = AddOp1->getZExtValue();
15662 if (AddValue != 1ULL << (ShiftValue - 1))
15665 RShOperand = Add->getOperand(0);
15671 EVT VT = Op.getValueType();
15675 if (!Op.getOperand(1).getValueType().isVector())
15679 switch (Op.getOpcode()) {
15685 if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
15691 Op.getOperand(0), Op.getOperand(1));
15695 (Subtarget->hasSVE2() ||
15696 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
15698 unsigned ShiftValue;
15709 return LowerToPredicatedOp(Op, DAG, Opc);
15713 if (isVShiftRImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) {
15716 return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
15723 unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::aarch64_neon_sshl
15724 : Intrinsic::aarch64_neon_ushl;
15732 return NegShiftLeft;
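// Standalone sketch, not LLVM code: the immediate ranges that the base cases
// of the isVShiftLImm / isVShiftRImm checks above accept. Left-shift
// immediates run 0 .. ElementBits-1, right-shift immediates run 1 .. ElementBits.
// Names below are illustrative only.
#include <cassert>
#include <cstdint>

static bool isValidLeftShiftImm(int64_t Cnt, int64_t ElementBits) {
  return Cnt >= 0 && Cnt < ElementBits;
}
static bool isValidRightShiftImm(int64_t Cnt, int64_t ElementBits) {
  return Cnt >= 1 && Cnt <= ElementBits;
}

int main() {
  assert(isValidLeftShiftImm(31, 32) && !isValidLeftShiftImm(32, 32));
  assert(isValidRightShiftImm(32, 32) && !isValidRightShiftImm(0, 32));
  return 0;
}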
15741 EVT SrcVT = LHS.getValueType();
15743 "function only supposed to emit natural comparisons");
15747 unsigned SplatBitSize = 0;
15752 SplatBitSize, HasAnyUndefs);
15754 bool IsZero = IsCnst && SplatValue == 0;
15757 bool IsMinusOne = IsCnst && SplatValue.isAllOnes();
15769 return DAG.getNOT(dl, Fcmeq, VT);
15813 return DAG.getNOT(dl, Cmeq, VT);
15852 if (Op.getValueType().isScalableVector())
15857 return LowerFixedLengthVectorSetccToSVE(Op, DAG);
15862 EVT CmpVT = LHS.getValueType().changeVectorElementTypeToInteger();
15865 if (LHS.getValueType().getVectorElementType().isInteger()) {
15876 bool OneNaN = false;
15895 if ((!FullFP16 && LHS.getValueType().getVectorElementType() == MVT::f16) ||
15896 LHS.getValueType().getVectorElementType() == MVT::bf16) {
15897 if (LHS.getValueType().getVectorNumElements() == 4) {
15902 CmpVT = MVT::v4i32;
15907 assert((!FullFP16 && LHS.getValueType().getVectorElementType() != MVT::f16) ||
15908 LHS.getValueType().getVectorElementType() != MVT::bf16 ||
15909 LHS.getValueType().getVectorElementType() != MVT::f128);
15920 if (!Cmp.getNode())
15950 unsigned ScalarOpcode;
15968 "Expected power-of-2 length vector");
15976 if (ElemVT == MVT::i1) {
15978 if (NumElems > 16) {
15981 EVT HalfVT = Lo.getValueType();
15992 unsigned ExtendedWidth = 64;
15995 ExtendedWidth = 128;
16000 unsigned ExtendOp =
16008 NumElems == 2 && ExtendedWidth == 128) {
16009 Extended = DAG.getBitcast(MVT::v4i32, Extended);
16010 ExtendedVT = MVT::i32;
16012 switch (ScalarOpcode) {
16033 VecVT = Lo.getValueType();
16049 for (unsigned Shift = NumElems / 2; Shift > 0; Shift /= 2) {
16054 Scalar = DAG.getNode(ScalarOpcode, DL, ScalarVT, Scalar, Shifted);
16068 EVT SrcVT = Src.getValueType();
16081 return LowerPredReductionToSVE(Op, DAG);
16083 switch (Op.getOpcode()) {
16117 switch (Op.getOpcode()) {
16122 Op.getValueType(), dl, DAG);
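// Standalone sketch, not LLVM code: the log2 shuffle reduction performed by
// the "Shift = NumElems / 2" loop above. Each step combines lane I with lane
// I + Shift (a vector shuffle plus the scalar opcode, here addition) until a
// single lane holds the result. Names below are illustrative only.
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t Lanes[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  unsigned NumElems = 8;
  for (unsigned Shift = NumElems / 2; Shift > 0; Shift /= 2)
    for (unsigned I = 0; I < Shift; ++I)
      Lanes[I] += Lanes[I + Shift]; // combine with the shifted half
  std::printf("reduced sum = %u\n", Lanes[0]); // prints 36
  return 0;
}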
16142 if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
16147 MVT VT = Op.getSimpleValueType();
16148 assert(VT != MVT::i128 && "Handled elsewhere, code replicated.");
16153 Op.getOperand(0), Op.getOperand(1), RHS,
16158AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(SDValue Op,
16167 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
16168 EVT VT = Node->getValueType(0);
16171 "no-stack-arg-probe")) {
16179 SDValue Ops[2] = {SP, Chain};
16199 Chain, Callee, DAG.getRegister(AArch64::X15, MVT::i64),
16219 SDValue Ops[2] = {SP, Chain};
16224AArch64TargetLowering::LowerInlineDYNAMIC_STACKALLOC(SDValue Op,
16232 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
16234 EVT VT = Node->getValueType(0);
16246 SDValue Ops[2] = {SP, Chain};
16251AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
16256 return LowerWindowsDYNAMIC_STACKALLOC(Op, DAG);
16258 return LowerInlineDYNAMIC_STACKALLOC(Op, DAG);
16264 unsigned NewOp) const {
16265 if (Subtarget->hasSVE2())
16266 return LowerToPredicatedOp(Op, DAG, NewOp);
16274 EVT VT = Op.getValueType();
16275 assert(VT != MVT::i64 && "Expected illegal VSCALE node");
16278 APInt MulImm = Op.getConstantOperandAPInt(0);
16284template <unsigned NumVecs>
16294 for (unsigned I = 0; I < NumVecs; ++I)
16303 Info.align.reset();
16314 unsigned Intrinsic) const {
16315 auto &DL = I.getDataLayout();
16316 switch (Intrinsic) {
16317 case Intrinsic::aarch64_sve_st2:
16318 return setInfoSVEStN<2>(*this, DL, Info, I);
16319 case Intrinsic::aarch64_sve_st3:
16320 return setInfoSVEStN<3>(*this, DL, Info, I);
16321 case Intrinsic::aarch64_sve_st4:
16322 return setInfoSVEStN<4>(*this, DL, Info, I);
16323 case Intrinsic::aarch64_neon_ld2:
16324 case Intrinsic::aarch64_neon_ld3:
16325 case Intrinsic::aarch64_neon_ld4:
16326 case Intrinsic::aarch64_neon_ld1x2:
16327 case Intrinsic::aarch64_neon_ld1x3:
16328 case Intrinsic::aarch64_neon_ld1x4: {
16330 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
16332 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
16334 Info.align.reset();
16339 case Intrinsic::aarch64_neon_ld2lane:
16340 case Intrinsic::aarch64_neon_ld3lane:
16341 case Intrinsic::aarch64_neon_ld4lane:
16342 case Intrinsic::aarch64_neon_ld2r:
16343 case Intrinsic::aarch64_neon_ld3r:
16344 case Intrinsic::aarch64_neon_ld4r: {
16348 auto *StructTy = cast<StructType>(RetTy);
16349 unsigned NumElts = StructTy->getNumElements();
16350 Type *VecTy = StructTy->getElementType(0);
16353 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
16355 Info.align.reset();
16360 case Intrinsic::aarch64_neon_st2:
16361 case Intrinsic::aarch64_neon_st3:
16362 case Intrinsic::aarch64_neon_st4:
16363 case Intrinsic::aarch64_neon_st1x2:
16364 case Intrinsic::aarch64_neon_st1x3:
16365 case Intrinsic::aarch64_neon_st1x4: {
16367 unsigned NumElts = 0;
16368 for (const Value *Arg : I.args()) {
16369 Type *ArgTy = Arg->getType();
16372 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
16375 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
16377 Info.align.reset();
16382 case Intrinsic::aarch64_neon_st2lane:
16383 case Intrinsic::aarch64_neon_st3lane:
16384 case Intrinsic::aarch64_neon_st4lane: {
16386 unsigned NumElts = 0;
16388 Type *VecTy = I.getArgOperand(0)->getType();
16391 for (const Value *Arg : I.args()) {
16392 Type *ArgTy = Arg->getType();
16399 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
16401 Info.align.reset();
16406 case Intrinsic::aarch64_ldaxr:
16407 case Intrinsic::aarch64_ldxr: {
16408 Type *ValTy = I.getParamElementType(0);
16411 Info.ptrVal = I.getArgOperand(0);
16413 Info.align = DL.getABITypeAlign(ValTy);
16417 case Intrinsic::aarch64_stlxr:
16418 case Intrinsic::aarch64_stxr: {
16419 Type *ValTy = I.getParamElementType(1);
16422 Info.ptrVal = I.getArgOperand(1);
16424 Info.align = DL.getABITypeAlign(ValTy);
16428 case Intrinsic::aarch64_ldaxp:
16429 case Intrinsic::aarch64_ldxp:
16431 Info.memVT = MVT::i128;
16432 Info.ptrVal = I.getArgOperand(0);
16437 case Intrinsic::aarch64_stlxp:
16438 case Intrinsic::aarch64_stxp:
16440 Info.memVT = MVT::i128;
16441 Info.ptrVal = I.getArgOperand(2);
16446 case Intrinsic::aarch64_sve_ldnt1: {
16447 Type *ElTy = cast<VectorType>(I.getType())->getElementType();
16450 Info.ptrVal = I.getArgOperand(1);
16452 Info.align = DL.getABITypeAlign(ElTy);
16456 case Intrinsic::aarch64_sve_stnt1: {
16458 cast<VectorType>(I.getArgOperand(0)->getType())->getElementType();
16461 Info.ptrVal = I.getArgOperand(2);
16463 Info.align = DL.getABITypeAlign(ElTy);
16467 case Intrinsic::aarch64_mops_memset_tag: {
16468 Value *Dst = I.getArgOperand(0);
16469 Value *Val = I.getArgOperand(1);
16474 Info.align = I.getParamAlign(0).valueOrOne();
16500 MemSDNode *Mem = dyn_cast<MemSDNode>(Load);
16505 Base.getOperand(1).hasOneUse() &&
16512 uint64_t ShiftAmount = Base.getOperand(1).getConstantOperandVal(1);
16514 if (ShiftAmount == Log2_32(LoadBytes))
16524 if ((VT == MVT::i64 || VT == MVT::i32) && Extend->use_size()) {
16543 return NumBits1 > NumBits2;
16550 return NumBits1 > NumBits2;
16557 if (I->getOpcode() != Instruction::FMul)
16560 if (!I->hasOneUse())
16565 if (!(User->getOpcode() == Instruction::FSub ||
16566 User->getOpcode() == Instruction::FAdd))
16587 return NumBits1 == 32 && NumBits2 == 64;
16594 return NumBits1 == 32 && NumBits2 == 64;
16612bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
16613 if (isa<FPExtInst>(Ext))
16617 if (Ext->getType()->isVectorTy())
16620 for (const Use &U : Ext->uses()) {
16625 const Instruction *Instr = cast<Instruction>(U.getUser());
16628 switch (Instr->getOpcode()) {
16629 case Instruction::Shl:
16630 if (!isa<ConstantInt>(Instr->getOperand(1)))
16633 case Instruction::GetElementPtr: {
16635 auto &DL = Ext->getDataLayout();
16636 std::advance(GTI, U.getOperandNo()-1);
16649 if (ShiftAmt == 0 || ShiftAmt > 4)
16653 case Instruction::Trunc:
16656 if (Instr->getType() == Ext->getOperand(0)->getType())
16670 unsigned NumElts, bool IsLittleEndian,
16672 if (DstWidth % 8 != 0 || DstWidth <= 16 || DstWidth > 64)
16675 assert(DstWidth % SrcWidth == 0 &&
16676 "TBL lowering is not supported for a conversion instruction with this "
16677 "source and destination element type.");
16679 unsigned Factor = DstWidth / SrcWidth;
16680 unsigned MaskLen = NumElts * Factor;
16683 Mask.resize(MaskLen, NumElts);
16685 unsigned SrcIndex = 0;
16686 for (unsigned I = IsLittleEndian ? 0 : Factor - 1; I < MaskLen; I += Factor)
16687 Mask[I] = SrcIndex++;
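// Standalone sketch, not LLVM code: the shuffle mask built by the loop above
// for a zero-extend by Factor. Every Factor-th byte selects a source element;
// the remaining bytes use index NumElts, which the lowering points at a zero
// lane appended after the source. Names below are illustrative only.
#include <cstdio>
#include <vector>

int main() {
  unsigned NumElts = 8, Factor = 4; // e.g. widening 8 x i8 to 8 x i32
  bool IsLittleEndian = true;
  std::vector<int> Mask(NumElts * Factor, NumElts);
  unsigned SrcIndex = 0;
  for (unsigned I = IsLittleEndian ? 0 : Factor - 1; I < Mask.size(); I += Factor)
    Mask[I] = SrcIndex++;
  for (int M : Mask)
    std::printf("%d ", M); // 0 8 8 8 1 8 8 8 2 8 8 8 ...
  std::printf("\n");
  return 0;
}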
16695 bool IsLittleEndian) {
16696 auto *SrcTy = cast<FixedVectorType>(Op->getType());
16697 unsigned NumElts = SrcTy->getNumElements();
16698 auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
16699 auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
16709 if (DstTy != ZExtTy)
16710 Result = Builder.CreateZExt(Result, ZExtTy);
16716 bool IsLittleEndian) {
16717 auto *SrcTy = cast<FixedVectorType>(Op->getType());
16718 auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
16719 auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
16723 !IsLittleEndian, Mask))
16735 int NumElements = cast<FixedVectorType>(TI->getType())->getNumElements();
16737 auto *DstTy = cast<FixedVectorType>(TI->getType());
16738 assert(SrcTy->getElementType()->isIntegerTy() &&
16739 "Non-integer type source vector element is not supported");
16740 assert(DstTy->getElementType()->isIntegerTy(8) &&
16741 "Unsupported destination vector element type");
16742 unsigned SrcElemTySz =
16743 cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
16744 unsigned DstElemTySz =
16745 cast<IntegerType>(DstTy->getElementType())->getBitWidth();
16746 assert((SrcElemTySz % DstElemTySz == 0) &&
16747 "Cannot lower truncate to tbl instructions for a source element size "
16748 "that is not divisible by the destination element size");
16749 unsigned TruncFactor = SrcElemTySz / DstElemTySz;
16750 assert((SrcElemTySz == 16 || SrcElemTySz == 32 || SrcElemTySz == 64) &&
16751 "Unsupported source vector element type size");
16759 for (int Itr = 0; Itr < 16; Itr++) {
16760 if (Itr < NumElements)
16762 IsLittleEndian ? Itr * TruncFactor
16763 : Itr * TruncFactor + (TruncFactor - 1)));
16768 int MaxTblSz = 128 * 4;
16769 int MaxSrcSz = SrcElemTySz * NumElements;
16771 (MaxTblSz > MaxSrcSz) ? NumElements : (MaxTblSz / SrcElemTySz);
16772 assert(ElemsPerTbl <= 16 &&
16773 "Maximum elements selected using TBL instruction cannot exceed 16!");
16775 int ShuffleCount = 128 / SrcElemTySz;
16777 for (int i = 0; i < ShuffleCount; ++i)
16784 while (ShuffleLanes.back() < NumElements) {
16788 if (Parts.size() == 4) {
16791 Builder.CreateIntrinsic(Intrinsic::aarch64_neon_tbl4, VecTy, Parts));
16795 for (int i = 0; i < ShuffleCount; ++i)
16796 ShuffleLanes[i] += ShuffleCount;
16800 "Lowering trunc for vectors requiring different TBL instructions is "
16804 if (!Parts.empty()) {
16806 switch (Parts.size()) {
16808 TblID = Intrinsic::aarch64_neon_tbl1;
16811 TblID = Intrinsic::aarch64_neon_tbl2;
16814 TblID = Intrinsic::aarch64_neon_tbl3;
16825 "more than 2 tbl instructions!");
16828 if (ElemsPerTbl < 16) {
16830 std::iota(FinalMask.begin(), FinalMask.end(), 0);
16835 if (ElemsPerTbl < 16) {
16836 std::iota(FinalMask.begin(), FinalMask.begin() + ElemsPerTbl, 0);
16837 std::iota(FinalMask.begin() + ElemsPerTbl, FinalMask.end(), 16);
16839 std::iota(FinalMask.begin(), FinalMask.end(), 0);
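// Standalone sketch, not LLVM code: the byte-select pattern the truncate
// lowering above feeds to TBL. For a little-endian i32 -> i8 truncate, each
// destination byte Itr reads source byte Itr * TruncFactor (big-endian takes
// the last byte of each element). -1 below stands in for bytes that are not
// taken from the source; names are illustrative only.
#include <cstdio>

int main() {
  unsigned NumElements = 8, TruncFactor = 4; // e.g. 8 x i32 -> 8 x i8
  bool IsLittleEndian = true;
  for (unsigned Itr = 0; Itr < 16; ++Itr) {
    int Idx = Itr < NumElements
                  ? int(IsLittleEndian ? Itr * TruncFactor
                                       : Itr * TruncFactor + (TruncFactor - 1))
                  : -1;
    std::printf("%d ", Idx); // 0 4 8 12 16 20 24 28 -1 ...
  }
  std::printf("\n");
  return 0;
}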
16861 if (!L || L->getHeader() != I->getParent() || F->hasMinSize() ||
16865 auto *SrcTy = dyn_cast<FixedVectorType>(I->getOperand(0)->getType());
16866 auto *DstTy = dyn_cast<FixedVectorType>(I->getType());
16867 if (!SrcTy || !DstTy)
16873 auto *ZExt = dyn_cast<ZExtInst>(I);
16874 if (ZExt && SrcTy->getElementType()->isIntegerTy(8)) {
16875 auto DstWidth = DstTy->getElementType()->getScalarSizeInBits();
16876 if (DstWidth % 8 != 0)
16879 auto *TruncDstType =
16883 auto SrcWidth = SrcTy->getElementType()->getScalarSizeInBits();
16887 if (SrcWidth * 2 >= TruncDstType->getElementType()->getScalarSizeInBits())
16890 DstTy = TruncDstType;
16896 if (SrcWidth * 4 <= DstWidth && I->hasOneUser()) {
16897 auto *SingleUser = cast<Instruction>(*I->user_begin());
16902 if (DstTy->getScalarSizeInBits() >= 64)
16907 Builder, ZExt->getOperand(0), cast<FixedVectorType>(ZExt->getType()),
16911 ZExt->replaceAllUsesWith(Result);
16912 ZExt->eraseFromParent();
16916 auto *UIToFP = dyn_cast<UIToFPInst>(I);
16917 if (UIToFP && ((SrcTy->getElementType()->isIntegerTy(8) &&
16918 DstTy->getElementType()->isFloatTy()) ||
16919 (SrcTy->getElementType()->isIntegerTy(16) &&
16920 DstTy->getElementType()->isDoubleTy()))) {
16925 assert(ZExt && "Cannot fail for the i8 to float conversion");
16927 I->replaceAllUsesWith(UI);
16928 I->eraseFromParent();
16932 auto *SIToFP = dyn_cast<SIToFPInst>(I);
16933 if (SIToFP && SrcTy->getElementType()->isIntegerTy(8) &&
16934 DstTy->getElementType()->isFloatTy()) {
16939 assert(Shuffle && "Cannot fail for the i8 to float conversion");
16941 auto *AShr = Builder.CreateAShr(Cast, 24, "", true);
16943 I->replaceAllUsesWith(SI);
16944 I->eraseFromParent();
16950 auto *FPToUI = dyn_cast<FPToUIInst>(I);
16952 (SrcTy->getNumElements() == 8 || SrcTy->getNumElements() == 16) &&
16953 SrcTy->getElementType()->isFloatTy() &&
16954 DstTy->getElementType()->isIntegerTy(8)) {
16956 auto *WideConv = Builder.CreateFPToUI(FPToUI->getOperand(0),
16958 auto *TruncI = Builder.CreateTrunc(WideConv, DstTy);
16959 I->replaceAllUsesWith(TruncI);
16960 I->eraseFromParent();
16969 auto *TI = dyn_cast<TruncInst>(I);
16970 if (TI && DstTy->getElementType()->isIntegerTy(8) &&
16971 ((SrcTy->getElementType()->isIntegerTy(32) ||
16972 SrcTy->getElementType()->isIntegerTy(64)) &&
16973 (SrcTy->getNumElements() == 16 || SrcTy->getNumElements() == 8))) {
16982 Align &RequiredAligment) const {
16987 RequiredAligment = Align(1);
16989 return NumBits == 32 || NumBits == 64;
16996 unsigned VecSize = 128;
16999 if (UseScalable && isa<FixedVectorType>(VecTy))
17001 return std::max<unsigned>(1, (MinElts * ElSize + 127) / VecSize);
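// Standalone arithmetic sketch, not LLVM code: the access count computed just
// above rounds the group's bit size up to whole 128-bit vectors (and never
// returns less than one). Names below are illustrative only.
#include <cstdio>

int main() {
  unsigned VecSize = 128;
  unsigned MinElts = 32, ElSize = 8; // e.g. a 32 x i8 factor group
  unsigned NumAccesses = (MinElts * ElSize + VecSize - 1) / VecSize;
  if (NumAccesses < 1)
    NumAccesses = 1;
  std::printf("%u\n", NumAccesses); // prints 2
  return 0;
}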
17006 if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
17016 unsigned MinElts = EC.getKnownMinValue();
17018 UseScalable = false;
17025 if (isa<ScalableVectorType>(VecTy) &&
17034 if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
17037 if (EC.isScalable()) {
17038 UseScalable = true;
17039 return isPowerOf2_32(MinElts) && (MinElts * ElSize) % 128 == 0;
17042 unsigned VecSize = DL.getTypeSizeInBits(VecTy);
17044 unsigned MinSVEVectorSize =
17046 if (VecSize % MinSVEVectorSize == 0 ||
17049 UseScalable = true;
17056 return Subtarget->isNeonAvailable() && (VecSize == 64 || VecSize % 128 == 0);
17088 bool Scalable, Type *LDVTy,
17090 assert(Factor >= 2 && Factor <= 4 && "Invalid interleave factor");
17091 static const Intrinsic::ID SVELoads[3] = {Intrinsic::aarch64_sve_ld2_sret,
17092 Intrinsic::aarch64_sve_ld3_sret,
17093 Intrinsic::aarch64_sve_ld4_sret};
17094 static const Intrinsic::ID NEONLoads[3] = {Intrinsic::aarch64_neon_ld2,
17095 Intrinsic::aarch64_neon_ld3,
17096 Intrinsic::aarch64_neon_ld4};
17105 bool Scalable, Type *STVTy,
17107 assert(Factor >= 2 && Factor <= 4 && "Invalid interleave factor");
17108 static const Intrinsic::ID SVEStores[3] = {Intrinsic::aarch64_sve_st2,
17109 Intrinsic::aarch64_sve_st3,
17110 Intrinsic::aarch64_sve_st4};
17111 static const Intrinsic::ID NEONStores[3] = {Intrinsic::aarch64_neon_st2,
17112 Intrinsic::aarch64_neon_st3,
17113 Intrinsic::aarch64_neon_st4};
17136 "Invalid interleave factor");
17137 assert(!Shuffles.empty() && "Empty shufflevector input");
17139 "Unmatched number of shufflevectors and indices");
17157 SI->getType()->getScalarSizeInBits() * 4 ==
17158 SI->user_back()->getType()->getScalarSizeInBits();
17164 auto *FVTy = cast<FixedVectorType>(VTy);
17168 Type *EltTy = FVTy->getElementType();
17176 FVTy->getNumElements() / NumLoads);
17188 LDVTy->getElementCount());
17191 UseScalable, LDVTy, PtrTy);
17198 Value *PTrue = nullptr;
17200 std::optional<unsigned> PgPattern =
17205 PgPattern = AArch64SVEPredPattern::all;
17209 PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
17213 for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
17219 FVTy->getNumElements() * Factor);
17223 LdN = Builder.CreateCall(LdNFunc, {PTrue, BaseAddr}, "ldN");
17225 LdN = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
17228 for (unsigned i = 0; i < Shuffles.size(); i++) {
17230 unsigned Index = Indices[i];
17243 FVTy->getNumElements()));
17245 SubVecs[SVI].push_back(SubVec);
17254 auto &SubVec = SubVecs[SVI];
17257 SVI->replaceAllUsesWith(WideVec);
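// Standalone sketch, not LLVM or intrinsic code: the data movement an
// ld2-style interleaved load performs, which the lowering above maps the
// deinterleaving shufflevectors onto. Names below are illustrative only.
#include <cstdio>

int main() {
  int Interleaved[8] = {10, 20, 11, 21, 12, 22, 13, 23}; // factor 2, 4 lanes
  int A[4], B[4];
  for (int I = 0; I < 4; ++I) {
    A[I] = Interleaved[2 * I];     // even lanes: 10 11 12 13
    B[I] = Interleaved[2 * I + 1]; // odd lanes:  20 21 22 23
  }
  std::printf("A[0]=%d B[0]=%d\n", A[0], B[0]);
  return 0;
}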
17263template <typename Iter>
17265 int MaxLookupDist = 20;
17266 unsigned IdxWidth = DL.getIndexSizeInBits(0);
17267 APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
17268 const Value *PtrA1 =
17269 Ptr->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
17271 while (++It != End) {
17272 if (It->isDebugOrPseudoInst())
17274 if (MaxLookupDist-- == 0)
17276 if (const auto *SI = dyn_cast<StoreInst>(&*It)) {
17277 const Value *PtrB1 =
17278 SI->getPointerOperand()->stripAndAccumulateInBoundsConstantOffsets(
17280 if (PtrA1 == PtrB1 &&
17281 (OffsetA.sextOrTrunc(IdxWidth) - OffsetB.sextOrTrunc(IdxWidth))
17318 unsigned Factor) const {
17321 "Invalid interleave factor");
17323 auto *VecTy = cast<FixedVectorType>(SVI->getType());
17324 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
17326 unsigned LaneLen = VecTy->getNumElements() / Factor;
17327 Type *EltTy = VecTy->getElementType();
17348 Type *IntTy = DL.getIntPtrType(EltTy);
17349 unsigned NumOpElts =
17350 cast<FixedVectorType>(Op0->getType())->getNumElements();
17362 LaneLen /= NumStores;
17369 Value *BaseAddr = SI->getPointerOperand();
17383 if (Factor == 2 && SubVecTy->getPrimitiveSizeInBits() == 64 &&
17391 Type *PtrTy = SI->getPointerOperandType();
17393 STVTy->getElementCount());
17396 UseScalable, STVTy, PtrTy);
17398 Value *PTrue = nullptr;
17400 std::optional<unsigned> PgPattern =
17405 DL.getTypeSizeInBits(SubVecTy))
17406 PgPattern = AArch64SVEPredPattern::all;
17410 PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
17414 for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
17419 for (unsigned i = 0; i < Factor; i++) {
17421 unsigned IdxI = StoreCount * LaneLen * Factor + i;
17422 if (Mask[IdxI] >= 0) {
17426 unsigned StartMask = 0;
17427 for (unsigned j = 1; j < LaneLen; j++) {
17428 unsigned IdxJ = StoreCount * LaneLen * Factor + j * Factor + i;
17429 if (Mask[IdxJ] >= 0) {
17430 StartMask = Mask[IdxJ] - j;
17456 if (StoreCount > 0)
17458 BaseAddr, LaneLen * Factor);
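// Standalone sketch, not LLVM code: the StartMask recovery used above. When
// lane 0 of a sub-mask is undefined (-1), a later defined lane J with value
// Mask[J] implies the contiguous run started at Mask[J] - J. Names below are
// illustrative only.
#include <cstdio>

int main() {
  int Mask[4] = {-1, 5, 6, 7}; // lane 0 undefined, lanes 1..3 defined
  int StartMask = 0;
  for (int J = 1; J < 4; ++J)
    if (Mask[J] >= 0) {
      StartMask = Mask[J] - J;
      break;
    }
  std::printf("recovered start index = %d\n", StartMask); // prints 4
  return 0;
}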
17471 auto *Extr1 = dyn_cast<ExtractValueInst>(*(DI->user_begin()));
17472 auto *Extr2 = dyn_cast<ExtractValueInst>(*(++DI->user_begin()));
17473 if (!Extr1 || !Extr2)
17476 DeinterleavedValues.resize(2);
17478 DeinterleavedValues[0x1 & (Extr1->getIndices()[0])] = Extr1;
17479 DeinterleavedValues[0x1 & (Extr2->getIndices()[0])] = Extr2;
17480 if (!DeinterleavedValues[0] || !DeinterleavedValues[1])
17484 if (!match(DeinterleavedValues[0], m_ExtractValue<0>((m_Specific(DI)))) ||
17485 !match(DeinterleavedValues[1], m_ExtractValue<1>((m_Specific(DI))))) {
17490 DeInterleaveDeadInsts.insert(DeInterleaveDeadInsts.end(),
17491 DeinterleavedValues.begin(),
17492 DeinterleavedValues.end());
17518 auto *Extr1 = dyn_cast<ExtractValueInst>(*(DI->user_begin()));
17519 auto *Extr2 = dyn_cast<ExtractValueInst>(*(++DI->user_begin()));
17520 if (!Extr1 || !Extr2)
17523 if (!Extr1->hasOneUse() || !Extr2->hasOneUse())
17525 auto *DI1 = *(Extr1->user_begin());
17526 auto *DI2 = *(Extr2->user_begin());
17528 if (!DI1->hasNUses(2) || !DI2->hasNUses(2))
17531 auto *A = dyn_cast<ExtractValueInst>(*(DI1->user_begin()));
17532 auto *C = dyn_cast<ExtractValueInst>(*(++DI1->user_begin()));
17533 auto *B = dyn_cast<ExtractValueInst>(*(DI2->user_begin()));
17534 auto *D = dyn_cast<ExtractValueInst>(*(++DI2->user_begin()));
17537 if (!A || !B || !C || !D)
17540 DeinterleavedValues.resize(4);
17542 DeinterleavedValues[0x3 &
17543 ((A->getIndices()[0] * 2) + Extr1->getIndices()[0])] = A;
17544 DeinterleavedValues[0x3 &
17545 ((B->getIndices()[0] * 2) + Extr2->getIndices()[0])] = B;
17546 DeinterleavedValues[0x3 &
17547 ((C->getIndices()[0] * 2) + Extr1->getIndices()[0])] = C;
17548 DeinterleavedValues[0x3 &
17549 ((D->getIndices()[0] * 2) + Extr2->getIndices()[0])] = D;
17550 if (!DeinterleavedValues[0] || !DeinterleavedValues[1] ||
17551 !DeinterleavedValues[2] || !DeinterleavedValues[3])
17569 DeInterleaveDeadInsts.insert(DeInterleaveDeadInsts.end(),
17570 DeinterleavedValues.begin(),
17571 DeinterleavedValues.end());
17572 DeInterleaveDeadInsts.push_back(cast<Instruction>(DI1));
17573 DeInterleaveDeadInsts.push_back(cast<Instruction>(Extr1));
17574 DeInterleaveDeadInsts.push_back(cast<Instruction>(DI2));
17575 DeInterleaveDeadInsts.push_back(cast<Instruction>(Extr2));
17602 unsigned Factor = DeinterleavedValues.size();
17603 assert((Factor == 2 || Factor == 4) &&
17604 "Currently supported Factor is 2 or 4 only");
17624 UseScalable, LdTy, PtrTy);
17627 Value *Pred = nullptr;
17633 if (NumLoads > 1) {
17636 for (unsigned I = 0; I < NumLoads; ++I) {
17640 Value *LdN = nullptr;
17647 for (unsigned J = 0; J < Factor; ++J) {
17654 for (unsigned J = 0; J < Factor; ++J)
17659 Result = Builder.CreateCall(LdNFunc, {Pred, BaseAddr}, "ldN");
17661 Result = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
17663 for (unsigned I = 0; I < Factor; I++) {
17665 DeinterleavedValues[I]->replaceAllUsesWith(NewExtract);
17668 DeadInsts.insert(DeadInsts.end(), DeInterleaveDeadInsts.begin(),
17669 DeInterleaveDeadInsts.end());
17700 cast<Instruction>(cast<Instruction>(II)->getOperand(0)));
17702 cast<Instruction>(cast<Instruction>(II)->getOperand(1)));
17720 if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
17729 unsigned Factor = InterleavedValues.size();
17730 assert((Factor == 2 || Factor == 4) &&
17731 "Currently supported Factor is 2 or 4 only");
17750 Type *PtrTy = SI->getPointerOperandType();
17752 UseScalable, StTy, PtrTy);
17756 Value *BaseAddr = SI->getPointerOperand();
17757 Value *Pred = nullptr;
17763 auto ExtractedValues = InterleavedValues;
17767 for (unsigned I = 0; I < NumStores; ++I) {
17769 if (NumStores > 1) {
17774 for (unsigned J = 0; J < Factor; J++) {
17775 InterleavedValues[J] =
17779 InterleavedValues[InterleavedValues.size() - 1] = Address;
17781 Builder.CreateCall(StNFunc, InterleavedValues);
17784 InterleaveDeadInsts.end());
17790 bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
17791 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
17792 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
17796 bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
17797 auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
17798 if (Op.isAligned(AlignCheck))
17806 if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
17807 AlignmentIsAcceptable(MVT::v16i8, Align(16)))
17809 if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
17811 if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
17813 if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
17820 bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
17821 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
17822 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
17826 bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
17827 auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
17828 if (Op.isAligned(AlignCheck))
17836 if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
17837 AlignmentIsAcceptable(MVT::v2i64, Align(16)))
17839 if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
17841 if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
17843 if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
17850 if (Immed == std::numeric_limits<int64_t>::min()) {
17852 << ": avoid UB for INT64_MIN\n");
17856 Immed = std::abs(Immed);
17857 bool IsLegal = ((Immed >> 12) == 0 ||
17858 ((Immed & 0xfff) == 0 && Immed >> 24 == 0));
17860 << " legal add imm: " << (IsLegal ? "yes" : "no") << "\n");
17866 if (!Subtarget->hasSVE2())
17874 return isInt<6>(Imm / 16);
17885 return std::abs(Imm / 8) <= 16;
17888 return std::abs(Imm / 4) <= 16;
17891 return std::abs(Imm / 2) <= 16;
17918 if (Insn.size() > 1)
17955 if (AM.Scale == 1) {
17958 } else if (AM.Scale == 2) {
17971 if (isa<ScalableVectorType>(Ty)) {
17976 uint64_t VecNumBytes = DL.getTypeSizeInBits(Ty).getKnownMinValue() / 8;
17983 DL.getTypeSizeInBits(cast<VectorType>(Ty)->getElementType()) / 8;
17999 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
18000 NumBytes = NumBits / 8;
18013 int64_t MaxOffset) const {
18014 int64_t HighPart = MinOffset & ~0xfffULL;
18037 return Subtarget->hasFullFP16();
18070 static const MCPhysReg ScratchRegs[] = {
18071 AArch64::X16, AArch64::X17, AArch64::LR, 0
18073 return ScratchRegs;
18077 static const MCPhysReg RCRegs[] = {AArch64::FPCR};
18086 "Expected shift op");
18088 SDValue ShiftLHS = N->getOperand(0);
18089 EVT VT = N->getValueType(0);
18102 isa<ConstantSDNode>(ShiftLHS.getOperand(1))) {
18107 if (auto *SRLC = dyn_cast<ConstantSDNode>(AndLHS.getOperand(1))) {
18109 if (auto *SHLC = dyn_cast<ConstantSDNode>(N->getOperand(1)))
18110 return SRLC->getZExtValue() == SHLC->getZExtValue();
18122 (N->getOperand(0).getOpcode() == ISD::SHL ||
18123 N->getOperand(0).getOpcode() == ISD::SRL) &&
18124 "Expected XOR(SHIFT) pattern");
18127 auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));
18128 auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
18129 if (XorC && ShiftC) {
18130 unsigned MaskIdx, MaskLen;
18131 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
18132 unsigned ShiftAmt = ShiftC->getZExtValue();
18133 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
18134 if (N->getOperand(0).getOpcode() == ISD::SHL)
18135 return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
18136 return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
18146 N->getOperand(0).getOpcode() == ISD::SRL) ||
18148 N->getOperand(0).getOpcode() == ISD::SHL)) &&
18149 "Expected shift-shift mask");
18151 if (!N->getOperand(0)->hasOneUse())
18155 EVT VT = N->getValueType(0);
18156 if (N->getOpcode() == ISD::SRL && (VT == MVT::i32 || VT == MVT::i64)) {
18157 auto *C1 = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
18158 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
18159 return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue());
18164 if (N->getOpcode() == ISD::SHL && N->hasOneUse()) {
18165 if (auto C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
18166 unsigned ShlAmt = C2->getZExtValue();
18167 if (auto ShouldADD = *N->user_begin();
18168 ShouldADD->getOpcode() == ISD::ADD && ShouldADD->hasOneUse()) {
18169 if (auto ShouldLOAD = dyn_cast<LoadSDNode>(*ShouldADD->user_begin())) {
18170 unsigned ByteVT = ShouldLOAD->getMemoryVT().getSizeInBits() / 8;
18171 if ((1ULL << ShlAmt) == ByteVT &&
18170 unsigned ByteVT = ShouldLOAD->getMemoryVT().getSizeInBits() / 8;
18171 if ((1ULL << ShlAmt) == ByteVT &&
18183 unsigned BinOpcode, EVT VT) const {
18195 int64_t Val = Imm.getSExtValue();
18199 if ((int64_t)Val < 0)
18202 Val &= (1LL << 32) - 1;
18210 unsigned Index) const {
18223 EVT VT = N->getValueType(0);
18224 if (!Subtarget->hasNEON() || !VT.isVector())
18236 auto *ShiftAmt = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
18238 if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
18262 if (N->getValueType(0) != MVT::i32)
18265 SDValue VecReduceOp0 = N->getOperand(0);
18266 unsigned Opcode = VecReduceOp0.getOpcode();
18273 if (ABS->getOperand(0)->getOpcode() != ISD::SUB ||
18274 ABS->getOperand(0)->getValueType(0) != MVT::v16i32)
18277 SDValue SUB = ABS->getOperand(0);
18278 unsigned Opcode0 = SUB->getOperand(0).getOpcode();
18279 unsigned Opcode1 = SUB->getOperand(1).getOpcode();
18281 if (SUB->getOperand(0)->getValueType(0) != MVT::v16i32 ||
18282 SUB->getOperand(1)->getValueType(0) != MVT::v16i32)
18286 bool IsZExt = false;
18294 SDValue EXT0 = SUB->getOperand(0);
18295 SDValue EXT1 = SUB->getOperand(1);
18312 UABDHigh8Op0, UABDHigh8Op1);
18323 UABDLo8Op0, UABDLo8Op1);
18342 if (!ST->isNeonAvailable())
18345 if (!ST->hasDotProd())
18356 unsigned DotOpcode;
18360 if (A.getOperand(0).getValueType() != B.getOperand(0).getValueType())
18362 auto OpCodeA = A.getOpcode();
18366 auto OpCodeB = B.getOpcode();
18370 if (OpCodeA == OpCodeB) {
18375 if (!ST->hasMatMulInt8())
18389 EVT Op0VT = A.getOperand(0).getValueType();
18392 if (!IsValidElementCount || !IsValidSize)
18401 B = B.getOperand(0);
18404 unsigned NumOfVecReduce;
18406 if (IsMultipleOf16) {
18408 TargetType = MVT::v4i32;
18411 TargetType = MVT::v2i32;
18414 if (NumOfVecReduce == 1) {
18417 A.getOperand(0), B);
18424 for (; I < VecReduce16Num; I += 1) {
18443 if (VecReduce8Num == 0)
18444 return VecReduceAdd16;
18467 auto DetectAddExtract = [&](SDValue A) {
18471 EVT VT = A.getValueType();
18499 if (SDValue R = DetectAddExtract(A))
18502 if (A.getOperand(0).getOpcode() == ISD::ADD && A.getOperand(0).hasOneUse())
18506 if (A.getOperand(1).getOpcode() == ISD::ADD && A.getOperand(1).hasOneUse())
18519 EVT VT = A.getValueType();
18520 if (VT != MVT::v8i16 && VT != MVT::v4i32 && VT != MVT::v2i64)
18531 if (ExtVT0 != ExtVT1 ||
18574AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
18581 EVT VT = N->getValueType(0);
18590 if ((VT != MVT::i32 && VT != MVT::i64) ||
18596 if (Divisor == 2 ||
18604AArch64TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
18611 EVT VT = N->getValueType(0);
18619 if ((VT != MVT::i32 && VT != MVT::i64) ||
18661 case Intrinsic::aarch64_sve_cntb:
18663 case Intrinsic::aarch64_sve_cnth:
18665 case Intrinsic::aarch64_sve_cntw:
18667 case Intrinsic::aarch64_sve_cntd:
18696 return TypeNode->getVT();
18706 if (Mask == UCHAR_MAX)
18708 else if (Mask == USHRT_MAX)
18710 else if (Mask == UINT_MAX)
18732 unsigned ExtendOpcode = Extend.getOpcode();
18748 if (PreExtendType == MVT::Other ||
18753 bool SeenZExtOrSExt = !IsAnyExt;
18761 unsigned Opc = Op.getOpcode();
18768 if (SeenZExtOrSExt && OpcIsSExt != IsSExt)
18771 IsSExt = OpcIsSExt;
18772 SeenZExtOrSExt = true;
18779 EVT PreExtendLegalType =
18785 PreExtendLegalType));
18793 cast<ShuffleVectorSDNode>(BV)->getMask());
18795 unsigned ExtOpc = !SeenZExtOrSExt
18798 return DAG.getNode(ExtOpc, DL, VT, NBV);
18805 EVT VT = Mul->getValueType(0);
18806 if (VT != MVT::v8i16 && VT != MVT::v4i32 && VT != MVT::v2i64)
18817 return DAG.getNode(Mul->getOpcode(), DL, VT, Op0 ? Op0 : Mul->getOperand(0),
18818 Op1 ? Op1 : Mul->getOperand(1));
18824 EVT VT = N->getValueType(0);
18825 if (VT != MVT::v2i64 && VT != MVT::v1i64 && VT != MVT::v2i32 &&
18826 VT != MVT::v4i32 && VT != MVT::v4i16 && VT != MVT::v8i16)
18828 if (N->getOperand(0).getOpcode() != ISD::AND ||
18829 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
18842 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
18843 V3 != (HalfSize - 1))
18861 EVT VT = N->getValueType(0);
18867 N->getOperand(0).getOperand(0).getValueType() !=
18868 N->getOperand(1).getOperand(0).getValueType()
18872 N->getOperand(0).getOpcode() != N->getOperand(1).getOpcode())
18875 SDValue N0 = N->getOperand(0).getOperand(0);
18876 SDValue N1 = N->getOperand(1).getOperand(0);
18881 if ((S2 == MVT::i32 && S1 == MVT::i8) ||
18882 (S2 == MVT::i64 && (S1 == MVT::i8 || S1 == MVT::i16))) {
18915 EVT VT = N->getValueType(0);
18919 unsigned AddSubOpc;
18921 auto IsAddSubWith1 = [&](SDValue V) -> bool {
18922 AddSubOpc = V->getOpcode();
18924 SDValue Opnd = V->getOperand(1);
18925 MulOper = V->getOperand(0);
18928 if (auto C = dyn_cast<ConstantSDNode>(Opnd))
18934 if (IsAddSubWith1(N0)) {
18936 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
18939 if (IsAddSubWith1(N1)) {
18941 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
18945 if (!isa<ConstantSDNode>(N1))
18949 const APInt &ConstValue = C->getAPIntValue();
18956 if (ConstValue.sge(1) && ConstValue.sle(16))
18971 unsigned TrailingZeroes = ConstValue.countr_zero();
18972 if (TrailingZeroes) {
18980 if (N->hasOneUse() && (N->user_begin()->getOpcode() == ISD::ADD ||
18981 N->user_begin()->getOpcode() == ISD::SUB))
18986 APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
18989 auto Shl = [&](SDValue N0, unsigned N1) {
19020 for (unsigned i = 1; i < BitWidth / 2; i++) {
19040 unsigned TrailingZeroes = CVMinus1.countr_zero();
19041 APInt SCVMinus1 = CVMinus1.ashr(TrailingZeroes) - 1;
19057 unsigned TrailingZeroes = CVMinus1.countr_zero();
19058 APInt CVPlus1 = CVMinus1.ashr(TrailingZeroes) + 1;
19078 APInt SCVMinus1 = ShiftedConstValue - 1;
19079 APInt SCVPlus1 = ShiftedConstValue + 1;
19080 APInt CVPlus1 = ConstValue + 1;
19084 return Shl(Add(Shl(N0, ShiftAmt), N0), TrailingZeroes);
19087 return Sub(Shl(N0, ShiftAmt), N0);
19089 ShiftAmt = SCVPlus1.logBase2() + TrailingZeroes;
19090 return Sub(Shl(N0, ShiftAmt), Shl(N0, TrailingZeroes));
19092 if (Subtarget->hasALULSLFast() &&
19093 isPowPlusPlusConst(ConstValue, CVM, CVN)) {
19094 APInt CVMMinus1 = CVM - 1;
19095 APInt CVNMinus1 = CVN - 1;
19096 unsigned ShiftM1 = CVMMinus1.logBase2();
19097 unsigned ShiftN1 = CVNMinus1.logBase2();
19099 if (ShiftM1 <= 4 && ShiftN1 <= 4) {
19101 return Add(Shl(MVal, ShiftN1), MVal);
19104 if (Subtarget->hasALULSLFast() &&
19105 isPowPlusPlusOneConst(ConstValue, CVM, CVN)) {
19109 if (ShiftM <= 4 && ShiftN <= 4) {
19115 if (Subtarget->hasALULSLFast() &&
19116 isPowMinusMinusOneConst(ConstValue, CVM, CVN)) {
19120 if (ShiftM <= 4 && ShiftN <= 4) {
19129 APInt SCVPlus1 = -ShiftedConstValue + 1;
19130 APInt CVNegPlus1 = -ConstValue + 1;
19131 APInt CVNegMinus1 = -ConstValue - 1;
19134 return Sub(N0, Shl(N0, ShiftAmt));
19136 ShiftAmt = CVNegMinus1.logBase2();
19137 return Negate(Add(Shl(N0, ShiftAmt), N0));
19139 ShiftAmt = SCVPlus1.logBase2() + TrailingZeroes;
19140 return Sub(Shl(N0, TrailingZeroes), Shl(N0, ShiftAmt));
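// Standalone worked example, not LLVM code: the shift/add/sub decompositions
// chosen above for multiplies by constants of the form (2^N + 1) << M and
// 2^N - 1, e.g. C = 24 = (2^1 + 1) << 3 and C = 15 = 2^4 - 1.
#include <cstdint>
#include <cstdio>

int main() {
  int64_t X = 7;
  int64_t Mul24 = ((X << 1) + X) << 3; // (2^shift + 1) * 2^trailing_zeros
  int64_t Mul15 = (X << 4) - X;        // 2^shift - 1
  std::printf("%lld %lld\n", (long long)(X * 24 - Mul24),
              (long long)(X * 15 - Mul15)); // prints 0 0
  return 0;
}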
19160 EVT VT = N->getValueType(0);
19162 N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
19163 VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
19171 dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
19173 if (!BV->isConstant())
19178 EVT IntVT = BV->getValueType(0);
19185 N->getOperand(0)->getOperand(0), MaskConst);
19199 if (N->isStrictFPOpcode())
19210 return !VT.isVector() && VT != MVT::bf16 && VT != MVT::f128;
19213 SDValue SrcVal = N->getOperand(0);
19215 EVT DestTy = N->getValueType(0);
19222 if (DestTy.bitsGT(SrcTy)) {
19231 if (SrcVecTy == MVT::nxv2i32 || DestVecTy == MVT::nxv2i32)
19237 DAG.getUNDEF(SrcVecTy), SrcVal, ZeroIdx);
19254 EVT VT = N->getValueType(0);
19255 if (VT != MVT::f32 && VT != MVT::f64)
19259 if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
19269 !cast<LoadSDNode>(N0)->isVolatile()) {
19299 if (!N->getValueType(0).isSimple())
19303 if (!Op.getValueType().isSimple() || Op.getOpcode() != ISD::FMUL)
19306 if (!Op.getValueType().is64BitVector() && !Op.getValueType().is128BitVector())
19310 if (!isa<BuildVectorSDNode>(ConstVec))
19313 MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
19315 if (FloatBits != 32 && FloatBits != 64 &&
19316 (FloatBits != 16 || !Subtarget->hasFullFP16()))
19319 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
19321 if (IntBits != 16 && IntBits != 32 && IntBits != 64)
19325 if (IntBits > FloatBits)
19330 int32_t Bits = IntBits == 64 ? 64 : 32;
19332 if (C == -1 || C == 0 || C > Bits)
19335 EVT ResTy = Op.getValueType().changeVectorElementTypeToInteger();
19341 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
19349 unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
19350 : Intrinsic::aarch64_neon_vcvtfp2fxu;
19356 if (IntBits < FloatBits)
19364 EVT VT = N->getValueType(0);
19390 for (int i = 1; i >= 0; --i) {
19391 for (int j = 1; j >= 0; --j) {
19428 uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1);
19429 for (int i = 1; i >= 0; --i)
19430 for (int j = 1; j >= 0; --j) {
19441 if (!BVN0 || !BVN1)
19444 bool FoundMatch = true;
19448 if (!CN0 || !CN1 ||
19450 FoundMatch = false;
19473 EVT VT = N->getValueType(0);
19521 auto *Op1 = dyn_cast<ConstantSDNode>(Cmp1.getOperand(1));
19522 if (Op1 && Op1->getAPIntValue().isNegative() &&
19523 Op1->getAPIntValue().sgt(-32)) {
19530 NZCVOp, Condition, Cmp0);
19533 Cmp1.getOperand(1), NZCVOp, Condition, Cmp0);
19544 EVT VT = N->getValueType(0);
19565 MaskForTy = 0xffull;
19568 MaskForTy = 0xffffull;
19571 MaskForTy = 0xffffffffull;
19579 if (auto *Op0 = dyn_cast<ConstantSDNode>(N->getOperand(0)))
19580 return Op0->getAPIntValue().getLimitedValue() == MaskForTy;
19590 Op = Op->getOperand(0);
19600 unsigned Opc = Src->getOpcode();
19604 SDValue UnpkOp = Src->getOperand(0);
19617 auto MaskAndTypeMatch = [ExtVal](EVT VT) -> bool {
19618 return ((ExtVal == 0xFF && VT == MVT::i8) ||
19619 (ExtVal == 0xFFFF && VT == MVT::i16) ||
19620 (ExtVal == 0xFFFFFFFF && VT == MVT::i32));
19626 if (MaskAndTypeMatch(EltTy))
19631 auto MaskedLoadOp = dyn_cast<MaskedLoadSDNode>(UnpkOp);
19632 if (MaskedLoadOp && (MaskedLoadOp->getExtensionType() == ISD::ZEXTLOAD ||
19635 if (MaskAndTypeMatch(EltTy))
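// Standalone sketch, not LLVM code: the equivalence the MaskAndTypeMatch
// checks above rely on, namely that AND with an all-ones mask of the narrow
// element width is the same value as a zero-extend from that width. Names
// below are illustrative only.
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t Wide = 0xCAFEBEEF;
  uint32_t ViaMask = Wide & 0xFFu;            // keep only the low i8 lane
  uint32_t ViaZext = (uint32_t)(uint8_t)Wide; // zero-extend the i8 value
  std::printf("%u %u\n", ViaMask, ViaZext);   // identical results
  return 0;
}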
19659 return N->getOperand(1);
19661 return N->getOperand(0);
19668 if (!Src.hasOneUse())
19679 MemVT = cast<VTSDNode>(Src->getOperand(3))->getVT();
19696 MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT();
19716 EVT VT = N->getValueType(0);
19722 for (auto U : N->users())
19753 EVT VT = N->getValueType(0);
19793 DefBits = ~(DefBits | ZeroSplat);
19800 UndefBits = ~(UndefBits | ZeroSplat);
19802 UndefBits, &LHS)) ||
19816 EVT VT = N->getValueType(0);
19819 if (!N->getFlags().hasAllowReassociation())
19826 unsigned Opc = A.getConstantOperandVal(0);
19827 if (Opc != Intrinsic::aarch64_neon_vcmla_rot0 &&
19828 Opc != Intrinsic::aarch64_neon_vcmla_rot90 &&
19829 Opc != Intrinsic::aarch64_neon_vcmla_rot180 &&
19830 Opc != Intrinsic::aarch64_neon_vcmla_rot270)
19835 A.getOperand(2), A.getOperand(3));
19851 return (FullFP16 && VT == MVT::f16) || VT == MVT::f32 || VT == MVT::f64;
19853 return VT == MVT::i64;
19865 (N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilege ||
19866 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilegt ||
19867 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehi ||
19868 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehs ||
19869 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele ||
19870 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo ||
19871 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels ||
19872 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt ||
19874 N.getConstantOperandVal(0) == Intrinsic::get_active_lane_mask)))
19937 if (VS.getConstantOperandVal(0) != NumEls)
19956 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
19958 EVT VT = N->getValueType(0);
19987 Shuffle = dyn_cast<ShuffleVectorSDNode>(N00);
19991 if (Shuffle && Shuffle->getMaskElt(0) == 1 &&
20006 {N0->getOperand(0), Extract1, Extract2});
20020 EVT VT = N->getValueType(0);
20021 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
20046 (N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
20048 MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
20050 for (size_t i = 0; i < Mask.size(); ++i)
20078 NScalarSize =
N->getValueType(0).getScalarSizeInBits();
20080 if (N001ConstVal == N101ConstVal && N001ConstVal > NScalarSize) {
20085 DAG.getConstant(N001ConstVal - NScalarSize, dl, MVT::i32);
20092 if (N->getOperand(0).getValueType() == MVT::v4i8 ||
20093 N->getOperand(0).getValueType() == MVT::v2i16 ||
20094 N->getOperand(0).getValueType() == MVT::v2i8) {
20095 EVT SrcVT = N->getOperand(0).getValueType();
20099 if (N->getNumOperands() % 2 == 0 &&
20101 if (V.getValueType() != SrcVT)
20105 LoadSDNode *LD = dyn_cast<LoadSDNode>(V);
20106 return LD && V.hasOneUse() && LD->isSimple() && !LD->isIndexed() &&
20107 LD->getExtensionType() == ISD::NON_EXTLOAD;
20109 EVT FVT = SrcVT == MVT::v2i8 ? MVT::f16 : MVT::f32;
20113 for (unsigned i = 0; i < N->getNumOperands(); i++) {
20120 LD->getBasePtr(), LD->getMemOperand());
20141 auto isBitwiseVectorNegate = [](SDValue V) {
20142 return V->getOpcode() == ISD::XOR &&
20168 if (N->getNumOperands() == 2 && N0Opc == N1Opc && VT.is128BitVector() &&
20179 return DAG.getNode(N0Opc, dl, VT, Concat0, Concat1);
20183 auto IsRSHRN = [](SDValue Shr) {
20187 EVT VT = Op.getValueType();
20188 unsigned ShtAmt = Shr.getConstantOperandVal(1);
20195 Op.getOperand(1).getConstantOperandVal(0)
20196 << Op.getOperand(1).getConstantOperandVal(1));
20198 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
20200 Op.getOperand(1).getConstantOperandVal(0));
20204 if (Imm != 1ULL << (ShtAmt - 1))
20210 if (N->getNumOperands() == 2 && IsRSHRN(N0) &&
20218 X.getValueType().getDoubleNumVectorElementsVT(*DCI.DAG.getContext());
20260 MVT RHSTy = RHS.getValueType().getSimpleVT();
20266 dbgs() << "aarch64-lower: concat_vectors bitcast simplification\n");
20282 EVT VT = N->getValueType(0);
20293 if (isa<ConstantSDNode>(V.getOperand(0)))
20304 SDValue SubVec = N->getOperand(1);
20305 uint64_t IdxVal = N->getConstantOperandVal(2);
20316 if (IdxVal == 0 && Vec.isUndef())
20322 (IdxVal != 0 && IdxVal != NumSubElts))
20367 EVT ResTy = N->getValueType(0);
20378 VecResTy = MVT::v4f32;
20380 VecResTy = MVT::v2f64;
20405 MVT VT = N.getSimpleValueType();
20407 N.getConstantOperandVal(1) == 0)
20408 N = N.getOperand(0);
20410 switch (N.getOpcode()) {
20435 if (N.getValueType().is64BitVector()) {
20447 N = N.getOperand(0);
20450 if (N.getOperand(0).getValueType().isScalableVector())
20452 return N.getConstantOperandAPInt(1) ==
20453 N.getOperand(0).getValueType().getVectorNumElements() / 2;
20518 if (!TValue || !FValue)
20522 if (!TValue->isOne()) {