#include "llvm/IR/IntrinsicsAArch64.h"
#include <initializer_list>

#define DEBUG_TYPE "aarch64-legalinfo"

using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;
using namespace MIPatternMatch;

  using namespace TargetOpcode;
  std::initializer_list<LLT> PackedVectorAllTypeList = {v16s8, v8s16, v4s32,
                                                        v2s64, v2p0,
                                                        v8s8,  v4s16, v2s32};
  if (!ST.hasNEON() || !ST.hasFPARMv8()) {
    getLegacyLegalizerInfo().computeTables();
    return;
  }
  const bool HasFP16 = ST.hasFullFP16();
  const LLT &MinFPScalar = HasFP16 ? s16 : s32;

  const bool HasCSSC = ST.hasCSSC();
  const bool HasRCPC3 = ST.hasRCPC3();
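  // Feature notes (editorial gloss, not from the original source): FEAT_FP16
  // makes s16 the narrowest legal FP scalar; FEAT_CSSC adds scalar CTPOP/ABS
  // and integer min/max instructions; FEAT_LRCPC3 adds the release-consistent
  // 128-bit load/store pair forms (LDIAPP/STILP) used further down.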
      .legalFor({p0, s8, s16, s32, s64})
      .legalFor(PackedVectorAllTypeList)
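      // The predicate/mutation pair below (as read from the surrounding
      // fragments) scalarizes every vector type except v2s64: any vector
      // whose element type is not s64, or whose element count is not 2, is
      // broken down to its element type.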
            return Query.Types[0].isVector() &&
                   (Query.Types[0].getElementType() != s64 ||
                    Query.Types[0].getNumElements() != 2);
            LLT EltTy = Query.Types[0].getElementType();
            return std::make_pair(0, EltTy);
      .legalFor(PackedVectorAllTypeList)

      .legalFor({s32, s64, v4s32, v2s32, v2s64})
      .widenScalarToNextPow2(0)
      .legalFor({s32, s64, v2s32, v4s32, v4s16, v8s16, v16s8, v8s8})
            return Query.Opcode == G_MUL && Query.Types[0] == v2s64;
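            // NEON has no 64-bit-lane integer multiply, so a v2s64 G_MUL is
            // scalarized into two 64-bit MULs.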
      .widenScalarToNextPow2(0)
            return Query.Types[0].getNumElements() <= 2;
            return Query.Types[0].getNumElements() <= 4;
            return Query.Types[0].getNumElements() <= 16;
            const auto &SrcTy = Query.Types[0];
            const auto &AmtTy = Query.Types[1];
            return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
                   AmtTy.getSizeInBits() == 32;
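            // A 32-bit shift with a 32-bit amount is marked custom: the
            // amount is re-materialized as s64 in legalizeShlAshrLshr below,
            // matching what instruction selection expects.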
      .widenScalarToNextPow2(0)

      .legalFor({{p0, s64}, {v2p0, v2s64}})
      .clampScalar(1, s64, s64);
      .legalFor({s32, s64})
      .clampScalar(0, s32, s64)

      .lowerFor({s8, s16, s32, s64, v2s64, v4s32, v2s32})
      .clampScalarOrElt(0, s32, s64)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0);
      .widenScalarToNextPow2(0, 32)
      .legalFor({s64, v8s16, v16s8, v4s32})

      {G_SMIN, G_SMAX, G_UMIN, G_UMAX});
        .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
        .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32});
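    // With CSSC, scalar s32/s64 min/max are single instructions, so they are
    // legal alongside the NEON vector forms; without it only the vector forms
    // are legal and scalar min/max must be lowered (compare + select).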
      {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
      .legalFor({{s32, s32}, {s64, s32}})
      .clampScalar(0, s32, s64)
      .legalFor({MinFPScalar, s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(0, MinFPScalar, s64)

                               G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
                               G_FNEARBYINT, G_INTRINSIC_LRINT})
            const auto &Ty = Query.Types[0];
            return Ty.isVector() && Ty.getElementType() == s16 &&
                   !ST.hasFullFP16();
          },
          [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
            return Query.Types[0] == s16 && !ST.hasFullFP16();
          },
          [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
      .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});
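      // Without full FP16 the two rules above first break s16 vectors down to
      // s16 scalars (the (0, s16) mutation) and then widen s16 scalars to
      // s32, so the operation executes in single precision.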
      {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW})
      .libcallFor({s32, s64, v2s32, v4s32, v2s64});
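  // No AArch64 instructions exist for these transcendental operations, so
  // they are lowered to libm calls.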
  for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
    if (Op == G_SEXTLOAD)
        .legalForTypesWithMemDesc({{s32, p0, s8, 8},
                                   {v2s32, p0, s64, 8}})
        .widenScalarToNextPow2(0)
        .clampScalar(0, s32, s64)
        .unsupportedIfMemSizeNotPow2()
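        // Each MemDesc tuple reads {result type, pointer type, memory type,
        // minimum alignment in bits}; e.g. {s32, p0, s8, 8} is a byte-sized
        // extending load into a 32-bit register.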
            return HasRCPC3 && Query.Types[0] == s128 &&
                   Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
            return Query.Types[0] == s128 &&
                   Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
      .legalForTypesWithMemDesc({{s8, p0, s8, 8},
                                 {v16s8, p0, s128, 8},
                                 {v8s16, p0, s128, 8},
                                 {v4s32, p0, s128, 8},
                                 {v2s64, p0, s128, 8}})
      .legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 8}})
      .widenScalarToNextPow2(0, 8)
            return Query.Types[0].isScalar() &&
                   Query.Types[0].getSizeInBits() > 32;
      .clampMaxNumElements(0, s8, 16)
            return HasRCPC3 && Query.Types[0] == s128 &&
                   Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
            return Query.Types[0] == s128 &&
                   Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
      .legalForTypesWithMemDesc(
          {{s8, p0, s8, 8},     {s16, p0, s8, 8},
           {s16, p0, s16, 8},   {s32, p0, s16, 8},
           {s32, p0, s8, 8},    {s32, p0, s16, 8},   {s32, p0, s32, 8},
           {s64, p0, s64, 8},   {s64, p0, s32, 8},
           {p0, p0, s64, 8},    {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
           {v8s8, p0, s64, 8},  {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
           {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
      .clampScalar(0, s8, s64)
            return Query.Types[0].isScalar() &&
                   Query.Types[0] != Query.MMODescrs[0].MemoryTy;
      .clampMaxNumElements(0, s8, 16)
      .widenScalarToNextPow2(0)
            const auto &Ty = Query.Types[0];
            if (HasFP16 && Ty == s16)
              return true;
            return Ty == s32 || Ty == s64 || Ty == s128;
      .clampScalar(0, MinFPScalar, s128);
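  // FP constants are kept legal at s32/s64/s128 (and s16 with full FP16);
  // anything narrower is widened into the MinFPScalar..s128 range first.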
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s32)
      .minScalarEltSameAsIf(
      .clampNumElements(0, v2s32, v4s32);
            const auto &Ty = Query.Types[0];
            return Ty.isVector() && Ty.getElementType() == s16 && !HasFP16;
          [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
            return Query.Types[0] == s16 && !HasFP16;
          [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
      .legalFor({{s16, s16},
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s32)
      .minScalarEltSameAsIf(
      .clampNumElements(0, v2s32, v4s32);
        unsigned DstSize = Query.Types[0].getSizeInBits();
        if (DstSize == 128 && !Query.Types[0].isVector())
          return false;
      .legalIf(ExtLegalFunc)

      .legalFor(PackedVectorAllTypeList)

          {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
      .clampMaxNumElements(0, s32, 2);

          {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
      .clampMaxNumElements(0, s64, 2);
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .widenScalarToNextPow2(0)

      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(1, s32, s64)

      .clampScalar(0, s32, s32);
      .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
      .widenScalarToNextPow2(0)

      .legalFor({{s64, p0}, {v2s64, v2p0}})
      .widenScalarToNextPow2(0, 64)
            return Query.Types[0].getSizeInBits() !=
                   Query.Types[1].getSizeInBits();
      .legalFor({{p0, s64}, {v2p0, v2s64}});
                       v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
      .clampScalar(0, s8, s64)
            return Query.Types[0].getSizeInBits() == 128;
      .clampScalar(0, s32, s64)
      {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
       G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
       G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
      .clampScalar(0, s32, s64)
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
        switch (Q.Types[BigTyIdx].getSizeInBits()) {
        switch (Q.Types[LitTyIdx].getSizeInBits()) {
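    // For G_MERGE_VALUES the wide type is the destination (type index 0);
    // for G_UNMERGE_VALUES it is the source (type index 1), hence the
    // swapped Big/Lit indices above.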
        const LLT &EltTy = Query.Types[1].getElementType();
        return Query.Types[0] != EltTy;
        const LLT &VecTy = Query.Types[1];
        return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
               VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
               VecTy == v8s8 || VecTy == v16s8 || VecTy == v2p0;
            return Query.Types[1].getNumElements() <= 2;
            return Query.Types[1].getNumElements() <= 4;
            return Query.Types[1].getNumElements() <= 8;
            return Query.Types[1].getNumElements() <= 16;
      .minScalarOrElt(0, s8)
      .clampNumElements(0, v4s32, v4s32)

                       {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
      .widenScalarToNextPow2(1, 32)
      .clampScalar(1, s32, s64)
      .scalarSameSizeAs(0, 1);

      .widenScalarToNextPow2(0, 32)
        return (HasCSSC && typeInSet(0, {s32, s64})(Query));
        return (!HasCSSC && typeInSet(0, {s32, s64})(Query));
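      // Scalar G_CTPOP is only a native instruction with CSSC (CNT); without
      // it the scalar case goes through the custom NEON lowering in
      // legalizeCTPOP below.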
        return !Query.Types[1].isVector();
            return Query.Types[0].isVector() && Query.Types[1].isVector() &&
                   Query.Types[0].getNumElements() >
                       Query.Types[1].getNumElements();
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
            return Query.Types[0].isVector() && Query.Types[1].isVector() &&
                   Query.Types[0].getNumElements() <
                       Query.Types[1].getNumElements();

      .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}});
        return Query.Types[0] == p0 && Query.Types[1] == s64;

      .customForCartesianProduct({p0}, {s8}, {s64})

      .legalForCartesianProduct({p0}, {p0}, {s64})

      .legalFor({s32, s64});

      .legalFor(PackedVectorAllTypeList)
      .legalFor({{s32, v2s32}, {s64, v2s64}})
      .clampMaxNumElements(1, s64, 2)

          {{s8, v16s8}, {s16, v8s16}, {s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
      .clampMaxNumElements(1, s64, 2)

      {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
        return std::make_pair(1, SrcTy.divide(2));
        return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
      .customFor({{s32, s32}, {s64, s64}});
      .legalFor({{s32, s32},
      .customFor({{s128, s128},
      .legalFor({{v8s8, v8s8},
      .customFor({{s32, s32},
      .clampScalar(0, s32, s128)
      .widenScalarToNextPow2(0)
      .minScalarEltSameAsIf(always, 1, 0)
      .maxScalarEltSameAsIf(always, 1, 0);

      .legalFor({MinFPScalar, s32, s64})
      .minScalar(0, MinFPScalar);
      .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
        const auto &Ty = Query.Types[0];
        return (Ty == v8s16 || Ty == v4s16) && HasFP16;
      .minScalar(0, MinFPScalar)

      .legalFor({{s64, s32}, {s64, s64}});

  verify(*ST.getInstrInfo());
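// legalizeCustom, below, dispatches each opcode marked custom above to a
// dedicated helper; every helper rewrites the instruction in place and
// erases the original when finished.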
  switch (MI.getOpcode()) {
  case TargetOpcode::G_VAARG:
    return legalizeVaArg(MI, MRI, MIRBuilder);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
    return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_GLOBAL_VALUE:
    return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_TRUNC:
    return legalizeVectorTrunc(MI, Helper);
  case TargetOpcode::G_SBFX:
  case TargetOpcode::G_UBFX:
    return legalizeBitfieldExtract(MI, MRI, Helper);
  case TargetOpcode::G_ROTR:
    return legalizeRotate(MI, MRI, Helper);
  case TargetOpcode::G_CTPOP:
    return legalizeCTPOP(MI, MRI, Helper);
  case TargetOpcode::G_ATOMIC_CMPXCHG:
    return legalizeAtomicCmpxchg128(MI, MRI, Helper);
  case TargetOpcode::G_CTTZ:
    return legalizeCTTZ(MI, Helper);
  case TargetOpcode::G_BZERO:
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET:
    return legalizeMemOps(MI, Helper);
  case TargetOpcode::G_FCOPYSIGN:
    return legalizeFCopySign(MI, Helper);
  LLT AmtTy = MRI.getType(AmtReg);
  MI.getOperand(2).setReg(NewAmt.getReg(0));
  for (int I = 0; I < NumParts; ++I)
bool AArch64LegalizerInfo::legalizeVectorTrunc(
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  for (unsigned I = 0; I < SplitSrcs.size(); ++I)
  MI.getOperand(1).setReg(Concat.getReg(0));
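  // The overall shape (reconstructed from the fragments above): the wide
  // source vector is split into parts, each part is truncated to
  // narrower-element pieces, the pieces are concatenated, and the original
  // G_TRUNC is re-pointed at the concat so it becomes a legal narrowing step.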
bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
  assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
  auto &GlobalOp = MI.getOperand(1);
  const auto *GV = GlobalOp.getGlobal();
  if (GV->isThreadLocal())
  auto Offset = GlobalOp.getOffset();
  MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
         "Should not have folded in an offset for a tagged global!");
      .addGlobalAddress(GV, 0x100000000,
  MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
      .addGlobalAddress(GV, Offset,
  MI.eraseFromParent();
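// For MTE-tagged globals (MO_TAGGED), ADRP produces the page address with
// the tag bits (56-63) cleared, so a MOVK re-inserts the tag: the
// 0x100000000 (2^32) offset combined with a PC-relative G3 reference places
// the tag in the top 16 bits of the address.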
  switch (MI.getIntrinsicID()) {
  case Intrinsic::vacopy: {
    unsigned VaListSize =
                                VaListSize, Align(PtrSize)));
                                VaListSize, Align(PtrSize)));
    MI.eraseFromParent();
  case Intrinsic::get_dynamic_area_offset: {
    MI.eraseFromParent();
  case Intrinsic::aarch64_mops_memset_tag: {
    assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
    auto &Value = MI.getOperand(3);
    Value.setReg(ZExtValueReg);
  case Intrinsic::prefetch: {
    auto &AddrVal = MI.getOperand(1);
    int64_t IsWrite = MI.getOperand(2).getImm();
    int64_t Locality = MI.getOperand(3).getImm();
    int64_t IsData = MI.getOperand(4).getImm();
    bool IsStream = Locality == 0;
    if (Locality != 0) {
      assert(Locality <= 3 && "Prefetch locality out-of-range");
      Locality = 3 - Locality;
    }
    unsigned PrfOp =
        (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
    MI.eraseFromParent();
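    // PRFM's prfop immediate is assembled here: bit 4 selects store (PST)
    // vs. load (PLD) prefetch, bit 3 instruction (PLI) vs. data, bits 2:1
    // the target cache level, and bit 0 KEEP vs. STRM (streaming) policy.
    // IR locality 3 (keep) maps to L1, hence the 3 - Locality inversion.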
  case Intrinsic::aarch64_prefetch: {
    auto &AddrVal = MI.getOperand(1);
    int64_t IsWrite = MI.getOperand(2).getImm();
    int64_t Target = MI.getOperand(3).getImm();
    int64_t IsStream = MI.getOperand(4).getImm();
    int64_t IsData = MI.getOperand(5).getImm();
    unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) |
                     (Target << 1) | IsStream;
    MI.eraseFromParent();
bool AArch64LegalizerInfo::legalizeShlAshrLshr(
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR ||
         MI.getOpcode() == TargetOpcode::G_SHL);
  int64_t Amount = VRegAndVal->Value.getSExtValue();
  MI.getOperand(2).setReg(ExtCst.getReg(0));
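  // If the shift amount is a constant, it is rebuilt as a 64-bit G_CONSTANT
  // and swapped into operand 2, since the AArch64 selector expects shift
  // immediates as 64-bit values.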
      isShiftedInt<7, 3>(NewOffset)) {
bool AArch64LegalizerInfo::legalizeLoadStore(
  assert(MI.getOpcode() == TargetOpcode::G_STORE ||
         MI.getOpcode() == TargetOpcode::G_LOAD);
  const LLT ValTy = MRI.getType(ValReg);
  bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
      ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
    Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
    assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
    Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
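  // A 128-bit atomic load/store is split into an x-register pair: with
  // RCPC3, LDIAPP/STILP provide acquire/release semantics directly;
  // otherwise plain LDP/STP are used, which FEAT_LSE2 guarantees to be
  // single-copy atomic when naturally aligned.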
    NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
        Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
  NewI.addUse(MI.getOperand(1).getReg());
                                   *MRI.getTargetRegisterInfo(),
  MI.eraseFromParent();
  LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
  auto &MMO = **MI.memoperands_begin();
  if (MI.getOpcode() == TargetOpcode::G_STORE) {
  auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
  MI.eraseFromParent();
  Align Alignment(MI.getOperand(2).getImm());
  Register ListPtr = MI.getOperand(1).getReg();
  LLT PtrTy = MRI.getType(ListPtr);
  if (Alignment > PtrAlign) {
    auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
  LLT ValTy = MRI.getType(Dst);
                                        ValTy, std::max(Alignment, PtrAlign)));
  MI.eraseFromParent();
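  // Over-aligned va_arg slots are handled by bumping the list pointer by
  // (alignment - 1) and then masking off the low bits, rounding the address
  // up to the required alignment before the load.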
bool AArch64LegalizerInfo::legalizeBitfieldExtract(
  LLT Ty = MRI.getType(Val);
         "Expected src and dst to have the same type!");
  auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);
  MI.eraseFromParent();
  if (!ST->hasNEON() ||
      MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
  assert((Size == 32 || Size == 64 || Size == 128) &&
         "Expected only 32, 64, or 128 bit scalars!");
    Opc = Intrinsic::aarch64_neon_uaddlv;
    Opc = Intrinsic::aarch64_neon_uaddlp;
    Opc = Intrinsic::aarch64_neon_uaddlp;
    Opc = Intrinsic::aarch64_neon_uaddlp;
    Opc = Intrinsic::aarch64_neon_uaddlp;
    Opc = Intrinsic::aarch64_neon_uaddlp;
  for (LLT HTy : HAddTys) {
  MI.eraseFromParent();
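  // The NEON CTPOP recipe: bitcast to a byte vector, CNT counts bits per
  // byte, then a chain of UADDLP pairwise-widening adds (or a single UADDLV
  // across-vector add) accumulates the byte counts up to the result width.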
bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
  auto Addr = MI.getOperand(1).getReg();
  auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
  auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
  auto DstLo = MRI.createGenericVirtualRegister(s64);
  auto DstHi = MRI.createGenericVirtualRegister(s64);
    auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
      Opcode = AArch64::CASPAX;
      Opcode = AArch64::CASPLX;
      Opcode = AArch64::CASPALX;
      Opcode = AArch64::CASPX;
    auto CASDst = MRI.createGenericVirtualRegister(s128);
    auto CASDesired = MRI.createGenericVirtualRegister(s128);
    auto CASNew = MRI.createGenericVirtualRegister(s128);
    MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
        .addUse(DesiredI->getOperand(0).getReg())
        .addImm(AArch64::sube64)
        .addUse(DesiredI->getOperand(1).getReg())
        .addImm(AArch64::subo64);
    MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
        .addImm(AArch64::subo64);
    CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
    auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
      Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
      Opcode = AArch64::CMP_SWAP_128_RELEASE;
      Opcode = AArch64::CMP_SWAP_128;
      Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
    auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
                                {Addr, DesiredI->getOperand(0),
                                 DesiredI->getOperand(1), NewI->getOperand(0),
                                   *MRI.getTargetRegisterInfo(),
  MI.eraseFromParent();
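  // Two strategies: with FEAT_LSE the CASP family performs the 128-bit
  // compare-and-swap directly on an even/odd register pair (assembled with
  // REG_SEQUENCE above); without it, the CMP_SWAP_128* pseudos later expand
  // to an LDXP/STXP exclusive-monitor loop.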
  LLT Ty = MRI.getType(MI.getOperand(1).getReg());
  auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
  MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
  MI.eraseFromParent();
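// AArch64 has CLZ but no count-trailing-zeros instruction, so cttz(x) is
// computed as ctlz(bitreverse(x)), selecting to RBIT + CLZ.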
  if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
    auto &Value = MI.getOperand(1);
    Value.setReg(ZExtValueReg);
  LLT DstTy = MRI.getType(Dst);
  assert((DstSize == 32 || DstSize == 64) && "Unexpected dst type!");
  assert(MRI.getType(MI.getOperand(2).getReg()) == DstTy &&
         "Expected homogeneous types!");
    EltMask = 0x80000000ULL;
      VecTy, Undef, MI.getOperand(1).getReg(), Zero);
      VecTy, Undef, MI.getOperand(2).getReg(), Zero);
  auto Sel = MIRBuilder.buildInstr(AArch64::G_BIT, {VecTy}, {Ins1, Ins2, Mask});
  DstRegs.push_back(MRI.createGenericVirtualRegister(DstTy));
  MI.eraseFromParent();
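  // FCOPYSIGN is performed in the vector domain: both scalars are inserted
  // into lane 0 of undef vectors, and G_BIT (the NEON BIT instruction)
  // copies only the sign bit, selected by EltMask (0x80000000 for the f32
  // case shown), from the second operand into the first.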