#include <initializer_list>

#define DEBUG_TYPE "aarch64-legalinfo"

using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;
using namespace TargetOpcode;
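
// Every packed vector type that fills a 64- or 128-bit NEON register, kept
// in one list so a single rule can mark them all legal at once.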
std::initializer_list<LLT> PackedVectorAllTypeList = {
if (!ST.hasNEON() || !ST.hasFPARMv8()) {
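  // Without NEON and FP the detailed rules below do not apply; only a
  // minimal set of types can be handled in this early-return path.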
const bool HasFP16 = ST.hasFullFP16();
const LLT &MinFPScalar = HasFP16 ? s16 : s32;
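// s16 is only a legal FP type when full FP16 is available; otherwise the
// smallest FP scalar the rules accept is s32.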
      .legalFor({p0, s1, s8, s16, s32, s64})
      .legalFor(PackedVectorAllTypeList)
            return Query.Types[0].isVector() &&
                   (Query.Types[0].getElementType() != s64 ||
                    Query.Types[0].getNumElements() != 2);

            LLT EltTy = Query.Types[0].getElementType();
            return std::make_pair(0, EltTy);
      .legalFor(PackedVectorAllTypeList)
103 .
legalFor({s32, s64, v4s32, v2s32, v2s64})
104 .clampScalar(0, s32, s64)
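
// NEON has no 64-bit lane integer multiply, so among these ops only G_MUL
// with a v2s64 result is scalarized (the predicate below).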
      .legalFor({s32, s64, v2s32, v4s32, v4s16, v8s16, v16s8, v8s8})

            return Query.Opcode == G_MUL && Query.Types[0] == v2s64;

      .clampScalar(0, s32, s64)
            const auto &SrcTy = Query.Types[0];
            const auto &AmtTy = Query.Types[1];
            return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
                   AmtTy.getSizeInBits() == 32;
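// Shifts of 32-bit scalars with a 32-bit amount are marked custom;
// legalizeShlAshrLshr rewrites constant amounts to s64 so the imported
// selection patterns can match them.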
      .clampScalar(1, s32, s64)
      .legalFor({{p0, s64}, {v2p0, v2s64}})
      .clampScalar(1, s64, s64);
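
// G_PTR_ADD: a pointer plus an s64 offset, in scalar and 2-lane vector
// forms; narrower offsets are widened to s64 by the clamp above.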
      .legalFor({s32, s64})
      .clampScalar(0, s32, s64)
      .lowerFor({s1, s8, s16, s32, s64});
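
// Overflow-reporting arithmetic produces an s32/s64 value plus an s1
// overflow flag, which maps onto the flag-setting ADDS/SUBS forms.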
      {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
      .legalFor({{s32, s1}, {s64, s1}})
      .clampScalar(0, s32, s64)
      .legalFor({s32, s64, v2s64, v4s32, v2s32})
      .clampNumElements(0, v2s32, v4s32)
                               G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
                               G_FNEARBYINT, G_INTRINSIC_LRINT})
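      // Without full FP16, fp16 vector elements are scalarized and fp16
      // scalars widened to s32 so these ops can use the s32 instructions
      // (predicates below).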
            const auto &Ty = Query.Types[0];
            return Ty.isVector() && Ty.getElementType() == s16 &&
                   !ST.hasFullFP16();

          [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
            return Query.Types[0] == s16 && !ST.hasFullFP16();

          [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
      .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});
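
// There are no AArch64 instructions for these transcendental ops, so they
// are lowered to libcalls.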
      {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW})
      .libcallFor({s32, s64, v2s32, v4s32, v2s64});
            return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits();

        if (Ty0 != s32 && Ty0 != s64 && Ty0 != p0)

      .clampScalar(0, s32, s64)
            return Query.Types[0].getSizeInBits() >= Query.Types[1].getSizeInBits();

        if (Ty1 != s32 && Ty1 != s64 && Ty1 != s128)

      .clampScalar(1, s32, s128)
      .legalForTypesWithMemDesc({{s32, p0, 8, 8},

      .clampScalar(0, s32, s64)

                                 {v2s64, p0, 128, 8}})
      .legalForTypesWithMemDesc({{s32, p0, 8, 8}, {s32, p0, 16, 8}})
      .clampScalar(0, s8, s64)

        return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;

      .widenScalarToNextPow2(0)
                                 {v2s64, p0, 128, 8}})
      .clampScalar(0, s8, s64)

        return Query.Types[0].isScalar() &&
               Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;

      .clampMaxNumElements(0, s8, 16)
      .clampScalar(0, s8, s64)

        const auto &Ty = Query.Types[0];
        if (HasFP16 && Ty == s16)
          return true;
        return Ty == s32 || Ty == s64 || Ty == s128;

      .clampScalar(0, MinFPScalar, s128);
      .legalFor({{s32, s32},

      .clampScalar(1, s32, s64)

      .clampNumElements(0, v2s32, v4s32);
        unsigned DstSize = Query.Types[0].getSizeInBits();

        if (DstSize == 128 && !Query.Types[0].isVector())
      .legalIf(ExtLegalFunc)
          {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
      .clampMaxNumElements(0, s32, 2);

          {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
      .clampMaxNumElements(0, s64, 2);
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(0, s32, s64)

      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(1, s32, s64)

      .clampScalar(0, s32, s64)
        return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();

      .legalFor({{p0, s64}});
                 v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,

      .clampScalar(0, s8, s64)
      {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
       G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
       G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX, G_ATOMIC_CMPXCHG})
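      // All read-modify-write atomics and cmpxchg share one rule set;
      // AArch64 selects them as LL/SC loops or, with LSE, single
      // instructions.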
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
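    // For G_MERGE_VALUES the wide type is the def (index 0); for
    // G_UNMERGE_VALUES it is the source (index 1).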
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
    auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) {
      const LLT &Ty = Query.Types[TypeIdx];
            [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },

            [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
        .clampScalar(BigTyIdx, s8, s512)
              const LLT &Ty = Query.Types[BigTyIdx];

              const LLT &Ty = Query.Types[BigTyIdx];
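              // Round a non-power-of-2 wide type up to the next power of
              // two; the RoundedTo adjustment below keeps very large
              // results from overshooting.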
              unsigned NewSizeInBits = 1 << Log2_32_Ceil(Ty.getSizeInBits() + 1);
              if (NewSizeInBits >= 256) {

                if (RoundedTo < NewSizeInBits)
                  NewSizeInBits = RoundedTo;

              return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
        .clampScalar(LitTyIdx, s8, s256)
              const LLT &BigTy = Query.Types[BigTyIdx];
              const LLT &LitTy = Query.Types[LitTyIdx];
        const LLT &EltTy = Query.Types[1].getElementType();
        return Query.Types[0] != EltTy;
        return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
               VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
               VecTy == v16s8 || VecTy == v2p0;
          return Query.Types[1].getNumElements() <= 2;

          return Query.Types[1].getNumElements() <= 4;

          return Query.Types[1].getNumElements() <= 8;

          return Query.Types[1].getNumElements() <= 16;
      .minScalarOrElt(0, s8);
      .clampNumElements(0, v4s32, v4s32)

          {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
  for (auto &Ty : {v2s32, v4s32, v2s64, v2p0, v16s8, v8s16}) {

        return !Query.Types[1].isVector();

      .clampNumElements(0, v4s32, v4s32)
      .legalFor({{v4s32, v2s32}, {v8s16, v4s16}});
        return Query.Types[0] == p0 && Query.Types[1] == s64;
      .legalFor({{s32, v2s32}, {s64, v2s64}})
      .clampMaxNumElements(1, s64, 2)

          {{s8, v16s8}, {s16, v8s16}, {s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
      .clampMaxNumElements(1, s64, 2)
        return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;

      .customFor({{s32, s32}, {s64, s64}});
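
  // Cross-check the completed rule tables against the target's instruction
  // definitions.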
  verify(*ST.getInstrInfo());
  switch (MI.getOpcode()) {
  case TargetOpcode::G_VAARG:
    return legalizeVaArg(MI, MRI, MIRBuilder);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
    return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_GLOBAL_VALUE:
    return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_TRUNC:
    return legalizeVectorTrunc(MI, Helper);
  case TargetOpcode::G_SBFX:
  case TargetOpcode::G_UBFX:
    return legalizeBitfieldExtract(MI, MRI, Helper);
  case TargetOpcode::G_ROTR:
    return legalizeRotate(MI, MRI, Helper);
  MI.getOperand(2).setReg(NewAmt.getReg(0));
  for (int I = 0; I < NumParts; ++I)
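
// Wide vector truncates are legalized by splitting: unmerge the source,
// truncate each piece, then concatenate the narrow results back into the
// destination vector.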
bool AArch64LegalizerInfo::legalizeVectorTrunc(
  for (unsigned I = 0; I < SplitSrcs.size(); ++I)
  MI.getOperand(1).setReg(Concat.getReg(0));
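
// In the small code model a G_GLOBAL_VALUE becomes ADRP plus an add of the
// low 12 bits. A tagged global also needs its tag in bits 48-63, which ADRP
// cannot produce, hence the MOVK with the unusual 0x100000000 addend and a
// G3 relocation below.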
bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
  assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);

  auto &GlobalOp = MI.getOperand(1);
  const auto *GV = GlobalOp.getGlobal();
  if (GV->isThreadLocal())
    return true; // Don't want to modify TLS vars.

  auto Offset = GlobalOp.getOffset();
864 "Should not have folded in an offset for a tagged global!");
866 .addGlobalAddress(GV, 0x100000000,
873 .addGlobalAddress(GV,
Offset,
875 MI.eraseFromParent();
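
// If the shift amount is a constant, promote it to an s64 immediate so the
// imported selection patterns can match it; either way the shift itself is
// legal.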
bool AArch64LegalizerInfo::legalizeShlAshrLshr(

  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR ||
         MI.getOpcode() == TargetOpcode::G_SHL);
  int64_t Amount = VRegAndVal->Value.getSExtValue();

  MI.getOperand(2).setReg(ExtCst.getReg(0));
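
// 128-bit scalar loads and stores have no selection patterns, so they are
// rewritten to operate on v2s64 with a bitcast on the value side.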
bool AArch64LegalizerInfo::legalizeLoadStore(

  assert(MI.getOpcode() == TargetOpcode::G_STORE ||
         MI.getOpcode() == TargetOpcode::G_LOAD);

    LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
  auto &MMO = **MI.memoperands_begin();
  if (MI.getOpcode() == TargetOpcode::G_STORE) {

  auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);

  MI.eraseFromParent();
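
// In legalizeVaArg: a va_arg slot whose alignment exceeds the stack slot
// alignment rounds the current va_list pointer up before loading from it.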
  Align Alignment(MI.getOperand(2).getImm());
  if (Alignment > PtrAlign) {
    auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));

                                        ValSize, std::max(Alignment, PtrAlign)));
  MI.eraseFromParent();
bool AArch64LegalizerInfo::legalizeBitfieldExtract(