29#include "llvm/IR/IntrinsicsAArch64.h"
32#include <initializer_list>
34#define DEBUG_TYPE "aarch64-legalinfo"
68 std::initializer_list<LLT> PackedVectorAllTypeList = {
74 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
78 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
81 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
88 const bool HasFP16 = ST.hasFullFP16();
89 const LLT &MinFPScalar = HasFP16 ? s16 : s32;
91 const bool HasCSSC = ST.hasCSSC();
92 const bool HasRCPC3 = ST.hasRCPC3();
93 const bool HasSVE = ST.hasSVE();
96 {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
97 .legalFor({p0, s8, s16, s32, s64})
98 .legalFor({v2s8, v4s8, v8s8, v16s8, v2s16, v4s16, v8s16, v2s32, v4s32,
100 .widenScalarToNextPow2(0)
113 .legalFor(PackedVectorAllTypeList)
127 .widenScalarToNextPow2(0)
132 .maxScalarIf(
typeInSet(0, {s64, p0}), 1, s32);
137 .widenScalarToNextPow2(1)
142 .maxScalarIf(
typeInSet(1, {s64, p0}), 0, s32)
143 .maxScalarIf(
typeInSet(1, {s128}), 0, s64);
146 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
147 .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
148 .widenScalarToNextPow2(0)
156 return Query.
Types[0].getNumElements() <= 2;
161 return Query.
Types[0].getNumElements() <= 4;
166 return Query.
Types[0].getNumElements() <= 16;
173 .
legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
174 .widenScalarToNextPow2(0)
182 return Query.
Types[0].getNumElements() <= 2;
187 return Query.
Types[0].getNumElements() <= 4;
192 return Query.
Types[0].getNumElements() <= 16;
200 const auto &SrcTy = Query.
Types[0];
201 const auto &AmtTy = Query.
Types[1];
202 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
203 AmtTy.getSizeInBits() == 32;
217 .widenScalarToNextPow2(0)
231 .
legalFor({{p0, s64}, {v2p0, v2s64}})
232 .clampScalarOrElt(1, s64, s64)
238 .legalFor({s32, s64})
240 .clampScalar(0, s32, s64)
245 .lowerFor({s8, s16, s32, s64, v2s32, v4s32, v2s64})
254 .widenScalarToNextPow2(0, 32)
259 .legalFor({s64, v16s8, v8s16, v4s32})
263 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
264 .legalFor(HasCSSC, {s32, s64})
265 .minScalar(HasCSSC, 0, s32)
274 .legalFor(PackedVectorAllTypeList)
278 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
282 [=](
const LegalityQuery &Query) {
return std::make_pair(0, v4s16); })
285 [=](
const LegalityQuery &Query) {
return std::make_pair(0, v2s32); })
286 .clampNumElements(0, v8s8, v16s8)
294 {G_ABDS, G_ABDU, G_UAVGFLOOR, G_UAVGCEIL, G_SAVGFLOOR, G_SAVGCEIL})
295 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
299 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
300 .legalFor({{s32, s32}, {s64, s32}})
301 .clampScalar(0, s32, s64)
306 .customFor({{s32, s32}, {s32, s64}, {s64, s64}})
312 return Q.
Types[0].isScalar() && Q.
Types[1].getScalarSizeInBits() < 64;
318 .customFor({{s32, s32}, {s64, s64}});
322 .
legalFor(HasCSSC, {{s32, s32}, {s64, s64}})
323 .legalFor({{v8s8, v8s8}, {v16s8, v16s8}})
324 .customFor(!HasCSSC, {{s32, s32}, {s64, s64}})
325 .customFor({{s128, s128},
331 .clampScalar(0, s32, s128)
343 .legalFor({{s32, s32},
351 .widenScalarToNextPow2(1, 32)
369 .customFor(!HasCSSC, {s32, s64});
375 .widenScalarToNextPow2(0, 32)
387 .
legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
396 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
397 .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
398 .clampNumElements(0, v8s8, v16s8)
407 {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM,
408 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
409 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
410 .legalFor({s32, s64, v2s32, v4s32, v2s64})
411 .legalFor(HasFP16, {s16, v4s16, v8s16})
421 .legalFor({s32, s64, v2s32, v4s32, v2s64})
422 .legalFor(HasFP16, {s16, v4s16, v8s16})
437 G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
438 G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
439 G_FSINH, G_FTANH, G_FMODF})
448 .
libcallFor({{s32, s32}, {s64, s32}, {s128, s32}});
451 .legalFor({{s32, s32}, {s32, s64}, {s64, s32}, {s64, s64}})
452 .legalFor(HasFP16, {{s32, s16}, {s64, s16}})
457 .legalFor({{s64, s32}, {s64, s64}})
458 .legalFor(HasFP16, {{s64, s16}})
476 for (
unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
479 if (
Op == G_SEXTLOAD)
484 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
492 {v2s32, p0, s64, 8}})
493 .widenScalarToNextPow2(0)
494 .clampScalar(0, s32, s64)
497 .unsupportedIfMemSizeNotPow2()
509 return HasRCPC3 && Query.
Types[0] == s128 &&
513 return Query.
Types[0] == s128 &&
516 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
523 {v16s8, p0, s128, 8},
525 {v8s16, p0, s128, 8},
527 {v4s32, p0, s128, 8},
528 {v2s64, p0, s128, 8}})
530 .legalForTypesWithMemDesc(
531 {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
532 .legalForTypesWithMemDesc({
534 {nxv16s8, p0, nxv16s8, 8},
535 {nxv8s16, p0, nxv8s16, 8},
536 {nxv4s32, p0, nxv4s32, 8},
537 {nxv2s64, p0, nxv2s64, 8},
539 .widenScalarToNextPow2(0, 8)
550 return Query.
Types[0].isScalar() &&
552 Query.
Types[0].getSizeInBits() > 32;
561 .customIf(IsPtrVecPred)
567 return HasRCPC3 && Query.
Types[0] == s128 &&
571 return Query.
Types[0] == s128 &&
579 {{s8, p0, s8, 8}, {s16, p0, s8, 8},
582 {s16, p0, s16, 8}, {s32, p0, s16, 8},
584 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
585 {s64, p0, s64, 8}, {s64, p0, s32, 8},
586 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
587 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
588 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
589 .legalForTypesWithMemDesc({
594 {nxv16s8, p0, nxv16s8, 8},
595 {nxv8s16, p0, nxv8s16, 8},
596 {nxv4s32, p0, nxv4s32, 8},
597 {nxv2s64, p0, nxv2s64, 8},
599 .clampScalar(0, s8, s64)
602 return Query.
Types[0].isScalar() &&
606 .clampMaxNumElements(0, s8, 16)
615 return Query.
Types[0].getSizeInBits() ==
616 Query.
MMODescrs[0].MemoryTy.getSizeInBits();
622 .customIf(IsPtrVecPred)
640 {p0, v16s8, v16s8, 8},
641 {p0, v4s16, v4s16, 8},
642 {p0, v8s16, v8s16, 8},
643 {p0, v2s32, v2s32, 8},
644 {p0, v4s32, v4s32, 8},
645 {p0, v2s64, v2s64, 8},
651 auto IndexedLoadBasicPred = [=](
const LegalityQuery &Query) {
679 return MemTy == s8 || MemTy == s16;
681 return MemTy == s8 || MemTy == s16 || MemTy == s32;
689 .widenScalarToNextPow2(0)
694 .clampScalar(0, MinFPScalar, s128);
698 .
legalFor({{s32, s32}, {s32, s64}, {s32, p0}})
707 return Ty.isVector() && !SrcTy.isPointerVector() &&
708 Ty.getElementType() != SrcTy.getElementType();
716 return Query.
Types[1].isPointerVector();
733 .legalFor(HasFP16, {{s32, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
742 return Ty.isVector() && !SrcTy.isPointerVector() &&
743 Ty.getElementType() != SrcTy.getElementType();
746 .clampNumElements(1, v4s16, v8s16)
754 unsigned DstSize = Query.
Types[0].getSizeInBits();
757 if (Query.
Types[0].isVector())
760 if (DstSize < 8 || DstSize >= 128 || !
isPowerOf2_32(DstSize))
768 unsigned SrcSize = SrcTy.getSizeInBits();
775 .legalIf(ExtLegalFunc)
776 .
legalFor({{v8s16, v8s8}, {v4s32, v4s16}, {v2s64, v2s32}})
777 .clampScalar(0, s64, s64)
784 return (Query.
Types[0].getScalarSizeInBits() >
785 Query.
Types[1].getScalarSizeInBits() * 2) &&
786 Query.
Types[0].isVector() &&
787 (Query.
Types[1].getScalarSizeInBits() == 8 ||
788 Query.
Types[1].getScalarSizeInBits() == 16);
790 .clampMinNumElements(1, s8, 8)
795 .
legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
806 return DstTy.
isVector() && SrcTy.getSizeInBits() > 128 &&
809 .clampMinNumElements(0, s8, 8)
814 .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
815 .clampNumElements(0, v2s32, v2s32);
819 .legalFor(PackedVectorAllTypeList)
830 {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
831 .libcallFor({{s16, s128}, {s32, s128}, {s64, s128}})
837 SrcTy.getScalarSizeInBits() == 64 &&
841 .clampNumElements(1, v4s32, v4s32)
847 {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
848 .libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
854 return SrcTy.isVector() && DstTy.
isVector() &&
855 SrcTy.getScalarSizeInBits() == 16 &&
859 .clampNumElements(0, v4s32, v4s32)
865 .legalFor({{s32, s32},
873 {{s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
880 return Query.
Types[1] == s16 && Query.
Types[0].getSizeInBits() > 64;
889 return Query.
Types[0].getScalarSizeInBits() <= 64 &&
890 Query.
Types[0].getScalarSizeInBits() >
891 Query.
Types[1].getScalarSizeInBits();
896 return Query.
Types[1].getScalarSizeInBits() <= 64 &&
897 Query.
Types[0].getScalarSizeInBits() <
898 Query.
Types[1].getScalarSizeInBits();
901 .clampNumElements(0, v4s16, v8s16)
905 {{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}});
908 .legalFor({{s32, s32},
917 {{s16, s16}, {s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
925 return Query.
Types[1] == s16 && Query.
Types[0].getSizeInBits() > 64;
935 unsigned ITySize = Query.
Types[0].getScalarSizeInBits();
936 return (ITySize == 16 || ITySize == 32 || ITySize == 64) &&
937 ITySize > Query.
Types[1].getScalarSizeInBits();
942 unsigned FTySize = Query.
Types[1].getScalarSizeInBits();
943 return (FTySize == 16 || FTySize == 32 || FTySize == 64) &&
944 Query.
Types[0].getScalarSizeInBits() < FTySize;
953 .legalFor({{s32, s32},
961 {{s16, s32}, {s16, s64}, {v4s16, v4s16}, {v8s16, v8s16}})
968 return Query.
Types[1].isVector() &&
969 Query.
Types[1].getScalarSizeInBits() == 64 &&
970 Query.
Types[0].getScalarSizeInBits() == 16;
972 .widenScalarOrEltToNextPow2OrMinSize(0, HasFP16 ? 16 : 32)
976 return Query.
Types[0].getScalarSizeInBits() == 32 &&
977 Query.
Types[1].getScalarSizeInBits() == 64;
982 return Query.
Types[1].getScalarSizeInBits() <= 64 &&
983 Query.
Types[0].getScalarSizeInBits() <
984 Query.
Types[1].getScalarSizeInBits();
989 return Query.
Types[0].getScalarSizeInBits() <= 64 &&
990 Query.
Types[0].getScalarSizeInBits() >
991 Query.
Types[1].getScalarSizeInBits();
994 .clampNumElements(0, v4s16, v8s16)
1008 .clampScalar(0, s32, s32);
1012 .
legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
1013 .widenScalarToNextPow2(0)
1032 .
legalFor({{s64, p0}, {v2s64, v2p0}})
1033 .widenScalarToNextPow2(0, 64)
1039 return Query.
Types[0].getSizeInBits() != Query.
Types[1].getSizeInBits();
1041 .legalFor({{p0, s64}, {v2p0, v2s64}})
1042 .clampMaxNumElements(1, s64, 2);
1049 .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
1050 .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
1055 return DstTy.
isScalar() && SrcTy.isVector() &&
1056 SrcTy.getScalarSizeInBits() == 1;
1059 return Query.
Types[0].isVector() != Query.
Types[1].isVector();
1073 .clampScalar(0, s8, s64)
1080 bool UseOutlineAtomics = ST.outlineAtomics() && !ST.hasLSE();
1083 .
legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1084 .customFor(!UseOutlineAtomics, {{s128, p0}})
1085 .libcallFor(UseOutlineAtomics,
1086 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}, {s128, p0}})
1087 .clampScalar(0, s32, s64);
1090 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
1092 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1093 .libcallFor(UseOutlineAtomics,
1094 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
1095 .clampScalar(0, s32, s64);
1100 {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
1102 .clampScalar(0, s32, s64);
1107 for (
unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
1108 unsigned BigTyIdx =
Op == G_MERGE_VALUES ? 0 : 1;
1109 unsigned LitTyIdx =
Op == G_MERGE_VALUES ? 1 : 0;
1116 switch (Q.
Types[BigTyIdx].getSizeInBits()) {
1124 switch (Q.
Types[LitTyIdx].getSizeInBits()) {
1138 .
legalFor(HasSVE, {{s16, nxv16s8, s64},
1139 {s16, nxv8s16, s64},
1140 {s32, nxv4s32, s64},
1141 {s64, nxv2s64, s64}})
1143 const LLT &EltTy = Query.
Types[1].getElementType();
1144 if (Query.
Types[1].isScalableVector())
1146 return Query.
Types[0] != EltTy;
1151 return VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s16 ||
1152 VecTy == v4s16 || VecTy == v8s16 || VecTy == v2s32 ||
1153 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2p0;
1159 return Query.
Types[1].isFixedVector() &&
1160 Query.
Types[1].getNumElements() <= 2;
1165 return Query.
Types[1].isFixedVector() &&
1166 Query.
Types[1].getNumElements() <= 4;
1171 return Query.
Types[1].isFixedVector() &&
1172 Query.
Types[1].getNumElements() <= 8;
1177 return Query.
Types[1].isFixedVector() &&
1178 Query.
Types[1].getNumElements() <= 16;
1181 .minScalarOrElt(0, s8)
1192 typeInSet(0, {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64, v2p0}))
1193 .legalFor(HasSVE, {{nxv16s8, s32, s64},
1194 {nxv8s16, s32, s64},
1195 {nxv4s32, s32, s64},
1196 {nxv2s64, s64, s64}})
1215 .clampNumElements(0, v4s32, v4s32)
1233 {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64}, DstTy);
1237 return Query.
Types[0].getNumElements() >
1238 Query.
Types[1].getNumElements();
1244 return Query.
Types[0].getNumElements() <
1245 Query.
Types[1].getNumElements();
1248 .widenScalarOrEltToNextPow2OrMinSize(0, 8)
1261 .
legalFor({{v16s8, v8s8}, {v8s16, v4s16}, {v4s32, v2s32}})
1264 return Query.
Types[0].isFixedVector() &&
1265 Query.
Types[1].isFixedVector() &&
1266 Query.
Types[0].getScalarSizeInBits() >= 8 &&
1268 Query.
Types[0].getSizeInBits() <= 128 &&
1269 Query.
Types[1].getSizeInBits() <= 64;
1278 SrcTy.getNumElements())));
1282 .
legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
1288 .
legalFor(HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});
1307 .customForCartesianProduct({p0}, {s8}, {s64})
1311 .legalForCartesianProduct({p0}, {p0}, {s64})
1327 .
legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1328 .legalFor(HasFP16, {{s16, v4s16}, {s16, v8s16}})
1329 .minScalarOrElt(0, MinFPScalar)
1371 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1372 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1373 .legalFor(HasFP16, {{s16, v4s16}, {s16, v8s16}})
1374 .minScalarOrElt(0, MinFPScalar)
1389 {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1390 .legalFor({{s8, v8s8},
1398 return Query.
Types[1].isVector() &&
1399 Query.
Types[1].getElementType() != s8 &&
1400 Query.
Types[1].getNumElements() & 1;
1403 .clampMaxNumElements(1, s64, 2)
1411 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1418 if (SrcTy.isScalar())
1423 return SrcTy.getSizeInBits() > 64;
1427 return std::make_pair(1, SrcTy.divide(2));
1437 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1447 verify(*ST.getInstrInfo());
1456 switch (
MI.getOpcode()) {
1460 case TargetOpcode::G_VAARG:
1461 return legalizeVaArg(
MI, MRI, MIRBuilder);
1462 case TargetOpcode::G_LOAD:
1463 case TargetOpcode::G_STORE:
1464 return legalizeLoadStore(
MI, MRI, MIRBuilder, Observer);
1465 case TargetOpcode::G_SHL:
1466 case TargetOpcode::G_ASHR:
1467 case TargetOpcode::G_LSHR:
1468 return legalizeShlAshrLshr(
MI, MRI, MIRBuilder, Observer);
1469 case TargetOpcode::G_GLOBAL_VALUE:
1470 return legalizeSmallCMGlobalValue(
MI, MRI, MIRBuilder, Observer);
1471 case TargetOpcode::G_SBFX:
1472 case TargetOpcode::G_UBFX:
1473 return legalizeBitfieldExtract(
MI, MRI, Helper);
1474 case TargetOpcode::G_FSHL:
1475 case TargetOpcode::G_FSHR:
1476 return legalizeFunnelShift(
MI, MRI, MIRBuilder, Observer, Helper);
1477 case TargetOpcode::G_ROTR:
1478 return legalizeRotate(
MI, MRI, Helper);
1479 case TargetOpcode::G_CTPOP:
1480 return legalizeCTPOP(
MI, MRI, Helper);
1481 case TargetOpcode::G_ATOMIC_CMPXCHG:
1482 return legalizeAtomicCmpxchg128(
MI, MRI, Helper);
1483 case TargetOpcode::G_CTTZ:
1484 return legalizeCTTZ(
MI, Helper);
1485 case TargetOpcode::G_BZERO:
1486 case TargetOpcode::G_MEMCPY:
1487 case TargetOpcode::G_MEMMOVE:
1488 case TargetOpcode::G_MEMSET:
1489 return legalizeMemOps(
MI, Helper);
1490 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1491 return legalizeExtractVectorElt(
MI, MRI, Helper);
1492 case TargetOpcode::G_DYN_STACKALLOC:
1493 return legalizeDynStackAlloc(
MI, Helper);
1494 case TargetOpcode::G_PREFETCH:
1495 return legalizePrefetch(
MI, Helper);
1496 case TargetOpcode::G_ABS:
1498 case TargetOpcode::G_ICMP:
1499 return legalizeICMP(
MI, MRI, MIRBuilder);
1500 case TargetOpcode::G_BITCAST:
1501 return legalizeBitcast(
MI, Helper);
1502 case TargetOpcode::G_FPTRUNC:
1505 return legalizeFptrunc(
MI, MIRBuilder, MRI);
1513 assert(
MI.getOpcode() == TargetOpcode::G_BITCAST &&
"Unexpected opcode");
1514 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
1517 if (!DstTy.isScalar() || !SrcTy.isVector() ||
1522 MI.eraseFromParent();
1531 assert(
MI.getOpcode() == TargetOpcode::G_FSHL ||
1532 MI.getOpcode() == TargetOpcode::G_FSHR);
1536 Register ShiftNo =
MI.getOperand(3).getReg();
1542 LLT OperationTy = MRI.
getType(
MI.getOperand(0).getReg());
1546 if (!VRegAndVal || VRegAndVal->Value.urem(
BitWidth) == 0)
1552 Amount =
MI.getOpcode() == TargetOpcode::G_FSHL ?
BitWidth - Amount : Amount;
1556 if (ShiftTy.
getSizeInBits() == 64 &&
MI.getOpcode() == TargetOpcode::G_FSHR &&
1563 if (
MI.getOpcode() == TargetOpcode::G_FSHR) {
1565 MI.getOperand(3).setReg(Cast64.getReg(0));
1570 else if (
MI.getOpcode() == TargetOpcode::G_FSHL) {
1571 MIRBuilder.
buildInstr(TargetOpcode::G_FSHR, {
MI.getOperand(0).getReg()},
1572 {
MI.getOperand(1).getReg(),
MI.getOperand(2).getReg(),
1574 MI.eraseFromParent();
1583 Register SrcReg1 =
MI.getOperand(2).getReg();
1584 Register SrcReg2 =
MI.getOperand(3).getReg();
1585 LLT DstTy = MRI.
getType(DstReg);
1586 LLT SrcTy = MRI.
getType(SrcReg1);
1603 MIRBuilder.
buildNot(DstReg, CmpReg);
1605 MI.eraseFromParent();
1615 LLT AmtTy = MRI.
getType(AmtReg);
1621 MI.getOperand(2).setReg(NewAmt.getReg(0));
1626bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1629 assert(
MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1634 auto &GlobalOp =
MI.getOperand(1);
1636 if (GlobalOp.isSymbol())
1638 const auto* GV = GlobalOp.getGlobal();
1639 if (GV->isThreadLocal())
1642 auto &TM = ST->getTargetLowering()->getTargetMachine();
1643 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1648 auto Offset = GlobalOp.getOffset();
1653 MRI.
setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1670 "Should not have folded in an offset for a tagged global!");
1672 .addGlobalAddress(GV, 0x100000000,
1675 MRI.
setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1678 MIRBuilder.
buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1679 .addGlobalAddress(GV,
Offset,
1681 MI.eraseFromParent();
1690 auto LowerUnaryOp = [&
MI, &MIB](
unsigned Opcode) {
1692 MI.eraseFromParent();
1695 auto LowerBinOp = [&
MI, &MIB](
unsigned Opcode) {
1697 {
MI.getOperand(2),
MI.getOperand(3)});
1698 MI.eraseFromParent();
1701 auto LowerTriOp = [&
MI, &MIB](
unsigned Opcode) {
1703 {
MI.getOperand(2),
MI.getOperand(3),
MI.getOperand(4)});
1704 MI.eraseFromParent();
1709 switch (IntrinsicID) {
1710 case Intrinsic::vacopy: {
1711 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1712 unsigned VaListSize =
1713 (ST->isTargetDarwin() || ST->isTargetWindows())
1715 : ST->isTargetILP32() ? 20 : 32;
1723 VaListSize,
Align(PtrSize)));
1727 VaListSize,
Align(PtrSize)));
1728 MI.eraseFromParent();
1731 case Intrinsic::get_dynamic_area_offset: {
1733 MI.eraseFromParent();
1736 case Intrinsic::aarch64_mops_memset_tag: {
1737 assert(
MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1740 auto &
Value =
MI.getOperand(3);
1742 Value.setReg(ExtValueReg);
1745 case Intrinsic::aarch64_prefetch: {
1746 auto &AddrVal =
MI.getOperand(1);
1748 int64_t IsWrite =
MI.getOperand(2).getImm();
1749 int64_t
Target =
MI.getOperand(3).getImm();
1750 int64_t IsStream =
MI.getOperand(4).getImm();
1751 int64_t IsData =
MI.getOperand(5).getImm();
1753 unsigned PrfOp = (IsWrite << 4) |
1759 MI.eraseFromParent();
1762 case Intrinsic::aarch64_range_prefetch: {
1763 auto &AddrVal =
MI.getOperand(1);
1765 int64_t IsWrite =
MI.getOperand(2).getImm();
1766 int64_t IsStream =
MI.getOperand(3).getImm();
1767 unsigned PrfOp = (IsStream << 2) | IsWrite;
1769 MIB.
buildInstr(AArch64::G_AARCH64_RANGE_PREFETCH)
1772 .
addUse(
MI.getOperand(4).getReg());
1773 MI.eraseFromParent();
1776 case Intrinsic::aarch64_prefetch_ir: {
1777 auto &AddrVal =
MI.getOperand(1);
1779 MI.eraseFromParent();
1782 case Intrinsic::aarch64_neon_uaddv:
1783 case Intrinsic::aarch64_neon_saddv:
1784 case Intrinsic::aarch64_neon_umaxv:
1785 case Intrinsic::aarch64_neon_smaxv:
1786 case Intrinsic::aarch64_neon_uminv:
1787 case Intrinsic::aarch64_neon_sminv: {
1788 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1789 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1790 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1792 auto OldDst =
MI.getOperand(0).getReg();
1793 auto OldDstTy = MRI.
getType(OldDst);
1795 if (OldDstTy == NewDstTy)
1801 MI.getOperand(0).setReg(NewDst);
1805 MIB.
buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1810 case Intrinsic::aarch64_neon_uaddlp:
1811 case Intrinsic::aarch64_neon_saddlp: {
1812 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1814 : AArch64::G_SADDLP;
1816 MI.eraseFromParent();
1820 case Intrinsic::aarch64_neon_uaddlv:
1821 case Intrinsic::aarch64_neon_saddlv: {
1822 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1824 : AArch64::G_SADDLV;
1851 MI.eraseFromParent();
1855 case Intrinsic::aarch64_neon_smax:
1856 return LowerBinOp(TargetOpcode::G_SMAX);
1857 case Intrinsic::aarch64_neon_smin:
1858 return LowerBinOp(TargetOpcode::G_SMIN);
1859 case Intrinsic::aarch64_neon_umax:
1860 return LowerBinOp(TargetOpcode::G_UMAX);
1861 case Intrinsic::aarch64_neon_umin:
1862 return LowerBinOp(TargetOpcode::G_UMIN);
1863 case Intrinsic::aarch64_neon_fmax:
1864 return LowerBinOp(TargetOpcode::G_FMAXIMUM);
1865 case Intrinsic::aarch64_neon_fmin:
1866 return LowerBinOp(TargetOpcode::G_FMINIMUM);
1867 case Intrinsic::aarch64_neon_fmaxnm:
1868 return LowerBinOp(TargetOpcode::G_FMAXNUM);
1869 case Intrinsic::aarch64_neon_fminnm:
1870 return LowerBinOp(TargetOpcode::G_FMINNUM);
1871 case Intrinsic::aarch64_neon_pmull:
1872 case Intrinsic::aarch64_neon_pmull64:
1873 return LowerBinOp(AArch64::G_PMULL);
1874 case Intrinsic::aarch64_neon_smull:
1875 return LowerBinOp(AArch64::G_SMULL);
1876 case Intrinsic::aarch64_neon_umull:
1877 return LowerBinOp(AArch64::G_UMULL);
1878 case Intrinsic::aarch64_neon_sabd:
1879 return LowerBinOp(TargetOpcode::G_ABDS);
1880 case Intrinsic::aarch64_neon_uabd:
1881 return LowerBinOp(TargetOpcode::G_ABDU);
1882 case Intrinsic::aarch64_neon_uhadd:
1883 return LowerBinOp(TargetOpcode::G_UAVGFLOOR);
1884 case Intrinsic::aarch64_neon_urhadd:
1885 return LowerBinOp(TargetOpcode::G_UAVGCEIL);
1886 case Intrinsic::aarch64_neon_shadd:
1887 return LowerBinOp(TargetOpcode::G_SAVGFLOOR);
1888 case Intrinsic::aarch64_neon_srhadd:
1889 return LowerBinOp(TargetOpcode::G_SAVGCEIL);
1890 case Intrinsic::aarch64_neon_sqshrn: {
1895 {MRI.
getType(
MI.getOperand(2).getReg())},
1896 {
MI.getOperand(2),
MI.getOperand(3).getImm()});
1898 MIB.
buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {
MI.getOperand(0)}, {Shr});
1899 MI.eraseFromParent();
1902 case Intrinsic::aarch64_neon_sqshrun: {
1907 {MRI.
getType(
MI.getOperand(2).getReg())},
1908 {
MI.getOperand(2),
MI.getOperand(3).getImm()});
1910 MIB.
buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {
MI.getOperand(0)}, {Shr});
1911 MI.eraseFromParent();
1914 case Intrinsic::aarch64_neon_sqrshrn: {
1918 auto Shr = MIB.
buildInstr(AArch64::G_SRSHR_I,
1919 {MRI.
getType(
MI.getOperand(2).getReg())},
1920 {
MI.getOperand(2),
MI.getOperand(3).getImm()});
1922 MIB.
buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {
MI.getOperand(0)}, {Shr});
1923 MI.eraseFromParent();
1926 case Intrinsic::aarch64_neon_sqrshrun: {
1930 auto Shr = MIB.
buildInstr(AArch64::G_SRSHR_I,
1931 {MRI.
getType(
MI.getOperand(2).getReg())},
1932 {
MI.getOperand(2),
MI.getOperand(3).getImm()});
1934 MIB.
buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {
MI.getOperand(0)}, {Shr});
1935 MI.eraseFromParent();
1938 case Intrinsic::aarch64_neon_uqrshrn: {
1942 auto Shr = MIB.
buildInstr(AArch64::G_URSHR_I,
1943 {MRI.
getType(
MI.getOperand(2).getReg())},
1944 {
MI.getOperand(2),
MI.getOperand(3).getImm()});
1946 MIB.
buildInstr(TargetOpcode::G_TRUNC_USAT_U, {
MI.getOperand(0)}, {Shr});
1947 MI.eraseFromParent();
1950 case Intrinsic::aarch64_neon_uqshrn: {
1955 {MRI.
getType(
MI.getOperand(2).getReg())},
1956 {
MI.getOperand(2),
MI.getOperand(3).getImm()});
1958 MIB.
buildInstr(TargetOpcode::G_TRUNC_USAT_U, {
MI.getOperand(0)}, {Shr});
1959 MI.eraseFromParent();
1962 case Intrinsic::aarch64_neon_sqshlu: {
1968 MIB.
buildInstr(AArch64::G_SQSHLU_I, {
MI.getOperand(0)},
1970 .addImm(ShiftAmount->getSExtValue());
1971 MI.eraseFromParent();
1976 case Intrinsic::aarch64_neon_vsli: {
1978 AArch64::G_SLI, {
MI.getOperand(0)},
1979 {
MI.getOperand(2),
MI.getOperand(3),
MI.getOperand(4).getImm()});
1980 MI.eraseFromParent();
1983 case Intrinsic::aarch64_neon_vsri: {
1985 AArch64::G_SRI, {
MI.getOperand(0)},
1986 {
MI.getOperand(2),
MI.getOperand(3),
MI.getOperand(4).getImm()});
1987 MI.eraseFromParent();
1990 case Intrinsic::aarch64_neon_abs: {
1992 MIB.
buildInstr(TargetOpcode::G_ABS, {
MI.getOperand(0)}, {
MI.getOperand(2)});
1993 MI.eraseFromParent();
1996 case Intrinsic::aarch64_neon_sqadd: {
1998 return LowerBinOp(TargetOpcode::G_SADDSAT);
2001 case Intrinsic::aarch64_neon_sqsub: {
2003 return LowerBinOp(TargetOpcode::G_SSUBSAT);
2006 case Intrinsic::aarch64_neon_uqadd: {
2008 return LowerBinOp(TargetOpcode::G_UADDSAT);
2011 case Intrinsic::aarch64_neon_uqsub: {
2013 return LowerBinOp(TargetOpcode::G_USUBSAT);
2016 case Intrinsic::aarch64_neon_udot:
2017 return LowerTriOp(AArch64::G_UDOT);
2018 case Intrinsic::aarch64_neon_sdot:
2019 return LowerTriOp(AArch64::G_SDOT);
2020 case Intrinsic::aarch64_neon_usdot:
2021 return LowerTriOp(AArch64::G_USDOT);
2022 case Intrinsic::aarch64_neon_sqxtn:
2023 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S);
2024 case Intrinsic::aarch64_neon_sqxtun:
2025 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_U);
2026 case Intrinsic::aarch64_neon_uqxtn:
2027 return LowerUnaryOp(TargetOpcode::G_TRUNC_USAT_U);
2028 case Intrinsic::aarch64_neon_fcvtzu:
2029 return LowerUnaryOp(TargetOpcode::G_FPTOUI_SAT);
2030 case Intrinsic::aarch64_neon_fcvtzs:
2031 return LowerUnaryOp(TargetOpcode::G_FPTOSI_SAT);
2033 case Intrinsic::vector_reverse:
2041bool AArch64LegalizerInfo::legalizeShlAshrLshr(
2044 assert(
MI.getOpcode() == TargetOpcode::G_ASHR ||
2045 MI.getOpcode() == TargetOpcode::G_LSHR ||
2046 MI.getOpcode() == TargetOpcode::G_SHL);
2059 MI.getOperand(2).setReg(ExtCst.getReg(0));
2080bool AArch64LegalizerInfo::legalizeLoadStore(
2083 assert(
MI.getOpcode() == TargetOpcode::G_STORE ||
2084 MI.getOpcode() == TargetOpcode::G_LOAD);
2095 const LLT ValTy = MRI.
getType(ValReg);
2100 bool IsLoad =
MI.getOpcode() == TargetOpcode::G_LOAD;
2104 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
2110 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
2116 assert(ST->hasLSE2() &&
"ldp/stp not single copy atomic without +lse2");
2118 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
2121 MachineInstrBuilder NewI;
2123 NewI = MIRBuilder.
buildInstr(Opcode, {s64, s64}, {});
2129 Opcode, {}, {
Split->getOperand(0),
Split->getOperand(1)});
2133 NewI.
addUse(
MI.getOperand(1).getReg());
2145 *ST->getRegBankInfo());
2146 MI.eraseFromParent();
2152 LLVM_DEBUG(
dbgs() <<
"Tried to do custom legalization on wrong load/store");
2158 auto &MMO = **
MI.memoperands_begin();
2161 if (
MI.getOpcode() == TargetOpcode::G_STORE) {
2165 auto NewLoad = MIRBuilder.
buildLoad(NewTy,
MI.getOperand(1), MMO);
2168 MI.eraseFromParent();
2175 MachineFunction &MF = MIRBuilder.
getMF();
2176 Align Alignment(
MI.getOperand(2).getImm());
2178 Register ListPtr =
MI.getOperand(1).getReg();
2180 LLT PtrTy = MRI.
getType(ListPtr);
2190 MachineInstrBuilder DstPtr;
2191 if (Alignment > PtrAlign) {
2195 auto ListTmp = MIRBuilder.
buildPtrAdd(PtrTy,
List, AlignMinus1.getReg(0));
2205 ValTy, std::max(Alignment, PtrAlign)));
2216 MI.eraseFromParent();
2220bool AArch64LegalizerInfo::legalizeBitfieldExtract(
2251 MachineIRBuilder &MIRBuilder = Helper.
MIRBuilder;
2258 "Expected src and dst to have the same type!");
2266 auto Add = MIRBuilder.
buildAdd(s64, CTPOP1, CTPOP2);
2269 MI.eraseFromParent();
2273 if (!ST->hasNEON() ||
2274 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
2286 assert((Size == 32 || Size == 64 || Size == 128) &&
"Expected only 32, 64, or 128 bit scalars!");
2288 Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
2300 LLT Dt = Ty == LLT::fixed_vector(2, 64) ? LLT::fixed_vector(4, 32) : Ty;
2301 auto Zeros = MIRBuilder.buildConstant(Dt, 0);
2302 auto Ones = MIRBuilder.buildConstant(VTy, 1);
2303 MachineInstrBuilder Sum;
2305 if (Ty == LLT::fixed_vector(2, 64)) {
2307 MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2308 Sum = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT});
2310 Sum = MIRBuilder.
buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones,
CTPOP});
2312 Sum = MIRBuilder.
buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones,
CTPOP});
2318 MI.eraseFromParent();
2326 Opc = Intrinsic::aarch64_neon_uaddlv;
2327 HAddTys.push_back(LLT::scalar(32));
2329 Opc = Intrinsic::aarch64_neon_uaddlp;
2332 Opc = Intrinsic::aarch64_neon_uaddlp;
2336 Opc = Intrinsic::aarch64_neon_uaddlp;
2341 Opc = Intrinsic::aarch64_neon_uaddlp;
2344 Opc = Intrinsic::aarch64_neon_uaddlp;
2350 for (
LLT HTy : HAddTys) {
2360 MI.eraseFromParent();
2364bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
2366 MachineIRBuilder &MIRBuilder = Helper.
MIRBuilder;
2368 auto Addr =
MI.getOperand(1).getReg();
2369 auto DesiredI = MIRBuilder.
buildUnmerge({s64, s64},
MI.getOperand(2));
2370 auto NewI = MIRBuilder.
buildUnmerge({s64, s64},
MI.getOperand(3));
2374 MachineInstrBuilder CAS;
2385 auto Ordering = (*
MI.memoperands_begin())->getMergedOrdering();
2389 Opcode = AArch64::CASPAX;
2392 Opcode = AArch64::CASPLX;
2396 Opcode = AArch64::CASPALX;
2399 Opcode = AArch64::CASPX;
2407 MIRBuilder.
buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
2408 .addUse(DesiredI->getOperand(0).getReg())
2410 .
addUse(DesiredI->getOperand(1).getReg())
2411 .
addImm(AArch64::subo64);
2412 MIRBuilder.
buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
2416 .
addImm(AArch64::subo64);
2418 CAS = MIRBuilder.
buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
2426 auto Ordering = (*
MI.memoperands_begin())->getMergedOrdering();
2430 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
2433 Opcode = AArch64::CMP_SWAP_128_RELEASE;
2437 Opcode = AArch64::CMP_SWAP_128;
2440 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
2445 CAS = MIRBuilder.
buildInstr(Opcode, {DstLo, DstHi, Scratch},
2446 {Addr, DesiredI->getOperand(0),
2447 DesiredI->getOperand(1), NewI->
getOperand(0),
2454 *ST->getRegBankInfo());
2457 MI.eraseFromParent();
2463 MachineIRBuilder &MIRBuilder = Helper.
MIRBuilder;
2464 MachineRegisterInfo &MRI = *MIRBuilder.
getMRI();
2465 LLT Ty = MRI.
getType(
MI.getOperand(1).getReg());
2467 MIRBuilder.
buildCTLZ(
MI.getOperand(0).getReg(), BitReverse);
2468 MI.eraseFromParent();
2474 MachineIRBuilder &MIRBuilder = Helper.
MIRBuilder;
2477 if (
MI.getOpcode() == TargetOpcode::G_MEMSET) {
2480 auto &
Value =
MI.getOperand(1);
2483 Value.setReg(ExtValueReg);
2490bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2504bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2506 MachineFunction &MF = *
MI.getParent()->getParent();
2507 MachineIRBuilder &MIRBuilder = Helper.
MIRBuilder;
2508 MachineRegisterInfo &MRI = *MIRBuilder.
getMRI();
2520 Register AllocSize =
MI.getOperand(1).getReg();
2524 "Unexpected type for dynamic alloca");
2526 "Unexpected type for dynamic alloca");
2534 MIRBuilder.
buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
2535 MRI.
setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
2536 MIRBuilder.
setInsertPt(*NewMI->getParent(), NewMI);
2539 MI.eraseFromParent();
2546 auto &AddrVal =
MI.getOperand(0);
2548 int64_t IsWrite =
MI.getOperand(1).getImm();
2549 int64_t Locality =
MI.getOperand(2).getImm();
2550 int64_t
IsData =
MI.getOperand(3).getImm();
2552 bool IsStream = Locality == 0;
2553 if (Locality != 0) {
2554 assert(Locality <= 3 &&
"Prefetch locality out-of-range");
2558 Locality = 3 - Locality;
2561 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
2564 MI.eraseFromParent();
2571 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
2573 "Expected a power of 2 elements");
2591 int StepSize = ElemCount % 4 ? 2 : 4;
2598 for (unsigned i = 0; i < ElemCount / 2; ++i)
2605 for (auto SrcReg : RegsToUnmergeTo) {
2607 MIRBuilder.buildInstr(AArch64::G_FPTRUNC_ODD, {v2s32}, {SrcReg})
2615 for (unsigned LoopIter = 0; LoopIter < ElemCount / StepSize; ++LoopIter) {
2616 if (StepSize == 4) {
2620 {v4s32}, {TruncOddDstRegs[Index++], TruncOddDstRegs[Index++]})
2632 if (RegsToMerge.size() == 1) {
2634 MI.eraseFromParent();
2641 MI.eraseFromParent();
static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset, MachineRegisterInfo &MRI)
This file declares the targeting of the MachineLegalizer class for AArch64.
assert(UImm && (UImm != ~static_cast<T>(0)) && "Invalid immediate!")
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic operations.
Interface for Targets to specify which operations they can successfully select and how the others should be expanded most efficiently.
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static constexpr MCPhysReg SPReg
bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const override
Called for instructions with the Custom LegalizationAction.
bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const override
AArch64LegalizerInfo(const AArch64Subtarget &ST)
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
int64_t getSExtValue() const
Get sign extended value.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
static constexpr LLT scalable_vector(unsigned MinNumElements, unsigned ScalarSizeInBits)
Get a low-level scalable vector of some number of elements and element width.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
constexpr bool isPointerVector() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
LLVM_ABI void computeTables()
Compute any ancillary tables needed to quickly decide how an operation should be handled.
LegalizeRuleSet & minScalar(unsigned TypeIdx, const LLT Ty)
Ensure the scalar is at least as wide as Ty.
LegalizeRuleSet & widenScalarOrEltToNextPow2OrMinSize(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar or vector element type to the next power of two that is at least MinSize.
LegalizeRuleSet & legalFor(std::initializer_list< LLT > Types)
The instruction is legal when type index 0 is any type in the given list.
LegalizeRuleSet & maxScalarEltSameAsIf(LegalityPredicate Predicate, unsigned TypeIdx, unsigned SmallTypeIdx)
Conditionally narrow the scalar or elt to match the size of another.
LegalizeRuleSet & unsupported()
The instruction is unsupported.
LegalizeRuleSet & scalarSameSizeAs(unsigned TypeIdx, unsigned SameSizeIdx)
Change the type TypeIdx to have the same scalar size as type SameSizeIdx.
LegalizeRuleSet & bitcastIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
The specified type index is coerced if predicate is true.
LegalizeRuleSet & libcallFor(std::initializer_list< LLT > Types)
LegalizeRuleSet & maxScalar(unsigned TypeIdx, const LLT Ty)
Ensure the scalar is at most as wide as Ty.
LegalizeRuleSet & minScalarOrElt(unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet & clampMaxNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MaxElements)
Limit the number of elements in EltTy vectors to at most MaxElements.
LegalizeRuleSet & clampMinNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MinElements)
Limit the number of elements in EltTy vectors to at least MinElements.
LegalizeRuleSet & widenVectorEltsToVectorMinSize(unsigned TypeIdx, unsigned VectorSize)
Ensure the vector size is at least as wide as VectorSize by promoting the element.
LegalizeRuleSet & lowerIfMemSizeNotPow2()
Lower a memory operation if the memory size, rounded to bytes, is not a power of 2.
LegalizeRuleSet & minScalarEltSameAsIf(LegalityPredicate Predicate, unsigned TypeIdx, unsigned LargeTypeIdx)
Conditionally widen the scalar or elt to match the size of another.
LegalizeRuleSet & customForCartesianProduct(std::initializer_list< LLT > Types)
LegalizeRuleSet & lowerIfMemSizeNotByteSizePow2()
Lower a memory operation if the memory access size is not a round power of 2 byte size.
LegalizeRuleSet & moreElementsToNextPow2(unsigned TypeIdx)
Add more elements to the vector to reach the next power of two.
LegalizeRuleSet & narrowScalarIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Narrow the scalar to the one selected by the mutation if the predicate is true.
LegalizeRuleSet & lower()
The instruction is lowered.
LegalizeRuleSet & moreElementsIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Add more elements to reach the type selected by the mutation if the predicate is true.
LegalizeRuleSet & lowerFor(std::initializer_list< LLT > Types)
The instruction is lowered when type index 0 is any type in the given list.
LegalizeRuleSet & scalarizeIf(LegalityPredicate Predicate, unsigned TypeIdx)
LegalizeRuleSet & lowerIf(LegalityPredicate Predicate)
The instruction is lowered if predicate is true.
LegalizeRuleSet & clampScalar(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the range of scalar sizes to MinTy and MaxTy.
LegalizeRuleSet & custom()
Unconditionally custom lower.
LegalizeRuleSet & minScalarSameAs(unsigned TypeIdx, unsigned LargeTypeIdx)
Widen the scalar to match the size of another.
LegalizeRuleSet & unsupportedIf(LegalityPredicate Predicate)
LegalizeRuleSet & minScalarOrEltIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet & widenScalarIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Widen the scalar to the one selected by the mutation if the predicate is true.
LegalizeRuleSet & alwaysLegal()
LegalizeRuleSet & clampNumElements(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the number of elements for the given vectors to at least MinTy's number of elements and at most MaxTy's number of elements.
LegalizeRuleSet & maxScalarIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Conditionally limit the maximum size of the scalar.
LegalizeRuleSet & customIf(LegalityPredicate Predicate)
LegalizeRuleSet & widenScalarToNextPow2(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar to the next power of two that is at least MinSize.
LegalizeRuleSet & scalarize(unsigned TypeIdx)
LegalizeRuleSet & legalForCartesianProduct(std::initializer_list< LLT > Types)
The instruction is legal when type indexes 0 and 1 are both in the given list.
LegalizeRuleSet & legalForTypesWithMemDesc(std::initializer_list< LegalityPredicates::TypePairAndMemDesc > TypesAndMemDesc)
The instruction is legal when type indexes 0 and 1 along with the memory size and minimum alignment is any type and size tuple in the given list.
unsigned immIdx(unsigned ImmIdx)
LegalizeRuleSet & widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar or vector element type to the next power of two that is at least MinSize.
LegalizeRuleSet & legalIf(LegalityPredicate Predicate)
The instruction is legal if predicate is true.
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeRuleSet & getActionDefinitionsBuilder(unsigned Opcode)
Get the action definition builder for the given opcode.
const LegacyLegalizerInfo & getLegacyLegalizerInfo() const
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_OR Op0, NegOne.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef< Register > Res, bool HasSideEffects, bool isConvergent)
Build and insert a G_INTRINSIC instruction.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildBitReverse(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITREVERSE Src.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Op0, Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, const DstOp &Res, const SrcOp &Op)
Build and insert Res = ExtOpc, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of...
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
Representation of each machine instruction.
const MachineOperand & getOperand(unsigned i) const
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is found.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
LLVM_ABI void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
LLVM_ABI Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
const TargetRegisterInfo * getTargetRegisterInfo() const
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Wrapper class representing virtual and physical registers.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
Primary interface to the complete machine description for the target machine.
CodeModel::Model getCodeModel() const
Returns the code model.
Target - Wrapper for Target specific information.
LLVM Value Representation.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the same way as for normal integer types.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
LLVM_ABI LegalityPredicate scalarOrEltWiderThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar or a vector with an element type that's wider than the ...
LLVM_ABI LegalityPredicate isPointerVector(unsigned TypeIdx)
True iff the specified type index is a vector of pointers (with any address space).
LLVM_ABI LegalityPredicate typeInSet(unsigned TypeIdx, std::initializer_list< LLT > TypesInit)
True iff the given type index is one of the specified types.
LLVM_ABI LegalityPredicate smallerThan(unsigned TypeIdx0, unsigned TypeIdx1)
True iff the first type index has a smaller total bit size than second type index.
LLVM_ABI LegalityPredicate atomicOrderingAtLeastOrStrongerThan(unsigned MMOIdx, AtomicOrdering Ordering)
True iff the specified MMO index has an atomic ordering of at least Ordering or stronger.
Predicate any(Predicate P0, Predicate P1)
True iff P0 or P1 are true.
LLVM_ABI LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
LLVM_ABI LegalityPredicate typeIs(unsigned TypeIdx, LLT TypesInit)
True iff the given type index is the specified type.
LLVM_ABI LegalityPredicate scalarWiderThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar that's wider than the given size.
LLVM_ABI LegalityPredicate scalarNarrowerThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar that's narrower than the given size.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
LLVM_ABI LegalizeMutation moreElementsToNextPow2(unsigned TypeIdx, unsigned Min=0)
Add more elements to the type for the given type index to the next power of.
LLVM_ABI LegalizeMutation scalarize(unsigned TypeIdx)
Break up the vector type for the given type index into the element type.
LLVM_ABI LegalizeMutation changeElementTo(unsigned TypeIdx, unsigned FromTypeIdx)
Keep the same scalar or element type as the given type index.
LLVM_ABI LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min=0)
Widen the scalar type or vector element type for the given type index to the next power of 2.
LLVM_ABI LegalizeMutation changeTo(unsigned TypeIdx, LLT Ty)
Select this specific type for the given type index.
LLVM_ABI LegalizeMutation changeElementSizeTo(unsigned TypeIdx, unsigned FromTypeIdx)
Change the scalar size or element size to have the same scalar size as type index FromIndex.
operand_type_match m_Reg()
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
LLVM_ABI void constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
std::function< bool(const LegalityQuery &)> LegalityPredicate
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
AtomicOrdering
Atomic ordering for LLVM's memory model.
DWARFExpression::Operation Op
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT, returns its APInt value and def register.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
unsigned Log2(Align A)
Returns the log2 of the alignment.
This struct is a compact representation of a valid (non-zero power of two) alignment.
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
ArrayRef< MemDesc > MMODescrs
Operations which require memory can use this to place requirements on the memory type for each MMO.
This class contains a discriminated union of information about pointers in memory operands,...