AArch64PostLegalizerLowering.cpp
1//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Post-legalization lowering for instructions.
11///
12/// This is used to offload pattern matching from the selector.
13///
14/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually
15/// a G_ZIP, G_UZP, etc.
16///
17/// General optimization combines should be handled by either the
18/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner.
19///
20//===----------------------------------------------------------------------===//
21
23#include "AArch64Subtarget.h"
44#include "llvm/IR/InstrTypes.h"
46#include "llvm/Support/Debug.h"
48#include <optional>
49
50#define GET_GICOMBINER_DEPS
51#include "AArch64GenPostLegalizeGILowering.inc"
52#undef GET_GICOMBINER_DEPS
53
54#define DEBUG_TYPE "aarch64-postlegalizer-lowering"
55
56using namespace llvm;
57using namespace MIPatternMatch;
58using namespace AArch64GISelUtils;
59
60namespace {
61
62#define GET_GICOMBINER_TYPES
63#include "AArch64GenPostLegalizeGILowering.inc"
64#undef GET_GICOMBINER_TYPES
65
66/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
67///
68/// Used for matching target-supported shuffles before codegen.
69struct ShuffleVectorPseudo {
70 unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1)
71 Register Dst; ///< Destination register.
72 SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
73 ShuffleVectorPseudo(unsigned Opc, Register Dst,
74 std::initializer_list<SrcOp> SrcOps)
75 : Opc(Opc), Dst(Dst), SrcOps(SrcOps){};
76 ShuffleVectorPseudo() = default;
77};
78
79/// Check if a vector shuffle corresponds to a REV instruction with the
80/// specified blocksize.
81bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts,
82 unsigned BlockSize) {
83 assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
84 "Only possible block sizes for REV are: 16, 32, 64");
85 assert(EltSize != 64 && "EltSize cannot be 64 for REV mask.");
86
87 unsigned BlockElts = M[0] + 1;
88
89 // If the first shuffle index is UNDEF, be optimistic.
90 if (M[0] < 0)
91 BlockElts = BlockSize / EltSize;
92
93 if (BlockSize <= EltSize || BlockSize != BlockElts * EltSize)
94 return false;
95
96 for (unsigned i = 0; i < NumElts; ++i) {
97 // Ignore undef indices.
98 if (M[i] < 0)
99 continue;
100 if (static_cast<unsigned>(M[i]) !=
101 (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
102 return false;
103 }
104
105 return true;
106}
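// For illustration: with EltSize = 16 and BlockSize = 64, BlockElts is 4, so
// the <8 x s16> mask <3, 2, 1, 0, 7, 6, 5, 4> passes the check above; it
// reverses the 16-bit elements within each 64-bit block, i.e. a REV64.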
107
108/// Determines if \p M is a shuffle vector mask for a TRN of \p NumElts.
109/// Whether or not G_TRN1 or G_TRN2 should be used is stored in \p WhichResult.
110bool isTRNMask(ArrayRef<int> M, unsigned NumElts, unsigned &WhichResult) {
111 if (NumElts % 2 != 0)
112 return false;
113 WhichResult = (M[0] == 0 ? 0 : 1);
114 for (unsigned i = 0; i < NumElts; i += 2) {
115 if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != i + WhichResult) ||
116 (M[i + 1] >= 0 &&
117 static_cast<unsigned>(M[i + 1]) != i + NumElts + WhichResult))
118 return false;
119 }
120 return true;
121}
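// For illustration: with NumElts = 4, the mask <0, 4, 2, 6> (WhichResult = 0)
// interleaves the even lanes of the two inputs and matches G_TRN1, while
// <1, 5, 3, 7> (WhichResult = 1) interleaves the odd lanes and matches G_TRN2.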
122
123/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
124/// sources of the shuffle are different.
125std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
126 unsigned NumElts) {
127 // Look for the first non-undef element.
128 auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
129 if (FirstRealElt == M.end())
130 return std::nullopt;
131
132 // Use APInt to handle overflow when calculating expected element.
133 unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
134 APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
135
136 // The following shuffle indices must be the successive elements after the
137 // first real element.
138 if (any_of(
139 make_range(std::next(FirstRealElt), M.end()),
140 [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
141 return std::nullopt;
142
143 // The index of an EXT is the first element if it is not UNDEF.
144 // Watch out for the beginning UNDEFs. The EXT index should be the expected
145 // value of the first element. E.g.
146 // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
147 // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
148 // ExpectedElt is the last mask index plus 1.
149 uint64_t Imm = ExpectedElt.getZExtValue();
150 bool ReverseExt = false;
151
152 // There are two different cases that require reversing the input vectors.
153 // For example, for vector <4 x i32> we have the following cases,
154 // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
155 // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
156 // For both cases, we finally use mask <5, 6, 7, 0>, which requires
157 // to reverse two input vectors.
158 if (Imm < NumElts)
159 ReverseExt = true;
160 else
161 Imm -= NumElts;
162 return std::make_pair(ReverseExt, Imm);
163}
164
165/// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts.
166/// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult.
167bool isUZPMask(ArrayRef<int> M, unsigned NumElts, unsigned &WhichResult) {
168 WhichResult = (M[0] == 0 ? 0 : 1);
169 for (unsigned i = 0; i != NumElts; ++i) {
170 // Skip undef indices.
171 if (M[i] < 0)
172 continue;
173 if (static_cast<unsigned>(M[i]) != 2 * i + WhichResult)
174 return false;
175 }
176 return true;
177}
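// For illustration: with NumElts = 4, the mask <0, 2, 4, 6> (WhichResult = 0)
// keeps the even-numbered elements of the concatenated inputs and matches
// G_UZP1; <1, 3, 5, 7> (WhichResult = 1) keeps the odd ones and matches G_UZP2.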
178
179/// \return true if \p M is a zip mask for a shuffle vector of \p NumElts.
180/// Whether or not G_ZIP1 or G_ZIP2 should be used is stored in \p WhichResult.
181bool isZipMask(ArrayRef<int> M, unsigned NumElts, unsigned &WhichResult) {
182 if (NumElts % 2 != 0)
183 return false;
184
185 // 0 means use ZIP1, 1 means use ZIP2.
186 WhichResult = (M[0] == 0 ? 0 : 1);
187 unsigned Idx = WhichResult * NumElts / 2;
188 for (unsigned i = 0; i != NumElts; i += 2) {
189 if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != Idx) ||
190 (M[i + 1] >= 0 && static_cast<unsigned>(M[i + 1]) != Idx + NumElts))
191 return false;
192 Idx += 1;
193 }
194 return true;
195}
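// For illustration: with NumElts = 4, the mask <0, 4, 1, 5> (WhichResult = 0)
// zips the low halves of the two inputs and matches G_ZIP1, while <2, 6, 3, 7>
// (WhichResult = 1) zips the high halves and matches G_ZIP2.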
196
197/// Helper function for matchINS.
198///
199/// \returns a value when \p M is an ins mask for \p NumInputElements.
200///
201/// First element of the returned pair is true when the produced
202/// G_INSERT_VECTOR_ELT destination should be the LHS of the G_SHUFFLE_VECTOR.
203///
204/// Second element is the destination lane for the G_INSERT_VECTOR_ELT.
205std::optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M,
206 int NumInputElements) {
207 if (M.size() != static_cast<size_t>(NumInputElements))
208 return std::nullopt;
209 int NumLHSMatch = 0, NumRHSMatch = 0;
210 int LastLHSMismatch = -1, LastRHSMismatch = -1;
211 for (int Idx = 0; Idx < NumInputElements; ++Idx) {
212 if (M[Idx] == -1) {
213 ++NumLHSMatch;
214 ++NumRHSMatch;
215 continue;
216 }
217 M[Idx] == Idx ? ++NumLHSMatch : LastLHSMismatch = Idx;
218 M[Idx] == Idx + NumInputElements ? ++NumRHSMatch : LastRHSMismatch = Idx;
219 }
220 const int NumNeededToMatch = NumInputElements - 1;
221 if (NumLHSMatch == NumNeededToMatch)
222 return std::make_pair(true, LastLHSMismatch);
223 if (NumRHSMatch == NumNeededToMatch)
224 return std::make_pair(false, LastRHSMismatch);
225 return std::nullopt;
226}
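// For illustration: for NumInputElements = 4, the mask <0, 1, 6, 3> matches
// the LHS in three lanes, so this returns {true, 2}: keep the LHS and insert
// into lane 2, whose source element 6 is lane 2 of the RHS.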
227
228/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
229/// G_REV instruction. Stores the appropriate G_REV opcode in \p MatchInfo.
230bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
231 ShuffleVectorPseudo &MatchInfo) {
232 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
233 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
234 Register Dst = MI.getOperand(0).getReg();
235 Register Src = MI.getOperand(1).getReg();
236 LLT Ty = MRI.getType(Dst);
237 unsigned EltSize = Ty.getScalarSizeInBits();
238
239 // Element size for a rev cannot be 64.
240 if (EltSize == 64)
241 return false;
242
243 unsigned NumElts = Ty.getNumElements();
244
245 // Try to produce G_REV64
246 if (isREVMask(ShuffleMask, EltSize, NumElts, 64)) {
247 MatchInfo = ShuffleVectorPseudo(AArch64::G_REV64, Dst, {Src});
248 return true;
249 }
250
251 // TODO: Produce G_REV32 and G_REV16 once we have proper legalization support.
252 // This should be identical to above, but with a constant 32 and constant
253 // 16.
254 return false;
255}
256
257/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
258/// a G_TRN1 or G_TRN2 instruction.
259bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
260 ShuffleVectorPseudo &MatchInfo) {
261 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
262 unsigned WhichResult;
263 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
264 Register Dst = MI.getOperand(0).getReg();
265 unsigned NumElts = MRI.getType(Dst).getNumElements();
266 if (!isTRNMask(ShuffleMask, NumElts, WhichResult))
267 return false;
268 unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
269 Register V1 = MI.getOperand(1).getReg();
270 Register V2 = MI.getOperand(2).getReg();
271 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
272 return true;
273}
274
275/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
276/// a G_UZP1 or G_UZP2 instruction.
277///
278/// \param [in] MI - The shuffle vector instruction.
279/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success.
280bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
281 ShuffleVectorPseudo &MatchInfo) {
282 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
283 unsigned WhichResult;
284 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
285 Register Dst = MI.getOperand(0).getReg();
286 unsigned NumElts = MRI.getType(Dst).getNumElements();
287 if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
288 return false;
289 unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
290 Register V1 = MI.getOperand(1).getReg();
291 Register V2 = MI.getOperand(2).getReg();
292 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
293 return true;
294}
295
296bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
297 ShuffleVectorPseudo &MatchInfo) {
298 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
299 unsigned WhichResult;
300 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
301 Register Dst = MI.getOperand(0).getReg();
302 unsigned NumElts = MRI.getType(Dst).getNumElements();
303 if (!isZipMask(ShuffleMask, NumElts, WhichResult))
304 return false;
305 unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
306 Register V1 = MI.getOperand(1).getReg();
307 Register V2 = MI.getOperand(2).getReg();
308 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
309 return true;
310}
311
312/// Helper function for matchDup.
313 bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
314 MachineRegisterInfo &MRI,
315 ShuffleVectorPseudo &MatchInfo) {
316 if (Lane != 0)
317 return false;
318
319 // Try to match a vector splat operation into a dup instruction.
320 // We're looking for this pattern:
321 //
322 // %scalar:gpr(s64) = COPY $x0
323 // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
324 // %cst0:gpr(s32) = G_CONSTANT i32 0
325 // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
326 // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
327 // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
328 // %zerovec(<2 x s32>)
329 //
330 // ...into:
331 // %splat = G_DUP %scalar
332
333 // Begin matching the insert.
334 auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT,
335 MI.getOperand(1).getReg(), MRI);
336 if (!InsMI)
337 return false;
338 // Match the undef vector operand.
339 if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),
340 MRI))
341 return false;
342
343 // Match the index constant 0.
344 if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt()))
345 return false;
346
347 MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
348 {InsMI->getOperand(2).getReg()});
349 return true;
350}
351
352/// Helper function for matchDup.
353 bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
354 MachineRegisterInfo &MRI,
355 ShuffleVectorPseudo &MatchInfo) {
356 assert(Lane >= 0 && "Expected positive lane?");
357 // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the
358 // lane's definition directly.
359 auto *BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
360 MI.getOperand(1).getReg(), MRI);
361 if (!BuildVecMI)
362 return false;
363 Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();
364 MatchInfo =
365 ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
366 return true;
367}
368
369bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
370 ShuffleVectorPseudo &MatchInfo) {
371 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
372 auto MaybeLane = getSplatIndex(MI);
373 if (!MaybeLane)
374 return false;
375 int Lane = *MaybeLane;
376 // If this is undef splat, generate it via "just" vdup, if possible.
377 if (Lane < 0)
378 Lane = 0;
379 if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
380 return true;
381 if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
382 return true;
383 return false;
384}
385
386// Check if an EXT instruction can handle the shuffle mask when the vector
387// sources of the shuffle are the same.
388bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) {
389 unsigned NumElts = Ty.getNumElements();
390
391 // Assume that the first shuffle index is not UNDEF. Fail if it is.
392 if (M[0] < 0)
393 return false;
394
395 // If this is a VEXT shuffle, the immediate value is the index of the first
396 // element. The other shuffle indices must be the successive elements after
397 // the first one.
398 unsigned ExpectedElt = M[0];
399 for (unsigned I = 1; I < NumElts; ++I) {
400 // Increment the expected index. If it wraps around, just follow it
401 // back to index zero and keep going.
402 ++ExpectedElt;
403 if (ExpectedElt == NumElts)
404 ExpectedElt = 0;
405
406 if (M[I] < 0)
407 continue; // Ignore UNDEF indices.
408 if (ExpectedElt != static_cast<unsigned>(M[I]))
409 return false;
410 }
411
412 return true;
413}
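// For illustration: for a single-source <4 x s32> shuffle, the mask
// <1, 2, 3, 0> walks successive elements starting at index 1 and wraps at the
// end, so it can be handled by one EXT of the vector with itself.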
414
415bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
416 ShuffleVectorPseudo &MatchInfo) {
417 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
418 Register Dst = MI.getOperand(0).getReg();
419 LLT DstTy = MRI.getType(Dst);
420 Register V1 = MI.getOperand(1).getReg();
421 Register V2 = MI.getOperand(2).getReg();
422 auto Mask = MI.getOperand(3).getShuffleMask();
423 uint64_t Imm;
424 auto ExtInfo = getExtMask(Mask, DstTy.getNumElements());
425 uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
426
427 if (!ExtInfo) {
428 if (!getOpcodeDef<GImplicitDef>(V2, MRI) ||
429 !isSingletonExtMask(Mask, DstTy))
430 return false;
431
432 Imm = Mask[0] * ExtFactor;
433 MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V1, Imm});
434 return true;
435 }
436 bool ReverseExt;
437 std::tie(ReverseExt, Imm) = *ExtInfo;
438 if (ReverseExt)
439 std::swap(V1, V2);
440 Imm *= ExtFactor;
441 MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
442 return true;
443}
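// For illustration: the EXT immediate is a byte offset, so a shuffle whose
// selected elements start at element 3 of a <8 x s8> input produces G_EXT with
// immediate 3, while the same start position in a <4 x s16> input produces
// immediate 6 (3 * 2 bytes).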
444
445/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
446/// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR.
447void applyShuffleVectorPseudo(MachineInstr &MI,
448 ShuffleVectorPseudo &MatchInfo) {
449 MachineIRBuilder MIRBuilder(MI);
450 MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
451 MI.eraseFromParent();
452}
453
454/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
455/// Special-cased because the constant operand must be emitted as a G_CONSTANT
456/// for the imported tablegen patterns to work.
457void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
458 MachineIRBuilder MIRBuilder(MI);
459 if (MatchInfo.SrcOps[2].getImm() == 0)
460 MIRBuilder.buildCopy(MatchInfo.Dst, MatchInfo.SrcOps[0]);
461 else {
462 // Tablegen patterns expect an i32 G_CONSTANT as the final op.
463 auto Cst =
464 MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
465 MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
466 {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
467 }
468 MI.eraseFromParent();
469}
470
471/// Match a G_SHUFFLE_VECTOR with a mask which corresponds to a
472/// G_INSERT_VECTOR_ELT and G_EXTRACT_VECTOR_ELT pair.
473///
474/// e.g.
475/// %shuf = G_SHUFFLE_VECTOR %left, %right, shufflemask(0, 0)
476///
477/// Can be represented as
478///
479/// %extract = G_EXTRACT_VECTOR_ELT %left, 0
480/// %ins = G_INSERT_VECTOR_ELT %left, %extract, 1
481///
482bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI,
483 std::tuple<Register, int, Register, int> &MatchInfo) {
484 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
485 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
486 Register Dst = MI.getOperand(0).getReg();
487 int NumElts = MRI.getType(Dst).getNumElements();
488 auto DstIsLeftAndDstLane = isINSMask(ShuffleMask, NumElts);
489 if (!DstIsLeftAndDstLane)
490 return false;
491 bool DstIsLeft;
492 int DstLane;
493 std::tie(DstIsLeft, DstLane) = *DstIsLeftAndDstLane;
494 Register Left = MI.getOperand(1).getReg();
495 Register Right = MI.getOperand(2).getReg();
496 Register DstVec = DstIsLeft ? Left : Right;
497 Register SrcVec = Left;
498
499 int SrcLane = ShuffleMask[DstLane];
500 if (SrcLane >= NumElts) {
501 SrcVec = Right;
502 SrcLane -= NumElts;
503 }
504
505 MatchInfo = std::make_tuple(DstVec, DstLane, SrcVec, SrcLane);
506 return true;
507}
508
509void applyINS(MachineInstr &MI, MachineRegisterInfo &MRI,
510 MachineIRBuilder &Builder,
511 std::tuple<Register, int, Register, int> &MatchInfo) {
512 Builder.setInstrAndDebugLoc(MI);
513 Register Dst = MI.getOperand(0).getReg();
514 auto ScalarTy = MRI.getType(Dst).getElementType();
515 Register DstVec, SrcVec;
516 int DstLane, SrcLane;
517 std::tie(DstVec, DstLane, SrcVec, SrcLane) = MatchInfo;
518 auto SrcCst = Builder.buildConstant(LLT::scalar(64), SrcLane);
519 auto Extract = Builder.buildExtractVectorElement(ScalarTy, SrcVec, SrcCst);
520 auto DstCst = Builder.buildConstant(LLT::scalar(64), DstLane);
521 Builder.buildInsertVectorElement(Dst, DstVec, Extract, DstCst);
522 MI.eraseFromParent();
523}
524
525/// isVShiftRImm - Check if this is a valid vector for the immediate
526/// operand of a vector shift right operation. The value must be in the range:
527/// 1 <= Value <= ElementBits for a right shift.
528 bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
529 int64_t &Cnt) {
530 assert(Ty.isVector() && "vector shift count is not a vector type");
531 MachineInstr *MI = MRI.getVRegDef(Reg);
532 auto Cst = getAArch64VectorSplatScalar(*MI, MRI);
533 if (!Cst)
534 return false;
535 Cnt = *Cst;
536 int64_t ElementBits = Ty.getScalarSizeInBits();
537 return Cnt >= 1 && Cnt <= ElementBits;
538}
539
540/// Match a vector G_ASHR or G_LSHR with a valid immediate shift.
541bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
542 int64_t &Imm) {
543 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
544 MI.getOpcode() == TargetOpcode::G_LSHR);
545 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
546 if (!Ty.isVector())
547 return false;
548 return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
549}
550
551void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
552 int64_t &Imm) {
553 unsigned Opc = MI.getOpcode();
554 assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
555 unsigned NewOpc =
556 Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
557 MachineIRBuilder MIB(MI);
558 auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm);
559 MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef});
560 MI.eraseFromParent();
561}
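// For illustration: a G_ASHR of a <4 x s32> vector by a splat of 3 becomes
// G_VASHR with a constant 3 operand; isVShiftRImm only accepts splat shift
// amounts in the range 1..32 for 32-bit elements, so anything else is left as
// a plain shift.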
562
563/// Determine if it is possible to modify the \p RHS and predicate \p P of a
564/// G_ICMP instruction such that the right-hand side is an arithmetic immediate.
565///
566/// \returns A pair containing the updated immediate and predicate which may
567/// be used to optimize the instruction.
568///
569/// \note This assumes that the comparison has been legalized.
570std::optional<std::pair<uint64_t, CmpInst::Predicate>>
571tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
572 const MachineRegisterInfo &MRI) {
573 const auto &Ty = MRI.getType(RHS);
574 if (Ty.isVector())
575 return std::nullopt;
576 unsigned Size = Ty.getSizeInBits();
577 assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?");
578
579 // If the RHS is not a constant, or the RHS is already a valid arithmetic
580 // immediate, then there is nothing to change.
581 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, MRI);
582 if (!ValAndVReg)
583 return std::nullopt;
584 uint64_t C = ValAndVReg->Value.getZExtValue();
585 if (isLegalArithImmed(C))
586 return std::nullopt;
587
588 // We have a non-arithmetic immediate. Check if adjusting the immediate and
589 // adjusting the predicate will result in a legal arithmetic immediate.
590 switch (P) {
591 default:
592 return std::nullopt;
593 case CmpInst::ICMP_SLT:
594 case CmpInst::ICMP_SGE:
595 // Check for
596 //
597 // x slt c => x sle c - 1
598 // x sge c => x sgt c - 1
599 //
600 // When c is not the smallest possible negative number.
601 if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
602 (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
603 return std::nullopt;
604 P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
605 C -= 1;
606 break;
607 case CmpInst::ICMP_ULT:
608 case CmpInst::ICMP_UGE:
609 // Check for
610 //
611 // x ult c => x ule c - 1
612 // x uge c => x ugt c - 1
613 //
614 // When c is not zero.
615 if (C == 0)
616 return std::nullopt;
617 P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
618 C -= 1;
619 break;
620 case CmpInst::ICMP_SLE:
621 case CmpInst::ICMP_SGT:
622 // Check for
623 //
624 // x sle c => x slt c + 1
625 // x sgt c => x sge c + 1
626 //
627 // When c is not the largest possible signed integer.
628 if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
629 (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
630 return std::nullopt;
631 P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
632 C += 1;
633 break;
634 case CmpInst::ICMP_ULE:
635 case CmpInst::ICMP_UGT:
636 // Check for
637 //
638 // x ule c => x ult c + 1
639 // x ugt c => x uge c + 1
640 //
641 // When c is not the largest possible unsigned integer.
642 if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
643 (Size == 64 && C == UINT64_MAX))
644 return std::nullopt;
645 P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGT;
646 C += 1;
647 break;
648 }
649
650 // Check if the new constant is valid, and return the updated constant and
651 // predicate if it is.
652 if (Size == 32)
653 C = static_cast<uint32_t>(C);
654 if (!isLegalArithImmed(C))
655 return std::nullopt;
656 return {{C, P}};
657}
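// For illustration: 4097 is not a legal arithmetic immediate, but an unsigned
// "x ult 4097" can be rewritten as "x ule 4096", and 4096 (0x1000) encodes as
// a shifted 12-bit immediate, so {4096, ICMP_ULE} is returned.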
658
659/// Determine whether or not it is possible to update the RHS and predicate of
660/// a G_ICMP instruction such that the RHS will be selected as an arithmetic
661/// immediate.
662///
663/// \p MI - The G_ICMP instruction
664/// \p MatchInfo - The new RHS immediate and predicate on success
665///
666/// See tryAdjustICmpImmAndPred for valid transformations.
667 bool matchAdjustICmpImmAndPred(
668 MachineInstr &MI, const MachineRegisterInfo &MRI,
669 std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) {
670 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
671 Register RHS = MI.getOperand(3).getReg();
672 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
673 if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) {
674 MatchInfo = *MaybeNewImmAndPred;
675 return true;
676 }
677 return false;
678}
679
680void applyAdjustICmpImmAndPred(
681 MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo,
682 MachineIRBuilder &MIB, GISelChangeObserver &Observer) {
683 MIB.setInstrAndDebugLoc(MI);
684 MachineOperand &RHS = MI.getOperand(3);
685 MachineRegisterInfo &MRI = *MIB.getMRI();
686 auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()),
687 MatchInfo.first);
688 Observer.changingInstr(MI);
689 RHS.setReg(Cst->getOperand(0).getReg());
690 MI.getOperand(1).setPredicate(MatchInfo.second);
691 Observer.changedInstr(MI);
692}
693
694bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
695 std::pair<unsigned, int> &MatchInfo) {
696 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
697 Register Src1Reg = MI.getOperand(1).getReg();
698 const LLT SrcTy = MRI.getType(Src1Reg);
699 const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
700
701 auto LaneIdx = getSplatIndex(MI);
702 if (!LaneIdx)
703 return false;
704
705 // The lane idx should be within the first source vector.
706 if (*LaneIdx >= SrcTy.getNumElements())
707 return false;
708
709 if (DstTy != SrcTy)
710 return false;
711
712 LLT ScalarTy = SrcTy.getElementType();
713 unsigned ScalarSize = ScalarTy.getSizeInBits();
714
715 unsigned Opc = 0;
716 switch (SrcTy.getNumElements()) {
717 case 2:
718 if (ScalarSize == 64)
719 Opc = AArch64::G_DUPLANE64;
720 else if (ScalarSize == 32)
721 Opc = AArch64::G_DUPLANE32;
722 break;
723 case 4:
724 if (ScalarSize == 32)
725 Opc = AArch64::G_DUPLANE32;
726 else if (ScalarSize == 16)
727 Opc = AArch64::G_DUPLANE16;
728 break;
729 case 8:
730 if (ScalarSize == 8)
731 Opc = AArch64::G_DUPLANE8;
732 else if (ScalarSize == 16)
733 Opc = AArch64::G_DUPLANE16;
734 break;
735 case 16:
736 if (ScalarSize == 8)
737 Opc = AArch64::G_DUPLANE8;
738 break;
739 default:
740 break;
741 }
742 if (!Opc)
743 return false;
744
745 MatchInfo.first = Opc;
746 MatchInfo.second = *LaneIdx;
747 return true;
748}
749
750void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
751 MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
752 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
753 Register Src1Reg = MI.getOperand(1).getReg();
754 const LLT SrcTy = MRI.getType(Src1Reg);
755
756 B.setInstrAndDebugLoc(MI);
757 auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second);
758
759 Register DupSrc = MI.getOperand(1).getReg();
760 // For types like <2 x s32>, we can use G_DUPLANE32, with a <4 x s32> source.
761 // To do this, we can use a G_CONCAT_VECTORS to do the widening.
762 if (SrcTy.getSizeInBits() == 64) {
763 auto Undef = B.buildUndef(SrcTy);
764 DupSrc = B.buildConcatVectors(SrcTy.multiplyElements(2),
765 {Src1Reg, Undef.getReg(0)})
766 .getReg(0);
767 }
768 B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, {DupSrc, Lane});
769 MI.eraseFromParent();
770}
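// For illustration: a <4 x s32> shuffle that splats lane 1 of its first input
// (mask <1, 1, 1, 1>) is lowered to G_DUPLANE32 with a constant lane of 1. A
// 64-bit source such as <2 x s32> is first widened with G_CONCAT_VECTORS of
// the source and an undef vector before the G_DUPLANE32 is built.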
771
772bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
773 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
774 auto Splat = getAArch64VectorSplat(MI, MRI);
775 if (!Splat)
776 return false;
777 if (Splat->isReg())
778 return true;
779 // Later, during selection, we'll try to match imported patterns using
780 // immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
781 // G_BUILD_VECTORs which could match those patterns.
782 int64_t Cst = Splat->getCst();
783 return (Cst != 0 && Cst != -1);
784}
785
786 void applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
787 MachineIRBuilder &B) {
788 B.setInstrAndDebugLoc(MI);
789 B.buildInstr(AArch64::G_DUP, {MI.getOperand(0).getReg()},
790 {MI.getOperand(1).getReg()});
791 MI.eraseFromParent();
792}
793
794/// \returns how many instructions would be saved by folding a G_ICMP's shift
795/// and/or extension operations.
796 unsigned getCmpOperandFoldingProfit(Register CmpOp, MachineRegisterInfo &MRI) {
797 // No instructions to save if there's more than one use or no uses.
798 if (!MRI.hasOneNonDBGUse(CmpOp))
799 return 0;
800
801 // FIXME: This is duplicated with the selector. (See: selectShiftedRegister)
802 auto IsSupportedExtend = [&](const MachineInstr &MI) {
803 if (MI.getOpcode() == TargetOpcode::G_SEXT_INREG)
804 return true;
805 if (MI.getOpcode() != TargetOpcode::G_AND)
806 return false;
807 auto ValAndVReg =
808 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
809 if (!ValAndVReg)
810 return false;
811 uint64_t Mask = ValAndVReg->Value.getZExtValue();
812 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
813 };
814
815 MachineInstr *Def = getDefIgnoringCopies(CmpOp, MRI);
816 if (IsSupportedExtend(*Def))
817 return 1;
818
819 unsigned Opc = Def->getOpcode();
820 if (Opc != TargetOpcode::G_SHL && Opc != TargetOpcode::G_ASHR &&
821 Opc != TargetOpcode::G_LSHR)
822 return 0;
823
824 auto MaybeShiftAmt =
825 getIConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
826 if (!MaybeShiftAmt)
827 return 0;
828 uint64_t ShiftAmt = MaybeShiftAmt->Value.getZExtValue();
829 MachineInstr *ShiftLHS =
830 getDefIgnoringCopies(Def->getOperand(1).getReg(), MRI);
831
832 // Check if we can fold an extend and a shift.
833 // FIXME: This is duplicated with the selector. (See:
834 // selectArithExtendedRegister)
835 if (IsSupportedExtend(*ShiftLHS))
836 return (ShiftAmt <= 4) ? 2 : 1;
837
838 LLT Ty = MRI.getType(Def->getOperand(0).getReg());
839 if (Ty.isVector())
840 return 0;
841 unsigned ShiftSize = Ty.getSizeInBits();
842 if ((ShiftSize == 32 && ShiftAmt <= 31) ||
843 (ShiftSize == 64 && ShiftAmt <= 63))
844 return 1;
845 return 0;
846}
847
848/// \returns true if it would be profitable to swap the LHS and RHS of a G_ICMP
849/// instruction \p MI.
850bool trySwapICmpOperands(MachineInstr &MI, MachineRegisterInfo &MRI) {
851 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
852 // Swap the operands if it would introduce a profitable folding opportunity.
853 // (e.g. a shift + extend).
854 //
855 // For example:
856 // lsl w13, w11, #1
857 // cmp w13, w12
858 // can be turned into:
859 // cmp w12, w11, lsl #1
860
861 // Don't swap if there's a constant on the RHS, because we know we can fold
862 // that.
863 Register RHS = MI.getOperand(3).getReg();
864 auto RHSCst = getIConstantVRegValWithLookThrough(RHS, MRI);
865 if (RHSCst && isLegalArithImmed(RHSCst->Value.getSExtValue()))
866 return false;
867
868 Register LHS = MI.getOperand(2).getReg();
869 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
870 auto GetRegForProfit = [&](Register Reg) {
871 MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
872 return isCMN(Def, Pred, MRI) ? Def->getOperand(2).getReg() : Reg;
873 };
874
875 // Don't have a constant on the RHS. If we swap the LHS and RHS of the
876 // compare, would we be able to fold more instructions?
877 Register TheLHS = GetRegForProfit(LHS);
878 Register TheRHS = GetRegForProfit(RHS);
879
880 // If the LHS is more likely to give us a folding opportunity, then swap the
881 // LHS and RHS.
882 return (getCmpOperandFoldingProfit(TheLHS, MRI) >
883 getCmpOperandFoldingProfit(TheRHS, MRI));
884}
885
886void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
887 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
888 Register LHS = MI.getOperand(2).getReg();
889 Register RHS = MI.getOperand(3).getReg();
890 Observer.changedInstr(MI);
891 MI.getOperand(1).setPredicate(CmpInst::getSwappedPredicate(Pred));
892 MI.getOperand(2).setReg(RHS);
893 MI.getOperand(3).setReg(LHS);
894 Observer.changedInstr(MI);
895}
896
897/// \returns a function which builds a vector floating point compare instruction
898/// for a condition code \p CC.
899/// \param [in] IsZero - True if the comparison is against 0.
900/// \param [in] NoNans - True if the target has NoNansFPMath.
901std::function<Register(MachineIRBuilder &)>
902getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool IsZero,
903 bool NoNans, MachineRegisterInfo &MRI) {
904 LLT DstTy = MRI.getType(LHS);
905 assert(DstTy.isVector() && "Expected vector types only?");
906 assert(DstTy == MRI.getType(RHS) && "Src and Dst types must match!");
907 switch (CC) {
908 default:
909 llvm_unreachable("Unexpected condition code!");
910 case AArch64CC::NE:
911 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
912 auto FCmp = IsZero
913 ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS})
914 : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
915 return MIB.buildNot(DstTy, FCmp).getReg(0);
916 };
917 case AArch64CC::EQ:
918 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
919 return IsZero
920 ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS}).getReg(0)
921 : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS})
922 .getReg(0);
923 };
924 case AArch64CC::GE:
925 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
926 return IsZero
927 ? MIB.buildInstr(AArch64::G_FCMGEZ, {DstTy}, {LHS}).getReg(0)
928 : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS})
929 .getReg(0);
930 };
931 case AArch64CC::GT:
932 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
933 return IsZero
934 ? MIB.buildInstr(AArch64::G_FCMGTZ, {DstTy}, {LHS}).getReg(0)
935 : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS})
936 .getReg(0);
937 };
938 case AArch64CC::LS:
939 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
940 return IsZero
941 ? MIB.buildInstr(AArch64::G_FCMLEZ, {DstTy}, {LHS}).getReg(0)
942 : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS})
943 .getReg(0);
944 };
945 case AArch64CC::MI:
946 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
947 return IsZero
948 ? MIB.buildInstr(AArch64::G_FCMLTZ, {DstTy}, {LHS}).getReg(0)
949 : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS})
950 .getReg(0);
951 };
952 }
953}
954
955/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
956bool matchLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
957 MachineIRBuilder &MIB) {
958 assert(MI.getOpcode() == TargetOpcode::G_FCMP);
959 const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
960
961 Register Dst = MI.getOperand(0).getReg();
962 LLT DstTy = MRI.getType(Dst);
963 if (!DstTy.isVector() || !ST.hasNEON())
964 return false;
965 Register LHS = MI.getOperand(2).getReg();
966 unsigned EltSize = MRI.getType(LHS).getScalarSizeInBits();
967 if (EltSize == 16 && !ST.hasFullFP16())
968 return false;
969 if (EltSize != 16 && EltSize != 32 && EltSize != 64)
970 return false;
971
972 return true;
973}
974
975/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
976void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
977 MachineIRBuilder &MIB) {
978 assert(MI.getOpcode() == TargetOpcode::G_FCMP);
979 const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
980
981 const auto &CmpMI = cast<GFCmp>(MI);
982
983 Register Dst = CmpMI.getReg(0);
984 CmpInst::Predicate Pred = CmpMI.getCond();
985 Register LHS = CmpMI.getLHSReg();
986 Register RHS = CmpMI.getRHSReg();
987
988 LLT DstTy = MRI.getType(Dst);
989
990 auto Splat = getAArch64VectorSplat(*MRI.getVRegDef(RHS), MRI);
991
992 // Compares against 0 have special target-specific pseudos.
993 bool IsZero = Splat && Splat->isCst() && Splat->getCst() == 0;
994
995 bool Invert = false;
996 AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
997 if (Pred == CmpInst::Predicate::FCMP_ORD && IsZero) {
998 // The special case "fcmp ord %a, 0" is the canonical check that LHS isn't
999 // NaN, so equivalent to a == a and doesn't need the two comparisons an
1000 // "ord" normally would.
1001 RHS = LHS;
1002 IsZero = false;
1003 CC = AArch64CC::EQ;
1004 } else
1005 changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);
1006
1007 // Instead of having an apply function, just build here to simplify things.
1008 MIB.setInstrAndDebugLoc(MI);
1009
1010 const bool NoNans =
1011 ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;
1012
1013 auto Cmp = getVectorFCMP(CC, LHS, RHS, IsZero, NoNans, MRI);
1014 Register CmpRes;
1015 if (CC2 == AArch64CC::AL)
1016 CmpRes = Cmp(MIB);
1017 else {
1018 auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, IsZero, NoNans, MRI);
1019 auto Cmp2Dst = Cmp2(MIB);
1020 auto Cmp1Dst = Cmp(MIB);
1021 CmpRes = MIB.buildOr(DstTy, Cmp1Dst, Cmp2Dst).getReg(0);
1022 }
1023 if (Invert)
1024 CmpRes = MIB.buildNot(DstTy, CmpRes).getReg(0);
1025 MRI.replaceRegWith(Dst, CmpRes);
1026 MI.eraseFromParent();
1027}
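// For illustration: a vector "fcmp ord x, 0" needs no second condition; it is
// rebuilt as G_FCMEQ x, x, since a lane is ordered exactly when it is not NaN
// and x == x fails only for NaN. Predicates needing two AArch64 conditions
// emit two compares whose results are ORed, and Invert adds a final G_NOT.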
1028
1029bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
1030 Register &SrcReg) {
1031 assert(MI.getOpcode() == TargetOpcode::G_STORE);
1032 Register DstReg = MI.getOperand(0).getReg();
1033 if (MRI.getType(DstReg).isVector())
1034 return false;
1035 // Match a store of a truncate.
1036 if (!mi_match(DstReg, MRI, m_GTrunc(m_Reg(SrcReg))))
1037 return false;
1038 // Only form truncstores for value types of max 64b.
1039 return MRI.getType(SrcReg).getSizeInBits() <= 64;
1040}
1041
1042 void applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
1043 MachineIRBuilder &B, GISelChangeObserver &Observer,
1044 Register &SrcReg) {
1045 assert(MI.getOpcode() == TargetOpcode::G_STORE);
1046 Observer.changingInstr(MI);
1047 MI.getOperand(0).setReg(SrcReg);
1048 Observer.changedInstr(MI);
1049}
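// For illustration: for "%t:(s32) = G_TRUNC %x:(s64); G_STORE %t, %p", the
// store is rewritten to use %x directly; its memory operand still describes a
// 32-bit access, so the result is a truncating store of the wider value.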
1050
1051// Lower vector G_SEXT_INREG back to shifts for selection. We allowed them to
1052// form in the first place for combine opportunities, so any remaining ones
1053// at this stage need to be lowered back.
1054bool matchVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI) {
1055 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1056 Register DstReg = MI.getOperand(0).getReg();
1057 LLT DstTy = MRI.getType(DstReg);
1058 return DstTy.isVector();
1059}
1060
1061 void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
1062 MachineIRBuilder &B, GISelChangeObserver &Observer) {
1063 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1064 B.setInstrAndDebugLoc(MI);
1065 LegalizerHelper Helper(*MI.getMF(), Observer, B);
1066 Helper.lower(MI, 0, /* Unused hint type */ LLT());
1067}
1068
1069/// Combine <N x t>, unused = unmerge(G_EXT <2*N x t> v, undef, N)
1070/// => unused, <N x t> = unmerge v
1071bool matchUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
1072 Register &MatchInfo) {
1073 auto &Unmerge = cast<GUnmerge>(MI);
1074 if (Unmerge.getNumDefs() != 2)
1075 return false;
1076 if (!MRI.use_nodbg_empty(Unmerge.getReg(1)))
1077 return false;
1078
1079 LLT DstTy = MRI.getType(Unmerge.getReg(0));
1080 if (!DstTy.isVector())
1081 return false;
1082
1083 MachineInstr *Ext = getOpcodeDef(AArch64::G_EXT, Unmerge.getSourceReg(), MRI);
1084 if (!Ext)
1085 return false;
1086
1087 Register ExtSrc1 = Ext->getOperand(1).getReg();
1088 Register ExtSrc2 = Ext->getOperand(2).getReg();
1089 auto LowestVal =
1090 getIConstantVRegValWithLookThrough(Ext->getOperand(3).getReg(), MRI);
1091 if (!LowestVal || LowestVal->Value.getZExtValue() != DstTy.getSizeInBytes())
1092 return false;
1093
1094 if (!getOpcodeDef<GImplicitDef>(ExtSrc2, MRI))
1095 return false;
1096
1097 MatchInfo = ExtSrc1;
1098 return true;
1099}
1100
1101 void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
1102 MachineIRBuilder &B,
1103 GISelChangeObserver &Observer, Register &SrcReg) {
1104 Observer.changingInstr(MI);
1105 // Swap dst registers.
1106 Register Dst1 = MI.getOperand(0).getReg();
1107 MI.getOperand(0).setReg(MI.getOperand(1).getReg());
1108 MI.getOperand(1).setReg(Dst1);
1109 MI.getOperand(2).setReg(SrcReg);
1110 Observer.changedInstr(MI);
1111}
1112
1113// Match mul({z/s}ext, {z/s}ext) => {u/s}mull, OR
1114// match v2s64 mul instructions, which will then be scalarised later on.
1115// Both matches are done in one function to ensure that the order of matching
1116// will always be the same.
1117// Try lowering MUL to MULL before trying to scalarise if needed.
1118bool matchExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI) {
1119 // Get the instructions that defined the source operand
1120 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1121 MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
1122 MachineInstr *I2 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
1123
1124 if (DstTy.isVector()) {
1125 // If the source operands were EXTENDED before, then {U/S}MULL can be used
1126 unsigned I1Opc = I1->getOpcode();
1127 unsigned I2Opc = I2->getOpcode();
1128 if (((I1Opc == TargetOpcode::G_ZEXT && I2Opc == TargetOpcode::G_ZEXT) ||
1129 (I1Opc == TargetOpcode::G_SEXT && I2Opc == TargetOpcode::G_SEXT)) &&
1130 (MRI.getType(I1->getOperand(0).getReg()).getScalarSizeInBits() ==
1131 MRI.getType(I1->getOperand(1).getReg()).getScalarSizeInBits() * 2) &&
1132 (MRI.getType(I2->getOperand(0).getReg()).getScalarSizeInBits() ==
1133 MRI.getType(I2->getOperand(1).getReg()).getScalarSizeInBits() * 2)) {
1134 return true;
1135 }
1136 // If result type is v2s64, scalarise the instruction
1137 else if (DstTy == LLT::fixed_vector(2, 64)) {
1138 return true;
1139 }
1140 }
1141 return false;
1142}
1143
1144 void applyExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI,
1145 MachineIRBuilder &B, GISelChangeObserver &Observer) {
1146 assert(MI.getOpcode() == TargetOpcode::G_MUL &&
1147 "Expected a G_MUL instruction");
1148
1149 // Get the instructions that defined the source operand
1150 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1151 MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
1152 MachineInstr *I2 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
1153
1154 // If the source operands were EXTENDED before, then {U/S}MULL can be used
1155 unsigned I1Opc = I1->getOpcode();
1156 unsigned I2Opc = I2->getOpcode();
1157 if (((I1Opc == TargetOpcode::G_ZEXT && I2Opc == TargetOpcode::G_ZEXT) ||
1158 (I1Opc == TargetOpcode::G_SEXT && I2Opc == TargetOpcode::G_SEXT)) &&
1159 (MRI.getType(I1->getOperand(0).getReg()).getScalarSizeInBits() ==
1160 MRI.getType(I1->getOperand(1).getReg()).getScalarSizeInBits() * 2) &&
1161 (MRI.getType(I2->getOperand(0).getReg()).getScalarSizeInBits() ==
1162 MRI.getType(I2->getOperand(1).getReg()).getScalarSizeInBits() * 2)) {
1163
1164 B.setInstrAndDebugLoc(MI);
1165 B.buildInstr(I1->getOpcode() == TargetOpcode::G_ZEXT ? AArch64::G_UMULL
1166 : AArch64::G_SMULL,
1167 {MI.getOperand(0).getReg()},
1168 {I1->getOperand(1).getReg(), I2->getOperand(1).getReg()});
1169 MI.eraseFromParent();
1170 }
1171 // If result type is v2s64, scalarise the instruction
1172 else if (DstTy == LLT::fixed_vector(2, 64)) {
1173 LegalizerHelper Helper(*MI.getMF(), Observer, B);
1174 B.setInstrAndDebugLoc(MI);
1175 Helper.fewerElementsVector(
1176 MI, 0,
1177 DstTy.changeElementCount(
1178 DstTy.getElementCount().divideCoefficientBy(2)));
1179 }
1180}
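// For illustration: "G_MUL (G_ZEXT <4 x s16>), (G_ZEXT <4 x s16>)" producing a
// <4 x s32> result is rebuilt as G_UMULL of the two narrow operands, while a
// plain v2s64 G_MUL is narrowed with fewerElementsVector so it can be
// scalarised for selection.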
1181
1182class AArch64PostLegalizerLoweringImpl : public Combiner {
1183protected:
1184 // TODO: Make CombinerHelper methods const.
1185 mutable CombinerHelper Helper;
1186 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig;
1187 const AArch64Subtarget &STI;
1188
1189public:
1190 AArch64PostLegalizerLoweringImpl(
1191 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
1192 GISelCSEInfo *CSEInfo,
1193 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
1194 const AArch64Subtarget &STI);
1195
1196 static const char *getName() { return "AArch64PostLegalizerLowering"; }
1197
1198 bool tryCombineAll(MachineInstr &I) const override;
1199
1200private:
1201#define GET_GICOMBINER_CLASS_MEMBERS
1202#include "AArch64GenPostLegalizeGILowering.inc"
1203#undef GET_GICOMBINER_CLASS_MEMBERS
1204};
1205
1206#define GET_GICOMBINER_IMPL
1207#include "AArch64GenPostLegalizeGILowering.inc"
1208#undef GET_GICOMBINER_IMPL
1209
1210AArch64PostLegalizerLoweringImpl::AArch64PostLegalizerLoweringImpl(
1211 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
1212 GISelCSEInfo *CSEInfo,
1213 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
1214 const AArch64Subtarget &STI)
1215 : Combiner(MF, CInfo, TPC, /*KB*/ nullptr, CSEInfo),
1216 Helper(Observer, B, /*IsPreLegalize*/ true), RuleConfig(RuleConfig),
1217 STI(STI),
1218#define GET_GICOMBINER_CONSTRUCTOR_INITS
1219#include "AArch64GenPostLegalizeGILowering.inc"
1220#undef GET_GICOMBINER_CONSTRUCTOR_INITS
1221{
1222}
1223
1224class AArch64PostLegalizerLowering : public MachineFunctionPass {
1225public:
1226 static char ID;
1227
1228 AArch64PostLegalizerLowering();
1229
1230 StringRef getPassName() const override {
1231 return "AArch64PostLegalizerLowering";
1232 }
1233
1234 bool runOnMachineFunction(MachineFunction &MF) override;
1235 void getAnalysisUsage(AnalysisUsage &AU) const override;
1236
1237private:
1238 AArch64PostLegalizerLoweringImplRuleConfig RuleConfig;
1239};
1240} // end anonymous namespace
1241
1242 void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const {
1243 AU.addRequired<TargetPassConfig>();
1244 AU.setPreservesCFG();
1245 getSelectionDAGFallbackAnalysisUsage(AU);
1246 MachineFunctionPass::getAnalysisUsage(AU);
1247}
1248
1249 AArch64PostLegalizerLowering::AArch64PostLegalizerLowering()
1250 : MachineFunctionPass(ID) {
1251 initializeAArch64PostLegalizerLoweringPass(*PassRegistry::getPassRegistry());
1252
1253 if (!RuleConfig.parseCommandLineOption())
1254 report_fatal_error("Invalid rule identifier");
1255}
1256
1257bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) {
1258 if (MF.getProperties().hasProperty(
1259 MachineFunctionProperties::Property::FailedISel))
1260 return false;
1261 assert(MF.getProperties().hasProperty(
1262 MachineFunctionProperties::Property::Legalized) &&
1263 "Expected a legalized function?");
1264 auto *TPC = &getAnalysis<TargetPassConfig>();
1265 const Function &F = MF.getFunction();
1266
1267 const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
1268 CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
1269 /*LegalizerInfo*/ nullptr, /*OptEnabled=*/true,
1270 F.hasOptSize(), F.hasMinSize());
1271 AArch64PostLegalizerLoweringImpl Impl(MF, CInfo, TPC, /*CSEInfo*/ nullptr,
1272 RuleConfig, ST);
1273 return Impl.combineMachineInstrs();
1274}
1275
1276char AArch64PostLegalizerLowering::ID = 0;
1277INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE,
1278 "Lower AArch64 MachineInstrs after legalization", false,
1279 false)
1280 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
1281 INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE,
1282 "Lower AArch64 MachineInstrs after legalization", false,
1283 false)
1284
1285 namespace llvm {
1286 FunctionPass *createAArch64PostLegalizerLowering() {
1287 return new AArch64PostLegalizerLowering();
1288}
1289} // end namespace llvm