1//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Post-legalization lowering for instructions.
11///
12/// This is used to offload pattern matching from the selector.
13///
14/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually
15/// a G_ZIP, G_UZP, etc.
16///
17/// General optimization combines should be handled by either the
18/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner.
19///
20//===----------------------------------------------------------------------===//
21
#include "AArch64GlobalISelUtils.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include <optional>
49
50#define GET_GICOMBINER_DEPS
51#include "AArch64GenPostLegalizeGILowering.inc"
52#undef GET_GICOMBINER_DEPS
53
54#define DEBUG_TYPE "aarch64-postlegalizer-lowering"
55
56using namespace llvm;
57using namespace MIPatternMatch;
58using namespace AArch64GISelUtils;
59
60namespace {
61
62#define GET_GICOMBINER_TYPES
63#include "AArch64GenPostLegalizeGILowering.inc"
64#undef GET_GICOMBINER_TYPES
65
66/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
67///
68/// Used for matching target-supported shuffles before codegen.
69struct ShuffleVectorPseudo {
70 unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1)
71 Register Dst; ///< Destination register.
72 SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
73 ShuffleVectorPseudo(unsigned Opc, Register Dst,
74 std::initializer_list<SrcOp> SrcOps)
75 : Opc(Opc), Dst(Dst), SrcOps(SrcOps){};
76 ShuffleVectorPseudo() = default;
77};
78
79/// Check if a vector shuffle corresponds to a REV instruction with the
80/// specified blocksize.
81bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts,
82 unsigned BlockSize) {
83 assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
84 "Only possible block sizes for REV are: 16, 32, 64");
85 assert(EltSize != 64 && "EltSize cannot be 64 for REV mask.");
86
87 unsigned BlockElts = M[0] + 1;
88
89 // If the first shuffle index is UNDEF, be optimistic.
90 if (M[0] < 0)
91 BlockElts = BlockSize / EltSize;
92
93 if (BlockSize <= EltSize || BlockSize != BlockElts * EltSize)
94 return false;
95
96 for (unsigned i = 0; i < NumElts; ++i) {
97 // Ignore undef indices.
98 if (M[i] < 0)
99 continue;
100 if (static_cast<unsigned>(M[i]) !=
101 (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
102 return false;
103 }
104
105 return true;
106}
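
// For example, isREVMask({1, 0, 3, 2}, /*EltSize=*/32, /*NumElts=*/4,
// /*BlockSize=*/64) holds: each 64-bit block contains its two 32-bit elements
// in reverse order. With BlockSize 32 the same mask fails, since a 32-bit
// block cannot hold more than one 32-bit element.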
107
108/// Determines if \p M is a shuffle vector mask for a TRN of \p NumElts.
109/// Whether or not G_TRN1 or G_TRN2 should be used is stored in \p WhichResult.
110bool isTRNMask(ArrayRef<int> M, unsigned NumElts, unsigned &WhichResult) {
111 if (NumElts % 2 != 0)
112 return false;
113 WhichResult = (M[0] == 0 ? 0 : 1);
114 for (unsigned i = 0; i < NumElts; i += 2) {
115 if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != i + WhichResult) ||
116 (M[i + 1] >= 0 &&
117 static_cast<unsigned>(M[i + 1]) != i + NumElts + WhichResult))
118 return false;
119 }
120 return true;
121}
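
// For example, with NumElts = 4, isTRNMask accepts <0, 4, 2, 6> with
// WhichResult == 0 (G_TRN1, the even lanes of both sources) and <1, 5, 3, 7>
// with WhichResult == 1 (G_TRN2, the odd lanes).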
122
123/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
124/// sources of the shuffle are different.
125std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
126 unsigned NumElts) {
127 // Look for the first non-undef element.
128 auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
129 if (FirstRealElt == M.end())
130 return std::nullopt;
131
132 // Use APInt to handle overflow when calculating expected element.
133 unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
134 APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
135
136 // The following shuffle indices must be the successive elements after the
137 // first real element.
138 if (any_of(
139 make_range(std::next(FirstRealElt), M.end()),
140 [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
141 return std::nullopt;
142
143 // The index of an EXT is the first element if it is not UNDEF.
144 // Watch out for the beginning UNDEFs. The EXT index should be the expected
145 // value of the first element. E.g.
146 // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
147 // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
148 // ExpectedElt is the last mask index plus 1.
149 uint64_t Imm = ExpectedElt.getZExtValue();
150 bool ReverseExt = false;
151
152 // There are two different cases that require reversing the input vectors.
153 // For example, for vector <4 x i32> we have the following cases,
154 // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
155 // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
156 // For both cases, we finally use mask <5, 6, 7, 0>, which requires
157 // to reverse two input vectors.
158 if (Imm < NumElts)
159 ReverseExt = true;
160 else
161 Imm -= NumElts;
162 return std::make_pair(ReverseExt, Imm);
163}
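
// For example, with NumElts = 4 the mask <-1, -1, 7, 0> is treated as
// <5, 6, 7, 0>: the last mask index plus one is 1, which is less than
// NumElts, so getExtMask returns {/*ReverseExt=*/true, /*Imm=*/1}.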
164
165/// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts.
166/// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult.
167bool isUZPMask(ArrayRef<int> M, unsigned NumElts, unsigned &WhichResult) {
168 WhichResult = (M[0] == 0 ? 0 : 1);
169 for (unsigned i = 0; i != NumElts; ++i) {
170 // Skip undef indices.
171 if (M[i] < 0)
172 continue;
173 if (static_cast<unsigned>(M[i]) != 2 * i + WhichResult)
174 return false;
175 }
176 return true;
177}
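
// For example, with NumElts = 4, isUZPMask accepts <0, 2, 4, 6> with
// WhichResult == 0 (G_UZP1, the even lanes of the concatenated sources) and
// <1, 3, 5, 7> with WhichResult == 1 (G_UZP2, the odd lanes).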
178
179/// \return true if \p M is a zip mask for a shuffle vector of \p NumElts.
180/// Whether or not G_ZIP1 or G_ZIP2 should be used is stored in \p WhichResult.
181bool isZipMask(ArrayRef<int> M, unsigned NumElts, unsigned &WhichResult) {
182 if (NumElts % 2 != 0)
183 return false;
184
185 // 0 means use ZIP1, 1 means use ZIP2.
186 WhichResult = (M[0] == 0 ? 0 : 1);
187 unsigned Idx = WhichResult * NumElts / 2;
188 for (unsigned i = 0; i != NumElts; i += 2) {
189 if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != Idx) ||
190 (M[i + 1] >= 0 && static_cast<unsigned>(M[i + 1]) != Idx + NumElts))
191 return false;
192 Idx += 1;
193 }
194 return true;
195}
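
// For example, with NumElts = 4, isZipMask accepts <0, 4, 1, 5> with
// WhichResult == 0 (G_ZIP1, interleaving the low halves of both sources) and
// <2, 6, 3, 7> with WhichResult == 1 (G_ZIP2, the high halves).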
196
197/// Helper function for matchINS.
198///
199/// \returns a value when \p M is an ins mask for \p NumInputElements.
200///
201/// First element of the returned pair is true when the produced
202/// G_INSERT_VECTOR_ELT destination should be the LHS of the G_SHUFFLE_VECTOR.
203///
204/// Second element is the destination lane for the G_INSERT_VECTOR_ELT.
205std::optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M,
206 int NumInputElements) {
207 if (M.size() != static_cast<size_t>(NumInputElements))
208 return std::nullopt;
209 int NumLHSMatch = 0, NumRHSMatch = 0;
210 int LastLHSMismatch = -1, LastRHSMismatch = -1;
211 for (int Idx = 0; Idx < NumInputElements; ++Idx) {
212 if (M[Idx] == -1) {
213 ++NumLHSMatch;
214 ++NumRHSMatch;
215 continue;
216 }
217 M[Idx] == Idx ? ++NumLHSMatch : LastLHSMismatch = Idx;
218 M[Idx] == Idx + NumInputElements ? ++NumRHSMatch : LastRHSMismatch = Idx;
219 }
220 const int NumNeededToMatch = NumInputElements - 1;
221 if (NumLHSMatch == NumNeededToMatch)
222 return std::make_pair(true, LastLHSMismatch);
223 if (NumRHSMatch == NumNeededToMatch)
224 return std::make_pair(false, LastRHSMismatch);
225 return std::nullopt;
226}
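
// For example, with 4 input elements the mask <0, 6, 2, 3> matches the LHS in
// every lane except lane 1, so isINSMask returns {true, 1}: keep the LHS and
// insert the value selected by mask element 6 (lane 2 of the RHS) into lane 1.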
227
228/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
229/// G_REV instruction. Returns the appropriate G_REV opcode in \p MatchInfo.
230bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
231 ShuffleVectorPseudo &MatchInfo) {
232 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
233 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
234 Register Dst = MI.getOperand(0).getReg();
235 Register Src = MI.getOperand(1).getReg();
236 LLT Ty = MRI.getType(Dst);
237 unsigned EltSize = Ty.getScalarSizeInBits();
238
239 // Element size for a rev cannot be 64.
240 if (EltSize == 64)
241 return false;
242
243 unsigned NumElts = Ty.getNumElements();
244
245 // Try to produce a G_REV instruction
246 for (unsigned LaneSize : {64U, 32U, 16U}) {
247 if (isREVMask(ShuffleMask, EltSize, NumElts, LaneSize)) {
248 unsigned Opcode;
249 if (LaneSize == 64U)
250 Opcode = AArch64::G_REV64;
251 else if (LaneSize == 32U)
252 Opcode = AArch64::G_REV32;
253 else
254 Opcode = AArch64::G_REV16;
255
256 MatchInfo = ShuffleVectorPseudo(Opcode, Dst, {Src});
257 return true;
258 }
259 }
260
261 return false;
262}
263
264/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
265/// a G_TRN1 or G_TRN2 instruction.
266bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
267 ShuffleVectorPseudo &MatchInfo) {
268 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
269 unsigned WhichResult;
270 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
271 Register Dst = MI.getOperand(0).getReg();
272 unsigned NumElts = MRI.getType(Dst).getNumElements();
273 if (!isTRNMask(ShuffleMask, NumElts, WhichResult))
274 return false;
275 unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
276 Register V1 = MI.getOperand(1).getReg();
277 Register V2 = MI.getOperand(2).getReg();
278 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
279 return true;
280}
281
282/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
283/// a G_UZP1 or G_UZP2 instruction.
284///
285/// \param [in] MI - The shuffle vector instruction.
286/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success.
287bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
288 ShuffleVectorPseudo &MatchInfo) {
289 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
290 unsigned WhichResult;
291 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
292 Register Dst = MI.getOperand(0).getReg();
293 unsigned NumElts = MRI.getType(Dst).getNumElements();
294 if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
295 return false;
296 unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
297 Register V1 = MI.getOperand(1).getReg();
298 Register V2 = MI.getOperand(2).getReg();
299 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
300 return true;
301}
302
303bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
304 ShuffleVectorPseudo &MatchInfo) {
305 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
306 unsigned WhichResult;
307 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
308 Register Dst = MI.getOperand(0).getReg();
309 unsigned NumElts = MRI.getType(Dst).getNumElements();
310 if (!isZipMask(ShuffleMask, NumElts, WhichResult))
311 return false;
312 unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
313 Register V1 = MI.getOperand(1).getReg();
314 Register V2 = MI.getOperand(2).getReg();
315 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
316 return true;
317}
318
319/// Helper function for matchDup.
bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
                                 MachineRegisterInfo &MRI,
                                 ShuffleVectorPseudo &MatchInfo) {
323 if (Lane != 0)
324 return false;
325
326 // Try to match a vector splat operation into a dup instruction.
327 // We're looking for this pattern:
328 //
329 // %scalar:gpr(s64) = COPY $x0
330 // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
331 // %cst0:gpr(s32) = G_CONSTANT i32 0
332 // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
333 // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
334 // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
335 // %zerovec(<2 x s32>)
336 //
337 // ...into:
338 // %splat = G_DUP %scalar
339
340 // Begin matching the insert.
341 auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT,
342 MI.getOperand(1).getReg(), MRI);
343 if (!InsMI)
344 return false;
345 // Match the undef vector operand.
346 if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),
347 MRI))
348 return false;
349
350 // Match the index constant 0.
351 if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt()))
352 return false;
353
354 MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
355 {InsMI->getOperand(2).getReg()});
356 return true;
357}
358
359/// Helper function for matchDup.
bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
                             MachineRegisterInfo &MRI,
                             ShuffleVectorPseudo &MatchInfo) {
363 assert(Lane >= 0 && "Expected positive lane?");
364 // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the
365 // lane's definition directly.
366 auto *BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
367 MI.getOperand(1).getReg(), MRI);
368 if (!BuildVecMI)
369 return false;
370 Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();
371 MatchInfo =
372 ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
373 return true;
374}
375
376bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
377 ShuffleVectorPseudo &MatchInfo) {
378 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
379 auto MaybeLane = getSplatIndex(MI);
380 if (!MaybeLane)
381 return false;
382 int Lane = *MaybeLane;
383 // If this is undef splat, generate it via "just" vdup, if possible.
384 if (Lane < 0)
385 Lane = 0;
386 if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
387 return true;
388 if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
389 return true;
390 return false;
391}
392
393// Check if an EXT instruction can handle the shuffle mask when the vector
394// sources of the shuffle are the same.
395bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) {
396 unsigned NumElts = Ty.getNumElements();
397
398 // Assume that the first shuffle index is not UNDEF. Fail if it is.
399 if (M[0] < 0)
400 return false;
401
402 // If this is a VEXT shuffle, the immediate value is the index of the first
403 // element. The other shuffle indices must be the successive elements after
404 // the first one.
405 unsigned ExpectedElt = M[0];
406 for (unsigned I = 1; I < NumElts; ++I) {
407 // Increment the expected index. If it wraps around, just follow it
408 // back to index zero and keep going.
409 ++ExpectedElt;
410 if (ExpectedElt == NumElts)
411 ExpectedElt = 0;
412
413 if (M[I] < 0)
414 continue; // Ignore UNDEF indices.
415 if (ExpectedElt != static_cast<unsigned>(M[I]))
416 return false;
417 }
418
419 return true;
420}
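
// For example, <2, 3, 0, 1> is a singleton EXT mask for a 4-element vector:
// starting at element 2 the indices wrap around through 3, 0 and 1, so the
// shuffle is an EXT of the source vector with itself starting at element 2.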
421
422bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
423 ShuffleVectorPseudo &MatchInfo) {
424 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
425 Register Dst = MI.getOperand(0).getReg();
426 LLT DstTy = MRI.getType(Dst);
427 Register V1 = MI.getOperand(1).getReg();
428 Register V2 = MI.getOperand(2).getReg();
  auto Mask = MI.getOperand(3).getShuffleMask();
  uint64_t Imm;
  auto ExtInfo = getExtMask(Mask, DstTy.getNumElements());
  uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
433
434 if (!ExtInfo) {
435 if (!getOpcodeDef<GImplicitDef>(V2, MRI) ||
436 !isSingletonExtMask(Mask, DstTy))
437 return false;
438
439 Imm = Mask[0] * ExtFactor;
440 MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V1, Imm});
441 return true;
442 }
443 bool ReverseExt;
444 std::tie(ReverseExt, Imm) = *ExtInfo;
445 if (ReverseExt)
446 std::swap(V1, V2);
447 Imm *= ExtFactor;
448 MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
449 return true;
450}
451
452/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
453/// \p MI is the G_SHUFFLE_VECTOR to replace; \p MatchInfo provides the opcode and operands to use.
454void applyShuffleVectorPseudo(MachineInstr &MI,
455 ShuffleVectorPseudo &MatchInfo) {
456 MachineIRBuilder MIRBuilder(MI);
457 MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
458 MI.eraseFromParent();
459}
460
461/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
462/// Special-cased because the constant operand must be emitted as a G_CONSTANT
463/// for the imported tablegen patterns to work.
464void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
465 MachineIRBuilder MIRBuilder(MI);
466 if (MatchInfo.SrcOps[2].getImm() == 0)
467 MIRBuilder.buildCopy(MatchInfo.Dst, MatchInfo.SrcOps[0]);
468 else {
469 // Tablegen patterns expect an i32 G_CONSTANT as the final op.
470 auto Cst =
471 MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
472 MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
473 {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
474 }
475 MI.eraseFromParent();
476}
477
478/// Match a G_SHUFFLE_VECTOR with a mask which corresponds to a
479/// G_INSERT_VECTOR_ELT and G_EXTRACT_VECTOR_ELT pair.
480///
481/// e.g.
482/// %shuf = G_SHUFFLE_VECTOR %left, %right, shufflemask(0, 0)
483///
484/// Can be represented as
485///
486/// %extract = G_EXTRACT_VECTOR_ELT %left, 0
487/// %ins = G_INSERT_VECTOR_ELT %left, %extract, 1
488///
489bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI,
490 std::tuple<Register, int, Register, int> &MatchInfo) {
491 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
492 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
493 Register Dst = MI.getOperand(0).getReg();
494 int NumElts = MRI.getType(Dst).getNumElements();
495 auto DstIsLeftAndDstLane = isINSMask(ShuffleMask, NumElts);
496 if (!DstIsLeftAndDstLane)
497 return false;
498 bool DstIsLeft;
499 int DstLane;
500 std::tie(DstIsLeft, DstLane) = *DstIsLeftAndDstLane;
501 Register Left = MI.getOperand(1).getReg();
502 Register Right = MI.getOperand(2).getReg();
503 Register DstVec = DstIsLeft ? Left : Right;
504 Register SrcVec = Left;
505
506 int SrcLane = ShuffleMask[DstLane];
507 if (SrcLane >= NumElts) {
508 SrcVec = Right;
509 SrcLane -= NumElts;
510 }
511
512 MatchInfo = std::make_tuple(DstVec, DstLane, SrcVec, SrcLane);
513 return true;
514}
515
516void applyINS(MachineInstr &MI, MachineRegisterInfo &MRI,
517 MachineIRBuilder &Builder,
518 std::tuple<Register, int, Register, int> &MatchInfo) {
519 Builder.setInstrAndDebugLoc(MI);
520 Register Dst = MI.getOperand(0).getReg();
521 auto ScalarTy = MRI.getType(Dst).getElementType();
522 Register DstVec, SrcVec;
523 int DstLane, SrcLane;
524 std::tie(DstVec, DstLane, SrcVec, SrcLane) = MatchInfo;
525 auto SrcCst = Builder.buildConstant(LLT::scalar(64), SrcLane);
526 auto Extract = Builder.buildExtractVectorElement(ScalarTy, SrcVec, SrcCst);
527 auto DstCst = Builder.buildConstant(LLT::scalar(64), DstLane);
528 Builder.buildInsertVectorElement(Dst, DstVec, Extract, DstCst);
529 MI.eraseFromParent();
530}
531
532/// isVShiftRImm - Check if this is a valid vector for the immediate
533/// operand of a vector shift right operation. The value must be in the range:
534/// 1 <= Value <= ElementBits for a right shift.
bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
                  int64_t &Cnt) {
537 assert(Ty.isVector() && "vector shift count is not a vector type");
538 MachineInstr *MI = MRI.getVRegDef(Reg);
539 auto Cst = getAArch64VectorSplatScalar(*MI, MRI);
540 if (!Cst)
541 return false;
542 Cnt = *Cst;
543 int64_t ElementBits = Ty.getScalarSizeInBits();
544 return Cnt >= 1 && Cnt <= ElementBits;
545}
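
// For example, for a <4 x s32> right shift the splat shift amount must lie in
// the range [1, 32]; a splat of 0 or 33 is rejected and the generic shift is
// kept as-is.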
546
547/// Match a vector G_ASHR or G_LSHR with a valid immediate shift.
548bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
549 int64_t &Imm) {
550 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
551 MI.getOpcode() == TargetOpcode::G_LSHR);
552 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
553 if (!Ty.isVector())
554 return false;
555 return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
556}
557
558void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
559 int64_t &Imm) {
560 unsigned Opc = MI.getOpcode();
561 assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
562 unsigned NewOpc =
563 Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
564 MachineIRBuilder MIB(MI);
565 auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm);
566 MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef});
567 MI.eraseFromParent();
568}
569
570/// Determine if it is possible to modify the \p RHS and predicate \p P of a
571/// G_ICMP instruction such that the right-hand side is an arithmetic immediate.
572///
573/// \returns A pair containing the updated immediate and predicate which may
574/// be used to optimize the instruction.
575///
576/// \note This assumes that the comparison has been legalized.
577std::optional<std::pair<uint64_t, CmpInst::Predicate>>
578tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
579 const MachineRegisterInfo &MRI) {
580 const auto &Ty = MRI.getType(RHS);
581 if (Ty.isVector())
582 return std::nullopt;
583 unsigned Size = Ty.getSizeInBits();
584 assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?");
585
586 // If the RHS is not a constant, or the RHS is already a valid arithmetic
587 // immediate, then there is nothing to change.
588 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, MRI);
589 if (!ValAndVReg)
590 return std::nullopt;
591 uint64_t C = ValAndVReg->Value.getZExtValue();
592 if (isLegalArithImmed(C))
593 return std::nullopt;
594
595 // We have a non-arithmetic immediate. Check if adjusting the immediate and
596 // adjusting the predicate will result in a legal arithmetic immediate.
  switch (P) {
  default:
    return std::nullopt;
  case CmpInst::ICMP_SLT:
  case CmpInst::ICMP_SGE:
    // Check for
    //
    // x slt c => x sle c - 1
    // x sge c => x sgt c - 1
    //
    // When c is not the smallest possible negative number.
    if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
        (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
      return std::nullopt;
    P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
    C -= 1;
    break;
  case CmpInst::ICMP_ULT:
  case CmpInst::ICMP_UGE:
    // Check for
    //
    // x ult c => x ule c - 1
    // x uge c => x ugt c - 1
    //
    // When c is not zero.
    if (C == 0)
      return std::nullopt;
    P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
    C -= 1;
    break;
  case CmpInst::ICMP_SLE:
  case CmpInst::ICMP_SGT:
    // Check for
    //
    // x sle c => x slt c + 1
    // x sgt c => x sge c + 1
    //
    // When c is not the largest possible signed integer.
    if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
        (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
      return std::nullopt;
    P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
    C += 1;
    break;
  case CmpInst::ICMP_ULE:
  case CmpInst::ICMP_UGT:
    // Check for
    //
    // x ule c => x ult c + 1
    // x ugt c => x uge c + 1
    //
    // When c is not the largest possible unsigned integer.
    if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
        (Size == 64 && C == UINT64_MAX))
      return std::nullopt;
    P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
    C += 1;
    break;
  }
656
657 // Check if the new constant is valid, and return the updated constant and
658 // predicate if it is.
659 if (Size == 32)
660 C = static_cast<uint32_t>(C);
661 if (!isLegalArithImmed(C))
662 return std::nullopt;
663 return {{C, P}};
664}
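
// For example, "x ult 4097" cannot use 4097 directly (it is not a valid
// 12-bit, optionally shifted, arithmetic immediate), but rewriting it as
// "x ule 4096" gives an immediate that encodes as 1 << 12, so
// tryAdjustICmpImmAndPred returns {4096, ICMP_ULE}.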
665
666/// Determine whether or not it is possible to update the RHS and predicate of
667/// a G_ICMP instruction such that the RHS will be selected as an arithmetic
668/// immediate.
669///
670/// \p MI - The G_ICMP instruction
671/// \p MatchInfo - The new RHS immediate and predicate on success
672///
673/// See tryAdjustICmpImmAndPred for valid transformations.
bool matchAdjustICmpImmAndPred(
    MachineInstr &MI, const MachineRegisterInfo &MRI,
    std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) {
677 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
678 Register RHS = MI.getOperand(3).getReg();
679 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
680 if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) {
681 MatchInfo = *MaybeNewImmAndPred;
682 return true;
683 }
684 return false;
685}
686
687void applyAdjustICmpImmAndPred(
688 MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo,
    MachineIRBuilder &MIB, GISelChangeObserver &Observer) {
  MIB.setInstrAndDebugLoc(MI);
  MachineOperand &RHS = MI.getOperand(3);
  MachineRegisterInfo &MRI = *MIB.getMRI();
  auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()),
694 MatchInfo.first);
695 Observer.changingInstr(MI);
696 RHS.setReg(Cst->getOperand(0).getReg());
697 MI.getOperand(1).setPredicate(MatchInfo.second);
698 Observer.changedInstr(MI);
699}
700
701bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
702 std::pair<unsigned, int> &MatchInfo) {
703 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
704 Register Src1Reg = MI.getOperand(1).getReg();
705 const LLT SrcTy = MRI.getType(Src1Reg);
706 const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
707
708 auto LaneIdx = getSplatIndex(MI);
709 if (!LaneIdx)
710 return false;
711
712 // The lane idx should be within the first source vector.
713 if (*LaneIdx >= SrcTy.getNumElements())
714 return false;
715
716 if (DstTy != SrcTy)
717 return false;
718
719 LLT ScalarTy = SrcTy.getElementType();
720 unsigned ScalarSize = ScalarTy.getSizeInBits();
721
722 unsigned Opc = 0;
723 switch (SrcTy.getNumElements()) {
724 case 2:
725 if (ScalarSize == 64)
726 Opc = AArch64::G_DUPLANE64;
727 else if (ScalarSize == 32)
728 Opc = AArch64::G_DUPLANE32;
729 break;
730 case 4:
731 if (ScalarSize == 32)
732 Opc = AArch64::G_DUPLANE32;
733 else if (ScalarSize == 16)
734 Opc = AArch64::G_DUPLANE16;
735 break;
736 case 8:
737 if (ScalarSize == 8)
738 Opc = AArch64::G_DUPLANE8;
739 else if (ScalarSize == 16)
740 Opc = AArch64::G_DUPLANE16;
741 break;
742 case 16:
743 if (ScalarSize == 8)
744 Opc = AArch64::G_DUPLANE8;
745 break;
746 default:
747 break;
748 }
749 if (!Opc)
750 return false;
751
752 MatchInfo.first = Opc;
753 MatchInfo.second = *LaneIdx;
754 return true;
755}
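
// For example, a <4 x s32> shuffle that splats lane 2 of its first source is
// matched by matchDupLane as {AArch64::G_DUPLANE32, 2}; a splat whose index
// refers to the second source (index >= the number of source elements) is not
// matched.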
756
757void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
758 MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
759 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
760 Register Src1Reg = MI.getOperand(1).getReg();
761 const LLT SrcTy = MRI.getType(Src1Reg);
762
763 B.setInstrAndDebugLoc(MI);
764 auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second);
765
766 Register DupSrc = MI.getOperand(1).getReg();
767 // For types like <2 x s32>, we can use G_DUPLANE32, with a <4 x s32> source.
768 // To do this, we can use a G_CONCAT_VECTORS to do the widening.
769 if (SrcTy.getSizeInBits() == 64) {
770 auto Undef = B.buildUndef(SrcTy);
771 DupSrc = B.buildConcatVectors(SrcTy.multiplyElements(2),
772 {Src1Reg, Undef.getReg(0)})
773 .getReg(0);
774 }
775 B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, {DupSrc, Lane});
776 MI.eraseFromParent();
777}
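
// For example, splatting lane 1 of a <2 x s32> source first widens the source
// to <4 x s32> with a G_CONCAT_VECTORS of the source and an undef vector, and
// then emits G_DUPLANE32 on the widened vector with a lane constant of 1.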
778
779bool matchScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI) {
780 auto &Unmerge = cast<GUnmerge>(MI);
781 Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
782 const LLT SrcTy = MRI.getType(Src1Reg);
783 if (SrcTy.getSizeInBits() != 128 && SrcTy.getSizeInBits() != 64)
784 return false;
785 return SrcTy.isVector() && !SrcTy.isScalable() &&
786 Unmerge.getNumOperands() == (unsigned)SrcTy.getNumElements() + 1;
787}
788
void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                                 MachineIRBuilder &B) {
  auto &Unmerge = cast<GUnmerge>(MI);
792 Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
793 const LLT SrcTy = MRI.getType(Src1Reg);
794 assert((SrcTy.isVector() && !SrcTy.isScalable()) &&
795 "Expected a fixed length vector");
796
797 for (int I = 0; I < SrcTy.getNumElements(); ++I)
798 B.buildExtractVectorElementConstant(Unmerge.getReg(I), Src1Reg, I);
799 MI.eraseFromParent();
800}
801
802bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  auto Splat = getAArch64VectorSplat(MI, MRI);
  if (!Splat)
806 return false;
807 if (Splat->isReg())
808 return true;
809 // Later, during selection, we'll try to match imported patterns using
810 // immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
811 // G_BUILD_VECTORs which could match those patterns.
812 int64_t Cst = Splat->getCst();
813 return (Cst != 0 && Cst != -1);
814}
815
void applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
                           MachineIRBuilder &B) {
  B.setInstrAndDebugLoc(MI);
819 B.buildInstr(AArch64::G_DUP, {MI.getOperand(0).getReg()},
820 {MI.getOperand(1).getReg()});
821 MI.eraseFromParent();
822}
823
824/// \returns how many instructions would be saved by folding a G_ICMP's shift
/// and/or extension operations.
unsigned getCmpOperandFoldingProfit(Register CmpOp,
                                    const MachineRegisterInfo &MRI) {
  // No instructions to save if there's more than one use or no uses.
828 if (!MRI.hasOneNonDBGUse(CmpOp))
829 return 0;
830
831 // FIXME: This is duplicated with the selector. (See: selectShiftedRegister)
832 auto IsSupportedExtend = [&](const MachineInstr &MI) {
833 if (MI.getOpcode() == TargetOpcode::G_SEXT_INREG)
834 return true;
835 if (MI.getOpcode() != TargetOpcode::G_AND)
836 return false;
837 auto ValAndVReg =
838 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
839 if (!ValAndVReg)
840 return false;
841 uint64_t Mask = ValAndVReg->Value.getZExtValue();
842 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
843 };

  MachineInstr *Def = getDefIgnoringCopies(CmpOp, MRI);
  if (IsSupportedExtend(*Def))
847 return 1;
848
849 unsigned Opc = Def->getOpcode();
850 if (Opc != TargetOpcode::G_SHL && Opc != TargetOpcode::G_ASHR &&
851 Opc != TargetOpcode::G_LSHR)
852 return 0;
853
854 auto MaybeShiftAmt =
855 getIConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
856 if (!MaybeShiftAmt)
857 return 0;
858 uint64_t ShiftAmt = MaybeShiftAmt->Value.getZExtValue();
859 MachineInstr *ShiftLHS =
860 getDefIgnoringCopies(Def->getOperand(1).getReg(), MRI);
861
862 // Check if we can fold an extend and a shift.
863 // FIXME: This is duplicated with the selector. (See:
864 // selectArithExtendedRegister)
865 if (IsSupportedExtend(*ShiftLHS))
866 return (ShiftAmt <= 4) ? 2 : 1;
867
868 LLT Ty = MRI.getType(Def->getOperand(0).getReg());
869 if (Ty.isVector())
870 return 0;
871 unsigned ShiftSize = Ty.getSizeInBits();
872 if ((ShiftSize == 32 && ShiftAmt <= 31) ||
873 (ShiftSize == 64 && ShiftAmt <= 63))
874 return 1;
875 return 0;
876}
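
// For example, if the compare operand is (G_SHL (G_SEXT_INREG x, 8), 2) and
// has a single use, getCmpOperandFoldingProfit returns 2, since both the
// extend and the shift can be folded into the compare's operand; a lone
// supported extend, or an in-range plain shift, returns 1.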
877
878/// \returns true if it would be profitable to swap the LHS and RHS of a G_ICMP
879/// instruction \p MI.
880bool trySwapICmpOperands(MachineInstr &MI, MachineRegisterInfo &MRI) {
881 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
882 // Swap the operands if it would introduce a profitable folding opportunity.
883 // (e.g. a shift + extend).
884 //
885 // For example:
886 // lsl w13, w11, #1
887 // cmp w13, w12
888 // can be turned into:
889 // cmp w12, w11, lsl #1
890
891 // Don't swap if there's a constant on the RHS, because we know we can fold
892 // that.
893 Register RHS = MI.getOperand(3).getReg();
894 auto RHSCst = getIConstantVRegValWithLookThrough(RHS, MRI);
895 if (RHSCst && isLegalArithImmed(RHSCst->Value.getSExtValue()))
896 return false;
897
898 Register LHS = MI.getOperand(2).getReg();
899 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  auto GetRegForProfit = [&](Register Reg) {
    MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
    return isCMN(Def, Pred, MRI) ? Def->getOperand(2).getReg() : Reg;
  };
904
905 // Don't have a constant on the RHS. If we swap the LHS and RHS of the
906 // compare, would we be able to fold more instructions?
907 Register TheLHS = GetRegForProfit(LHS);
908 Register TheRHS = GetRegForProfit(RHS);
909
910 // If the LHS is more likely to give us a folding opportunity, then swap the
911 // LHS and RHS.
  return (getCmpOperandFoldingProfit(TheLHS, MRI) >
          getCmpOperandFoldingProfit(TheRHS, MRI));
}
915
916void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
917 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
918 Register LHS = MI.getOperand(2).getReg();
919 Register RHS = MI.getOperand(3).getReg();
920 Observer.changingInstr(MI);
921 MI.getOperand(1).setPredicate(CmpInst::getSwappedPredicate(Pred));
922 MI.getOperand(2).setReg(RHS);
923 MI.getOperand(3).setReg(LHS);
924 Observer.changedInstr(MI);
925}
926
927/// \returns a function which builds a vector floating point compare instruction
928/// for a condition code \p CC.
929/// \param [in] IsZero - True if the comparison is against 0.
930/// \param [in] NoNans - True if the target has NoNansFPMath.
931std::function<Register(MachineIRBuilder &)>
932getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool IsZero,
933 bool NoNans, MachineRegisterInfo &MRI) {
934 LLT DstTy = MRI.getType(LHS);
935 assert(DstTy.isVector() && "Expected vector types only?");
936 assert(DstTy == MRI.getType(RHS) && "Src and Dst types must match!");
937 switch (CC) {
938 default:
939 llvm_unreachable("Unexpected condition code!");
940 case AArch64CC::NE:
941 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
942 auto FCmp = IsZero
943 ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS})
944 : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
945 return MIB.buildNot(DstTy, FCmp).getReg(0);
946 };
947 case AArch64CC::EQ:
948 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
949 return IsZero
950 ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS}).getReg(0)
951 : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS})
952 .getReg(0);
953 };
954 case AArch64CC::GE:
955 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
956 return IsZero
957 ? MIB.buildInstr(AArch64::G_FCMGEZ, {DstTy}, {LHS}).getReg(0)
958 : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS})
959 .getReg(0);
960 };
961 case AArch64CC::GT:
962 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
963 return IsZero
964 ? MIB.buildInstr(AArch64::G_FCMGTZ, {DstTy}, {LHS}).getReg(0)
965 : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS})
966 .getReg(0);
967 };
968 case AArch64CC::LS:
969 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
970 return IsZero
971 ? MIB.buildInstr(AArch64::G_FCMLEZ, {DstTy}, {LHS}).getReg(0)
972 : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS})
973 .getReg(0);
974 };
975 case AArch64CC::MI:
976 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
977 return IsZero
978 ? MIB.buildInstr(AArch64::G_FCMLTZ, {DstTy}, {LHS}).getReg(0)
979 : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS})
980 .getReg(0);
981 };
982 }
983}
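
// Note that getVectorFCMP has no dedicated register-register "less" pseudos:
// for AArch64CC::LS and AArch64CC::MI with a non-zero RHS it instead emits
// G_FCMGE / G_FCMGT with the operands swapped.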
984
985/// Check whether a vector G_FCMP \p MI can be lowered into an AArch64-specific pseudo.
986bool matchLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
987 MachineIRBuilder &MIB) {
988 assert(MI.getOpcode() == TargetOpcode::G_FCMP);
989 const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
990
991 Register Dst = MI.getOperand(0).getReg();
992 LLT DstTy = MRI.getType(Dst);
993 if (!DstTy.isVector() || !ST.hasNEON())
994 return false;
995 Register LHS = MI.getOperand(2).getReg();
996 unsigned EltSize = MRI.getType(LHS).getScalarSizeInBits();
997 if (EltSize == 16 && !ST.hasFullFP16())
998 return false;
999 if (EltSize != 16 && EltSize != 32 && EltSize != 64)
1000 return false;
1001
1002 return true;
1003}
1004
1005/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
1006void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
1007 MachineIRBuilder &MIB) {
1008 assert(MI.getOpcode() == TargetOpcode::G_FCMP);
1009 const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
1010
1011 const auto &CmpMI = cast<GFCmp>(MI);
1012
1013 Register Dst = CmpMI.getReg(0);
1014 CmpInst::Predicate Pred = CmpMI.getCond();
1015 Register LHS = CmpMI.getLHSReg();
1016 Register RHS = CmpMI.getRHSReg();
1017
1018 LLT DstTy = MRI.getType(Dst);
1019
1020 auto Splat = getAArch64VectorSplat(*MRI.getVRegDef(RHS), MRI);
1021
1022 // Compares against 0 have special target-specific pseudos.
1023 bool IsZero = Splat && Splat->isCst() && Splat->getCst() == 0;
1024
  bool Invert = false;
  AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
  if ((Pred == CmpInst::Predicate::FCMP_ORD ||
1028 Pred == CmpInst::Predicate::FCMP_UNO) &&
1029 IsZero) {
1030 // The special case "fcmp ord %a, 0" is the canonical check that LHS isn't
1031 // NaN, so equivalent to a == a and doesn't need the two comparisons an
1032 // "ord" normally would.
1033 // Similarly, "fcmp uno %a, 0" is the canonical check that LHS is NaN and is
1034 // thus equivalent to a != a.
1035 RHS = LHS;
1036 IsZero = false;
1037 CC = Pred == CmpInst::Predicate::FCMP_ORD ? AArch64CC::EQ : AArch64CC::NE;
1038 } else
1039 changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);
1040
  // Instead of having an apply function, just build here to simplify things.
  MIB.setInstrAndDebugLoc(MI);

1044 const bool NoNans =
1045 ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;
1046
1047 auto Cmp = getVectorFCMP(CC, LHS, RHS, IsZero, NoNans, MRI);
1048 Register CmpRes;
1049 if (CC2 == AArch64CC::AL)
1050 CmpRes = Cmp(MIB);
1051 else {
1052 auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, IsZero, NoNans, MRI);
1053 auto Cmp2Dst = Cmp2(MIB);
1054 auto Cmp1Dst = Cmp(MIB);
1055 CmpRes = MIB.buildOr(DstTy, Cmp1Dst, Cmp2Dst).getReg(0);
1056 }
1057 if (Invert)
1058 CmpRes = MIB.buildNot(DstTy, CmpRes).getReg(0);
1059 MRI.replaceRegWith(Dst, CmpRes);
1060 MI.eraseFromParent();
1061}
1062
1063bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
1064 Register &SrcReg) {
1065 assert(MI.getOpcode() == TargetOpcode::G_STORE);
1066 Register DstReg = MI.getOperand(0).getReg();
1067 if (MRI.getType(DstReg).isVector())
1068 return false;
1069 // Match a store of a truncate.
1070 if (!mi_match(DstReg, MRI, m_GTrunc(m_Reg(SrcReg))))
1071 return false;
1072 // Only form truncstores for value types of max 64b.
1073 return MRI.getType(SrcReg).getSizeInBits() <= 64;
1074}
1075
void applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
                         MachineIRBuilder &B, GISelChangeObserver &Observer,
                         Register &SrcReg) {
1079 assert(MI.getOpcode() == TargetOpcode::G_STORE);
1080 Observer.changingInstr(MI);
1081 MI.getOperand(0).setReg(SrcReg);
1082 Observer.changedInstr(MI);
1083}
1084
1085// Lower vector G_SEXT_INREG back to shifts for selection. We allowed them to
1086// form in the first place for combine opportunities, so any remaining ones
1087// at this stage need to be lowered back.
1088bool matchVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI) {
1089 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1090 Register DstReg = MI.getOperand(0).getReg();
1091 LLT DstTy = MRI.getType(DstReg);
1092 return DstTy.isVector();
1093}
1094
void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &B, GISelChangeObserver &Observer) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1098 B.setInstrAndDebugLoc(MI);
1099 LegalizerHelper Helper(*MI.getMF(), Observer, B);
1100 Helper.lower(MI, 0, /* Unused hint type */ LLT());
1101}
1102
1103/// Combine <N x t>, unused = unmerge(G_EXT <2*N x t> v, undef, N)
1104/// => unused, <N x t> = unmerge v
1105bool matchUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
1106 Register &MatchInfo) {
1107 auto &Unmerge = cast<GUnmerge>(MI);
1108 if (Unmerge.getNumDefs() != 2)
1109 return false;
1110 if (!MRI.use_nodbg_empty(Unmerge.getReg(1)))
1111 return false;
1112
1113 LLT DstTy = MRI.getType(Unmerge.getReg(0));
1114 if (!DstTy.isVector())
1115 return false;
1116
1117 MachineInstr *Ext = getOpcodeDef(AArch64::G_EXT, Unmerge.getSourceReg(), MRI);
1118 if (!Ext)
1119 return false;
1120
1121 Register ExtSrc1 = Ext->getOperand(1).getReg();
1122 Register ExtSrc2 = Ext->getOperand(2).getReg();
1123 auto LowestVal =
1124 getIConstantVRegValWithLookThrough(Ext->getOperand(3).getReg(), MRI);
1125 if (!LowestVal || LowestVal->Value.getZExtValue() != DstTy.getSizeInBytes())
1126 return false;
1127
1128 if (!getOpcodeDef<GImplicitDef>(ExtSrc2, MRI))
1129 return false;
1130
1131 MatchInfo = ExtSrc1;
1132 return true;
1133}
1134
void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                              MachineIRBuilder &B,
                              GISelChangeObserver &Observer, Register &SrcReg) {
1138 Observer.changingInstr(MI);
1139 // Swap dst registers.
1140 Register Dst1 = MI.getOperand(0).getReg();
1141 MI.getOperand(0).setReg(MI.getOperand(1).getReg());
1142 MI.getOperand(1).setReg(Dst1);
1143 MI.getOperand(2).setReg(SrcReg);
1144 Observer.changedInstr(MI);
1145}
1146
1147// Match mul({z/s}ext, {z/s}ext) => {u/s}mull, OR
1148// match v2s64 mul instructions, which will then be scalarised later on.
1149// Both matches are done in one function so that the order of matching is
1150// always the same: try lowering MUL to MULL before trying to scalarize if
1151// needed.
1152bool matchExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI) {
1153 // Get the instructions that defined the source operand
1154 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1155 MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
1156 MachineInstr *I2 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
1157
1158 if (DstTy.isVector()) {
1159 // If the source operands were EXTENDED before, then {U/S}MULL can be used
1160 unsigned I1Opc = I1->getOpcode();
1161 unsigned I2Opc = I2->getOpcode();
1162 if (((I1Opc == TargetOpcode::G_ZEXT && I2Opc == TargetOpcode::G_ZEXT) ||
1163 (I1Opc == TargetOpcode::G_SEXT && I2Opc == TargetOpcode::G_SEXT)) &&
1164 (MRI.getType(I1->getOperand(0).getReg()).getScalarSizeInBits() ==
1165 MRI.getType(I1->getOperand(1).getReg()).getScalarSizeInBits() * 2) &&
1166 (MRI.getType(I2->getOperand(0).getReg()).getScalarSizeInBits() ==
1167 MRI.getType(I2->getOperand(1).getReg()).getScalarSizeInBits() * 2)) {
1168 return true;
1169 }
1170 // If result type is v2s64, scalarise the instruction
1171 else if (DstTy == LLT::fixed_vector(2, 64)) {
1172 return true;
1173 }
1174 }
1175 return false;
1176}
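
// For example, a <4 x s32> G_MUL whose operands are both G_ZEXT from
// <4 x s16> is matched here (it becomes G_UMULL in the apply step), and a
// <2 x s64> G_MUL with plain operands is matched purely so that it can be
// scalarised below.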
1177
void applyExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI,
                       MachineIRBuilder &B, GISelChangeObserver &Observer) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL &&
1181 "Expected a G_MUL instruction");
1182
1183 // Get the instructions that defined the source operand
1184 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1185 MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
1186 MachineInstr *I2 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
1187
1188 // If the source operands were EXTENDED before, then {U/S}MULL can be used
1189 unsigned I1Opc = I1->getOpcode();
1190 unsigned I2Opc = I2->getOpcode();
1191 if (((I1Opc == TargetOpcode::G_ZEXT && I2Opc == TargetOpcode::G_ZEXT) ||
1192 (I1Opc == TargetOpcode::G_SEXT && I2Opc == TargetOpcode::G_SEXT)) &&
1193 (MRI.getType(I1->getOperand(0).getReg()).getScalarSizeInBits() ==
1194 MRI.getType(I1->getOperand(1).getReg()).getScalarSizeInBits() * 2) &&
1195 (MRI.getType(I2->getOperand(0).getReg()).getScalarSizeInBits() ==
1196 MRI.getType(I2->getOperand(1).getReg()).getScalarSizeInBits() * 2)) {
1197
1198 B.setInstrAndDebugLoc(MI);
1199 B.buildInstr(I1->getOpcode() == TargetOpcode::G_ZEXT ? AArch64::G_UMULL
1200 : AArch64::G_SMULL,
1201 {MI.getOperand(0).getReg()},
1202 {I1->getOperand(1).getReg(), I2->getOperand(1).getReg()});
1203 MI.eraseFromParent();
1204 }
1205 // If result type is v2s64, scalarise the instruction
1206 else if (DstTy == LLT::fixed_vector(2, 64)) {
1207 LegalizerHelper Helper(*MI.getMF(), Observer, B);
1208 B.setInstrAndDebugLoc(MI);
    Helper.fewerElementsVector(
        MI, 0,
        DstTy.changeElementCount(
            DstTy.getElementCount().divideCoefficientBy(2)));
  }
1214}
1215
1216class AArch64PostLegalizerLoweringImpl : public Combiner {
1217protected:
1218 // TODO: Make CombinerHelper methods const.
1219 mutable CombinerHelper Helper;
1220 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig;
1221 const AArch64Subtarget &STI;
1222
1223public:
1224 AArch64PostLegalizerLoweringImpl(
1225 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
1226 GISelCSEInfo *CSEInfo,
1227 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
1228 const AArch64Subtarget &STI);
1229
1230 static const char *getName() { return "AArch64PostLegalizerLowering"; }
1231
1232 bool tryCombineAll(MachineInstr &I) const override;
1233
1234private:
1235#define GET_GICOMBINER_CLASS_MEMBERS
1236#include "AArch64GenPostLegalizeGILowering.inc"
1237#undef GET_GICOMBINER_CLASS_MEMBERS
1238};
1239
1240#define GET_GICOMBINER_IMPL
1241#include "AArch64GenPostLegalizeGILowering.inc"
1242#undef GET_GICOMBINER_IMPL
1243
1244AArch64PostLegalizerLoweringImpl::AArch64PostLegalizerLoweringImpl(
1245 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
1246 GISelCSEInfo *CSEInfo,
1247 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
1248 const AArch64Subtarget &STI)
1249 : Combiner(MF, CInfo, TPC, /*KB*/ nullptr, CSEInfo),
1250 Helper(Observer, B, /*IsPreLegalize*/ true), RuleConfig(RuleConfig),
      STI(STI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AArch64GenPostLegalizeGILowering.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}
1257
1258class AArch64PostLegalizerLowering : public MachineFunctionPass {
1259public:
1260 static char ID;
1261
1262 AArch64PostLegalizerLowering();
1263
1264 StringRef getPassName() const override {
1265 return "AArch64PostLegalizerLowering";
1266 }
1267
1268 bool runOnMachineFunction(MachineFunction &MF) override;
1269 void getAnalysisUsage(AnalysisUsage &AU) const override;
1270
1271private:
1272 AArch64PostLegalizerLoweringImplRuleConfig RuleConfig;
1273};
1274} // end anonymous namespace
1275
void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  MachineFunctionPass::getAnalysisUsage(AU);
}
1282
AArch64PostLegalizerLowering::AArch64PostLegalizerLowering()
    : MachineFunctionPass(ID) {
  initializeAArch64PostLegalizerLoweringPass(*PassRegistry::getPassRegistry());

1287 if (!RuleConfig.parseCommandLineOption())
1288 report_fatal_error("Invalid rule identifier");
1289}
1290
1291bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) {
1292 if (MF.getProperties().hasProperty(
1293 MachineFunctionProperties::Property::FailedISel))
    return false;
  assert(MF.getProperties().hasProperty(
             MachineFunctionProperties::Property::Legalized) &&
1297 "Expected a legalized function?");
1298 auto *TPC = &getAnalysis<TargetPassConfig>();
1299 const Function &F = MF.getFunction();

  const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
  CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
1303 /*LegalizerInfo*/ nullptr, /*OptEnabled=*/true,
1304 F.hasOptSize(), F.hasMinSize());
1305 AArch64PostLegalizerLoweringImpl Impl(MF, CInfo, TPC, /*CSEInfo*/ nullptr,
1306 RuleConfig, ST);
1307 return Impl.combineMachineInstrs();
1308}
1309
1310char AArch64PostLegalizerLowering::ID = 0;
1311INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE,
1312 "Lower AArch64 MachineInstrs after legalization", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE,
1316 "Lower AArch64 MachineInstrs after legalization", false,
1317 false)
1318
namespace llvm {
FunctionPass *createAArch64PostLegalizerLowering() {
  return new AArch64PostLegalizerLowering();
1322}
1323} // end namespace llvm