1//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Post-legalization lowering for instructions.
11///
12/// This is used to offload pattern matching from the selector.
13///
14/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually
15/// a G_ZIP, G_UZP, etc.
16///
17/// General optimization combines should be handled by either the
18/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner.
19///
20//===----------------------------------------------------------------------===//
21
22#include "AArch64GlobalISelUtils.h"
23#include "AArch64Subtarget.h"
24#include "AArch64TargetMachine.h"
25#include "GISel/AArch64LegalizerInfo.h"
26#include "MCTargetDesc/AArch64MCTargetDesc.h"
27#include "TargetInfo/AArch64TargetInfo.h"
28#include "Utils/AArch64BaseInfo.h"
29#include "llvm/CodeGen/GlobalISel/Combiner.h"
30#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
31#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
32#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
33#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
34#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
35#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
36#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
37#include "llvm/CodeGen/GlobalISel/Utils.h"
38#include "llvm/CodeGen/MachineFunctionPass.h"
39#include "llvm/CodeGen/MachineInstrBuilder.h"
40#include "llvm/CodeGen/MachineRegisterInfo.h"
41#include "llvm/CodeGen/TargetOpcodes.h"
42#include "llvm/CodeGen/TargetPassConfig.h"
43#include "llvm/IR/InstrTypes.h"
44#include "llvm/InitializePasses.h"
45#include "llvm/Support/Debug.h"
46#include "llvm/Support/ErrorHandling.h"
47#include <optional>
48
49#define DEBUG_TYPE "aarch64-postlegalizer-lowering"
50
51using namespace llvm;
52using namespace MIPatternMatch;
53using namespace AArch64GISelUtils;
54
55/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
56///
57/// Used for matching target-supported shuffles before codegen.
58struct ShuffleVectorPseudo {
59 unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1)
60 Register Dst; ///< Destination register.
61 SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
62 ShuffleVectorPseudo(unsigned Opc, Register Dst,
63 std::initializer_list<SrcOp> SrcOps)
64 : Opc(Opc), Dst(Dst), SrcOps(SrcOps) {}
65 ShuffleVectorPseudo() = default;
66};
67
68/// Check if a vector shuffle corresponds to a REV instruction with the
69/// specified blocksize.
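///
/// For example, with 8-bit elements and a block size of 64, the mask
/// <7, 6, 5, 4, 3, 2, 1, 0> reverses the elements within the single 64-bit
/// block of an <8 x s8> vector, so it is a REV64 mask.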
70static bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts,
71 unsigned BlockSize) {
72 assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
73 "Only possible block sizes for REV are: 16, 32, 64");
74 assert(EltSize != 64 && "EltSize cannot be 64 for REV mask.");
75
76 unsigned BlockElts = M[0] + 1;
77
78 // If the first shuffle index is UNDEF, be optimistic.
79 if (M[0] < 0)
80 BlockElts = BlockSize / EltSize;
81
82 if (BlockSize <= EltSize || BlockSize != BlockElts * EltSize)
83 return false;
84
85 for (unsigned i = 0; i < NumElts; ++i) {
86 // Ignore undef indices.
87 if (M[i] < 0)
88 continue;
89 if (static_cast<unsigned>(M[i]) !=
90 (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
91 return false;
92 }
93
94 return true;
95}
96
97/// Determines if \p M is a shuffle vector mask for a TRN of \p NumElts.
98/// Whether or not G_TRN1 or G_TRN2 should be used is stored in \p WhichResult.
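///
/// For example, with \p NumElts == 4, the mask <0, 4, 2, 6> interleaves the
/// even lanes of both sources (G_TRN1, \p WhichResult == 0), while
/// <1, 5, 3, 7> interleaves the odd lanes (G_TRN2, \p WhichResult == 1).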
99static bool isTRNMask(ArrayRef<int> M, unsigned NumElts,
100 unsigned &WhichResult) {
101 if (NumElts % 2 != 0)
102 return false;
103 WhichResult = (M[0] == 0 ? 0 : 1);
104 for (unsigned i = 0; i < NumElts; i += 2) {
105 if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != i + WhichResult) ||
106 (M[i + 1] >= 0 &&
107 static_cast<unsigned>(M[i + 1]) != i + NumElts + WhichResult))
108 return false;
109 }
110 return true;
111}
112
113/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
114/// sources of the shuffle are different.
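///
/// For example, for two <4 x s32> sources the mask <1, 2, 3, 4> is a sliding
/// window over the concatenated inputs starting at element 1, so this returns
/// {ReverseExt = false, Imm = 1}; matchEXT later scales Imm by the element
/// size in bytes to form the byte immediate of the G_EXT.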
115static std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
116 unsigned NumElts) {
117 // Look for the first non-undef element.
118 auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
119 if (FirstRealElt == M.end())
120 return std::nullopt;
121
122 // Use APInt to handle overflow when calculating expected element.
123 unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
124 APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
125
126 // The following shuffle indices must be the successive elements after the
127 // first real element.
128 if (any_of(
129 make_range(std::next(FirstRealElt), M.end()),
130 [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
131 return std::nullopt;
132
133 // The index of an EXT is the first element if it is not UNDEF.
134 // Watch out for the beginning UNDEFs. The EXT index should be the expected
135 // value of the first element. E.g.
136 // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
137 // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
138 // ExpectedElt is the last mask index plus 1.
139 uint64_t Imm = ExpectedElt.getZExtValue();
140 bool ReverseExt = false;
141
142 // There are two different cases that require reversing the input vectors.
143 // For example, for vector <4 x i32> we have the following cases,
144 // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
145 // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
146 // For both cases, we finally use mask <5, 6, 7, 0>, which requires
147 // to reverse two input vectors.
148 if (Imm < NumElts)
149 ReverseExt = true;
150 else
151 Imm -= NumElts;
152 return std::make_pair(ReverseExt, Imm);
153}
154
155/// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts.
156/// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult.
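///
/// For example, with \p NumElts == 4, the mask <0, 2, 4, 6> keeps the
/// even-indexed elements of the concatenated sources (G_UZP1), while
/// <1, 3, 5, 7> keeps the odd-indexed elements (G_UZP2).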
157static bool isUZPMask(ArrayRef<int> M, unsigned NumElts,
158 unsigned &WhichResult) {
159 WhichResult = (M[0] == 0 ? 0 : 1);
160 for (unsigned i = 0; i != NumElts; ++i) {
161 // Skip undef indices.
162 if (M[i] < 0)
163 continue;
164 if (static_cast<unsigned>(M[i]) != 2 * i + WhichResult)
165 return false;
166 }
167 return true;
168}
169
170/// \return true if \p M is a zip mask for a shuffle vector of \p NumElts.
171/// Whether or not G_ZIP1 or G_ZIP2 should be used is stored in \p WhichResult.
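///
/// For example, with \p NumElts == 4, the mask <0, 4, 1, 5> interleaves the
/// low halves of the two sources (G_ZIP1), while <2, 6, 3, 7> interleaves the
/// high halves (G_ZIP2).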
172static bool isZipMask(ArrayRef<int> M, unsigned NumElts,
173 unsigned &WhichResult) {
174 if (NumElts % 2 != 0)
175 return false;
176
177 // 0 means use ZIP1, 1 means use ZIP2.
178 WhichResult = (M[0] == 0 ? 0 : 1);
179 unsigned Idx = WhichResult * NumElts / 2;
180 for (unsigned i = 0; i != NumElts; i += 2) {
181 if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != Idx) ||
182 (M[i + 1] >= 0 && static_cast<unsigned>(M[i + 1]) != Idx + NumElts))
183 return false;
184 Idx += 1;
185 }
186 return true;
187}
188
189/// Helper function for matchINS.
190///
191/// \returns a value when \p M is an ins mask for \p NumInputElements.
192///
193/// First element of the returned pair is true when the produced
194/// G_INSERT_VECTOR_ELT destination should be the LHS of the G_SHUFFLE_VECTOR.
195///
196/// Second element is the destination lane for the G_INSERT_VECTOR_ELT.
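///
/// For example, with \p NumInputElements == 4, the mask <0, 6, 2, 3> matches
/// the LHS in every lane except lane 1, so this returns {true, 1}; matchINS
/// then inserts element 2 of the RHS into lane 1 of the LHS.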
197static std::optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M,
198 int NumInputElements) {
199 if (M.size() != static_cast<size_t>(NumInputElements))
200 return std::nullopt;
201 int NumLHSMatch = 0, NumRHSMatch = 0;
202 int LastLHSMismatch = -1, LastRHSMismatch = -1;
203 for (int Idx = 0; Idx < NumInputElements; ++Idx) {
204 if (M[Idx] == -1) {
205 ++NumLHSMatch;
206 ++NumRHSMatch;
207 continue;
208 }
209 M[Idx] == Idx ? ++NumLHSMatch : LastLHSMismatch = Idx;
210 M[Idx] == Idx + NumInputElements ? ++NumRHSMatch : LastRHSMismatch = Idx;
211 }
212 const int NumNeededToMatch = NumInputElements - 1;
213 if (NumLHSMatch == NumNeededToMatch)
214 return std::make_pair(true, LastLHSMismatch);
215 if (NumRHSMatch == NumNeededToMatch)
216 return std::make_pair(false, LastRHSMismatch);
217 return std::nullopt;
218}
219
220/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
221/// G_REV instruction. Returns the appropriate G_REV opcode in \p Opc.
222static bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
223 ShuffleVectorPseudo &MatchInfo) {
224 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
225 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
226 Register Dst = MI.getOperand(0).getReg();
227 Register Src = MI.getOperand(1).getReg();
228 LLT Ty = MRI.getType(Dst);
229 unsigned EltSize = Ty.getScalarSizeInBits();
230
231 // Element size for a rev cannot be 64.
232 if (EltSize == 64)
233 return false;
234
235 unsigned NumElts = Ty.getNumElements();
236
237 // Try to produce G_REV64
238 if (isREVMask(ShuffleMask, EltSize, NumElts, 64)) {
239 MatchInfo = ShuffleVectorPseudo(AArch64::G_REV64, Dst, {Src});
240 return true;
241 }
242
243 // TODO: Produce G_REV32 and G_REV16 once we have proper legalization support.
244 // This should be identical to above, but with a constant 32 and constant
245 // 16.
246 return false;
247}
248
249/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
250/// a G_TRN1 or G_TRN2 instruction.
251static bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
252 ShuffleVectorPseudo &MatchInfo) {
253 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
254 unsigned WhichResult;
255 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
256 Register Dst = MI.getOperand(0).getReg();
257 unsigned NumElts = MRI.getType(Dst).getNumElements();
258 if (!isTRNMask(ShuffleMask, NumElts, WhichResult))
259 return false;
260 unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
261 Register V1 = MI.getOperand(1).getReg();
262 Register V2 = MI.getOperand(2).getReg();
263 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
264 return true;
265}
266
267/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
268/// a G_UZP1 or G_UZP2 instruction.
269///
270/// \param [in] MI - The shuffle vector instruction.
271/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success.
272static bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
273 ShuffleVectorPseudo &MatchInfo) {
274 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
275 unsigned WhichResult;
276 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
277 Register Dst = MI.getOperand(0).getReg();
278 unsigned NumElts = MRI.getType(Dst).getNumElements();
279 if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
280 return false;
281 unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
282 Register V1 = MI.getOperand(1).getReg();
283 Register V2 = MI.getOperand(2).getReg();
284 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
285 return true;
286}
287
288static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
289 ShuffleVectorPseudo &MatchInfo) {
290 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
291 unsigned WhichResult;
292 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
293 Register Dst = MI.getOperand(0).getReg();
294 unsigned NumElts = MRI.getType(Dst).getNumElements();
295 if (!isZipMask(ShuffleMask, NumElts, WhichResult))
296 return false;
297 unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
298 Register V1 = MI.getOperand(1).getReg();
299 Register V2 = MI.getOperand(2).getReg();
300 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
301 return true;
302}
303
304/// Helper function for matchDup.
305static bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
306 MachineRegisterInfo &MRI,
307 ShuffleVectorPseudo &MatchInfo) {
308 if (Lane != 0)
309 return false;
310
311 // Try to match a vector splat operation into a dup instruction.
312 // We're looking for this pattern:
313 //
314 // %scalar:gpr(s64) = COPY $x0
315 // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
316 // %cst0:gpr(s32) = G_CONSTANT i32 0
317 // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
318 // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
319 // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef, %zerovec(<2 x s32>)
320 //
321 // ...into:
322 // %splat = G_DUP %scalar
323
324 // Begin matching the insert.
325 auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT,
326 MI.getOperand(1).getReg(), MRI);
327 if (!InsMI)
328 return false;
329 // Match the undef vector operand.
330 if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),
331 MRI))
332 return false;
333
334 // Match the index constant 0.
335 if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt()))
336 return false;
337
338 MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
339 {InsMI->getOperand(2).getReg()});
340 return true;
341}
342
343/// Helper function for matchDup.
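///
/// For example, if %vec:(<4 x s32>) = G_BUILD_VECTOR %a, %b, %c, %d and the
/// shuffle splats lane 1, the splat can simply be rewritten as G_DUP %b.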
344static bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
345 MachineRegisterInfo &MRI,
346 ShuffleVectorPseudo &MatchInfo) {
347 assert(Lane >= 0 && "Expected positive lane?");
348 // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the
349 // lane's definition directly.
350 auto *BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
351 MI.getOperand(1).getReg(), MRI);
352 if (!BuildVecMI)
353 return false;
354 Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();
355 MatchInfo =
356 ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
357 return true;
358}
359
360static bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
361 ShuffleVectorPseudo &MatchInfo) {
362 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
363 auto MaybeLane = getSplatIndex(MI);
364 if (!MaybeLane)
365 return false;
366 int Lane = *MaybeLane;
367 // If this is undef splat, generate it via "just" vdup, if possible.
368 if (Lane < 0)
369 Lane = 0;
370 if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
371 return true;
372 if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
373 return true;
374 return false;
375}
376
377// Check if an EXT instruction can handle the shuffle mask when the vector
378// sources of the shuffle are the same.
379static bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) {
380 unsigned NumElts = Ty.getNumElements();
381
382 // Assume that the first shuffle index is not UNDEF. Fail if it is.
383 if (M[0] < 0)
384 return false;
385
386 // If this is a VEXT shuffle, the immediate value is the index of the first
387 // element. The other shuffle indices must be the successive elements after
388 // the first one.
389 unsigned ExpectedElt = M[0];
390 for (unsigned I = 1; I < NumElts; ++I) {
391 // Increment the expected index. If it wraps around, just follow it
392 // back to index zero and keep going.
393 ++ExpectedElt;
394 if (ExpectedElt == NumElts)
395 ExpectedElt = 0;
396
397 if (M[I] < 0)
398 continue; // Ignore UNDEF indices.
399 if (ExpectedElt != static_cast<unsigned>(M[I]))
400 return false;
401 }
402
403 return true;
404}
405
406static bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
407 ShuffleVectorPseudo &MatchInfo) {
408 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
409 Register Dst = MI.getOperand(0).getReg();
410 LLT DstTy = MRI.getType(Dst);
411 Register V1 = MI.getOperand(1).getReg();
412 Register V2 = MI.getOperand(2).getReg();
413 auto Mask = MI.getOperand(3).getShuffleMask();
414 uint64_t Imm;
415 auto ExtInfo = getExtMask(Mask, DstTy.getNumElements());
416 uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
417
418 if (!ExtInfo) {
419 if (!getOpcodeDef<GImplicitDef>(V2, MRI) ||
420 !isSingletonExtMask(Mask, DstTy))
421 return false;
422
423 Imm = Mask[0] * ExtFactor;
424 MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V1, Imm});
425 return true;
426 }
427 bool ReverseExt;
428 std::tie(ReverseExt, Imm) = *ExtInfo;
429 if (ReverseExt)
430 std::swap(V1, V2);
431 Imm *= ExtFactor;
432 MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
433 return true;
434}
435
436/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
437/// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR.
438static bool applyShuffleVectorPseudo(MachineInstr &MI,
439 ShuffleVectorPseudo &MatchInfo) {
440 MachineIRBuilder MIRBuilder(MI);
441 MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
442 MI.eraseFromParent();
443 return true;
444}
445
446/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
447/// Special-cased because the constant operand must be emitted as a G_CONSTANT
448/// for the imported tablegen patterns to work.
449static bool applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
450 MachineIRBuilder MIRBuilder(MI);
451 // Tablegen patterns expect an i32 G_CONSTANT as the final op.
452 auto Cst =
453 MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
454 MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
455 {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
456 MI.eraseFromParent();
457 return true;
458}
459
460/// Match a G_SHUFFLE_VECTOR with a mask which corresponds to a
461/// G_INSERT_VECTOR_ELT and G_EXTRACT_VECTOR_ELT pair.
462///
463/// e.g.
464/// %shuf = G_SHUFFLE_VECTOR %left, %right, shufflemask(0, 0)
465///
466/// Can be represented as
467///
468/// %extract = G_EXTRACT_VECTOR_ELT %left, 0
469/// %ins = G_INSERT_VECTOR_ELT %left, %extract, 1
470///
471static bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI,
472 std::tuple<Register, int, Register, int> &MatchInfo) {
473 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
474 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
475 Register Dst = MI.getOperand(0).getReg();
476 int NumElts = MRI.getType(Dst).getNumElements();
477 auto DstIsLeftAndDstLane = isINSMask(ShuffleMask, NumElts);
478 if (!DstIsLeftAndDstLane)
479 return false;
480 bool DstIsLeft;
481 int DstLane;
482 std::tie(DstIsLeft, DstLane) = *DstIsLeftAndDstLane;
483 Register Left = MI.getOperand(1).getReg();
484 Register Right = MI.getOperand(2).getReg();
485 Register DstVec = DstIsLeft ? Left : Right;
486 Register SrcVec = Left;
487
488 int SrcLane = ShuffleMask[DstLane];
489 if (SrcLane >= NumElts) {
490 SrcVec = Right;
491 SrcLane -= NumElts;
492 }
493
494 MatchInfo = std::make_tuple(DstVec, DstLane, SrcVec, SrcLane);
495 return true;
496}
497
498static bool applyINS(MachineInstr &MI, MachineRegisterInfo &MRI,
499 MachineIRBuilder &Builder,
500 std::tuple<Register, int, Register, int> &MatchInfo) {
501 Builder.setInstrAndDebugLoc(MI);
502 Register Dst = MI.getOperand(0).getReg();
503 auto ScalarTy = MRI.getType(Dst).getElementType();
504 Register DstVec, SrcVec;
505 int DstLane, SrcLane;
506 std::tie(DstVec, DstLane, SrcVec, SrcLane) = MatchInfo;
507 auto SrcCst = Builder.buildConstant(LLT::scalar(64), SrcLane);
508 auto Extract = Builder.buildExtractVectorElement(ScalarTy, SrcVec, SrcCst);
509 auto DstCst = Builder.buildConstant(LLT::scalar(64), DstLane);
510 Builder.buildInsertVectorElement(Dst, DstVec, Extract, DstCst);
511 MI.eraseFromParent();
512 return true;
513}
514
515/// isVShiftRImm - Check if this is a valid vector for the immediate
516/// operand of a vector shift right operation. The value must be in the range:
517/// 1 <= Value <= ElementBits for a right shift.
518static bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
519 int64_t &Cnt) {
520 assert(Ty.isVector() && "vector shift count is not a vector type");
521 MachineInstr *MI = MRI.getVRegDef(Reg);
522 auto Cst = getAArch64VectorSplatScalar(*MI, MRI);
523 if (!Cst)
524 return false;
525 Cnt = *Cst;
526 int64_t ElementBits = Ty.getScalarSizeInBits();
527 return Cnt >= 1 && Cnt <= ElementBits;
528}
529
530/// Match a vector G_ASHR or G_LSHR with a valid immediate shift.
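///
/// For example, %v:(<4 x s32>) = G_ASHR %x, %splat, where %splat is a build
/// vector splatting the constant 16, is matched with \p Imm == 16 and later
/// rewritten by applyVAshrLshrImm into the AArch64::G_VASHR pseudo with a
/// shift amount of 16.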
531static bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
532 int64_t &Imm) {
533 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
534 MI.getOpcode() == TargetOpcode::G_LSHR);
535 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
536 if (!Ty.isVector())
537 return false;
538 return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
539}
540
541static bool applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
542 int64_t &Imm) {
543 unsigned Opc = MI.getOpcode();
544 assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
545 unsigned NewOpc =
546 Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
547 MachineIRBuilder MIB(MI);
548 auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm);
549 MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef});
550 MI.eraseFromParent();
551 return true;
552}
553
554/// Determine if it is possible to modify the \p RHS and predicate \p P of a
555/// G_ICMP instruction such that the right-hand side is an arithmetic immediate.
556///
557/// \returns A pair containing the updated immediate and predicate which may
558/// be used to optimize the instruction.
559///
560/// \note This assumes that the comparison has been legalized.
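///
/// For example, 4097 cannot be encoded as an arithmetic immediate, but
/// "x slt 4097" is equivalent to "x sle 4096", and 4096 (1 << 12) is
/// encodable, so {4096, ICMP_SLE} would be returned.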
561std::optional<std::pair<uint64_t, CmpInst::Predicate>>
562tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
563 const MachineRegisterInfo &MRI) {
564 const auto &Ty = MRI.getType(RHS);
565 if (Ty.isVector())
566 return std::nullopt;
567 unsigned Size = Ty.getSizeInBits();
568 assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?");
569
570 // If the RHS is not a constant, or the RHS is already a valid arithmetic
571 // immediate, then there is nothing to change.
572 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, MRI);
573 if (!ValAndVReg)
574 return std::nullopt;
575 uint64_t C = ValAndVReg->Value.getZExtValue();
576 if (isLegalArithImmed(C))
577 return std::nullopt;
578
579 // We have a non-arithmetic immediate. Check if adjusting the immediate and
580 // adjusting the predicate will result in a legal arithmetic immediate.
581 switch (P) {
582 default:
583 return std::nullopt;
584 case CmpInst::ICMP_SLT:
585 case CmpInst::ICMP_SGE:
586 // Check for
587 //
588 // x slt c => x sle c - 1
589 // x sge c => x sgt c - 1
590 //
591 // When c is not the smallest possible negative number.
592 if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
593 (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
594 return std::nullopt;
595 P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
596 C -= 1;
597 break;
598 case CmpInst::ICMP_ULT:
599 case CmpInst::ICMP_UGE:
600 // Check for
601 //
602 // x ult c => x ule c - 1
603 // x uge c => x ugt c - 1
604 //
605 // When c is not zero.
606 if (C == 0)
607 return std::nullopt;
608 P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
609 C -= 1;
610 break;
611 case CmpInst::ICMP_SLE:
612 case CmpInst::ICMP_SGT:
613 // Check for
614 //
615 // x sle c => x slt c + 1
616 // x sgt c => x sge c + 1
617 //
618 // When c is not the largest possible signed integer.
619 if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
620 (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
621 return std::nullopt;
622 P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
623 C += 1;
624 break;
625 case CmpInst::ICMP_ULE:
626 case CmpInst::ICMP_UGT:
627 // Check for
628 //
629 // x ule c => x ult c + 1
630 // x ugt c => x uge c + 1
631 //
632 // When c is not the largest possible unsigned integer.
633 if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
634 (Size == 64 && C == UINT64_MAX))
635 return std::nullopt;
636 P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
637 C += 1;
638 break;
639 }
640
641 // Check if the new constant is valid, and return the updated constant and
642 // predicate if it is.
643 if (Size == 32)
644 C = static_cast<uint32_t>(C);
645 if (!isLegalArithImmed(C))
646 return std::nullopt;
647 return {{C, P}};
648}
649
650/// Determine whether or not it is possible to update the RHS and predicate of
651/// a G_ICMP instruction such that the RHS will be selected as an arithmetic
652/// immediate.
653///
654/// \p MI - The G_ICMP instruction
655/// \p MatchInfo - The new RHS immediate and predicate on success
656///
657/// See tryAdjustICmpImmAndPred for valid transformations.
658bool matchAdjustICmpImmAndPred(
659 MachineInstr &MI, const MachineRegisterInfo &MRI,
660 std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) {
661 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
662 Register RHS = MI.getOperand(3).getReg();
663 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
664 if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) {
665 MatchInfo = *MaybeNewImmAndPred;
666 return true;
667 }
668 return false;
669}
670
671bool applyAdjustICmpImmAndPred(
672 MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo,
673 MachineIRBuilder &MIB, GISelChangeObserver &Observer) {
674 MIB.setInstrAndDebugLoc(MI);
675 MachineOperand &RHS = MI.getOperand(3);
676 MachineRegisterInfo &MRI = *MIB.getMRI();
677 auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()),
678 MatchInfo.first);
679 Observer.changingInstr(MI);
680 RHS.setReg(Cst->getOperand(0).getReg());
681 MI.getOperand(1).setPredicate(MatchInfo.second);
682 Observer.changedInstr(MI);
683 return true;
684}
685
686bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
687 std::pair<unsigned, int> &MatchInfo) {
688 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
689 Register Src1Reg = MI.getOperand(1).getReg();
690 const LLT SrcTy = MRI.getType(Src1Reg);
691 const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
692
693 auto LaneIdx = getSplatIndex(MI);
694 if (!LaneIdx)
695 return false;
696
697 // The lane idx should be within the first source vector.
698 if (*LaneIdx >= SrcTy.getNumElements())
699 return false;
700
701 if (DstTy != SrcTy)
702 return false;
703
704 LLT ScalarTy = SrcTy.getElementType();
705 unsigned ScalarSize = ScalarTy.getSizeInBits();
706
707 unsigned Opc = 0;
708 switch (SrcTy.getNumElements()) {
709 case 2:
710 if (ScalarSize == 64)
711 Opc = AArch64::G_DUPLANE64;
712 else if (ScalarSize == 32)
713 Opc = AArch64::G_DUPLANE32;
714 break;
715 case 4:
716 if (ScalarSize == 32)
717 Opc = AArch64::G_DUPLANE32;
718 break;
719 case 8:
720 if (ScalarSize == 16)
721 Opc = AArch64::G_DUPLANE16;
722 break;
723 case 16:
724 if (ScalarSize == 8)
725 Opc = AArch64::G_DUPLANE8;
726 break;
727 default:
728 break;
729 }
730 if (!Opc)
731 return false;
732
733 MatchInfo.first = Opc;
734 MatchInfo.second = *LaneIdx;
735 return true;
736}
737
738bool applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
739 MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
740 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
741 Register Src1Reg = MI.getOperand(1).getReg();
742 const LLT SrcTy = MRI.getType(Src1Reg);
743
744 B.setInstrAndDebugLoc(MI);
745 auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second);
746
747 Register DupSrc = MI.getOperand(1).getReg();
748 // For types like <2 x s32>, we can use G_DUPLANE32, with a <4 x s32> source.
749 // To do this, we can use a G_CONCAT_VECTORS to do the widening.
750 if (SrcTy == LLT::fixed_vector(2, LLT::scalar(32))) {
751 assert(MRI.getType(MI.getOperand(0).getReg()).getNumElements() == 2 &&
752 "Unexpected dest elements");
753 auto Undef = B.buildUndef(SrcTy);
754 DupSrc = B.buildConcatVectors(
755 SrcTy.changeElementCount(ElementCount::getFixed(4)),
756 {Src1Reg, Undef.getReg(0)})
757 .getReg(0);
758 }
759 B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, {DupSrc, Lane});
760 MI.eraseFromParent();
761 return true;
762}
763
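/// Match a G_BUILD_VECTOR that splats a single register, or a constant other
/// than all-zeros/all-ones, e.g. %v:(<4 x s32>) = G_BUILD_VECTOR %x, %x, %x, %x,
/// so it can be rewritten as a G_DUP of %x. Constant 0 and -1 splats are left
/// alone so the immAllZerosV/immAllOnesV selection patterns can still match.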
764static bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
765 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
766 auto Splat = getAArch64VectorSplat(MI, MRI);
767 if (!Splat)
768 return false;
769 if (Splat->isReg())
770 return true;
771 // Later, during selection, we'll try to match imported patterns using
772 // immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
773 // G_BUILD_VECTORs which could match those patterns.
774 int64_t Cst = Splat->getCst();
775 return (Cst != 0 && Cst != -1);
776}
777
778static bool applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
779 MachineIRBuilder &B) {
780 B.setInstrAndDebugLoc(MI);
781 B.buildInstr(AArch64::G_DUP, {MI.getOperand(0).getReg()},
782 {MI.getOperand(1).getReg()});
783 MI.eraseFromParent();
784 return true;
785}
786
787/// \returns how many instructions would be saved by folding a G_ICMP's shift
788/// and/or extension operations.
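///
/// For example, folding a single-use "%t = G_SHL %x, 2" into the compare (so
/// it can be selected as "cmp w0, w1, lsl #2") saves one instruction, and
/// folding a supported extend that feeds a shift of four or less saves two.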
789static unsigned getCmpOperandFoldingProfit(Register CmpOp,
790 const MachineRegisterInfo &MRI) {
791 // No instructions to save if there's more than one use or no uses.
792 if (!MRI.hasOneNonDBGUse(CmpOp))
793 return 0;
794
795 // FIXME: This is duplicated with the selector. (See: selectShiftedRegister)
796 auto IsSupportedExtend = [&](const MachineInstr &MI) {
797 if (MI.getOpcode() == TargetOpcode::G_SEXT_INREG)
798 return true;
799 if (MI.getOpcode() != TargetOpcode::G_AND)
800 return false;
801 auto ValAndVReg =
802 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
803 if (!ValAndVReg)
804 return false;
805 uint64_t Mask = ValAndVReg->Value.getZExtValue();
806 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
807 };
808
809 MachineInstr *Def = getDefIgnoringCopies(CmpOp, MRI);
810 if (IsSupportedExtend(*Def))
811 return 1;
812
813 unsigned Opc = Def->getOpcode();
814 if (Opc != TargetOpcode::G_SHL && Opc != TargetOpcode::G_ASHR &&
815 Opc != TargetOpcode::G_LSHR)
816 return 0;
817
818 auto MaybeShiftAmt =
819 getIConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
820 if (!MaybeShiftAmt)
821 return 0;
822 uint64_t ShiftAmt = MaybeShiftAmt->Value.getZExtValue();
823 MachineInstr *ShiftLHS =
824 getDefIgnoringCopies(Def->getOperand(1).getReg(), MRI);
825
826 // Check if we can fold an extend and a shift.
827 // FIXME: This is duplicated with the selector. (See:
828 // selectArithExtendedRegister)
829 if (IsSupportedExtend(*ShiftLHS))
830 return (ShiftAmt <= 4) ? 2 : 1;
831
832 LLT Ty = MRI.getType(Def->getOperand(0).getReg());
833 if (Ty.isVector())
834 return 0;
835 unsigned ShiftSize = Ty.getSizeInBits();
836 if ((ShiftSize == 32 && ShiftAmt <= 31) ||
837 (ShiftSize == 64 && ShiftAmt <= 63))
838 return 1;
839 return 0;
840}
841
842/// \returns true if it would be profitable to swap the LHS and RHS of a G_ICMP
843/// instruction \p MI.
844static bool trySwapICmpOperands(MachineInstr &MI,
845 const MachineRegisterInfo &MRI) {
846 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
847 // Swap the operands if it would introduce a profitable folding opportunity.
848 // (e.g. a shift + extend).
849 //
850 // For example:
851 // lsl w13, w11, #1
852 // cmp w13, w12
853 // can be turned into:
854 // cmp w12, w11, lsl #1
855
856 // Don't swap if there's a constant on the RHS, because we know we can fold
857 // that.
858 Register RHS = MI.getOperand(3).getReg();
859 auto RHSCst = getIConstantVRegValWithLookThrough(RHS, MRI);
860 if (RHSCst && isLegalArithImmed(RHSCst->Value.getSExtValue()))
861 return false;
862
863 Register LHS = MI.getOperand(2).getReg();
864 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
865 auto GetRegForProfit = [&](Register Reg) {
866 MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
867 return isCMN(Def, Pred, MRI) ? Def->getOperand(2).getReg() : Reg;
868 };
869
870 // Don't have a constant on the RHS. If we swap the LHS and RHS of the
871 // compare, would we be able to fold more instructions?
872 Register TheLHS = GetRegForProfit(LHS);
873 Register TheRHS = GetRegForProfit(RHS);
874
875 // If the LHS is more likely to give us a folding opportunity, then swap the
876 // LHS and RHS.
877 return (getCmpOperandFoldingProfit(TheLHS, MRI) >
878 getCmpOperandFoldingProfit(TheRHS, MRI));
879}
880
881static bool applySwapICmpOperands(MachineInstr &MI,
882 GISelChangeObserver &Observer) {
883 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
884 Register LHS = MI.getOperand(2).getReg();
885 Register RHS = MI.getOperand(3).getReg();
886 Observer.changedInstr(MI);
887 MI.getOperand(1).setPredicate(CmpInst::getSwappedPredicate(Pred));
888 MI.getOperand(2).setReg(RHS);
889 MI.getOperand(3).setReg(LHS);
890 Observer.changedInstr(MI);
891 return true;
892}
893
894/// \returns a function which builds a vector floating point compare instruction
895/// for a condition code \p CC.
896/// \param [in] IsZero - True if the comparison is against 0.
897/// \param [in] NoNans - True if the target has NoNansFPMath.
898static std::function<Register(MachineIRBuilder &)>
899getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool IsZero,
900 bool NoNans, MachineRegisterInfo &MRI) {
901 LLT DstTy = MRI.getType(LHS);
902 assert(DstTy.isVector() && "Expected vector types only?");
903 assert(DstTy == MRI.getType(RHS) && "Src and Dst types must match!");
904 switch (CC) {
905 default:
906 llvm_unreachable("Unexpected condition code!");
907 case AArch64CC::NE:
908 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
909 auto FCmp = IsZero
910 ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS})
911 : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
912 return MIB.buildNot(DstTy, FCmp).getReg(0);
913 };
914 case AArch64CC::EQ:
915 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
916 return IsZero
917 ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS}).getReg(0)
918 : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS})
919 .getReg(0);
920 };
921 case AArch64CC::GE:
922 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
923 return IsZero
924 ? MIB.buildInstr(AArch64::G_FCMGEZ, {DstTy}, {LHS}).getReg(0)
925 : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS})
926 .getReg(0);
927 };
928 case AArch64CC::GT:
929 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
930 return IsZero
931 ? MIB.buildInstr(AArch64::G_FCMGTZ, {DstTy}, {LHS}).getReg(0)
932 : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS})
933 .getReg(0);
934 };
935 case AArch64CC::LS:
936 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
937 return IsZero
938 ? MIB.buildInstr(AArch64::G_FCMLEZ, {DstTy}, {LHS}).getReg(0)
939 : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS})
940 .getReg(0);
941 };
942 case AArch64CC::MI:
943 return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
944 return IsZero
945 ? MIB.buildInstr(AArch64::G_FCMLTZ, {DstTy}, {LHS}).getReg(0)
946 : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS})
947 .getReg(0);
948 };
949 }
950}
951
952/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
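///
/// Predicates with no single AArch64 vector condition code are split into two
/// codes, each lowered via getVectorFCMP, and the two results are ORed (and
/// possibly inverted). Compares against a zero splat use the G_FCM*Z forms.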
953static bool lowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
954 MachineIRBuilder &MIB) {
955 assert(MI.getOpcode() == TargetOpcode::G_FCMP);
956 const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
957 Register Dst = MI.getOperand(0).getReg();
958 LLT DstTy = MRI.getType(Dst);
959 if (!DstTy.isVector() || !ST.hasNEON())
960 return false;
961 const auto Pred =
962 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
963 Register LHS = MI.getOperand(2).getReg();
964 // TODO: Handle v4s16 case.
965 unsigned EltSize = MRI.getType(LHS).getScalarSizeInBits();
966 if (EltSize != 32 && EltSize != 64)
967 return false;
968 Register RHS = MI.getOperand(3).getReg();
969 auto Splat = getAArch64VectorSplat(*MRI.getVRegDef(RHS), MRI);
970
971 // Compares against 0 have special target-specific pseudos.
972 bool IsZero = Splat && Splat->isCst() && Splat->getCst() == 0;
973
974
975 bool Invert = false;
976 AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
977 if (Pred == CmpInst::Predicate::FCMP_ORD && IsZero) {
978 // The special case "fcmp ord %a, 0" is the canonical check that LHS isn't
979 // NaN, so equivalent to a == a and doesn't need the two comparisons an
980 // "ord" normally would.
981 RHS = LHS;
982 IsZero = false;
983 CC = AArch64CC::EQ;
984 } else
985 changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);
986
987 bool NoNans = ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;
988
989 // Instead of having an apply function, just build here to simplify things.
990 MIB.setInstrAndDebugLoc(MI);
991 auto Cmp = getVectorFCMP(CC, LHS, RHS, IsZero, NoNans, MRI);
992 Register CmpRes;
993 if (CC2 == AArch64CC::AL)
994 CmpRes = Cmp(MIB);
995 else {
996 auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, IsZero, NoNans, MRI);
997 auto Cmp2Dst = Cmp2(MIB);
998 auto Cmp1Dst = Cmp(MIB);
999 CmpRes = MIB.buildOr(DstTy, Cmp1Dst, Cmp2Dst).getReg(0);
1000 }
1001 if (Invert)
1002 CmpRes = MIB.buildNot(DstTy, CmpRes).getReg(0);
1003 MRI.replaceRegWith(Dst, CmpRes);
1004 MI.eraseFromParent();
1005 return false;
1006}
1007
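/// Match a G_STORE whose value operand is a G_TRUNC, e.g. a store of
/// %t:(s8) = G_TRUNC %x:(s32). applyFormTruncstore then makes the store take
/// the wider register %x directly (the memory size is unchanged), forming a
/// truncating store. Only scalar values of at most 64 bits are matched.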
1008static bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
1009 Register &SrcReg) {
1010 assert(MI.getOpcode() == TargetOpcode::G_STORE);
1011 Register DstReg = MI.getOperand(0).getReg();
1012 if (MRI.getType(DstReg).isVector())
1013 return false;
1014 // Match a store of a truncate.
1015 if (!mi_match(DstReg, MRI, m_GTrunc(m_Reg(SrcReg))))
1016 return false;
1017 // Only form truncstores for value types of max 64b.
1018 return MRI.getType(SrcReg).getSizeInBits() <= 64;
1019}
1020
1021static bool applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
1022 MachineIRBuilder &B,
1023 GISelChangeObserver &Observer,
1024 Register &SrcReg) {
1025 assert(MI.getOpcode() == TargetOpcode::G_STORE);
1026 Observer.changingInstr(MI);
1027 MI.getOperand(0).setReg(SrcReg);
1028 Observer.changedInstr(MI);
1029 return true;
1030}
1031
1032// Lower vector G_SEXT_INREG back to shifts for selection. We allowed them to
1033// form in the first place for combine opportunities, so any remaining ones
1034// at this stage need to be lowered back.
1035static bool matchVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI) {
1036 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1037 Register DstReg = MI.getOperand(0).getReg();
1038 LLT DstTy = MRI.getType(DstReg);
1039 return DstTy.isVector();
1040}
1041
1042static void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
1043 MachineIRBuilder &B,
1044 GISelChangeObserver &Observer) {
1045 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1046 B.setInstrAndDebugLoc(MI);
1047 LegalizerHelper Helper(*MI.getMF(), Observer, B);
1048 Helper.lower(MI, 0, /* Unused hint type */ LLT());
1049}
1050
1051#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
1052#include "AArch64GenPostLegalizeGILowering.inc"
1053#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
1054
1055namespace {
1056#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H
1057#include "AArch64GenPostLegalizeGILowering.inc"
1058#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H
1059
1060class AArch64PostLegalizerLoweringInfo : public CombinerInfo {
1061public:
1062 AArch64GenPostLegalizerLoweringHelperRuleConfig GeneratedRuleCfg;
1063
1064 AArch64PostLegalizerLoweringInfo(bool OptSize, bool MinSize)
1065 : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
1066 /*LegalizerInfo*/ nullptr, /*OptEnabled = */ true, OptSize,
1067 MinSize) {
1068 if (!GeneratedRuleCfg.parseCommandLineOption())
1069 report_fatal_error("Invalid rule identifier");
1070 }
1071
1072 bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
1073 MachineIRBuilder &B) const override;
1074};
1075
1076bool AArch64PostLegalizerLoweringInfo::combine(GISelChangeObserver &Observer,
1077 MachineInstr &MI,
1078 MachineIRBuilder &B) const {
1079 CombinerHelper Helper(Observer, B, /* IsPreLegalize*/ false);
1080 AArch64GenPostLegalizerLoweringHelper Generated(GeneratedRuleCfg);
1081 return Generated.tryCombineAll(Observer, MI, B, Helper);
1082}
1083
1084#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP
1085#include "AArch64GenPostLegalizeGILowering.inc"
1086#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP
1087
1088class AArch64PostLegalizerLowering : public MachineFunctionPass {
1089public:
1090 static char ID;
1091
1092 AArch64PostLegalizerLowering();
1093
1094 StringRef getPassName() const override {
1095 return "AArch64PostLegalizerLowering";
1096 }
1097
1098 bool runOnMachineFunction(MachineFunction &MF) override;
1099 void getAnalysisUsage(AnalysisUsage &AU) const override;
1100};
1101} // end anonymous namespace
1102
1103void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const {
1104 AU.addRequired<TargetPassConfig>();
1105 AU.setPreservesCFG();
1106 getSelectionDAGFallbackAnalysisUsage(AU);
1107 MachineFunctionPass::getAnalysisUsage(AU);
1108}
1109
1110AArch64PostLegalizerLowering::AArch64PostLegalizerLowering()
1111 : MachineFunctionPass(ID) {
1112 initializeAArch64PostLegalizerLoweringPass(*PassRegistry::getPassRegistry());
1113}
1114
1115bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) {
1116 if (MF.getProperties().hasProperty(
1117 MachineFunctionProperties::Property::FailedISel))
1118 return false;
1119 assert(MF.getProperties().hasProperty(
1120 MachineFunctionProperties::Property::Legalized) &&
1121 "Expected a legalized function?");
1122 auto *TPC = &getAnalysis<TargetPassConfig>();
1123 const Function &F = MF.getFunction();
1124 AArch64PostLegalizerLoweringInfo PCInfo(F.hasOptSize(), F.hasMinSize());
1125 Combiner C(PCInfo, TPC);
1126 return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
1127}
1128
1129char AArch64PostLegalizerLowering::ID = 0;
1130INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE,
1131 "Lower AArch64 MachineInstrs after legalization", false,
1132 false)
1133INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
1134INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE,
1135 "Lower AArch64 MachineInstrs after legalization", false,
1136 false)
1137
1138namespace llvm {
1139FunctionPass *createAArch64PostLegalizerLowering() {
1140 return new AArch64PostLegalizerLowering();
1141}
1142} // end namespace llvm