1//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Post-legalization lowering for instructions.
11///
12/// This is used to offload pattern matching from the selector.
13///
14/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually
15/// a G_ZIP, G_UZP, etc.
16///
17/// General optimization combines should be handled by either the
18/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner.
19///
20//===----------------------------------------------------------------------===//
21
22#include "AArch64ExpandImm.h"
25#include "AArch64Subtarget.h"
45#include "llvm/IR/InstrTypes.h"
47#include <optional>
48
49#define GET_GICOMBINER_DEPS
50#include "AArch64GenPostLegalizeGILowering.inc"
51#undef GET_GICOMBINER_DEPS
52
53#define DEBUG_TYPE "aarch64-postlegalizer-lowering"
54
55using namespace llvm;
56using namespace MIPatternMatch;
57using namespace AArch64GISelUtils;
58
59namespace {
60
61#define GET_GICOMBINER_TYPES
62#include "AArch64GenPostLegalizeGILowering.inc"
63#undef GET_GICOMBINER_TYPES
64
65/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
66///
67/// Used for matching target-supported shuffles before codegen.
68struct ShuffleVectorPseudo {
69 unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1)
70 Register Dst; ///< Destination register.
71 SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
72 ShuffleVectorPseudo(unsigned Opc, Register Dst,
73 std::initializer_list<SrcOp> SrcOps)
74 : Opc(Opc), Dst(Dst), SrcOps(SrcOps){};
75 ShuffleVectorPseudo() = default;
76};
77
78/// Return true if a G_FCONSTANT instruction is known to be better-represented
79/// as a G_CONSTANT.
80bool matchFConstantToConstant(MachineInstr &MI, MachineRegisterInfo &MRI) {
81 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
82 Register DstReg = MI.getOperand(0).getReg();
83 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
84 if (DstSize != 16 && DstSize != 32 && DstSize != 64)
85 return false;
86
87 // When we're storing a value, it doesn't matter what register bank it's on.
88 // Since not all floating point constants can be materialized using a fmov,
89 // it makes more sense to just use a GPR.
90 return all_of(MRI.use_nodbg_instructions(DstReg),
91 [](const MachineInstr &Use) { return Use.mayStore(); });
92}
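// For example (a sketch, assuming the constant's only users are stores), a
// G_FCONSTANT double 1.0 becomes an integer constant with the same bits:
//   %f:_(s64) = G_FCONSTANT double 1.000000e+00
//   G_STORE %f, %ptr
// is rewritten so that %f is instead
//   %f:_(s64) = G_CONSTANT i64 0x3FF0000000000000  ; bit pattern of 1.0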
93
94/// Change a G_FCONSTANT into a G_CONSTANT.
95void applyFConstantToConstant(MachineInstr &MI) {
96 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
97 MachineIRBuilder MIB(MI);
98 const APFloat &ImmValAPF = MI.getOperand(1).getFPImm()->getValueAPF();
99 MIB.buildConstant(MI.getOperand(0).getReg(), ImmValAPF.bitcastToAPInt());
100 MI.eraseFromParent();
101}
102
103/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
104/// sources of the shuffle are different.
105std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
106 unsigned NumElts) {
107 // Look for the first non-undef element.
108 auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
109 if (FirstRealElt == M.end())
110 return std::nullopt;
111
112 // Use APInt to handle overflow when calculating expected element.
113 unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
114 APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1, false, true);
115
116 // The following shuffle indices must be the successive elements after the
117 // first real element.
118 if (any_of(
119 make_range(std::next(FirstRealElt), M.end()),
120 [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
121 return std::nullopt;
122
123 // The index of an EXT is the first element if it is not UNDEF.
124 // Watch out for the beginning UNDEFs. The EXT index should be the expected
125 // value of the first element. E.g.
126 // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
127 // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
128 // ExpectedElt is the last mask index plus 1.
129 uint64_t Imm = ExpectedElt.getZExtValue();
130 bool ReverseExt = false;
131
132 // There are two different cases that require reversing the input vectors.
133 // For example, for vector <4 x i32> we have the following cases,
134 // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
135 // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
136 // For both cases, we finally use mask <5, 6, 7, 0>, which requires
137 // to reverse two input vectors.
138 if (Imm < NumElts)
139 ReverseExt = true;
140 else
141 Imm -= NumElts;
142 return std::make_pair(ReverseExt, Imm);
143}
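// Working through Case 2 above for <4 x s32> (a sketch of the computation):
// with mask <-1, -1, 7, 0>, the first real element is 7, so ExpectedElt
// starts at 8 and wraps to 0 for the final index. After the scan ExpectedElt
// is 1, giving Imm = 1 < NumElts, so the result is {ReverseExt = true,
// Imm = 1}; matchEXT below then swaps the two sources and scales Imm by the
// element size in bytes.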
144
145/// Helper function for matchINS.
146///
147/// \returns a value when \p M is an ins mask for \p NumInputElements.
148///
149/// First element of the returned pair is true when the produced
150/// G_INSERT_VECTOR_ELT destination should be the LHS of the G_SHUFFLE_VECTOR.
151///
152/// Second element is the destination lane for the G_INSERT_VECTOR_ELT.
153std::optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M,
154 int NumInputElements) {
155 if (M.size() != static_cast<size_t>(NumInputElements))
156 return std::nullopt;
157 int NumLHSMatch = 0, NumRHSMatch = 0;
158 int LastLHSMismatch = -1, LastRHSMismatch = -1;
159 for (int Idx = 0; Idx < NumInputElements; ++Idx) {
160 if (M[Idx] == -1) {
161 ++NumLHSMatch;
162 ++NumRHSMatch;
163 continue;
164 }
165 M[Idx] == Idx ? ++NumLHSMatch : LastLHSMismatch = Idx;
166 M[Idx] == Idx + NumInputElements ? ++NumRHSMatch : LastRHSMismatch = Idx;
167 }
168 const int NumNeededToMatch = NumInputElements - 1;
169 if (NumLHSMatch == NumNeededToMatch)
170 return std::make_pair(true, LastLHSMismatch);
171 if (NumRHSMatch == NumNeededToMatch)
172 return std::make_pair(false, LastRHSMismatch);
173 return std::nullopt;
174}
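// For example, with NumInputElements = 4 the mask <0, 1, 6, 3> matches the
// LHS everywhere except lane 2, so isINSMask returns {true, 2}: lane 2 of the
// LHS is rewritten, and matchINS below reads the source element from lane
// 6 - 4 = 2 of the RHS.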
175
176/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
177 /// G_REV instruction. Stores the appropriate G_REV opcode in \p MatchInfo.
178bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
179 ShuffleVectorPseudo &MatchInfo) {
180 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
181 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
182 Register Dst = MI.getOperand(0).getReg();
183 Register Src = MI.getOperand(1).getReg();
184 LLT Ty = MRI.getType(Dst);
185 unsigned EltSize = Ty.getScalarSizeInBits();
186
187 // Element size for a rev cannot be 64.
188 if (EltSize == 64)
189 return false;
190
191 unsigned NumElts = Ty.getNumElements();
192
193 // Try to produce a G_REV instruction
194 for (unsigned LaneSize : {64U, 32U, 16U}) {
195 if (isREVMask(ShuffleMask, EltSize, NumElts, LaneSize)) {
196 unsigned Opcode;
197 if (LaneSize == 64U)
198 Opcode = AArch64::G_REV64;
199 else if (LaneSize == 32U)
200 Opcode = AArch64::G_REV32;
201 else
202 Opcode = AArch64::G_REV16;
203
204 MatchInfo = ShuffleVectorPseudo(Opcode, Dst, {Src});
205 return true;
206 }
207 }
208
209 return false;
210}
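// For reference, the masks matchREV accepts for an <8 x s8> shuffle look like:
//   G_REV64: <7, 6, 5, 4, 3, 2, 1, 0>  (reverse within each 64-bit block)
//   G_REV32: <3, 2, 1, 0, 7, 6, 5, 4>  (reverse within each 32-bit block)
//   G_REV16: <1, 0, 3, 2, 5, 4, 7, 6>  (reverse within each 16-bit block)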
211
212/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
213/// a G_TRN1 or G_TRN2 instruction.
214bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
215 ShuffleVectorPseudo &MatchInfo) {
216 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
217 unsigned WhichResult;
218 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
219 Register Dst = MI.getOperand(0).getReg();
220 unsigned NumElts = MRI.getType(Dst).getNumElements();
221 if (!isTRNMask(ShuffleMask, NumElts, WhichResult))
222 return false;
223 unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
224 Register V1 = MI.getOperand(1).getReg();
225 Register V2 = MI.getOperand(2).getReg();
226 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
227 return true;
228}
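// For example, for an 8-element shuffle the masks accepted by isTRNMask are
//   G_TRN1: <0, 8, 2, 10, 4, 12, 6, 14>
//   G_TRN2: <1, 9, 3, 11, 5, 13, 7, 15>
// i.e. interleaving the even (TRN1) or odd (TRN2) lanes of both sources.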
229
230/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
231/// a G_UZP1 or G_UZP2 instruction.
232///
233/// \param [in] MI - The shuffle vector instruction.
234/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success.
235bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
236 ShuffleVectorPseudo &MatchInfo) {
237 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
238 unsigned WhichResult;
239 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
240 Register Dst = MI.getOperand(0).getReg();
241 unsigned NumElts = MRI.getType(Dst).getNumElements();
242 if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
243 return false;
244 unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
245 Register V1 = MI.getOperand(1).getReg();
246 Register V2 = MI.getOperand(2).getReg();
247 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
248 return true;
249}
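// For example, for an 8-element shuffle the masks accepted by isUZPMask are
//   G_UZP1: <0, 2, 4, 6, 8, 10, 12, 14>
//   G_UZP2: <1, 3, 5, 7, 9, 11, 13, 15>
// i.e. concatenating the even (UZP1) or odd (UZP2) lanes of both sources.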
250
251bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
252 ShuffleVectorPseudo &MatchInfo) {
253 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
254 unsigned WhichResult;
255 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
256 Register Dst = MI.getOperand(0).getReg();
257 unsigned NumElts = MRI.getType(Dst).getNumElements();
258 if (!isZIPMask(ShuffleMask, NumElts, WhichResult))
259 return false;
260 unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
261 Register V1 = MI.getOperand(1).getReg();
262 Register V2 = MI.getOperand(2).getReg();
263 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
264 return true;
265}
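// For example, for an 8-element shuffle the masks accepted by isZIPMask are
//   G_ZIP1: <0, 8, 1, 9, 2, 10, 3, 11>
//   G_ZIP2: <4, 12, 5, 13, 6, 14, 7, 15>
// i.e. interleaving the low (ZIP1) or high (ZIP2) halves of both sources.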
266
267/// Helper function for matchDup.
268bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
269 MachineRegisterInfo &MRI,
270 ShuffleVectorPseudo &MatchInfo) {
271 if (Lane != 0)
272 return false;
273
274 // Try to match a vector splat operation into a dup instruction.
275 // We're looking for this pattern:
276 //
277 // %scalar:gpr(s64) = COPY $x0
278 // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
279 // %cst0:gpr(s32) = G_CONSTANT i32 0
280 // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
281 // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
282 // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
283 // %zerovec(<2 x s32>)
284 //
285 // ...into:
286 // %splat = G_DUP %scalar
287
288 // Begin matching the insert.
289 auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT,
290 MI.getOperand(1).getReg(), MRI);
291 if (!InsMI)
292 return false;
293 // Match the undef vector operand.
294 if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),
295 MRI))
296 return false;
297
298 // Match the index constant 0.
299 if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt()))
300 return false;
301
302 MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
303 {InsMI->getOperand(2).getReg()});
304 return true;
305}
306
307/// Helper function for matchDup.
308bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
309 MachineRegisterInfo &MRI,
310 ShuffleVectorPseudo &MatchInfo) {
311 assert(Lane >= 0 && "Expected positive lane?");
312 int NumElements = MRI.getType(MI.getOperand(1).getReg()).getNumElements();
313 // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the
314 // lane's definition directly.
315 auto *BuildVecMI =
316 getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
317 MI.getOperand(Lane < NumElements ? 1 : 2).getReg(), MRI);
318 // If Lane >= NumElements, it points into the RHS vector, so check the RHS.
319 if (NumElements <= Lane)
320 Lane -= NumElements;
321
322 if (!BuildVecMI)
323 return false;
324 Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();
325 MatchInfo =
326 ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
327 return true;
328}
329
330bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
331 ShuffleVectorPseudo &MatchInfo) {
332 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
333 auto MaybeLane = getSplatIndex(MI);
334 if (!MaybeLane)
335 return false;
336 int Lane = *MaybeLane;
337 // If this is an undef splat, generate it via "just" vdup, if possible.
338 if (Lane < 0)
339 Lane = 0;
340 if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
341 return true;
342 if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
343 return true;
344 return false;
345}
346
347// Check if an EXT instruction can handle the shuffle mask when the vector
348// sources of the shuffle are the same.
349bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) {
350 unsigned NumElts = Ty.getNumElements();
351
352 // Assume that the first shuffle index is not UNDEF. Fail if it is.
353 if (M[0] < 0)
354 return false;
355
356 // If this is a VEXT shuffle, the immediate value is the index of the first
357 // element. The other shuffle indices must be the successive elements after
358 // the first one.
359 unsigned ExpectedElt = M[0];
360 for (unsigned I = 1; I < NumElts; ++I) {
361 // Increment the expected index. If it wraps around, just follow it
362 // back to index zero and keep going.
363 ++ExpectedElt;
364 if (ExpectedElt == NumElts)
365 ExpectedElt = 0;
366
367 if (M[I] < 0)
368 continue; // Ignore UNDEF indices.
369 if (ExpectedElt != static_cast<unsigned>(M[I]))
370 return false;
371 }
372
373 return true;
374}
375
376bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
377 ShuffleVectorPseudo &MatchInfo) {
378 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
379 Register Dst = MI.getOperand(0).getReg();
380 LLT DstTy = MRI.getType(Dst);
381 Register V1 = MI.getOperand(1).getReg();
382 Register V2 = MI.getOperand(2).getReg();
383 auto Mask = MI.getOperand(3).getShuffleMask();
384 uint64_t Imm;
385 auto ExtInfo = getExtMask(Mask, DstTy.getNumElements());
386 uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
387
388 if (!ExtInfo) {
389 if (!getOpcodeDef<GImplicitDef>(V2, MRI) ||
390 !isSingletonExtMask(Mask, DstTy))
391 return false;
392
393 Imm = Mask[0] * ExtFactor;
394 MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V1, Imm});
395 return true;
396 }
397 bool ReverseExt;
398 std::tie(ReverseExt, Imm) = *ExtInfo;
399 if (ReverseExt)
400 std::swap(V1, V2);
401 Imm *= ExtFactor;
402 MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
403 return true;
404}
405
406/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
407 /// \p MatchInfo carries the opcode and operands to use. \p MI is the G_SHUFFLE_VECTOR.
408void applyShuffleVectorPseudo(MachineInstr &MI,
409 ShuffleVectorPseudo &MatchInfo) {
410 MachineIRBuilder MIRBuilder(MI);
411 MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
412 MI.eraseFromParent();
413}
414
415/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
416/// Special-cased because the constant operand must be emitted as a G_CONSTANT
417/// for the imported tablegen patterns to work.
418void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
419 MachineIRBuilder MIRBuilder(MI);
420 if (MatchInfo.SrcOps[2].getImm() == 0)
421 MIRBuilder.buildCopy(MatchInfo.Dst, MatchInfo.SrcOps[0]);
422 else {
423 // Tablegen patterns expect an i32 G_CONSTANT as the final op.
424 auto Cst =
425 MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
426 MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
427 {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
428 }
429 MI.eraseFromParent();
430}
431
432void applyFullRev(MachineInstr &MI, MachineRegisterInfo &MRI) {
433 Register Dst = MI.getOperand(0).getReg();
434 Register Src = MI.getOperand(1).getReg();
435 LLT DstTy = MRI.getType(Dst);
436 assert(DstTy.getSizeInBits() == 128 &&
437 "Expected 128bit vector in applyFullRev");
438 MachineIRBuilder MIRBuilder(MI);
439 auto Cst = MIRBuilder.buildConstant(LLT::scalar(32), 8);
440 auto Rev = MIRBuilder.buildInstr(AArch64::G_REV64, {DstTy}, {Src});
441 MIRBuilder.buildInstr(AArch64::G_EXT, {Dst}, {Rev, Rev, Cst});
442 MI.eraseFromParent();
443}
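// As an illustration, fully reversing a <16 x s8> vector this way: G_REV64
// reverses the bytes within each 64-bit half, and the trailing G_EXT with a
// byte offset of 8 swaps the two halves, which together reverse all 16 lanes.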
444
445bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) {
446 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
447
448 auto ValAndVReg =
449 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
450 return !ValAndVReg;
451}
452
453void applyNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI,
454 MachineIRBuilder &Builder) {
455 GInsertVectorElement &Insert = cast<GInsertVectorElement>(MI);
456 Builder.setInstrAndDebugLoc(Insert);
457
458 Register Offset = Insert.getIndexReg();
459 LLT VecTy = MRI.getType(Insert.getReg(0));
460 LLT EltTy = MRI.getType(Insert.getElementReg());
461 LLT IdxTy = MRI.getType(Insert.getIndexReg());
462
463 if (VecTy.isScalableVector())
464 return;
465
466 // Create a stack slot and store the vector into it
467 MachineFunction &MF = Builder.getMF();
468 Align Alignment(
469 std::min<uint64_t>(VecTy.getSizeInBytes().getKnownMinValue(), 16));
470 int FrameIdx = MF.getFrameInfo().CreateStackObject(VecTy.getSizeInBytes(),
471 Alignment, false);
472 LLT FramePtrTy = LLT::pointer(0, 64);
473 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
474 auto StackTemp = Builder.buildFrameIndex(FramePtrTy, FrameIdx);
475
476 Builder.buildStore(Insert.getOperand(1), StackTemp, PtrInfo, Align(8));
477
478 // Get the pointer to the element, and be sure not to hit undefined behavior
479 // if the index is out of bounds.
481 "Expected a power-2 vector size");
482 auto Mask = Builder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
483 Register And = Builder.buildAnd(IdxTy, Offset, Mask).getReg(0);
484 auto EltSize = Builder.buildConstant(IdxTy, EltTy.getSizeInBytes());
485 Register Mul = Builder.buildMul(IdxTy, And, EltSize).getReg(0);
486 Register EltPtr =
487 Builder.buildPtrAdd(MRI.getType(StackTemp.getReg(0)), StackTemp, Mul)
488 .getReg(0);
489
490 // Write the inserted element
491 Builder.buildStore(Insert.getElementReg(), EltPtr, PtrInfo, Align(1));
492 // Reload the whole vector.
493 Builder.buildLoad(Insert.getReg(0), StackTemp, PtrInfo, Align(8));
494 Insert.eraseFromParent();
495}
496
497/// Match a G_SHUFFLE_VECTOR with a mask which corresponds to a
498/// G_INSERT_VECTOR_ELT and G_EXTRACT_VECTOR_ELT pair.
499///
500/// e.g.
501/// %shuf = G_SHUFFLE_VECTOR %left, %right, shufflemask(0, 0)
502///
503/// Can be represented as
504///
505/// %extract = G_EXTRACT_VECTOR_ELT %left, 0
506/// %ins = G_INSERT_VECTOR_ELT %left, %extract, 1
507///
508bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI,
509 std::tuple<Register, int, Register, int> &MatchInfo) {
510 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
511 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
512 Register Dst = MI.getOperand(0).getReg();
513 int NumElts = MRI.getType(Dst).getNumElements();
514 auto DstIsLeftAndDstLane = isINSMask(ShuffleMask, NumElts);
515 if (!DstIsLeftAndDstLane)
516 return false;
517 bool DstIsLeft;
518 int DstLane;
519 std::tie(DstIsLeft, DstLane) = *DstIsLeftAndDstLane;
520 Register Left = MI.getOperand(1).getReg();
521 Register Right = MI.getOperand(2).getReg();
522 Register DstVec = DstIsLeft ? Left : Right;
523 Register SrcVec = Left;
524
525 int SrcLane = ShuffleMask[DstLane];
526 if (SrcLane >= NumElts) {
527 SrcVec = Right;
528 SrcLane -= NumElts;
529 }
530
531 MatchInfo = std::make_tuple(DstVec, DstLane, SrcVec, SrcLane);
532 return true;
533}
534
535void applyINS(MachineInstr &MI, MachineRegisterInfo &MRI,
536 MachineIRBuilder &Builder,
537 std::tuple<Register, int, Register, int> &MatchInfo) {
538 Builder.setInstrAndDebugLoc(MI);
539 Register Dst = MI.getOperand(0).getReg();
540 auto ScalarTy = MRI.getType(Dst).getElementType();
541 Register DstVec, SrcVec;
542 int DstLane, SrcLane;
543 std::tie(DstVec, DstLane, SrcVec, SrcLane) = MatchInfo;
544 auto SrcCst = Builder.buildConstant(LLT::scalar(64), SrcLane);
545 auto Extract = Builder.buildExtractVectorElement(ScalarTy, SrcVec, SrcCst);
546 auto DstCst = Builder.buildConstant(LLT::scalar(64), DstLane);
547 Builder.buildInsertVectorElement(Dst, DstVec, Extract, DstCst);
548 MI.eraseFromParent();
549}
550
551/// isVShiftRImm - Check if this is a valid vector for the immediate
552/// operand of a vector shift right operation. The value must be in the range:
553/// 1 <= Value <= ElementBits for a right shift.
554bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
555 int64_t &Cnt) {
556 assert(Ty.isVector() && "vector shift count is not a vector type");
557 MachineInstr *MI = MRI.getVRegDef(Reg);
558 auto Cst = getAArch64VectorSplatScalar(*MI, MRI);
559 if (!Cst)
560 return false;
561 Cnt = *Cst;
562 int64_t ElementBits = Ty.getScalarSizeInBits();
563 return Cnt >= 1 && Cnt <= ElementBits;
564}
565
566/// Match a vector G_ASHR or G_LSHR with a valid immediate shift.
567bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
568 int64_t &Imm) {
569 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
570 MI.getOpcode() == TargetOpcode::G_LSHR);
571 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
572 if (!Ty.isVector())
573 return false;
574 return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
575}
576
577void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
578 int64_t &Imm) {
579 unsigned Opc = MI.getOpcode();
580 assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
581 unsigned NewOpc =
582 Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
583 MachineIRBuilder MIB(MI);
584 MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1)}).addImm(Imm);
585 MI.eraseFromParent();
586}
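// A small example of the rewrite (sketch): a right shift by a splat constant
//   %amt:_(<4 x s32>) = G_BUILD_VECTOR %c3, %c3, %c3, %c3
//   %d:_(<4 x s32>) = G_ASHR %x, %amt
// becomes the immediate form
//   %d:_(<4 x s32>) = G_VASHR %x, 3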
587
588/// Determine if it is possible to modify the \p RHS and predicate \p P of a
589/// G_ICMP instruction such that the right-hand side is an arithmetic immediate.
590///
591/// \returns A pair containing the updated immediate and predicate which may
592/// be used to optimize the instruction.
593///
594/// \note This assumes that the comparison has been legalized.
595std::optional<std::pair<uint64_t, CmpInst::Predicate>>
596tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
597 const MachineRegisterInfo &MRI) {
598 const auto &Ty = MRI.getType(RHS);
599 if (Ty.isVector())
600 return std::nullopt;
601 unsigned Size = Ty.getSizeInBits();
602 assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?");
603
604 // If the RHS is not a constant, or the RHS is already a valid arithmetic
605 // immediate, then there is nothing to change.
606 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, MRI);
607 if (!ValAndVReg)
608 return std::nullopt;
609 uint64_t OriginalC = ValAndVReg->Value.getZExtValue();
610 uint64_t C = OriginalC;
611 if (isLegalArithImmed(C))
612 return std::nullopt;
613
614 // We have a non-arithmetic immediate. Check if adjusting the immediate and
615 // adjusting the predicate will result in a legal arithmetic immediate.
616 switch (P) {
617 default:
618 return std::nullopt;
619 case CmpInst::ICMP_SLT:
620 case CmpInst::ICMP_SGE:
621 // Check for
622 //
623 // x slt c => x sle c - 1
624 // x sge c => x sgt c - 1
625 //
626 // When c is not the smallest possible negative number.
627 if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
628 (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
629 return std::nullopt;
630 P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
631 C -= 1;
632 break;
633 case CmpInst::ICMP_ULT:
634 case CmpInst::ICMP_UGE:
635 // Check for
636 //
637 // x ult c => x ule c - 1
638 // x uge c => x ugt c - 1
639 //
640 // When c is not zero.
641 assert(C != 0 && "C should not be zero here!");
642 P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
643 C -= 1;
644 break;
645 case CmpInst::ICMP_SLE:
646 case CmpInst::ICMP_SGT:
647 // Check for
648 //
649 // x sle c => x slt c + 1
650 // x sgt c => x sge c + 1
651 //
652 // When c is not the largest possible signed integer.
653 if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
654 (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
655 return std::nullopt;
656 P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
657 C += 1;
658 break;
659 case CmpInst::ICMP_ULE:
660 case CmpInst::ICMP_UGT:
661 // Check for
662 //
663 // x ule c => x ult c + 1
664 // x ugt c => x uge c + 1
665 //
666 // When c is not the largest possible unsigned integer.
667 if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
668 (Size == 64 && C == UINT64_MAX))
669 return std::nullopt;
670 P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
671 C += 1;
672 break;
673 }
674
675 // Check if the new constant is valid, and return the updated constant and
676 // predicate if it is.
677 if (Size == 32)
678 C = static_cast<uint32_t>(C);
679 if (isLegalArithImmed(C))
680 return {{C, P}};
681
682 auto NumberOfInstrToLoadImm = [=](uint64_t Imm) {
683 SmallVector<AArch64_IMM::ImmInsnModel> Insn;
684 AArch64_IMM::expandMOVImm(Imm, 32, Insn);
685 return Insn.size();
686 };
687
688 if (NumberOfInstrToLoadImm(OriginalC) > NumberOfInstrToLoadImm(C))
689 return {{C, P}};
690
691 return std::nullopt;
692}
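// For example, 4097 (0x1001) is not a legal arithmetic immediate, but
//   x slt 4097  ==>  x sle 4096
// is, because 4096 is representable as a shifted 12-bit value, so the pair
// {4096, ICMP_SLE} would be returned here.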
693
694/// Determine whether or not it is possible to update the RHS and predicate of
695/// a G_ICMP instruction such that the RHS will be selected as an arithmetic
696/// immediate.
697///
698/// \p MI - The G_ICMP instruction
699/// \p MatchInfo - The new RHS immediate and predicate on success
700///
701/// See tryAdjustICmpImmAndPred for valid transformations.
702bool matchAdjustICmpImmAndPred(
703 MachineInstr &MI, MachineRegisterInfo &MRI,
704 std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) {
705 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
706 Register RHS = MI.getOperand(3).getReg();
707 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
708 if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) {
709 MatchInfo = *MaybeNewImmAndPred;
710 return true;
711 }
712 return false;
713}
714
715void applyAdjustICmpImmAndPred(
716 MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo,
717 MachineIRBuilder &MIB, GISelChangeObserver &Observer) {
718 MIB.setInstrAndDebugLoc(MI);
719 MachineOperand &RHS = MI.getOperand(3);
720 MachineRegisterInfo &MRI = *MIB.getMRI();
721 auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()),
722 MatchInfo.first);
723 Observer.changingInstr(MI);
724 RHS.setReg(Cst->getOperand(0).getReg());
725 MI.getOperand(1).setPredicate(MatchInfo.second);
726 Observer.changedInstr(MI);
727}
728
729bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
730 std::pair<unsigned, int> &MatchInfo) {
731 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
732 Register Src1Reg = MI.getOperand(1).getReg();
733 const LLT SrcTy = MRI.getType(Src1Reg);
734 const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
735
736 auto LaneIdx = getSplatIndex(MI);
737 if (!LaneIdx)
738 return false;
739
740 // The lane idx should be within the first source vector.
741 if (*LaneIdx >= SrcTy.getNumElements())
742 return false;
743
744 if (DstTy != SrcTy)
745 return false;
746
747 LLT ScalarTy = SrcTy.getElementType();
748 unsigned ScalarSize = ScalarTy.getSizeInBits();
749
750 unsigned Opc = 0;
751 switch (SrcTy.getNumElements()) {
752 case 2:
753 if (ScalarSize == 64)
754 Opc = AArch64::G_DUPLANE64;
755 else if (ScalarSize == 32)
756 Opc = AArch64::G_DUPLANE32;
757 break;
758 case 4:
759 if (ScalarSize == 32)
760 Opc = AArch64::G_DUPLANE32;
761 else if (ScalarSize == 16)
762 Opc = AArch64::G_DUPLANE16;
763 break;
764 case 8:
765 if (ScalarSize == 8)
766 Opc = AArch64::G_DUPLANE8;
767 else if (ScalarSize == 16)
768 Opc = AArch64::G_DUPLANE16;
769 break;
770 case 16:
771 if (ScalarSize == 8)
772 Opc = AArch64::G_DUPLANE8;
773 break;
774 default:
775 break;
776 }
777 if (!Opc)
778 return false;
779
780 MatchInfo.first = Opc;
781 MatchInfo.second = *LaneIdx;
782 return true;
783}
784
785void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
786 MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
787 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
788 Register Src1Reg = MI.getOperand(1).getReg();
789 const LLT SrcTy = MRI.getType(Src1Reg);
790
791 B.setInstrAndDebugLoc(MI);
792 auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second);
793
794 Register DupSrc = MI.getOperand(1).getReg();
795 // For types like <2 x s32>, we can use G_DUPLANE32, with a <4 x s32> source.
796 // To do this, we can use a G_CONCAT_VECTORS to do the widening.
797 if (SrcTy.getSizeInBits() == 64) {
798 auto Undef = B.buildUndef(SrcTy);
799 DupSrc = B.buildConcatVectors(SrcTy.multiplyElements(2),
800 {Src1Reg, Undef.getReg(0)})
801 .getReg(0);
802 }
803 B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, {DupSrc, Lane});
804 MI.eraseFromParent();
805}
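// Roughly, for a 64-bit source this produces (sketch, splatting lane 1 of a
// <2 x s32> vector):
//   %w:_(<4 x s32>) = G_CONCAT_VECTORS %src(<2 x s32>), %undef(<2 x s32>)
//   %lane:_(s64) = G_CONSTANT i64 1
//   %dst:_(<2 x s32>) = G_DUPLANE32 %w, %lane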
806
807bool matchScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI) {
808 auto &Unmerge = cast<GUnmerge>(MI);
809 Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
810 const LLT SrcTy = MRI.getType(Src1Reg);
811 if (SrcTy.getSizeInBits() != 128 && SrcTy.getSizeInBits() != 64)
812 return false;
813 return SrcTy.isVector() && !SrcTy.isScalable() &&
814 Unmerge.getNumOperands() == (unsigned)SrcTy.getNumElements() + 1;
815}
816
817void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
818 MachineIRBuilder &B) {
819 auto &Unmerge = cast<GUnmerge>(MI);
820 Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
821 const LLT SrcTy = MRI.getType(Src1Reg);
822 assert((SrcTy.isVector() && !SrcTy.isScalable()) &&
823 "Expected a fixed length vector");
824
825 for (int I = 0; I < SrcTy.getNumElements(); ++I)
826 B.buildExtractVectorElementConstant(Unmerge.getReg(I), Src1Reg, I);
827 MI.eraseFromParent();
828}
829
830bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
831 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
832
833 // Later, during selection, we'll try to match imported patterns using
834 // immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
835 // G_BUILD_VECTORs which could match those patterns.
836 if (isBuildVectorAllZeros(MI, MRI) || isBuildVectorAllOnes(MI, MRI))
837 return false;
838
839 return getAArch64VectorSplat(MI, MRI).has_value();
840}
841
842void applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
843 MachineIRBuilder &B) {
844 B.setInstrAndDebugLoc(MI);
845 B.buildInstr(AArch64::G_DUP, {MI.getOperand(0).getReg()},
846 {MI.getOperand(1).getReg()});
847 MI.eraseFromParent();
848}
849
850/// \returns how many instructions would be saved by folding a G_ICMP's shift
851/// and/or extension operations.
852unsigned getCmpOperandFoldingProfit(Register CmpOp, MachineRegisterInfo &MRI) {
853 // No instructions to save if there's more than one use or no uses.
854 if (!MRI.hasOneNonDBGUse(CmpOp))
855 return 0;
856
857 // FIXME: This is duplicated with the selector. (See: selectShiftedRegister)
858 auto IsSupportedExtend = [&](const MachineInstr &MI) {
859 if (MI.getOpcode() == TargetOpcode::G_SEXT_INREG)
860 return true;
861 if (MI.getOpcode() != TargetOpcode::G_AND)
862 return false;
863 auto ValAndVReg =
864 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
865 if (!ValAndVReg)
866 return false;
867 uint64_t Mask = ValAndVReg->Value.getZExtValue();
868 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
869 };
870
871 MachineInstr *Def = getDefIgnoringCopies(CmpOp, MRI);
872 if (IsSupportedExtend(*Def))
873 return 1;
874
875 unsigned Opc = Def->getOpcode();
876 if (Opc != TargetOpcode::G_SHL && Opc != TargetOpcode::G_ASHR &&
877 Opc != TargetOpcode::G_LSHR)
878 return 0;
879
880 auto MaybeShiftAmt =
881 getIConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
882 if (!MaybeShiftAmt)
883 return 0;
884 uint64_t ShiftAmt = MaybeShiftAmt->Value.getZExtValue();
885 MachineInstr *ShiftLHS =
886 getDefIgnoringCopies(Def->getOperand(1).getReg(), MRI);
887
888 // Check if we can fold an extend and a shift.
889 // FIXME: This is duplicated with the selector. (See:
890 // selectArithExtendedRegister)
891 if (IsSupportedExtend(*ShiftLHS))
892 return (ShiftAmt <= 4) ? 2 : 1;
893
894 LLT Ty = MRI.getType(Def->getOperand(0).getReg());
895 if (Ty.isVector())
896 return 0;
897 unsigned ShiftSize = Ty.getSizeInBits();
898 if ((ShiftSize == 32 && ShiftAmt <= 31) ||
899 (ShiftSize == 64 && ShiftAmt <= 63))
900 return 1;
901 return 0;
902}
903
904/// \returns true if it would be profitable to swap the LHS and RHS of a G_ICMP
905/// instruction \p MI.
906bool trySwapICmpOperands(MachineInstr &MI, MachineRegisterInfo &MRI) {
907 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
908 // Swap the operands if it would introduce a profitable folding opportunity.
909 // (e.g. a shift + extend).
910 //
911 // For example:
912 // lsl w13, w11, #1
913 // cmp w13, w12
914 // can be turned into:
915 // cmp w12, w11, lsl #1
916
917 // Don't swap if there's a constant on the RHS, because we know we can fold
918 // that.
919 Register RHS = MI.getOperand(3).getReg();
920 auto RHSCst = getIConstantVRegValWithLookThrough(RHS, MRI);
921 if (RHSCst && isLegalArithImmed(RHSCst->Value.getSExtValue()))
922 return false;
923
924 Register LHS = MI.getOperand(2).getReg();
925 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
926 auto GetRegForProfit = [&](Register Reg) {
927 MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
928 return isCMN(Def, Pred, MRI) ? Def->getOperand(2).getReg() : Reg;
929 };
930
931 // Don't have a constant on the RHS. If we swap the LHS and RHS of the
932 // compare, would we be able to fold more instructions?
933 Register TheLHS = GetRegForProfit(LHS);
934 Register TheRHS = GetRegForProfit(RHS);
935
936 // If the LHS is more likely to give us a folding opportunity, then swap the
937 // LHS and RHS.
938 return (getCmpOperandFoldingProfit(TheLHS, MRI) >
939 getCmpOperandFoldingProfit(TheRHS, MRI));
940}
941
942void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
943 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
944 Register LHS = MI.getOperand(2).getReg();
945 Register RHS = MI.getOperand(3).getReg();
946 Observer.changingInstr(MI);
947 MI.getOperand(1).setPredicate(CmpInst::getSwappedPredicate(Pred));
948 MI.getOperand(2).setReg(RHS);
949 MI.getOperand(3).setReg(LHS);
950 Observer.changedInstr(MI);
951}
952
953/// \returns a function which builds a vector floating point compare instruction
954/// for a condition code \p CC.
955/// \param [in] NoNans - True if the target has NoNansFPMath.
956std::function<Register(MachineIRBuilder &)>
957getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool NoNans,
958 MachineRegisterInfo &MRI) {
959 LLT DstTy = MRI.getType(LHS);
960 assert(DstTy.isVector() && "Expected vector types only?");
961 assert(DstTy == MRI.getType(RHS) && "Src and Dst types must match!");
962 switch (CC) {
963 default:
964 llvm_unreachable("Unexpected condition code!");
965 case AArch64CC::NE:
966 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
967 auto FCmp = MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
968 return MIB.buildNot(DstTy, FCmp).getReg(0);
969 };
970 case AArch64CC::EQ:
971 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
972 return MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS}).getReg(0);
973 };
974 case AArch64CC::GE:
975 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
976 return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS}).getReg(0);
977 };
978 case AArch64CC::GT:
979 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
980 return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS}).getReg(0);
981 };
982 case AArch64CC::LS:
983 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
984 return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS}).getReg(0);
985 };
986 case AArch64CC::MI:
987 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
988 return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS}).getReg(0);
989 };
990 }
991}
992
993/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
994bool matchLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
995 MachineIRBuilder &MIB) {
996 assert(MI.getOpcode() == TargetOpcode::G_FCMP);
997 const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
998
999 Register Dst = MI.getOperand(0).getReg();
1000 LLT DstTy = MRI.getType(Dst);
1001 if (!DstTy.isVector() || !ST.hasNEON())
1002 return false;
1003 Register LHS = MI.getOperand(2).getReg();
1004 unsigned EltSize = MRI.getType(LHS).getScalarSizeInBits();
1005 if (EltSize == 16 && !ST.hasFullFP16())
1006 return false;
1007 if (EltSize != 16 && EltSize != 32 && EltSize != 64)
1008 return false;
1009
1010 return true;
1011}
1012
1013/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
1014void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
1015 MachineIRBuilder &MIB) {
1016 assert(MI.getOpcode() == TargetOpcode::G_FCMP);
1017 const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
1018
1019 const auto &CmpMI = cast<GFCmp>(MI);
1020
1021 Register Dst = CmpMI.getReg(0);
1022 CmpInst::Predicate Pred = CmpMI.getCond();
1023 Register LHS = CmpMI.getLHSReg();
1024 Register RHS = CmpMI.getRHSReg();
1025
1026 LLT DstTy = MRI.getType(Dst);
1027
1028 bool Invert = false;
1029 AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
1030 if ((Pred == CmpInst::Predicate::FCMP_ORD ||
1031 Pred == CmpInst::Predicate::FCMP_UNO) &&
1032 isBuildVectorAllZeros(*MRI.getVRegDef(RHS), MRI)) {
1033 // The special case "fcmp ord %a, 0" is the canonical check that LHS isn't
1034 // NaN, so equivalent to a == a and doesn't need the two comparisons an
1035 // "ord" normally would.
1036 // Similarly, "fcmp uno %a, 0" is the canonical check that LHS is NaN and is
1037 // thus equivalent to a != a.
1038 RHS = LHS;
1039 CC = Pred == CmpInst::Predicate::FCMP_ORD ? AArch64CC::EQ : AArch64CC::NE;
1040 } else
1041 changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);
1042
1043 // Instead of having an apply function, just build here to simplify things.
1044 MIB.setInstrAndDebugLoc(MI);
1045
1046 const bool NoNans =
1047 ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;
1048
1049 auto Cmp = getVectorFCMP(CC, LHS, RHS, NoNans, MRI);
1050 Register CmpRes;
1051 if (CC2 == AArch64CC::AL)
1052 CmpRes = Cmp(MIB);
1053 else {
1054 auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, NoNans, MRI);
1055 auto Cmp2Dst = Cmp2(MIB);
1056 auto Cmp1Dst = Cmp(MIB);
1057 CmpRes = MIB.buildOr(DstTy, Cmp1Dst, Cmp2Dst).getReg(0);
1058 }
1059 if (Invert)
1060 CmpRes = MIB.buildNot(DstTy, CmpRes).getReg(0);
1061 MRI.replaceRegWith(Dst, CmpRes);
1062 MI.eraseFromParent();
1063}
1064
1065// Matches G_BUILD_VECTOR where at least one source operand is not a constant
1066bool matchLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI) {
1067 auto *GBuildVec = cast<GBuildVector>(&MI);
1068
1069 // Check if the values are all constants
1070 for (unsigned I = 0; I < GBuildVec->getNumSources(); ++I) {
1071 auto ConstVal =
1072 getAnyConstantVRegValWithLookThrough(GBuildVec->getSourceReg(I), MRI);
1073
1074 if (!ConstVal.has_value())
1075 return true;
1076 }
1077
1078 return false;
1079}
1080
1081void applyLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI,
1082 MachineIRBuilder &B) {
1083 auto *GBuildVec = cast<GBuildVector>(&MI);
1084 LLT DstTy = MRI.getType(GBuildVec->getReg(0));
1085 Register DstReg = B.buildUndef(DstTy).getReg(0);
1086
1087 for (unsigned I = 0; I < GBuildVec->getNumSources(); ++I) {
1088 Register SrcReg = GBuildVec->getSourceReg(I);
1089 if (mi_match(SrcReg, MRI, m_GImplicitDef()))
1090 continue;
1091 auto IdxReg = B.buildConstant(LLT::scalar(64), I);
1092 DstReg =
1093 B.buildInsertVectorElement(DstTy, DstReg, SrcReg, IdxReg).getReg(0);
1094 }
1095 B.buildCopy(GBuildVec->getReg(0), DstReg);
1096 GBuildVec->eraseFromParent();
1097}
1098
1099bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
1100 Register &SrcReg) {
1101 assert(MI.getOpcode() == TargetOpcode::G_STORE);
1102 Register DstReg = MI.getOperand(0).getReg();
1103 if (MRI.getType(DstReg).isVector())
1104 return false;
1105 // Match a store of a truncate.
1106 if (!mi_match(DstReg, MRI, m_GTrunc(m_Reg(SrcReg))))
1107 return false;
1108 // Only form truncstores for value types of max 64b.
1109 return MRI.getType(SrcReg).getSizeInBits() <= 64;
1110}
1111
1112void applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
1113 MachineIRBuilder &B, GISelChangeObserver &Observer,
1114 Register &SrcReg) {
1115 assert(MI.getOpcode() == TargetOpcode::G_STORE);
1116 Observer.changingInstr(MI);
1117 MI.getOperand(0).setReg(SrcReg);
1118 Observer.changedInstr(MI);
1119}
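// The net effect (sketch): a store of a truncated value
//   %t:_(s32) = G_TRUNC %v:_(s64)
//   G_STORE %t:_(s32), %p :: (store (s32))
// simply stores the wider register instead, leaving the memory size unchanged:
//   G_STORE %v:_(s64), %p :: (store (s32))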
1120
1121// Lower vector G_SEXT_INREG back to shifts for selection. We allowed them to
1122// form in the first place for combine opportunities, so any remaining ones
1123 // at this stage need to be lowered back.
1124bool matchVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI) {
1125 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1126 Register DstReg = MI.getOperand(0).getReg();
1127 LLT DstTy = MRI.getType(DstReg);
1128 return DstTy.isVector();
1129}
1130
1131void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
1132 GISelChangeObserver &Observer, MachineIRBuilder &B) {
1133 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1134 B.setInstrAndDebugLoc(MI);
1135 LegalizerHelper Helper(*MI.getMF(), Observer, B);
1136 Helper.lower(MI, 0, /* Unused hint type */ LLT());
1137}
1138
1139/// Combine <N x t>, unused = unmerge(G_EXT <2*N x t> v, undef, N)
1140/// => unused, <N x t> = unmerge v
1141bool matchUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
1142 Register &MatchInfo) {
1143 auto &Unmerge = cast<GUnmerge>(MI);
1144 if (Unmerge.getNumDefs() != 2)
1145 return false;
1146 if (!MRI.use_nodbg_empty(Unmerge.getReg(1)))
1147 return false;
1148
1149 LLT DstTy = MRI.getType(Unmerge.getReg(0));
1150 if (!DstTy.isVector())
1151 return false;
1152
1153 MachineInstr *Ext = getOpcodeDef(AArch64::G_EXT, Unmerge.getSourceReg(), MRI);
1154 if (!Ext)
1155 return false;
1156
1157 Register ExtSrc1 = Ext->getOperand(1).getReg();
1158 Register ExtSrc2 = Ext->getOperand(2).getReg();
1159 auto LowestVal =
1160 getIConstantVRegValWithLookThrough(Ext->getOperand(3).getReg(), MRI);
1161 if (!LowestVal || LowestVal->Value.getZExtValue() != DstTy.getSizeInBytes())
1162 return false;
1163
1164 if (!getOpcodeDef<GImplicitDef>(ExtSrc2, MRI))
1165 return false;
1166
1167 MatchInfo = ExtSrc1;
1168 return true;
1169}
1170
1171void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
1172 MachineIRBuilder &B,
1173 GISelChangeObserver &Observer, Register &SrcReg) {
1174 Observer.changingInstr(MI);
1175 // Swap dst registers.
1176 Register Dst1 = MI.getOperand(0).getReg();
1177 MI.getOperand(0).setReg(MI.getOperand(1).getReg());
1178 MI.getOperand(1).setReg(Dst1);
1179 MI.getOperand(2).setReg(SrcReg);
1180 Observer.changedInstr(MI);
1181}
1182
1183// Match mul({z/s}ext , {z/s}ext) => {u/s}mull OR
1184// Match v2s64 mul instructions, which will then be scalarised later on
1185 // These two matches are done in one function to ensure that the order of
1186 // matching will always be the same.
1187// Try lowering MUL to MULL before trying to scalarize if needed.
1188bool matchMulv2s64(MachineInstr &MI, MachineRegisterInfo &MRI) {
1189 // Get the instructions that defined the source operand
1190 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1191 return DstTy == LLT::fixed_vector(2, 64);
1192}
1193
1194void applyMulv2s64(MachineInstr &MI, MachineRegisterInfo &MRI,
1195 MachineIRBuilder &B, GISelChangeObserver &Observer) {
1196 assert(MI.getOpcode() == TargetOpcode::G_MUL &&
1197 "Expected a G_MUL instruction");
1198
1199 // Get the instructions that defined the source operand
1200 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1201 assert(DstTy == LLT::fixed_vector(2, 64) && "Expected v2s64 Mul");
1202 LegalizerHelper Helper(*MI.getMF(), Observer, B);
1203 Helper.fewerElementsVector(
1204 MI, 0,
1205 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2)));
1206}
1207
1208class AArch64PostLegalizerLoweringImpl : public Combiner {
1209protected:
1210 const CombinerHelper Helper;
1211 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig;
1212 const AArch64Subtarget &STI;
1213
1214public:
1215 AArch64PostLegalizerLoweringImpl(
1216 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
1217 GISelCSEInfo *CSEInfo,
1218 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
1219 const AArch64Subtarget &STI);
1220
1221 static const char *getName() { return "AArch64PostLegalizerLowering"; }
1222
1223 bool tryCombineAll(MachineInstr &I) const override;
1224
1225private:
1226#define GET_GICOMBINER_CLASS_MEMBERS
1227#include "AArch64GenPostLegalizeGILowering.inc"
1228#undef GET_GICOMBINER_CLASS_MEMBERS
1229};
1230
1231#define GET_GICOMBINER_IMPL
1232#include "AArch64GenPostLegalizeGILowering.inc"
1233#undef GET_GICOMBINER_IMPL
1234
1235AArch64PostLegalizerLoweringImpl::AArch64PostLegalizerLoweringImpl(
1236 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
1237 GISelCSEInfo *CSEInfo,
1238 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
1239 const AArch64Subtarget &STI)
1240 : Combiner(MF, CInfo, TPC, /*VT*/ nullptr, CSEInfo),
1241 Helper(Observer, B, /*IsPreLegalize*/ true), RuleConfig(RuleConfig),
1242 STI(STI),
1243#define GET_GICOMBINER_CONSTRUCTOR_INITS
1244#include "AArch64GenPostLegalizeGILowering.inc"
1245#undef GET_GICOMBINER_CONSTRUCTOR_INITS
1246{
1247}
1248
1249class AArch64PostLegalizerLowering : public MachineFunctionPass {
1250public:
1251 static char ID;
1252
1253 AArch64PostLegalizerLowering();
1254
1255 StringRef getPassName() const override {
1256 return "AArch64PostLegalizerLowering";
1257 }
1258
1259 bool runOnMachineFunction(MachineFunction &MF) override;
1260 void getAnalysisUsage(AnalysisUsage &AU) const override;
1261
1262private:
1263 AArch64PostLegalizerLoweringImplRuleConfig RuleConfig;
1264};
1265} // end anonymous namespace
1266
1267void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const {
1268 AU.addRequired<TargetPassConfig>();
1269 AU.setPreservesCFG();
1270 getSelectionDAGFallbackAnalysisUsage(AU);
1271 MachineFunctionPass::getAnalysisUsage(AU);
1272}
1273
1274AArch64PostLegalizerLowering::AArch64PostLegalizerLowering()
1275 : MachineFunctionPass(ID) {
1276 if (!RuleConfig.parseCommandLineOption())
1277 report_fatal_error("Invalid rule identifier");
1278}
1279
1280bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) {
1281 if (MF.getProperties().hasFailedISel())
1282 return false;
1283 assert(MF.getProperties().hasLegalized() && "Expected a legalized function?");
1284 auto *TPC = &getAnalysis<TargetPassConfig>();
1285 const Function &F = MF.getFunction();
1286
1287 const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
1288 CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
1289 /*LegalizerInfo*/ nullptr, /*OptEnabled=*/true,
1290 F.hasOptSize(), F.hasMinSize());
1291 // Disable fixed-point iteration to reduce compile-time
1292 CInfo.MaxIterations = 1;
1293 CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
1294 // PostLegalizerCombiner performs DCE, so a full DCE pass is unnecessary.
1295 CInfo.EnableFullDCE = false;
1296 AArch64PostLegalizerLoweringImpl Impl(MF, CInfo, TPC, /*CSEInfo*/ nullptr,
1297 RuleConfig, ST);
1298 return Impl.combineMachineInstrs();
1299}
1300
1301char AArch64PostLegalizerLowering::ID = 0;
1302INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE,
1303 "Lower AArch64 MachineInstrs after legalization", false,
1304 false)
1305INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
1306 INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE,
1307 "Lower AArch64 MachineInstrs after legalization", false,
1308 false)
1309
1310namespace llvm {
1311FunctionPass *createAArch64PostLegalizerLowering() {
1312 return new AArch64PostLegalizerLowering();
1313}
1314} // end namespace llvm