1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64MachineFunctionInfo.h"
14#include "AArch64TargetMachine.h"
15#include "MCTargetDesc/AArch64AddressingModes.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/CodeGen/ISDOpcodes.h"
18#include "llvm/CodeGen/SelectionDAGISel.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/ErrorHandling.h"
25#include "llvm/Support/KnownBits.h"
26#include "llvm/Support/MathExtras.h"
27#include "llvm/Support/raw_ostream.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34// https://github.com/llvm/llvm-project/issues/114425
35#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
36#pragma inline_depth(0)
37#endif
38
39//===--------------------------------------------------------------------===//
40/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
41/// instructions for SelectionDAG operations.
42///
43namespace {
44
45class AArch64DAGToDAGISel : public SelectionDAGISel {
46
47 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
48 /// make the right decision when generating code for different targets.
49 const AArch64Subtarget *Subtarget;
50
51public:
52 AArch64DAGToDAGISel() = delete;
53
54 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
55 CodeGenOptLevel OptLevel)
56 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
57
58 bool runOnMachineFunction(MachineFunction &MF) override {
59 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
60 return SelectionDAGISel::runOnMachineFunction(MF);
61 }
62
63 void Select(SDNode *Node) override;
64
65 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
66 /// inline asm expressions.
67 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
68 InlineAsm::ConstraintCode ConstraintID,
69 std::vector<SDValue> &OutOps) override;
70
71 template <signed Low, signed High, signed Scale>
72 bool SelectRDVLImm(SDValue N, SDValue &Imm);
73
74 template <signed Low, signed High>
75 bool SelectRDSVLShiftImm(SDValue N, SDValue &Imm);
76
77 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
78 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
79 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
80 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
81 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
82 return SelectShiftedRegister(N, false, Reg, Shift);
83 }
84 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
85 return SelectShiftedRegister(N, true, Reg, Shift);
86 }
87 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
88 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
89 }
90 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
91 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
92 }
93 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
94 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
95 }
96 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
97 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
98 }
99 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
100 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
101 }
102 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
103 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
104 }
105 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
106 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
107 }
108 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
109 return SelectAddrModeIndexed(N, 1, Base, OffImm);
110 }
111 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
112 return SelectAddrModeIndexed(N, 2, Base, OffImm);
113 }
114 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
115 return SelectAddrModeIndexed(N, 4, Base, OffImm);
116 }
117 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
118 return SelectAddrModeIndexed(N, 8, Base, OffImm);
119 }
120 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
121 return SelectAddrModeIndexed(N, 16, Base, OffImm);
122 }
123 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
124 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
125 }
126 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
127 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
128 }
129 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
130 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
131 }
132 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
133 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
134 }
135 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
136 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
137 }
138 template <unsigned Size, unsigned Max>
139 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
140 // Test if there is an appropriate addressing mode and check if the
141 // immediate fits.
142 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
143 if (Found) {
144 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
145 int64_t C = CI->getSExtValue();
146 if (C <= Max)
147 return true;
148 }
149 }
150
151 // Otherwise, base only, materialize address in register.
152 Base = N;
153 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
154 return true;
155 }
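  // Illustrative example: with Size = 8 and Max = 31, a "base + 248" address
  // is accepted (scaled immediate 248 / 8 = 31), while "base + 256" scales to
  // 32 > Max and falls through to the base-only fallback above.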
156
157 template<int Width>
158 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
159 SDValue &SignExtend, SDValue &DoShift) {
160 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
161 }
162
163 template<int Width>
164 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
165 SDValue &SignExtend, SDValue &DoShift) {
166 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
167 }
168
169 bool SelectExtractHigh(SDValue N, SDValue &Res) {
170 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
171 N = N->getOperand(0);
172 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
173 !isa<ConstantSDNode>(N->getOperand(1)))
174 return false;
175 EVT VT = N->getValueType(0);
176 EVT LVT = N->getOperand(0).getValueType();
177 unsigned Index = N->getConstantOperandVal(1);
178 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
179 Index != VT.getVectorNumElements())
180 return false;
181 Res = N->getOperand(0);
182 return true;
183 }
184
185 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
186 if (N.getOpcode() != AArch64ISD::VLSHR)
187 return false;
188 SDValue Op = N->getOperand(0);
189 EVT VT = Op.getValueType();
190 unsigned ShtAmt = N->getConstantOperandVal(1);
191 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
192 return false;
193
194 APInt Imm;
195 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
196 Imm = APInt(VT.getScalarSizeInBits(),
197 Op.getOperand(1).getConstantOperandVal(0)
198 << Op.getOperand(1).getConstantOperandVal(1));
199 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
200 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
201 Imm = APInt(VT.getScalarSizeInBits(),
202 Op.getOperand(1).getConstantOperandVal(0));
203 else
204 return false;
205
206 if (Imm != 1ULL << (ShtAmt - 1))
207 return false;
208
209 Res1 = Op.getOperand(0);
210 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
211 return true;
212 }
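  // Illustrative example: on v8i16, (AArch64ISD::VLSHR (add X, (MOVIshift 8,
  // 4)), 8) matches because the splatted addend is 8 << 4 == 128 == 1 << (8 -
  // 1); Res1 becomes X and Res2 the shift amount 8, which is the shape used
  // by the rounding shift-right patterns.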
213
214 bool SelectDupZeroOrUndef(SDValue N) {
215 switch(N->getOpcode()) {
216 case ISD::UNDEF:
217 return true;
218 case AArch64ISD::DUP:
219 case ISD::SPLAT_VECTOR: {
220 auto Opnd0 = N->getOperand(0);
221 if (isNullConstant(Opnd0))
222 return true;
223 if (isNullFPConstant(Opnd0))
224 return true;
225 break;
226 }
227 default:
228 break;
229 }
230
231 return false;
232 }
233
234 bool SelectAny(SDValue) { return true; }
235
236 bool SelectDupZero(SDValue N) {
237 switch(N->getOpcode()) {
238 case AArch64ISD::DUP:
239 case ISD::SPLAT_VECTOR: {
240 auto Opnd0 = N->getOperand(0);
241 if (isNullConstant(Opnd0))
242 return true;
243 if (isNullFPConstant(Opnd0))
244 return true;
245 break;
246 }
247 }
248
249 return false;
250 }
251
252 template <MVT::SimpleValueType VT, bool Negate>
253 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
254 return SelectSVEAddSubImm(N, VT, Imm, Shift, Negate);
255 }
256
257 template <MVT::SimpleValueType VT, bool Negate>
258 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
259 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
260 }
261
262 template <MVT::SimpleValueType VT>
263 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
264 return SelectSVECpyDupImm(N, VT, Imm, Shift);
265 }
266
267 template <MVT::SimpleValueType VT, bool Invert = false>
268 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
269 return SelectSVELogicalImm(N, VT, Imm, Invert);
270 }
271
272 template <MVT::SimpleValueType VT>
273 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
274 return SelectSVEArithImm(N, VT, Imm);
275 }
276
277 template <unsigned Low, unsigned High, bool AllowSaturation = false>
278 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
279 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
280 }
281
282 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
283 if (N->getOpcode() != ISD::SPLAT_VECTOR)
284 return false;
285
286 EVT EltVT = N->getValueType(0).getVectorElementType();
287 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
288 /* High */ EltVT.getFixedSizeInBits(),
289 /* AllowSaturation */ true, Imm);
290 }
291
292 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
293 template<signed Min, signed Max, signed Scale, bool Shift>
294 bool SelectCntImm(SDValue N, SDValue &Imm) {
295 if (!isa<ConstantSDNode>(N))
296 return false;
297
298 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
299 if (Shift)
300 MulImm = 1LL << MulImm;
301
302 if ((MulImm % std::abs(Scale)) != 0)
303 return false;
304
305 MulImm /= Scale;
306 if ((MulImm >= Min) && (MulImm <= Max)) {
307 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
308 return true;
309 }
310
311 return false;
312 }
313
314 template <signed Max, signed Scale>
315 bool SelectEXTImm(SDValue N, SDValue &Imm) {
316 if (!isa<ConstantSDNode>(N))
317 return false;
318
319 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
320
321 if (MulImm >= 0 && MulImm <= Max) {
322 MulImm *= Scale;
323 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
324 return true;
325 }
326
327 return false;
328 }
329
330 template <unsigned BaseReg, unsigned Max>
331 bool ImmToReg(SDValue N, SDValue &Imm) {
332 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
333 uint64_t C = CI->getZExtValue();
334
335 if (C > Max)
336 return false;
337
338 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
339 return true;
340 }
341 return false;
342 }
343
344 /// Form sequences of consecutive 64/128-bit registers for use in NEON
345 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
346 /// between 1 and 4 elements. If it contains a single element that is returned
347 /// unchanged; otherwise a REG_SEQUENCE value is returned.
348 SDValue createDTuple(ArrayRef<SDValue> Vecs);
349 SDValue createQTuple(ArrayRef<SDValue> Vecs);
350 // Form a sequence of SVE registers for instructions using list of vectors,
351 // e.g. structured loads and stores (ldN, stN).
352 SDValue createZTuple(ArrayRef<SDValue> Vecs);
353
354 // Similar to above, except the register must start at a multiple of the
355 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
356 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
357
358 /// Generic helper for the createDTuple/createQTuple
359 /// functions. Those should almost always be called instead.
360 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
361 const unsigned SubRegs[]);
362
363 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
364
365 bool tryIndexedLoad(SDNode *N);
366
367 void SelectPtrauthAuth(SDNode *N);
368 void SelectPtrauthResign(SDNode *N);
369
370 bool trySelectStackSlotTagP(SDNode *N);
371 void SelectTagP(SDNode *N);
372
373 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
374 unsigned SubRegIdx);
375 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
376 unsigned SubRegIdx);
377 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
378 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
379 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
380 unsigned Opc_rr, unsigned Opc_ri,
381 bool IsIntr = false);
382 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
383 unsigned Scale, unsigned Opc_ri,
384 unsigned Opc_rr);
385 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
386 bool IsZmMulti, unsigned Opcode,
387 bool HasPred = false);
388 void SelectPExtPair(SDNode *N, unsigned Opc);
389 void SelectWhilePair(SDNode *N, unsigned Opc);
390 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
391 void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
392 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
393 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
394 bool IsTupleInput, unsigned Opc);
395 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
396
397 template <unsigned MaxIdx, unsigned Scale>
398 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
399 unsigned Op);
400 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
401 unsigned Op, unsigned MaxIdx, unsigned Scale,
402 unsigned BaseReg = 0);
403 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
404 /// SVE Reg+Imm addressing mode.
405 template <int64_t Min, int64_t Max>
406 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
407 SDValue &OffImm);
408 /// SVE Reg+Reg address mode.
409 template <unsigned Scale>
410 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
411 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
412 }
413
414 void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
415 unsigned Opc, uint32_t MaxImm);
416
417 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc);
418
419 template <unsigned MaxIdx, unsigned Scale>
420 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
421 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
422 }
423
424 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
425 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
426 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
427 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
428 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
429 unsigned Opc_rr, unsigned Opc_ri);
430 std::tuple<unsigned, SDValue, SDValue>
431 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
432 const SDValue &OldBase, const SDValue &OldOffset,
433 unsigned Scale);
434
435 bool tryBitfieldExtractOp(SDNode *N);
436 bool tryBitfieldExtractOpFromSExt(SDNode *N);
437 bool tryBitfieldInsertOp(SDNode *N);
438 bool tryBitfieldInsertInZeroOp(SDNode *N);
439 bool tryShiftAmountMod(SDNode *N);
440
441 bool tryReadRegister(SDNode *N);
442 bool tryWriteRegister(SDNode *N);
443
444 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
445 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
446
447 bool trySelectXAR(SDNode *N);
448
449// Include the pieces autogenerated from the target description.
450#include "AArch64GenDAGISel.inc"
451
452private:
453 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
454 SDValue &Shift);
455 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
456 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
457 SDValue &OffImm) {
458 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
459 }
460 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
461 unsigned Size, SDValue &Base,
462 SDValue &OffImm);
463 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
464 SDValue &OffImm);
465 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
466 SDValue &OffImm);
467 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
468 SDValue &Offset, SDValue &SignExtend,
469 SDValue &DoShift);
470 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
471 SDValue &Offset, SDValue &SignExtend,
472 SDValue &DoShift);
473 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
474 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
475 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
476 SDValue &Offset, SDValue &SignExtend);
477
478 template<unsigned RegWidth>
479 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
480 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
481 }
482
483 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
484
485 template<unsigned RegWidth>
486 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
487 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
488 }
489
490 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
491 unsigned Width);
492
493 bool SelectCMP_SWAP(SDNode *N);
494
495 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
496 bool Negate);
497 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
498 bool Negate);
499 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
500 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
501
502 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
503 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
504 bool AllowSaturation, SDValue &Imm);
505
506 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
507 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
508 SDValue &Offset);
509 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
510 SDValue &Offset, unsigned Scale = 1);
511
512 bool SelectAllActivePredicate(SDValue N);
513 bool SelectAnyPredicate(SDValue N);
514
515 bool SelectCmpBranchUImm6Operand(SDNode *P, SDValue N, SDValue &Imm);
516};
517
518class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
519public:
520 static char ID;
521 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
522 CodeGenOptLevel OptLevel)
523 : SelectionDAGISelLegacy(
524 ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
525};
526} // end anonymous namespace
527
528char AArch64DAGToDAGISelLegacy::ID = 0;
529
530INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
531
532/// isIntImmediate - This method tests to see if the node is a constant
533/// operand. If so, Imm will receive the value.
534static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
535 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
536 Imm = C->getZExtValue();
537 return true;
538 }
539 return false;
540}
541
542// isIntImmediate - This method tests to see if N is a constant operand.
543// If so Imm will receive the value.
544static bool isIntImmediate(SDValue N, uint64_t &Imm) {
545 return isIntImmediate(N.getNode(), Imm);
546}
547
548// isOpcWithIntImmediate - This method tests to see if the node is a specific
549// opcode and that it has an immediate integer right operand.
550// If so, Imm will receive the value.
551static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
552 uint64_t &Imm) {
553 return N->getOpcode() == Opc &&
554 isIntImmediate(N->getOperand(1).getNode(), Imm);
555}
556
557// isIntImmediateEq - This method tests to see if N is a constant operand that
558// is equivalent to 'ImmExpected'.
559#ifndef NDEBUG
560static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
561 uint64_t Imm;
562 if (!isIntImmediate(N.getNode(), Imm))
563 return false;
564 return Imm == ImmExpected;
565}
566#endif
567
568bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
569 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
570 std::vector<SDValue> &OutOps) {
571 switch(ConstraintID) {
572 default:
573 llvm_unreachable("Unexpected asm memory constraint");
574 case InlineAsm::ConstraintCode::m:
575 case InlineAsm::ConstraintCode::o:
576 case InlineAsm::ConstraintCode::Q:
577 // We need to make sure that this one operand does not end up in XZR, thus
578 // require the address to be in a PointerRegClass register.
579 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
580 const TargetRegisterClass *TRC = TRI->getPointerRegClass();
581 SDLoc dl(Op);
582 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
583 SDValue NewOp =
584 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
585 dl, Op.getValueType(),
586 Op, RC), 0);
587 OutOps.push_back(NewOp);
588 return false;
589 }
590 return true;
591}
592
593/// SelectArithImmed - Select an immediate value that can be represented as
594/// a 12-bit value shifted left by either 0 or 12. If so, return true with
595/// Val set to the 12-bit value and Shift set to the shifter operand.
596bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
597 SDValue &Shift) {
598 // This function is called from the addsub_shifted_imm ComplexPattern,
599 // which lists [imm] as the list of opcode it's interested in, however
600 // we still need to check whether the operand is actually an immediate
601 // here because the ComplexPattern opcode list is only used in
602 // root-level opcode matching.
603 if (!isa<ConstantSDNode>(N.getNode()))
604 return false;
605
606 uint64_t Immed = N.getNode()->getAsZExtVal();
607 unsigned ShiftAmt;
608
609 if (Immed >> 12 == 0) {
610 ShiftAmt = 0;
611 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
612 ShiftAmt = 12;
613 Immed = Immed >> 12;
614 } else
615 return false;
616
617 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
618 SDLoc dl(N);
619 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
620 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
621 return true;
622}
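// For example, #0x123 is selected as Val = 0x123 with LSL #0, #0x123000 as
// Val = 0x123 with LSL #12, and #0x123456 is rejected because it has set
// bits both inside and outside the low 12 bits.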
623
624/// SelectNegArithImmed - As above, but negates the value before trying to
625/// select it.
626bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
627 SDValue &Shift) {
628 // This function is called from the addsub_shifted_imm ComplexPattern,
629 // which lists [imm] as the list of opcode it's interested in, however
630 // we still need to check whether the operand is actually an immediate
631 // here because the ComplexPattern opcode list is only used in
632 // root-level opcode matching.
633 if (!isa<ConstantSDNode>(N.getNode()))
634 return false;
635
636 // The immediate operand must be a 24-bit zero-extended immediate.
637 uint64_t Immed = N.getNode()->getAsZExtVal();
638
639 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
640 // have the opposite effect on the C flag, so this pattern mustn't match under
641 // those circumstances.
642 if (Immed == 0)
643 return false;
644
645 if (N.getValueType() == MVT::i32)
646 Immed = ~((uint32_t)Immed) + 1;
647 else
648 Immed = ~Immed + 1ULL;
649 if (Immed & 0xFFFFFFFFFF000000ULL)
650 return false;
651
652 Immed &= 0xFFFFFFULL;
653 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
654 Shift);
655}
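// For example, an i32 immediate of 0xfffffff0 (-16) negates to 16, which
// SelectArithImmed accepts, so the operation can be selected with #16 and the
// opposite add/sub opcode; an immediate of 0 is rejected to preserve the
// C-flag difference between "cmp wN, #0" and "cmn wN, #0".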
656
657/// getShiftTypeForNode - Translate a shift node to the corresponding
658/// ShiftType value.
659static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
660 switch (N.getOpcode()) {
661 default:
662 return AArch64_AM::InvalidShiftExtend;
663 case ISD::SHL:
664 return AArch64_AM::LSL;
665 case ISD::SRL:
666 return AArch64_AM::LSR;
667 case ISD::SRA:
668 return AArch64_AM::ASR;
669 case ISD::ROTR:
670 return AArch64_AM::ROR;
671 }
672}
673
674static bool isMemOpOrPrefetch(SDNode *N) {
675 return isa<MemSDNode>(*N) || N->getOpcode() == AArch64ISD::PREFETCH;
676}
677
678/// Determine whether it is worth it to fold SHL into the addressing
679/// mode.
680static bool isWorthFoldingSHL(SDValue V) {
681 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
682 // It is worth folding logical shift of up to three places.
683 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
684 if (!CSD)
685 return false;
686 unsigned ShiftVal = CSD->getZExtValue();
687 if (ShiftVal > 3)
688 return false;
689
690 // Check if this particular node is reused in any non-memory related
691 // operation. If yes, do not try to fold this node into the address
692 // computation, since the computation will be kept.
693 const SDNode *Node = V.getNode();
694 for (SDNode *UI : Node->users())
695 if (!isMemOpOrPrefetch(UI))
696 for (SDNode *UII : UI->users())
697 if (!isMemOpOrPrefetch(UII))
698 return false;
699 return true;
700}
701
702/// Determine whether it is worth it to fold V into an extended register addressing
703/// mode.
704bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
705 // Trivial if we are optimizing for code size or if there is only
706 // one use of the value.
707 if (CurDAG->shouldOptForSize() || V.hasOneUse())
708 return true;
709
710 // If a subtarget has a slow shift, folding a shift into multiple loads
711 // costs additional micro-ops.
712 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
713 return false;
714
715 // Check whether we're going to emit the address arithmetic anyway because
716 // it's used by a non-address operation.
717 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
718 return true;
719 if (V.getOpcode() == ISD::ADD) {
720 const SDValue LHS = V.getOperand(0);
721 const SDValue RHS = V.getOperand(1);
722 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
723 return true;
724 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
725 return true;
726 }
727
728 // It hurts otherwise, since the value will be reused.
729 return false;
730}
731
732/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
733/// to select more shifted register
734bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
735 SDValue &Shift) {
736 EVT VT = N.getValueType();
737 if (VT != MVT::i32 && VT != MVT::i64)
738 return false;
739
740 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
741 return false;
742 SDValue LHS = N.getOperand(0);
743 if (!LHS->hasOneUse())
744 return false;
745
746 unsigned LHSOpcode = LHS->getOpcode();
747 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
748 return false;
749
750 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
751 if (!ShiftAmtNode)
752 return false;
753
754 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
755 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
756 if (!RHSC)
757 return false;
758
759 APInt AndMask = RHSC->getAPIntValue();
760 unsigned LowZBits, MaskLen;
761 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
762 return false;
763
764 unsigned BitWidth = N.getValueSizeInBits();
765 SDLoc DL(LHS);
766 uint64_t NewShiftC;
767 unsigned NewShiftOp;
768 if (LHSOpcode == ISD::SHL) {
769 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
770 // BitWidth != LowZBits + MaskLen doesn't match the pattern
771 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
772 return false;
773
774 NewShiftC = LowZBits - ShiftAmtC;
775 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
776 } else {
777 if (LowZBits == 0)
778 return false;
779
780 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
781 NewShiftC = LowZBits + ShiftAmtC;
782 if (NewShiftC >= BitWidth)
783 return false;
784
785 // SRA need all high bits
786 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
787 return false;
788
789 // SRL high bits can be 0 or 1
790 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
791 return false;
792
793 if (LHSOpcode == ISD::SRL)
794 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
795 else
796 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
797 }
798
799 assert(NewShiftC < BitWidth && "Invalid shift amount");
800 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
801 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
802 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
803 NewShiftAmt, BitWidthMinus1),
804 0);
805 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
806 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
807 return true;
808}
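// For example, for i32, (and (shl x, 4), 0xffffff00) is re-selected here as
// UBFMWri x, 4, 31 (a logical shift right by 4) paired with a shifted-register
// operand of LSL #8, computing the same masked value without a separate AND.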
809
810/// getExtendTypeForNode - Translate an extend node to the corresponding
811/// ExtendType value.
812static AArch64_AM::ShiftExtendType
813getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
814 if (N.getOpcode() == ISD::SIGN_EXTEND ||
815 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
816 EVT SrcVT;
817 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
818 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
819 else
820 SrcVT = N.getOperand(0).getValueType();
821
822 if (!IsLoadStore && SrcVT == MVT::i8)
823 return AArch64_AM::SXTB;
824 else if (!IsLoadStore && SrcVT == MVT::i16)
825 return AArch64_AM::SXTH;
826 else if (SrcVT == MVT::i32)
827 return AArch64_AM::SXTW;
828 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
829
830 return AArch64_AM::InvalidShiftExtend;
831 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
832 N.getOpcode() == ISD::ANY_EXTEND) {
833 EVT SrcVT = N.getOperand(0).getValueType();
834 if (!IsLoadStore && SrcVT == MVT::i8)
835 return AArch64_AM::UXTB;
836 else if (!IsLoadStore && SrcVT == MVT::i16)
837 return AArch64_AM::UXTH;
838 else if (SrcVT == MVT::i32)
839 return AArch64_AM::UXTW;
840 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
841
842 return AArch64_AM::InvalidShiftExtend;
843 } else if (N.getOpcode() == ISD::AND) {
844 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
845 if (!CSD)
846 return AArch64_AM::InvalidShiftExtend;
847 uint64_t AndMask = CSD->getZExtValue();
848
849 switch (AndMask) {
850 default:
851 return AArch64_AM::InvalidShiftExtend;
852 case 0xFF:
853 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
854 case 0xFFFF:
855 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
856 case 0xFFFFFFFF:
857 return AArch64_AM::UXTW;
858 }
859 }
860
861 return AArch64_AM::InvalidShiftExtend;
862}
863
864/// Determine whether it is worth it to fold V into an extended register of an
865/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
866/// instruction, and the shift should be treated as worth folding even if it has
867/// multiple uses.
868bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
869 // Trivial if we are optimizing for code size or if there is only
870 // one use of the value.
871 if (CurDAG->shouldOptForSize() || V.hasOneUse())
872 return true;
873
874 // If a subtarget has a fastpath LSL we can fold a logical shift into
875 // the add/sub and save a cycle.
876 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
877 V.getConstantOperandVal(1) <= 4 &&
878 !isa<ConstantSDNode>(V.getOperand(0)))
879 return true;
880
881 // It hurts otherwise, since the value will be reused.
882 return false;
883}
884
885/// SelectShiftedRegister - Select a "shifted register" operand. If the value
886/// is not shifted, set the Shift operand to default of "LSL 0". The logical
887/// instructions allow the shifted register to be rotated, but the arithmetic
888/// instructions do not. The AllowROR parameter specifies whether ROR is
889/// supported.
890bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
891 SDValue &Reg, SDValue &Shift) {
892 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
893 return true;
894
895 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
896 if (ShType == AArch64_AM::InvalidShiftExtend)
897 return false;
898 if (!AllowROR && ShType == AArch64_AM::ROR)
899 return false;
900
901 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
902 unsigned BitSize = N.getValueSizeInBits();
903 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
904 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
905
906 Reg = N.getOperand(0);
907 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
908 return isWorthFoldingALU(N, true);
909 }
910
911 return false;
912}
913
914/// Instructions that accept extend modifiers like UXTW expect the register
915/// being extended to be a GPR32, but the incoming DAG might be acting on a
916/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
917/// this is the case.
918static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
919 if (N.getValueType() == MVT::i32)
920 return N;
921
922 SDLoc dl(N);
923 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
924}
925
926// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
927template<signed Low, signed High, signed Scale>
928bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
929 if (!isa<ConstantSDNode>(N))
930 return false;
931
932 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
933 if ((MulImm % std::abs(Scale)) == 0) {
934 int64_t RDVLImm = MulImm / Scale;
935 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
936 Imm = CurDAG->getSignedTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
937 return true;
938 }
939 }
940
941 return false;
942}
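// For example, with Low = -32, High = 31 and Scale = 16, a multiplier of 48
// is accepted as immediate 3 (48 / 16), while 40 is rejected because it is
// not a multiple of 16.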
943
944// Returns a suitable RDSVL multiplier from a left shift.
945template <signed Low, signed High>
946bool AArch64DAGToDAGISel::SelectRDSVLShiftImm(SDValue N, SDValue &Imm) {
947 if (!isa<ConstantSDNode>(N))
948 return false;
949
950 int64_t MulImm = 1LL << cast<ConstantSDNode>(N)->getSExtValue();
951 if (MulImm >= Low && MulImm <= High) {
952 Imm = CurDAG->getSignedTargetConstant(MulImm, SDLoc(N), MVT::i32);
953 return true;
954 }
955
956 return false;
957}
958
959/// SelectArithExtendedRegister - Select a "extended register" operand. This
960/// operand folds in an extend followed by an optional left shift.
961bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
962 SDValue &Shift) {
963 unsigned ShiftVal = 0;
964 AArch64_AM::ShiftExtendType Ext;
965
966 if (N.getOpcode() == ISD::SHL) {
967 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
968 if (!CSD)
969 return false;
970 ShiftVal = CSD->getZExtValue();
971 if (ShiftVal > 4)
972 return false;
973
974 Ext = getExtendTypeForNode(N.getOperand(0));
975 if (Ext == AArch64_AM::InvalidShiftExtend)
976 return false;
977
978 Reg = N.getOperand(0).getOperand(0);
979 } else {
980 Ext = getExtendTypeForNode(N);
981 if (Ext == AArch64_AM::InvalidShiftExtend)
982 return false;
983
984 Reg = N.getOperand(0);
985
986 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
987 // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
988 auto isDef32 = [](SDValue N) {
989 unsigned Opc = N.getOpcode();
990 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
991 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
992 Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
993 Opc != ISD::FREEZE;
994 };
995 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
996 isDef32(Reg))
997 return false;
998 }
999
1000 // AArch64 mandates that the RHS of the operation must use the smallest
1001 // register class that could contain the size being extended from. Thus,
1002 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
1003 // there might not be an actual 32-bit value in the program. We can
1004 // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
1005 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
1006 Reg = narrowIfNeeded(CurDAG, Reg);
1007 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1008 MVT::i32);
1009 return isWorthFoldingALU(N);
1010}
1011
1012/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
1013/// operand is used by instructions that have an SP operand.
1014bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
1015 SDValue &Shift) {
1016 unsigned ShiftVal = 0;
1017 AArch64_AM::ShiftExtendType Ext;
1018
1019 if (N.getOpcode() != ISD::SHL)
1020 return false;
1021
1022 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1023 if (!CSD)
1024 return false;
1025 ShiftVal = CSD->getZExtValue();
1026 if (ShiftVal > 4)
1027 return false;
1028
1029 Ext = AArch64_AM::UXTX;
1030 Reg = N.getOperand(0);
1031 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1032 MVT::i32);
1033 return isWorthFoldingALU(N);
1034}
1035
1036/// If there's a use of this ADDlow that's not itself a load/store then we'll
1037/// need to create a real ADD instruction from it anyway and there's no point in
1038/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1039/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1040/// leads to duplicated ADRP instructions.
1041static bool isWorthFoldingADDlow(SDValue N) {
1042 for (auto *User : N->users()) {
1043 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
1044 User->getOpcode() != ISD::ATOMIC_LOAD &&
1045 User->getOpcode() != ISD::ATOMIC_STORE)
1046 return false;
1047
1048 // ldar and stlr have much more restrictive addressing modes (just a
1049 // register).
1050 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
1051 return false;
1052 }
1053
1054 return true;
1055}
1056
1057/// Check if the immediate offset is valid as a scaled immediate.
1058static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1059 unsigned Size) {
1060 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1061 Offset < (Range << Log2_32(Size)))
1062 return true;
1063 return false;
1064}
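// For example, with Range = 0x1000 and Size = 8 this accepts offsets 0, 8,
// ..., 32760 (aligned offsets below 0x1000 * 8), i.e. exactly what a scaled
// unsigned 12-bit immediate can encode for an 8-byte access.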
1065
1066/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1067/// immediate" address. The "Size" argument is the size in bytes of the memory
1068/// reference, which determines the scale.
1069bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1070 unsigned BW, unsigned Size,
1071 SDValue &Base,
1072 SDValue &OffImm) {
1073 SDLoc dl(N);
1074 const DataLayout &DL = CurDAG->getDataLayout();
1075 const TargetLowering *TLI = getTargetLowering();
1076 if (N.getOpcode() == ISD::FrameIndex) {
1077 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1078 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1079 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1080 return true;
1081 }
1082
1083 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
1084 // selected here doesn't support labels/immediates, only base+offset.
1085 if (CurDAG->isBaseWithConstantOffset(N)) {
1086 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1087 if (IsSignedImm) {
1088 int64_t RHSC = RHS->getSExtValue();
1089 unsigned Scale = Log2_32(Size);
1090 int64_t Range = 0x1LL << (BW - 1);
1091
1092 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1093 RHSC < (Range << Scale)) {
1094 Base = N.getOperand(0);
1095 if (Base.getOpcode() == ISD::FrameIndex) {
1096 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1097 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1098 }
1099 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1100 return true;
1101 }
1102 } else {
1103 // unsigned Immediate
1104 uint64_t RHSC = RHS->getZExtValue();
1105 unsigned Scale = Log2_32(Size);
1106 uint64_t Range = 0x1ULL << BW;
1107
1108 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1109 Base = N.getOperand(0);
1110 if (Base.getOpcode() == ISD::FrameIndex) {
1111 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1112 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1113 }
1114 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1115 return true;
1116 }
1117 }
1118 }
1119 }
1120 // Base only. The address will be materialized into a register before
1121 // the memory is accessed.
1122 // add x0, Xbase, #offset
1123 // stp x1, x2, [x0]
1124 Base = N;
1125 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1126 return true;
1127}
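// For example, with IsSignedImm = true, BW = 7 and Size = 8 (the
// SelectAddrModeIndexed7S64 case above), byte offsets -512, -8 and 504 are
// accepted and emitted as scaled immediates -64, -1 and 63, while 512 or any
// offset that is not a multiple of 8 falls back to the base-only form.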
1128
1129/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1130/// immediate" address. The "Size" argument is the size in bytes of the memory
1131/// reference, which determines the scale.
1132bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1133 SDValue &Base, SDValue &OffImm) {
1134 SDLoc dl(N);
1135 const DataLayout &DL = CurDAG->getDataLayout();
1136 const TargetLowering *TLI = getTargetLowering();
1137 if (N.getOpcode() == ISD::FrameIndex) {
1138 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1139 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1140 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1141 return true;
1142 }
1143
1144 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1145 GlobalAddressSDNode *GAN =
1146 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1147 Base = N.getOperand(0);
1148 OffImm = N.getOperand(1);
1149 if (!GAN)
1150 return true;
1151
1152 if (GAN->getOffset() % Size == 0 &&
1153 GAN->getGlobal()->getPointerAlignment(DL) >= Size)
1154 return true;
1155 }
1156
1157 if (CurDAG->isBaseWithConstantOffset(N)) {
1158 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1159 int64_t RHSC = (int64_t)RHS->getZExtValue();
1160 unsigned Scale = Log2_32(Size);
1161 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1162 Base = N.getOperand(0);
1163 if (Base.getOpcode() == ISD::FrameIndex) {
1164 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1165 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1166 }
1167 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1168 return true;
1169 }
1170 }
1171 }
1172
1173 // Before falling back to our general case, check if the unscaled
1174 // instructions can handle this. If so, that's preferable.
1175 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1176 return false;
1177
1178 // Base only. The address will be materialized into a register before
1179 // the memory is accessed.
1180 // add x0, Xbase, #offset
1181 // ldr x0, [x0]
1182 Base = N;
1183 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1184 return true;
1185}
1186
1187/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1188/// immediate" address. This should only match when there is an offset that
1189/// is not valid for a scaled immediate addressing mode. The "Size" argument
1190/// is the size in bytes of the memory reference, which is needed here to know
1191/// what is valid for a scaled immediate.
1192bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1193 SDValue &Base,
1194 SDValue &OffImm) {
1195 if (!CurDAG->isBaseWithConstantOffset(N))
1196 return false;
1197 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1198 int64_t RHSC = RHS->getSExtValue();
1199 if (RHSC >= -256 && RHSC < 256) {
1200 Base = N.getOperand(0);
1201 if (Base.getOpcode() == ISD::FrameIndex) {
1202 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1203 const TargetLowering *TLI = getTargetLowering();
1204 Base = CurDAG->getTargetFrameIndex(
1205 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1206 }
1207 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1208 return true;
1209 }
1210 }
1211 return false;
1212}
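// For example, an 8-byte access at "base - 3" cannot use the scaled 12-bit
// form (the offset is negative and unaligned), but -3 lies within [-256, 255]
// and is accepted here, yielding an LDUR/STUR-style unscaled access.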
1213
1214static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
1215 SDLoc dl(N);
1216 SDValue ImpDef = SDValue(
1217 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1218 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1219 N);
1220}
1221
1222/// Check if the given SHL node (\p N), can be used to form an
1223/// extended register for an addressing mode.
1224bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1225 bool WantExtend, SDValue &Offset,
1226 SDValue &SignExtend) {
1227 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1228 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1229 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1230 return false;
1231
1232 SDLoc dl(N);
1233 if (WantExtend) {
1234 AArch64_AM::ShiftExtendType Ext =
1235 getExtendTypeForNode(N.getOperand(0), true);
1236 if (Ext == AArch64_AM::InvalidShiftExtend)
1237 return false;
1238
1239 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1240 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1241 MVT::i32);
1242 } else {
1243 Offset = N.getOperand(0);
1244 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1245 }
1246
1247 unsigned LegalShiftVal = Log2_32(Size);
1248 unsigned ShiftVal = CSD->getZExtValue();
1249
1250 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1251 return false;
1252
1253 return isWorthFoldingAddr(N, Size);
1254}
1255
1256bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1257 SDValue &Base, SDValue &Offset,
1258 SDValue &SignExtend,
1259 SDValue &DoShift) {
1260 if (N.getOpcode() != ISD::ADD)
1261 return false;
1262 SDValue LHS = N.getOperand(0);
1263 SDValue RHS = N.getOperand(1);
1264 SDLoc dl(N);
1265
1266 // We don't want to match immediate adds here, because they are better lowered
1267 // to the register-immediate addressing modes.
1268 if (isa<ConstantSDNode>(RHS))
1269 return false;
1270
1271 // Check if this particular node is reused in any non-memory related
1272 // operation. If yes, do not try to fold this node into the address
1273 // computation, since the computation will be kept.
1274 const SDNode *Node = N.getNode();
1275 for (SDNode *UI : Node->users()) {
1276 if (!isMemOpOrPrefetch(UI))
1277 return false;
1278 }
1279
1280 // Remember if it is worth folding N when it produces extended register.
1281 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1282
1283 // Try to match a shifted extend on the RHS.
1284 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1285 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1286 Base = LHS;
1287 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1288 return true;
1289 }
1290
1291 // Try to match a shifted extend on the LHS.
1292 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1293 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1294 Base = RHS;
1295 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1296 return true;
1297 }
1298
1299 // There was no shift, whatever else we find.
1300 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1301
1302 AArch64_AM::ShiftExtendType Ext;
1303 // Try to match an unshifted extend on the LHS.
1304 if (IsExtendedRegisterWorthFolding &&
1305 (Ext = getExtendTypeForNode(LHS, true)) !=
1306 AArch64_AM::InvalidShiftExtend) {
1307 Base = RHS;
1308 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1309 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1310 MVT::i32);
1311 if (isWorthFoldingAddr(LHS, Size))
1312 return true;
1313 }
1314
1315 // Try to match an unshifted extend on the RHS.
1316 if (IsExtendedRegisterWorthFolding &&
1317 (Ext = getExtendTypeForNode(RHS, true)) !=
1318 AArch64_AM::InvalidShiftExtend) {
1319 Base = LHS;
1320 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1321 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1322 MVT::i32);
1323 if (isWorthFoldingAddr(RHS, Size))
1324 return true;
1325 }
1326
1327 return false;
1328}
1329
1330// Check if the given immediate is preferred by ADD. If an immediate can be
1331// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
1332// encoded by one MOVZ, return true.
1333static bool isPreferredADD(int64_t ImmOff) {
1334 // Constant in [0x0, 0xfff] can be encoded in ADD.
1335 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1336 return true;
1337 // Check if it can be encoded in an "ADD LSL #12".
1338 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1339 // As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant.
1340 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1341 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1342 return false;
1343}
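// For example, 0x234 is directly encodable in an ADD; 0x123000 needs
// "ADD ... LSL #12" but cannot be produced by a single MOVZ, so it is still
// preferred; 0xab0000 is rejected since "MOVZ #0xab, LSL #16" covers it.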
1344
1345bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1346 SDValue &Base, SDValue &Offset,
1347 SDValue &SignExtend,
1348 SDValue &DoShift) {
1349 if (N.getOpcode() != ISD::ADD)
1350 return false;
1351 SDValue LHS = N.getOperand(0);
1352 SDValue RHS = N.getOperand(1);
1353 SDLoc DL(N);
1354
1355 // Check if this particular node is reused in any non-memory related
1356 // operation. If yes, do not try to fold this node into the address
1357 // computation, since the computation will be kept.
1358 const SDNode *Node = N.getNode();
1359 for (SDNode *UI : Node->users()) {
1360 if (!isMemOpOrPrefetch(UI))
1361 return false;
1362 }
1363
1364 // Watch out if RHS is a wide immediate, it can not be selected into
1365 // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into
1366 // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate
1367 // instructions like:
1368 // MOV X0, WideImmediate
1369 // ADD X1, BaseReg, X0
1370 // LDR X2, [X1, 0]
1371 // For such situation, using [BaseReg, XReg] addressing mode can save one
1372 // ADD/SUB:
1373 // MOV X0, WideImmediate
1374 // LDR X2, [BaseReg, X0]
1375 if (isa<ConstantSDNode>(RHS)) {
1376 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1377 // Skip if the immediate can be selected by the load/store addressing mode.
1378 // Also skip if the immediate can be encoded by a single ADD (SUB is also
1379 // checked by using -ImmOff).
1380 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1381 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1382 return false;
1383
1384 SDValue Ops[] = { RHS };
1385 SDNode *MOVI =
1386 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1387 SDValue MOVIV = SDValue(MOVI, 0);
1388 // This ADD of two X register will be selected into [Reg+Reg] mode.
1389 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1390 }
1391
1392 // Remember if it is worth folding N when it produces extended register.
1393 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1394
1395 // Try to match a shifted extend on the RHS.
1396 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1397 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1398 Base = LHS;
1399 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1400 return true;
1401 }
1402
1403 // Try to match a shifted extend on the LHS.
1404 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1405 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1406 Base = RHS;
1407 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1408 return true;
1409 }
1410
1411 // Match any non-shifted, non-extend, non-immediate add expression.
1412 Base = LHS;
1413 Offset = RHS;
1414 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1415 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1416 // Reg1 + Reg2 is free: no check needed.
1417 return true;
1418}
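// For example, a load at "base + 0x123456" has an offset that is neither a
// valid scaled immediate nor ADD-encodable, so the offset is materialized with
// MOVi64imm and the access is selected in the [BaseReg, Xoffset] form.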
1419
1420SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1421 static const unsigned RegClassIDs[] = {
1422 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1423 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1424 AArch64::dsub2, AArch64::dsub3};
1425
1426 return createTuple(Regs, RegClassIDs, SubRegs);
1427}
1428
1429SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1430 static const unsigned RegClassIDs[] = {
1431 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1432 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1433 AArch64::qsub2, AArch64::qsub3};
1434
1435 return createTuple(Regs, RegClassIDs, SubRegs);
1436}
1437
1438SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1439 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1440 AArch64::ZPR3RegClassID,
1441 AArch64::ZPR4RegClassID};
1442 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1443 AArch64::zsub2, AArch64::zsub3};
1444
1445 return createTuple(Regs, RegClassIDs, SubRegs);
1446}
1447
1448SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1449 assert(Regs.size() == 2 || Regs.size() == 4);
1450
1451 // The createTuple interface requires 3 RegClassIDs for each possible
1452 // tuple type even though we only have them for ZPR2 and ZPR4.
1453 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1454 AArch64::ZPR4Mul4RegClassID};
1455 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1456 AArch64::zsub2, AArch64::zsub3};
1457 return createTuple(Regs, RegClassIDs, SubRegs);
1458}
1459
1460SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1461 const unsigned RegClassIDs[],
1462 const unsigned SubRegs[]) {
1463 // There's no special register-class for a vector-list of 1 element: it's just
1464 // a vector.
1465 if (Regs.size() == 1)
1466 return Regs[0];
1467
1468 assert(Regs.size() >= 2 && Regs.size() <= 4);
1469
1470 SDLoc DL(Regs[0]);
1471
1472 SmallVector<SDValue, 4> Ops;
1473
1474 // First operand of REG_SEQUENCE is the desired RegClass.
1475 Ops.push_back(
1476 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1477
1478 // Then we get pairs of source & subregister-position for the components.
1479 for (unsigned i = 0; i < Regs.size(); ++i) {
1480 Ops.push_back(Regs[i]);
1481 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1482 }
1483
1484 SDNode *N =
1485 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1486 return SDValue(N, 0);
1487}
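// For example, a two-element Regs list coming from createDTuple becomes
//   REG_SEQUENCE DDRegClassID, Regs[0], dsub0, Regs[1], dsub1
// i.e. a single untyped value representing a consecutive D-register pair.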
1488
1489void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1490 bool isExt) {
1491 SDLoc dl(N);
1492 EVT VT = N->getValueType(0);
1493
1494 unsigned ExtOff = isExt;
1495
1496 // Form a REG_SEQUENCE to force register allocation.
1497 unsigned Vec0Off = ExtOff + 1;
1498 SmallVector<SDValue, 4> Regs(N->ops().slice(Vec0Off, NumVecs));
1499 SDValue RegSeq = createQTuple(Regs);
1500
1501 SmallVector<SDValue, 6> Ops;
1502 if (isExt)
1503 Ops.push_back(N->getOperand(1));
1504 Ops.push_back(RegSeq);
1505 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1506 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1507}
1508
1509static std::tuple<SDValue, SDValue>
1510extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
1511 SDLoc DL(Disc);
1512 SDValue AddrDisc;
1513 SDValue ConstDisc;
1514
1515 // If this is a blend, remember the constant and address discriminators.
1516 // Otherwise, it's either a constant discriminator, or a non-blended
1517 // address discriminator.
1518 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1519 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
1520 AddrDisc = Disc->getOperand(1);
1521 ConstDisc = Disc->getOperand(2);
1522 } else {
1523 ConstDisc = Disc;
1524 }
1525
1526 // If the constant discriminator (either the blend RHS, or the entire
1527 // discriminator value) isn't a 16-bit constant, bail out, and let the
1528 // discriminator be computed separately.
1529 auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
1530 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
1531 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
1532
1533 // If there's no address discriminator, use XZR directly.
1534 if (!AddrDisc)
1535 AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
1536
1537 return std::make_tuple(
1538 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
1539 AddrDisc);
1540}
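// For example, a discriminator of the form (ptrauth.blend x20, 1234) splits
// into AddrDisc = x20 and constant discriminator 1234; a plain constant 1234
// yields (1234, XZR); and a non-constant, non-blend discriminator D yields
// (0, D) so the full discriminator is computed separately.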
1541
1542void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1543 SDLoc DL(N);
1544 // IntrinsicID is operand #0
1545 SDValue Val = N->getOperand(1);
1546 SDValue AUTKey = N->getOperand(2);
1547 SDValue AUTDisc = N->getOperand(3);
1548
1549 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1550 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1551
1552 SDValue AUTAddrDisc, AUTConstDisc;
1553 std::tie(AUTConstDisc, AUTAddrDisc) =
1554 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1555
1556 if (!Subtarget->isX16X17Safer()) {
1557 SDValue Ops[] = {Val, AUTKey, AUTConstDisc, AUTAddrDisc};
1558
1559 SDNode *AUT =
1560 CurDAG->getMachineNode(AArch64::AUTxMxN, DL, MVT::i64, MVT::i64, Ops);
1561 ReplaceNode(N, AUT);
1562 } else {
1563 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1564 AArch64::X16, Val, SDValue());
1565 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
1566
1567 SDNode *AUT = CurDAG->getMachineNode(AArch64::AUTx16x17, DL, MVT::i64, Ops);
1568 ReplaceNode(N, AUT);
1569 }
1570}
1571
1572void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
1573 SDLoc DL(N);
1574 // IntrinsicID is operand #0
1575 SDValue Val = N->getOperand(1);
1576 SDValue AUTKey = N->getOperand(2);
1577 SDValue AUTDisc = N->getOperand(3);
1578 SDValue PACKey = N->getOperand(4);
1579 SDValue PACDisc = N->getOperand(5);
1580
1581 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1582 unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();
1583
1584 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1585 PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);
1586
1587 SDValue AUTAddrDisc, AUTConstDisc;
1588 std::tie(AUTConstDisc, AUTAddrDisc) =
1589 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1590
1591 SDValue PACAddrDisc, PACConstDisc;
1592 std::tie(PACConstDisc, PACAddrDisc) =
1593 extractPtrauthBlendDiscriminators(PACDisc, CurDAG);
1594
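  // The AUTPAC pseudo takes its input value in X16; copy Val into X16 and glue
  // the copy to the pseudo below.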
1595 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1596 AArch64::X16, Val, SDValue());
1597
1598 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
1599 PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};
1600
1601 SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
1602 ReplaceNode(N, AUTPAC);
1603}
1604
1605bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1606 LoadSDNode *LD = cast<LoadSDNode>(N);
1607 if (LD->isUnindexed())
1608 return false;
1609 EVT VT = LD->getMemoryVT();
1610 EVT DstVT = N->getValueType(0);
1611 ISD::MemIndexedMode AM = LD->getAddressingMode();
1612 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1613 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1614 int OffsetVal = (int)OffsetOp->getZExtValue();
1615
1616 // We're not doing validity checking here. That was done when checking
1617 // if we should mark the load as indexed or not. We're just selecting
1618 // the right instruction.
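  // For example (illustrative): a post-incremented i32 load with an offset of
  // 4 is selected as LDRWpost, whose results are the written-back base
  // address, the loaded value and the chain, in that order.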
1619 unsigned Opcode = 0;
1620
1621 ISD::LoadExtType ExtType = LD->getExtensionType();
1622 bool InsertTo64 = false;
1623 if (VT == MVT::i64)
1624 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1625 else if (VT == MVT::i32) {
1626 if (ExtType == ISD::NON_EXTLOAD)
1627 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1628 else if (ExtType == ISD::SEXTLOAD)
1629 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1630 else {
1631 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1632 InsertTo64 = true;
1633 // The result of the load is only i32. It's the subreg_to_reg that makes
1634 // it into an i64.
1635 DstVT = MVT::i32;
1636 }
1637 } else if (VT == MVT::i16) {
1638 if (ExtType == ISD::SEXTLOAD) {
1639 if (DstVT == MVT::i64)
1640 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1641 else
1642 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1643 } else {
1644 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1645 InsertTo64 = DstVT == MVT::i64;
1646 // The result of the load is only i32. It's the subreg_to_reg that makes
1647 // it into an i64.
1648 DstVT = MVT::i32;
1649 }
1650 } else if (VT == MVT::i8) {
1651 if (ExtType == ISD::SEXTLOAD) {
1652 if (DstVT == MVT::i64)
1653 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1654 else
1655 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1656 } else {
1657 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1658 InsertTo64 = DstVT == MVT::i64;
1659 // The result of the load is only i32. It's the subreg_to_reg that makes
1660 // it into an i64.
1661 DstVT = MVT::i32;
1662 }
1663 } else if (VT == MVT::f16) {
1664 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1665 } else if (VT == MVT::bf16) {
1666 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1667 } else if (VT == MVT::f32) {
1668 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1669 } else if (VT == MVT::f64 ||
1670 (VT.is64BitVector() && Subtarget->isLittleEndian())) {
1671 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1672 } else if (VT.is128BitVector() && Subtarget->isLittleEndian()) {
1673 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1674 } else if (VT.is64BitVector()) {
1675 if (IsPre || OffsetVal != 8)
1676 return false;
1677 switch (VT.getScalarSizeInBits()) {
1678 case 8:
1679 Opcode = AArch64::LD1Onev8b_POST;
1680 break;
1681 case 16:
1682 Opcode = AArch64::LD1Onev4h_POST;
1683 break;
1684 case 32:
1685 Opcode = AArch64::LD1Onev2s_POST;
1686 break;
1687 case 64:
1688 Opcode = AArch64::LD1Onev1d_POST;
1689 break;
1690 default:
1691 llvm_unreachable("Expected vector element to be a power of 2");
1692 }
1693 } else if (VT.is128BitVector()) {
1694 if (IsPre || OffsetVal != 16)
1695 return false;
1696 switch (VT.getScalarSizeInBits()) {
1697 case 8:
1698 Opcode = AArch64::LD1Onev16b_POST;
1699 break;
1700 case 16:
1701 Opcode = AArch64::LD1Onev8h_POST;
1702 break;
1703 case 32:
1704 Opcode = AArch64::LD1Onev4s_POST;
1705 break;
1706 case 64:
1707 Opcode = AArch64::LD1Onev2d_POST;
1708 break;
1709 default:
1710 llvm_unreachable("Expected vector element to be a power of 2");
1711 }
1712 } else
1713 return false;
1714 SDValue Chain = LD->getChain();
1715 SDValue Base = LD->getBasePtr();
1716 SDLoc dl(N);
1717 // LD1 encodes an immediate offset by using XZR as the offset register.
1718 SDValue Offset = (VT.isVector() && !Subtarget->isLittleEndian())
1719 ? CurDAG->getRegister(AArch64::XZR, MVT::i64)
1720 : CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1721 SDValue Ops[] = { Base, Offset, Chain };
1722 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1723 MVT::Other, Ops);
1724
1725 // Transfer memoperands.
1726 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1727 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1728
1729 // Either way, we're replacing the node, so tell the caller that.
1730 SDValue LoadedVal = SDValue(Res, 1);
1731 if (InsertTo64) {
1732 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1733 LoadedVal =
1734 SDValue(CurDAG->getMachineNode(
1735 AArch64::SUBREG_TO_REG, dl, MVT::i64,
1736 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1737 SubReg),
1738 0);
1739 }
1740
1741 ReplaceUses(SDValue(N, 0), LoadedVal);
1742 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1743 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1744 CurDAG->RemoveDeadNode(N);
1745 return true;
1746}
1747
1748void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1749 unsigned SubRegIdx) {
1750 SDLoc dl(N);
1751 EVT VT = N->getValueType(0);
1752 SDValue Chain = N->getOperand(0);
1753
1754 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1755 Chain};
1756
1757 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1758
1759 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1760 SDValue SuperReg = SDValue(Ld, 0);
1761 for (unsigned i = 0; i < NumVecs; ++i)
1762 ReplaceUses(SDValue(N, i),
1763 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1764
1765 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1766
1767 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1768 // because it's too simple to have needed special treatment during lowering.
1769 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1770 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1771 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1772 }
1773
1774 CurDAG->RemoveDeadNode(N);
1775}
1776
1777void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1778 unsigned Opc, unsigned SubRegIdx) {
1779 SDLoc dl(N);
1780 EVT VT = N->getValueType(0);
1781 SDValue Chain = N->getOperand(0);
1782
1783 SDValue Ops[] = {N->getOperand(1), // Mem operand
1784 N->getOperand(2), // Incremental
1785 Chain};
1786
1787 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1788 MVT::Untyped, MVT::Other};
1789
1790 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1791
1792 // Update uses of write back register
1793 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1794
1795 // Update uses of vector list
1796 SDValue SuperReg = SDValue(Ld, 1);
1797 if (NumVecs == 1)
1798 ReplaceUses(SDValue(N, 0), SuperReg);
1799 else
1800 for (unsigned i = 0; i < NumVecs; ++i)
1801 ReplaceUses(SDValue(N, i),
1802 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1803
1804 // Update the chain
1805 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1806 CurDAG->RemoveDeadNode(N);
1807}
1808
1809/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1810/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1811/// new Base and an SDValue representing the new offset.
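/// A reg+imm form (Opc_ri) is used when the offset fits the signed, VL-scaled
/// range [-8, 7]; otherwise a reg+reg form (Opc_rr) is tried, and if neither
/// matches, the original base and offset are kept with Opc_ri.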
1812std::tuple<unsigned, SDValue, SDValue>
1813AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1814 unsigned Opc_ri,
1815 const SDValue &OldBase,
1816 const SDValue &OldOffset,
1817 unsigned Scale) {
1818 SDValue NewBase = OldBase;
1819 SDValue NewOffset = OldOffset;
1820 // Detect a possible Reg+Imm addressing mode.
1821 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1822 N, OldBase, NewBase, NewOffset);
1823
1824 // Detect a possible reg+reg addressing mode, but only if we haven't already
1825 // detected a Reg+Imm one.
1826 const bool IsRegReg =
1827 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1828
1829 // Select the instruction.
1830 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1831}
1832
1833enum class SelectTypeKind {
1834 Int1 = 0,
1835 Int = 1,
1836 FP = 2,
1837 AnyType = 3,
1838};
1839
1840/// This function selects an opcode from a list of opcodes, which is
1841/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
1842/// element types, in this order.
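/// For example (illustrative): with Kind == Int and VT == nxv4i32, the minimum
/// element count of 4 selects the 32-bit entry Opcodes[2]; if the opcode list
/// is too short, 0 is returned.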
1843template <SelectTypeKind Kind>
1844static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1845 // Only match scalable vector VTs
1846 if (!VT.isScalableVector())
1847 return 0;
1848
1849 EVT EltVT = VT.getVectorElementType();
1850 unsigned Key = VT.getVectorMinNumElements();
1851 switch (Kind) {
1852 case SelectTypeKind::AnyType:
1853 break;
1854 case SelectTypeKind::Int:
1855 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1856 EltVT != MVT::i64)
1857 return 0;
1858 break;
1859 case SelectTypeKind::Int1:
1860 if (EltVT != MVT::i1)
1861 return 0;
1862 break;
1863 case SelectTypeKind::FP:
1864 if (EltVT == MVT::bf16)
1865 Key = 16;
1866 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
1867 EltVT != MVT::f64)
1868 return 0;
1869 break;
1870 }
1871
1872 unsigned Offset;
1873 switch (Key) {
1874 case 16: // 8-bit or bf16
1875 Offset = 0;
1876 break;
1877 case 8: // 16-bit
1878 Offset = 1;
1879 break;
1880 case 4: // 32-bit
1881 Offset = 2;
1882 break;
1883 case 2: // 64-bit
1884 Offset = 3;
1885 break;
1886 default:
1887 return 0;
1888 }
1889
1890 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1891}
1892
1893// This function is almost identical to SelectWhilePair, but has an
1894// extra check on the range of the immediate operand.
1895// TODO: Merge these two functions together at some point?
1896void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
1897 // Immediate can be either 0 or 1.
1898 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
1899 if (Imm->getZExtValue() > 1)
1900 return;
1901
1902 SDLoc DL(N);
1903 EVT VT = N->getValueType(0);
1904 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1905 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1906 SDValue SuperReg = SDValue(WhilePair, 0);
1907
1908 for (unsigned I = 0; I < 2; ++I)
1909 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1910 AArch64::psub0 + I, DL, VT, SuperReg));
1911
1912 CurDAG->RemoveDeadNode(N);
1913}
1914
1915void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1916 SDLoc DL(N);
1917 EVT VT = N->getValueType(0);
1918
1919 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1920
1921 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1922 SDValue SuperReg = SDValue(WhilePair, 0);
1923
1924 for (unsigned I = 0; I < 2; ++I)
1925 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1926 AArch64::psub0 + I, DL, VT, SuperReg));
1927
1928 CurDAG->RemoveDeadNode(N);
1929}
1930
1931void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1932 unsigned Opcode) {
1933 EVT VT = N->getValueType(0);
1934 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
1935 SDValue Ops = createZTuple(Regs);
1936 SDLoc DL(N);
1937 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1938 SDValue SuperReg = SDValue(Intrinsic, 0);
1939 for (unsigned i = 0; i < NumVecs; ++i)
1940 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1941 AArch64::zsub0 + i, DL, VT, SuperReg));
1942
1943 CurDAG->RemoveDeadNode(N);
1944}
1945
1946void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
1947 unsigned Opcode) {
1948 SDLoc DL(N);
1949 EVT VT = N->getValueType(0);
1950 SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
1951 Ops.push_back(/*Chain*/ N->getOperand(0));
1952
1953 SDNode *Instruction =
1954 CurDAG->getMachineNode(Opcode, DL, {MVT::Untyped, MVT::Other}, Ops);
1955 SDValue SuperReg = SDValue(Instruction, 0);
1956
1957 for (unsigned i = 0; i < NumVecs; ++i)
1958 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1959 AArch64::zsub0 + i, DL, VT, SuperReg));
1960
1961 // Copy chain
1962 unsigned ChainIdx = NumVecs;
1963 ReplaceUses(SDValue(N, ChainIdx), SDValue(Instruction, 1));
1964 CurDAG->RemoveDeadNode(N);
1965}
1966
1967void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
1968 unsigned NumVecs,
1969 bool IsZmMulti,
1970 unsigned Opcode,
1971 bool HasPred) {
1972 assert(Opcode != 0 && "Unexpected opcode");
1973
1974 SDLoc DL(N);
1975 EVT VT = N->getValueType(0);
1976 unsigned FirstVecIdx = HasPred ? 2 : 1;
1977
1978 auto GetMultiVecOperand = [=](unsigned StartIdx) {
1979 SmallVector<SDValue, 4> Regs(N->ops().slice(StartIdx, NumVecs));
1980 return createZMulTuple(Regs);
1981 };
1982
1983 SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
1984
1985 SDValue Zm;
1986 if (IsZmMulti)
1987 Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
1988 else
1989 Zm = N->getOperand(NumVecs + FirstVecIdx);
1990
1991 SDNode *Intrinsic;
1992 if (HasPred)
1993 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
1994 N->getOperand(1), Zdn, Zm);
1995 else
1996 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
1997 SDValue SuperReg = SDValue(Intrinsic, 0);
1998 for (unsigned i = 0; i < NumVecs; ++i)
1999 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2000 AArch64::zsub0 + i, DL, VT, SuperReg));
2001
2002 CurDAG->RemoveDeadNode(N);
2003}
2004
2005void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
2006 unsigned Scale, unsigned Opc_ri,
2007 unsigned Opc_rr, bool IsIntr) {
2008 assert(Scale < 5 && "Invalid scaling value.");
2009 SDLoc DL(N);
2010 EVT VT = N->getValueType(0);
2011 SDValue Chain = N->getOperand(0);
2012
2013 // Optimize addressing mode.
2014 SDValue Base, Offset;
2015 unsigned Opc;
2016 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2017 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
2018 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
2019
2020 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
2021 Base, // Memory operand
2022 Offset, Chain};
2023
2024 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2025
2026 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2027 SDValue SuperReg = SDValue(Load, 0);
2028 for (unsigned i = 0; i < NumVecs; ++i)
2029 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2030 AArch64::zsub0 + i, DL, VT, SuperReg));
2031
2032 // Copy chain
2033 unsigned ChainIdx = NumVecs;
2034 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2035 CurDAG->RemoveDeadNode(N);
2036}
2037
2038void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
2039 unsigned NumVecs,
2040 unsigned Scale,
2041 unsigned Opc_ri,
2042 unsigned Opc_rr) {
2043 assert(Scale < 4 && "Invalid scaling value.");
2044 SDLoc DL(N);
2045 EVT VT = N->getValueType(0);
2046 SDValue Chain = N->getOperand(0);
2047
2048 SDValue PNg = N->getOperand(2);
2049 SDValue Base = N->getOperand(3);
2050 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
2051 unsigned Opc;
2052 std::tie(Opc, Base, Offset) =
2053 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
2054
2055 SDValue Ops[] = {PNg, // Predicate-as-counter
2056 Base, // Memory operand
2057 Offset, Chain};
2058
2059 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2060
2061 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2062 SDValue SuperReg = SDValue(Load, 0);
2063 for (unsigned i = 0; i < NumVecs; ++i)
2064 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2065 AArch64::zsub0 + i, DL, VT, SuperReg));
2066
2067 // Copy chain
2068 unsigned ChainIdx = NumVecs;
2069 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2070 CurDAG->RemoveDeadNode(N);
2071}
2072
2073void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
2074 unsigned Opcode) {
2075 if (N->getValueType(0) != MVT::nxv4f32)
2076 return;
2077 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
2078}
2079
2080void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
2081 unsigned NumOutVecs,
2082 unsigned Opc,
2083 uint32_t MaxImm) {
2084 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
2085 if (Imm->getZExtValue() > MaxImm)
2086 return;
2087
2088 SDValue ZtValue;
2089 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2090 return;
2091
2092 SDValue Chain = Node->getOperand(0);
2093 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4), Chain};
2094 SDLoc DL(Node);
2095 EVT VT = Node->getValueType(0);
2096
2097 SDNode *Instruction =
2098 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2099 SDValue SuperReg = SDValue(Instruction, 0);
2100
2101 for (unsigned I = 0; I < NumOutVecs; ++I)
2102 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2103 AArch64::zsub0 + I, DL, VT, SuperReg));
2104
2105 // Copy chain
2106 unsigned ChainIdx = NumOutVecs;
2107 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2108 CurDAG->RemoveDeadNode(Node);
2109}
2110
2111void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
2112 unsigned NumOutVecs,
2113 unsigned Opc) {
2114 SDValue ZtValue;
2115 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2116 return;
2117
2118 SDValue Chain = Node->getOperand(0);
2119 SDValue Ops[] = {ZtValue,
2120 createZMulTuple({Node->getOperand(3), Node->getOperand(4)}),
2121 Chain};
2122
2123 SDLoc DL(Node);
2124 EVT VT = Node->getValueType(0);
2125
2126 SDNode *Instruction =
2127 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2128 SDValue SuperReg = SDValue(Instruction, 0);
2129
2130 for (unsigned I = 0; I < NumOutVecs; ++I)
2131 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2132 AArch64::zsub0 + I, DL, VT, SuperReg));
2133
2134 // Copy chain
2135 unsigned ChainIdx = NumOutVecs;
2136 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2137 CurDAG->RemoveDeadNode(Node);
2138}
2139
2140void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2141 unsigned Op) {
2142 SDLoc DL(N);
2143 EVT VT = N->getValueType(0);
2144
2145 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2146 SDValue Zd = createZMulTuple(Regs);
2147 SDValue Zn = N->getOperand(1 + NumVecs);
2148 SDValue Zm = N->getOperand(2 + NumVecs);
2149
2150 SDValue Ops[] = {Zd, Zn, Zm};
2151
2152 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
2153 SDValue SuperReg = SDValue(Intrinsic, 0);
2154 for (unsigned i = 0; i < NumVecs; ++i)
2155 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2156 AArch64::zsub0 + i, DL, VT, SuperReg));
2157
2158 CurDAG->RemoveDeadNode(N);
2159}
2160
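// Maps a ZA tile base register plus a tile number to the concrete tile
// register (e.g. ZAS0 with tile number 2 becomes ZAS2), rejecting tile numbers
// that are out of range for the given element size.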
2161bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2162 switch (BaseReg) {
2163 default:
2164 return false;
2165 case AArch64::ZA:
2166 case AArch64::ZAB0:
2167 if (TileNum == 0)
2168 break;
2169 return false;
2170 case AArch64::ZAH0:
2171 if (TileNum <= 1)
2172 break;
2173 return false;
2174 case AArch64::ZAS0:
2175 if (TileNum <= 3)
2176 break;
2177 return false;
2178 case AArch64::ZAD0:
2179 if (TileNum <= 7)
2180 break;
2181 return false;
2182 }
2183
2184 BaseReg += TileNum;
2185 return true;
2186}
2187
2188template <unsigned MaxIdx, unsigned Scale>
2189void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
2190 unsigned BaseReg, unsigned Op) {
2191 unsigned TileNum = 0;
2192 if (BaseReg != AArch64::ZA)
2193 TileNum = N->getConstantOperandVal(2);
2194
2195 if (!SelectSMETile(BaseReg, TileNum))
2196 return;
2197
2198 SDValue SliceBase, Base, Offset;
2199 if (BaseReg == AArch64::ZA)
2200 SliceBase = N->getOperand(2);
2201 else
2202 SliceBase = N->getOperand(3);
2203
2204 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2205 return;
2206
2207 SDLoc DL(N);
2208 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
2209 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
2210 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2211
2212 EVT VT = N->getValueType(0);
2213 for (unsigned I = 0; I < NumVecs; ++I)
2214 ReplaceUses(SDValue(N, I),
2215 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2216 SDValue(Mov, 0)));
2217 // Copy chain
2218 unsigned ChainIdx = NumVecs;
2219 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2220 CurDAG->RemoveDeadNode(N);
2221}
2222
2223void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2224 unsigned Op, unsigned MaxIdx,
2225 unsigned Scale, unsigned BaseReg) {
2226 // The slice operand can appear in different positions:
2227 //   Array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2228 //   Tile to vector:  llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2229 SDValue SliceBase = N->getOperand(2);
2230 if (BaseReg != AArch64::ZA)
2231 SliceBase = N->getOperand(3);
2232
2233 SDValue Base, Offset;
2234 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2235 return;
2236 // The correct ZA tile number is computed when the machine instruction is
2237 // emitted (see EmitZAInstr); the DAG cannot select a ZA tile as an output
2238 // register with ZReg.
2239 SDLoc DL(N);
2240 SmallVector<SDValue, 6> Ops;
2241 if (BaseReg != AArch64::ZA)
2242 Ops.push_back(N->getOperand(2));
2243 Ops.push_back(Base);
2244 Ops.push_back(Offset);
2245 Ops.push_back(N->getOperand(0)); // Chain
2246 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2247
2248 EVT VT = N->getValueType(0);
2249 for (unsigned I = 0; I < NumVecs; ++I)
2250 ReplaceUses(SDValue(N, I),
2251 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2252 SDValue(Mov, 0)));
2253
2254 // Copy chain
2255 unsigned ChainIdx = NumVecs;
2256 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2257 CurDAG->RemoveDeadNode(N);
2258}
2259
2260void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2261 unsigned NumOutVecs,
2262 bool IsTupleInput,
2263 unsigned Opc) {
2264 SDLoc DL(N);
2265 EVT VT = N->getValueType(0);
2266 unsigned NumInVecs = N->getNumOperands() - 1;
2267
2268 SmallVector<SDValue, 6> Ops;
2269 if (IsTupleInput) {
2270 assert((NumInVecs == 2 || NumInVecs == 4) &&
2271 "Don't know how to handle multi-register input!");
2272 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumInVecs));
2273 Ops.push_back(createZMulTuple(Regs));
2274 } else {
2275 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2276 for (unsigned I = 0; I < NumInVecs; I++)
2277 Ops.push_back(N->getOperand(1 + I));
2278 }
2279
2280 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2281 SDValue SuperReg = SDValue(Res, 0);
2282
2283 for (unsigned I = 0; I < NumOutVecs; I++)
2284 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2285 AArch64::zsub0 + I, DL, VT, SuperReg));
2286 CurDAG->RemoveDeadNode(N);
2287}
2288
2289void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2290 unsigned Opc) {
2291 SDLoc dl(N);
2292 EVT VT = N->getOperand(2)->getValueType(0);
2293
2294 // Form a REG_SEQUENCE to force register allocation.
2295 bool Is128Bit = VT.getSizeInBits() == 128;
2296 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2297 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2298
2299 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2300 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2301
2302 // Transfer memoperands.
2303 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2304 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2305
2306 ReplaceNode(N, St);
2307}
2308
2309void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2310 unsigned Scale, unsigned Opc_rr,
2311 unsigned Opc_ri) {
2312 SDLoc dl(N);
2313
2314 // Form a REG_SEQUENCE to force register allocation.
2315 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2316 SDValue RegSeq = createZTuple(Regs);
2317
2318 // Optimize addressing mode.
2319 unsigned Opc;
2320 SDValue Base, Offset;
2321 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2322 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2323 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2324
2325 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2326 Base, // address
2327 Offset, // offset
2328 N->getOperand(0)}; // chain
2329 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2330
2331 ReplaceNode(N, St);
2332}
2333
2334bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2335 SDValue &OffImm) {
2336 SDLoc dl(N);
2337 const DataLayout &DL = CurDAG->getDataLayout();
2338 const TargetLowering *TLI = getTargetLowering();
2339
2340 // Try to match it for the frame address
2341 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2342 int FI = FINode->getIndex();
2343 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2344 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2345 return true;
2346 }
2347
2348 return false;
2349}
2350
2351void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2352 unsigned Opc) {
2353 SDLoc dl(N);
2354 EVT VT = N->getOperand(2)->getValueType(0);
2355 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2356 MVT::Other}; // Type for the Chain
2357
2358 // Form a REG_SEQUENCE to force register allocation.
2359 bool Is128Bit = VT.getSizeInBits() == 128;
2360 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2361 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2362
2363 SDValue Ops[] = {RegSeq,
2364 N->getOperand(NumVecs + 1), // base register
2365 N->getOperand(NumVecs + 2), // Incremental
2366 N->getOperand(0)}; // Chain
2367 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2368
2369 ReplaceNode(N, St);
2370}
2371
2372namespace {
2373/// WidenVector - Given a value in the V64 register class, produce the
2374/// equivalent value in the V128 register class.
2375class WidenVector {
2376 SelectionDAG &DAG;
2377
2378public:
2379 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2380
2381 SDValue operator()(SDValue V64Reg) {
2382 EVT VT = V64Reg.getValueType();
2383 unsigned NarrowSize = VT.getVectorNumElements();
2384 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2385 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2386 SDLoc DL(V64Reg);
2387
2388 SDValue Undef =
2389 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2390 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2391 }
2392};
2393} // namespace
2394
2395/// NarrowVector - Given a value in the V128 register class, produce the
2396/// equivalent value in the V64 register class.
2397 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2398 EVT VT = V128Reg.getValueType();
2399 unsigned WideSize = VT.getVectorNumElements();
2400 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2401 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2402
2403 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2404 V128Reg);
2405}
2406
2407void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2408 unsigned Opc) {
2409 SDLoc dl(N);
2410 EVT VT = N->getValueType(0);
2411 bool Narrow = VT.getSizeInBits() == 64;
2412
2413 // Form a REG_SEQUENCE to force register allocation.
2414 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2415
2416 if (Narrow)
2417 transform(Regs, Regs.begin(),
2418 WidenVector(*CurDAG));
2419
2420 SDValue RegSeq = createQTuple(Regs);
2421
2422 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2423
2424 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2425
2426 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2427 N->getOperand(NumVecs + 3), N->getOperand(0)};
2428 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2429 SDValue SuperReg = SDValue(Ld, 0);
2430
2431 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2432 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2433 AArch64::qsub2, AArch64::qsub3 };
2434 for (unsigned i = 0; i < NumVecs; ++i) {
2435 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2436 if (Narrow)
2437 NV = NarrowVector(NV, *CurDAG);
2438 ReplaceUses(SDValue(N, i), NV);
2439 }
2440
2441 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2442 CurDAG->RemoveDeadNode(N);
2443}
2444
2445void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2446 unsigned Opc) {
2447 SDLoc dl(N);
2448 EVT VT = N->getValueType(0);
2449 bool Narrow = VT.getSizeInBits() == 64;
2450
2451 // Form a REG_SEQUENCE to force register allocation.
2452 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2453
2454 if (Narrow)
2455 transform(Regs, Regs.begin(),
2456 WidenVector(*CurDAG));
2457
2458 SDValue RegSeq = createQTuple(Regs);
2459
2460 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2461 RegSeq->getValueType(0), MVT::Other};
2462
2463 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2464
2465 SDValue Ops[] = {RegSeq,
2466 CurDAG->getTargetConstant(LaneNo, dl,
2467 MVT::i64), // Lane Number
2468 N->getOperand(NumVecs + 2), // Base register
2469 N->getOperand(NumVecs + 3), // Incremental
2470 N->getOperand(0)};
2471 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2472
2473 // Update uses of the write back register
2474 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2475
2476 // Update uses of the vector list
2477 SDValue SuperReg = SDValue(Ld, 1);
2478 if (NumVecs == 1) {
2479 ReplaceUses(SDValue(N, 0),
2480 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2481 } else {
2482 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2483 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2484 AArch64::qsub2, AArch64::qsub3 };
2485 for (unsigned i = 0; i < NumVecs; ++i) {
2486 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2487 SuperReg);
2488 if (Narrow)
2489 NV = NarrowVector(NV, *CurDAG);
2490 ReplaceUses(SDValue(N, i), NV);
2491 }
2492 }
2493
2494 // Update the Chain
2495 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2496 CurDAG->RemoveDeadNode(N);
2497}
2498
2499void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2500 unsigned Opc) {
2501 SDLoc dl(N);
2502 EVT VT = N->getOperand(2)->getValueType(0);
2503 bool Narrow = VT.getSizeInBits() == 64;
2504
2505 // Form a REG_SEQUENCE to force register allocation.
2506 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2507
2508 if (Narrow)
2509 transform(Regs, Regs.begin(),
2510 WidenVector(*CurDAG));
2511
2512 SDValue RegSeq = createQTuple(Regs);
2513
2514 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2515
2516 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2517 N->getOperand(NumVecs + 3), N->getOperand(0)};
2518 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2519
2520 // Transfer memoperands.
2521 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2522 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2523
2524 ReplaceNode(N, St);
2525}
2526
2527void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2528 unsigned Opc) {
2529 SDLoc dl(N);
2530 EVT VT = N->getOperand(2)->getValueType(0);
2531 bool Narrow = VT.getSizeInBits() == 64;
2532
2533 // Form a REG_SEQUENCE to force register allocation.
2534 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2535
2536 if (Narrow)
2537 transform(Regs, Regs.begin(),
2538 WidenVector(*CurDAG));
2539
2540 SDValue RegSeq = createQTuple(Regs);
2541
2542 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2543 MVT::Other};
2544
2545 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2546
2547 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2548 N->getOperand(NumVecs + 2), // Base Register
2549 N->getOperand(NumVecs + 3), // Incremental
2550 N->getOperand(0)};
2551 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2552
2553 // Transfer memoperands.
2554 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2555 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2556
2557 ReplaceNode(N, St);
2558}
2559
2560 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2561 unsigned &Opc, SDValue &Opd0,
2562 unsigned &LSB, unsigned &MSB,
2563 unsigned NumberOfIgnoredLowBits,
2564 bool BiggerPattern) {
2565 assert(N->getOpcode() == ISD::AND &&
2566 "N must be a AND operation to call this function");
2567
2568 EVT VT = N->getValueType(0);
2569
2570 // Here we can test the type of VT and return false when the type does not
2571 // match, but since it is done prior to that call in the current context
2572 // we turned that into an assert to avoid redundant code.
2573 assert((VT == MVT::i32 || VT == MVT::i64) &&
2574 "Type checking must have been done before calling this function");
2575
2576 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2577 // changed the AND node to a 32-bit mask operation. We'll have to
2578 // undo that as part of the transform here if we want to catch all
2579 // the opportunities.
2580 // Currently the NumberOfIgnoredLowBits argument helps to recover
2581 // from these situations when matching bigger pattern (bitfield insert).
2582
2583 // For unsigned extracts, check for a shift right and mask
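  // E.g. (illustrative): (and (srl x, 4), 0xff) is selected as UBFM x, 4, 11,
  // since LSB = 4 and MSB = 4 + countr_one(0xff) - 1 = 11.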
2584 uint64_t AndImm = 0;
2585 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2586 return false;
2587
2588 const SDNode *Op0 = N->getOperand(0).getNode();
2589
2590 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2591 // simplified. Try to undo that
2592 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2593
2594 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2595 if (AndImm & (AndImm + 1))
2596 return false;
2597
2598 bool ClampMSB = false;
2599 uint64_t SrlImm = 0;
2600 // Handle the SRL + ANY_EXTEND case.
2601 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2602 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2603 // Extend the incoming operand of the SRL to 64-bit.
2604 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2605 // Make sure to clamp the MSB so that we preserve the semantics of the
2606 // original operations.
2607 ClampMSB = true;
2608 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2609 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
2610 SrlImm)) {
2611 // If the shift result was truncated, we can still combine them.
2612 Opd0 = Op0->getOperand(0).getOperand(0);
2613
2614 // Use the type of SRL node.
2615 VT = Opd0->getValueType(0);
2616 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2617 Opd0 = Op0->getOperand(0);
2618 ClampMSB = (VT == MVT::i32);
2619 } else if (BiggerPattern) {
2620 // Let's pretend a 0 shift right has been performed.
2621 // The resulting code will be at least as good as the original one
2622 // plus it may expose more opportunities for bitfield insert pattern.
2623 // FIXME: Currently we limit this to the bigger pattern, because
2624 // some optimizations expect AND and not UBFM.
2625 Opd0 = N->getOperand(0);
2626 } else
2627 return false;
2628
2629 // Bail out on large immediates. This happens when no proper
2630 // combining/constant folding was performed.
2631 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2632 LLVM_DEBUG(
2633 (dbgs() << N
2634 << ": Found large shift immediate, this should not happen\n"));
2635 return false;
2636 }
2637
2638 LSB = SrlImm;
2639 MSB = SrlImm +
2640 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2641 : llvm::countr_one<uint64_t>(AndImm)) -
2642 1;
2643 if (ClampMSB)
2644 // Since we're moving the extend before the right shift operation, we need
2645 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2646 // the zeros which would get shifted in with the original right shift
2647 // operation.
2648 MSB = MSB > 31 ? 31 : MSB;
2649
2650 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2651 return true;
2652}
2653
2654 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2655 SDValue &Opd0, unsigned &Immr,
2656 unsigned &Imms) {
2657 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2658
2659 EVT VT = N->getValueType(0);
2660 unsigned BitWidth = VT.getSizeInBits();
2661 assert((VT == MVT::i32 || VT == MVT::i64) &&
2662 "Type checking must have been done before calling this function");
2663
2664 SDValue Op = N->getOperand(0);
2665 if (Op->getOpcode() == ISD::TRUNCATE) {
2666 Op = Op->getOperand(0);
2667 VT = Op->getValueType(0);
2668 BitWidth = VT.getSizeInBits();
2669 }
2670
2671 uint64_t ShiftImm;
2672 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2673 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2674 return false;
2675
2676 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2677 if (ShiftImm + Width > BitWidth)
2678 return false;
2679
2680 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2681 Opd0 = Op.getOperand(0);
2682 Immr = ShiftImm;
2683 Imms = ShiftImm + Width - 1;
2684 return true;
2685}
2686
2687 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2688 SDValue &Opd0, unsigned &LSB,
2689 unsigned &MSB) {
2690 // We are looking for the following pattern which basically extracts several
2691 // contiguous bits from the source value and places them at the LSB of the
2692 // destination value; all other bits of the destination value are set to zero:
2693 //
2694 // Value2 = AND Value, MaskImm
2695 // SRL Value2, ShiftImm
2696 //
2697 // where MaskImm >> ShiftImm, a contiguous mask of ones, gives the bit width.
2698 //
2699 // This gets selected into a single UBFM:
2700 //
2701 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2702 //
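// For example (illustrative): (srl (and x, 0xff0), 4) extracts bits [11:4]
// of x and is selected as UBFM x, 4, 11.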
2703
2704 if (N->getOpcode() != ISD::SRL)
2705 return false;
2706
2707 uint64_t AndMask = 0;
2708 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2709 return false;
2710
2711 Opd0 = N->getOperand(0).getOperand(0);
2712
2713 uint64_t SrlImm = 0;
2714 if (!isIntImmediate(N->getOperand(1), SrlImm))
2715 return false;
2716
2717 // Check whether we really have several bits extract here.
2718 if (!isMask_64(AndMask >> SrlImm))
2719 return false;
2720
2721 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2722 LSB = SrlImm;
2723 MSB = llvm::Log2_64(AndMask);
2724 return true;
2725}
2726
2727static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2728 unsigned &Immr, unsigned &Imms,
2729 bool BiggerPattern) {
2730 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2731 "N must be a SHR/SRA operation to call this function");
2732
2733 EVT VT = N->getValueType(0);
2734
2735 // Here we can test the type of VT and return false when the type does not
2736 // match, but since it is done prior to that call in the current context
2737 // we turned that into an assert to avoid redundant code.
2738 assert((VT == MVT::i32 || VT == MVT::i64) &&
2739 "Type checking must have been done before calling this function");
2740
2741 // Check for AND + SRL doing several bits extract.
2742 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2743 return true;
2744
2745 // We're looking for a shift of a shift.
2746 uint64_t ShlImm = 0;
2747 uint64_t TruncBits = 0;
2748 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2749 Opd0 = N->getOperand(0).getOperand(0);
2750 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2751 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2752 // We are looking for a shift of truncate. Truncate from i64 to i32 could
2753 // be considered as setting high 32 bits as zero. Our strategy here is to
2754 // always generate 64bit UBFM. This consistency will help the CSE pass
2755 // later find more redundancy.
2756 Opd0 = N->getOperand(0).getOperand(0);
2757 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2758 VT = Opd0.getValueType();
2759 assert(VT == MVT::i64 && "the promoted type should be i64");
2760 } else if (BiggerPattern) {
2761 // Let's pretend a 0 shift left has been performed.
2762 // FIXME: Currently we limit this to the bigger pattern case,
2763 // because some optimizations expect AND and not UBFM
2764 Opd0 = N->getOperand(0);
2765 } else
2766 return false;
2767
2768 // Missing combines/constant folding may have left us with strange
2769 // constants.
2770 if (ShlImm >= VT.getSizeInBits()) {
2771 LLVM_DEBUG(
2772 (dbgs() << N
2773 << ": Found large shift immediate, this should not happen\n"));
2774 return false;
2775 }
2776
2777 uint64_t SrlImm = 0;
2778 if (!isIntImmediate(N->getOperand(1), SrlImm))
2779 return false;
2780
2781 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2782 "bad amount in shift node!");
2783 int immr = SrlImm - ShlImm;
2784 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2785 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2786 // SRA requires a signed extraction
2787 if (VT == MVT::i32)
2788 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2789 else
2790 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2791 return true;
2792}
2793
2794bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2795 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2796
2797 EVT VT = N->getValueType(0);
2798 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2799 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2800 return false;
2801
2802 uint64_t ShiftImm;
2803 SDValue Op = N->getOperand(0);
2804 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2805 return false;
2806
2807 SDLoc dl(N);
2808 // Extend the incoming operand of the shift to 64-bits.
2809 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2810 unsigned Immr = ShiftImm;
2811 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2812 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2813 CurDAG->getTargetConstant(Imms, dl, VT)};
2814 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2815 return true;
2816}
2817
2818static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2819 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2820 unsigned NumberOfIgnoredLowBits = 0,
2821 bool BiggerPattern = false) {
2822 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2823 return false;
2824
2825 switch (N->getOpcode()) {
2826 default:
2827 if (!N->isMachineOpcode())
2828 return false;
2829 break;
2830 case ISD::AND:
2831 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2832 NumberOfIgnoredLowBits, BiggerPattern);
2833 case ISD::SRL:
2834 case ISD::SRA:
2835 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2836
2837 case ISD::SIGN_EXTEND_INREG:
2838 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2839 }
2840
2841 unsigned NOpc = N->getMachineOpcode();
2842 switch (NOpc) {
2843 default:
2844 return false;
2845 case AArch64::SBFMWri:
2846 case AArch64::UBFMWri:
2847 case AArch64::SBFMXri:
2848 case AArch64::UBFMXri:
2849 Opc = NOpc;
2850 Opd0 = N->getOperand(0);
2851 Immr = N->getConstantOperandVal(1);
2852 Imms = N->getConstantOperandVal(2);
2853 return true;
2854 }
2855 // Unreachable
2856 return false;
2857}
2858
2859bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2860 unsigned Opc, Immr, Imms;
2861 SDValue Opd0;
2862 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2863 return false;
2864
2865 EVT VT = N->getValueType(0);
2866 SDLoc dl(N);
2867
2868 // If the bit extract operation is 64bit but the original type is 32bit, we
2869 // need to add one EXTRACT_SUBREG.
2870 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2871 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2872 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2873
2874 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2875 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
2876 MVT::i32, SDValue(BFM, 0));
2877 ReplaceNode(N, Inner.getNode());
2878 return true;
2879 }
2880
2881 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2882 CurDAG->getTargetConstant(Imms, dl, VT)};
2883 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2884 return true;
2885}
2886
2887/// Does DstMask form a complementary pair with the mask provided by
2888/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2889/// this asks whether DstMask zeroes precisely those bits that will be set by
2890/// the other half.
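/// For example (illustrative): inserting an 8-bit field at bit 8 of an i32 has
/// BitsToBeInserted == 0x0000ff00, so DstMask == 0xffff00ff qualifies.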
2891static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2892 unsigned NumberOfIgnoredHighBits, EVT VT) {
2893 assert((VT == MVT::i32 || VT == MVT::i64) &&
2894 "i32 or i64 mask type expected!");
2895 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2896
2897 // Enable implicitTrunc as we're intentionally ignoring high bits.
2898 APInt SignificantDstMask =
2899 APInt(BitWidth, DstMask, /*isSigned=*/false, /*implicitTrunc=*/true);
2900 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2901
2902 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2903 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2904}
2905
2906// Look for bits that will be useful for later uses.
2907 // A bit is considered useless as soon as it is dropped and never used
2908 // before it has been dropped.
2909 // E.g., looking for the useful bits of x:
2910 // 1. y = x & 0x7
2911 // 2. z = y >> 2
2912 // After #1, the useful bits of x are 0x7; those useful bits then live
2913 // through y.
2914// After #2, the useful bits of x are 0x4.
2915// However, if x is used on an unpredictable instruction, then all its bits
2916// are useful.
2917// E.g.
2918// 1. y = x & 0x7
2919// 2. z = y >> 2
2920// 3. str x, [@x]
2921static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2922
2923 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2924 unsigned Depth) {
2925 uint64_t Imm =
2926 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2927 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2928 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2929 getUsefulBits(Op, UsefulBits, Depth + 1);
2930}
2931
2932 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2933 uint64_t Imm, uint64_t MSB,
2934 unsigned Depth) {
2935 // inherit the bitwidth value
2936 APInt OpUsefulBits(UsefulBits);
2937 OpUsefulBits = 1;
2938
2939 if (MSB >= Imm) {
2940 OpUsefulBits <<= MSB - Imm + 1;
2941 --OpUsefulBits;
2942 // The interesting part will be in the lower part of the result
2943 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2944 // The interesting part was starting at Imm in the argument
2945 OpUsefulBits <<= Imm;
2946 } else {
2947 OpUsefulBits <<= MSB + 1;
2948 --OpUsefulBits;
2949 // The interesting part will be shifted in the result
2950 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2951 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2952 // The interesting part was at zero in the argument
2953 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2954 }
2955
2956 UsefulBits &= OpUsefulBits;
2957}
2958
2959static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2960 unsigned Depth) {
2961 uint64_t Imm =
2962 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2963 uint64_t MSB =
2964 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2965
2966 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2967}
2968
2969 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2970 unsigned Depth) {
2971 uint64_t ShiftTypeAndValue =
2972 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2973 APInt Mask(UsefulBits);
2974 Mask.clearAllBits();
2975 Mask.flipAllBits();
2976
2977 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2978 // Shift Left
2979 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2980 Mask <<= ShiftAmt;
2981 getUsefulBits(Op, Mask, Depth + 1);
2982 Mask.lshrInPlace(ShiftAmt);
2983 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2984 // Shift Right
2985 // We do not handle AArch64_AM::ASR, because the sign will change the
2986 // number of useful bits
2987 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2988 Mask.lshrInPlace(ShiftAmt);
2989 getUsefulBits(Op, Mask, Depth + 1);
2990 Mask <<= ShiftAmt;
2991 } else
2992 return;
2993
2994 UsefulBits &= Mask;
2995}
2996
2997static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
2998 unsigned Depth) {
2999 uint64_t Imm =
3000 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3001 uint64_t MSB =
3002 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
3003
3004 APInt OpUsefulBits(UsefulBits);
3005 OpUsefulBits = 1;
3006
3007 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
3008 ResultUsefulBits.flipAllBits();
3009 APInt Mask(UsefulBits.getBitWidth(), 0);
3010
3011 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
3012
3013 if (MSB >= Imm) {
3014 // The instruction is a BFXIL.
3015 uint64_t Width = MSB - Imm + 1;
3016 uint64_t LSB = Imm;
3017
3018 OpUsefulBits <<= Width;
3019 --OpUsefulBits;
3020
3021 if (Op.getOperand(1) == Orig) {
3022 // Copy the low bits from the result to bits starting from LSB.
3023 Mask = ResultUsefulBits & OpUsefulBits;
3024 Mask <<= LSB;
3025 }
3026
3027 if (Op.getOperand(0) == Orig)
3028 // Bits starting from LSB in the input contribute to the result.
3029 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3030 } else {
3031 // The instruction is a BFI.
3032 uint64_t Width = MSB + 1;
3033 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
3034
3035 OpUsefulBits <<= Width;
3036 --OpUsefulBits;
3037 OpUsefulBits <<= LSB;
3038
3039 if (Op.getOperand(1) == Orig) {
3040 // Copy the bits from the result to the zero bits.
3041 Mask = ResultUsefulBits & OpUsefulBits;
3042 Mask.lshrInPlace(LSB);
3043 }
3044
3045 if (Op.getOperand(0) == Orig)
3046 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3047 }
3048
3049 UsefulBits &= Mask;
3050}
3051
3052static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
3053 SDValue Orig, unsigned Depth) {
3054
3055 // Users of this node should have already been instruction selected
3056 // FIXME: Can we turn that into an assert?
3057 if (!UserNode->isMachineOpcode())
3058 return;
3059
3060 switch (UserNode->getMachineOpcode()) {
3061 default:
3062 return;
3063 case AArch64::ANDSWri:
3064 case AArch64::ANDSXri:
3065 case AArch64::ANDWri:
3066 case AArch64::ANDXri:
3067 // We increment Depth only when we call the getUsefulBits
3068 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
3069 Depth);
3070 case AArch64::UBFMWri:
3071 case AArch64::UBFMXri:
3072 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
3073
3074 case AArch64::ORRWrs:
3075 case AArch64::ORRXrs:
3076 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
3077 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
3078 Depth);
3079 return;
3080 case AArch64::BFMWri:
3081 case AArch64::BFMXri:
3082 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
3083
3084 case AArch64::STRBBui:
3085 case AArch64::STURBBi:
3086 if (UserNode->getOperand(0) != Orig)
3087 return;
3088 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
3089 return;
3090
3091 case AArch64::STRHHui:
3092 case AArch64::STURHHi:
3093 if (UserNode->getOperand(0) != Orig)
3094 return;
3095 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
3096 return;
3097 }
3098}
3099
3100static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
3101 if (Depth >= SelectionDAG::MaxRecursionDepth)
3102 return;
3103 // Initialize UsefulBits
3104 if (!Depth) {
3105 unsigned Bitwidth = Op.getScalarValueSizeInBits();
3106 // At the beginning, assume every produced bits is useful
3107 UsefulBits = APInt(Bitwidth, 0);
3108 UsefulBits.flipAllBits();
3109 }
3110 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
3111
3112 for (SDNode *Node : Op.getNode()->users()) {
3113 // A use cannot produce useful bits
3114 APInt UsefulBitsForUse = APInt(UsefulBits);
3115 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
3116 UsersUsefulBits |= UsefulBitsForUse;
3117 }
3118 // UsefulBits contains the produced bits that are meaningful for the
3119 // current definition, thus a user cannot make a bit meaningful at
3120 // this point
3121 UsefulBits &= UsersUsefulBits;
3122}
3123
3124/// Create a machine node performing a notional SHL of Op by ShlAmount. If
3125/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3126/// 0, return Op unchanged.
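/// E.g. (illustrative): for i32, a notional left shift by 3 becomes
/// UBFMWri Op, 29, 28, and a right shift by 3 becomes UBFMWri Op, 3, 31.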
3127static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
3128 if (ShlAmount == 0)
3129 return Op;
3130
3131 EVT VT = Op.getValueType();
3132 SDLoc dl(Op);
3133 unsigned BitWidth = VT.getSizeInBits();
3134 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
3135
3136 SDNode *ShiftNode;
3137 if (ShlAmount > 0) {
3138 // LSL wD, wN, #Amt == UBFM wD, wN, #(BitWidth-Amt), #(BitWidth-1-Amt)
3139 ShiftNode = CurDAG->getMachineNode(
3140 UBFMOpc, dl, VT, Op,
3141 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
3142 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
3143 } else {
3144 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #(BitWidth-1)
3145 assert(ShlAmount < 0 && "expected right shift");
3146 int ShrAmount = -ShlAmount;
3147 ShiftNode = CurDAG->getMachineNode(
3148 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
3149 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
3150 }
3151
3152 return SDValue(ShiftNode, 0);
3153}
3154
3155// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3156static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3157 bool BiggerPattern,
3158 const uint64_t NonZeroBits,
3159 SDValue &Src, int &DstLSB,
3160 int &Width);
3161
3162// For bit-field-positioning pattern "shl VAL, N)".
3163static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3164 bool BiggerPattern,
3165 const uint64_t NonZeroBits,
3166 SDValue &Src, int &DstLSB,
3167 int &Width);
3168
3169/// Does this tree qualify as an attempt to move a bitfield into position,
3170/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
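/// E.g. (illustrative): (and (shl x, 8), 0xff00) positions the low byte of x
/// at bits [15:8], giving Src = x, DstLSB = 8 and Width = 8.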
3171 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
3172 bool BiggerPattern, SDValue &Src,
3173 int &DstLSB, int &Width) {
3174 EVT VT = Op.getValueType();
3175 unsigned BitWidth = VT.getSizeInBits();
3176 (void)BitWidth;
3177 assert(BitWidth == 32 || BitWidth == 64);
3178
3179 KnownBits Known = CurDAG->computeKnownBits(Op);
3180
3181 // Non-zero in the sense that they're not provably zero, which is the key
3182 // point if we want to use this value
3183 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
3184 if (!isShiftedMask_64(NonZeroBits))
3185 return false;
3186
3187 switch (Op.getOpcode()) {
3188 default:
3189 break;
3190 case ISD::AND:
3191 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3192 NonZeroBits, Src, DstLSB, Width);
3193 case ISD::SHL:
3194 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3195 NonZeroBits, Src, DstLSB, Width);
3196 }
3197
3198 return false;
3199}
3200
3201 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3202 bool BiggerPattern,
3203 const uint64_t NonZeroBits,
3204 SDValue &Src, int &DstLSB,
3205 int &Width) {
3206 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3207
3208 EVT VT = Op.getValueType();
3209 assert((VT == MVT::i32 || VT == MVT::i64) &&
3210 "Caller guarantees VT is one of i32 or i64");
3211 (void)VT;
3212
3213 uint64_t AndImm;
3214 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
3215 return false;
3216
3217 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3218 // 1) (AndImm & (1 << POS) == 0)
3219 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3220 //
3221 // 1) and 2) don't agree so something must be wrong (e.g., in
3222 // 'SelectionDAG::computeKnownBits')
3223 assert((~AndImm & NonZeroBits) == 0 &&
3224 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3225
3226 SDValue AndOp0 = Op.getOperand(0);
3227
3228 uint64_t ShlImm;
3229 SDValue ShlOp0;
3230 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3231 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3232 ShlOp0 = AndOp0.getOperand(0);
3233 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3234 isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
3235 ShlImm)) {
3236 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3237
3238 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3239 SDValue ShlVal = AndOp0.getOperand(0);
3240
3241 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3242 // expect VT to be MVT::i32.
3243 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3244
3245 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3246 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3247 } else
3248 return false;
3249
3250 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3251 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3252 // AndOp0+AND.
3253 if (!BiggerPattern && !AndOp0.hasOneUse())
3254 return false;
3255
3256 DstLSB = llvm::countr_zero(NonZeroBits);
3257 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3258
3259 // Bail out on large Width. This happens when no proper combining / constant
3260 // folding was performed.
3261 if (Width >= (int)VT.getSizeInBits()) {
3262 // If VT is i64, Width > 64 is not possible since NonZeroBits is uint64_t, and
3263 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3264 // "val".
3265 // If VT is i32, Width >= 32 means:
3266 // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
3267 // demands at least 'Width' bits (after dag-combiner). This, together with the
3268 // `any_extend` Op (undefined higher bits), indicates a missed combination
3269 // when lowering the 'and' IR instruction to a machine IR instruction.
3270 LLVM_DEBUG(
3271 dbgs()
3272 << "Found large Width in bit-field-positioning -- this indicates no "
3273 "proper combining / constant folding was performed\n");
3274 return false;
3275 }
3276
3277 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3278 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3279 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3280 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3281 // which case it is not profitable to insert an extra shift.
3282 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3283 return false;
3284
3285 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3286 return true;
3287}
3288
3289 // For the node (shl (and val, mask), N), returns true if the node is equivalent to
3290// UBFIZ.
3291 static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3292 SDValue &Src, int &DstLSB,
3293 int &Width) {
3294 // The caller should have verified that Op is a left shift with a constant
3295 // shift amount; the asserts below check that.
3296 assert(Op.getOpcode() == ISD::SHL &&
3297 "Op.getNode() should be a SHL node to call this function");
3298 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3299 "Op.getNode() should shift ShlImm to call this function");
3300
3301 uint64_t AndImm = 0;
3302 SDValue Op0 = Op.getOperand(0);
3303 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3304 return false;
3305
3306 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3307 if (isMask_64(ShiftedAndImm)) {
3308 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3309 // should end with Mask, and could be prefixed with random bits if those
3310 // bits are shifted out.
3311 //
3312 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3313 // the bits of the AND result corresponding to {x,y,z} are shifted out, so
3314 // it's fine not to extract them.
3315 Width = llvm::countr_one(ShiftedAndImm);
3316 DstLSB = ShlImm;
3317 Src = Op0.getOperand(0);
3318 return true;
3319 }
3320 return false;
3321}
3322
3323 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3324 bool BiggerPattern,
3325 const uint64_t NonZeroBits,
3326 SDValue &Src, int &DstLSB,
3327 int &Width) {
3328 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3329
3330 EVT VT = Op.getValueType();
3331 assert((VT == MVT::i32 || VT == MVT::i64) &&
3332 "Caller guarantees that type is i32 or i64");
3333 (void)VT;
3334
3335 uint64_t ShlImm;
3336 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3337 return false;
3338
3339 if (!BiggerPattern && !Op.hasOneUse())
3340 return false;
3341
3342 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3343 return true;
3344
3345 DstLSB = llvm::countr_zero(NonZeroBits);
3346 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3347
3348 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3349 return false;
3350
3351 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3352 return true;
3353}
3354
3355static bool isShiftedMask(uint64_t Mask, EVT VT) {
3356 assert(VT == MVT::i32 || VT == MVT::i64);
3357 if (VT == MVT::i32)
3358 return isShiftedMask_32(Mask);
3359 return isShiftedMask_64(Mask);
3360}
3361
3362// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3363// inserted only sets known zero bits.
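// As an illustrative example (not taken from a specific test): for i32,
// "or (and X, 0xffffff00), 0x5a" can be selected as a BFXIL that inserts the
// low 8 bits of a materialized 0x5a into X, since 0x5a only sets bits that the
// AND has proven to be zero.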
3364 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
3365 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3366
3367 EVT VT = N->getValueType(0);
3368 if (VT != MVT::i32 && VT != MVT::i64)
3369 return false;
3370
3371 unsigned BitWidth = VT.getSizeInBits();
3372
3373 uint64_t OrImm;
3374 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3375 return false;
3376
3377 // Skip this transformation if the ORR immediate can be encoded in the ORR.
3378 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3379 // performance neutral.
3380 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
3381 return false;
3382
3383 uint64_t MaskImm;
3384 SDValue And = N->getOperand(0);
3385 // Must be a single use AND with an immediate operand.
3386 if (!And.hasOneUse() ||
3387 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3388 return false;
3389
3390 // Compute the Known Zero for the AND as this allows us to catch more general
3391 // cases than just looking for AND with imm.
3392 KnownBits Known = CurDAG->computeKnownBits(And);
3393
3394 // Non-zero in the sense that they're not provably zero, which is the key
3395 // point if we want to use this value.
3396 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3397
3398 // The KnownZero mask must be a shifted mask (e.g., 0..01110..0, 11100..00).
3399 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3400 return false;
3401
3402 // The bits being inserted must only set those bits that are known to be zero.
3403 if ((OrImm & NotKnownZero) != 0) {
3404 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3405 // currently handle this case.
3406 return false;
3407 }
3408
3409 // BFI/BFXIL dst, src, #lsb, #width.
3410 int LSB = llvm::countr_one(NotKnownZero);
3411 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3412
3413 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3414 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3415 unsigned ImmS = Width - 1;
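// Worked example (illustrative values only): for a 32-bit insertion with
// LSB == 8 and Width == 4, this gives ImmR == (32 - 8) % 32 == 24 and
// ImmS == 4 - 1 == 3.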
3416
3417 // If we're creating a BFI instruction avoid cases where we need more
3418 // instructions to materialize the BFI constant as compared to the original
3419 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3420 // should be no worse in this case.
3421 bool IsBFI = LSB != 0;
3422 uint64_t BFIImm = OrImm >> LSB;
3423 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3424 // We have a BFI instruction and we know the constant can't be materialized
3425 // with a ORR-immediate with the zero register.
3426 unsigned OrChunks = 0, BFIChunks = 0;
3427 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3428 if (((OrImm >> Shift) & 0xFFFF) != 0)
3429 ++OrChunks;
3430 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3431 ++BFIChunks;
3432 }
3433 if (BFIChunks > OrChunks)
3434 return false;
3435 }
3436
3437 // Materialize the constant to be inserted.
3438 SDLoc DL(N);
3439 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3440 SDNode *MOVI = CurDAG->getMachineNode(
3441 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3442
3443 // Create the BFI/BFXIL instruction.
3444 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3445 CurDAG->getTargetConstant(ImmR, DL, VT),
3446 CurDAG->getTargetConstant(ImmS, DL, VT)};
3447 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3448 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3449 return true;
3450}
3451
3452 static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
3453 SDValue &ShiftedOperand,
3454 uint64_t &EncodedShiftImm) {
3455 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3456 if (!Dst.hasOneUse())
3457 return false;
3458
3459 EVT VT = Dst.getValueType();
3460 assert((VT == MVT::i32 || VT == MVT::i64) &&
3461 "Caller should guarantee that VT is one of i32 or i64");
3462 const unsigned SizeInBits = VT.getSizeInBits();
3463
3464 SDLoc DL(Dst.getNode());
3465 uint64_t AndImm, ShlImm;
3466 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3467 isShiftedMask_64(AndImm)) {
3468 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3469 SDValue DstOp0 = Dst.getOperand(0);
3470 if (!DstOp0.hasOneUse())
3471 return false;
3472
3473 // An example to illustrate the transformation
3474 // From:
3475 // lsr x8, x1, #1
3476 // and x8, x8, #0x3f80
3477 // bfxil x8, x1, #0, #7
3478 // To:
3479 // and x8, x1, #0x7f
3480 // ubfx x9, x1, #8, #7
3481 // orr x8, x8, x9, lsl #7
3482 //
3483 // The number of instructions remains the same, but ORR is faster than BFXIL
3484 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3485 // the dependency chain is improved after the transformation.
3486 uint64_t SrlImm;
3487 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3488 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3489 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3490 unsigned MaskWidth =
3491 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3492 unsigned UBFMOpc =
3493 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3494 SDNode *UBFMNode = CurDAG->getMachineNode(
3495 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3496 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3497 VT),
3498 CurDAG->getTargetConstant(
3499 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3500 ShiftedOperand = SDValue(UBFMNode, 0);
3501 EncodedShiftImm = AArch64_AM::getShifterImm(
3502 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3503 return true;
3504 }
3505 }
3506 return false;
3507 }
3508
3509 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3510 ShiftedOperand = Dst.getOperand(0);
3511 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3512 return true;
3513 }
3514
3515 uint64_t SrlImm;
3516 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3517 ShiftedOperand = Dst.getOperand(0);
3518 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3519 return true;
3520 }
3521 return false;
3522}
3523
3524// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3525// the operands and select it to AArch64::ORR with shifted registers if
3526// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3527static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3528 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3529 const bool BiggerPattern) {
3530 EVT VT = N->getValueType(0);
3531 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3532 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3533 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3534 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3535 assert((VT == MVT::i32 || VT == MVT::i64) &&
3536 "Expect result type to be i32 or i64 since N is combinable to BFM");
3537 SDLoc DL(N);
3538
3539 // Bail out if BFM simplifies away one node in BFM Dst.
3540 if (OrOpd1 != Dst)
3541 return false;
3542
3543 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3544 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3545 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3546 if (BiggerPattern) {
3547 uint64_t SrcAndImm;
3548 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3549 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3550 // OrOpd0 = AND Src, #Mask
3551 // So BFM simplifies away one AND node from Src and doesn't simplify away
3552 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3553 // one node (from Rd), ORR is better since it has higher throughput and
3554 // smaller latency than BFM on many AArch64 processors (and for the rest
3555 // ORR is at least as good as BFM).
3556 SDValue ShiftedOperand;
3557 uint64_t EncodedShiftImm;
3558 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3559 EncodedShiftImm)) {
3560 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3561 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3562 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3563 return true;
3564 }
3565 }
3566 return false;
3567 }
3568
3569 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3570
3571 uint64_t ShlImm;
3572 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3573 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3574 SDValue Ops[] = {
3575 Dst, Src,
3576 CurDAG->getTargetConstant(
3577 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3578 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3579 return true;
3580 }
3581
3582 // Select the following pattern to left-shifted operand rather than BFI.
3583 // %val1 = op ..
3584 // %val2 = shl %val1, #imm
3585 // %res = or %val1, %val2
3586 //
3587 // If N is selected to be BFI, we know that
3588 // 1) OrOpd0 would be the operand from which to extract bits (i.e., folded into BFI)
3589 // 2) OrOpd1 would be the destination operand (i.e., preserved)
3590 //
3591 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3592 if (OrOpd0.getOperand(0) == OrOpd1) {
3593 SDValue Ops[] = {
3594 OrOpd1, OrOpd1,
3595 CurDAG->getTargetConstant(
3596 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3597 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3598 return true;
3599 }
3600 }
3601
3602 uint64_t SrlImm;
3603 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3604 // Select the following pattern to right-shifted operand rather than BFXIL.
3605 // %val1 = op ..
3606 // %val2 = lshr %val1, #imm
3607 // %res = or %val1, %val2
3608 //
3609 // If N is selected to be BFXIL, we know that
3610 // 1) OrOpd0 would be the operand from which to extract bits (i.e., folded into BFXIL)
3611 // 2) OrOpd1 would be the destination operand (i.e., preserved)
3612 //
3613 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3614 if (OrOpd0.getOperand(0) == OrOpd1) {
3615 SDValue Ops[] = {
3616 OrOpd1, OrOpd1,
3617 CurDAG->getTargetConstant(
3618 AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
3619 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3620 return true;
3621 }
3622 }
3623
3624 return false;
3625}
3626
3627static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3628 SelectionDAG *CurDAG) {
3629 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3630
3631 EVT VT = N->getValueType(0);
3632 if (VT != MVT::i32 && VT != MVT::i64)
3633 return false;
3634
3635 unsigned BitWidth = VT.getSizeInBits();
3636
3637 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3638 // have the expected shape. Try to undo that.
3639
3640 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3641 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3642
3643 // Given an OR operation, check if we have the following pattern
3644 // ubfm c, b, imm, imm2 (or something that does the same job, see
3645 // isBitfieldExtractOp)
3646 // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
3647 // countTrailingZeros(mask2) == imm2 - imm + 1
3648 // f = d | c
3649 // if yes, replace the OR instruction with:
3650 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3651
3652 // OR is commutative, check all combinations of operand order and values of
3653 // BiggerPattern, i.e.
3654 // Opd0, Opd1, BiggerPattern=false
3655 // Opd1, Opd0, BiggerPattern=false
3656 // Opd0, Opd1, BiggerPattern=true
3657 // Opd1, Opd0, BiggerPattern=true
3658 // Several of these combinations may match, so check with BiggerPattern=false
3659 // first since that will produce better results by matching more instructions
3660 // and/or inserting fewer extra instructions.
3661 for (int I = 0; I < 4; ++I) {
3662
3663 SDValue Dst, Src;
3664 unsigned ImmR, ImmS;
3665 bool BiggerPattern = I / 2;
3666 SDValue OrOpd0Val = N->getOperand(I % 2);
3667 SDNode *OrOpd0 = OrOpd0Val.getNode();
3668 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3669 SDNode *OrOpd1 = OrOpd1Val.getNode();
3670
3671 unsigned BFXOpc;
3672 int DstLSB, Width;
3673 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3674 NumberOfIgnoredLowBits, BiggerPattern)) {
3675 // Check that the returned opcode is compatible with the pattern,
3676 // i.e., same type and zero extended (U and not S)
3677 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3678 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3679 continue;
3680
3681 // Compute the width of the bitfield insertion
3682 DstLSB = 0;
3683 Width = ImmS - ImmR + 1;
3684 // FIXME: This constraint is here to catch bitfield insertions; we may
3685 // want to widen the pattern if we want to handle the general bitfield
3686 // move case.
3687 if (Width <= 0)
3688 continue;
3689
3690 // If the mask on the insertee is correct, we have a BFXIL operation. We
3691 // can share the ImmR and ImmS values from the already-computed UBFM.
3692 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3693 BiggerPattern,
3694 Src, DstLSB, Width)) {
3695 ImmR = (BitWidth - DstLSB) % BitWidth;
3696 ImmS = Width - 1;
3697 } else
3698 continue;
3699
3700 // Check the second part of the pattern
3701 EVT VT = OrOpd1Val.getValueType();
3702 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3703
3704 // Compute the Known Zero for the candidate of the first operand.
3705 // This allows us to catch more general cases than just looking for
3706 // AND with imm. Indeed, simplify-demanded-bits may have removed
3707 // the AND instruction because it proves it was useless.
3708 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3709
3710 // Check if there is enough room for the second operand to appear
3711 // in the first one
3712 APInt BitsToBeInserted =
3713 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3714
3715 if ((BitsToBeInserted & ~Known.Zero) != 0)
3716 continue;
3717
3718 // Set the first operand
3719 uint64_t Imm;
3720 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3721 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3722 // In that case, we can eliminate the AND
3723 Dst = OrOpd1->getOperand(0);
3724 else
3725 // Maybe the AND has been removed by simplify-demanded-bits
3726 // or is useful because it discards more bits
3727 Dst = OrOpd1Val;
3728
3729 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3730 // with shifted operand is more efficient.
3731 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3732 BiggerPattern))
3733 return true;
3734
3735 // both parts match
3736 SDLoc DL(N);
3737 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3738 CurDAG->getTargetConstant(ImmS, DL, VT)};
3739 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3740 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3741 return true;
3742 }
3743
3744 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3745 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3746 // mask (e.g., 0x000ffff0).
3747 uint64_t Mask0Imm, Mask1Imm;
3748 SDValue And0 = N->getOperand(0);
3749 SDValue And1 = N->getOperand(1);
3750 if (And0.hasOneUse() && And1.hasOneUse() &&
3751 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3752 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3753 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3754 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3755
3756 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3757 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3758 // bits to be inserted.
3759 if (isShiftedMask(Mask0Imm, VT)) {
3760 std::swap(And0, And1);
3761 std::swap(Mask0Imm, Mask1Imm);
3762 }
3763
3764 SDValue Src = And1->getOperand(0);
3765 SDValue Dst = And0->getOperand(0);
3766 unsigned LSB = llvm::countr_zero(Mask1Imm);
3767 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3768
3769 // The BFXIL inserts the low-order bits from a source register, so right
3770 // shift the needed bits into place.
3771 SDLoc DL(N);
3772 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3773 uint64_t LsrImm = LSB;
3774 if (Src->hasOneUse() &&
3775 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3776 (LsrImm + LSB) < BitWidth) {
3777 Src = Src->getOperand(0);
3778 LsrImm += LSB;
3779 }
3780
3781 SDNode *LSR = CurDAG->getMachineNode(
3782 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3783 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3784
3785 // BFXIL is an alias of BFM, so translate to BFM operands.
3786 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3787 unsigned ImmS = Width - 1;
3788
3789 // Create the BFXIL instruction.
3790 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3791 CurDAG->getTargetConstant(ImmR, DL, VT),
3792 CurDAG->getTargetConstant(ImmS, DL, VT)};
3793 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3794 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3795 return true;
3796 }
3797
3798 return false;
3799}
3800
3801bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3802 if (N->getOpcode() != ISD::OR)
3803 return false;
3804
3805 APInt NUsefulBits;
3806 getUsefulBits(SDValue(N, 0), NUsefulBits);
3807
3808 // If none of the bits are useful, just return UNDEF.
3809 if (!NUsefulBits) {
3810 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3811 return true;
3812 }
3813
3814 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3815 return true;
3816
3817 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3818}
3819
3820 /// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3821/// equivalent of a left shift by a constant amount followed by an and masking
3822/// out a contiguous set of bits.
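/// For example (illustrative): for i32, "(and (shl x, 4), 0xff0)" is selected
/// as a UBFMWri with ImmR == 28 and ImmS == 7, i.e. UBFIZ x, #4, #8.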
3823bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3824 if (N->getOpcode() != ISD::AND)
3825 return false;
3826
3827 EVT VT = N->getValueType(0);
3828 if (VT != MVT::i32 && VT != MVT::i64)
3829 return false;
3830
3831 SDValue Op0;
3832 int DstLSB, Width;
3833 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3834 Op0, DstLSB, Width))
3835 return false;
3836
3837 // ImmR is the rotate right amount.
3838 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3839 // ImmS is the most significant bit of the source to be moved.
3840 unsigned ImmS = Width - 1;
3841
3842 SDLoc DL(N);
3843 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3844 CurDAG->getTargetConstant(ImmS, DL, VT)};
3845 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3846 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3847 return true;
3848}
3849
3850/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3851/// variable shift/rotate instructions.
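// Illustrative example (not from a specific test): for an i64 shift, the
// variable shift instructions only use the low 6 bits of the amount, so
// "shl x, (add amt, 64)" can be selected as LSLVXr x, amt with no ADD.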
3852bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3853 EVT VT = N->getValueType(0);
3854
3855 unsigned Opc;
3856 switch (N->getOpcode()) {
3857 case ISD::ROTR:
3858 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3859 break;
3860 case ISD::SHL:
3861 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3862 break;
3863 case ISD::SRL:
3864 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3865 break;
3866 case ISD::SRA:
3867 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3868 break;
3869 default:
3870 return false;
3871 }
3872
3873 uint64_t Size;
3874 uint64_t Bits;
3875 if (VT == MVT::i32) {
3876 Bits = 5;
3877 Size = 32;
3878 } else if (VT == MVT::i64) {
3879 Bits = 6;
3880 Size = 64;
3881 } else
3882 return false;
3883
3884 SDValue ShiftAmt = N->getOperand(1);
3885 SDLoc DL(N);
3886 SDValue NewShiftAmt;
3887
3888 // Skip over an extend of the shift amount.
3889 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3890 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3891 ShiftAmt = ShiftAmt->getOperand(0);
3892
3893 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3894 SDValue Add0 = ShiftAmt->getOperand(0);
3895 SDValue Add1 = ShiftAmt->getOperand(1);
3896 uint64_t Add0Imm;
3897 uint64_t Add1Imm;
3898 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
3899 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3900 // to avoid the ADD/SUB.
3901 NewShiftAmt = Add0;
3902 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3903 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
3904 (Add0Imm % Size == 0)) {
3905 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3906 // to generate a NEG instead of a SUB from a constant.
3907 unsigned NegOpc;
3908 unsigned ZeroReg;
3909 EVT SubVT = ShiftAmt->getValueType(0);
3910 if (SubVT == MVT::i32) {
3911 NegOpc = AArch64::SUBWrr;
3912 ZeroReg = AArch64::WZR;
3913 } else {
3914 assert(SubVT == MVT::i64);
3915 NegOpc = AArch64::SUBXrr;
3916 ZeroReg = AArch64::XZR;
3917 }
3918 SDValue Zero =
3919 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3920 MachineSDNode *Neg =
3921 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
3922 NewShiftAmt = SDValue(Neg, 0);
3923 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3924 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
3925 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3926 // to generate a NOT instead of a SUB from a constant.
3927 unsigned NotOpc;
3928 unsigned ZeroReg;
3929 EVT SubVT = ShiftAmt->getValueType(0);
3930 if (SubVT == MVT::i32) {
3931 NotOpc = AArch64::ORNWrr;
3932 ZeroReg = AArch64::WZR;
3933 } else {
3934 assert(SubVT == MVT::i64);
3935 NotOpc = AArch64::ORNXrr;
3936 ZeroReg = AArch64::XZR;
3937 }
3938 SDValue Zero =
3939 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3940 MachineSDNode *Not =
3941 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
3942 NewShiftAmt = SDValue(Not, 0);
3943 } else
3944 return false;
3945 } else {
3946 // If the shift amount is masked with an AND, check that the mask covers the
3947 // bits that are implicitly ANDed off by the above opcodes and if so, skip
3948 // the AND.
3949 uint64_t MaskImm;
3950 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
3951 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
3952 return false;
3953
3954 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
3955 return false;
3956
3957 NewShiftAmt = ShiftAmt->getOperand(0);
3958 }
3959
3960 // Narrow/widen the shift amount to match the size of the shift operation.
3961 if (VT == MVT::i32)
3962 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
3963 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
3964 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
3965 MachineSDNode *Ext = CurDAG->getMachineNode(
3966 AArch64::SUBREG_TO_REG, DL, VT,
3967 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
3968 NewShiftAmt = SDValue(Ext, 0);
3969 }
3970
3971 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
3972 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3973 return true;
3974}
3975
3976 static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
3977 SDValue &FixedPos,
3978 unsigned RegWidth,
3979 bool isReciprocal) {
3980 APFloat FVal(0.0);
3981 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
3982 FVal = CN->getValueAPF();
3983 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
3984 // Some otherwise illegal constants are allowed in this case.
3985 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
3986 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
3987 return false;
3988
3989 ConstantPoolSDNode *CN =
3990 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
3991 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
3992 } else
3993 return false;
3994
3995 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
3996 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
3997 // x-register.
3998 //
3999 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
4000 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
4001 // integers.
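// Illustrative example (values chosen for exposition): (fp_to_sint (fmul Val,
// 65536.0)) with a 32-bit result matches with FBits == 16, since 65536.0 is
// exactly 2^16; a multiplier such as 3.0 is rejected below because it is not
// an exact power of 2.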
4002 bool IsExact;
4003
4004 if (isReciprocal)
4005 if (!FVal.getExactInverse(&FVal))
4006 return false;
4007
4008 // fbits is between 1 and 64 in the worst-case, which means the fmul
4009 // could have 2^64 as an actual operand. Need 65 bits of precision.
4010 APSInt IntVal(65, true);
4011 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
4012
4013 // N.b. isPowerOf2 also checks for > 0.
4014 if (!IsExact || !IntVal.isPowerOf2())
4015 return false;
4016 unsigned FBits = IntVal.logBase2();
4017
4018 // Checks above should have guaranteed that we haven't lost information in
4019 // finding FBits, but it must still be in range.
4020 if (FBits == 0 || FBits > RegWidth) return false;
4021
4022 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
4023 return true;
4024}
4025
4026bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
4027 unsigned RegWidth) {
4028 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4029 false);
4030}
4031
4032bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
4033 SDValue &FixedPos,
4034 unsigned RegWidth) {
4035 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4036 true);
4037}
4038
4039 // Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
4040 // fields of the string, converts them to integer values, and combines these
4041 // into a single value to be used in the MRS/MSR instruction.
4042 static int getIntOperandFromRegisterString(StringRef RegString) {
4043 SmallVector<StringRef, 5> Fields;
4044 RegString.split(Fields, ':');
4045
4046 if (Fields.size() == 1)
4047 return -1;
4048
4049 assert(Fields.size() == 5
4050 && "Invalid number of fields in read register string");
4051
4052 SmallVector<int, 5> Ops;
4053 bool AllIntFields = true;
4054
4055 for (StringRef Field : Fields) {
4056 unsigned IntField;
4057 AllIntFields &= !Field.getAsInteger(10, IntField);
4058 Ops.push_back(IntField);
4059 }
4060
4061 assert(AllIntFields &&
4062 "Unexpected non-integer value in special register string.");
4063 (void)AllIntFields;
4064
4065 // Need to combine the integer fields of the string into a single value
4066 // based on the bit encoding of MRS/MSR instruction.
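// For example (hypothetical field values, not tied to a particular system
// register): the string "3:3:14:0:2" encodes to
// (3 << 14) | (3 << 11) | (14 << 7) | (0 << 3) | 2 == 0xDF02.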
4067 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
4068 (Ops[3] << 3) | (Ops[4]);
4069}
4070
4071 // Lower the read_register intrinsic to an MRS instruction node if the special
4072 // register string argument is either of the form detailed in the ACLE (the
4073 // form described in getIntOperandFromRegisterString) or is a named register
4074 // known by the MRS SysReg mapper.
4075bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
4076 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4077 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4078 SDLoc DL(N);
4079
4080 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
4081
4082 unsigned Opcode64Bit = AArch64::MRS;
4083 int Imm = getIntOperandFromRegisterString(RegString->getString());
4084 if (Imm == -1) {
4085 // No match, so use the sysreg mapper to map the remaining possible strings to
4086 // the value for the register to be used for the instruction operand.
4087 const auto *TheReg =
4088 AArch64SysReg::lookupSysRegByName(RegString->getString());
4089 if (TheReg && TheReg->Readable &&
4090 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4091 Imm = TheReg->Encoding;
4092 else
4093 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4094
4095 if (Imm == -1) {
4096 // Still no match, see if this is "pc" or give up.
4097 if (!ReadIs128Bit && RegString->getString() == "pc") {
4098 Opcode64Bit = AArch64::ADR;
4099 Imm = 0;
4100 } else {
4101 return false;
4102 }
4103 }
4104 }
4105
4106 SDValue InChain = N->getOperand(0);
4107 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
4108 if (!ReadIs128Bit) {
4109 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
4110 {SysRegImm, InChain});
4111 } else {
4112 SDNode *MRRS = CurDAG->getMachineNode(
4113 AArch64::MRRS, DL,
4114 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
4115 {SysRegImm, InChain});
4116
4117 // Sysregs are not endian. The even register always contains the low half
4118 // of the register.
4119 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
4120 SDValue(MRRS, 0));
4121 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
4122 SDValue(MRRS, 0));
4123 SDValue OutChain = SDValue(MRRS, 1);
4124
4125 ReplaceUses(SDValue(N, 0), Lo);
4126 ReplaceUses(SDValue(N, 1), Hi);
4127 ReplaceUses(SDValue(N, 2), OutChain);
4128 }
4129 return true;
4130}
4131
4132 // Lower the write_register intrinsic to an MSR instruction node if the special
4133 // register string argument is either of the form detailed in the ACLE (the
4134 // form described in getIntOperandFromRegisterString) or is a named register
4135 // known by the MSR SysReg mapper.
4136bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
4137 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4138 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4139 SDLoc DL(N);
4140
4141 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
4142
4143 if (!WriteIs128Bit) {
4144 // Check if the register was one of those allowed as the pstatefield value
4145 // in the MSR (immediate) instruction. To accept the values allowed in the
4146 // pstatefield for the MSR (immediate) instruction, we also require that an
4147 // immediate value has been provided as an argument; we know that this is
4148 // the case as it has been ensured by semantic checking.
4149 auto trySelectPState = [&](auto PMapper, unsigned State) {
4150 if (PMapper) {
4151 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
4152 "Expected a constant integer expression.");
4153 unsigned Reg = PMapper->Encoding;
4154 uint64_t Immed = N->getConstantOperandVal(2);
4155 CurDAG->SelectNodeTo(
4156 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
4157 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
4158 return true;
4159 }
4160 return false;
4161 };
4162
4163 if (trySelectPState(
4164 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
4165 AArch64::MSRpstateImm4))
4166 return true;
4167 if (trySelectPState(
4168 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
4169 AArch64::MSRpstateImm1))
4170 return true;
4171 }
4172
4173 int Imm = getIntOperandFromRegisterString(RegString->getString());
4174 if (Imm == -1) {
4175 // Use the sysreg mapper to attempt to map the remaining possible strings
4176 // to the value for the register to be used for the MSR (register)
4177 // instruction operand.
4178 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
4179 if (TheReg && TheReg->Writeable &&
4180 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4181 Imm = TheReg->Encoding;
4182 else
4183 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4184
4185 if (Imm == -1)
4186 return false;
4187 }
4188
4189 SDValue InChain = N->getOperand(0);
4190 if (!WriteIs128Bit) {
4191 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
4192 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4193 N->getOperand(2), InChain);
4194 } else {
4195 // No endian swap. The lower half always goes into the even subreg, and the
4196 // higher half always into the odd subreg.
4197 SDNode *Pair = CurDAG->getMachineNode(
4198 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
4199 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
4200 MVT::i32),
4201 N->getOperand(2),
4202 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
4203 N->getOperand(3),
4204 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
4205
4206 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
4207 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4208 SDValue(Pair, 0), InChain);
4209 }
4210
4211 return true;
4212}
4213
4214/// We've got special pseudo-instructions for these
4215bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4216 unsigned Opcode;
4217 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4218
4219 // Leave IR for LSE if subtarget supports it.
4220 if (Subtarget->hasLSE()) return false;
4221
4222 if (MemTy == MVT::i8)
4223 Opcode = AArch64::CMP_SWAP_8;
4224 else if (MemTy == MVT::i16)
4225 Opcode = AArch64::CMP_SWAP_16;
4226 else if (MemTy == MVT::i32)
4227 Opcode = AArch64::CMP_SWAP_32;
4228 else if (MemTy == MVT::i64)
4229 Opcode = AArch64::CMP_SWAP_64;
4230 else
4231 llvm_unreachable("Unknown AtomicCmpSwap type");
4232
4233 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4234 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4235 N->getOperand(0)};
4236 SDNode *CmpSwap = CurDAG->getMachineNode(
4237 Opcode, SDLoc(N),
4238 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4239
4240 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4241 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4242
4243 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4244 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4245 CurDAG->RemoveDeadNode(N);
4246
4247 return true;
4248}
4249
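// Illustrative note for the immediate forms selected below (example values
// only): for the i32 element type, a constant of 0x1200 is encoded as
// Imm == 0x12 with Shift == 8, since it is a multiple of 256 that fits the
// shifted 8-bit immediate form.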
4250bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4251 SDValue &Shift, bool Negate) {
4252 if (!isa<ConstantSDNode>(N))
4253 return false;
4254
4255 SDLoc DL(N);
4256 APInt Val =
4257 cast<ConstantSDNode>(N)->getAPIntValue().trunc(VT.getFixedSizeInBits());
4258
4259 if (Negate)
4260 Val = -Val;
4261
4262 switch (VT.SimpleTy) {
4263 case MVT::i8:
4264 // All immediates are supported.
4265 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4266 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4267 return true;
4268 case MVT::i16:
4269 case MVT::i32:
4270 case MVT::i64:
4271 // Support 8bit unsigned immediates.
4272 if ((Val & ~0xff) == 0) {
4273 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4274 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4275 return true;
4276 }
4277 // Support 16bit unsigned immediates that are a multiple of 256.
4278 if ((Val & ~0xff00) == 0) {
4279 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4280 Imm = CurDAG->getTargetConstant(Val.lshr(8).getZExtValue(), DL, MVT::i32);
4281 return true;
4282 }
4283 break;
4284 default:
4285 break;
4286 }
4287
4288 return false;
4289}
4290
4291bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4292 SDValue &Imm, SDValue &Shift,
4293 bool Negate) {
4294 if (!isa<ConstantSDNode>(N))
4295 return false;
4296
4297 SDLoc DL(N);
4298 int64_t Val = cast<ConstantSDNode>(N)
4299 ->getAPIntValue()
4300 .trunc(VT.getFixedSizeInBits())
4301 .getSExtValue();
4302
4303 if (Negate)
4304 Val = -Val;
4305
4306 // Signed saturating instructions treat their immediate operand as unsigned,
4307 // whereas the related intrinsics define their operands to be signed. This
4308 // means we can only use the immediate form when the operand is non-negative.
4309 if (Val < 0)
4310 return false;
4311
4312 switch (VT.SimpleTy) {
4313 case MVT::i8:
4314 // All positive immediates are supported.
4315 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4316 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4317 return true;
4318 case MVT::i16:
4319 case MVT::i32:
4320 case MVT::i64:
4321 // Support 8bit positive immediates.
4322 if (Val <= 255) {
4323 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4324 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4325 return true;
4326 }
4327 // Support 16bit positive immediates that are a multiple of 256.
4328 if (Val <= 65280 && Val % 256 == 0) {
4329 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4330 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4331 return true;
4332 }
4333 break;
4334 default:
4335 break;
4336 }
4337
4338 return false;
4339}
4340
4341bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4342 SDValue &Shift) {
4343 if (!isa<ConstantSDNode>(N))
4344 return false;
4345
4346 SDLoc DL(N);
4347 int64_t Val = cast<ConstantSDNode>(N)
4348 ->getAPIntValue()
4349 .trunc(VT.getFixedSizeInBits())
4350 .getSExtValue();
4351 int32_t ImmVal, ShiftVal;
4352 if (!AArch64_AM::isSVECpyDupImm(VT.getScalarSizeInBits(), Val, ImmVal,
4353 ShiftVal))
4354 return false;
4355
4356 Shift = CurDAG->getTargetConstant(ShiftVal, DL, MVT::i32);
4357 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
4358 return true;
4359}
4360
4361bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4362 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4363 int64_t ImmVal = CNode->getSExtValue();
4364 SDLoc DL(N);
4365 if (ImmVal >= -128 && ImmVal < 128) {
4366 Imm = CurDAG->getSignedTargetConstant(ImmVal, DL, MVT::i32);
4367 return true;
4368 }
4369 }
4370 return false;
4371}
4372
4373bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4374 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4375 uint64_t ImmVal = CNode->getZExtValue();
4376
4377 switch (VT.SimpleTy) {
4378 case MVT::i8:
4379 ImmVal &= 0xFF;
4380 break;
4381 case MVT::i16:
4382 ImmVal &= 0xFFFF;
4383 break;
4384 case MVT::i32:
4385 ImmVal &= 0xFFFFFFFF;
4386 break;
4387 case MVT::i64:
4388 break;
4389 default:
4390 llvm_unreachable("Unexpected type");
4391 }
4392
4393 if (ImmVal < 256) {
4394 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4395 return true;
4396 }
4397 }
4398 return false;
4399}
4400
4401bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4402 bool Invert) {
4403 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4404 uint64_t ImmVal = CNode->getZExtValue();
4405 SDLoc DL(N);
4406
4407 if (Invert)
4408 ImmVal = ~ImmVal;
4409
4410 // Shift mask depending on type size.
4411 switch (VT.SimpleTy) {
4412 case MVT::i8:
4413 ImmVal &= 0xFF;
4414 ImmVal |= ImmVal << 8;
4415 ImmVal |= ImmVal << 16;
4416 ImmVal |= ImmVal << 32;
4417 break;
4418 case MVT::i16:
4419 ImmVal &= 0xFFFF;
4420 ImmVal |= ImmVal << 16;
4421 ImmVal |= ImmVal << 32;
4422 break;
4423 case MVT::i32:
4424 ImmVal &= 0xFFFFFFFF;
4425 ImmVal |= ImmVal << 32;
4426 break;
4427 case MVT::i64:
4428 break;
4429 default:
4430 llvm_unreachable("Unexpected type");
4431 }
4432
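// For example (illustrative): an i16 immediate 0x00ff is replicated to
// 0x00ff00ff00ff00ff above before being passed to processLogicalImmediate,
// which checks whether it is a valid 64-bit logical immediate encoding.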
4433 uint64_t encoding;
4434 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
4435 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
4436 return true;
4437 }
4438 }
4439 return false;
4440}
4441
4442// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4443// Rather than attempt to normalise everything we can sometimes saturate the
4444// shift amount during selection. This function also allows for consistent
4445// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4446// required by the instructions.
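// For example (illustrative): with Low == 1, High == 8 and AllowSaturation
// set, a requested byte-element shift of 11 is clamped to 8; without
// saturation the same amount is rejected.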
4447bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4448 uint64_t High, bool AllowSaturation,
4449 SDValue &Imm) {
4450 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4451 uint64_t ImmVal = CN->getZExtValue();
4452
4453 // Reject shift amounts that are too small.
4454 if (ImmVal < Low)
4455 return false;
4456
4457 // Reject or saturate shift amounts that are too big.
4458 if (ImmVal > High) {
4459 if (!AllowSaturation)
4460 return false;
4461 ImmVal = High;
4462 }
4463
4464 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4465 return true;
4466 }
4467
4468 return false;
4469}
4470
4471bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4472 // tagp(FrameIndex, IRGstack, tag_offset):
4473 // since the offset between FrameIndex and IRGstack is a compile-time
4474 // constant, this can be lowered to a single ADDG instruction.
4475 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4476 return false;
4477 }
4478
4479 SDValue IRG_SP = N->getOperand(2);
4480 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4481 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4482 return false;
4483 }
4484
4485 const TargetLowering *TLI = getTargetLowering();
4486 SDLoc DL(N);
4487 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4488 SDValue FiOp = CurDAG->getTargetFrameIndex(
4489 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4490 int TagOffset = N->getConstantOperandVal(3);
4491
4492 SDNode *Out = CurDAG->getMachineNode(
4493 AArch64::TAGPstack, DL, MVT::i64,
4494 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4495 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4496 ReplaceNode(N, Out);
4497 return true;
4498}
4499
4500void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4501 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4502 "llvm.aarch64.tagp third argument must be an immediate");
4503 if (trySelectStackSlotTagP(N))
4504 return;
4505 // FIXME: the above applies whenever the offset between Op1 and Op2 is a
4506 // compile-time constant, not just for stack allocations.
4507
4508 // General case for unrelated pointers in Op1 and Op2.
4509 SDLoc DL(N);
4510 int TagOffset = N->getConstantOperandVal(3);
4511 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4512 {N->getOperand(1), N->getOperand(2)});
4513 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4514 {SDValue(N1, 0), N->getOperand(2)});
4515 SDNode *N3 = CurDAG->getMachineNode(
4516 AArch64::ADDG, DL, MVT::i64,
4517 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4518 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4519 ReplaceNode(N, N3);
4520}
4521
4522bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4523 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4524
4525 // Bail when not a "cast" like insert_subvector.
4526 if (N->getConstantOperandVal(2) != 0)
4527 return false;
4528 if (!N->getOperand(0).isUndef())
4529 return false;
4530
4531 // Bail when normal isel should do the job.
4532 EVT VT = N->getValueType(0);
4533 EVT InVT = N->getOperand(1).getValueType();
4534 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4535 return false;
4536 if (InVT.getSizeInBits() <= 128)
4537 return false;
4538
4539 // NOTE: We can only get here when doing fixed length SVE code generation.
4540 // We do manual selection because the types involved are not linked to real
4541 // registers (despite being legal) and must be coerced into SVE registers.
4542
4543 assert(VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
4544 "Expected to insert into a packed scalable vector!");
4545
4546 SDLoc DL(N);
4547 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4548 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4549 N->getOperand(1), RC));
4550 return true;
4551}
4552
4553bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4554 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4555
4556 // Bail when not a "cast" like extract_subvector.
4557 if (N->getConstantOperandVal(1) != 0)
4558 return false;
4559
4560 // Bail when normal isel can do the job.
4561 EVT VT = N->getValueType(0);
4562 EVT InVT = N->getOperand(0).getValueType();
4563 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4564 return false;
4565 if (VT.getSizeInBits() <= 128)
4566 return false;
4567
4568 // NOTE: We can only get here when doing fixed length SVE code generation.
4569 // We do manual selection because the types involved are not linked to real
4570 // registers (despite being legal) and must be coerced into SVE registers.
4571
4572 assert(InVT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
4573 "Expected to extract from a packed scalable vector!");
4574
4575 SDLoc DL(N);
4576 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4577 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4578 N->getOperand(0), RC));
4579 return true;
4580}
4581
4582bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4583 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4584
4585 SDValue N0 = N->getOperand(0);
4586 SDValue N1 = N->getOperand(1);
4587
4588 EVT VT = N->getValueType(0);
4589 SDLoc DL(N);
4590
4591 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4592 // Rotate by a constant is a funnel shift in IR which is expanded to
4593 // an OR with shifted operands.
4594 // We do the following transform:
4595 // OR N0, N1 -> xar (x, y, imm)
4596 // Where:
4597 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4598 // N0 = SHL_PRED true, V, splat(bits-imm)
4599 // V = (xor x, y)
4600 if (VT.isScalableVector() &&
4601 (Subtarget->hasSVE2() ||
4602 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4603 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4604 N1.getOpcode() != AArch64ISD::SRL_PRED)
4605 std::swap(N0, N1);
4606 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4607 N1.getOpcode() != AArch64ISD::SRL_PRED)
4608 return false;
4609
4610 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4611 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4612 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4613 return false;
4614
4615 if (N0.getOperand(1) != N1.getOperand(1))
4616 return false;
4617
4618 SDValue R1, R2;
4619 bool IsXOROperand = true;
4620 if (N0.getOperand(1).getOpcode() != ISD::XOR) {
4621 IsXOROperand = false;
4622 } else {
4623 R1 = N0.getOperand(1).getOperand(0);
4624 R2 = N1.getOperand(1).getOperand(1);
4625 }
4626
4627 APInt ShlAmt, ShrAmt;
4628 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4629 !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
4630 return false;
4631
4632 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4633 return false;
4634
4635 if (!IsXOROperand) {
4636 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4637 SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, VT, Zero);
4638 SDValue MOVIV = SDValue(MOV, 0);
4639
4640 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4641 SDNode *SubRegToReg = CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL,
4642 VT, Zero, MOVIV, ZSub);
4643
4644 R1 = N1->getOperand(1);
4645 R2 = SDValue(SubRegToReg, 0);
4646 }
4647
4648 SDValue Imm =
4649 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4650
4651 SDValue Ops[] = {R1, R2, Imm};
4652 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4653 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4654 AArch64::XAR_ZZZI_D})) {
4655 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4656 return true;
4657 }
4658 return false;
4659 }
4660
4661 // We have the Neon SHA3 XAR operation for v2i64, but for the types
4662 // v4i32, v8i16, v16i8 we can use SVE operations when SVE2-SHA3
4663 // is available.
4664 EVT SVT;
4665 switch (VT.getSimpleVT().SimpleTy) {
4666 case MVT::v4i32:
4667 case MVT::v2i32:
4668 SVT = MVT::nxv4i32;
4669 break;
4670 case MVT::v8i16:
4671 case MVT::v4i16:
4672 SVT = MVT::nxv8i16;
4673 break;
4674 case MVT::v16i8:
4675 case MVT::v8i8:
4676 SVT = MVT::nxv16i8;
4677 break;
4678 case MVT::v2i64:
4679 case MVT::v1i64:
4680 SVT = Subtarget->hasSHA3() ? MVT::v2i64 : MVT::nxv2i64;
4681 break;
4682 default:
4683 return false;
4684 }
4685
4686 if ((!SVT.isScalableVector() && !Subtarget->hasSHA3()) ||
4687 (SVT.isScalableVector() && !Subtarget->hasSVE2()))
4688 return false;
4689
4690 if (N0->getOpcode() != AArch64ISD::VSHL ||
4691 N1->getOpcode() != AArch64ISD::VLSHR)
4692 return false;
4693
4694 if (N0->getOperand(0) != N1->getOperand(0))
4695 return false;
4696
4697 SDValue R1, R2;
4698 bool IsXOROperand = true;
4699 if (N1->getOperand(0)->getOpcode() != ISD::XOR) {
4700 IsXOROperand = false;
4701 } else {
4702 SDValue XOR = N0.getOperand(0);
4703 R1 = XOR.getOperand(0);
4704 R2 = XOR.getOperand(1);
4705 }
4706
4707 unsigned HsAmt = N0.getConstantOperandVal(1);
4708 unsigned ShAmt = N1.getConstantOperandVal(1);
4709
4710 SDValue Imm = CurDAG->getTargetConstant(
4711 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4712
4713 unsigned VTSizeInBits = VT.getScalarSizeInBits();
4714 if (ShAmt + HsAmt != VTSizeInBits)
4715 return false;
4716
4717 if (!IsXOROperand) {
4718 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4719 SDNode *MOV =
4720 CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, MVT::v2i64, Zero);
4721 SDValue MOVIV = SDValue(MOV, 0);
4722
4723 R1 = N1->getOperand(0);
4724 R2 = MOVIV;
4725 }
4726
4727 if (SVT != VT) {
4728 SDValue Undef =
4729 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, SVT), 0);
4730
4731 if (SVT.isScalableVector() && VT.is64BitVector()) {
4732 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4733
4734 SDValue UndefQ = SDValue(
4735 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, QVT), 0);
4736 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4737
4738 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4739 UndefQ, R1, DSub),
4740 0);
4741 if (R2.getValueType() == VT)
4742 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4743 UndefQ, R2, DSub),
4744 0);
4745 }
4746
4747 SDValue SubReg = CurDAG->getTargetConstant(
4748 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL, MVT::i32);
4749
4750 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, Undef,
4751 R1, SubReg),
4752 0);
4753
4754 if (SVT.isScalableVector() || R2.getValueType() != SVT)
4755 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT,
4756 Undef, R2, SubReg),
4757 0);
4758 }
4759
4760 SDValue Ops[] = {R1, R2, Imm};
4761 SDNode *XAR = nullptr;
4762
4763 if (SVT.isScalableVector()) {
4764 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4765 SVT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4766 AArch64::XAR_ZZZI_D}))
4767 XAR = CurDAG->getMachineNode(Opc, DL, SVT, Ops);
4768 } else {
4769 XAR = CurDAG->getMachineNode(AArch64::XAR, DL, SVT, Ops);
4770 }
4771
4772 assert(XAR && "Unexpected NULL value for XAR instruction in DAG");
4773
4774 if (SVT != VT) {
4775 if (VT.is64BitVector() && SVT.isScalableVector()) {
4776 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4777
4778 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4779 SDNode *Q = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, QVT,
4780 SDValue(XAR, 0), ZSub);
4781
4782 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4783 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4784 SDValue(Q, 0), DSub);
4785 } else {
4786 SDValue SubReg = CurDAG->getTargetConstant(
4787 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL,
4788 MVT::i32);
4789 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4790 SDValue(XAR, 0), SubReg);
4791 }
4792 }
4793 ReplaceNode(N, XAR);
4794 return true;
4795}
4796
4797void AArch64DAGToDAGISel::Select(SDNode *Node) {
4798 // If we have a custom node, we already have selected!
4799 if (Node->isMachineOpcode()) {
4800 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4801 Node->setNodeId(-1);
4802 return;
4803 }
4804
4805 // A few custom selection cases.
4806 EVT VT = Node->getValueType(0);
4807
4808 switch (Node->getOpcode()) {
4809 default:
4810 break;
4811
4812 case ISD::ATOMIC_CMP_SWAP:
4813 if (SelectCMP_SWAP(Node))
4814 return;
4815 break;
4816
4817 case ISD::READ_REGISTER:
4818 case AArch64ISD::MRRS:
4819 if (tryReadRegister(Node))
4820 return;
4821 break;
4822
4823 case ISD::WRITE_REGISTER:
4824 case AArch64ISD::MSRR:
4825 if (tryWriteRegister(Node))
4826 return;
4827 break;
4828
4829 case ISD::LOAD: {
4830 // Try to select as an indexed load. Fall through to normal processing
4831 // if we can't.
4832 if (tryIndexedLoad(Node))
4833 return;
4834 break;
4835 }
4836
4837 case ISD::SRL:
4838 case ISD::AND:
4839 case ISD::SRA:
4840 case ISD::SIGN_EXTEND_INREG:
4841 if (tryBitfieldExtractOp(Node))
4842 return;
4843 if (tryBitfieldInsertInZeroOp(Node))
4844 return;
4845 [[fallthrough]];
4846 case ISD::ROTR:
4847 case ISD::SHL:
4848 if (tryShiftAmountMod(Node))
4849 return;
4850 break;
4851
4852 case ISD::SIGN_EXTEND:
4853 if (tryBitfieldExtractOpFromSExt(Node))
4854 return;
4855 break;
4856
4857 case ISD::OR:
4858 if (tryBitfieldInsertOp(Node))
4859 return;
4860 if (trySelectXAR(Node))
4861 return;
4862 break;
4863
4864 case ISD::EXTRACT_SUBVECTOR: {
4865 if (trySelectCastScalableToFixedLengthVector(Node))
4866 return;
4867 break;
4868 }
4869
4870 case ISD::INSERT_SUBVECTOR: {
4871 if (trySelectCastFixedLengthToScalableVector(Node))
4872 return;
4873 break;
4874 }
4875
4876 case ISD::Constant: {
4877 // Materialize zero constants as copies from WZR/XZR. This allows
4878 // the coalescer to propagate these into other instructions.
4879 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4880 if (ConstNode->isZero()) {
4881 if (VT == MVT::i32) {
4882 SDValue New = CurDAG->getCopyFromReg(
4883 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4884 ReplaceNode(Node, New.getNode());
4885 return;
4886 } else if (VT == MVT::i64) {
4887 SDValue New = CurDAG->getCopyFromReg(
4888 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4889 ReplaceNode(Node, New.getNode());
4890 return;
4891 }
4892 }
4893 break;
4894 }
4895
4896 case ISD::FrameIndex: {
4897 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4898 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4899 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4900 const TargetLowering *TLI = getTargetLowering();
4901 SDValue TFI = CurDAG->getTargetFrameIndex(
4902 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4903 SDLoc DL(Node);
4904 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4905 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4906 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4907 return;
4908 }
4909 case ISD::INTRINSIC_W_CHAIN: {
4910 unsigned IntNo = Node->getConstantOperandVal(1);
4911 switch (IntNo) {
4912 default:
4913 break;
4914 case Intrinsic::aarch64_gcsss: {
4915 SDLoc DL(Node);
4916 SDValue Chain = Node->getOperand(0);
4917 SDValue Val = Node->getOperand(2);
4918 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
4919 SDNode *SS1 =
4920 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
4921 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
4922 MVT::Other, Zero, SDValue(SS1, 0));
4923 ReplaceNode(Node, SS2);
4924 return;
4925 }
4926 case Intrinsic::aarch64_ldaxp:
4927 case Intrinsic::aarch64_ldxp: {
4928 unsigned Op =
4929 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4930 SDValue MemAddr = Node->getOperand(2);
4931 SDLoc DL(Node);
4932 SDValue Chain = Node->getOperand(0);
4933
4934 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4935 MVT::Other, MemAddr, Chain);
4936
4937 // Transfer memoperands.
4938 MachineMemOperand *MemOp =
4939 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4940 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4941 ReplaceNode(Node, Ld);
4942 return;
4943 }
4944 case Intrinsic::aarch64_stlxp:
4945 case Intrinsic::aarch64_stxp: {
4946 unsigned Op =
4947 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4948 SDLoc DL(Node);
4949 SDValue Chain = Node->getOperand(0);
4950 SDValue ValLo = Node->getOperand(2);
4951 SDValue ValHi = Node->getOperand(3);
4952 SDValue MemAddr = Node->getOperand(4);
4953
4954 // Place arguments in the right order.
4955 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4956
4957 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4958 // Transfer memoperands.
4959 MachineMemOperand *MemOp =
4960 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4961 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4962
4963 ReplaceNode(Node, St);
4964 return;
4965 }
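// NEON structured loads: each intrinsic is dispatched on the result vector
// type to pick the matching LD1/LD2/LD3/LD4 instruction and the subregister
// index used to split the tuple result.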
4966 case Intrinsic::aarch64_neon_ld1x2:
4967 if (VT == MVT::v8i8) {
4968 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4969 return;
4970 } else if (VT == MVT::v16i8) {
4971 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4972 return;
4973 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4974 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4975 return;
4976 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4977 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
4978 return;
4979 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4980 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
4981 return;
4982 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4983 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
4984 return;
4985 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4986 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4987 return;
4988 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4989 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
4990 return;
4991 }
4992 break;
4993 case Intrinsic::aarch64_neon_ld1x3:
4994 if (VT == MVT::v8i8) {
4995 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
4996 return;
4997 } else if (VT == MVT::v16i8) {
4998 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
4999 return;
5000 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5001 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
5002 return;
5003 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5004 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
5005 return;
5006 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5007 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
5008 return;
5009 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5010 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
5011 return;
5012 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5013 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5014 return;
5015 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5016 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
5017 return;
5018 }
5019 break;
5020 case Intrinsic::aarch64_neon_ld1x4:
5021 if (VT == MVT::v8i8) {
5022 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
5023 return;
5024 } else if (VT == MVT::v16i8) {
5025 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
5026 return;
5027 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5028 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
5029 return;
5030 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5031 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
5032 return;
5033 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5034 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
5035 return;
5036 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5037 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
5038 return;
5039 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5040 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5041 return;
5042 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5043 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
5044 return;
5045 }
5046 break;
5047 case Intrinsic::aarch64_neon_ld2:
5048 if (VT == MVT::v8i8) {
5049 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
5050 return;
5051 } else if (VT == MVT::v16i8) {
5052 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
5053 return;
5054 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5055 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
5056 return;
5057 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5058 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
5059 return;
5060 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5061 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
5062 return;
5063 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5064 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
5065 return;
5066 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5067 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5068 return;
5069 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5070 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
5071 return;
5072 }
5073 break;
5074 case Intrinsic::aarch64_neon_ld3:
5075 if (VT == MVT::v8i8) {
5076 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
5077 return;
5078 } else if (VT == MVT::v16i8) {
5079 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
5080 return;
5081 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5082 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
5083 return;
5084 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5085 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
5086 return;
5087 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5088 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
5089 return;
5090 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5091 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
5092 return;
5093 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5094 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5095 return;
5096 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5097 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
5098 return;
5099 }
5100 break;
5101 case Intrinsic::aarch64_neon_ld4:
5102 if (VT == MVT::v8i8) {
5103 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
5104 return;
5105 } else if (VT == MVT::v16i8) {
5106 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
5107 return;
5108 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5109 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
5110 return;
5111 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5112 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
5113 return;
5114 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5115 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
5116 return;
5117 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5118 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
5119 return;
5120 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5121 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5122 return;
5123 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5124 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
5125 return;
5126 }
5127 break;
5128 case Intrinsic::aarch64_neon_ld2r:
5129 if (VT == MVT::v8i8) {
5130 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
5131 return;
5132 } else if (VT == MVT::v16i8) {
5133 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
5134 return;
5135 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5136 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
5137 return;
5138 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5139 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
5140 return;
5141 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5142 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
5143 return;
5144 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5145 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
5146 return;
5147 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5148 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
5149 return;
5150 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5151 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
5152 return;
5153 }
5154 break;
5155 case Intrinsic::aarch64_neon_ld3r:
5156 if (VT == MVT::v8i8) {
5157 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
5158 return;
5159 } else if (VT == MVT::v16i8) {
5160 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
5161 return;
5162 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5163 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
5164 return;
5165 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5166 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
5167 return;
5168 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5169 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
5170 return;
5171 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5172 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
5173 return;
5174 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5175 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
5176 return;
5177 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5178 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
5179 return;
5180 }
5181 break;
5182 case Intrinsic::aarch64_neon_ld4r:
5183 if (VT == MVT::v8i8) {
5184 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
5185 return;
5186 } else if (VT == MVT::v16i8) {
5187 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
5188 return;
5189 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5190 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
5191 return;
5192 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5193 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
5194 return;
5195 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5196 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
5197 return;
5198 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5199 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
5200 return;
5201 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5202 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
5203 return;
5204 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5205 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
5206 return;
5207 }
5208 break;
5209 case Intrinsic::aarch64_neon_ld2lane:
5210 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5211 SelectLoadLane(Node, 2, AArch64::LD2i8);
5212 return;
5213 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5214 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5215 SelectLoadLane(Node, 2, AArch64::LD2i16);
5216 return;
5217 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5218 VT == MVT::v2f32) {
5219 SelectLoadLane(Node, 2, AArch64::LD2i32);
5220 return;
5221 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5222 VT == MVT::v1f64) {
5223 SelectLoadLane(Node, 2, AArch64::LD2i64);
5224 return;
5225 }
5226 break;
5227 case Intrinsic::aarch64_neon_ld3lane:
5228 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5229 SelectLoadLane(Node, 3, AArch64::LD3i8);
5230 return;
5231 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5232 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5233 SelectLoadLane(Node, 3, AArch64::LD3i16);
5234 return;
5235 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5236 VT == MVT::v2f32) {
5237 SelectLoadLane(Node, 3, AArch64::LD3i32);
5238 return;
5239 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5240 VT == MVT::v1f64) {
5241 SelectLoadLane(Node, 3, AArch64::LD3i64);
5242 return;
5243 }
5244 break;
5245 case Intrinsic::aarch64_neon_ld4lane:
5246 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5247 SelectLoadLane(Node, 4, AArch64::LD4i8);
5248 return;
5249 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5250 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5251 SelectLoadLane(Node, 4, AArch64::LD4i16);
5252 return;
5253 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5254 VT == MVT::v2f32) {
5255 SelectLoadLane(Node, 4, AArch64::LD4i32);
5256 return;
5257 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5258 VT == MVT::v1f64) {
5259 SelectLoadLane(Node, 4, AArch64::LD4i64);
5260 return;
5261 }
5262 break;
5263 case Intrinsic::aarch64_ld64b:
5264 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
5265 return;
5266 case Intrinsic::aarch64_sve_ld2q_sret: {
5267 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
5268 return;
5269 }
5270 case Intrinsic::aarch64_sve_ld3q_sret: {
5271 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
5272 return;
5273 }
5274 case Intrinsic::aarch64_sve_ld4q_sret: {
5275 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
5276 return;
5277 }
5278 case Intrinsic::aarch64_sve_ld2_sret: {
5279 if (VT == MVT::nxv16i8) {
5280 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
5281 true);
5282 return;
5283 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5284 VT == MVT::nxv8bf16) {
5285 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
5286 true);
5287 return;
5288 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5289 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
5290 true);
5291 return;
5292 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5293 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
5294 true);
5295 return;
5296 }
5297 break;
5298 }
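// Multi-vector loads with a predicate-as-counter operand: use the SME2
// streaming-mode pseudos when streaming, fall back to the SVE2p1
// instructions otherwise, and bail out if neither feature is available.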
5299 case Intrinsic::aarch64_sve_ld1_pn_x2: {
5300 if (VT == MVT::nxv16i8) {
5301 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5302 SelectContiguousMultiVectorLoad(
5303 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
5304 else if (Subtarget->hasSVE2p1())
5305 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
5306 AArch64::LD1B_2Z);
5307 else
5308 break;
5309 return;
5310 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5311 VT == MVT::nxv8bf16) {
5312 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5313 SelectContiguousMultiVectorLoad(
5314 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
5315 else if (Subtarget->hasSVE2p1())
5316 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
5317 AArch64::LD1H_2Z);
5318 else
5319 break;
5320 return;
5321 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5322 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5323 SelectContiguousMultiVectorLoad(
5324 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
5325 else if (Subtarget->hasSVE2p1())
5326 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
5327 AArch64::LD1W_2Z);
5328 else
5329 break;
5330 return;
5331 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5332 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5333 SelectContiguousMultiVectorLoad(
5334 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
5335 else if (Subtarget->hasSVE2p1())
5336 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
5337 AArch64::LD1D_2Z);
5338 else
5339 break;
5340 return;
5341 }
5342 break;
5343 }
5344 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5345 if (VT == MVT::nxv16i8) {
5346 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5347 SelectContiguousMultiVectorLoad(
5348 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5349 else if (Subtarget->hasSVE2p1())
5350 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5351 AArch64::LD1B_4Z);
5352 else
5353 break;
5354 return;
5355 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5356 VT == MVT::nxv8bf16) {
5357 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5358 SelectContiguousMultiVectorLoad(
5359 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5360 else if (Subtarget->hasSVE2p1())
5361 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5362 AArch64::LD1H_4Z);
5363 else
5364 break;
5365 return;
5366 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5367 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5368 SelectContiguousMultiVectorLoad(
5369 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5370 else if (Subtarget->hasSVE2p1())
5371 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5372 AArch64::LD1W_4Z);
5373 else
5374 break;
5375 return;
5376 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5377 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5378 SelectContiguousMultiVectorLoad(
5379 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5380 else if (Subtarget->hasSVE2p1())
5381 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5382 AArch64::LD1D_4Z);
5383 else
5384 break;
5385 return;
5386 }
5387 break;
5388 }
5389 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5390 if (VT == MVT::nxv16i8) {
5391 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5392 SelectContiguousMultiVectorLoad(Node, 2, 0,
5393 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5394 AArch64::LDNT1B_2Z_PSEUDO);
5395 else if (Subtarget->hasSVE2p1())
5396 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5397 AArch64::LDNT1B_2Z);
5398 else
5399 break;
5400 return;
5401 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5402 VT == MVT::nxv8bf16) {
5403 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5404 SelectContiguousMultiVectorLoad(Node, 2, 1,
5405 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5406 AArch64::LDNT1H_2Z_PSEUDO);
5407 else if (Subtarget->hasSVE2p1())
5408 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5409 AArch64::LDNT1H_2Z);
5410 else
5411 break;
5412 return;
5413 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5414 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5415 SelectContiguousMultiVectorLoad(Node, 2, 2,
5416 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5417 AArch64::LDNT1W_2Z_PSEUDO);
5418 else if (Subtarget->hasSVE2p1())
5419 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5420 AArch64::LDNT1W_2Z);
5421 else
5422 break;
5423 return;
5424 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5425 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5426 SelectContiguousMultiVectorLoad(Node, 2, 3,
5427 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5428 AArch64::LDNT1D_2Z_PSEUDO);
5429 else if (Subtarget->hasSVE2p1())
5430 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5431 AArch64::LDNT1D_2Z);
5432 else
5433 break;
5434 return;
5435 }
5436 break;
5437 }
5438 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5439 if (VT == MVT::nxv16i8) {
5440 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5441 SelectContiguousMultiVectorLoad(Node, 4, 0,
5442 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5443 AArch64::LDNT1B_4Z_PSEUDO);
5444 else if (Subtarget->hasSVE2p1())
5445 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5446 AArch64::LDNT1B_4Z);
5447 else
5448 break;
5449 return;
5450 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5451 VT == MVT::nxv8bf16) {
5452 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5453 SelectContiguousMultiVectorLoad(Node, 4, 1,
5454 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5455 AArch64::LDNT1H_4Z_PSEUDO);
5456 else if (Subtarget->hasSVE2p1())
5457 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5458 AArch64::LDNT1H_4Z);
5459 else
5460 break;
5461 return;
5462 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5463 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5464 SelectContiguousMultiVectorLoad(Node, 4, 2,
5465 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5466 AArch64::LDNT1W_4Z_PSEUDO);
5467 else if (Subtarget->hasSVE2p1())
5468 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5469 AArch64::LDNT1W_4Z);
5470 else
5471 break;
5472 return;
5473 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5474 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5475 SelectContiguousMultiVectorLoad(Node, 4, 3,
5476 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5477 AArch64::LDNT1D_4Z_PSEUDO);
5478 else if (Subtarget->hasSVE2p1())
5479 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5480 AArch64::LDNT1D_4Z);
5481 else
5482 break;
5483 return;
5484 }
5485 break;
5486 }
5487 case Intrinsic::aarch64_sve_ld3_sret: {
5488 if (VT == MVT::nxv16i8) {
5489 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5490 true);
5491 return;
5492 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5493 VT == MVT::nxv8bf16) {
5494 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5495 true);
5496 return;
5497 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5498 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5499 true);
5500 return;
5501 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5502 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5503 true);
5504 return;
5505 }
5506 break;
5507 }
5508 case Intrinsic::aarch64_sve_ld4_sret: {
5509 if (VT == MVT::nxv16i8) {
5510 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5511 true);
5512 return;
5513 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5514 VT == MVT::nxv8bf16) {
5515 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5516 true);
5517 return;
5518 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5519 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5520 true);
5521 return;
5522 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5523 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5524 true);
5525 return;
5526 }
5527 break;
5528 }
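// SME ZA tile reads into SVE vector tuples, selected per element size.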
5529 case Intrinsic::aarch64_sme_read_hor_vg2: {
5530 if (VT == MVT::nxv16i8) {
5531 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5532 AArch64::MOVA_2ZMXI_H_B);
5533 return;
5534 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5535 VT == MVT::nxv8bf16) {
5536 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5537 AArch64::MOVA_2ZMXI_H_H);
5538 return;
5539 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5540 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5541 AArch64::MOVA_2ZMXI_H_S);
5542 return;
5543 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5544 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5545 AArch64::MOVA_2ZMXI_H_D);
5546 return;
5547 }
5548 break;
5549 }
5550 case Intrinsic::aarch64_sme_read_ver_vg2: {
5551 if (VT == MVT::nxv16i8) {
5552 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5553 AArch64::MOVA_2ZMXI_V_B);
5554 return;
5555 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5556 VT == MVT::nxv8bf16) {
5557 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5558 AArch64::MOVA_2ZMXI_V_H);
5559 return;
5560 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5561 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5562 AArch64::MOVA_2ZMXI_V_S);
5563 return;
5564 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5565 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5566 AArch64::MOVA_2ZMXI_V_D);
5567 return;
5568 }
5569 break;
5570 }
5571 case Intrinsic::aarch64_sme_read_hor_vg4: {
5572 if (VT == MVT::nxv16i8) {
5573 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5574 AArch64::MOVA_4ZMXI_H_B);
5575 return;
5576 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5577 VT == MVT::nxv8bf16) {
5578 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5579 AArch64::MOVA_4ZMXI_H_H);
5580 return;
5581 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5582 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5583 AArch64::MOVA_4ZMXI_H_S);
5584 return;
5585 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5586 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5587 AArch64::MOVA_4ZMXI_H_D);
5588 return;
5589 }
5590 break;
5591 }
5592 case Intrinsic::aarch64_sme_read_ver_vg4: {
5593 if (VT == MVT::nxv16i8) {
5594 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5595 AArch64::MOVA_4ZMXI_V_B);
5596 return;
5597 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5598 VT == MVT::nxv8bf16) {
5599 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5600 AArch64::MOVA_4ZMXI_V_H);
5601 return;
5602 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5603 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5604 AArch64::MOVA_4ZMXI_V_S);
5605 return;
5606 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5607 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5608 AArch64::MOVA_4ZMXI_V_D);
5609 return;
5610 }
5611 break;
5612 }
5613 case Intrinsic::aarch64_sme_read_vg1x2: {
5614 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5615 AArch64::MOVA_VG2_2ZMXI);
5616 return;
5617 }
5618 case Intrinsic::aarch64_sme_read_vg1x4: {
5619 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5620 AArch64::MOVA_VG4_4ZMXI);
5621 return;
5622 }
5623 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5624 if (VT == MVT::nxv16i8) {
5625 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5626 return;
5627 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5628 VT == MVT::nxv8bf16) {
5629 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5630 return;
5631 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5632 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5633 return;
5634 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5635 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5636 return;
5637 }
5638 break;
5639 }
5640 case Intrinsic::aarch64_sme_readz_vert_x2: {
5641 if (VT == MVT::nxv16i8) {
5642 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5643 return;
5644 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5645 VT == MVT::nxv8bf16) {
5646 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5647 return;
5648 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5649 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5650 return;
5651 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5652 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5653 return;
5654 }
5655 break;
5656 }
5657 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5658 if (VT == MVT::nxv16i8) {
5659 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5660 return;
5661 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5662 VT == MVT::nxv8bf16) {
5663 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5664 return;
5665 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5666 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5667 return;
5668 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5669 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5670 return;
5671 }
5672 break;
5673 }
5674 case Intrinsic::aarch64_sme_readz_vert_x4: {
5675 if (VT == MVT::nxv16i8) {
5676 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5677 return;
5678 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5679 VT == MVT::nxv8bf16) {
5680 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5681 return;
5682 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5683 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5684 return;
5685 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5686 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5687 return;
5688 }
5689 break;
5690 }
5691 case Intrinsic::aarch64_sme_readz_x2: {
5692 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
5693 AArch64::ZA);
5694 return;
5695 }
5696 case Intrinsic::aarch64_sme_readz_x4: {
5697 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
5698 AArch64::ZA);
5699 return;
5700 }
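// The Swift async context is addressed relative to the frame pointer:
// return FP - 8 and record that the frame address is taken.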
5701 case Intrinsic::swift_async_context_addr: {
5702 SDLoc DL(Node);
5703 SDValue Chain = Node->getOperand(0);
5704 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5705 SDValue Res = SDValue(
5706 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5707 CurDAG->getTargetConstant(8, DL, MVT::i32),
5708 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5709 0);
5710 ReplaceUses(SDValue(Node, 0), Res);
5711 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5712 CurDAG->RemoveDeadNode(Node);
5713
5714 auto &MF = CurDAG->getMachineFunction();
5715 MF.getFrameInfo().setFrameAddressIsTaken(true);
5716 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5717 return;
5718 }
5719 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5720 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5721 Node->getValueType(0),
5722 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5723 AArch64::LUTI2_4ZTZI_S}))
5724 // Second Immediate must be <= 3:
5725 SelectMultiVectorLutiLane(Node, 4, Opc, 3);
5726 return;
5727 }
5728 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5729 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5730 Node->getValueType(0),
5731 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5732 // Second Immediate must be <= 1:
5733 SelectMultiVectorLutiLane(Node, 4, Opc, 1);
5734 return;
5735 }
5736 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5737 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5738 Node->getValueType(0),
5739 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5740 AArch64::LUTI2_2ZTZI_S}))
5741 // Second Immediate must be <= 7:
5742 SelectMultiVectorLutiLane(Node, 2, Opc, 7);
5743 return;
5744 }
5745 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5746 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5747 Node->getValueType(0),
5748 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5749 AArch64::LUTI4_2ZTZI_S}))
5750 // Second Immediate must be <= 3:
5751 SelectMultiVectorLutiLane(Node, 2, Opc, 3);
5752 return;
5753 }
5754 case Intrinsic::aarch64_sme_luti4_zt_x4: {
5755 SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z);
5756 return;
5757 }
5758 case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
5759 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5760 Node->getValueType(0),
5761 {AArch64::BF1CVTL_2ZZ_BtoH, AArch64::F1CVTL_2ZZ_BtoH}))
5762 SelectCVTIntrinsicFP8(Node, 2, Opc);
5763 return;
5764 case Intrinsic::aarch64_sve_fp8_cvtl2_x2:
5765 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5766 Node->getValueType(0),
5767 {AArch64::BF2CVTL_2ZZ_BtoH, AArch64::F2CVTL_2ZZ_BtoH}))
5768 SelectCVTIntrinsicFP8(Node, 2, Opc);
5769 return;
5770 case Intrinsic::aarch64_sve_fp8_cvt1_x2:
5771 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5772 Node->getValueType(0),
5773 {AArch64::BF1CVT_2ZZ_BtoH, AArch64::F1CVT_2ZZ_BtoH}))
5774 SelectCVTIntrinsicFP8(Node, 2, Opc);
5775 return;
5776 case Intrinsic::aarch64_sve_fp8_cvt2_x2:
5777 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5778 Node->getValueType(0),
5779 {AArch64::BF2CVT_2ZZ_BtoH, AArch64::F2CVT_2ZZ_BtoH}))
5780 SelectCVTIntrinsicFP8(Node, 2, Opc);
5781 return;
5782 }
5783 } break;
5784 case ISD::INTRINSIC_WO_CHAIN: {
5785 unsigned IntNo = Node->getConstantOperandVal(0);
5786 switch (IntNo) {
5787 default:
5788 break;
5789 case Intrinsic::aarch64_tagp:
5790 SelectTagP(Node);
5791 return;
5792
5793 case Intrinsic::ptrauth_auth:
5794 SelectPtrauthAuth(Node);
5795 return;
5796
5797 case Intrinsic::ptrauth_resign:
5798 SelectPtrauthResign(Node);
5799 return;
5800
5801 case Intrinsic::aarch64_neon_tbl2:
5802 SelectTable(Node, 2,
5803 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5804 false);
5805 return;
5806 case Intrinsic::aarch64_neon_tbl3:
5807 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5808 : AArch64::TBLv16i8Three,
5809 false);
5810 return;
5811 case Intrinsic::aarch64_neon_tbl4:
5812 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5813 : AArch64::TBLv16i8Four,
5814 false);
5815 return;
5816 case Intrinsic::aarch64_neon_tbx2:
5817 SelectTable(Node, 2,
5818 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5819 true);
5820 return;
5821 case Intrinsic::aarch64_neon_tbx3:
5822 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5823 : AArch64::TBXv16i8Three,
5824 true);
5825 return;
5826 case Intrinsic::aarch64_neon_tbx4:
5827 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5828 : AArch64::TBXv16i8Four,
5829 true);
5830 return;
5831 case Intrinsic::aarch64_sve_srshl_single_x2:
5832 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5833 Node->getValueType(0),
5834 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5835 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
5836 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5837 return;
5838 case Intrinsic::aarch64_sve_srshl_single_x4:
5839 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5840 Node->getValueType(0),
5841 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
5842 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
5843 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5844 return;
5845 case Intrinsic::aarch64_sve_urshl_single_x2:
5846 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5847 Node->getValueType(0),
5848 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
5849 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
5850 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5851 return;
5852 case Intrinsic::aarch64_sve_urshl_single_x4:
5853 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5854 Node->getValueType(0),
5855 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
5856 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
5857 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5858 return;
5859 case Intrinsic::aarch64_sve_srshl_x2:
5860 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5861 Node->getValueType(0),
5862 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
5863 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
5864 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5865 return;
5866 case Intrinsic::aarch64_sve_srshl_x4:
5867 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5868 Node->getValueType(0),
5869 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
5870 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
5871 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5872 return;
5873 case Intrinsic::aarch64_sve_urshl_x2:
5874 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5875 Node->getValueType(0),
5876 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
5877 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
5878 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5879 return;
5880 case Intrinsic::aarch64_sve_urshl_x4:
5881 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5882 Node->getValueType(0),
5883 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
5884 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
5885 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5886 return;
5887 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
5888 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5889 Node->getValueType(0),
5890 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
5891 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
5892 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5893 return;
5894 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
5895 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5896 Node->getValueType(0),
5897 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
5898 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
5899 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5900 return;
5901 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
5902 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5903 Node->getValueType(0),
5904 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
5905 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
5906 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5907 return;
5908 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
5909 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5910 Node->getValueType(0),
5911 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
5912 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
5913 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5914 return;
5915 case Intrinsic::aarch64_sme_fp8_scale_single_x2:
5916 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5917 Node->getValueType(0),
5918 {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S,
5919 AArch64::FSCALE_2ZZ_D}))
5920 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5921 return;
5922 case Intrinsic::aarch64_sme_fp8_scale_single_x4:
5923 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5924 Node->getValueType(0),
5925 {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S,
5926 AArch64::FSCALE_4ZZ_D}))
5927 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5928 return;
5929 case Intrinsic::aarch64_sme_fp8_scale_x2:
5930 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5931 Node->getValueType(0),
5932 {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S,
5933 AArch64::FSCALE_2Z2Z_D}))
5934 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5935 return;
5936 case Intrinsic::aarch64_sme_fp8_scale_x4:
5937 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5938 Node->getValueType(0),
5939 {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S,
5940 AArch64::FSCALE_4Z4Z_D}))
5941 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5942 return;
5943 case Intrinsic::aarch64_sve_whilege_x2:
5944 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5945 Node->getValueType(0),
5946 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
5947 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
5948 SelectWhilePair(Node, Op);
5949 return;
5950 case Intrinsic::aarch64_sve_whilegt_x2:
5951 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5952 Node->getValueType(0),
5953 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
5954 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
5955 SelectWhilePair(Node, Op);
5956 return;
5957 case Intrinsic::aarch64_sve_whilehi_x2:
5958 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5959 Node->getValueType(0),
5960 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
5961 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
5962 SelectWhilePair(Node, Op);
5963 return;
5964 case Intrinsic::aarch64_sve_whilehs_x2:
5965 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5966 Node->getValueType(0),
5967 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
5968 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
5969 SelectWhilePair(Node, Op);
5970 return;
5971 case Intrinsic::aarch64_sve_whilele_x2:
5972 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5973 Node->getValueType(0),
5974 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
5975 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
5976 SelectWhilePair(Node, Op);
5977 return;
5978 case Intrinsic::aarch64_sve_whilelo_x2:
5979 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5980 Node->getValueType(0),
5981 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
5982 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
5983 SelectWhilePair(Node, Op);
5984 return;
5985 case Intrinsic::aarch64_sve_whilels_x2:
5986 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5987 Node->getValueType(0),
5988 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
5989 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
5990 SelectWhilePair(Node, Op);
5991 return;
5992 case Intrinsic::aarch64_sve_whilelt_x2:
5993 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5994 Node->getValueType(0),
5995 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
5996 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
5997 SelectWhilePair(Node, Op);
5998 return;
5999 case Intrinsic::aarch64_sve_smax_single_x2:
6000 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6001 Node->getValueType(0),
6002 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
6003 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
6004 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6005 return;
6006 case Intrinsic::aarch64_sve_umax_single_x2:
6007 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6008 Node->getValueType(0),
6009 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
6010 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
6011 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6012 return;
6013 case Intrinsic::aarch64_sve_fmax_single_x2:
6014 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6015 Node->getValueType(0),
6016 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
6017 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
6018 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6019 return;
6020 case Intrinsic::aarch64_sve_smax_single_x4:
6021 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6022 Node->getValueType(0),
6023 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
6024 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
6025 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6026 return;
6027 case Intrinsic::aarch64_sve_umax_single_x4:
6028 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6029 Node->getValueType(0),
6030 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
6031 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
6032 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6033 return;
6034 case Intrinsic::aarch64_sve_fmax_single_x4:
6035 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6036 Node->getValueType(0),
6037 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
6038 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
6039 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6040 return;
6041 case Intrinsic::aarch64_sve_smin_single_x2:
6042 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6043 Node->getValueType(0),
6044 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
6045 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
6046 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6047 return;
6048 case Intrinsic::aarch64_sve_umin_single_x2:
6049 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6050 Node->getValueType(0),
6051 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
6052 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
6053 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6054 return;
6055 case Intrinsic::aarch64_sve_fmin_single_x2:
6056 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6057 Node->getValueType(0),
6058 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
6059 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
6060 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6061 return;
6062 case Intrinsic::aarch64_sve_smin_single_x4:
6063 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6064 Node->getValueType(0),
6065 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
6066 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
6067 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6068 return;
6069 case Intrinsic::aarch64_sve_umin_single_x4:
6070 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6071 Node->getValueType(0),
6072 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
6073 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
6074 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6075 return;
6076 case Intrinsic::aarch64_sve_fmin_single_x4:
6077 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6078 Node->getValueType(0),
6079 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
6080 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
6081 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6082 return;
6083 case Intrinsic::aarch64_sve_smax_x2:
6084 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6085 Node->getValueType(0),
6086 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
6087 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
6088 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6089 return;
6090 case Intrinsic::aarch64_sve_umax_x2:
6091 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6092 Node->getValueType(0),
6093 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
6094 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
6095 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6096 return;
6097 case Intrinsic::aarch64_sve_fmax_x2:
6098 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6099 Node->getValueType(0),
6100 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
6101 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
6102 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6103 return;
6104 case Intrinsic::aarch64_sve_smax_x4:
6105 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6106 Node->getValueType(0),
6107 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
6108 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
6109 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6110 return;
6111 case Intrinsic::aarch64_sve_umax_x4:
6112 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6113 Node->getValueType(0),
6114 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
6115 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
6116 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6117 return;
6118 case Intrinsic::aarch64_sve_fmax_x4:
6119 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6120 Node->getValueType(0),
6121 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
6122 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
6123 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6124 return;
6125 case Intrinsic::aarch64_sme_famax_x2:
6126 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6127 Node->getValueType(0),
6128 {0, AArch64::FAMAX_2Z2Z_H, AArch64::FAMAX_2Z2Z_S,
6129 AArch64::FAMAX_2Z2Z_D}))
6130 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6131 return;
6132 case Intrinsic::aarch64_sme_famax_x4:
6133 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6134 Node->getValueType(0),
6135 {0, AArch64::FAMAX_4Z4Z_H, AArch64::FAMAX_4Z4Z_S,
6136 AArch64::FAMAX_4Z4Z_D}))
6137 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6138 return;
6139 case Intrinsic::aarch64_sme_famin_x2:
6140 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6141 Node->getValueType(0),
6142 {0, AArch64::FAMIN_2Z2Z_H, AArch64::FAMIN_2Z2Z_S,
6143 AArch64::FAMIN_2Z2Z_D}))
6144 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6145 return;
6146 case Intrinsic::aarch64_sme_famin_x4:
6147 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6148 Node->getValueType(0),
6149 {0, AArch64::FAMIN_4Z4Z_H, AArch64::FAMIN_4Z4Z_S,
6150 AArch64::FAMIN_4Z4Z_D}))
6151 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6152 return;
6153 case Intrinsic::aarch64_sve_smin_x2:
6154 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6155 Node->getValueType(0),
6156 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
6157 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
6158 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6159 return;
6160 case Intrinsic::aarch64_sve_umin_x2:
6161 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6162 Node->getValueType(0),
6163 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
6164 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
6165 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6166 return;
6167 case Intrinsic::aarch64_sve_fmin_x2:
6168 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6169 Node->getValueType(0),
6170 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
6171 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
6172 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6173 return;
6174 case Intrinsic::aarch64_sve_smin_x4:
6175 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6176 Node->getValueType(0),
6177 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
6178 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
6179 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6180 return;
6181 case Intrinsic::aarch64_sve_umin_x4:
6182 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6183 Node->getValueType(0),
6184 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
6185 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
6186 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6187 return;
6188 case Intrinsic::aarch64_sve_fmin_x4:
6189 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6190 Node->getValueType(0),
6191 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
6192 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
6193 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6194 return;
6195 case Intrinsic::aarch64_sve_fmaxnm_single_x2 :
6196 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6197 Node->getValueType(0),
6198 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
6199 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
6200 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6201 return;
6202 case Intrinsic::aarch64_sve_fmaxnm_single_x4 :
6203 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6204 Node->getValueType(0),
6205 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
6206 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
6207 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6208 return;
6209 case Intrinsic::aarch64_sve_fminnm_single_x2:
6210 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6211 Node->getValueType(0),
6212 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
6213 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
6214 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6215 return;
6216 case Intrinsic::aarch64_sve_fminnm_single_x4:
6217 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6218 Node->getValueType(0),
6219 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
6220 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
6221 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6222 return;
6223 case Intrinsic::aarch64_sve_fmaxnm_x2:
6224 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6225 Node->getValueType(0),
6226 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
6227 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
6228 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6229 return;
6230 case Intrinsic::aarch64_sve_fmaxnm_x4:
6231 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6232 Node->getValueType(0),
6233 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
6234 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
6235 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6236 return;
6237 case Intrinsic::aarch64_sve_fminnm_x2:
6238 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6239 Node->getValueType(0),
6240 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
6241 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
6242 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6243 return;
6244 case Intrinsic::aarch64_sve_fminnm_x4:
6245 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6246 Node->getValueType(0),
6247 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
6248 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
6249 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6250 return;
6251 case Intrinsic::aarch64_sve_fcvtzs_x2:
6252 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
6253 return;
6254 case Intrinsic::aarch64_sve_scvtf_x2:
6255 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
6256 return;
6257 case Intrinsic::aarch64_sve_fcvtzu_x2:
6258 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
6259 return;
6260 case Intrinsic::aarch64_sve_ucvtf_x2:
6261 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
6262 return;
6263 case Intrinsic::aarch64_sve_fcvtzs_x4:
6264 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
6265 return;
6266 case Intrinsic::aarch64_sve_scvtf_x4:
6267 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
6268 return;
6269 case Intrinsic::aarch64_sve_fcvtzu_x4:
6270 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
6271 return;
6272 case Intrinsic::aarch64_sve_ucvtf_x4:
6273 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
6274 return;
6275 case Intrinsic::aarch64_sve_fcvt_widen_x2:
6276 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
6277 return;
6278 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
6279 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
6280 return;
6281 case Intrinsic::aarch64_sve_sclamp_single_x2:
6282 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6283 Node->getValueType(0),
6284 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
6285 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
6286 SelectClamp(Node, 2, Op);
6287 return;
6288 case Intrinsic::aarch64_sve_uclamp_single_x2:
6289 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6290 Node->getValueType(0),
6291 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
6292 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
6293 SelectClamp(Node, 2, Op);
6294 return;
6295 case Intrinsic::aarch64_sve_fclamp_single_x2:
6296 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6297 Node->getValueType(0),
6298 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
6299 AArch64::FCLAMP_VG2_2Z2Z_D}))
6300 SelectClamp(Node, 2, Op);
6301 return;
6302 case Intrinsic::aarch64_sve_bfclamp_single_x2:
6303 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
6304 return;
6305 case Intrinsic::aarch64_sve_sclamp_single_x4:
6306 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6307 Node->getValueType(0),
6308 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
6309 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
6310 SelectClamp(Node, 4, Op);
6311 return;
6312 case Intrinsic::aarch64_sve_uclamp_single_x4:
6313 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6314 Node->getValueType(0),
6315 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6316 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6317 SelectClamp(Node, 4, Op);
6318 return;
6319 case Intrinsic::aarch64_sve_fclamp_single_x4:
6320 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6321 Node->getValueType(0),
6322 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6323 AArch64::FCLAMP_VG4_4Z4Z_D}))
6324 SelectClamp(Node, 4, Op);
6325 return;
6326 case Intrinsic::aarch64_sve_bfclamp_single_x4:
6327 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
6328 return;
6329 case Intrinsic::aarch64_sve_add_single_x2:
6330 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6331 Node->getValueType(0),
6332 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6333 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6334 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6335 return;
6336 case Intrinsic::aarch64_sve_add_single_x4:
6337 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6338 Node->getValueType(0),
6339 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6340 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6341 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6342 return;
6343 case Intrinsic::aarch64_sve_zip_x2:
6344 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6345 Node->getValueType(0),
6346 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6347 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6348 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6349 return;
6350 case Intrinsic::aarch64_sve_zipq_x2:
6351 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6352 AArch64::ZIP_VG2_2ZZZ_Q);
6353 return;
6354 case Intrinsic::aarch64_sve_zip_x4:
6355 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6356 Node->getValueType(0),
6357 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6358 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6359 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6360 return;
6361 case Intrinsic::aarch64_sve_zipq_x4:
6362 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6363 AArch64::ZIP_VG4_4Z4Z_Q);
6364 return;
6365 case Intrinsic::aarch64_sve_uzp_x2:
6366 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6367 Node->getValueType(0),
6368 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6369 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6370 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6371 return;
6372 case Intrinsic::aarch64_sve_uzpq_x2:
6373 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6374 AArch64::UZP_VG2_2ZZZ_Q);
6375 return;
6376 case Intrinsic::aarch64_sve_uzp_x4:
6377 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6378 Node->getValueType(0),
6379 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6380 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6381 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6382 return;
6383 case Intrinsic::aarch64_sve_uzpq_x4:
6384 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6385 AArch64::UZP_VG4_4Z4Z_Q);
6386 return;
6387 case Intrinsic::aarch64_sve_sel_x2:
6388 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6389 Node->getValueType(0),
6390 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6391 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6392 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
6393 return;
6394 case Intrinsic::aarch64_sve_sel_x4:
6395 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6396 Node->getValueType(0),
6397 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6398 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6399 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
6400 return;
6401 case Intrinsic::aarch64_sve_frinta_x2:
6402 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
6403 return;
6404 case Intrinsic::aarch64_sve_frinta_x4:
6405 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
6406 return;
6407 case Intrinsic::aarch64_sve_frintm_x2:
6408 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
6409 return;
6410 case Intrinsic::aarch64_sve_frintm_x4:
6411 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
6412 return;
6413 case Intrinsic::aarch64_sve_frintn_x2:
6414 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
6415 return;
6416 case Intrinsic::aarch64_sve_frintn_x4:
6417 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
6418 return;
6419 case Intrinsic::aarch64_sve_frintp_x2:
6420 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
6421 return;
6422 case Intrinsic::aarch64_sve_frintp_x4:
6423 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
6424 return;
6425 case Intrinsic::aarch64_sve_sunpk_x2:
6426 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6427 Node->getValueType(0),
6428 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6429 AArch64::SUNPK_VG2_2ZZ_D}))
6430 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6431 return;
6432 case Intrinsic::aarch64_sve_uunpk_x2:
6433 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6434 Node->getValueType(0),
6435 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6436 AArch64::UUNPK_VG2_2ZZ_D}))
6437 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6438 return;
6439 case Intrinsic::aarch64_sve_sunpk_x4:
6440 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6441 Node->getValueType(0),
6442 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6443 AArch64::SUNPK_VG4_4Z2Z_D}))
6444 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6445 return;
6446 case Intrinsic::aarch64_sve_uunpk_x4:
6447 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6448 Node->getValueType(0),
6449 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6450 AArch64::UUNPK_VG4_4Z2Z_D}))
6451 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6452 return;
6453 case Intrinsic::aarch64_sve_pext_x2: {
6455 Node->getValueType(0),
6456 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6457 AArch64::PEXT_2PCI_D}))
6458 SelectPExtPair(Node, Op);
6459 return;
6460 }
6461 }
6462 break;
6463 }
6464 case ISD::INTRINSIC_VOID: {
6465 unsigned IntNo = Node->getConstantOperandVal(1);
6466 if (Node->getNumOperands() >= 3)
6467 VT = Node->getOperand(2)->getValueType(0);
6468 switch (IntNo) {
6469 default:
6470 break;
6471 case Intrinsic::aarch64_neon_st1x2: {
6472 if (VT == MVT::v8i8) {
6473 SelectStore(Node, 2, AArch64::ST1Twov8b);
6474 return;
6475 } else if (VT == MVT::v16i8) {
6476 SelectStore(Node, 2, AArch64::ST1Twov16b);
6477 return;
6478 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6479 VT == MVT::v4bf16) {
6480 SelectStore(Node, 2, AArch64::ST1Twov4h);
6481 return;
6482 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6483 VT == MVT::v8bf16) {
6484 SelectStore(Node, 2, AArch64::ST1Twov8h);
6485 return;
6486 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6487 SelectStore(Node, 2, AArch64::ST1Twov2s);
6488 return;
6489 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6490 SelectStore(Node, 2, AArch64::ST1Twov4s);
6491 return;
6492 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6493 SelectStore(Node, 2, AArch64::ST1Twov2d);
6494 return;
6495 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6496 SelectStore(Node, 2, AArch64::ST1Twov1d);
6497 return;
6498 }
6499 break;
6500 }
6501 case Intrinsic::aarch64_neon_st1x3: {
6502 if (VT == MVT::v8i8) {
6503 SelectStore(Node, 3, AArch64::ST1Threev8b);
6504 return;
6505 } else if (VT == MVT::v16i8) {
6506 SelectStore(Node, 3, AArch64::ST1Threev16b);
6507 return;
6508 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6509 VT == MVT::v4bf16) {
6510 SelectStore(Node, 3, AArch64::ST1Threev4h);
6511 return;
6512 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6513 VT == MVT::v8bf16) {
6514 SelectStore(Node, 3, AArch64::ST1Threev8h);
6515 return;
6516 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6517 SelectStore(Node, 3, AArch64::ST1Threev2s);
6518 return;
6519 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6520 SelectStore(Node, 3, AArch64::ST1Threev4s);
6521 return;
6522 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6523 SelectStore(Node, 3, AArch64::ST1Threev2d);
6524 return;
6525 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6526 SelectStore(Node, 3, AArch64::ST1Threev1d);
6527 return;
6528 }
6529 break;
6530 }
6531 case Intrinsic::aarch64_neon_st1x4: {
6532 if (VT == MVT::v8i8) {
6533 SelectStore(Node, 4, AArch64::ST1Fourv8b);
6534 return;
6535 } else if (VT == MVT::v16i8) {
6536 SelectStore(Node, 4, AArch64::ST1Fourv16b);
6537 return;
6538 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6539 VT == MVT::v4bf16) {
6540 SelectStore(Node, 4, AArch64::ST1Fourv4h);
6541 return;
6542 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6543 VT == MVT::v8bf16) {
6544 SelectStore(Node, 4, AArch64::ST1Fourv8h);
6545 return;
6546 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6547 SelectStore(Node, 4, AArch64::ST1Fourv2s);
6548 return;
6549 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6550 SelectStore(Node, 4, AArch64::ST1Fourv4s);
6551 return;
6552 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6553 SelectStore(Node, 4, AArch64::ST1Fourv2d);
6554 return;
6555 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6556 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6557 return;
6558 }
6559 break;
6560 }
6561 case Intrinsic::aarch64_neon_st2: {
6562 if (VT == MVT::v8i8) {
6563 SelectStore(Node, 2, AArch64::ST2Twov8b);
6564 return;
6565 } else if (VT == MVT::v16i8) {
6566 SelectStore(Node, 2, AArch64::ST2Twov16b);
6567 return;
6568 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6569 VT == MVT::v4bf16) {
6570 SelectStore(Node, 2, AArch64::ST2Twov4h);
6571 return;
6572 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6573 VT == MVT::v8bf16) {
6574 SelectStore(Node, 2, AArch64::ST2Twov8h);
6575 return;
6576 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6577 SelectStore(Node, 2, AArch64::ST2Twov2s);
6578 return;
6579 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6580 SelectStore(Node, 2, AArch64::ST2Twov4s);
6581 return;
6582 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6583 SelectStore(Node, 2, AArch64::ST2Twov2d);
6584 return;
6585 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6586 SelectStore(Node, 2, AArch64::ST1Twov1d);
6587 return;
6588 }
6589 break;
6590 }
6591 case Intrinsic::aarch64_neon_st3: {
6592 if (VT == MVT::v8i8) {
6593 SelectStore(Node, 3, AArch64::ST3Threev8b);
6594 return;
6595 } else if (VT == MVT::v16i8) {
6596 SelectStore(Node, 3, AArch64::ST3Threev16b);
6597 return;
6598 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6599 VT == MVT::v4bf16) {
6600 SelectStore(Node, 3, AArch64::ST3Threev4h);
6601 return;
6602 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6603 VT == MVT::v8bf16) {
6604 SelectStore(Node, 3, AArch64::ST3Threev8h);
6605 return;
6606 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6607 SelectStore(Node, 3, AArch64::ST3Threev2s);
6608 return;
6609 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6610 SelectStore(Node, 3, AArch64::ST3Threev4s);
6611 return;
6612 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6613 SelectStore(Node, 3, AArch64::ST3Threev2d);
6614 return;
6615 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6616 SelectStore(Node, 3, AArch64::ST1Threev1d);
6617 return;
6618 }
6619 break;
6620 }
6621 case Intrinsic::aarch64_neon_st4: {
6622 if (VT == MVT::v8i8) {
6623 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6624 return;
6625 } else if (VT == MVT::v16i8) {
6626 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6627 return;
6628 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6629 VT == MVT::v4bf16) {
6630 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6631 return;
6632 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6633 VT == MVT::v8bf16) {
6634 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6635 return;
6636 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6637 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6638 return;
6639 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6640 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6641 return;
6642 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6643 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6644 return;
6645 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6646 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6647 return;
6648 }
6649 break;
6650 }
6651 case Intrinsic::aarch64_neon_st2lane: {
6652 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6653 SelectStoreLane(Node, 2, AArch64::ST2i8);
6654 return;
6655 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6656 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6657 SelectStoreLane(Node, 2, AArch64::ST2i16);
6658 return;
6659 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6660 VT == MVT::v2f32) {
6661 SelectStoreLane(Node, 2, AArch64::ST2i32);
6662 return;
6663 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6664 VT == MVT::v1f64) {
6665 SelectStoreLane(Node, 2, AArch64::ST2i64);
6666 return;
6667 }
6668 break;
6669 }
6670 case Intrinsic::aarch64_neon_st3lane: {
6671 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6672 SelectStoreLane(Node, 3, AArch64::ST3i8);
6673 return;
6674 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6675 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6676 SelectStoreLane(Node, 3, AArch64::ST3i16);
6677 return;
6678 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6679 VT == MVT::v2f32) {
6680 SelectStoreLane(Node, 3, AArch64::ST3i32);
6681 return;
6682 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6683 VT == MVT::v1f64) {
6684 SelectStoreLane(Node, 3, AArch64::ST3i64);
6685 return;
6686 }
6687 break;
6688 }
6689 case Intrinsic::aarch64_neon_st4lane: {
6690 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6691 SelectStoreLane(Node, 4, AArch64::ST4i8);
6692 return;
6693 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6694 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6695 SelectStoreLane(Node, 4, AArch64::ST4i16);
6696 return;
6697 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6698 VT == MVT::v2f32) {
6699 SelectStoreLane(Node, 4, AArch64::ST4i32);
6700 return;
6701 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6702 VT == MVT::v1f64) {
6703 SelectStoreLane(Node, 4, AArch64::ST4i64);
6704 return;
6705 }
6706 break;
6707 }
6708 case Intrinsic::aarch64_sve_st2q: {
6709 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6710 return;
6711 }
6712 case Intrinsic::aarch64_sve_st3q: {
6713 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6714 return;
6715 }
6716 case Intrinsic::aarch64_sve_st4q: {
6717 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
6718 return;
6719 }
6720 case Intrinsic::aarch64_sve_st2: {
6721 if (VT == MVT::nxv16i8) {
6722 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
6723 return;
6724 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6725 VT == MVT::nxv8bf16) {
6726 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
6727 return;
6728 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6729 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
6730 return;
6731 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6732 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
6733 return;
6734 }
6735 break;
6736 }
6737 case Intrinsic::aarch64_sve_st3: {
6738 if (VT == MVT::nxv16i8) {
6739 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
6740 return;
6741 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6742 VT == MVT::nxv8bf16) {
6743 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
6744 return;
6745 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6746 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
6747 return;
6748 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6749 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
6750 return;
6751 }
6752 break;
6753 }
6754 case Intrinsic::aarch64_sve_st4: {
6755 if (VT == MVT::nxv16i8) {
6756 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
6757 return;
6758 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6759 VT == MVT::nxv8bf16) {
6760 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
6761 return;
6762 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6763 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
6764 return;
6765 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6766 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
6767 return;
6768 }
6769 break;
6770 }
6771 }
6772 break;
6773 }
6774 case AArch64ISD::LD2post: {
6775 if (VT == MVT::v8i8) {
6776 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
6777 return;
6778 } else if (VT == MVT::v16i8) {
6779 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
6780 return;
6781 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6782 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
6783 return;
6784 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6785 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
6786 return;
6787 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6788 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
6789 return;
6790 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6791 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
6792 return;
6793 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6794 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6795 return;
6796 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6797 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
6798 return;
6799 }
6800 break;
6801 }
6802 case AArch64ISD::LD3post: {
6803 if (VT == MVT::v8i8) {
6804 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
6805 return;
6806 } else if (VT == MVT::v16i8) {
6807 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
6808 return;
6809 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6810 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
6811 return;
6812 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6813 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
6814 return;
6815 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6816 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
6817 return;
6818 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6819 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
6820 return;
6821 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6822 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6823 return;
6824 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6825 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
6826 return;
6827 }
6828 break;
6829 }
6830 case AArch64ISD::LD4post: {
6831 if (VT == MVT::v8i8) {
6832 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
6833 return;
6834 } else if (VT == MVT::v16i8) {
6835 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
6836 return;
6837 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6838 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
6839 return;
6840 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6841 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
6842 return;
6843 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6844 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
6845 return;
6846 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6847 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
6848 return;
6849 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6850 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6851 return;
6852 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6853 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
6854 return;
6855 }
6856 break;
6857 }
6858 case AArch64ISD::LD1x2post: {
6859 if (VT == MVT::v8i8) {
6860 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
6861 return;
6862 } else if (VT == MVT::v16i8) {
6863 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
6864 return;
6865 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6866 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
6867 return;
6868 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6869 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
6870 return;
6871 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6872 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
6873 return;
6874 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6875 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
6876 return;
6877 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6878 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6879 return;
6880 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6881 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
6882 return;
6883 }
6884 break;
6885 }
6886 case AArch64ISD::LD1x3post: {
6887 if (VT == MVT::v8i8) {
6888 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
6889 return;
6890 } else if (VT == MVT::v16i8) {
6891 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
6892 return;
6893 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6894 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
6895 return;
6896 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6897 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
6898 return;
6899 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6900 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
6901 return;
6902 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6903 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
6904 return;
6905 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6906 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6907 return;
6908 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6909 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
6910 return;
6911 }
6912 break;
6913 }
6914 case AArch64ISD::LD1x4post: {
6915 if (VT == MVT::v8i8) {
6916 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
6917 return;
6918 } else if (VT == MVT::v16i8) {
6919 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
6920 return;
6921 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6922 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
6923 return;
6924 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6925 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
6926 return;
6927 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6928 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
6929 return;
6930 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6931 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
6932 return;
6933 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6934 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6935 return;
6936 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6937 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
6938 return;
6939 }
6940 break;
6941 }
6942 case AArch64ISD::LD1DUPpost: {
6943 if (VT == MVT::v8i8) {
6944 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
6945 return;
6946 } else if (VT == MVT::v16i8) {
6947 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
6948 return;
6949 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6950 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
6951 return;
6952 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6953 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
6954 return;
6955 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6956 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
6957 return;
6958 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6959 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
6960 return;
6961 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6962 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
6963 return;
6964 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6965 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
6966 return;
6967 }
6968 break;
6969 }
6970 case AArch64ISD::LD2DUPpost: {
6971 if (VT == MVT::v8i8) {
6972 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
6973 return;
6974 } else if (VT == MVT::v16i8) {
6975 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
6976 return;
6977 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6978 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
6979 return;
6980 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6981 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
6982 return;
6983 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6984 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
6985 return;
6986 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6987 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
6988 return;
6989 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6990 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
6991 return;
6992 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6993 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
6994 return;
6995 }
6996 break;
6997 }
6998 case AArch64ISD::LD3DUPpost: {
6999 if (VT == MVT::v8i8) {
7000 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
7001 return;
7002 } else if (VT == MVT::v16i8) {
7003 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
7004 return;
7005 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7006 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
7007 return;
7008 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7009 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
7010 return;
7011 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7012 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
7013 return;
7014 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7015 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
7016 return;
7017 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7018 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
7019 return;
7020 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7021 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
7022 return;
7023 }
7024 break;
7025 }
7026 case AArch64ISD::LD4DUPpost: {
7027 if (VT == MVT::v8i8) {
7028 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
7029 return;
7030 } else if (VT == MVT::v16i8) {
7031 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
7032 return;
7033 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7034 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
7035 return;
7036 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7037 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
7038 return;
7039 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7040 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
7041 return;
7042 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7043 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
7044 return;
7045 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7046 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
7047 return;
7048 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7049 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
7050 return;
7051 }
7052 break;
7053 }
7054 case AArch64ISD::LD1LANEpost: {
7055 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7056 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
7057 return;
7058 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7059 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7060 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
7061 return;
7062 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7063 VT == MVT::v2f32) {
7064 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
7065 return;
7066 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7067 VT == MVT::v1f64) {
7068 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
7069 return;
7070 }
7071 break;
7072 }
7073 case AArch64ISD::LD2LANEpost: {
7074 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7075 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
7076 return;
7077 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7078 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7079 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
7080 return;
7081 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7082 VT == MVT::v2f32) {
7083 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
7084 return;
7085 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7086 VT == MVT::v1f64) {
7087 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
7088 return;
7089 }
7090 break;
7091 }
7092 case AArch64ISD::LD3LANEpost: {
7093 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7094 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
7095 return;
7096 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7097 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7098 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
7099 return;
7100 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7101 VT == MVT::v2f32) {
7102 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
7103 return;
7104 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7105 VT == MVT::v1f64) {
7106 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
7107 return;
7108 }
7109 break;
7110 }
7111 case AArch64ISD::LD4LANEpost: {
7112 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7113 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
7114 return;
7115 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7116 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7117 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
7118 return;
7119 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7120 VT == MVT::v2f32) {
7121 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
7122 return;
7123 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7124 VT == MVT::v1f64) {
7125 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
7126 return;
7127 }
7128 break;
7129 }
7130 case AArch64ISD::ST2post: {
7131 VT = Node->getOperand(1).getValueType();
7132 if (VT == MVT::v8i8) {
7133 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
7134 return;
7135 } else if (VT == MVT::v16i8) {
7136 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
7137 return;
7138 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7139 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
7140 return;
7141 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7142 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
7143 return;
7144 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7145 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
7146 return;
7147 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7148 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
7149 return;
7150 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7151 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
7152 return;
7153 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7154 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7155 return;
7156 }
7157 break;
7158 }
7159 case AArch64ISD::ST3post: {
7160 VT = Node->getOperand(1).getValueType();
7161 if (VT == MVT::v8i8) {
7162 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
7163 return;
7164 } else if (VT == MVT::v16i8) {
7165 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
7166 return;
7167 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7168 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
7169 return;
7170 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7171 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
7172 return;
7173 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7174 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
7175 return;
7176 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7177 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
7178 return;
7179 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7180 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
7181 return;
7182 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7183 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7184 return;
7185 }
7186 break;
7187 }
7188 case AArch64ISD::ST4post: {
7189 VT = Node->getOperand(1).getValueType();
7190 if (VT == MVT::v8i8) {
7191 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
7192 return;
7193 } else if (VT == MVT::v16i8) {
7194 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
7195 return;
7196 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7197 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
7198 return;
7199 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7200 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
7201 return;
7202 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7203 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
7204 return;
7205 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7206 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
7207 return;
7208 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7209 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
7210 return;
7211 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7212 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7213 return;
7214 }
7215 break;
7216 }
7217 case AArch64ISD::ST1x2post: {
7218 VT = Node->getOperand(1).getValueType();
7219 if (VT == MVT::v8i8) {
7220 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
7221 return;
7222 } else if (VT == MVT::v16i8) {
7223 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
7224 return;
7225 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7226 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
7227 return;
7228 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7229 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
7230 return;
7231 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7232 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
7233 return;
7234 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7235 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
7236 return;
7237 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7238 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7239 return;
7240 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7241 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
7242 return;
7243 }
7244 break;
7245 }
7246 case AArch64ISD::ST1x3post: {
7247 VT = Node->getOperand(1).getValueType();
7248 if (VT == MVT::v8i8) {
7249 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
7250 return;
7251 } else if (VT == MVT::v16i8) {
7252 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
7253 return;
7254 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7255 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
7256 return;
7257 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7258 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
7259 return;
7260 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7261 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
7262 return;
7263 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7264 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
7265 return;
7266 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7267 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7268 return;
7269 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7270 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
7271 return;
7272 }
7273 break;
7274 }
7275 case AArch64ISD::ST1x4post: {
7276 VT = Node->getOperand(1).getValueType();
7277 if (VT == MVT::v8i8) {
7278 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
7279 return;
7280 } else if (VT == MVT::v16i8) {
7281 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
7282 return;
7283 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7284 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
7285 return;
7286 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7287 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
7288 return;
7289 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7290 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
7291 return;
7292 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7293 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
7294 return;
7295 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7296 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7297 return;
7298 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7299 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
7300 return;
7301 }
7302 break;
7303 }
7304 case AArch64ISD::ST2LANEpost: {
7305 VT = Node->getOperand(1).getValueType();
7306 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7307 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
7308 return;
7309 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7310 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7311 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
7312 return;
7313 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7314 VT == MVT::v2f32) {
7315 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
7316 return;
7317 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7318 VT == MVT::v1f64) {
7319 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
7320 return;
7321 }
7322 break;
7323 }
7324 case AArch64ISD::ST3LANEpost: {
7325 VT = Node->getOperand(1).getValueType();
7326 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7327 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
7328 return;
7329 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7330 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7331 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
7332 return;
7333 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7334 VT == MVT::v2f32) {
7335 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
7336 return;
7337 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7338 VT == MVT::v1f64) {
7339 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
7340 return;
7341 }
7342 break;
7343 }
7344 case AArch64ISD::ST4LANEpost: {
7345 VT = Node->getOperand(1).getValueType();
7346 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7347 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
7348 return;
7349 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7350 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7351 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
7352 return;
7353 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7354 VT == MVT::v2f32) {
7355 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
7356 return;
7357 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7358 VT == MVT::v1f64) {
7359 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
7360 return;
7361 }
7362 break;
7363 }
7364 }
7365
7366 // Select the default instruction
7367 SelectCode(Node);
7368}
7369
7370/// createAArch64ISelDag - This pass converts a legalized DAG into an
7371/// AArch64-specific DAG, ready for instruction scheduling.
7372FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
7373 CodeGenOptLevel OptLevel) {
7374 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7375}
7376
7377/// When \p PredVT is a scalable vector predicate in the form
7378/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
7379/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7380/// structured vectors (NumVec > 1), the output data type is
7381/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7382/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7383/// EVT.
7384static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
7385 unsigned NumVec) {
7386 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
7387 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
7388 return EVT();
7389
7390 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7391 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7392 return EVT();
7393
7394 ElementCount EC = PredVT.getVectorElementCount();
7395 EVT ScalarVT =
7396 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7397 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
7398
7399 return MemVT;
7400}
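// Worked example of the mapping above: PredVT == MVT::nxv4i1 has an element
// count of 4, so ScalarVT is i32 (SVEBitsPerBlock / 4 == 128 / 4 == 32) and
// the result is MVT::nxv4i32; with NumVec == 2 the element count doubles and
// the result becomes MVT::nxv8i32. Predicate types other than nxv16i1,
// nxv8i1, nxv4i1 and nxv2i1 yield an invalid EVT.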
7401
7402/// Return the EVT of the data associated with a memory operation in \p
7403/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
7404static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
7405 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Root))
7406 return MemIntr->getMemoryVT();
7407
7408 if (isa<MemSDNode>(Root)) {
7409 EVT MemVT = cast<MemSDNode>(Root)->getMemoryVT();
7410
7411 EVT DataVT;
7412 if (auto *Load = dyn_cast<LoadSDNode>(Root))
7413 DataVT = Load->getValueType(0);
7414 else if (auto *Load = dyn_cast<MaskedLoadSDNode>(Root))
7415 DataVT = Load->getValueType(0);
7416 else if (auto *Store = dyn_cast<StoreSDNode>(Root))
7417 DataVT = Store->getValue().getValueType();
7418 else if (auto *Store = dyn_cast<MaskedStoreSDNode>(Root))
7419 DataVT = Store->getValue().getValueType();
7420 else
7421 llvm_unreachable("Unexpected MemSDNode!");
7422
7423 return DataVT.changeVectorElementType(MemVT.getVectorElementType());
7424 }
7425
7426 const unsigned Opcode = Root->getOpcode();
7427 // For custom ISD nodes, we have to look at them individually to extract the
7428 // type of the data moved to/from memory.
7429 switch (Opcode) {
7430 case AArch64ISD::LD1_MERGE_ZERO:
7431 case AArch64ISD::LD1S_MERGE_ZERO:
7432 case AArch64ISD::LDNF1_MERGE_ZERO:
7433 case AArch64ISD::LDNF1S_MERGE_ZERO:
7434 return cast<VTSDNode>(Root->getOperand(3))->getVT();
7435 case AArch64ISD::ST1_PRED:
7436 return cast<VTSDNode>(Root->getOperand(4))->getVT();
7437 default:
7438 break;
7439 }
7440
7441 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7442 return EVT();
7443
7444 switch (Root->getConstantOperandVal(1)) {
7445 default:
7446 return EVT();
7447 case Intrinsic::aarch64_sme_ldr:
7448 case Intrinsic::aarch64_sme_str:
7449 return MVT::nxv16i8;
7450 case Intrinsic::aarch64_sve_prf:
7451 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7452 // width of the predicate.
7453 return getPackedVectorTypeFromPredicateType(
7454 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7455 case Intrinsic::aarch64_sve_ld2_sret:
7456 case Intrinsic::aarch64_sve_ld2q_sret:
7457 return getPackedVectorTypeFromPredicateType(
7458 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7459 case Intrinsic::aarch64_sve_st2q:
7460 return getPackedVectorTypeFromPredicateType(
7461 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7462 case Intrinsic::aarch64_sve_ld3_sret:
7463 case Intrinsic::aarch64_sve_ld3q_sret:
7464 return getPackedVectorTypeFromPredicateType(
7465 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7466 case Intrinsic::aarch64_sve_st3q:
7467 return getPackedVectorTypeFromPredicateType(
7468 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7469 case Intrinsic::aarch64_sve_ld4_sret:
7470 case Intrinsic::aarch64_sve_ld4q_sret:
7471 return getPackedVectorTypeFromPredicateType(
7472 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7473 case Intrinsic::aarch64_sve_st4q:
7474 return getPackedVectorTypeFromPredicateType(
7475 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
7476 case Intrinsic::aarch64_sve_ld1udq:
7477 case Intrinsic::aarch64_sve_st1dq:
7478 return EVT(MVT::nxv1i64);
7479 case Intrinsic::aarch64_sve_ld1uwq:
7480 case Intrinsic::aarch64_sve_st1wq:
7481 return EVT(MVT::nxv1i32);
7482 }
7483}
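// Worked example of the rules above: a MaskedLoadSDNode that produces
// MVT::nxv4i32 via an extending load with a memory VT of MVT::nxv4i16
// reports MVT::nxv4i16 here, while the SME ldr/str intrinsics always report
// a full register group as MVT::nxv16i8. Nodes that are neither memory nodes
// nor recognized intrinsics fall through and return an invalid EVT.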
7484
7485/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7486/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
7487/// where Root is the memory access using N for its address.
7488template <int64_t Min, int64_t Max>
7489bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7490 SDValue &Base,
7491 SDValue &OffImm) {
7492 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
7493 const DataLayout &DL = CurDAG->getDataLayout();
7494 const MachineFrameInfo &MFI = MF->getFrameInfo();
7495
7496 if (N.getOpcode() == ISD::FrameIndex) {
7497 int FI = cast<FrameIndexSDNode>(N)->getIndex();
7498 // We can only encode VL scaled offsets, so only fold in frame indexes
7499 // referencing SVE objects.
7501 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7502 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7503 return true;
7504 }
7505
7506 return false;
7507 }
7508
7509 if (MemVT == EVT())
7510 return false;
7511
7512 if (N.getOpcode() != ISD::ADD)
7513 return false;
7514
7515 SDValue VScale = N.getOperand(1);
7516 int64_t MulImm = std::numeric_limits<int64_t>::max();
7517 if (VScale.getOpcode() == ISD::VSCALE) {
7518 MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7519 } else if (auto C = dyn_cast<ConstantSDNode>(VScale)) {
7520 int64_t ByteOffset = C->getSExtValue();
7521 const auto KnownVScale =
7522 Subtarget->getSVEVectorSizeInBits() / AArch64::SVEBitsPerBlock;
7523
7524 if (!KnownVScale || ByteOffset % KnownVScale != 0)
7525 return false;
7526
7527 MulImm = ByteOffset / KnownVScale;
7528 } else
7529 return false;
7530
7531 TypeSize TS = MemVT.getSizeInBits();
7532 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7533
7534 if ((MulImm % MemWidthBytes) != 0)
7535 return false;
7536
7537 int64_t Offset = MulImm / MemWidthBytes;
7538 if (Offset < Min || Offset > Max)
7539 return false;
7540
7541 Base = N.getOperand(0);
7542 if (Base.getOpcode() == ISD::FrameIndex) {
7543 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7544 // We can only encode VL scaled offsets, so only fold in frame indexes
7545 // referencing SVE objects.
7547 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7548 }
7549
7550 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7551 return true;
7552}
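// Worked example of the selection above: with a 128-bit MemVT the memory
// width is 16 bytes, so an address of the form (add Base, (vscale 32)) gives
// MulImm == 32 and OffImm == 2, which is accepted as long as 2 lies within
// [Min, Max]; a MulImm of 24 is rejected because it is not a multiple of the
// memory width, and plain frame indexes only fold when they reference SVE
// stack objects.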
7553
7554/// Select register plus register addressing mode for SVE, with scaled
7555/// offset.
7556bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7557 SDValue &Base,
7558 SDValue &Offset) {
7559 if (N.getOpcode() != ISD::ADD)
7560 return false;
7561
7562 // Process an ADD node.
7563 const SDValue LHS = N.getOperand(0);
7564 const SDValue RHS = N.getOperand(1);
7565
7566 // 8 bit data does not come with the SHL node, so it is treated
7567 // separately.
7568 if (Scale == 0) {
7569 Base = LHS;
7570 Offset = RHS;
7571 return true;
7572 }
7573
7574 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7575 int64_t ImmOff = C->getSExtValue();
7576 unsigned Size = 1 << Scale;
7577
7578 // To use the reg+reg addressing mode, the immediate must be a multiple of
7579 // the vector element's byte size.
7580 if (ImmOff % Size)
7581 return false;
7582
7583 SDLoc DL(N);
7584 Base = LHS;
7585 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7586 SDValue Ops[] = {Offset};
7587 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7588 Offset = SDValue(MI, 0);
7589 return true;
7590 }
7591
7592 // Check if the RHS is a shift node with a constant.
7593 if (RHS.getOpcode() != ISD::SHL)
7594 return false;
7595
7596 const SDValue ShiftRHS = RHS.getOperand(1);
7597 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7598 if (C->getZExtValue() == Scale) {
7599 Base = LHS;
7600 Offset = RHS.getOperand(0);
7601 return true;
7602 }
7603
7604 return false;
7605}
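// Worked example of the reg+reg selection above: with Scale == 2 (32-bit
// elements) an address of the form (add Base, (shl Idx, 2)) selects Base and
// Idx directly; a constant offset of 8 bytes is also accepted by
// materializing the scaled index 2 with MOVi64imm, whereas an offset of 6
// fails the multiple-of-4 check and no match is made.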
7606
7607bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7608 const AArch64TargetLowering *TLI =
7609 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7610
7611 return TLI->isAllActivePredicate(*CurDAG, N);
7612}
7613
7614bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7615 EVT VT = N.getValueType();
7616 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7617}
7618
7619bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7620 SDValue &Base, SDValue &Offset,
7621 unsigned Scale) {
7622 auto MatchConstantOffset = [&](SDValue CN) -> SDValue {
7623 if (auto *C = dyn_cast<ConstantSDNode>(CN)) {
7624 int64_t ImmOff = C->getSExtValue();
7625 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)))
7626 return CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7627 }
7628 return SDValue();
7629 };
7630
7631 if (SDValue C = MatchConstantOffset(N)) {
7632 Base = CurDAG->getConstant(0, SDLoc(N), MVT::i32);
7633 Offset = C;
7634 return true;
7635 }
7636
7637 // Try to untangle an ADD node into a 'reg + offset'
7638 if (CurDAG->isBaseWithConstantOffset(N)) {
7639 if (SDValue C = MatchConstantOffset(N.getOperand(1))) {
7640 Base = N.getOperand(0);
7641 Offset = C;
7642 return true;
7643 }
7644 }
7645
7646 // By default, just match reg + 0.
7647 Base = N;
7648 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7649 return true;
7650}
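// Worked example of the slice selection above: with MaxSize == 15 and
// Scale == 1, (add %x, 7) gives Base == %x and Offset == 7, a lone constant 7
// gives a zero Base with Offset == 7, and anything else falls back to
// Base == N with an Offset of 0.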
7651
7652bool AArch64DAGToDAGISel::SelectCmpBranchUImm6Operand(SDNode *P, SDValue N,
7653 SDValue &Imm) {
7654 AArch64CC::CondCode CC =
7655 static_cast<AArch64CC::CondCode>(P->getConstantOperandVal(1));
7656 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
7657 // Check conservatively if the immediate fits the valid range [0, 64).
7658 // Immediate variants for GE and HS definitely need to be decremented
7659 // when lowering the pseudos later, so an immediate of 1 would become 0.
7660 // For the inverse conditions LT and LO we don't know for sure if they
7661 // will need a decrement but should the decision be made to reverse the
7662 // branch condition, we again end up with the need to decrement.
7663 // The same argument holds for LE, LS, GT and HI and possibly
7664 // incremented immediates. This can lead to slightly less optimal
7665 // codegen, e.g. we never codegen the legal case
7666 // cblt w0, #63, A
7667 // because we could end up with the illegal case
7668 // cbge w0, #64, B
7669 // should the decision to reverse the branch direction be made. For the
7670 // lower bound cases this is no problem since we can express comparisons
7671 // against 0 with either tbz/tbnz or using wzr/xzr.
7672 uint64_t LowerBound = 0, UpperBound = 64;
7673 switch (CC) {
7674 case AArch64CC::GE:
7675 case AArch64CC::HS:
7676 case AArch64CC::LT:
7677 case AArch64CC::LO:
7678 LowerBound = 1;
7679 break;
7680 case AArch64CC::LE:
7681 case AArch64CC::LS:
7682 case AArch64CC::GT:
7683 case AArch64CC::HI:
7684 UpperBound = 63;
7685 break;
7686 default:
7687 break;
7688 }
7689
7690 if (CN->getAPIntValue().uge(LowerBound) &&
7691 CN->getAPIntValue().ult(UpperBound)) {
7692 SDLoc DL(N);
7693 Imm = CurDAG->getTargetConstant(CN->getZExtValue(), DL, N.getValueType());
7694 return true;
7695 }
7696 }
7697
7698 return false;
7699}
unsigned SubReg
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms)
static int getIntOperandFromRegisterString(StringRef RegString)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, unsigned NumberOfIgnoredHighBits, EVT VT)
Does DstMask form a complementary pair with the mask provided by BitsToBeInserted,...
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N)
Instructions that accept extend modifiers like UXTW expect the register being extended to be a GPR32,...
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op, SDValue &Src, int &DstLSB, int &Width)
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, SDValue &Src, int &DstLSB, int &Width)
Does this tree qualify as an attempt to move a bitfield into position, essentially "(and (shl VAL,...
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB, unsigned NumberOfIgnoredLowBits, bool BiggerPattern)
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits=0, bool BiggerPattern=false)
static bool isShiftedMask(uint64_t Mask, EVT VT)
bool SelectSMETile(unsigned &BaseReg, unsigned TileNum)
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root)
Return the EVT of the data associated to a memory operation in Root.
static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isWorthFoldingADDlow(SDValue N)
If there's a use of this ADDlow that's not itself a load/store then we'll need to create a real ADD i...
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N)
getShiftTypeForNode - Translate a shift node to the corresponding ShiftType value.
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB)
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef< unsigned > Opcodes)
This function selects an opcode from a list of opcodes, which is expected to be the opcode for { 8-bi...
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, unsigned NumVec)
When PredVT is a scalable vector predicate in the form MVT::nx<M>xi1, it builds the correspondent sca...
static bool isPreferredADD(int64_t ImmOff)
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, uint64_t Imm, uint64_t MSB, unsigned Depth)
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount)
Create a machine node performing a notional SHL of Op by ShlAmount.
static bool isWorthFoldingSHL(SDValue V)
Determine whether it is worth it to fold SHL into the addressing mode.
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, bool BiggerPattern)
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, SDValue Src, SDValue Dst, SelectionDAG *CurDAG, const bool BiggerPattern)
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, SDValue Orig, unsigned Depth)
static bool isMemOpOrPrefetch(SDNode *N)
static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG)
static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth)
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth=0)
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected)
static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore=false)
getExtendTypeForNode - Translate an extend node to the corresponding ExtendType value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, SDValue &ShiftedOperand, uint64_t &EncodedShiftImm)
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range, unsigned Size)
Check if the immediate offset is valid as a scaled immediate.
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
return SDValue()
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG)
WidenVector - Given a value in the V64 register class, produce the equivalent value in the V128 regis...
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define DEBUG_TYPE
IRTranslator LLVM IR MI
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:58
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t High
OptimizedStructLayoutField Field
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
#define LLVM_DEBUG(...)
Definition Debug.h:114
#define PASS_NAME
Value * RHS
Value * LHS
const AArch64RegisterInfo * getRegisterInfo() const override
bool isStreaming() const
Returns true if the function has a streaming body.
bool isX16X17Safer() const
Returns whether the operating system makes it safer to store sensitive values in x16 and x17 as oppos...
unsigned getSVEVectorSizeInBits() const
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
bool getExactInverse(APFloat *Inv) const
If this value is normal and has an exact, normal, multiplicative inverse, store it in inv and return ...
Definition APFloat.cpp:5999
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1332
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1670
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:258
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1598
void flipAllBits()
Toggle every bit to its opposite value.
Definition APInt.h:1452
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:510
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
iterator begin() const
Definition ArrayRef.h:135
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const GlobalValue * getGlobal() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
uint8_t getStackID(int ObjectIdx) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
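A hypothetical sketch of the SDValue accessors above, in the style this selector uses to peel a constant shift off an operand (the function name is illustrative):

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// If V is a single-use SHL by a constant, return the shifted source and the
// shift amount; otherwise leave the outputs untouched and return false.
static bool matchShlByConstant(SDValue V, SDValue &Src, uint64_t &Amt) {
  if (V.getOpcode() != ISD::SHL || !V.hasOneUse())
    return false;
  if (!isa<ConstantSDNode>(V.getOperand(1)))
    return false;
  Src = V.getOperand(0);
  Amt = V.getConstantOperandVal(1);
  return true;
}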
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
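A minimal sketch of how a target selector typically combines the node-creation helpers above; the wrapper function is hypothetical, while AArch64::UBFMXri and AArch64::sub_32 are real opcode/subregister names patterned on what this file emits:

// Emit UBFMXri (unsigned bitfield move) and return its low 32 bits through
// the sub_32 subregister index.
static SDNode *emitExtractLow32(SelectionDAG *CurDAG, const SDLoc &DL,
                                SDValue Src, uint64_t ImmR, uint64_t ImmS) {
  SDValue Ops[] = {Src, CurDAG->getTargetConstant(ImmR, DL, MVT::i64),
                   CurDAG->getTargetConstant(ImmS, DL, MVT::i64)};
  SDNode *BFM = CurDAG->getMachineNode(AArch64::UBFMXri, DL, MVT::i64, Ops);
  SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sub_32, DL, MVT::i32,
                                              SDValue(BFM, 0));
  return Lo.getNode();
}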
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:702
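A tiny usage sketch of split (the input string is illustrative only):

#include "llvm/ADT/StringRef.h"
using namespace llvm;

static void splitDemo() {
  StringRef Pair("key:value");        // illustrative input
  auto [Key, Val] = Pair.split(':');  // Key == "key", Val == "value"
  // If ':' is absent, Key is the whole string and Val is empty.
  (void)Key; (void)Val;
}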
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
unsigned getID() const
Return the register class ID number.
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:956
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint32_t parseGenericRegister(StringRef Name)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static bool processLogicalImmediate(uint64_t Imm, unsigned RegSize, uint64_t &Encoding)
processLogicalImmediate - Determine if an immediate value can be encoded as the immediate operand of ...
static bool isSVECpyDupImm(int SizeInBits, int64_t Val, int32_t &Imm, int32_t &Shift)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static constexpr unsigned SVEBitsPerBlock
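A minimal sketch (illustrative constants, hypothetical wrapper) of the AArch64_AM helpers above: validate a 64-bit logical-immediate mask, round-trip it through the "N:immr:imms" encoding, and build an LSL #12 shifted-register operand. AArch64::SVEBitsPerBlock, also listed above, is the 128-bit granule in which scalable-vector sizes are expressed.

#include "MCTargetDesc/AArch64AddressingModes.h"
#include <cassert>
using namespace llvm;

static void logicalImmDemo() {
  uint64_t Mask = 0x00ff00ff00ff00ffULL; // 8 ones per 16-bit element
  uint64_t Encoding = 0;
  if (AArch64_AM::isLogicalImmediate(Mask, 64)) {
    AArch64_AM::processLogicalImmediate(Mask, 64, Encoding);
    assert(AArch64_AM::decodeLogicalImmediate(Encoding, 64) == Mask);
  }
  unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 12);
  (void)Shifter;
}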
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.

Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:663
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition ISDOpcodes.h:69
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:225
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:134
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
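A hypothetical sketch of dispatching on the ISD opcodes above: peel one integer-extension node (sign, zero, or any extend) so the operand underneath can be matched directly, a common idiom in this selector.

// Strips a single extension node and records whether it was signed.
static SDValue peelExtension(SDValue V, bool &IsSigned) {
  switch (V.getOpcode()) {
  case ISD::SIGN_EXTEND:
    IsSigned = true;
    return V.getOperand(0);
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
    IsSigned = false;
    return V.getOperand(0);
  default:
    return V; // not an extension; return the value unchanged
  }
}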
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:262
@ Offset
Definition DWP.cpp:477
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:644
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:279
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition MathExtras.h:276
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:348
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant, stopping at the first 1.
Definition bit.h:186
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:282
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:1948
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:270
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAArch64ISelDag(AArch64TargetMachine &TM, CodeGenOptLevel OptLevel)
createAArch64ISelDag - This pass converts a legalized DAG into a AArch64-specific DAG,...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:560
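A small sketch (hypothetical helpers) contrasting the isa/dyn_cast/cast idiom on DAG nodes: use dyn_cast when the node kind is uncertain, and reserve cast for after an isa check, where a failed cast would be a bug.

#include "llvm/CodeGen/SelectionDAGNodes.h"
#include <cassert>
using namespace llvm;

// dyn_cast: returns nullptr instead of asserting when V is not a constant.
static bool getImmOperand(SDValue V, uint64_t &Imm) {
  if (const auto *C = dyn_cast<ConstantSDNode>(V)) {
    Imm = C->getZExtValue();
    return true;
  }
  return false;
}

// isa + cast: the caller guarantees the kind, so cast may assert on mismatch.
static uint64_t getImmOperandChecked(SDValue V) {
  assert(isa<ConstantSDNode>(V) && "caller guarantees a constant operand");
  return cast<ConstantSDNode>(V)->getZExtValue();
}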
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:86
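A minimal sketch (illustrative constants) of the bit-math helpers above, which this selector leans on to recognise masks and shift amounts:

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;

static void bitMathDemo() {
  uint64_t M = 0x000000FFFF000000ULL;
  bool A = isShiftedMask_64(M);               // true: one contiguous run of ones
  bool B = isMask_64(0xFFFFULL);              // true: ones starting at bit 0
  unsigned L = Log2_64(4096);                 // 12
  unsigned T = countr_zero(M);                // 24, index of the lowest set bit
  uint64_t K = maskTrailingOnes<uint64_t>(8); // 0xFF
  (void)A; (void)B; (void)L; (void)T; (void)K;
}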
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
static constexpr roundingMode rmTowardZero
Definition APFloat.h:308
Extended Value Type.
Definition ValueTypes.h:35
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:463
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isFixedLengthVector() const
Definition ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition ValueTypes.h:202
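A minimal sketch of the EVT queries above (the helper is assumed, not from this file): derive an i1 predicate vector type with the same element count as an input vector, as SVE selection frequently needs.

#include "llvm/CodeGen/ValueTypes.h"
using namespace llvm;

// Preserves the element count (fixed or scalable) while switching the
// element type to i1.
static EVT getPredicateVT(LLVMContext &Ctx, EVT VT) {
  assert(VT.isVector() && "expected a vector type");
  return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorElementCount());
}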
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
Matching combinators.