1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64MachineFunctionInfo.h"
14#include "AArch64TargetMachine.h"
15#include "MCTargetDesc/AArch64AddressingModes.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/CodeGen/ISDOpcodes.h"
18#include "llvm/CodeGen/SelectionDAGISel.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/ErrorHandling.h"
25#include "llvm/Support/KnownBits.h"
26#include "llvm/Support/MathExtras.h"
27#include "llvm/Support/raw_ostream.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34//===--------------------------------------------------------------------===//
35/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
36/// instructions for SelectionDAG operations.
37///
38namespace {
39
40class AArch64DAGToDAGISel : public SelectionDAGISel {
41
42 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
43 /// make the right decision when generating code for different targets.
44 const AArch64Subtarget *Subtarget;
45
46public:
47 AArch64DAGToDAGISel() = delete;
48
49 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
50 CodeGenOptLevel OptLevel)
51 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
52
53 bool runOnMachineFunction(MachineFunction &MF) override {
54 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
55 return SelectionDAGISel::runOnMachineFunction(MF);
56 }
57
58 void Select(SDNode *Node) override;
59
60 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
61 /// inline asm expressions.
62 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
63 InlineAsm::ConstraintCode ConstraintID,
64 std::vector<SDValue> &OutOps) override;
65
66 template <signed Low, signed High, signed Scale>
67 bool SelectRDVLImm(SDValue N, SDValue &Imm);
68
69 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
70 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
71 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
72 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
73 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
74 return SelectShiftedRegister(N, false, Reg, Shift);
75 }
76 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
77 return SelectShiftedRegister(N, true, Reg, Shift);
78 }
79 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
80 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
81 }
82 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
83 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
84 }
85 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
86 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
87 }
88 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
89 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
90 }
91 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
92 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
93 }
94 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
95 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
96 }
97 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
98 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
99 }
100 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
101 return SelectAddrModeIndexed(N, 1, Base, OffImm);
102 }
103 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
104 return SelectAddrModeIndexed(N, 2, Base, OffImm);
105 }
106 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
107 return SelectAddrModeIndexed(N, 4, Base, OffImm);
108 }
109 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
110 return SelectAddrModeIndexed(N, 8, Base, OffImm);
111 }
112 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
113 return SelectAddrModeIndexed(N, 16, Base, OffImm);
114 }
115 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
116 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
117 }
118 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
119 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
120 }
121 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
122 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
123 }
124 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
125 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
126 }
127 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
128 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
129 }
130 template <unsigned Size, unsigned Max>
131 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
132 // Test if there is an appropriate addressing mode and check if the
133 // immediate fits.
134 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
135 if (Found) {
136 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
137 int64_t C = CI->getSExtValue();
138 if (C <= Max)
139 return true;
140 }
141 }
142
143 // Otherwise, base only, materialize address in register.
144 Base = N;
145 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
146 return true;
147 }
148
149 template<int Width>
150 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
151 SDValue &SignExtend, SDValue &DoShift) {
152 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
153 }
154
155 template<int Width>
156 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
157 SDValue &SignExtend, SDValue &DoShift) {
158 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
159 }
160
161 bool SelectExtractHigh(SDValue N, SDValue &Res) {
162 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
163 N = N->getOperand(0);
164 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
165 !isa<ConstantSDNode>(N->getOperand(1)))
166 return false;
167 EVT VT = N->getValueType(0);
168 EVT LVT = N->getOperand(0).getValueType();
169 unsigned Index = N->getConstantOperandVal(1);
170 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
171 Index != VT.getVectorNumElements())
172 return false;
173 Res = N->getOperand(0);
174 return true;
175 }
176
177 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
178 if (N.getOpcode() != AArch64ISD::VLSHR)
179 return false;
180 SDValue Op = N->getOperand(0);
181 EVT VT = Op.getValueType();
182 unsigned ShtAmt = N->getConstantOperandVal(1);
183 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
184 return false;
185
186 APInt Imm;
187 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
188 Imm = APInt(VT.getScalarSizeInBits(),
189 Op.getOperand(1).getConstantOperandVal(0)
190 << Op.getOperand(1).getConstantOperandVal(1));
191 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
192 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
193 Imm = APInt(VT.getScalarSizeInBits(),
194 Op.getOperand(1).getConstantOperandVal(0));
195 else
196 return false;
197
198 if (Imm != 1ULL << (ShtAmt - 1))
199 return false;
200
201 Res1 = Op.getOperand(0);
202 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
203 return true;
204 }
205
206 bool SelectDupZeroOrUndef(SDValue N) {
207 switch(N->getOpcode()) {
208 case ISD::UNDEF:
209 return true;
210 case AArch64ISD::DUP:
211 case ISD::SPLAT_VECTOR: {
212 auto Opnd0 = N->getOperand(0);
213 if (isNullConstant(Opnd0))
214 return true;
215 if (isNullFPConstant(Opnd0))
216 return true;
217 break;
218 }
219 default:
220 break;
221 }
222
223 return false;
224 }
225
226 bool SelectDupZero(SDValue N) {
227 switch(N->getOpcode()) {
228 case AArch64ISD::DUP:
229 case ISD::SPLAT_VECTOR: {
230 auto Opnd0 = N->getOperand(0);
231 if (isNullConstant(Opnd0))
232 return true;
233 if (isNullFPConstant(Opnd0))
234 return true;
235 break;
236 }
237 }
238
239 return false;
240 }
241
242 template<MVT::SimpleValueType VT>
243 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
244 return SelectSVEAddSubImm(N, VT, Imm, Shift);
245 }
246
247 template <MVT::SimpleValueType VT, bool Negate>
248 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
249 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
250 }
251
252 template <MVT::SimpleValueType VT>
253 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
254 return SelectSVECpyDupImm(N, VT, Imm, Shift);
255 }
256
257 template <MVT::SimpleValueType VT, bool Invert = false>
258 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
259 return SelectSVELogicalImm(N, VT, Imm, Invert);
260 }
261
262 template <MVT::SimpleValueType VT>
263 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
264 return SelectSVEArithImm(N, VT, Imm);
265 }
266
267 template <unsigned Low, unsigned High, bool AllowSaturation = false>
268 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
269 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
270 }
271
272 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
273 if (N->getOpcode() != ISD::SPLAT_VECTOR)
274 return false;
275
276 EVT EltVT = N->getValueType(0).getVectorElementType();
277 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
278 /* High */ EltVT.getFixedSizeInBits(),
279 /* AllowSaturation */ true, Imm);
280 }
281
282 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
283 template<signed Min, signed Max, signed Scale, bool Shift>
284 bool SelectCntImm(SDValue N, SDValue &Imm) {
285 if (!isa<ConstantSDNode>(N))
286 return false;
287
288 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
289 if (Shift)
290 MulImm = 1LL << MulImm;
291
292 if ((MulImm % std::abs(Scale)) != 0)
293 return false;
294
295 MulImm /= Scale;
296 if ((MulImm >= Min) && (MulImm <= Max)) {
297 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
298 return true;
299 }
300
301 return false;
302 }
303
304 template <signed Max, signed Scale>
305 bool SelectEXTImm(SDValue N, SDValue &Imm) {
306 if (!isa<ConstantSDNode>(N))
307 return false;
308
309 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
310
311 if (MulImm >= 0 && MulImm <= Max) {
312 MulImm *= Scale;
313 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
314 return true;
315 }
316
317 return false;
318 }
319
320 template <unsigned BaseReg, unsigned Max>
321 bool ImmToReg(SDValue N, SDValue &Imm) {
322 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
323 uint64_t C = CI->getZExtValue();
324
325 if (C > Max)
326 return false;
327
328 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
329 return true;
330 }
331 return false;
332 }
333
334 /// Form sequences of consecutive 64/128-bit registers for use in NEON
335 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
336 /// between 1 and 4 elements. If it contains a single element, that element is
337 /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
338 SDValue createDTuple(ArrayRef<SDValue> Vecs);
339 SDValue createQTuple(ArrayRef<SDValue> Vecs);
340 // Form a sequence of SVE registers for instructions using list of vectors,
341 // e.g. structured loads and stores (ldN, stN).
342 SDValue createZTuple(ArrayRef<SDValue> Vecs);
343
344 // Similar to above, except the register must start at a multiple of the
345 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
346 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
347
348 /// Generic helper for the createDTuple/createQTuple
349 /// functions. Those should almost always be called instead.
350 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
351 const unsigned SubRegs[]);
352
353 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
354
355 bool tryIndexedLoad(SDNode *N);
356
357 void SelectPtrauthAuth(SDNode *N);
358 void SelectPtrauthResign(SDNode *N);
359
360 bool trySelectStackSlotTagP(SDNode *N);
361 void SelectTagP(SDNode *N);
362
363 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
364 unsigned SubRegIdx);
365 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
366 unsigned SubRegIdx);
367 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
368 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
369 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
370 unsigned Opc_rr, unsigned Opc_ri,
371 bool IsIntr = false);
372 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
373 unsigned Scale, unsigned Opc_ri,
374 unsigned Opc_rr);
375 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
376 bool IsZmMulti, unsigned Opcode,
377 bool HasPred = false);
378 void SelectPExtPair(SDNode *N, unsigned Opc);
379 void SelectWhilePair(SDNode *N, unsigned Opc);
380 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
381 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
382 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
383 bool IsTupleInput, unsigned Opc);
384 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
385
386 template <unsigned MaxIdx, unsigned Scale>
387 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
388 unsigned Op);
389 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
390 unsigned Op, unsigned MaxIdx, unsigned Scale,
391 unsigned BaseReg = 0);
392 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
393 /// SVE Reg+Imm addressing mode.
394 template <int64_t Min, int64_t Max>
395 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
396 SDValue &OffImm);
397 /// SVE Reg+Reg address mode.
398 template <unsigned Scale>
399 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
400 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
401 }
402
403 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc,
404 uint32_t MaxImm);
405
406 template <unsigned MaxIdx, unsigned Scale>
407 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
408 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
409 }
410
411 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
412 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
413 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
414 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
415 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
416 unsigned Opc_rr, unsigned Opc_ri);
417 std::tuple<unsigned, SDValue, SDValue>
418 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
419 const SDValue &OldBase, const SDValue &OldOffset,
420 unsigned Scale);
421
422 bool tryBitfieldExtractOp(SDNode *N);
423 bool tryBitfieldExtractOpFromSExt(SDNode *N);
424 bool tryBitfieldInsertOp(SDNode *N);
425 bool tryBitfieldInsertInZeroOp(SDNode *N);
426 bool tryShiftAmountMod(SDNode *N);
427
428 bool tryReadRegister(SDNode *N);
429 bool tryWriteRegister(SDNode *N);
430
431 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
432 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
433
434 bool trySelectXAR(SDNode *N);
435
436// Include the pieces autogenerated from the target description.
437#include "AArch64GenDAGISel.inc"
438
439private:
440 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
441 SDValue &Shift);
442 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
443 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
444 SDValue &OffImm) {
445 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
446 }
447 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
448 unsigned Size, SDValue &Base,
449 SDValue &OffImm);
450 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
451 SDValue &OffImm);
452 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
453 SDValue &OffImm);
454 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
455 SDValue &Offset, SDValue &SignExtend,
456 SDValue &DoShift);
457 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
458 SDValue &Offset, SDValue &SignExtend,
459 SDValue &DoShift);
460 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
461 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
462 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
463 SDValue &Offset, SDValue &SignExtend);
464
465 template<unsigned RegWidth>
466 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
467 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
468 }
469
470 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
471
472 template<unsigned RegWidth>
473 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
474 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
475 }
476
477 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
478 unsigned Width);
479
480 bool SelectCMP_SWAP(SDNode *N);
481
482 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
483 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
484 bool Negate);
485 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
486 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
487
488 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
489 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
490 bool AllowSaturation, SDValue &Imm);
491
492 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
493 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
494 SDValue &Offset);
495 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
496 SDValue &Offset, unsigned Scale = 1);
497
498 bool SelectAllActivePredicate(SDValue N);
499 bool SelectAnyPredicate(SDValue N);
500};
501
502class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
503public:
504 static char ID;
505 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
506 CodeGenOptLevel OptLevel)
507 : SelectionDAGISelLegacy(
508 ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
509};
510} // end anonymous namespace
511
512char AArch64DAGToDAGISelLegacy::ID = 0;
513
514INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
515
516/// isIntImmediate - This method tests to see if the node is a constant
517/// operand. If so, Imm will receive the zero-extended value.
518static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
519 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
520 Imm = C->getZExtValue();
521 return true;
522 }
523 return false;
524}
525
526// isIntImmediate - This method tests to see if N is a constant operand.
527// If so, Imm will receive the value.
528static bool isIntImmediate(SDValue N, uint64_t &Imm) {
529 return isIntImmediate(N.getNode(), Imm);
530}
531
532// isOpcWithIntImmediate - This method tests to see if the node is a specific
533// opcode and that it has an immediate integer right operand. If so, Imm will
534// receive the value.
535static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
536 uint64_t &Imm) {
537 return N->getOpcode() == Opc &&
538 isIntImmediate(N->getOperand(1).getNode(), Imm);
539}
540
541// isIntImmediateEq - This method tests to see if N is a constant operand that
542// is equivalent to 'ImmExpected'.
543#ifndef NDEBUG
544static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
545 uint64_t Imm;
546 if (!isIntImmediate(N.getNode(), Imm))
547 return false;
548 return Imm == ImmExpected;
549}
550#endif
551
552bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
553 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
554 std::vector<SDValue> &OutOps) {
555 switch(ConstraintID) {
556 default:
557 llvm_unreachable("Unexpected asm memory constraint");
558 case InlineAsm::ConstraintCode::m:
559 case InlineAsm::ConstraintCode::o:
560 case InlineAsm::ConstraintCode::Q:
561 // We need to make sure that this one operand does not end up in XZR, thus
562 // require the address to be in a PointerRegClass register.
563 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
564 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
565 SDLoc dl(Op);
566 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
567 SDValue NewOp =
568 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
569 dl, Op.getValueType(),
570 Op, RC), 0);
571 OutOps.push_back(NewOp);
572 return false;
573 }
574 return true;
575}
576
577/// SelectArithImmed - Select an immediate value that can be represented as
578/// a 12-bit value shifted left by either 0 or 12. If so, return true with
579/// Val set to the 12-bit value and Shift set to the shifter operand.
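// Illustrative examples (editor's sketch, not from the original comment):
// #0xabc matches with Val = 0xabc, Shift = LSL #0; #0xabc000 matches with
// Val = 0xabc, Shift = LSL #12; #0xabc001 has set bits in both halves and is
// rejected.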
580bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
581 SDValue &Shift) {
582 // This function is called from the addsub_shifted_imm ComplexPattern,
583 // which lists [imm] as the list of opcodes it is interested in; however,
584 // we still need to check whether the operand is actually an immediate
585 // here because the ComplexPattern opcode list is only used in
586 // root-level opcode matching.
587 if (!isa<ConstantSDNode>(N.getNode()))
588 return false;
589
590 uint64_t Immed = N.getNode()->getAsZExtVal();
591 unsigned ShiftAmt;
592
593 if (Immed >> 12 == 0) {
594 ShiftAmt = 0;
595 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
596 ShiftAmt = 12;
597 Immed = Immed >> 12;
598 } else
599 return false;
600
601 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
602 SDLoc dl(N);
603 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
604 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
605 return true;
606}
607
608/// SelectNegArithImmed - As above, but negates the value before trying to
609/// select it.
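// For instance (illustrative), an ADD of #-3 can be re-selected as a SUB of
// #3: negating 0xFFFFFFFFFFFFFFFD yields 3, which fits the 12-bit form above.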
610bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
611 SDValue &Shift) {
612 // This function is called from the addsub_shifted_imm ComplexPattern,
613 // which lists [imm] as the list of opcodes it is interested in; however,
614 // we still need to check whether the operand is actually an immediate
615 // here because the ComplexPattern opcode list is only used in
616 // root-level opcode matching.
617 if (!isa<ConstantSDNode>(N.getNode()))
618 return false;
619
620 // The immediate operand must be a 24-bit zero-extended immediate.
621 uint64_t Immed = N.getNode()->getAsZExtVal();
622
623 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
624 // have the opposite effect on the C flag, so this pattern mustn't match under
625 // those circumstances.
626 if (Immed == 0)
627 return false;
628
629 if (N.getValueType() == MVT::i32)
630 Immed = ~((uint32_t)Immed) + 1;
631 else
632 Immed = ~Immed + 1ULL;
633 if (Immed & 0xFFFFFFFFFF000000ULL)
634 return false;
635
636 Immed &= 0xFFFFFFULL;
637 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
638 Shift);
639}
640
641/// getShiftTypeForNode - Translate a shift node to the corresponding
642/// ShiftType value.
643static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
644 switch (N.getOpcode()) {
645 default:
646 return AArch64_AM::InvalidShiftExtend;
647 case ISD::SHL:
648 return AArch64_AM::LSL;
649 case ISD::SRL:
650 return AArch64_AM::LSR;
651 case ISD::SRA:
652 return AArch64_AM::ASR;
653 case ISD::ROTR:
654 return AArch64_AM::ROR;
655 }
656}
657
658/// Determine whether it is worth it to fold SHL into the addressing
659/// mode.
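// For example (illustrative): a (shl x, #3) that only feeds loads/stores can
// be folded into "ldr x0, [x1, x2, lsl #3]"; shifts larger than 3 are never
// folded.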
660static bool isWorthFoldingSHL(SDValue V) {
661 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
662 // It is worth folding logical shift of up to three places.
663 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
664 if (!CSD)
665 return false;
666 unsigned ShiftVal = CSD->getZExtValue();
667 if (ShiftVal > 3)
668 return false;
669
670 // Check if this particular node is reused in any non-memory related
671 // operation. If yes, do not try to fold this node into the address
672 // computation, since the computation will be kept.
673 const SDNode *Node = V.getNode();
674 for (SDNode *UI : Node->uses())
675 if (!isa<MemSDNode>(*UI))
676 for (SDNode *UII : UI->uses())
677 if (!isa<MemSDNode>(*UII))
678 return false;
679 return true;
680}
681
682/// Determine whether it is worth folding V into an extended register
683/// addressing mode.
684bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
685 // Trivial if we are optimizing for code size or if there is only
686 // one use of the value.
687 if (CurDAG->shouldOptForSize() || V.hasOneUse())
688 return true;
689
690 // If a subtarget has a slow shift, folding a shift into multiple loads
691 // costs additional micro-ops.
692 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
693 return false;
694
695 // Check whether we're going to emit the address arithmetic anyway because
696 // it's used by a non-address operation.
697 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
698 return true;
699 if (V.getOpcode() == ISD::ADD) {
700 const SDValue LHS = V.getOperand(0);
701 const SDValue RHS = V.getOperand(1);
702 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
703 return true;
704 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
705 return true;
706 }
707
708 // It hurts otherwise, since the value will be reused.
709 return false;
710}
711
712/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
713/// to select more shifted-register patterns.
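// A worked example (illustrative, i32): (and (shl x, 2), 0xFFFFFFF0) has
// LowZBits = 4 and MaskLen = 28, so it is rewritten as a UBFM (lsr x, #2)
// reported with an LSL #4 shifted-register operand, which computes the same
// value as ((x << 2) & 0xFFFFFFF0).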
714bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
715 SDValue &Shift) {
716 EVT VT = N.getValueType();
717 if (VT != MVT::i32 && VT != MVT::i64)
718 return false;
719
720 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
721 return false;
722 SDValue LHS = N.getOperand(0);
723 if (!LHS->hasOneUse())
724 return false;
725
726 unsigned LHSOpcode = LHS->getOpcode();
727 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
728 return false;
729
730 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
731 if (!ShiftAmtNode)
732 return false;
733
734 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
735 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
736 if (!RHSC)
737 return false;
738
739 APInt AndMask = RHSC->getAPIntValue();
740 unsigned LowZBits, MaskLen;
741 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
742 return false;
743
744 unsigned BitWidth = N.getValueSizeInBits();
745 SDLoc DL(LHS);
746 uint64_t NewShiftC;
747 unsigned NewShiftOp;
748 if (LHSOpcode == ISD::SHL) {
749 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
750 // BitWidth != LowZBits + MaskLen doesn't match the pattern
751 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
752 return false;
753
754 NewShiftC = LowZBits - ShiftAmtC;
755 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
756 } else {
757 if (LowZBits == 0)
758 return false;
759
760 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
761 NewShiftC = LowZBits + ShiftAmtC;
762 if (NewShiftC >= BitWidth)
763 return false;
764
765 // SRA needs all high bits
766 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
767 return false;
768
769 // SRL high bits can be 0 or 1
770 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
771 return false;
772
773 if (LHSOpcode == ISD::SRL)
774 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
775 else
776 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
777 }
778
779 assert(NewShiftC < BitWidth && "Invalid shift amount");
780 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
781 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
782 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
783 NewShiftAmt, BitWidthMinus1),
784 0);
785 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
786 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
787 return true;
788}
789
790/// getExtendTypeForNode - Translate an extend node to the corresponding
791/// ExtendType value.
792static AArch64_AM::ShiftExtendType
793getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
794 if (N.getOpcode() == ISD::SIGN_EXTEND ||
795 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
796 EVT SrcVT;
797 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
798 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
799 else
800 SrcVT = N.getOperand(0).getValueType();
801
802 if (!IsLoadStore && SrcVT == MVT::i8)
803 return AArch64_AM::SXTB;
804 else if (!IsLoadStore && SrcVT == MVT::i16)
805 return AArch64_AM::SXTH;
806 else if (SrcVT == MVT::i32)
807 return AArch64_AM::SXTW;
808 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
809
810 return AArch64_AM::InvalidShiftExtend;
811 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
812 N.getOpcode() == ISD::ANY_EXTEND) {
813 EVT SrcVT = N.getOperand(0).getValueType();
814 if (!IsLoadStore && SrcVT == MVT::i8)
815 return AArch64_AM::UXTB;
816 else if (!IsLoadStore && SrcVT == MVT::i16)
817 return AArch64_AM::UXTH;
818 else if (SrcVT == MVT::i32)
819 return AArch64_AM::UXTW;
820 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
821
822 return AArch64_AM::InvalidShiftExtend;
823 } else if (N.getOpcode() == ISD::AND) {
824 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
825 if (!CSD)
826 return AArch64_AM::InvalidShiftExtend;
827 uint64_t AndMask = CSD->getZExtValue();
828
829 switch (AndMask) {
830 default:
831 return AArch64_AM::InvalidShiftExtend;
832 case 0xFF:
833 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
834 case 0xFFFF:
835 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
836 case 0xFFFFFFFF:
837 return AArch64_AM::UXTW;
838 }
839 }
840
841 return AArch64_AM::InvalidShiftExtend;
842}
843
844/// Determine whether it is worth folding V into an extended register of an
845/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
846/// instruction, and the shift should be treated as worth folding even if it
847/// has multiple uses.
848bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
849 // Trivial if we are optimizing for code size or if there is only
850 // one use of the value.
851 if (CurDAG->shouldOptForSize() || V.hasOneUse())
852 return true;
853
854 // If a subtarget has a fastpath LSL we can fold a logical shift into
855 // the add/sub and save a cycle.
856 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
857 V.getConstantOperandVal(1) <= 4 &&
858 getExtendTypeForNode(V.getOperand(0)) == AArch64_AM::InvalidShiftExtend)
859 return true;
860
861 // It hurts otherwise, since the value will be reused.
862 return false;
863}
864
865/// SelectShiftedRegister - Select a "shifted register" operand. If the value
866/// is not shifted, set the Shift operand to the default of "LSL 0". The logical
867/// instructions allow the shifted register to be rotated, but the arithmetic
868/// instructions do not. The AllowROR parameter specifies whether ROR is
869/// supported.
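// For example (illustrative): "orr w0, w1, w2, ror #8" is selectable because
// logical instructions accept ROR, while "add w0, w1, w2, ror #8" is not.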
870bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
871 SDValue &Reg, SDValue &Shift) {
872 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
873 return true;
874
875 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
876 if (ShType == AArch64_AM::InvalidShiftExtend)
877 return false;
878 if (!AllowROR && ShType == AArch64_AM::ROR)
879 return false;
880
881 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
882 unsigned BitSize = N.getValueSizeInBits();
883 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
884 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
885
886 Reg = N.getOperand(0);
887 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
888 return isWorthFoldingALU(N, true);
889 }
890
891 return false;
892}
893
894/// Instructions that accept extend modifiers like UXTW expect the register
895/// being extended to be a GPR32, but the incoming DAG might be acting on a
896/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
897/// this is the case.
898static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
899 if (N.getValueType() == MVT::i32)
900 return N;
901
902 SDLoc dl(N);
903 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
904}
905
906// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
907template<signed Low, signed High, signed Scale>
908bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
909 if (!isa<ConstantSDNode>(N))
910 return false;
911
912 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
913 if ((MulImm % std::abs(Scale)) == 0) {
914 int64_t RDVLImm = MulImm / Scale;
915 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
916 Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
917 return true;
918 }
919 }
920
921 return false;
922}
923
924/// SelectArithExtendedRegister - Select a "extended register" operand. This
925/// operand folds in an extend followed by an optional left shift.
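// For example (illustrative): an add of (shl (sext_inreg x, i8), 2) can be
// selected as "add x0, x1, w2, sxtb #2", folding both the extend and the
// left shift into the extended-register operand.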
926bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
927 SDValue &Shift) {
928 unsigned ShiftVal = 0;
929 AArch64_AM::ShiftExtendType Ext;
930
931 if (N.getOpcode() == ISD::SHL) {
932 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
933 if (!CSD)
934 return false;
935 ShiftVal = CSD->getZExtValue();
936 if (ShiftVal > 4)
937 return false;
938
939 Ext = getExtendTypeForNode(N.getOperand(0));
940 if (Ext == AArch64_AM::InvalidShiftExtend)
941 return false;
942
943 Reg = N.getOperand(0).getOperand(0);
944 } else {
945 Ext = getExtendTypeForNode(N);
946 if (Ext == AArch64_AM::InvalidShiftExtend)
947 return false;
948
949 Reg = N.getOperand(0);
950
951 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
952 // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
953 auto isDef32 = [](SDValue N) {
954 unsigned Opc = N.getOpcode();
955 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
956 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
957 Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
958 Opc != ISD::FREEZE;
959 };
960 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
961 isDef32(Reg))
962 return false;
963 }
964
965 // AArch64 mandates that the RHS of the operation must use the smallest
966 // register class that could contain the size being extended from. Thus,
967 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
968 // there might not be an actual 32-bit value in the program. We can
969 // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
970 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
971 Reg = narrowIfNeeded(CurDAG, Reg);
972 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
973 MVT::i32);
974 return isWorthFoldingALU(N);
975}
976
977/// SelectArithUXTXRegister - Select a "UXTX register" operand. This operand
978/// form is used by instructions that take SP as an operand.
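// For example (illustrative): "add sp, sp, x1, lsl #3" uses this UXTX form,
// since instructions reading or writing SP take the extended-register shape.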
979bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
980 SDValue &Shift) {
981 unsigned ShiftVal = 0;
982 AArch64_AM::ShiftExtendType Ext;
983
984 if (N.getOpcode() != ISD::SHL)
985 return false;
986
987 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
988 if (!CSD)
989 return false;
990 ShiftVal = CSD->getZExtValue();
991 if (ShiftVal > 4)
992 return false;
993
994 Ext = AArch64_AM::UXTX;
995 Reg = N.getOperand(0);
996 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
997 MVT::i32);
998 return isWorthFoldingALU(N);
999}
1000
1001/// If there's a use of this ADDlow that's not itself a load/store then we'll
1002/// need to create a real ADD instruction from it anyway and there's no point in
1003/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1004/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1005/// leads to duplicated ADRP instructions.
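// For example (illustrative): ":lo12:var" is best folded into
// "ldr x0, [x0, :lo12:var]" after the ADRP; but if the same ADDlow also feeds
// a non-memory use, keeping a single "add x0, x0, :lo12:var" avoids emitting
// the ADRP/ADD pair twice.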
1006static bool isWorthFoldingADDlow(SDValue N) {
1007 for (auto *Use : N->uses()) {
1008 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
1009 Use->getOpcode() != ISD::ATOMIC_LOAD &&
1010 Use->getOpcode() != ISD::ATOMIC_STORE)
1011 return false;
1012
1013 // ldar and stlr have much more restrictive addressing modes (just a
1014 // register).
1015 if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getSuccessOrdering()))
1016 return false;
1017 }
1018
1019 return true;
1020}
1021
1022/// Check if the immediate offset is valid as a scaled immediate.
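// E.g. (illustrative) with Range = 0x1000 and Size = 8, the valid offsets are
// the multiples of 8 in [0, 32760].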
1023static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1024 unsigned Size) {
1025 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1026 Offset < (Range << Log2_32(Size)))
1027 return true;
1028 return false;
1029}
1030
1031/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1032/// immediate" address. The "Size" argument is the size in bytes of the memory
1033/// reference, which determines the scale.
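// For example (illustrative), with BW = 7 and Size = 8 (64-bit LDP/STP), the
// signed form accepts byte offsets that are multiples of 8 in [-512, 504],
// emitted as OffImm in units of Size (i.e. -64 .. 63).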
1034bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1035 unsigned BW, unsigned Size,
1036 SDValue &Base,
1037 SDValue &OffImm) {
1038 SDLoc dl(N);
1039 const DataLayout &DL = CurDAG->getDataLayout();
1040 const TargetLowering *TLI = getTargetLowering();
1041 if (N.getOpcode() == ISD::FrameIndex) {
1042 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1043 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1044 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1045 return true;
1046 }
1047
1048 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit
1049 // signed modes selected here don't support labels/immediates, only base+offset.
1050 if (CurDAG->isBaseWithConstantOffset(N)) {
1051 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1052 if (IsSignedImm) {
1053 int64_t RHSC = RHS->getSExtValue();
1054 unsigned Scale = Log2_32(Size);
1055 int64_t Range = 0x1LL << (BW - 1);
1056
1057 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1058 RHSC < (Range << Scale)) {
1059 Base = N.getOperand(0);
1060 if (Base.getOpcode() == ISD::FrameIndex) {
1061 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1062 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1063 }
1064 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1065 return true;
1066 }
1067 } else {
1068 // unsigned Immediate
1069 uint64_t RHSC = RHS->getZExtValue();
1070 unsigned Scale = Log2_32(Size);
1071 uint64_t Range = 0x1ULL << BW;
1072
1073 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1074 Base = N.getOperand(0);
1075 if (Base.getOpcode() == ISD::FrameIndex) {
1076 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1077 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1078 }
1079 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1080 return true;
1081 }
1082 }
1083 }
1084 }
1085 // Base only. The address will be materialized into a register before
1086 // the memory is accessed.
1087 // add x0, Xbase, #offset
1088 // stp x1, x2, [x0]
1089 Base = N;
1090 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1091 return true;
1092}
1093
1094/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1095/// immediate" address. The "Size" argument is the size in bytes of the memory
1096/// reference, which determines the scale.
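// For example (illustrative), with Size = 4 the scaled form covers byte
// offsets 0, 4, ..., 16380 (4095 * 4), e.g. "ldr w0, [x1, #16380]".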
1097bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1098 SDValue &Base, SDValue &OffImm) {
1099 SDLoc dl(N);
1100 const DataLayout &DL = CurDAG->getDataLayout();
1101 const TargetLowering *TLI = getTargetLowering();
1102 if (N.getOpcode() == ISD::FrameIndex) {
1103 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1104 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1105 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1106 return true;
1107 }
1108
1109 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1110 GlobalAddressSDNode *GAN =
1111 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1112 Base = N.getOperand(0);
1113 OffImm = N.getOperand(1);
1114 if (!GAN)
1115 return true;
1116
1117 if (GAN->getOffset() % Size == 0 &&
1118 GAN->getGlobal()->getPointerAlignment(DL) >= Size)
1119 return true;
1120 }
1121
1122 if (CurDAG->isBaseWithConstantOffset(N)) {
1123 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1124 int64_t RHSC = (int64_t)RHS->getZExtValue();
1125 unsigned Scale = Log2_32(Size);
1126 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1127 Base = N.getOperand(0);
1128 if (Base.getOpcode() == ISD::FrameIndex) {
1129 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1130 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1131 }
1132 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1133 return true;
1134 }
1135 }
1136 }
1137
1138 // Before falling back to our general case, check if the unscaled
1139 // instructions can handle this. If so, that's preferable.
1140 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1141 return false;
1142
1143 // Base only. The address will be materialized into a register before
1144 // the memory is accessed.
1145 // add x0, Xbase, #offset
1146 // ldr x0, [x0]
1147 Base = N;
1148 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1149 return true;
1150}
1151
1152/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1153/// immediate" address. This should only match when there is an offset that
1154/// is not valid for a scaled immediate addressing mode. The "Size" argument
1155/// is the size in bytes of the memory reference, which is needed here to know
1156/// what is valid for a scaled immediate.
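// For example (illustrative): "ldur w0, [x1, #-3]" uses this form, since -3 is
// neither non-negative nor a multiple of the access size.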
1157bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1158 SDValue &Base,
1159 SDValue &OffImm) {
1160 if (!CurDAG->isBaseWithConstantOffset(N))
1161 return false;
1162 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1163 int64_t RHSC = RHS->getSExtValue();
1164 if (RHSC >= -256 && RHSC < 256) {
1165 Base = N.getOperand(0);
1166 if (Base.getOpcode() == ISD::FrameIndex) {
1167 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1168 const TargetLowering *TLI = getTargetLowering();
1169 Base = CurDAG->getTargetFrameIndex(
1170 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1171 }
1172 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1173 return true;
1174 }
1175 }
1176 return false;
1177}
1178
1179static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
1180 SDLoc dl(N);
1181 SDValue ImpDef = SDValue(
1182 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1183 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1184 N);
1185}
1186
1187/// Check if the given SHL node (\p N), can be used to form an
1188/// extended register for an addressing mode.
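// For example (illustrative): (shl (sext_inreg x, i32), 3) under a 64-bit
// load can become the "[x0, w1, sxtw #3]" operand form.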
1189bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1190 bool WantExtend, SDValue &Offset,
1191 SDValue &SignExtend) {
1192 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1193 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1194 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1195 return false;
1196
1197 SDLoc dl(N);
1198 if (WantExtend) {
1199 AArch64_AM::ShiftExtendType Ext =
1200 getExtendTypeForNode(N.getOperand(0), true);
1201 if (Ext == AArch64_AM::InvalidShiftExtend)
1202 return false;
1203
1204 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1205 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1206 MVT::i32);
1207 } else {
1208 Offset = N.getOperand(0);
1209 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1210 }
1211
1212 unsigned LegalShiftVal = Log2_32(Size);
1213 unsigned ShiftVal = CSD->getZExtValue();
1214
1215 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1216 return false;
1217
1218 return isWorthFoldingAddr(N, Size);
1219}
1220
1221bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1222 SDValue &Base, SDValue &Offset,
1223 SDValue &SignExtend,
1224 SDValue &DoShift) {
1225 if (N.getOpcode() != ISD::ADD)
1226 return false;
1227 SDValue LHS = N.getOperand(0);
1228 SDValue RHS = N.getOperand(1);
1229 SDLoc dl(N);
1230
1231 // We don't want to match immediate adds here, because they are better lowered
1232 // to the register-immediate addressing modes.
1233 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
1234 return false;
1235
1236 // Check if this particular node is reused in any non-memory related
1237 // operation. If yes, do not try to fold this node into the address
1238 // computation, since the computation will be kept.
1239 const SDNode *Node = N.getNode();
1240 for (SDNode *UI : Node->uses()) {
1241 if (!isa<MemSDNode>(*UI))
1242 return false;
1243 }
1244
1245 // Remember if it is worth folding N when it produces extended register.
1246 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1247
1248 // Try to match a shifted extend on the RHS.
1249 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1250 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1251 Base = LHS;
1252 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1253 return true;
1254 }
1255
1256 // Try to match a shifted extend on the LHS.
1257 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1258 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1259 Base = RHS;
1260 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1261 return true;
1262 }
1263
1264 // There was no shift, whatever else we find.
1265 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1266
1267 AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
1268 // Try to match an unshifted extend on the LHS.
1269 if (IsExtendedRegisterWorthFolding &&
1270 (Ext = getExtendTypeForNode(LHS, true)) !=
1271 AArch64_AM::InvalidShiftExtend) {
1272 Base = RHS;
1273 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1274 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1275 MVT::i32);
1276 if (isWorthFoldingAddr(LHS, Size))
1277 return true;
1278 }
1279
1280 // Try to match an unshifted extend on the RHS.
1281 if (IsExtendedRegisterWorthFolding &&
1282 (Ext = getExtendTypeForNode(RHS, true)) !=
1283 AArch64_AM::InvalidShiftExtend) {
1284 Base = LHS;
1285 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1286 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1287 MVT::i32);
1288 if (isWorthFoldingAddr(RHS, Size))
1289 return true;
1290 }
1291
1292 return false;
1293}
1294
1295// Check if the given immediate is preferred by ADD. If an immediate can be
1296// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
1297// encoded by one MOVZ, return true.
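// For example (illustrative): 0x456000 is preferred (ADD ..., #0x456, LSL #12
// and not a single MOVZ), while 0x230000 is not, because MOVZ #0x23, LSL #16
// already materializes it in one instruction.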
1298static bool isPreferredADD(int64_t ImmOff) {
1299 // Constant in [0x0, 0xfff] can be encoded in ADD.
1300 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1301 return true;
1302 // Check if it can be encoded in an "ADD LSL #12".
1303 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1304 // As a single MOVZ is faster than an "ADD with LSL #12", ignore such constants.
1305 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1306 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1307 return false;
1308}
1309
1310bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1311 SDValue &Base, SDValue &Offset,
1312 SDValue &SignExtend,
1313 SDValue &DoShift) {
1314 if (N.getOpcode() != ISD::ADD)
1315 return false;
1316 SDValue LHS = N.getOperand(0);
1317 SDValue RHS = N.getOperand(1);
1318 SDLoc DL(N);
1319
1320 // Check if this particular node is reused in any non-memory related
1321 // operation. If yes, do not try to fold this node into the address
1322 // computation, since the computation will be kept.
1323 const SDNode *Node = N.getNode();
1324 for (SDNode *UI : Node->uses()) {
1325 if (!isa<MemSDNode>(*UI))
1326 return false;
1327 }
1328
1329 // Watch out if RHS is a wide immediate: it cannot be selected into the
1330 // [BaseReg+Imm] addressing mode, and it may not be encodable in an ADD/SUB.
1331 // Instead it will use the [BaseReg + 0] address mode and generate
1332 // instructions like:
1333 // MOV X0, WideImmediate
1334 // ADD X1, BaseReg, X0
1335 // LDR X2, [X1, 0]
1336 // For such situation, using [BaseReg, XReg] addressing mode can save one
1337 // ADD/SUB:
1338 // MOV X0, WideImmediate
1339 // LDR X2, [BaseReg, X0]
1340 if (isa<ConstantSDNode>(RHS)) {
1341 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1342 // Skip immediates that can be selected by a load/store addressing mode.
1343 // Also skip immediates that can be encoded by a single ADD (SUB is also
1344 // checked by using -ImmOff).
1345 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1346 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1347 return false;
1348
1349 SDValue Ops[] = { RHS };
1350 SDNode *MOVI =
1351 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1352 SDValue MOVIV = SDValue(MOVI, 0);
1353 // This ADD of two X registers will be selected into [Reg+Reg] mode.
1354 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1355 }
1356
1357 // Remember if it is worth folding N when it produces extended register.
1358 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1359
1360 // Try to match a shifted extend on the RHS.
1361 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1362 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1363 Base = LHS;
1364 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1365 return true;
1366 }
1367
1368 // Try to match a shifted extend on the LHS.
1369 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1370 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1371 Base = RHS;
1372 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1373 return true;
1374 }
1375
1376 // Match any non-shifted, non-extend, non-immediate add expression.
1377 Base = LHS;
1378 Offset = RHS;
1379 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1380 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1381 // Reg1 + Reg2 is free: no check needed.
1382 return true;
1383}
1384
1385SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1386 static const unsigned RegClassIDs[] = {
1387 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1388 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1389 AArch64::dsub2, AArch64::dsub3};
1390
1391 return createTuple(Regs, RegClassIDs, SubRegs);
1392}
1393
1394SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1395 static const unsigned RegClassIDs[] = {
1396 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1397 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1398 AArch64::qsub2, AArch64::qsub3};
1399
1400 return createTuple(Regs, RegClassIDs, SubRegs);
1401}
1402
1403SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1404 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1405 AArch64::ZPR3RegClassID,
1406 AArch64::ZPR4RegClassID};
1407 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1408 AArch64::zsub2, AArch64::zsub3};
1409
1410 return createTuple(Regs, RegClassIDs, SubRegs);
1411}
1412
1413SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1414 assert(Regs.size() == 2 || Regs.size() == 4);
1415
1416 // The createTuple interface requires 3 RegClassIDs for each possible
1417 // tuple type even though we only have them for ZPR2 and ZPR4.
1418 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1419 AArch64::ZPR4Mul4RegClassID};
1420 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1421 AArch64::zsub2, AArch64::zsub3};
1422 return createTuple(Regs, RegClassIDs, SubRegs);
1423}
1424
1425SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1426 const unsigned RegClassIDs[],
1427 const unsigned SubRegs[]) {
1428 // There's no special register-class for a vector-list of 1 element: it's just
1429 // a vector.
1430 if (Regs.size() == 1)
1431 return Regs[0];
1432
1433 assert(Regs.size() >= 2 && Regs.size() <= 4);
1434
1435 SDLoc DL(Regs[0]);
1436
1437 SmallVector<SDValue, 4> Ops;
1438
1439 // First operand of REG_SEQUENCE is the desired RegClass.
1440 Ops.push_back(
1441 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1442
1443 // Then we get pairs of source & subregister-position for the components.
1444 for (unsigned i = 0; i < Regs.size(); ++i) {
1445 Ops.push_back(Regs[i]);
1446 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1447 }
1448
1449 SDNode *N =
1450 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1451 return SDValue(N, 0);
1452}
1453
1454void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1455 bool isExt) {
1456 SDLoc dl(N);
1457 EVT VT = N->getValueType(0);
1458
1459 unsigned ExtOff = isExt;
1460
1461 // Form a REG_SEQUENCE to force register allocation.
1462 unsigned Vec0Off = ExtOff + 1;
1463 SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1464 N->op_begin() + Vec0Off + NumVecs);
1465 SDValue RegSeq = createQTuple(Regs);
1466
1467 SmallVector<SDValue, 6> Ops;
1468 if (isExt)
1469 Ops.push_back(N->getOperand(1));
1470 Ops.push_back(RegSeq);
1471 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1472 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1473}
1474
1475static std::tuple<SDValue, SDValue>
1476extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
1477 SDLoc DL(Disc);
1478 SDValue AddrDisc;
1479 SDValue ConstDisc;
1480
1481 // If this is a blend, remember the constant and address discriminators.
1482 // Otherwise, it's either a constant discriminator, or a non-blended
1483 // address discriminator.
1484 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1485 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
1486 AddrDisc = Disc->getOperand(1);
1487 ConstDisc = Disc->getOperand(2);
1488 } else {
1489 ConstDisc = Disc;
1490 }
1491
1492 // If the constant discriminator (either the blend RHS, or the entire
1493 // discriminator value) isn't a 16-bit constant, bail out, and let the
1494 // discriminator be computed separately.
1495 auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
1496 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
1497 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
1498
1499 // If there's no address discriminator, use XZR directly.
1500 if (!AddrDisc)
1501 AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
1502
1503 return std::make_tuple(
1504 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
1505 AddrDisc);
1506}
1507
1508void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1509 SDLoc DL(N);
1510 // IntrinsicID is operand #0
1511 SDValue Val = N->getOperand(1);
1512 SDValue AUTKey = N->getOperand(2);
1513 SDValue AUTDisc = N->getOperand(3);
1514
1515 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1516 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1517
1518 SDValue AUTAddrDisc, AUTConstDisc;
1519 std::tie(AUTConstDisc, AUTAddrDisc) =
1520 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1521
1522 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1523 AArch64::X16, Val, SDValue());
1524 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
1525
1526 SDNode *AUT = CurDAG->getMachineNode(AArch64::AUT, DL, MVT::i64, Ops);
1527 ReplaceNode(N, AUT);
1528 return;
1529}
1530
1531void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
1532 SDLoc DL(N);
1533 // IntrinsicID is operand #0
1534 SDValue Val = N->getOperand(1);
1535 SDValue AUTKey = N->getOperand(2);
1536 SDValue AUTDisc = N->getOperand(3);
1537 SDValue PACKey = N->getOperand(4);
1538 SDValue PACDisc = N->getOperand(5);
1539
1540 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1541 unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();
1542
1543 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1544 PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);
1545
1546 SDValue AUTAddrDisc, AUTConstDisc;
1547 std::tie(AUTConstDisc, AUTAddrDisc) =
1548 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1549
1550 SDValue PACAddrDisc, PACConstDisc;
1551 std::tie(PACConstDisc, PACAddrDisc) =
1552 extractPtrauthBlendDiscriminators(PACDisc, CurDAG);
1553
1554 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1555 AArch64::X16, Val, SDValue());
1556
1557 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
1558 PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};
1559
1560 SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
1561 ReplaceNode(N, AUTPAC);
1562 return;
1563}
1564
1565bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1566 LoadSDNode *LD = cast<LoadSDNode>(N);
1567 if (LD->isUnindexed())
1568 return false;
1569 EVT VT = LD->getMemoryVT();
1570 EVT DstVT = N->getValueType(0);
1571 ISD::MemIndexedMode AM = LD->getAddressingMode();
1572 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1573
1574 // We're not doing validity checking here. That was done when checking
1575 // if we should mark the load as indexed or not. We're just selecting
1576 // the right instruction.
1577 unsigned Opcode = 0;
1578
1579 ISD::LoadExtType ExtType = LD->getExtensionType();
1580 bool InsertTo64 = false;
1581 if (VT == MVT::i64)
1582 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1583 else if (VT == MVT::i32) {
1584 if (ExtType == ISD::NON_EXTLOAD)
1585 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1586 else if (ExtType == ISD::SEXTLOAD)
1587 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1588 else {
1589 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1590 InsertTo64 = true;
1591 // The result of the load is only i32. It's the subreg_to_reg that makes
1592 // it into an i64.
1593 DstVT = MVT::i32;
1594 }
1595 } else if (VT == MVT::i16) {
1596 if (ExtType == ISD::SEXTLOAD) {
1597 if (DstVT == MVT::i64)
1598 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1599 else
1600 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1601 } else {
1602 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1603 InsertTo64 = DstVT == MVT::i64;
1604 // The result of the load is only i32. It's the subreg_to_reg that makes
1605 // it into an i64.
1606 DstVT = MVT::i32;
1607 }
1608 } else if (VT == MVT::i8) {
1609 if (ExtType == ISD::SEXTLOAD) {
1610 if (DstVT == MVT::i64)
1611 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1612 else
1613 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1614 } else {
1615 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1616 InsertTo64 = DstVT == MVT::i64;
1617 // The result of the load is only i32. It's the subreg_to_reg that makes
1618 // it into an i64.
1619 DstVT = MVT::i32;
1620 }
1621 } else if (VT == MVT::f16) {
1622 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1623 } else if (VT == MVT::bf16) {
1624 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1625 } else if (VT == MVT::f32) {
1626 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1627 } else if (VT == MVT::f64 || VT.is64BitVector()) {
1628 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1629 } else if (VT.is128BitVector()) {
1630 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1631 } else
1632 return false;
1633 SDValue Chain = LD->getChain();
1634 SDValue Base = LD->getBasePtr();
1635 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1636 int OffsetVal = (int)OffsetOp->getZExtValue();
1637 SDLoc dl(N);
1638 SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1639 SDValue Ops[] = { Base, Offset, Chain };
1640 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1641 MVT::Other, Ops);
1642
1643 // Transfer memoperands.
1644 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1645 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1646
1647 // Either way, we're replacing the node, so tell the caller that.
1648 SDValue LoadedVal = SDValue(Res, 1);
1649 if (InsertTo64) {
1650 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1651 LoadedVal =
1652 SDValue(CurDAG->getMachineNode(
1653 AArch64::SUBREG_TO_REG, dl, MVT::i64,
1654 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1655 SubReg),
1656 0);
1657 }
1658
1659 ReplaceUses(SDValue(N, 0), LoadedVal);
1660 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1661 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1662 CurDAG->RemoveDeadNode(N);
1663 return true;
1664}
1665
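// Select a multi-vector structure load (e.g. the LD2/LD3/LD4 family). The
// instruction defines a single MVT::Untyped tuple register; the individual
// result vectors are recovered with subregister extractions at SubRegIdx + i.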
1666void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1667 unsigned SubRegIdx) {
1668 SDLoc dl(N);
1669 EVT VT = N->getValueType(0);
1670 SDValue Chain = N->getOperand(0);
1671
1672 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1673 Chain};
1674
1675 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1676
1677 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1678 SDValue SuperReg = SDValue(Ld, 0);
1679 for (unsigned i = 0; i < NumVecs; ++i)
1680 ReplaceUses(SDValue(N, i),
1681 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1682
1683 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1684
1685 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1686 // because it's too simple to have needed special treatment during lowering.
1687 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1688 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1689 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1690 }
1691
1692 CurDAG->RemoveDeadNode(N);
1693}
1694
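// Post-increment variant of SelectLoad: result 0 of the machine node is the
// updated base register (i64 writeback), result 1 is the vector tuple, and
// result 2 is the chain.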
1695void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1696 unsigned Opc, unsigned SubRegIdx) {
1697 SDLoc dl(N);
1698 EVT VT = N->getValueType(0);
1699 SDValue Chain = N->getOperand(0);
1700
1701 SDValue Ops[] = {N->getOperand(1), // Mem operand
1702 N->getOperand(2), // Incremental
1703 Chain};
1704
1705 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1706 MVT::Untyped, MVT::Other};
1707
1708 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1709
1710 // Update uses of write back register
1711 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1712
1713 // Update uses of vector list
1714 SDValue SuperReg = SDValue(Ld, 1);
1715 if (NumVecs == 1)
1716 ReplaceUses(SDValue(N, 0), SuperReg);
1717 else
1718 for (unsigned i = 0; i < NumVecs; ++i)
1719 ReplaceUses(SDValue(N, i),
1720 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1721
1722 // Update the chain
1723 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1724 CurDAG->RemoveDeadNode(N);
1725}
1726
1727/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1728/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1729/// new Base and an SDValue representing the new offset.
1730std::tuple<unsigned, SDValue, SDValue>
1731AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1732 unsigned Opc_ri,
1733 const SDValue &OldBase,
1734 const SDValue &OldOffset,
1735 unsigned Scale) {
1736 SDValue NewBase = OldBase;
1737 SDValue NewOffset = OldOffset;
1738 // Detect a possible Reg+Imm addressing mode.
1739 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1740 N, OldBase, NewBase, NewOffset);
1741
1742 // Detect a possible reg+reg addressing mode, but only if we haven't already
1743 // detected a Reg+Imm one.
1744 const bool IsRegReg =
1745 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1746
1747 // Select the instruction.
1748 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1749}
1750
1751enum class SelectTypeKind {
1752 Int1 = 0,
1753 Int = 1,
1754 FP = 2,
1755 AnyType = 3,
1756};
1757
1758 /// This function selects an opcode from a list of opcodes, which are
1759 /// expected to be the opcodes for { 8-bit, 16-bit, 32-bit, 64-bit }
1760 /// element types, in that order.
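// For example, with Kind == SelectTypeKind::Int and VT == nxv8i16, the
// minimum element count of 8 maps to Offset 1, so the second opcode in the
// list (the 16-bit element variant) is returned.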
1761template <SelectTypeKind Kind>
1762static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1763 // Only match scalable vector VTs
1764 if (!VT.isScalableVector())
1765 return 0;
1766
1767 EVT EltVT = VT.getVectorElementType();
1768 unsigned Key = VT.getVectorMinNumElements();
1769 switch (Kind) {
1770   case SelectTypeKind::AnyType:
1771     break;
1772   case SelectTypeKind::Int:
1773     if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1774 EltVT != MVT::i64)
1775 return 0;
1776 break;
1777   case SelectTypeKind::Int1:
1778     if (EltVT != MVT::i1)
1779 return 0;
1780 break;
1781 case SelectTypeKind::FP:
1782 if (EltVT == MVT::bf16)
1783 Key = 16;
1784 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
1785 EltVT != MVT::f64)
1786 return 0;
1787 break;
1788 }
1789
1790 unsigned Offset;
1791 switch (Key) {
1792 case 16: // 8-bit or bf16
1793 Offset = 0;
1794 break;
1795 case 8: // 16-bit
1796 Offset = 1;
1797 break;
1798 case 4: // 32-bit
1799 Offset = 2;
1800 break;
1801 case 2: // 64-bit
1802 Offset = 3;
1803 break;
1804 default:
1805 return 0;
1806 }
1807
1808 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1809}
1810
1811// This function is almost identical to SelectWhilePair, but has an
1812// extra check on the range of the immediate operand.
1813// TODO: Merge these two functions together at some point?
1814void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
1815 // Immediate can be either 0 or 1.
1816 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
1817 if (Imm->getZExtValue() > 1)
1818 return;
1819
1820 SDLoc DL(N);
1821 EVT VT = N->getValueType(0);
1822 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1823 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1824 SDValue SuperReg = SDValue(WhilePair, 0);
1825
1826 for (unsigned I = 0; I < 2; ++I)
1827 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1828 AArch64::psub0 + I, DL, VT, SuperReg));
1829
1830 CurDAG->RemoveDeadNode(N);
1831}
1832
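// Select a WHILE instruction that defines a pair of predicate registers; the
// two halves are extracted from the Untyped result via psub0 and psub1.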
1833void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1834 SDLoc DL(N);
1835 EVT VT = N->getValueType(0);
1836
1837 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1838
1839 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1840 SDValue SuperReg = SDValue(WhilePair, 0);
1841
1842 for (unsigned I = 0; I < 2; ++I)
1843 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1844 AArch64::psub0 + I, DL, VT, SuperReg));
1845
1846 CurDAG->RemoveDeadNode(N);
1847}
1848
1849void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1850 unsigned Opcode) {
1851 EVT VT = N->getValueType(0);
1852 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1853 SDValue Ops = createZTuple(Regs);
1854 SDLoc DL(N);
1855 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1856 SDValue SuperReg = SDValue(Intrinsic, 0);
1857 for (unsigned i = 0; i < NumVecs; ++i)
1858 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1859 AArch64::zsub0 + i, DL, VT, SuperReg));
1860
1861 CurDAG->RemoveDeadNode(N);
1862}
1863
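// Select a destructive multi-vector intrinsic. The first NumVecs vector
// operands are gathered into a Z-register tuple (Zdn); Zm is either a second
// tuple (IsZmMulti) or a single vector, and an optional leading predicate
// operand is passed through when HasPred is set.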
1864void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
1865 unsigned NumVecs,
1866 bool IsZmMulti,
1867 unsigned Opcode,
1868 bool HasPred) {
1869 assert(Opcode != 0 && "Unexpected opcode");
1870
1871 SDLoc DL(N);
1872 EVT VT = N->getValueType(0);
1873 unsigned FirstVecIdx = HasPred ? 2 : 1;
1874
1875 auto GetMultiVecOperand = [=](unsigned StartIdx) {
1876 SmallVector<SDValue, 4> Regs(N->ops().slice(StartIdx, NumVecs));
1877 return createZMulTuple(Regs);
1878 };
1879
1880 SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
1881
1882 SDValue Zm;
1883 if (IsZmMulti)
1884 Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
1885 else
1886 Zm = N->getOperand(NumVecs + FirstVecIdx);
1887
1888   SDNode *Intrinsic;
1889   if (HasPred)
1890 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
1891 N->getOperand(1), Zdn, Zm);
1892 else
1893 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
1894 SDValue SuperReg = SDValue(Intrinsic, 0);
1895 for (unsigned i = 0; i < NumVecs; ++i)
1896 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1897 AArch64::zsub0 + i, DL, VT, SuperReg));
1898
1899 CurDAG->RemoveDeadNode(N);
1900}
1901
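// Select an SVE predicated structure load. findAddrModeSVELoadStore picks
// between the reg+imm (Opc_ri) and reg+reg (Opc_rr) forms, and the NumVecs
// results are extracted from the Untyped tuple via zsub0 + i.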
1902void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
1903 unsigned Scale, unsigned Opc_ri,
1904 unsigned Opc_rr, bool IsIntr) {
1905 assert(Scale < 5 && "Invalid scaling value.");
1906 SDLoc DL(N);
1907 EVT VT = N->getValueType(0);
1908 SDValue Chain = N->getOperand(0);
1909
1910 // Optimize addressing mode.
1911   SDValue Base, Offset;
1912   unsigned Opc;
1913 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1914 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
1915 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
1916
1917 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
1918 Base, // Memory operand
1919 Offset, Chain};
1920
1921 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1922
1923 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1924 SDValue SuperReg = SDValue(Load, 0);
1925 for (unsigned i = 0; i < NumVecs; ++i)
1926 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1927 AArch64::zsub0 + i, DL, VT, SuperReg));
1928
1929 // Copy chain
1930 unsigned ChainIdx = NumVecs;
1931 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1932 CurDAG->RemoveDeadNode(N);
1933}
1934
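// Select a contiguous multi-vector load that takes a predicate-as-counter
// operand (PNg): the base and offset are optimized as above, and the result
// vectors are extracted from the Untyped tuple.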
1935void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
1936 unsigned NumVecs,
1937 unsigned Scale,
1938 unsigned Opc_ri,
1939 unsigned Opc_rr) {
1940 assert(Scale < 4 && "Invalid scaling value.");
1941 SDLoc DL(N);
1942 EVT VT = N->getValueType(0);
1943 SDValue Chain = N->getOperand(0);
1944
1945 SDValue PNg = N->getOperand(2);
1946 SDValue Base = N->getOperand(3);
1947 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
1948 unsigned Opc;
1949 std::tie(Opc, Base, Offset) =
1950 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
1951
1952 SDValue Ops[] = {PNg, // Predicate-as-counter
1953 Base, // Memory operand
1954 Offset, Chain};
1955
1956 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1957
1958 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1959 SDValue SuperReg = SDValue(Load, 0);
1960 for (unsigned i = 0; i < NumVecs; ++i)
1961 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1962 AArch64::zsub0 + i, DL, VT, SuperReg));
1963
1964 // Copy chain
1965 unsigned ChainIdx = NumVecs;
1966 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1967 CurDAG->RemoveDeadNode(N);
1968}
1969
1970void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
1971 unsigned Opcode) {
1972 if (N->getValueType(0) != MVT::nxv4f32)
1973 return;
1974 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
1975}
1976
1977void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
1978 unsigned NumOutVecs,
1979 unsigned Opc, uint32_t MaxImm) {
1980 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
1981 if (Imm->getZExtValue() > MaxImm)
1982 return;
1983
1984 SDValue ZtValue;
1985 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
1986 return;
1987 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)};
1988 SDLoc DL(Node);
1989 EVT VT = Node->getValueType(0);
1990
1991   SDNode *Instruction =
1992       CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
1993 SDValue SuperReg = SDValue(Instruction, 0);
1994
1995 for (unsigned I = 0; I < NumOutVecs; ++I)
1996 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
1997 AArch64::zsub0 + I, DL, VT, SuperReg));
1998
1999 // Copy chain
2000 unsigned ChainIdx = NumOutVecs;
2001 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2002 CurDAG->RemoveDeadNode(Node);
2003}
2004
2005void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2006 unsigned Op) {
2007 SDLoc DL(N);
2008 EVT VT = N->getValueType(0);
2009
2010 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2011 SDValue Zd = createZMulTuple(Regs);
2012 SDValue Zn = N->getOperand(1 + NumVecs);
2013 SDValue Zm = N->getOperand(2 + NumVecs);
2014
2015 SDValue Ops[] = {Zd, Zn, Zm};
2016
2017 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
2018 SDValue SuperReg = SDValue(Intrinsic, 0);
2019 for (unsigned i = 0; i < NumVecs; ++i)
2020 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2021 AArch64::zsub0 + i, DL, VT, SuperReg));
2022
2023 CurDAG->RemoveDeadNode(N);
2024}
2025
2026bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2027 switch (BaseReg) {
2028 default:
2029 return false;
2030 case AArch64::ZA:
2031 case AArch64::ZAB0:
2032 if (TileNum == 0)
2033 break;
2034 return false;
2035 case AArch64::ZAH0:
2036 if (TileNum <= 1)
2037 break;
2038 return false;
2039 case AArch64::ZAS0:
2040 if (TileNum <= 3)
2041 break;
2042 return false;
2043 case AArch64::ZAD0:
2044 if (TileNum <= 7)
2045 break;
2046 return false;
2047 }
2048
2049 BaseReg += TileNum;
2050 return true;
2051}
2052
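// Select a move from ZA (a tile or the whole array) into a vector tuple.
// SelectSMETile folds the immediate tile index into the base tile register,
// and SelectSMETileSlice splits the slice index into a base and a small
// scaled immediate offset (at most MaxIdx, scaled by Scale).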
2053template <unsigned MaxIdx, unsigned Scale>
2054void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
2055 unsigned BaseReg, unsigned Op) {
2056 unsigned TileNum = 0;
2057 if (BaseReg != AArch64::ZA)
2058 TileNum = N->getConstantOperandVal(2);
2059
2060 if (!SelectSMETile(BaseReg, TileNum))
2061 return;
2062
2063 SDValue SliceBase, Base, Offset;
2064 if (BaseReg == AArch64::ZA)
2065 SliceBase = N->getOperand(2);
2066 else
2067 SliceBase = N->getOperand(3);
2068
2069 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2070 return;
2071
2072 SDLoc DL(N);
2073 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
2074 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
2075 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2076
2077 EVT VT = N->getValueType(0);
2078 for (unsigned I = 0; I < NumVecs; ++I)
2079 ReplaceUses(SDValue(N, I),
2080 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2081 SDValue(Mov, 0)));
2082 // Copy chain
2083 unsigned ChainIdx = NumVecs;
2084 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2085 CurDAG->RemoveDeadNode(N);
2086}
2087
2088void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2089 unsigned Op, unsigned MaxIdx,
2090 unsigned Scale, unsigned BaseReg) {
2091   // The slice operand can be in different positions:
2092   //   Array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2093   //   Tile to vector:  llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2094 SDValue SliceBase = N->getOperand(2);
2095 if (BaseReg != AArch64::ZA)
2096 SliceBase = N->getOperand(3);
2097
2098   SDValue Base, Offset;
2099   if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2100 return;
2101   // The correct ZA tile number is computed when the machine instruction is
2102   // emitted (see EmitZAInstr); the DAG cannot select the ZA tile as an
2103   // output register with ZReg.
2104 SDLoc DL(N);
2105   SmallVector<SDValue, 6> Ops;
2106   if (BaseReg != AArch64::ZA)
2107 Ops.push_back(N->getOperand(2));
2108 Ops.push_back(Base);
2109 Ops.push_back(Offset);
2110   Ops.push_back(N->getOperand(0)); // Chain
2111 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2112
2113 EVT VT = N->getValueType(0);
2114 for (unsigned I = 0; I < NumVecs; ++I)
2115 ReplaceUses(SDValue(N, I),
2116 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2117 SDValue(Mov, 0)));
2118
2119 // Copy chain
2120 unsigned ChainIdx = NumVecs;
2121 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2122 CurDAG->RemoveDeadNode(N);
2123}
2124
2125void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2126 unsigned NumOutVecs,
2127 bool IsTupleInput,
2128 unsigned Opc) {
2129 SDLoc DL(N);
2130 EVT VT = N->getValueType(0);
2131 unsigned NumInVecs = N->getNumOperands() - 1;
2132
2133   SmallVector<SDValue, 6> Ops;
2134   if (IsTupleInput) {
2135 assert((NumInVecs == 2 || NumInVecs == 4) &&
2136 "Don't know how to handle multi-register input!");
2137 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumInVecs));
2138 Ops.push_back(createZMulTuple(Regs));
2139 } else {
2140 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2141 for (unsigned I = 0; I < NumInVecs; I++)
2142 Ops.push_back(N->getOperand(1 + I));
2143 }
2144
2145 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2146 SDValue SuperReg = SDValue(Res, 0);
2147
2148 for (unsigned I = 0; I < NumOutVecs; I++)
2149 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2150 AArch64::zsub0 + I, DL, VT, SuperReg));
2151 CurDAG->RemoveDeadNode(N);
2152}
2153
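// Select a structure store: the NumVecs source vectors are packed into a D-
// or Q-register tuple with a REG_SEQUENCE so that they can be allocated to
// the consecutive registers the vector-list store forms require.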
2154void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2155 unsigned Opc) {
2156 SDLoc dl(N);
2157 EVT VT = N->getOperand(2)->getValueType(0);
2158
2159 // Form a REG_SEQUENCE to force register allocation.
2160 bool Is128Bit = VT.getSizeInBits() == 128;
2161 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2162 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2163
2164 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2165 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2166
2167 // Transfer memoperands.
2168 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2169 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2170
2171 ReplaceNode(N, St);
2172}
2173
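// Select an SVE predicated structure store; the addressing mode is chosen
// between the reg+imm and reg+reg forms, as for the predicated loads above.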
2174void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2175 unsigned Scale, unsigned Opc_rr,
2176 unsigned Opc_ri) {
2177 SDLoc dl(N);
2178
2179 // Form a REG_SEQUENCE to force register allocation.
2180 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2181 SDValue RegSeq = createZTuple(Regs);
2182
2183 // Optimize addressing mode.
2184 unsigned Opc;
2185   SDValue Base, Offset;
2186   std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2187 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2188 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2189
2190 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2191 Base, // address
2192 Offset, // offset
2193 N->getOperand(0)}; // chain
2194 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2195
2196 ReplaceNode(N, St);
2197}
2198
2199bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2200 SDValue &OffImm) {
2201 SDLoc dl(N);
2202 const DataLayout &DL = CurDAG->getDataLayout();
2203 const TargetLowering *TLI = getTargetLowering();
2204
2205 // Try to match it for the frame address
2206 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2207 int FI = FINode->getIndex();
2208 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2209 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2210 return true;
2211 }
2212
2213 return false;
2214}
2215
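// Post-increment variant of SelectStore: the machine node returns the updated
// base register and the chain.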
2216void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2217 unsigned Opc) {
2218 SDLoc dl(N);
2219 EVT VT = N->getOperand(2)->getValueType(0);
2220 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2221 MVT::Other}; // Type for the Chain
2222
2223 // Form a REG_SEQUENCE to force register allocation.
2224 bool Is128Bit = VT.getSizeInBits() == 128;
2225 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2226 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2227
2228 SDValue Ops[] = {RegSeq,
2229 N->getOperand(NumVecs + 1), // base register
2230 N->getOperand(NumVecs + 2), // Incremental
2231 N->getOperand(0)}; // Chain
2232 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2233
2234 ReplaceNode(N, St);
2235}
2236
2237namespace {
2238/// WidenVector - Given a value in the V64 register class, produce the
2239/// equivalent value in the V128 register class.
2240class WidenVector {
2241 SelectionDAG &DAG;
2242
2243public:
2244 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2245
2246 SDValue operator()(SDValue V64Reg) {
2247 EVT VT = V64Reg.getValueType();
2248 unsigned NarrowSize = VT.getVectorNumElements();
2249 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2250 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2251 SDLoc DL(V64Reg);
2252
2253 SDValue Undef =
2254 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2255 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2256 }
2257};
2258} // namespace
2259
2260/// NarrowVector - Given a value in the V128 register class, produce the
2261/// equivalent value in the V64 register class.
2262 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2263   EVT VT = V128Reg.getValueType();
2264 unsigned WideSize = VT.getVectorNumElements();
2265 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2266 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2267
2268 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2269 V128Reg);
2270}
2271
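// Select a lane-indexed structure load. 64-bit input vectors are first
// widened to 128-bit Q registers, the load is performed on a Q-register
// tuple, and the results are narrowed back with NarrowVector when needed.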
2272void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2273 unsigned Opc) {
2274 SDLoc dl(N);
2275 EVT VT = N->getValueType(0);
2276 bool Narrow = VT.getSizeInBits() == 64;
2277
2278 // Form a REG_SEQUENCE to force register allocation.
2279 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2280
2281 if (Narrow)
2282 transform(Regs, Regs.begin(),
2283 WidenVector(*CurDAG));
2284
2285 SDValue RegSeq = createQTuple(Regs);
2286
2287 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2288
2289 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2290
2291 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2292 N->getOperand(NumVecs + 3), N->getOperand(0)};
2293 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2294 SDValue SuperReg = SDValue(Ld, 0);
2295
2296 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2297 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2298 AArch64::qsub2, AArch64::qsub3 };
2299 for (unsigned i = 0; i < NumVecs; ++i) {
2300 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2301 if (Narrow)
2302 NV = NarrowVector(NV, *CurDAG);
2303 ReplaceUses(SDValue(N, i), NV);
2304 }
2305
2306 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2307 CurDAG->RemoveDeadNode(N);
2308}
2309
2310void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2311 unsigned Opc) {
2312 SDLoc dl(N);
2313 EVT VT = N->getValueType(0);
2314 bool Narrow = VT.getSizeInBits() == 64;
2315
2316 // Form a REG_SEQUENCE to force register allocation.
2317 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2318
2319 if (Narrow)
2320 transform(Regs, Regs.begin(),
2321 WidenVector(*CurDAG));
2322
2323 SDValue RegSeq = createQTuple(Regs);
2324
2325 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2326 RegSeq->getValueType(0), MVT::Other};
2327
2328 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2329
2330 SDValue Ops[] = {RegSeq,
2331 CurDAG->getTargetConstant(LaneNo, dl,
2332 MVT::i64), // Lane Number
2333 N->getOperand(NumVecs + 2), // Base register
2334 N->getOperand(NumVecs + 3), // Incremental
2335 N->getOperand(0)};
2336 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2337
2338 // Update uses of the write back register
2339 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2340
2341 // Update uses of the vector list
2342 SDValue SuperReg = SDValue(Ld, 1);
2343 if (NumVecs == 1) {
2344 ReplaceUses(SDValue(N, 0),
2345 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2346 } else {
2347 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2348 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2349 AArch64::qsub2, AArch64::qsub3 };
2350 for (unsigned i = 0; i < NumVecs; ++i) {
2351 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2352 SuperReg);
2353 if (Narrow)
2354 NV = NarrowVector(NV, *CurDAG);
2355 ReplaceUses(SDValue(N, i), NV);
2356 }
2357 }
2358
2359 // Update the Chain
2360 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2361 CurDAG->RemoveDeadNode(N);
2362}
2363
2364void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2365 unsigned Opc) {
2366 SDLoc dl(N);
2367 EVT VT = N->getOperand(2)->getValueType(0);
2368 bool Narrow = VT.getSizeInBits() == 64;
2369
2370 // Form a REG_SEQUENCE to force register allocation.
2371 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2372
2373 if (Narrow)
2374 transform(Regs, Regs.begin(),
2375 WidenVector(*CurDAG));
2376
2377 SDValue RegSeq = createQTuple(Regs);
2378
2379 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2380
2381 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2382 N->getOperand(NumVecs + 3), N->getOperand(0)};
2383 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2384
2385 // Transfer memoperands.
2386 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2387 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2388
2389 ReplaceNode(N, St);
2390}
2391
2392void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2393 unsigned Opc) {
2394 SDLoc dl(N);
2395 EVT VT = N->getOperand(2)->getValueType(0);
2396 bool Narrow = VT.getSizeInBits() == 64;
2397
2398 // Form a REG_SEQUENCE to force register allocation.
2399 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2400
2401 if (Narrow)
2402 transform(Regs, Regs.begin(),
2403 WidenVector(*CurDAG));
2404
2405 SDValue RegSeq = createQTuple(Regs);
2406
2407 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2408 MVT::Other};
2409
2410 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2411
2412 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2413 N->getOperand(NumVecs + 2), // Base Register
2414 N->getOperand(NumVecs + 3), // Incremental
2415 N->getOperand(0)};
2416 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2417
2418 // Transfer memoperands.
2419 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2420 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2421
2422 ReplaceNode(N, St);
2423}
2424
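// Matches "(x >> C) & Mask" (with Mask a mask of the low bits) as an unsigned
// bitfield extract. For example, for i32 "(x >> 3) & 0x1f", LSB = 3 and
// MSB = 3 + 5 - 1 = 7, which selects UBFMWri x, 3, 7 (i.e. UBFX x, #3, #5).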
2425 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2426                                        unsigned &Opc, SDValue &Opd0,
2427 unsigned &LSB, unsigned &MSB,
2428 unsigned NumberOfIgnoredLowBits,
2429 bool BiggerPattern) {
2430   assert(N->getOpcode() == ISD::AND &&
2431          "N must be an AND operation to call this function");
2432
2433 EVT VT = N->getValueType(0);
2434
2435   // We could test the type of VT here and return false when it does not
2436   // match, but since that check is already done before this call in the
2437   // current context, we turned it into an assert to avoid redundant code.
2438 assert((VT == MVT::i32 || VT == MVT::i64) &&
2439 "Type checking must have been done before calling this function");
2440
2441 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2442 // changed the AND node to a 32-bit mask operation. We'll have to
2443 // undo that as part of the transform here if we want to catch all
2444 // the opportunities.
2445 // Currently the NumberOfIgnoredLowBits argument helps to recover
2446 // from these situations when matching bigger pattern (bitfield insert).
2447
2448 // For unsigned extracts, check for a shift right and mask
2449 uint64_t AndImm = 0;
2450 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2451 return false;
2452
2453 const SDNode *Op0 = N->getOperand(0).getNode();
2454
2455 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2456 // simplified. Try to undo that
2457 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2458
2459 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2460 if (AndImm & (AndImm + 1))
2461 return false;
2462
2463 bool ClampMSB = false;
2464 uint64_t SrlImm = 0;
2465 // Handle the SRL + ANY_EXTEND case.
2466 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2467 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2468 // Extend the incoming operand of the SRL to 64-bit.
2469 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2470 // Make sure to clamp the MSB so that we preserve the semantics of the
2471 // original operations.
2472 ClampMSB = true;
2473 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2474              isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
2475                                    SrlImm)) {
2476 // If the shift result was truncated, we can still combine them.
2477 Opd0 = Op0->getOperand(0).getOperand(0);
2478
2479 // Use the type of SRL node.
2480 VT = Opd0->getValueType(0);
2481 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2482 Opd0 = Op0->getOperand(0);
2483 ClampMSB = (VT == MVT::i32);
2484 } else if (BiggerPattern) {
2485 // Let's pretend a 0 shift right has been performed.
2486 // The resulting code will be at least as good as the original one
2487 // plus it may expose more opportunities for bitfield insert pattern.
2488 // FIXME: Currently we limit this to the bigger pattern, because
2489 // some optimizations expect AND and not UBFM.
2490 Opd0 = N->getOperand(0);
2491 } else
2492 return false;
2493
2494 // Bail out on large immediates. This happens when no proper
2495 // combining/constant folding was performed.
2496 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2497 LLVM_DEBUG(
2498 (dbgs() << N
2499 << ": Found large shift immediate, this should not happen\n"));
2500 return false;
2501 }
2502
2503 LSB = SrlImm;
2504 MSB = SrlImm +
2505 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2506 : llvm::countr_one<uint64_t>(AndImm)) -
2507 1;
2508 if (ClampMSB)
2509 // Since we're moving the extend before the right shift operation, we need
2510 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2511 // the zeros which would get shifted in with the original right shift
2512 // operation.
2513 MSB = MSB > 31 ? 31 : MSB;
2514
2515 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2516 return true;
2517}
2518
2519static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2520 SDValue &Opd0, unsigned &Immr,
2521 unsigned &Imms) {
2522 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2523
2524 EVT VT = N->getValueType(0);
2525 unsigned BitWidth = VT.getSizeInBits();
2526 assert((VT == MVT::i32 || VT == MVT::i64) &&
2527 "Type checking must have been done before calling this function");
2528
2529 SDValue Op = N->getOperand(0);
2530 if (Op->getOpcode() == ISD::TRUNCATE) {
2531 Op = Op->getOperand(0);
2532 VT = Op->getValueType(0);
2533 BitWidth = VT.getSizeInBits();
2534 }
2535
2536 uint64_t ShiftImm;
2537 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2538 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2539 return false;
2540
2541 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2542 if (ShiftImm + Width > BitWidth)
2543 return false;
2544
2545 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2546 Opd0 = Op.getOperand(0);
2547 Immr = ShiftImm;
2548 Imms = ShiftImm + Width - 1;
2549 return true;
2550}
2551
2552static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2553 SDValue &Opd0, unsigned &LSB,
2554 unsigned &MSB) {
2555   // We are looking for the following pattern, which extracts a contiguous
2556   // run of bits from the source value and places it at the LSB of the
2557   // destination value, with all other destination bits set to zero:
2558   //
2559   // Value2 = AND Value, MaskImm
2560   // SRL Value2, ShiftImm
2561   //
2562   // where MaskImm >> ShiftImm determines the width of the extracted field.
2563 //
2564 // This gets selected into a single UBFM:
2565 //
2566 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2567 //
2568
2569 if (N->getOpcode() != ISD::SRL)
2570 return false;
2571
2572 uint64_t AndMask = 0;
2573 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2574 return false;
2575
2576 Opd0 = N->getOperand(0).getOperand(0);
2577
2578 uint64_t SrlImm = 0;
2579 if (!isIntImmediate(N->getOperand(1), SrlImm))
2580 return false;
2581
2582 // Check whether we really have several bits extract here.
2583 if (!isMask_64(AndMask >> SrlImm))
2584 return false;
2585
2586 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2587 LSB = SrlImm;
2588 MSB = llvm::Log2_64(AndMask);
2589 return true;
2590}
2591
2592static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2593 unsigned &Immr, unsigned &Imms,
2594 bool BiggerPattern) {
2595 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2596 "N must be a SHR/SRA operation to call this function");
2597
2598 EVT VT = N->getValueType(0);
2599
2600   // We could test the type of VT here and return false when it does not
2601   // match, but since that check is already done before this call in the
2602   // current context, we turned it into an assert to avoid redundant code.
2603 assert((VT == MVT::i32 || VT == MVT::i64) &&
2604 "Type checking must have been done before calling this function");
2605
2606 // Check for AND + SRL doing several bits extract.
2607 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2608 return true;
2609
2610 // We're looking for a shift of a shift.
2611 uint64_t ShlImm = 0;
2612 uint64_t TruncBits = 0;
2613 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2614 Opd0 = N->getOperand(0).getOperand(0);
2615 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2616 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2617     // We are looking for a shift of a truncate. A truncate from i64 to i32
2618     // can be considered as setting the high 32 bits to zero. Our strategy
2619     // here is to always generate a 64-bit UBFM. This consistency will help
2620     // the CSE pass later find more redundancy.
2621 Opd0 = N->getOperand(0).getOperand(0);
2622 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2623 VT = Opd0.getValueType();
2624 assert(VT == MVT::i64 && "the promoted type should be i64");
2625 } else if (BiggerPattern) {
2626 // Let's pretend a 0 shift left has been performed.
2627 // FIXME: Currently we limit this to the bigger pattern case,
2628 // because some optimizations expect AND and not UBFM
2629 Opd0 = N->getOperand(0);
2630 } else
2631 return false;
2632
2633 // Missing combines/constant folding may have left us with strange
2634 // constants.
2635 if (ShlImm >= VT.getSizeInBits()) {
2636 LLVM_DEBUG(
2637 (dbgs() << N
2638 << ": Found large shift immediate, this should not happen\n"));
2639 return false;
2640 }
2641
2642 uint64_t SrlImm = 0;
2643 if (!isIntImmediate(N->getOperand(1), SrlImm))
2644 return false;
2645
2646 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2647 "bad amount in shift node!");
2648 int immr = SrlImm - ShlImm;
2649 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2650 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2651 // SRA requires a signed extraction
2652 if (VT == MVT::i32)
2653 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2654 else
2655 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2656 return true;
2657}
2658
2659bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2660 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2661
2662 EVT VT = N->getValueType(0);
2663 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2664 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2665 return false;
2666
2667 uint64_t ShiftImm;
2668 SDValue Op = N->getOperand(0);
2669 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2670 return false;
2671
2672 SDLoc dl(N);
2673 // Extend the incoming operand of the shift to 64-bits.
2674 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2675 unsigned Immr = ShiftImm;
2676 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2677 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2678 CurDAG->getTargetConstant(Imms, dl, VT)};
2679 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2680 return true;
2681}
2682
2683static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2684 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2685 unsigned NumberOfIgnoredLowBits = 0,
2686 bool BiggerPattern = false) {
2687 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2688 return false;
2689
2690 switch (N->getOpcode()) {
2691 default:
2692 if (!N->isMachineOpcode())
2693 return false;
2694 break;
2695 case ISD::AND:
2696 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2697 NumberOfIgnoredLowBits, BiggerPattern);
2698 case ISD::SRL:
2699 case ISD::SRA:
2700 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2701
2702   case ISD::SIGN_EXTEND_INREG:
2703     return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2704 }
2705
2706 unsigned NOpc = N->getMachineOpcode();
2707 switch (NOpc) {
2708 default:
2709 return false;
2710 case AArch64::SBFMWri:
2711 case AArch64::UBFMWri:
2712 case AArch64::SBFMXri:
2713 case AArch64::UBFMXri:
2714 Opc = NOpc;
2715 Opd0 = N->getOperand(0);
2716 Immr = N->getConstantOperandVal(1);
2717 Imms = N->getConstantOperandVal(2);
2718 return true;
2719 }
2720 // Unreachable
2721 return false;
2722}
2723
2724bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2725 unsigned Opc, Immr, Imms;
2726 SDValue Opd0;
2727 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2728 return false;
2729
2730 EVT VT = N->getValueType(0);
2731 SDLoc dl(N);
2732
2733 // If the bit extract operation is 64bit but the original type is 32bit, we
2734 // need to add one EXTRACT_SUBREG.
2735 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2736 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2737 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2738
2739 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2740 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
2741 MVT::i32, SDValue(BFM, 0));
2742 ReplaceNode(N, Inner.getNode());
2743 return true;
2744 }
2745
2746 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2747 CurDAG->getTargetConstant(Imms, dl, VT)};
2748 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2749 return true;
2750}
2751
2752/// Does DstMask form a complementary pair with the mask provided by
2753 /// BitsToBeInserted, suitable for use in a BFI instruction? Roughly speaking,
2754/// this asks whether DstMask zeroes precisely those bits that will be set by
2755/// the other half.
2756static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2757 unsigned NumberOfIgnoredHighBits, EVT VT) {
2758 assert((VT == MVT::i32 || VT == MVT::i64) &&
2759 "i32 or i64 mask type expected!");
2760 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2761
2762 APInt SignificantDstMask = APInt(BitWidth, DstMask);
2763 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2764
2765 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2766 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2767}
2768
2769 // Look for bits that will be useful for later uses.
2770 // A bit is considered useless as soon as it is dropped and is never used
2771 // before being dropped.
2772 // E.g., looking for the useful bits of x:
2773 // 1. y = x & 0x7
2774 // 2. z = y >> 2
2775 // After #1, the useful bits of x are 0x7; those useful bits of x live
2776 // through y.
2777 // After #2, the useful bits of x are 0x4.
2778 // However, if x is used by an unpredictable instruction, then all its bits
2779 // are useful.
2780 // E.g.:
2781 // 1. y = x & 0x7
2782 // 2. z = y >> 2
2783 // 3. str x, [@x]
2784static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2785
2786 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2787                                               unsigned Depth) {
2788 uint64_t Imm =
2789 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2790 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2791 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2792 getUsefulBits(Op, UsefulBits, Depth + 1);
2793}
2794
2795 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2796                                              uint64_t Imm, uint64_t MSB,
2797 unsigned Depth) {
2798 // inherit the bitwidth value
2799 APInt OpUsefulBits(UsefulBits);
2800 OpUsefulBits = 1;
2801
2802 if (MSB >= Imm) {
2803 OpUsefulBits <<= MSB - Imm + 1;
2804 --OpUsefulBits;
2805 // The interesting part will be in the lower part of the result
2806 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2807 // The interesting part was starting at Imm in the argument
2808 OpUsefulBits <<= Imm;
2809 } else {
2810 OpUsefulBits <<= MSB + 1;
2811 --OpUsefulBits;
2812 // The interesting part will be shifted in the result
2813 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2814 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2815 // The interesting part was at zero in the argument
2816 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2817 }
2818
2819 UsefulBits &= OpUsefulBits;
2820}
2821
2822static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2823 unsigned Depth) {
2824 uint64_t Imm =
2825 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2826 uint64_t MSB =
2827 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2828
2829 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2830}
2831
2832 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2833                                               unsigned Depth) {
2834 uint64_t ShiftTypeAndValue =
2835 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2836 APInt Mask(UsefulBits);
2837 Mask.clearAllBits();
2838 Mask.flipAllBits();
2839
2840 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2841 // Shift Left
2842 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2843 Mask <<= ShiftAmt;
2844 getUsefulBits(Op, Mask, Depth + 1);
2845 Mask.lshrInPlace(ShiftAmt);
2846 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2847 // Shift Right
2848 // We do not handle AArch64_AM::ASR, because the sign will change the
2849 // number of useful bits
2850 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2851 Mask.lshrInPlace(ShiftAmt);
2852 getUsefulBits(Op, Mask, Depth + 1);
2853 Mask <<= ShiftAmt;
2854 } else
2855 return;
2856
2857 UsefulBits &= Mask;
2858}
2859
2860static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
2861 unsigned Depth) {
2862 uint64_t Imm =
2863 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2864 uint64_t MSB =
2865 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
2866
2867 APInt OpUsefulBits(UsefulBits);
2868 OpUsefulBits = 1;
2869
2870 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
2871 ResultUsefulBits.flipAllBits();
2872 APInt Mask(UsefulBits.getBitWidth(), 0);
2873
2874 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
2875
2876 if (MSB >= Imm) {
2877 // The instruction is a BFXIL.
2878 uint64_t Width = MSB - Imm + 1;
2879 uint64_t LSB = Imm;
2880
2881 OpUsefulBits <<= Width;
2882 --OpUsefulBits;
2883
2884 if (Op.getOperand(1) == Orig) {
2885 // Copy the low bits from the result to bits starting from LSB.
2886 Mask = ResultUsefulBits & OpUsefulBits;
2887 Mask <<= LSB;
2888 }
2889
2890 if (Op.getOperand(0) == Orig)
2891 // Bits starting from LSB in the input contribute to the result.
2892 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2893 } else {
2894 // The instruction is a BFI.
2895 uint64_t Width = MSB + 1;
2896 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
2897
2898 OpUsefulBits <<= Width;
2899 --OpUsefulBits;
2900 OpUsefulBits <<= LSB;
2901
2902 if (Op.getOperand(1) == Orig) {
2903 // Copy the bits from the result to the zero bits.
2904 Mask = ResultUsefulBits & OpUsefulBits;
2905 Mask.lshrInPlace(LSB);
2906 }
2907
2908 if (Op.getOperand(0) == Orig)
2909 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2910 }
2911
2912 UsefulBits &= Mask;
2913}
2914
2915static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
2916 SDValue Orig, unsigned Depth) {
2917
2918 // Users of this node should have already been instruction selected
2919 // FIXME: Can we turn that into an assert?
2920 if (!UserNode->isMachineOpcode())
2921 return;
2922
2923 switch (UserNode->getMachineOpcode()) {
2924 default:
2925 return;
2926 case AArch64::ANDSWri:
2927 case AArch64::ANDSXri:
2928 case AArch64::ANDWri:
2929 case AArch64::ANDXri:
2930     // We increment Depth only when we call getUsefulBits
2931 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
2932 Depth);
2933 case AArch64::UBFMWri:
2934 case AArch64::UBFMXri:
2935 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
2936
2937 case AArch64::ORRWrs:
2938 case AArch64::ORRXrs:
2939 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
2940 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
2941 Depth);
2942 return;
2943 case AArch64::BFMWri:
2944 case AArch64::BFMXri:
2945 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
2946
2947 case AArch64::STRBBui:
2948 case AArch64::STURBBi:
2949 if (UserNode->getOperand(0) != Orig)
2950 return;
2951 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
2952 return;
2953
2954 case AArch64::STRHHui:
2955 case AArch64::STURHHi:
2956 if (UserNode->getOperand(0) != Orig)
2957 return;
2958 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
2959 return;
2960 }
2961}
2962
2963static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
2964   if (Depth >= SelectionDAG::MaxRecursionDepth)
2965     return;
2966 // Initialize UsefulBits
2967 if (!Depth) {
2968 unsigned Bitwidth = Op.getScalarValueSizeInBits();
2969     // At the beginning, assume every produced bit is useful
2970 UsefulBits = APInt(Bitwidth, 0);
2971 UsefulBits.flipAllBits();
2972 }
2973 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
2974
2975 for (SDNode *Node : Op.getNode()->uses()) {
2976 // A use cannot produce useful bits
2977 APInt UsefulBitsForUse = APInt(UsefulBits);
2978 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
2979 UsersUsefulBits |= UsefulBitsForUse;
2980 }
2981 // UsefulBits contains the produced bits that are meaningful for the
2982 // current definition, thus a user cannot make a bit meaningful at
2983 // this point
2984 UsefulBits &= UsersUsefulBits;
2985}
2986
2987/// Create a machine node performing a notional SHL of Op by ShlAmount. If
2988/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
2989/// 0, return Op unchanged.
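/// For example, on i32 a left shift by 8 is encoded as UBFMWri Op, #24, #23,
/// and a right shift by 8 as UBFMWri Op, #8, #31.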
2990static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
2991 if (ShlAmount == 0)
2992 return Op;
2993
2994 EVT VT = Op.getValueType();
2995 SDLoc dl(Op);
2996 unsigned BitWidth = VT.getSizeInBits();
2997 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2998
2999 SDNode *ShiftNode;
3000 if (ShlAmount > 0) {
3001 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
3002 ShiftNode = CurDAG->getMachineNode(
3003 UBFMOpc, dl, VT, Op,
3004 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
3005 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
3006 } else {
3007 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3008 assert(ShlAmount < 0 && "expected right shift");
3009 int ShrAmount = -ShlAmount;
3010 ShiftNode = CurDAG->getMachineNode(
3011 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
3012 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
3013 }
3014
3015 return SDValue(ShiftNode, 0);
3016}
3017
3018// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3019 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3020                                            bool BiggerPattern,
3021 const uint64_t NonZeroBits,
3022 SDValue &Src, int &DstLSB,
3023 int &Width);
3024
3025 // For bit-field-positioning pattern "(shl VAL, N)".
3026 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3027                                            bool BiggerPattern,
3028 const uint64_t NonZeroBits,
3029 SDValue &Src, int &DstLSB,
3030 int &Width);
3031
3032/// Does this tree qualify as an attempt to move a bitfield into position,
3033/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
3034 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
3035                                     bool BiggerPattern, SDValue &Src,
3036 int &DstLSB, int &Width) {
3037 EVT VT = Op.getValueType();
3038 unsigned BitWidth = VT.getSizeInBits();
3039 (void)BitWidth;
3040 assert(BitWidth == 32 || BitWidth == 64);
3041
3042 KnownBits Known = CurDAG->computeKnownBits(Op);
3043
3044 // Non-zero in the sense that they're not provably zero, which is the key
3045 // point if we want to use this value
3046 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
3047 if (!isShiftedMask_64(NonZeroBits))
3048 return false;
3049
3050 switch (Op.getOpcode()) {
3051 default:
3052 break;
3053 case ISD::AND:
3054 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3055 NonZeroBits, Src, DstLSB, Width);
3056 case ISD::SHL:
3057 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3058 NonZeroBits, Src, DstLSB, Width);
3059 }
3060
3061 return false;
3062}
3063
3064 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3065                                            bool BiggerPattern,
3066 const uint64_t NonZeroBits,
3067 SDValue &Src, int &DstLSB,
3068 int &Width) {
3069 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3070
3071 EVT VT = Op.getValueType();
3072 assert((VT == MVT::i32 || VT == MVT::i64) &&
3073 "Caller guarantees VT is one of i32 or i64");
3074 (void)VT;
3075
3076 uint64_t AndImm;
3077 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
3078 return false;
3079
3080 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3081 // 1) (AndImm & (1 << POS) == 0)
3082 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3083 //
3084 // 1) and 2) don't agree so something must be wrong (e.g., in
3085 // 'SelectionDAG::computeKnownBits')
3086 assert((~AndImm & NonZeroBits) == 0 &&
3087 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3088
3089 SDValue AndOp0 = Op.getOperand(0);
3090
3091 uint64_t ShlImm;
3092 SDValue ShlOp0;
3093 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3094 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3095 ShlOp0 = AndOp0.getOperand(0);
3096 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3097              isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
3098                                    ShlImm)) {
3099 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3100
3101 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3102 SDValue ShlVal = AndOp0.getOperand(0);
3103
3104 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3105 // expect VT to be MVT::i32.
3106 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3107
3108 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3109 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3110 } else
3111 return false;
3112
3113 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3114 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3115 // AndOp0+AND.
3116 if (!BiggerPattern && !AndOp0.hasOneUse())
3117 return false;
3118
3119 DstLSB = llvm::countr_zero(NonZeroBits);
3120 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3121
3122 // Bail out on large Width. This happens when no proper combining / constant
3123 // folding was performed.
3124 if (Width >= (int)VT.getSizeInBits()) {
3125     // If VT is i64, Width > 64 is not possible since NonZeroBits is uint64_t,
3126     // and Width == 64 indicates a missed dag-combine from "(and val, AllOnes)"
3127     // to "val".
3128     // If VT is i32, what Width >= 32 means:
3129     // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
3130     //   demands at least 'Width' bits (after the dag-combiner). This, together
3131     //   with the `any_extend` Op (undefined higher bits), indicates a missed
3132     //   combination when lowering the 'and' IR to a machine IR instruction.
3133 LLVM_DEBUG(
3134 dbgs()
3135 << "Found large Width in bit-field-positioning -- this indicates no "
3136 "proper combining / constant folding was performed\n");
3137 return false;
3138 }
3139
3140 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3141 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3142 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3143 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3144 // which case it is not profitable to insert an extra shift.
3145 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3146 return false;
3147
3148 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3149 return true;
3150}
3151
3152 // For node (shl (and val, mask), N), returns true if the node is equivalent to
3153// UBFIZ.
3154 static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3155                                               SDValue &Src, int &DstLSB,
3156 int &Width) {
3157 // Caller should have verified that N is a left shift with constant shift
3158 // amount; asserts that.
3159 assert(Op.getOpcode() == ISD::SHL &&
3160 "Op.getNode() should be a SHL node to call this function");
3161 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3162 "Op.getNode() should shift ShlImm to call this function");
3163
3164 uint64_t AndImm = 0;
3165 SDValue Op0 = Op.getOperand(0);
3166 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3167 return false;
3168
3169 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3170 if (isMask_64(ShiftedAndImm)) {
3171 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3172 // should end with Mask, and could be prefixed with random bits if those
3173 // bits are shifted out.
3174 //
3175 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3176 // the AND result corresponding to those bits are shifted out, so it's fine
3177 // to not extract them.
3178 Width = llvm::countr_one(ShiftedAndImm);
3179 DstLSB = ShlImm;
3180 Src = Op0.getOperand(0);
3181 return true;
3182 }
3183 return false;
3184}
3185
3186 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3187                                            bool BiggerPattern,
3188 const uint64_t NonZeroBits,
3189 SDValue &Src, int &DstLSB,
3190 int &Width) {
3191 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3192
3193 EVT VT = Op.getValueType();
3194 assert((VT == MVT::i32 || VT == MVT::i64) &&
3195 "Caller guarantees that type is i32 or i64");
3196 (void)VT;
3197
3198 uint64_t ShlImm;
3199 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3200 return false;
3201
3202 if (!BiggerPattern && !Op.hasOneUse())
3203 return false;
3204
3205 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3206 return true;
3207
3208 DstLSB = llvm::countr_zero(NonZeroBits);
3209 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3210
3211 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3212 return false;
3213
3214 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3215 return true;
3216}
3217
3218static bool isShiftedMask(uint64_t Mask, EVT VT) {
3219 assert(VT == MVT::i32 || VT == MVT::i64);
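// A shifted mask is a single contiguous run of set bits, e.g. 0x0ff0;
// 0x0f0f is not a shifted mask.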
3220 if (VT == MVT::i32)
3221 return isShiftedMask_32(Mask);
3222 return isShiftedMask_64(Mask);
3223}
3224
3225// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3226// inserted only sets known zero bits.
3227 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
3228 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3229
3230 EVT VT = N->getValueType(0);
3231 if (VT != MVT::i32 && VT != MVT::i64)
3232 return false;
3233
3234 unsigned BitWidth = VT.getSizeInBits();
3235
3236 uint64_t OrImm;
3237 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3238 return false;
3239
3240 // Skip this transformation if the OR immediate can already be encoded as an
3241 // ORR immediate. Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which
3242 // is most likely performance neutral.
3243 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
3244 return false;
3245
3246 uint64_t MaskImm;
3247 SDValue And = N->getOperand(0);
3248 // Must be a single use AND with an immediate operand.
3249 if (!And.hasOneUse() ||
3250 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3251 return false;
3252
3253 // Compute the Known Zero for the AND as this allows us to catch more general
3254 // cases than just looking for AND with imm.
3255 KnownBits Known = CurDAG->computeKnownBits(And);
3256
3257 // Non-zero in the sense that they're not provably zero, which is the key
3258 // point if we want to use this value.
3259 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3260
3261 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3262 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3263 return false;
3264
3265 // The bits being inserted must only set those bits that are known to be zero.
3266 if ((OrImm & NotKnownZero) != 0) {
3267 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3268 // currently handle this case.
3269 return false;
3270 }
3271
3272 // BFI/BFXIL dst, src, #lsb, #width.
3273 int LSB = llvm::countr_one(NotKnownZero);
3274 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3275
3276 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3277 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3278 unsigned ImmS = Width - 1;
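// For example, a 32-bit BFI at LSB == 8 with Width == 4 uses
// ImmR == (32 - 8) % 32 == 24 and ImmS == 3.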
3279
3280 // If we're creating a BFI instruction avoid cases where we need more
3281 // instructions to materialize the BFI constant as compared to the original
3282 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3283 // should be no worse in this case.
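// Roughly, MOVi32imm/MOVi64imm costs one MOVZ/MOVK per non-zero 16-bit chunk
// of the immediate, so the chunk counts below approximate the number of
// instructions needed to materialize each constant.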
3284 bool IsBFI = LSB != 0;
3285 uint64_t BFIImm = OrImm >> LSB;
3286 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3287 // We have a BFI instruction and we know the constant can't be materialized
3288 // with a ORR-immediate with the zero register.
3289 unsigned OrChunks = 0, BFIChunks = 0;
3290 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3291 if (((OrImm >> Shift) & 0xFFFF) != 0)
3292 ++OrChunks;
3293 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3294 ++BFIChunks;
3295 }
3296 if (BFIChunks > OrChunks)
3297 return false;
3298 }
3299
3300 // Materialize the constant to be inserted.
3301 SDLoc DL(N);
3302 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3303 SDNode *MOVI = CurDAG->getMachineNode(
3304 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3305
3306 // Create the BFI/BFXIL instruction.
3307 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3308 CurDAG->getTargetConstant(ImmR, DL, VT),
3309 CurDAG->getTargetConstant(ImmS, DL, VT)};
3310 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3311 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3312 return true;
3313}
3314
3315 static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
3316 SDValue &ShiftedOperand,
3317 uint64_t &EncodedShiftImm) {
3318 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3319 if (!Dst.hasOneUse())
3320 return false;
3321
3322 EVT VT = Dst.getValueType();
3323 assert((VT == MVT::i32 || VT == MVT::i64) &&
3324 "Caller should guarantee that VT is one of i32 or i64");
3325 const unsigned SizeInBits = VT.getSizeInBits();
3326
3327 SDLoc DL(Dst.getNode());
3328 uint64_t AndImm, ShlImm;
3329 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3330 isShiftedMask_64(AndImm)) {
3331 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3332 SDValue DstOp0 = Dst.getOperand(0);
3333 if (!DstOp0.hasOneUse())
3334 return false;
3335
3336 // An example to illustrate the transformation
3337 // From:
3338 // lsr x8, x1, #1
3339 // and x8, x8, #0x3f80
3340 // bfxil x8, x1, #0, #7
3341 // To:
3342 // and x8, x23, #0x7f
3343 // ubfx x9, x23, #8, #7
3344 // orr x23, x8, x9, lsl #7
3345 //
3346 // The number of instructions remains the same, but ORR is faster than BFXIL
3347 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3348 // the dependency chain is improved after the transformation.
3349 uint64_t SrlImm;
3350 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3351 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3352 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3353 unsigned MaskWidth =
3354 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3355 unsigned UBFMOpc =
3356 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3357 SDNode *UBFMNode = CurDAG->getMachineNode(
3358 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3359 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3360 VT),
3361 CurDAG->getTargetConstant(
3362 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
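// The UBFM built above is a UBFX (immr == lsb, imms == lsb + width - 1): it
// extracts MaskWidth bits starting at bit SrlImm + NumTrailingZeroInShiftedMask.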
3363 ShiftedOperand = SDValue(UBFMNode, 0);
3364 EncodedShiftImm = AArch64_AM::getShifterImm(
3365 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3366 return true;
3367 }
3368 }
3369 return false;
3370 }
3371
3372 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3373 ShiftedOperand = Dst.getOperand(0);
3374 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3375 return true;
3376 }
3377
3378 uint64_t SrlImm;
3379 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3380 ShiftedOperand = Dst.getOperand(0);
3381 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3382 return true;
3383 }
3384 return false;
3385}
3386
3387// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3388// the operands and select it to AArch64::ORR with shifted registers if
3389// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3390static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3391 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3392 const bool BiggerPattern) {
3393 EVT VT = N->getValueType(0);
3394 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3395 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3396 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3397 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3398 assert((VT == MVT::i32 || VT == MVT::i64) &&
3399 "Expect result type to be i32 or i64 since N is combinable to BFM");
3400 SDLoc DL(N);
3401
3402 // Bail out if BFM simplifies away one node in BFM Dst.
3403 if (OrOpd1 != Dst)
3404 return false;
3405
3406 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3407 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3408 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3409 if (BiggerPattern) {
3410 uint64_t SrcAndImm;
3411 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3412 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3413 // OrOpd0 = AND Src, #Mask
3414 // So BFM simplifies away one AND node from Src and doesn't simplify away
3415 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3416 // one node (from Rd), ORR is better since it has higher throughput and
3417 // smaller latency than BFM on many AArch64 processors (and for the rest
3418 // ORR is at least as good as BFM).
3419 SDValue ShiftedOperand;
3420 uint64_t EncodedShiftImm;
3421 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3422 EncodedShiftImm)) {
3423 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3424 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3425 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3426 return true;
3427 }
3428 }
3429 return false;
3430 }
3431
3432 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3433
3434 uint64_t ShlImm;
3435 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3436 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3437 SDValue Ops[] = {
3438 Dst, Src,
3439 CurDAG->getTargetConstant(
3440 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3441 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3442 return true;
3443 }
3444
3445 // Select the following pattern to left-shifted operand rather than BFI.
3446 // %val1 = op ..
3447 // %val2 = shl %val1, #imm
3448 // %res = or %val1, %val2
3449 //
3450 // If N is selected to be BFI, we know that
3451 // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3452 // into BFI), and 2) OrOpd1 would be the destination operand (i.e., preserved).
3453 //
3454 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
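// For example, for i32 this emits 'orr w0, w1, w1, lsl #imm', computing
// val1 | (val1 << imm) in a single instruction.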
3455 if (OrOpd0.getOperand(0) == OrOpd1) {
3456 SDValue Ops[] = {
3457 OrOpd1, OrOpd1,
3458 CurDAG->getTargetConstant(
3459 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3460 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3461 return true;
3462 }
3463 }
3464
3465 uint64_t SrlImm;
3466 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3467 // Select the following pattern to right-shifted operand rather than BFXIL.
3468 // %val1 = op ..
3469 // %val2 = lshr %val1, #imm
3470 // %res = or %val1, %val2
3471 //
3472 // If N is selected to be BFXIL, we know that
3473 // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3474 // into BFXIL), and 2) OrOpd1 would be the destination operand (i.e., preserved).
3475 //
3476 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
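// For example, for i32 this emits 'orr w0, w1, w1, lsr #imm', computing
// val1 | (val1 >> imm) in a single instruction.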
3477 if (OrOpd0.getOperand(0) == OrOpd1) {
3478 SDValue Ops[] = {
3479 OrOpd1, OrOpd1,
3480 CurDAG->getTargetConstant(
3481 AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
3482 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3483 return true;
3484 }
3485 }
3486
3487 return false;
3488}
3489
3490static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3491 SelectionDAG *CurDAG) {
3492 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3493
3494 EVT VT = N->getValueType(0);
3495 if (VT != MVT::i32 && VT != MVT::i64)
3496 return false;
3497
3498 unsigned BitWidth = VT.getSizeInBits();
3499
3500 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3501 // have the expected shape. Try to undo that.
3502
3503 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3504 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3505
3506 // Given a OR operation, check if we have the following pattern
3507 // ubfm c, b, imm, imm2 (or something that does the same job, see
3508 // isBitfieldExtractOp)
3509 // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
3510 // countTrailingZeros(mask2) == imm2 - imm + 1
3511 // f = d | c
3512 // if yes, replace the OR instruction with:
3513 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3514
3515 // OR is commutative, check all combinations of operand order and values of
3516 // BiggerPattern, i.e.
3517 // Opd0, Opd1, BiggerPattern=false
3518 // Opd1, Opd0, BiggerPattern=false
3519 // Opd0, Opd1, BiggerPattern=true
3520 // Opd1, Opd0, BiggerPattern=true
3521 // Several of these combinations may match, so check with BiggerPattern=false
3522 // first since that will produce better results by matching more instructions
3523 // and/or inserting fewer extra instructions.
3524 for (int I = 0; I < 4; ++I) {
3525
3526 SDValue Dst, Src;
3527 unsigned ImmR, ImmS;
3528 bool BiggerPattern = I / 2;
3529 SDValue OrOpd0Val = N->getOperand(I % 2);
3530 SDNode *OrOpd0 = OrOpd0Val.getNode();
3531 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3532 SDNode *OrOpd1 = OrOpd1Val.getNode();
3533
3534 unsigned BFXOpc;
3535 int DstLSB, Width;
3536 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3537 NumberOfIgnoredLowBits, BiggerPattern)) {
3538 // Check that the returned opcode is compatible with the pattern,
3539 // i.e., same type and zero extended (U and not S)
3540 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3541 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3542 continue;
3543
3544 // Compute the width of the bitfield insertion
3545 DstLSB = 0;
3546 Width = ImmS - ImmR + 1;
3547 // FIXME: This constraint is to catch bitfield insertion; we may
3548 // want to widen the pattern if we want to grab the general bitfield
3549 // move case.
3550 if (Width <= 0)
3551 continue;
3552
3553 // If the mask on the insertee is correct, we have a BFXIL operation. We
3554 // can share the ImmR and ImmS values from the already-computed UBFM.
3555 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3556 BiggerPattern,
3557 Src, DstLSB, Width)) {
3558 ImmR = (BitWidth - DstLSB) % BitWidth;
3559 ImmS = Width - 1;
3560 } else
3561 continue;
3562
3563 // Check the second part of the pattern
3564 EVT VT = OrOpd1Val.getValueType();
3565 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3566
3567 // Compute the Known Zero for the candidate of the first operand.
3568 // This allows to catch more general case than just looking for
3569 // AND with imm. Indeed, simplify-demanded-bits may have removed
3570 // the AND instruction because it proves it was useless.
3571 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3572
3573 // Check if there is enough room for the second operand to appear
3574 // in the first one
3575 APInt BitsToBeInserted =
3576 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3577
3578 if ((BitsToBeInserted & ~Known.Zero) != 0)
3579 continue;
3580
3581 // Set the first operand
3582 uint64_t Imm;
3583 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3584 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3585 // In that case, we can eliminate the AND
3586 Dst = OrOpd1->getOperand(0);
3587 else
3588 // Maybe the AND has been removed by simplify-demanded-bits
3589 // or is useful because it discards more bits
3590 Dst = OrOpd1Val;
3591
3592 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3593 // with shifted operand is more efficient.
3594 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3595 BiggerPattern))
3596 return true;
3597
3598 // both parts match
3599 SDLoc DL(N);
3600 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3601 CurDAG->getTargetConstant(ImmS, DL, VT)};
3602 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3603 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3604 return true;
3605 }
3606
3607 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3608 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3609 // mask (e.g., 0x000ffff0).
3610 uint64_t Mask0Imm, Mask1Imm;
3611 SDValue And0 = N->getOperand(0);
3612 SDValue And1 = N->getOperand(1);
3613 if (And0.hasOneUse() && And1.hasOneUse() &&
3614 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3615 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3616 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3617 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3618
3619 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3620 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3621 // bits to be inserted.
3622 if (isShiftedMask(Mask0Imm, VT)) {
3623 std::swap(And0, And1);
3624 std::swap(Mask0Imm, Mask1Imm);
3625 }
3626
3627 SDValue Src = And1->getOperand(0);
3628 SDValue Dst = And0->getOperand(0);
3629 unsigned LSB = llvm::countr_zero(Mask1Imm);
3630 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
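// For example, for i32 with Mask1Imm == 0x000ffff0 (so Mask0Imm == 0xfff0000f):
// LSB == 4 and Width == 16, i.e. 16 bits of Src are inserted at bit 4 of Dst.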
3631
3632 // The BFXIL inserts the low-order bits from a source register, so right
3633 // shift the needed bits into place.
3634 SDLoc DL(N);
3635 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3636 uint64_t LsrImm = LSB;
3637 if (Src->hasOneUse() &&
3638 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3639 (LsrImm + LSB) < BitWidth) {
3640 Src = Src->getOperand(0);
3641 LsrImm += LSB;
3642 }
3643
3644 SDNode *LSR = CurDAG->getMachineNode(
3645 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3646 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3647
3648 // BFXIL is an alias of BFM, so translate to BFM operands.
3649 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3650 unsigned ImmS = Width - 1;
3651
3652 // Create the BFXIL instruction.
3653 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3654 CurDAG->getTargetConstant(ImmR, DL, VT),
3655 CurDAG->getTargetConstant(ImmS, DL, VT)};
3656 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3657 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3658 return true;
3659 }
3660
3661 return false;
3662}
3663
3664bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3665 if (N->getOpcode() != ISD::OR)
3666 return false;
3667
3668 APInt NUsefulBits;
3669 getUsefulBits(SDValue(N, 0), NUsefulBits);
3670
3671 // If none of the bits are useful, just return UNDEF.
3672 if (!NUsefulBits) {
3673 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3674 return true;
3675 }
3676
3677 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3678 return true;
3679
3680 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3681}
3682
3683 /// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3684/// equivalent of a left shift by a constant amount followed by an and masking
3685/// out a contiguous set of bits.
3686bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3687 if (N->getOpcode() != ISD::AND)
3688 return false;
3689
3690 EVT VT = N->getValueType(0);
3691 if (VT != MVT::i32 && VT != MVT::i64)
3692 return false;
3693
3694 SDValue Op0;
3695 int DstLSB, Width;
3696 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3697 Op0, DstLSB, Width))
3698 return false;
3699
3700 // ImmR is the rotate right amount.
3701 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3702 // ImmS is the most significant bit of the source to be moved.
3703 unsigned ImmS = Width - 1;
3704
3705 SDLoc DL(N);
3706 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3707 CurDAG->getTargetConstant(ImmS, DL, VT)};
3708 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3709 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3710 return true;
3711}
3712
3713/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3714/// variable shift/rotate instructions.
3715bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3716 EVT VT = N->getValueType(0);
3717
3718 unsigned Opc;
3719 switch (N->getOpcode()) {
3720 case ISD::ROTR:
3721 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3722 break;
3723 case ISD::SHL:
3724 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3725 break;
3726 case ISD::SRL:
3727 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3728 break;
3729 case ISD::SRA:
3730 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3731 break;
3732 default:
3733 return false;
3734 }
3735
3736 uint64_t Size;
3737 uint64_t Bits;
3738 if (VT == MVT::i32) {
3739 Bits = 5;
3740 Size = 32;
3741 } else if (VT == MVT::i64) {
3742 Bits = 6;
3743 Size = 64;
3744 } else
3745 return false;
3746
3747 SDValue ShiftAmt = N->getOperand(1);
3748 SDLoc DL(N);
3749 SDValue NewShiftAmt;
3750
3751 // Skip over an extend of the shift amount.
3752 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3753 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3754 ShiftAmt = ShiftAmt->getOperand(0);
3755
3756 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3757 SDValue Add0 = ShiftAmt->getOperand(0);
3758 SDValue Add1 = ShiftAmt->getOperand(1);
3759 uint64_t Add0Imm;
3760 uint64_t Add1Imm;
3761 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
3762 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3763 // to avoid the ADD/SUB.
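// For example, for i64 a shift by (x + 64) behaves like a shift by x because
// the variable shift instructions only use the shift amount modulo 64.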
3764 NewShiftAmt = Add0;
3765 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3766 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
3767 (Add0Imm % Size == 0)) {
3768 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3769 // to generate a NEG instead of a SUB from a constant.
3770 unsigned NegOpc;
3771 unsigned ZeroReg;
3772 EVT SubVT = ShiftAmt->getValueType(0);
3773 if (SubVT == MVT::i32) {
3774 NegOpc = AArch64::SUBWrr;
3775 ZeroReg = AArch64::WZR;
3776 } else {
3777 assert(SubVT == MVT::i64);
3778 NegOpc = AArch64::SUBXrr;
3779 ZeroReg = AArch64::XZR;
3780 }
3781 SDValue Zero =
3782 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3783 MachineSDNode *Neg =
3784 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
3785 NewShiftAmt = SDValue(Neg, 0);
3786 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3787 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
3788 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3789 // to generate a NOT instead of a SUB from a constant.
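// This relies on the identity (Size - 1) - x == ~x (mod Size); e.g. for i32,
// 31 - x and ~x agree in their low 5 bits, which is all the shift uses.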
3790 unsigned NotOpc;
3791 unsigned ZeroReg;
3792 EVT SubVT = ShiftAmt->getValueType(0);
3793 if (SubVT == MVT::i32) {
3794 NotOpc = AArch64::ORNWrr;
3795 ZeroReg = AArch64::WZR;
3796 } else {
3797 assert(SubVT == MVT::i64);
3798 NotOpc = AArch64::ORNXrr;
3799 ZeroReg = AArch64::XZR;
3800 }
3801 SDValue Zero =
3802 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3803 MachineSDNode *Not =
3804 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
3805 NewShiftAmt = SDValue(Not, 0);
3806 } else
3807 return false;
3808 } else {
3809 // If the shift amount is masked with an AND, check that the mask covers the
3810 // bits that are implicitly ANDed off by the above opcodes and if so, skip
3811 // the AND.
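// For example, for i64 an 'and x2, x2, #0x3f' on the shift amount is redundant
// because LSLV/LSRV/ASRV/RORV only use the low 6 bits, so the AND can be skipped.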
3812 uint64_t MaskImm;
3813 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
3814 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
3815 return false;
3816
3817 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
3818 return false;
3819
3820 NewShiftAmt = ShiftAmt->getOperand(0);
3821 }
3822
3823 // Narrow/widen the shift amount to match the size of the shift operation.
3824 if (VT == MVT::i32)
3825 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
3826 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
3827 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
3828 MachineSDNode *Ext = CurDAG->getMachineNode(
3829 AArch64::SUBREG_TO_REG, DL, VT,
3830 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
3831 NewShiftAmt = SDValue(Ext, 0);
3832 }
3833
3834 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
3835 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3836 return true;
3837}
3838
3839 static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
3840 SDValue &FixedPos,
3841 unsigned RegWidth,
3842 bool isReciprocal) {
3843 APFloat FVal(0.0);
3844 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
3845 FVal = CN->getValueAPF();
3846 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
3847 // Some otherwise illegal constants are allowed in this case.
3848 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
3849 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
3850 return false;
3851
3852 ConstantPoolSDNode *CN =
3853 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
3854 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
3855 } else
3856 return false;
3857
3858 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
3859 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
3860 // x-register.
3861 //
3862 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
3863 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
3864 // integers.
3865 bool IsExact;
3866
3867 if (isReciprocal)
3868 if (!FVal.getExactInverse(&FVal))
3869 return false;
3870
3871 // fbits is between 1 and 64 in the worst-case, which means the fmul
3872 // could have 2^64 as an actual operand. Need 65 bits of precision.
3873 APSInt IntVal(65, true);
3874 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
3875
3876 // N.b. isPowerOf2 also checks for > 0.
3877 if (!IsExact || !IntVal.isPowerOf2())
3878 return false;
3879 unsigned FBits = IntVal.logBase2();
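// For example, (fp_to_sint (fmul x, 65536.0)) yields FBits == 16, so the caller
// can select a fixed-point FCVTZS with 16 fractional bits.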
3880
3881 // Checks above should have guaranteed that we haven't lost information in
3882 // finding FBits, but it must still be in range.
3883 if (FBits == 0 || FBits > RegWidth) return false;
3884
3885 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
3886 return true;
3887}
3888
3889bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
3890 unsigned RegWidth) {
3891 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3892 false);
3893}
3894
3895bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
3896 SDValue &FixedPos,
3897 unsigned RegWidth) {
3898 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3899 true);
3900}
3901
3902 // Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
3903 // fields of the string, obtains the integer values from them and combines
3904 // these into a single value to be used in the MRS/MSR instruction.
3905 static int getIntOperandFromRegisterString(StringRef RegString) {
3906 SmallVector<StringRef, 5> Fields;
3907 RegString.split(Fields, ':');
3908
3909 if (Fields.size() == 1)
3910 return -1;
3911
3912 assert(Fields.size() == 5
3913 && "Invalid number of fields in read register string");
3914
3915 SmallVector<int, 5> Ops;
3916 bool AllIntFields = true;
3917
3918 for (StringRef Field : Fields) {
3919 unsigned IntField;
3920 AllIntFields &= !Field.getAsInteger(10, IntField);
3921 Ops.push_back(IntField);
3922 }
3923
3924 assert(AllIntFields &&
3925 "Unexpected non-integer value in special register string.");
3926 (void)AllIntFields;
3927
3928 // Need to combine the integer fields of the string into a single value
3929 // based on the bit encoding of MRS/MSR instruction.
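// For example, "3:3:14:0:0" (the encoding of CNTFRQ_EL0) combines to
// (3 << 14) | (3 << 11) | (14 << 7) | 0 | 0 == 0xdf00.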
3930 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
3931 (Ops[3] << 3) | (Ops[4]);
3932}
3933
3934// Lower the read_register intrinsic to an MRS instruction node if the special
3935 // register string argument is either of the form detailed in the ACLE (the
3936 // form described in getIntOperandFromRegisterString) or is a named register
3937// known by the MRS SysReg mapper.
3938bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
3939 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
3940 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3941 SDLoc DL(N);
3942
3943 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
3944
3945 unsigned Opcode64Bit = AArch64::MRS;
3946 int Imm = getIntOperandFromRegisterString(RegString->getString());
3947 if (Imm == -1) {
3948 // No match, use the sysreg mapper to map the remaining possible strings to
3949 // the value for the register to be used for the instruction operand.
3950 const auto *TheReg =
3951 AArch64SysReg::lookupSysRegByName(RegString->getString());
3952 if (TheReg && TheReg->Readable &&
3953 TheReg->haveFeatures(Subtarget->getFeatureBits()))
3954 Imm = TheReg->Encoding;
3955 else
3956 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
3957
3958 if (Imm == -1) {
3959 // Still no match, see if this is "pc" or give up.
3960 if (!ReadIs128Bit && RegString->getString() == "pc") {
3961 Opcode64Bit = AArch64::ADR;
3962 Imm = 0;
3963 } else {
3964 return false;
3965 }
3966 }
3967 }
3968
3969 SDValue InChain = N->getOperand(0);
3970 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
3971 if (!ReadIs128Bit) {
3972 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
3973 {SysRegImm, InChain});
3974 } else {
3975 SDNode *MRRS = CurDAG->getMachineNode(
3976 AArch64::MRRS, DL,
3977 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
3978 {SysRegImm, InChain});
3979
3980 // Sysregs are not endian. The even register always contains the low half
3981 // of the register.
3982 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
3983 SDValue(MRRS, 0));
3984 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
3985 SDValue(MRRS, 0));
3986 SDValue OutChain = SDValue(MRRS, 1);
3987
3988 ReplaceUses(SDValue(N, 0), Lo);
3989 ReplaceUses(SDValue(N, 1), Hi);
3990 ReplaceUses(SDValue(N, 2), OutChain);
3991 };
3992 return true;
3993}
3994
3995// Lower the write_register intrinsic to an MSR instruction node if the special
3996 // register string argument is either of the form detailed in the ACLE (the
3997 // form described in getIntOperandFromRegisterString) or is a named register
3998// known by the MSR SysReg mapper.
3999bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
4000 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4001 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4002 SDLoc DL(N);
4003
4004 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
4005
4006 if (!WriteIs128Bit) {
4007 // Check if the register was one of those allowed as the pstatefield value
4008 // in the MSR (immediate) instruction. To accept the values allowed in the
4009 // pstatefield for the MSR (immediate) instruction, we also require that an
4010 // immediate value has been provided as an argument, we know that this is
4011 // the case as it has been ensured by semantic checking.
4012 auto trySelectPState = [&](auto PMapper, unsigned State) {
4013 if (PMapper) {
4014 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
4015 "Expected a constant integer expression.");
4016 unsigned Reg = PMapper->Encoding;
4017 uint64_t Immed = N->getConstantOperandVal(2);
4018 CurDAG->SelectNodeTo(
4019 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
4020 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
4021 return true;
4022 }
4023 return false;
4024 };
4025
4026 if (trySelectPState(
4027 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
4028 AArch64::MSRpstateImm4))
4029 return true;
4030 if (trySelectPState(
4031 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
4032 AArch64::MSRpstateImm1))
4033 return true;
4034 }
4035
4036 int Imm = getIntOperandFromRegisterString(RegString->getString());
4037 if (Imm == -1) {
4038 // Use the sysreg mapper to attempt to map the remaining possible strings
4039 // to the value for the register to be used for the MSR (register)
4040 // instruction operand.
4041 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
4042 if (TheReg && TheReg->Writeable &&
4043 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4044 Imm = TheReg->Encoding;
4045 else
4046 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4047
4048 if (Imm == -1)
4049 return false;
4050 }
4051
4052 SDValue InChain = N->getOperand(0);
4053 if (!WriteIs128Bit) {
4054 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
4055 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4056 N->getOperand(2), InChain);
4057 } else {
4058 // No endian swap. The lower half always goes into the even subreg, and the
4059 // higher half always into the odd subreg.
4060 SDNode *Pair = CurDAG->getMachineNode(
4061 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
4062 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
4063 MVT::i32),
4064 N->getOperand(2),
4065 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
4066 N->getOperand(3),
4067 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
4068
4069 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
4070 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4071 SDValue(Pair, 0), InChain);
4072 }
4073
4074 return true;
4075}
4076
4077/// We've got special pseudo-instructions for these
4078bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4079 unsigned Opcode;
4080 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4081
4082 // Leave IR for LSE if subtarget supports it.
4083 if (Subtarget->hasLSE()) return false;
4084
4085 if (MemTy == MVT::i8)
4086 Opcode = AArch64::CMP_SWAP_8;
4087 else if (MemTy == MVT::i16)
4088 Opcode = AArch64::CMP_SWAP_16;
4089 else if (MemTy == MVT::i32)
4090 Opcode = AArch64::CMP_SWAP_32;
4091 else if (MemTy == MVT::i64)
4092 Opcode = AArch64::CMP_SWAP_64;
4093 else
4094 llvm_unreachable("Unknown AtomicCmpSwap type");
4095
4096 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4097 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4098 N->getOperand(0)};
4099 SDNode *CmpSwap = CurDAG->getMachineNode(
4100 Opcode, SDLoc(N),
4101 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4102
4103 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4104 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4105
4106 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4107 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4108 CurDAG->RemoveDeadNode(N);
4109
4110 return true;
4111}
4112
4113bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4114 SDValue &Shift) {
4115 if (!isa<ConstantSDNode>(N))
4116 return false;
4117
4118 SDLoc DL(N);
4119 uint64_t Val = cast<ConstantSDNode>(N)
4120 ->getAPIntValue()
4121 .trunc(VT.getFixedSizeInBits())
4122 .getZExtValue();
4123
4124 switch (VT.SimpleTy) {
4125 case MVT::i8:
4126 // All immediates are supported.
4127 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4128 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4129 return true;
4130 case MVT::i16:
4131 case MVT::i32:
4132 case MVT::i64:
4133 // Support 8bit unsigned immediates.
4134 if (Val <= 255) {
4135 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4136 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4137 return true;
4138 }
4139 // Support 16bit unsigned immediates that are a multiple of 256.
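// For example, Val == 4608 (0x1200) is emitted as immediate 18 with LSL #8.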
4140 if (Val <= 65280 && Val % 256 == 0) {
4141 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4142 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4143 return true;
4144 }
4145 break;
4146 default:
4147 break;
4148 }
4149
4150 return false;
4151}
4152
4153bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4154 SDValue &Imm, SDValue &Shift,
4155 bool Negate) {
4156 if (!isa<ConstantSDNode>(N))
4157 return false;
4158
4159 SDLoc DL(N);
4160 int64_t Val = cast<ConstantSDNode>(N)
4161 ->getAPIntValue()
4162 .trunc(VT.getFixedSizeInBits())
4163 .getSExtValue();
4164
4165 if (Negate)
4166 Val = -Val;
4167
4168 // Signed saturating instructions treat their immediate operand as unsigned,
4169 // whereas the related intrinsics define their operands to be signed. This
4170 // means we can only use the immediate form when the operand is non-negative.
4171 if (Val < 0)
4172 return false;
4173
4174 switch (VT.SimpleTy) {
4175 case MVT::i8:
4176 // All positive immediates are supported.
4177 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4178 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4179 return true;
4180 case MVT::i16:
4181 case MVT::i32:
4182 case MVT::i64:
4183 // Support 8bit positive immediates.
4184 if (Val <= 255) {
4185 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4186 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4187 return true;
4188 }
4189 // Support 16bit positive immediates that are a multiple of 256.
4190 if (Val <= 65280 && Val % 256 == 0) {
4191 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4192 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4193 return true;
4194 }
4195 break;
4196 default:
4197 break;
4198 }
4199
4200 return false;
4201}
4202
4203bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4204 SDValue &Shift) {
4205 if (!isa<ConstantSDNode>(N))
4206 return false;
4207
4208 SDLoc DL(N);
4209 int64_t Val = cast<ConstantSDNode>(N)
4210 ->getAPIntValue()
4211 .trunc(VT.getFixedSizeInBits())
4212 .getSExtValue();
4213
4214 switch (VT.SimpleTy) {
4215 case MVT::i8:
4216 // All immediates are supported.
4217 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4218 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4219 return true;
4220 case MVT::i16:
4221 case MVT::i32:
4222 case MVT::i64:
4223 // Support 8bit signed immediates.
4224 if (Val >= -128 && Val <= 127) {
4225 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4226 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4227 return true;
4228 }
4229 // Support 16bit signed immediates that are a multiple of 256.
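// For example, Val == -1280 (-5 * 256) is emitted as immediate 0xfb with LSL #8.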
4230 if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
4231 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4232 Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32);
4233 return true;
4234 }
4235 break;
4236 default:
4237 break;
4238 }
4239
4240 return false;
4241}
4242
4243bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4244 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4245 int64_t ImmVal = CNode->getSExtValue();
4246 SDLoc DL(N);
4247 if (ImmVal >= -128 && ImmVal < 128) {
4248 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
4249 return true;
4250 }
4251 }
4252 return false;
4253}
4254
4255bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4256 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4257 uint64_t ImmVal = CNode->getZExtValue();
4258
4259 switch (VT.SimpleTy) {
4260 case MVT::i8:
4261 ImmVal &= 0xFF;
4262 break;
4263 case MVT::i16:
4264 ImmVal &= 0xFFFF;
4265 break;
4266 case MVT::i32:
4267 ImmVal &= 0xFFFFFFFF;
4268 break;
4269 case MVT::i64:
4270 break;
4271 default:
4272 llvm_unreachable("Unexpected type");
4273 }
4274
4275 if (ImmVal < 256) {
4276 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4277 return true;
4278 }
4279 }
4280 return false;
4281}
4282
4283bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4284 bool Invert) {
4285 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4286 uint64_t ImmVal = CNode->getZExtValue();
4287 SDLoc DL(N);
4288
4289 if (Invert)
4290 ImmVal = ~ImmVal;
4291
4292 // Shift mask depending on type size.
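// For example, for i16 an ImmVal of 0x00ff is replicated to 0x00ff00ff00ff00ff
// so it can be validated and encoded as a 64-bit logical immediate.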
4293 switch (VT.SimpleTy) {
4294 case MVT::i8:
4295 ImmVal &= 0xFF;
4296 ImmVal |= ImmVal << 8;
4297 ImmVal |= ImmVal << 16;
4298 ImmVal |= ImmVal << 32;
4299 break;
4300 case MVT::i16:
4301 ImmVal &= 0xFFFF;
4302 ImmVal |= ImmVal << 16;
4303 ImmVal |= ImmVal << 32;
4304 break;
4305 case MVT::i32:
4306 ImmVal &= 0xFFFFFFFF;
4307 ImmVal |= ImmVal << 32;
4308 break;
4309 case MVT::i64:
4310 break;
4311 default:
4312 llvm_unreachable("Unexpected type");
4313 }
4314
4315 uint64_t encoding;
4316 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
4317 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
4318 return true;
4319 }
4320 }
4321 return false;
4322}
4323
4324// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4325// Rather than attempt to normalise everything we can sometimes saturate the
4326// shift amount during selection. This function also allows for consistent
4327// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4328// required by the instructions.
4329bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4330 uint64_t High, bool AllowSaturation,
4331 SDValue &Imm) {
4332 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4333 uint64_t ImmVal = CN->getZExtValue();
4334
4335 // Reject shift amounts that are too small.
4336 if (ImmVal < Low)
4337 return false;
4338
4339 // Reject or saturate shift amounts that are too big.
4340 if (ImmVal > High) {
4341 if (!AllowSaturation)
4342 return false;
4343 ImmVal = High;
4344 }
4345
4346 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4347 return true;
4348 }
4349
4350 return false;
4351}
4352
4353bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4354 // tagp(FrameIndex, IRGstack, tag_offset):
4355 // since the offset between FrameIndex and IRGstack is a compile-time
4356 // constant, this can be lowered to a single ADDG instruction.
4357 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4358 return false;
4359 }
4360
4361 SDValue IRG_SP = N->getOperand(2);
4362 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4363 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4364 return false;
4365 }
4366
4367 const TargetLowering *TLI = getTargetLowering();
4368 SDLoc DL(N);
4369 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4370 SDValue FiOp = CurDAG->getTargetFrameIndex(
4371 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4372 int TagOffset = N->getConstantOperandVal(3);
4373
4374 SDNode *Out = CurDAG->getMachineNode(
4375 AArch64::TAGPstack, DL, MVT::i64,
4376 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4377 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4378 ReplaceNode(N, Out);
4379 return true;
4380}
4381
4382void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4383 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4384 "llvm.aarch64.tagp third argument must be an immediate");
4385 if (trySelectStackSlotTagP(N))
4386 return;
4387 // FIXME: above applies in any case when offset between Op1 and Op2 is a
4388 // compile-time constant, not just for stack allocations.
4389
4390 // General case for unrelated pointers in Op1 and Op2.
4391 SDLoc DL(N);
4392 int TagOffset = N->getConstantOperandVal(3);
4393 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4394 {N->getOperand(1), N->getOperand(2)});
4395 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4396 {SDValue(N1, 0), N->getOperand(2)});
4397 SDNode *N3 = CurDAG->getMachineNode(
4398 AArch64::ADDG, DL, MVT::i64,
4399 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4400 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4401 ReplaceNode(N, N3);
4402}
4403
4404bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4405 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4406
4407 // Bail when not a "cast" like insert_subvector.
4408 if (N->getConstantOperandVal(2) != 0)
4409 return false;
4410 if (!N->getOperand(0).isUndef())
4411 return false;
4412
4413 // Bail when normal isel should do the job.
4414 EVT VT = N->getValueType(0);
4415 EVT InVT = N->getOperand(1).getValueType();
4416 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4417 return false;
4418 if (InVT.getSizeInBits() <= 128)
4419 return false;
4420
4421 // NOTE: We can only get here when doing fixed length SVE code generation.
4422 // We do manual selection because the types involved are not linked to real
4423 // registers (despite being legal) and must be coerced into SVE registers.
4424
4426 "Expected to insert into a packed scalable vector!");
4427
4428 SDLoc DL(N);
4429 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4430 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4431 N->getOperand(1), RC));
4432 return true;
4433}
4434
4435bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4436 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4437
4438 // Bail when not a "cast" like extract_subvector.
4439 if (N->getConstantOperandVal(1) != 0)
4440 return false;
4441
4442 // Bail when normal isel can do the job.
4443 EVT VT = N->getValueType(0);
4444 EVT InVT = N->getOperand(0).getValueType();
4445 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4446 return false;
4447 if (VT.getSizeInBits() <= 128)
4448 return false;
4449
4450 // NOTE: We can only get here when doing fixed length SVE code generation.
4451 // We do manual selection because the types involved are not linked to real
4452 // registers (despite being legal) and must be coerced into SVE registers.
4453
4455 "Expected to extract from a packed scalable vector!");
4456
4457 SDLoc DL(N);
4458 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4459 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4460 N->getOperand(0), RC));
4461 return true;
4462}
4463
4464bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4465 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4466
4467 SDValue N0 = N->getOperand(0);
4468 SDValue N1 = N->getOperand(1);
4469 EVT VT = N->getValueType(0);
4470
4471 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4472 // Rotate by a constant is a funnel shift in IR which is expanded to
4473 // an OR with shifted operands.
4474 // We do the following transform:
4475 // OR N0, N1 -> xar (x, y, imm)
4476 // Where:
4477 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4478 // N0 = SHL_PRED true, V, splat(bits-imm)
4479 // V = (xor x, y)
4480 if (VT.isScalableVector() &&
4481 (Subtarget->hasSVE2() ||
4482 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4483 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4484 N1.getOpcode() != AArch64ISD::SRL_PRED)
4485 std::swap(N0, N1);
4486 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4487 N1.getOpcode() != AArch64ISD::SRL_PRED)
4488 return false;
4489
4490 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4491 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4492 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4493 return false;
4494
4495 SDValue XOR = N0.getOperand(1);
4496 if (XOR.getOpcode() != ISD::XOR || XOR != N1.getOperand(1))
4497 return false;
4498
4499 APInt ShlAmt, ShrAmt;
4500 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4501 !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
4502 return false;
4503
4504 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4505 return false;
4506
4507 SDLoc DL(N);
4508 SDValue Imm =
4509 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4510
4511 SDValue Ops[] = {XOR.getOperand(0), XOR.getOperand(1), Imm};
4512 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4513 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4514 AArch64::XAR_ZZZI_D})) {
4515 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4516 return true;
4517 }
4518 return false;
4519 }
4520
4521 if (!Subtarget->hasSHA3())
4522 return false;
4523
4524 if (N0->getOpcode() != AArch64ISD::VSHL ||
4525 N1->getOpcode() != AArch64ISD::VLSHR)
4526 return false;
4527
4528 if (N0->getOperand(0) != N1->getOperand(0) ||
4529 N1->getOperand(0)->getOpcode() != ISD::XOR)
4530 return false;
4531
4532 SDValue XOR = N0.getOperand(0);
4533 SDValue R1 = XOR.getOperand(0);
4534 SDValue R2 = XOR.getOperand(1);
4535
4536 unsigned HsAmt = N0.getConstantOperandVal(1);
4537 unsigned ShAmt = N1.getConstantOperandVal(1);
4538
4539 SDLoc DL = SDLoc(N0.getOperand(1));
4540 SDValue Imm = CurDAG->getTargetConstant(
4541 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4542
4543 if (ShAmt + HsAmt != 64)
4544 return false;
4545
4546 SDValue Ops[] = {R1, R2, Imm};
4547 CurDAG->SelectNodeTo(N, AArch64::XAR, N0.getValueType(), Ops);
4548
4549 return true;
4550}
4551
4552void AArch64DAGToDAGISel::Select(SDNode *Node) {
4553 // If we have a custom node, we already have selected!
4554 if (Node->isMachineOpcode()) {
4555 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4556 Node->setNodeId(-1);
4557 return;
4558 }
4559
4560 // A few custom selection cases.
4561 EVT VT = Node->getValueType(0);
4562
4563 switch (Node->getOpcode()) {
4564 default:
4565 break;
4566
4567 case ISD::ATOMIC_CMP_SWAP:
4568 if (SelectCMP_SWAP(Node))
4569 return;
4570 break;
4571
4572 case ISD::READ_REGISTER:
4573 case AArch64ISD::MRRS:
4574 if (tryReadRegister(Node))
4575 return;
4576 break;
4577
4578 case ISD::WRITE_REGISTER:
4579 case AArch64ISD::MSRR:
4580 if (tryWriteRegister(Node))
4581 return;
4582 break;
4583
4584 case ISD::LOAD: {
4585 // Try to select as an indexed load. Fall through to normal processing
4586 // if we can't.
4587 if (tryIndexedLoad(Node))
4588 return;
4589 break;
4590 }
4591
4592 case ISD::SRL:
4593 case ISD::AND:
4594 case ISD::SRA:
4595 case ISD::SIGN_EXTEND_INREG:
4596 if (tryBitfieldExtractOp(Node))
4597 return;
4598 if (tryBitfieldInsertInZeroOp(Node))
4599 return;
4600 [[fallthrough]];
4601 case ISD::ROTR:
4602 case ISD::SHL:
4603 if (tryShiftAmountMod(Node))
4604 return;
4605 break;
4606
4607 case ISD::SIGN_EXTEND:
4608 if (tryBitfieldExtractOpFromSExt(Node))
4609 return;
4610 break;
4611
4612 case ISD::OR:
4613 if (tryBitfieldInsertOp(Node))
4614 return;
4615 if (trySelectXAR(Node))
4616 return;
4617 break;
4618
4619 case ISD::EXTRACT_SUBVECTOR: {
4620 if (trySelectCastScalableToFixedLengthVector(Node))
4621 return;
4622 break;
4623 }
4624
4625 case ISD::INSERT_SUBVECTOR: {
4626 if (trySelectCastFixedLengthToScalableVector(Node))
4627 return;
4628 break;
4629 }
4630
4631 case ISD::Constant: {
4632 // Materialize zero constants as copies from WZR/XZR. This allows
4633 // the coalescer to propagate these into other instructions.
4634 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4635 if (ConstNode->isZero()) {
4636 if (VT == MVT::i32) {
4637 SDValue New = CurDAG->getCopyFromReg(
4638 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4639 ReplaceNode(Node, New.getNode());
4640 return;
4641 } else if (VT == MVT::i64) {
4642 SDValue New = CurDAG->getCopyFromReg(
4643 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4644 ReplaceNode(Node, New.getNode());
4645 return;
4646 }
4647 }
4648 break;
4649 }
4650
4651 case ISD::FrameIndex: {
4652 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4653 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4654 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4655 const TargetLowering *TLI = getTargetLowering();
4656 SDValue TFI = CurDAG->getTargetFrameIndex(
4657 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4658 SDLoc DL(Node);
4659 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4660 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4661 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4662 return;
4663 }
4664 case ISD::INTRINSIC_W_CHAIN: {
4665 unsigned IntNo = Node->getConstantOperandVal(1);
4666 switch (IntNo) {
4667 default:
4668 break;
4669 case Intrinsic::aarch64_gcsss: {
4670 SDLoc DL(Node);
4671 SDValue Chain = Node->getOperand(0);
4672 SDValue Val = Node->getOperand(2);
4673 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
4674 SDNode *SS1 =
4675 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
4676 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
4677 MVT::Other, Zero, SDValue(SS1, 0));
4678 ReplaceNode(Node, SS2);
4679 return;
4680 }
4681 case Intrinsic::aarch64_ldaxp:
4682 case Intrinsic::aarch64_ldxp: {
4683 unsigned Op =
4684 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4685 SDValue MemAddr = Node->getOperand(2);
4686 SDLoc DL(Node);
4687 SDValue Chain = Node->getOperand(0);
4688
4689 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4690 MVT::Other, MemAddr, Chain);
4691
4692 // Transfer memoperands.
4693 MachineMemOperand *MemOp =
4694 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4695 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4696 ReplaceNode(Node, Ld);
4697 return;
4698 }
4699 case Intrinsic::aarch64_stlxp:
4700 case Intrinsic::aarch64_stxp: {
4701 unsigned Op =
4702 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4703 SDLoc DL(Node);
4704 SDValue Chain = Node->getOperand(0);
4705 SDValue ValLo = Node->getOperand(2);
4706 SDValue ValHi = Node->getOperand(3);
4707 SDValue MemAddr = Node->getOperand(4);
4708
4709 // Place arguments in the right order.
4710 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4711
4712 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4713 // Transfer memoperands.
4714 MachineMemOperand *MemOp =
4715 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4716 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4717
4718 ReplaceNode(Node, St);
4719 return;
4720 }
4721 case Intrinsic::aarch64_neon_ld1x2:
4722 if (VT == MVT::v8i8) {
4723 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4724 return;
4725 } else if (VT == MVT::v16i8) {
4726 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4727 return;
4728 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4729 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4730 return;
4731 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4732 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
4733 return;
4734 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4735 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
4736 return;
4737 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4738 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
4739 return;
4740 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4741 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4742 return;
4743 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4744 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
4745 return;
4746 }
4747 break;
4748 case Intrinsic::aarch64_neon_ld1x3:
4749 if (VT == MVT::v8i8) {
4750 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
4751 return;
4752 } else if (VT == MVT::v16i8) {
4753 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
4754 return;
4755 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4756 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
4757 return;
4758 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4759 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
4760 return;
4761 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4762 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
4763 return;
4764 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4765 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
4766 return;
4767 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4768 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4769 return;
4770 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4771 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
4772 return;
4773 }
4774 break;
4775 case Intrinsic::aarch64_neon_ld1x4:
4776 if (VT == MVT::v8i8) {
4777 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
4778 return;
4779 } else if (VT == MVT::v16i8) {
4780 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
4781 return;
4782 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4783 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
4784 return;
4785 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4786 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
4787 return;
4788 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4789 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
4790 return;
4791 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4792 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
4793 return;
4794 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4795 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4796 return;
4797 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4798 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
4799 return;
4800 }
4801 break;
4802 case Intrinsic::aarch64_neon_ld2:
4803 if (VT == MVT::v8i8) {
4804 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
4805 return;
4806 } else if (VT == MVT::v16i8) {
4807 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
4808 return;
4809 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4810 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
4811 return;
4812 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4813 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
4814 return;
4815 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4816 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
4817 return;
4818 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4819 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
4820 return;
4821 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4822 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4823 return;
4824 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4825 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
4826 return;
4827 }
4828 break;
4829 case Intrinsic::aarch64_neon_ld3:
4830 if (VT == MVT::v8i8) {
4831 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
4832 return;
4833 } else if (VT == MVT::v16i8) {
4834 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
4835 return;
4836 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4837 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
4838 return;
4839 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4840 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
4841 return;
4842 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4843 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
4844 return;
4845 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4846 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
4847 return;
4848 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4849 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4850 return;
4851 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4852 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
4853 return;
4854 }
4855 break;
4856 case Intrinsic::aarch64_neon_ld4:
4857 if (VT == MVT::v8i8) {
4858 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
4859 return;
4860 } else if (VT == MVT::v16i8) {
4861 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
4862 return;
4863 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4864 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
4865 return;
4866 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4867 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
4868 return;
4869 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4870 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
4871 return;
4872 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4873 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
4874 return;
4875 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4876 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4877 return;
4878 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4879 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
4880 return;
4881 }
4882 break;
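// LDNR replicating loads: load one element per destination register and
// broadcast it to every lane of that register.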
4883 case Intrinsic::aarch64_neon_ld2r:
4884 if (VT == MVT::v8i8) {
4885 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
4886 return;
4887 } else if (VT == MVT::v16i8) {
4888 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
4889 return;
4890 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4891 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
4892 return;
4893 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4894 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
4895 return;
4896 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4897 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
4898 return;
4899 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4900 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
4901 return;
4902 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4903 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
4904 return;
4905 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4906 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
4907 return;
4908 }
4909 break;
4910 case Intrinsic::aarch64_neon_ld3r:
4911 if (VT == MVT::v8i8) {
4912 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
4913 return;
4914 } else if (VT == MVT::v16i8) {
4915 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
4916 return;
4917 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4918 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
4919 return;
4920 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4921 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
4922 return;
4923 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4924 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
4925 return;
4926 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4927 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
4928 return;
4929 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4930 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
4931 return;
4932 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4933 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
4934 return;
4935 }
4936 break;
4937 case Intrinsic::aarch64_neon_ld4r:
4938 if (VT == MVT::v8i8) {
4939 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
4940 return;
4941 } else if (VT == MVT::v16i8) {
4942 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
4943 return;
4944 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4945 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
4946 return;
4947 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4948 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
4949 return;
4950 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4951 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
4952 return;
4953 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4954 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
4955 return;
4956 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4957 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
4958 return;
4959 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4960 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
4961 return;
4962 }
4963 break;
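// LDN single-structure (lane) loads: load into one lane of each vector in
// the tuple. D- and Q-sized vectors share the same per-element opcode, so
// only the element size is dispatched on here.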
4964 case Intrinsic::aarch64_neon_ld2lane:
4965 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4966 SelectLoadLane(Node, 2, AArch64::LD2i8);
4967 return;
4968 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4969 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4970 SelectLoadLane(Node, 2, AArch64::LD2i16);
4971 return;
4972 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4973 VT == MVT::v2f32) {
4974 SelectLoadLane(Node, 2, AArch64::LD2i32);
4975 return;
4976 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4977 VT == MVT::v1f64) {
4978 SelectLoadLane(Node, 2, AArch64::LD2i64);
4979 return;
4980 }
4981 break;
4982 case Intrinsic::aarch64_neon_ld3lane:
4983 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4984 SelectLoadLane(Node, 3, AArch64::LD3i8);
4985 return;
4986 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4987 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4988 SelectLoadLane(Node, 3, AArch64::LD3i16);
4989 return;
4990 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4991 VT == MVT::v2f32) {
4992 SelectLoadLane(Node, 3, AArch64::LD3i32);
4993 return;
4994 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4995 VT == MVT::v1f64) {
4996 SelectLoadLane(Node, 3, AArch64::LD3i64);
4997 return;
4998 }
4999 break;
5000 case Intrinsic::aarch64_neon_ld4lane:
5001 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5002 SelectLoadLane(Node, 4, AArch64::LD4i8);
5003 return;
5004 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5005 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5006 SelectLoadLane(Node, 4, AArch64::LD4i16);
5007 return;
5008 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5009 VT == MVT::v2f32) {
5010 SelectLoadLane(Node, 4, AArch64::LD4i32);
5011 return;
5012 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5013 VT == MVT::v1f64) {
5014 SelectLoadLane(Node, 4, AArch64::LD4i64);
5015 return;
5016 }
5017 break;
5018 case Intrinsic::aarch64_ld64b:
5019 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
5020 return;
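// SVE structured loads returning a struct of vectors (the *_sret
// intrinsics). The scale argument passed to SelectPredicatedLoad is log2 of
// the element size in bytes (4 for the 128-bit Q forms, 0..3 for B/H/W/D).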
5021 case Intrinsic::aarch64_sve_ld2q_sret: {
5022 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
5023 return;
5024 }
5025 case Intrinsic::aarch64_sve_ld3q_sret: {
5026 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
5027 return;
5028 }
5029 case Intrinsic::aarch64_sve_ld4q_sret: {
5030 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
5031 return;
5032 }
5033 case Intrinsic::aarch64_sve_ld2_sret: {
5034 if (VT == MVT::nxv16i8) {
5035 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
5036 true);
5037 return;
5038 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5039 VT == MVT::nxv8bf16) {
5040 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
5041 true);
5042 return;
5043 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5044 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
5045 true);
5046 return;
5047 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5048 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
5049 true);
5050 return;
5051 }
5052 break;
5053 }
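// Predicate-as-counter LD1/LDNT1 multi-vector loads: SME2 targets select
// the pseudo variants, SVE2p1-only targets use the real instructions, and
// anything else falls through to the default handling.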
5054 case Intrinsic::aarch64_sve_ld1_pn_x2: {
5055 if (VT == MVT::nxv16i8) {
5056 if (Subtarget->hasSME2())
5057 SelectContiguousMultiVectorLoad(
5058 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
5059 else if (Subtarget->hasSVE2p1())
5060 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
5061 AArch64::LD1B_2Z);
5062 else
5063 break;
5064 return;
5065 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5066 VT == MVT::nxv8bf16) {
5067 if (Subtarget->hasSME2())
5068 SelectContiguousMultiVectorLoad(
5069 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
5070 else if (Subtarget->hasSVE2p1())
5071 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
5072 AArch64::LD1H_2Z);
5073 else
5074 break;
5075 return;
5076 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5077 if (Subtarget->hasSME2())
5078 SelectContiguousMultiVectorLoad(
5079 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
5080 else if (Subtarget->hasSVE2p1())
5081 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
5082 AArch64::LD1W_2Z);
5083 else
5084 break;
5085 return;
5086 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5087 if (Subtarget->hasSME2())
5088 SelectContiguousMultiVectorLoad(
5089 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
5090 else if (Subtarget->hasSVE2p1())
5091 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
5092 AArch64::LD1D_2Z);
5093 else
5094 break;
5095 return;
5096 }
5097 break;
5098 }
5099 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5100 if (VT == MVT::nxv16i8) {
5101 if (Subtarget->hasSME2())
5102 SelectContiguousMultiVectorLoad(
5103 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5104 else if (Subtarget->hasSVE2p1())
5105 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5106 AArch64::LD1B_4Z);
5107 else
5108 break;
5109 return;
5110 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5111 VT == MVT::nxv8bf16) {
5112 if (Subtarget->hasSME2())
5113 SelectContiguousMultiVectorLoad(
5114 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5115 else if (Subtarget->hasSVE2p1())
5116 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5117 AArch64::LD1H_4Z);
5118 else
5119 break;
5120 return;
5121 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5122 if (Subtarget->hasSME2())
5123 SelectContiguousMultiVectorLoad(
5124 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5125 else if (Subtarget->hasSVE2p1())
5126 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5127 AArch64::LD1W_4Z);
5128 else
5129 break;
5130 return;
5131 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5132 if (Subtarget->hasSME2())
5133 SelectContiguousMultiVectorLoad(
5134 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5135 else if (Subtarget->hasSVE2p1())
5136 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5137 AArch64::LD1D_4Z);
5138 else
5139 break;
5140 return;
5141 }
5142 break;
5143 }
5144 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5145 if (VT == MVT::nxv16i8) {
5146 if (Subtarget->hasSME2())
5147 SelectContiguousMultiVectorLoad(Node, 2, 0,
5148 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5149 AArch64::LDNT1B_2Z_PSEUDO);
5150 else if (Subtarget->hasSVE2p1())
5151 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5152 AArch64::LDNT1B_2Z);
5153 else
5154 break;
5155 return;
5156 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5157 VT == MVT::nxv8bf16) {
5158 if (Subtarget->hasSME2())
5159 SelectContiguousMultiVectorLoad(Node, 2, 1,
5160 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5161 AArch64::LDNT1H_2Z_PSEUDO);
5162 else if (Subtarget->hasSVE2p1())
5163 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5164 AArch64::LDNT1H_2Z);
5165 else
5166 break;
5167 return;
5168 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5169 if (Subtarget->hasSME2())
5170 SelectContiguousMultiVectorLoad(Node, 2, 2,
5171 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5172 AArch64::LDNT1W_2Z_PSEUDO);
5173 else if (Subtarget->hasSVE2p1())
5174 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5175 AArch64::LDNT1W_2Z);
5176 else
5177 break;
5178 return;
5179 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5180 if (Subtarget->hasSME2())
5181 SelectContiguousMultiVectorLoad(Node, 2, 3,
5182 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5183 AArch64::LDNT1D_2Z_PSEUDO);
5184 else if (Subtarget->hasSVE2p1())
5185 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5186 AArch64::LDNT1D_2Z);
5187 else
5188 break;
5189 return;
5190 }
5191 break;
5192 }
5193 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5194 if (VT == MVT::nxv16i8) {
5195 if (Subtarget->hasSME2())
5196 SelectContiguousMultiVectorLoad(Node, 4, 0,
5197 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5198 AArch64::LDNT1B_4Z_PSEUDO);
5199 else if (Subtarget->hasSVE2p1())
5200 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5201 AArch64::LDNT1B_4Z);
5202 else
5203 break;
5204 return;
5205 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5206 VT == MVT::nxv8bf16) {
5207 if (Subtarget->hasSME2())
5208 SelectContiguousMultiVectorLoad(Node, 4, 1,
5209 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5210 AArch64::LDNT1H_4Z_PSEUDO);
5211 else if (Subtarget->hasSVE2p1())
5212 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5213 AArch64::LDNT1H_4Z);
5214 else
5215 break;
5216 return;
5217 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5218 if (Subtarget->hasSME2())
5219 SelectContiguousMultiVectorLoad(Node, 4, 2,
5220 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5221 AArch64::LDNT1W_4Z_PSEUDO);
5222 else if (Subtarget->hasSVE2p1())
5223 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5224 AArch64::LDNT1W_4Z);
5225 else
5226 break;
5227 return;
5228 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5229 if (Subtarget->hasSME2())
5230 SelectContiguousMultiVectorLoad(Node, 4, 3,
5231 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5232 AArch64::LDNT1D_4Z_PSEUDO);
5233 else if (Subtarget->hasSVE2p1())
5234 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5235 AArch64::LDNT1D_4Z);
5236 else
5237 break;
5238 return;
5239 }
5240 break;
5241 }
5242 case Intrinsic::aarch64_sve_ld3_sret: {
5243 if (VT == MVT::nxv16i8) {
5244 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5245 true);
5246 return;
5247 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5248 VT == MVT::nxv8bf16) {
5249 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5250 true);
5251 return;
5252 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5253 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5254 true);
5255 return;
5256 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5257 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5258 true);
5259 return;
5260 }
5261 break;
5262 }
5263 case Intrinsic::aarch64_sve_ld4_sret: {
5264 if (VT == MVT::nxv16i8) {
5265 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5266 true);
5267 return;
5268 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5269 VT == MVT::nxv8bf16) {
5270 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5271 true);
5272 return;
5273 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5274 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5275 true);
5276 return;
5277 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5278 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5279 true);
5280 return;
5281 }
5282 break;
5283 }
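// SME MOVA reads: copy groups of 2 or 4 ZA tile slices (horizontal or
// vertical) into SVE register tuples. The template arguments constrain the
// slice-offset immediate for each element size.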
5284 case Intrinsic::aarch64_sme_read_hor_vg2: {
5285 if (VT == MVT::nxv16i8) {
5286 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5287 AArch64::MOVA_2ZMXI_H_B);
5288 return;
5289 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5290 VT == MVT::nxv8bf16) {
5291 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5292 AArch64::MOVA_2ZMXI_H_H);
5293 return;
5294 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5295 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5296 AArch64::MOVA_2ZMXI_H_S);
5297 return;
5298 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5299 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5300 AArch64::MOVA_2ZMXI_H_D);
5301 return;
5302 }
5303 break;
5304 }
5305 case Intrinsic::aarch64_sme_read_ver_vg2: {
5306 if (VT == MVT::nxv16i8) {
5307 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5308 AArch64::MOVA_2ZMXI_V_B);
5309 return;
5310 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5311 VT == MVT::nxv8bf16) {
5312 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5313 AArch64::MOVA_2ZMXI_V_H);
5314 return;
5315 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5316 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5317 AArch64::MOVA_2ZMXI_V_S);
5318 return;
5319 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5320 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5321 AArch64::MOVA_2ZMXI_V_D);
5322 return;
5323 }
5324 break;
5325 }
5326 case Intrinsic::aarch64_sme_read_hor_vg4: {
5327 if (VT == MVT::nxv16i8) {
5328 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5329 AArch64::MOVA_4ZMXI_H_B);
5330 return;
5331 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5332 VT == MVT::nxv8bf16) {
5333 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5334 AArch64::MOVA_4ZMXI_H_H);
5335 return;
5336 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5337 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5338 AArch64::MOVA_4ZMXI_H_S);
5339 return;
5340 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5341 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5342 AArch64::MOVA_4ZMXI_H_D);
5343 return;
5344 }
5345 break;
5346 }
5347 case Intrinsic::aarch64_sme_read_ver_vg4: {
5348 if (VT == MVT::nxv16i8) {
5349 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5350 AArch64::MOVA_4ZMXI_V_B);
5351 return;
5352 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5353 VT == MVT::nxv8bf16) {
5354 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5355 AArch64::MOVA_4ZMXI_V_H);
5356 return;
5357 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5358 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5359 AArch64::MOVA_4ZMXI_V_S);
5360 return;
5361 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5362 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5363 AArch64::MOVA_4ZMXI_V_D);
5364 return;
5365 }
5366 break;
5367 }
5368 case Intrinsic::aarch64_sme_read_vg1x2: {
5369 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5370 AArch64::MOVA_VG2_2ZMXI);
5371 return;
5372 }
5373 case Intrinsic::aarch64_sme_read_vg1x4: {
5374 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5375 AArch64::MOVA_VG4_4ZMXI);
5376 return;
5377 }
5378 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5379 if (VT == MVT::nxv16i8) {
5380 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5381 return;
5382 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5383 VT == MVT::nxv8bf16) {
5384 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5385 return;
5386 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5387 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5388 return;
5389 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5390 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5391 return;
5392 }
5393 break;
5394 }
5395 case Intrinsic::aarch64_sme_readz_vert_x2: {
5396 if (VT == MVT::nxv16i8) {
5397 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5398 return;
5399 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5400 VT == MVT::nxv8bf16) {
5401 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5402 return;
5403 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5404 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5405 return;
5406 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5407 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5408 return;
5409 }
5410 break;
5411 }
5412 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5413 if (VT == MVT::nxv16i8) {
5414 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5415 return;
5416 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5417 VT == MVT::nxv8bf16) {
5418 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5419 return;
5420 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5421 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5422 return;
5423 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5424 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5425 return;
5426 }
5427 break;
5428 }
5429 case Intrinsic::aarch64_sme_readz_vert_x4: {
5430 if (VT == MVT::nxv16i8) {
5431 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5432 return;
5433 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5434 VT == MVT::nxv8bf16) {
5435 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5436 return;
5437 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5438 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5439 return;
5440 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5441 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5442 return;
5443 }
5444 break;
5445 }
5446 case Intrinsic::aarch64_sme_readz_x2: {
5447 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
5448 AArch64::ZA);
5449 return;
5450 }
5451 case Intrinsic::aarch64_sme_readz_x4: {
5452 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
5453 AArch64::ZA);
5454 return;
5455 }
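// The Swift async context is stored in the slot just below the frame
// pointer, so return FP - 8 and record that this function uses it.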
5456 case Intrinsic::swift_async_context_addr: {
5457 SDLoc DL(Node);
5458 SDValue Chain = Node->getOperand(0);
5459 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5460 SDValue Res = SDValue(
5461 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5462 CurDAG->getTargetConstant(8, DL, MVT::i32),
5463 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5464 0);
5465 ReplaceUses(SDValue(Node, 0), Res);
5466 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5467 CurDAG->RemoveDeadNode(Node);
5468
5469 auto &MF = CurDAG->getMachineFunction();
5470 MF.getFrameInfo().setFrameAddressIsTaken(true);
5471 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5472 return;
5473 }
5474 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5475 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5476 Node->getValueType(0),
5477 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5478 AArch64::LUTI2_4ZTZI_S}))
5479 // Second Immediate must be <= 3:
5480 SelectMultiVectorLuti(Node, 4, Opc, 3);
5481 return;
5482 }
5483 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5484 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5485 Node->getValueType(0),
5486 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5487 // Second Immediate must be <= 1:
5488 SelectMultiVectorLuti(Node, 4, Opc, 1);
5489 return;
5490 }
5491 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5492 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5493 Node->getValueType(0),
5494 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5495 AArch64::LUTI2_2ZTZI_S}))
5496 // Second Immediate must be <= 7:
5497 SelectMultiVectorLuti(Node, 2, Opc, 7);
5498 return;
5499 }
5500 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5501 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5502 Node->getValueType(0),
5503 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5504 AArch64::LUTI4_2ZTZI_S}))
5505 // Second Immediate must be <= 3:
5506 SelectMultiVectorLuti(Node, 2, Opc, 3);
5507 return;
5508 }
5509 }
5510 } break;
5511 case ISD::INTRINSIC_WO_CHAIN: {
5512 unsigned IntNo = Node->getConstantOperandVal(0);
5513 switch (IntNo) {
5514 default:
5515 break;
5516 case Intrinsic::aarch64_tagp:
5517 SelectTagP(Node);
5518 return;
5519
5520 case Intrinsic::ptrauth_auth:
5521 SelectPtrauthAuth(Node);
5522 return;
5523
5524 case Intrinsic::ptrauth_resign:
5525 SelectPtrauthResign(Node);
5526 return;
5527
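// NEON table lookups: TBL writes zero for out-of-range indices, while TBX
// leaves the destination element unchanged (the trailing boolean passed to
// SelectTable selects the TBX behaviour).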
5528 case Intrinsic::aarch64_neon_tbl2:
5529 SelectTable(Node, 2,
5530 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5531 false);
5532 return;
5533 case Intrinsic::aarch64_neon_tbl3:
5534 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5535 : AArch64::TBLv16i8Three,
5536 false);
5537 return;
5538 case Intrinsic::aarch64_neon_tbl4:
5539 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5540 : AArch64::TBLv16i8Four,
5541 false);
5542 return;
5543 case Intrinsic::aarch64_neon_tbx2:
5544 SelectTable(Node, 2,
5545 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5546 true);
5547 return;
5548 case Intrinsic::aarch64_neon_tbx3:
5549 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5550 : AArch64::TBXv16i8Three,
5551 true);
5552 return;
5553 case Intrinsic::aarch64_neon_tbx4:
5554 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5555 : AArch64::TBXv16i8Four,
5556 true);
5557 return;
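// SME2/SVE2p1 multi-vector arithmetic: the opcode is chosen from the result
// element type and emitted as a destructive multi-register operation. The
// "_single" intrinsics take one shared right-hand vector (ZZ forms); the
// plain variants take a full second tuple, indicated by the "true" flag.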
5558 case Intrinsic::aarch64_sve_srshl_single_x2:
5559 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5560 Node->getValueType(0),
5561 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5562 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
5563 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5564 return;
5565 case Intrinsic::aarch64_sve_srshl_single_x4:
5566 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5567 Node->getValueType(0),
5568 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
5569 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
5570 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5571 return;
5572 case Intrinsic::aarch64_sve_urshl_single_x2:
5573 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5574 Node->getValueType(0),
5575 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
5576 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
5577 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5578 return;
5579 case Intrinsic::aarch64_sve_urshl_single_x4:
5580 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5581 Node->getValueType(0),
5582 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
5583 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
5584 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5585 return;
5586 case Intrinsic::aarch64_sve_srshl_x2:
5587 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5588 Node->getValueType(0),
5589 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
5590 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
5591 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5592 return;
5593 case Intrinsic::aarch64_sve_srshl_x4:
5594 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5595 Node->getValueType(0),
5596 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
5597 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
5598 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5599 return;
5600 case Intrinsic::aarch64_sve_urshl_x2:
5601 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5602 Node->getValueType(0),
5603 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
5604 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
5605 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5606 return;
5607 case Intrinsic::aarch64_sve_urshl_x4:
5608 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5609 Node->getValueType(0),
5610 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
5611 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
5612 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5613 return;
5614 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
5615 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5616 Node->getValueType(0),
5617 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
5618 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
5619 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5620 return;
5621 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
5622 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5623 Node->getValueType(0),
5624 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
5625 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
5626 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5627 return;
5628 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
5629 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5630 Node->getValueType(0),
5631 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
5632 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
5633 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5634 return;
5635 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
5636 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5637 Node->getValueType(0),
5638 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
5639 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
5640 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5641 return;
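// WHILE* pair forms: produce two predicate vectors from a pair of 64-bit
// scalar bounds.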
5642 case Intrinsic::aarch64_sve_whilege_x2:
5643 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5644 Node->getValueType(0),
5645 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
5646 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
5647 SelectWhilePair(Node, Op);
5648 return;
5649 case Intrinsic::aarch64_sve_whilegt_x2:
5650 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5651 Node->getValueType(0),
5652 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
5653 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
5654 SelectWhilePair(Node, Op);
5655 return;
5656 case Intrinsic::aarch64_sve_whilehi_x2:
5657 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5658 Node->getValueType(0),
5659 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
5660 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
5661 SelectWhilePair(Node, Op);
5662 return;
5663 case Intrinsic::aarch64_sve_whilehs_x2:
5664 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5665 Node->getValueType(0),
5666 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
5667 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
5668 SelectWhilePair(Node, Op);
5669 return;
5670 case Intrinsic::aarch64_sve_whilele_x2:
5671 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5672 Node->getValueType(0),
5673 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
5674 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
5675 SelectWhilePair(Node, Op);
5676 return;
5677 case Intrinsic::aarch64_sve_whilelo_x2:
5678 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5679 Node->getValueType(0),
5680 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
5681 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
5682 SelectWhilePair(Node, Op);
5683 return;
5684 case Intrinsic::aarch64_sve_whilels_x2:
5685 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5686 Node->getValueType(0),
5687 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
5688 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
5689 SelectWhilePair(Node, Op);
5690 return;
5691 case Intrinsic::aarch64_sve_whilelt_x2:
5692 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5693 Node->getValueType(0),
5694 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
5695 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
5696 SelectWhilePair(Node, Op);
5697 return;
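// Multi-vector min/max. In the FP opcode tables the first entry is the BF16
// form and the remaining entries cover f16/f32/f64.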
5698 case Intrinsic::aarch64_sve_smax_single_x2:
5699 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5700 Node->getValueType(0),
5701 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
5702 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
5703 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5704 return;
5705 case Intrinsic::aarch64_sve_umax_single_x2:
5706 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5707 Node->getValueType(0),
5708 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
5709 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
5710 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5711 return;
5712 case Intrinsic::aarch64_sve_fmax_single_x2:
5713 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5714 Node->getValueType(0),
5715 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
5716 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
5717 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5718 return;
5719 case Intrinsic::aarch64_sve_smax_single_x4:
5720 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5721 Node->getValueType(0),
5722 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
5723 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
5724 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5725 return;
5726 case Intrinsic::aarch64_sve_umax_single_x4:
5727 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5728 Node->getValueType(0),
5729 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
5730 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
5731 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5732 return;
5733 case Intrinsic::aarch64_sve_fmax_single_x4:
5734 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5735 Node->getValueType(0),
5736 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
5737 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
5738 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5739 return;
5740 case Intrinsic::aarch64_sve_smin_single_x2:
5741 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5742 Node->getValueType(0),
5743 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
5744 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
5745 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5746 return;
5747 case Intrinsic::aarch64_sve_umin_single_x2:
5748 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5749 Node->getValueType(0),
5750 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
5751 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
5752 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5753 return;
5754 case Intrinsic::aarch64_sve_fmin_single_x2:
5755 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5756 Node->getValueType(0),
5757 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
5758 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
5759 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5760 return;
5761 case Intrinsic::aarch64_sve_smin_single_x4:
5762 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5763 Node->getValueType(0),
5764 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
5765 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
5766 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5767 return;
5768 case Intrinsic::aarch64_sve_umin_single_x4:
5769 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5770 Node->getValueType(0),
5771 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
5772 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
5773 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5774 return;
5775 case Intrinsic::aarch64_sve_fmin_single_x4:
5776 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5777 Node->getValueType(0),
5778 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
5779 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
5780 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5781 return;
5782 case Intrinsic::aarch64_sve_smax_x2:
5783 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5784 Node->getValueType(0),
5785 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
5786 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
5787 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5788 return;
5789 case Intrinsic::aarch64_sve_umax_x2:
5790 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5791 Node->getValueType(0),
5792 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
5793 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
5794 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5795 return;
5796 case Intrinsic::aarch64_sve_fmax_x2:
5797 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5798 Node->getValueType(0),
5799 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
5800 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
5801 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5802 return;
5803 case Intrinsic::aarch64_sve_smax_x4:
5804 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5805 Node->getValueType(0),
5806 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
5807 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
5808 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5809 return;
5810 case Intrinsic::aarch64_sve_umax_x4:
5811 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5812 Node->getValueType(0),
5813 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
5814 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
5815 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5816 return;
5817 case Intrinsic::aarch64_sve_fmax_x4:
5818 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5819 Node->getValueType(0),
5820 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
5821 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
5822 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5823 return;
5824 case Intrinsic::aarch64_sve_smin_x2:
5825 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5826 Node->getValueType(0),
5827 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
5828 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
5829 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5830 return;
5831 case Intrinsic::aarch64_sve_umin_x2:
5832 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5833 Node->getValueType(0),
5834 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
5835 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
5836 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5837 return;
5838 case Intrinsic::aarch64_sve_fmin_x2:
5839 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5840 Node->getValueType(0),
5841 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
5842 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
5843 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5844 return;
5845 case Intrinsic::aarch64_sve_smin_x4:
5846 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5847 Node->getValueType(0),
5848 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
5849 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
5850 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5851 return;
5852 case Intrinsic::aarch64_sve_umin_x4:
5853 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5854 Node->getValueType(0),
5855 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
5856 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
5857 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5858 return;
5859 case Intrinsic::aarch64_sve_fmin_x4:
5860 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5861 Node->getValueType(0),
5862 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
5863 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
5864 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5865 return;
5866 case Intrinsic::aarch64_sve_fmaxnm_single_x2:
5867 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5868 Node->getValueType(0),
5869 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
5870 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
5871 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5872 return;
5873 case Intrinsic::aarch64_sve_fmaxnm_single_x4:
5874 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5875 Node->getValueType(0),
5876 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
5877 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
5878 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5879 return;
5880 case Intrinsic::aarch64_sve_fminnm_single_x2:
5881 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5882 Node->getValueType(0),
5883 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
5884 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
5885 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5886 return;
5887 case Intrinsic::aarch64_sve_fminnm_single_x4:
5888 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5889 Node->getValueType(0),
5890 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
5891 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
5892 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5893 return;
5894 case Intrinsic::aarch64_sve_fmaxnm_x2:
5895 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5896 Node->getValueType(0),
5897 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
5898 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
5899 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5900 return;
5901 case Intrinsic::aarch64_sve_fmaxnm_x4:
5902 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5903 Node->getValueType(0),
5904 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
5905 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
5906 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5907 return;
5908 case Intrinsic::aarch64_sve_fminnm_x2:
5909 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5910 Node->getValueType(0),
5911 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
5912 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
5913 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5914 return;
5915 case Intrinsic::aarch64_sve_fminnm_x4:
5916 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5917 Node->getValueType(0),
5918 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
5919 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
5920 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5921 return;
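// Multi-vector conversions: f32<->i32 forms plus the f16-to-f32 widening
// FCVT/FCVTL variants.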
5922 case Intrinsic::aarch64_sve_fcvtzs_x2:
5923 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
5924 return;
5925 case Intrinsic::aarch64_sve_scvtf_x2:
5926 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
5927 return;
5928 case Intrinsic::aarch64_sve_fcvtzu_x2:
5929 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
5930 return;
5931 case Intrinsic::aarch64_sve_ucvtf_x2:
5932 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
5933 return;
5934 case Intrinsic::aarch64_sve_fcvtzs_x4:
5935 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
5936 return;
5937 case Intrinsic::aarch64_sve_scvtf_x4:
5938 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
5939 return;
5940 case Intrinsic::aarch64_sve_fcvtzu_x4:
5941 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
5942 return;
5943 case Intrinsic::aarch64_sve_ucvtf_x4:
5944 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
5945 return;
5946 case Intrinsic::aarch64_sve_fcvt_widen_x2:
5947 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
5948 return;
5949 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
5950 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
5951 return;
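// CLAMP: clamp each vector of the tuple between two bounds; integer, FP and
// BF16 variants each have their own opcodes.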
5952 case Intrinsic::aarch64_sve_sclamp_single_x2:
5953 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5954 Node->getValueType(0),
5955 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
5956 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
5957 SelectClamp(Node, 2, Op);
5958 return;
5959 case Intrinsic::aarch64_sve_uclamp_single_x2:
5960 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5961 Node->getValueType(0),
5962 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
5963 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
5964 SelectClamp(Node, 2, Op);
5965 return;
5966 case Intrinsic::aarch64_sve_fclamp_single_x2:
5967 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5968 Node->getValueType(0),
5969 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
5970 AArch64::FCLAMP_VG2_2Z2Z_D}))
5971 SelectClamp(Node, 2, Op);
5972 return;
5973 case Intrinsic::aarch64_sve_bfclamp_single_x2:
5974 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
5975 return;
5976 case Intrinsic::aarch64_sve_sclamp_single_x4:
5977 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5978 Node->getValueType(0),
5979 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
5980 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
5981 SelectClamp(Node, 4, Op);
5982 return;
5983 case Intrinsic::aarch64_sve_uclamp_single_x4:
5984 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5985 Node->getValueType(0),
5986 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
5987 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
5988 SelectClamp(Node, 4, Op);
5989 return;
5990 case Intrinsic::aarch64_sve_fclamp_single_x4:
5991 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5992 Node->getValueType(0),
5993 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
5994 AArch64::FCLAMP_VG4_4Z4Z_D}))
5995 SelectClamp(Node, 4, Op);
5996 return;
5997 case Intrinsic::aarch64_sve_bfclamp_single_x4:
5998 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
5999 return;
6000 case Intrinsic::aarch64_sve_add_single_x2:
6001 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6002 Node->getValueType(0),
6003 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6004 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6005 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6006 return;
6007 case Intrinsic::aarch64_sve_add_single_x4:
6008 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6009 Node->getValueType(0),
6010 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6011 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6012 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6013 return;
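// Multi-vector permutes: ZIP/UZP over 2- or 4-register tuples; the _Q forms
// interleave 128-bit blocks rather than individual elements.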
6014 case Intrinsic::aarch64_sve_zip_x2:
6015 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6016 Node->getValueType(0),
6017 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6018 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6019 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6020 return;
6021 case Intrinsic::aarch64_sve_zipq_x2:
6022 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6023 AArch64::ZIP_VG2_2ZZZ_Q);
6024 return;
6025 case Intrinsic::aarch64_sve_zip_x4:
6026 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6027 Node->getValueType(0),
6028 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6029 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6030 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6031 return;
6032 case Intrinsic::aarch64_sve_zipq_x4:
6033 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6034 AArch64::ZIP_VG4_4Z4Z_Q);
6035 return;
6036 case Intrinsic::aarch64_sve_uzp_x2:
6037 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6038 Node->getValueType(0),
6039 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6040 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6041 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6042 return;
6043 case Intrinsic::aarch64_sve_uzpq_x2:
6044 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6045 AArch64::UZP_VG2_2ZZZ_Q);
6046 return;
6047 case Intrinsic::aarch64_sve_uzp_x4:
6048 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6049 Node->getValueType(0),
6050 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6051 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6052 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6053 return;
6054 case Intrinsic::aarch64_sve_uzpq_x4:
6055 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6056 AArch64::UZP_VG4_4Z4Z_Q);
6057 return;
6058 case Intrinsic::aarch64_sve_sel_x2:
6059 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6060 Node->getValueType(0),
6061 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6062 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6063 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
6064 return;
6065 case Intrinsic::aarch64_sve_sel_x4:
6066 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6067 Node->getValueType(0),
6068 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6069 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6070 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
6071 return;
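// Multi-vector FRINT rounding: only f32 element forms exist, so no
// type-based opcode selection is needed.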
6072 case Intrinsic::aarch64_sve_frinta_x2:
6073 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
6074 return;
6075 case Intrinsic::aarch64_sve_frinta_x4:
6076 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
6077 return;
6078 case Intrinsic::aarch64_sve_frintm_x2:
6079 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
6080 return;
6081 case Intrinsic::aarch64_sve_frintm_x4:
6082 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
6083 return;
6084 case Intrinsic::aarch64_sve_frintn_x2:
6085 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
6086 return;
6087 case Intrinsic::aarch64_sve_frintn_x4:
6088 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
6089 return;
6090 case Intrinsic::aarch64_sve_frintp_x2:
6091 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
6092 return;
6093 case Intrinsic::aarch64_sve_frintp_x4:
6094 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
6095 return;
6096 case Intrinsic::aarch64_sve_sunpk_x2:
6097 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6098 Node->getValueType(0),
6099 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6100 AArch64::SUNPK_VG2_2ZZ_D}))
6101 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6102 return;
6103 case Intrinsic::aarch64_sve_uunpk_x2:
6104 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6105 Node->getValueType(0),
6106 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6107 AArch64::UUNPK_VG2_2ZZ_D}))
6108 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6109 return;
6110 case Intrinsic::aarch64_sve_sunpk_x4:
6111 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6112 Node->getValueType(0),
6113 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6114 AArch64::SUNPK_VG4_4Z2Z_D}))
6115 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6116 return;
6117 case Intrinsic::aarch64_sve_uunpk_x4:
6118 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6119 Node->getValueType(0),
6120 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6121 AArch64::UUNPK_VG4_4Z2Z_D}))
6122 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6123 return;
6124 case Intrinsic::aarch64_sve_pext_x2: {
6125 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6126 Node->getValueType(0),
6127 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6128 AArch64::PEXT_2PCI_D}))
6129 SelectPExtPair(Node, Op);
6130 return;
6131 }
6132 }
6133 break;
6134 }
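// Void intrinsics (mostly stores): the element type is taken from the value
// being stored (operand 2) rather than from a result.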
6135 case ISD::INTRINSIC_VOID: {
6136 unsigned IntNo = Node->getConstantOperandVal(1);
6137 if (Node->getNumOperands() >= 3)
6138 VT = Node->getOperand(2)->getValueType(0);
6139 switch (IntNo) {
6140 default:
6141 break;
6142 case Intrinsic::aarch64_neon_st1x2: {
6143 if (VT == MVT::v8i8) {
6144 SelectStore(Node, 2, AArch64::ST1Twov8b);
6145 return;
6146 } else if (VT == MVT::v16i8) {
6147 SelectStore(Node, 2, AArch64::ST1Twov16b);
6148 return;
6149 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6150 VT == MVT::v4bf16) {
6151 SelectStore(Node, 2, AArch64::ST1Twov4h);
6152 return;
6153 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6154 VT == MVT::v8bf16) {
6155 SelectStore(Node, 2, AArch64::ST1Twov8h);
6156 return;
6157 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6158 SelectStore(Node, 2, AArch64::ST1Twov2s);
6159 return;
6160 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6161 SelectStore(Node, 2, AArch64::ST1Twov4s);
6162 return;
6163 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6164 SelectStore(Node, 2, AArch64::ST1Twov2d);
6165 return;
6166 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6167 SelectStore(Node, 2, AArch64::ST1Twov1d);
6168 return;
6169 }
6170 break;
6171 }
6172 case Intrinsic::aarch64_neon_st1x3: {
6173 if (VT == MVT::v8i8) {
6174 SelectStore(Node, 3, AArch64::ST1Threev8b);
6175 return;
6176 } else if (VT == MVT::v16i8) {
6177 SelectStore(Node, 3, AArch64::ST1Threev16b);
6178 return;
6179 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6180 VT == MVT::v4bf16) {
6181 SelectStore(Node, 3, AArch64::ST1Threev4h);
6182 return;
6183 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6184 VT == MVT::v8bf16) {
6185 SelectStore(Node, 3, AArch64::ST1Threev8h);
6186 return;
6187 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6188 SelectStore(Node, 3, AArch64::ST1Threev2s);
6189 return;
6190 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6191 SelectStore(Node, 3, AArch64::ST1Threev4s);
6192 return;
6193 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6194 SelectStore(Node, 3, AArch64::ST1Threev2d);
6195 return;
6196 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6197 SelectStore(Node, 3, AArch64::ST1Threev1d);
6198 return;
6199 }
6200 break;
6201 }
6202 case Intrinsic::aarch64_neon_st1x4: {
6203 if (VT == MVT::v8i8) {
6204 SelectStore(Node, 4, AArch64::ST1Fourv8b);
6205 return;
6206 } else if (VT == MVT::v16i8) {
6207 SelectStore(Node, 4, AArch64::ST1Fourv16b);
6208 return;
6209 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6210 VT == MVT::v4bf16) {
6211 SelectStore(Node, 4, AArch64::ST1Fourv4h);
6212 return;
6213 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6214 VT == MVT::v8bf16) {
6215 SelectStore(Node, 4, AArch64::ST1Fourv8h);
6216 return;
6217 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6218 SelectStore(Node, 4, AArch64::ST1Fourv2s);
6219 return;
6220 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6221 SelectStore(Node, 4, AArch64::ST1Fourv4s);
6222 return;
6223 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6224 SelectStore(Node, 4, AArch64::ST1Fourv2d);
6225 return;
6226 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6227 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6228 return;
6229 }
6230 break;
6231 }
6232 case Intrinsic::aarch64_neon_st2: {
6233 if (VT == MVT::v8i8) {
6234 SelectStore(Node, 2, AArch64::ST2Twov8b);
6235 return;
6236 } else if (VT == MVT::v16i8) {
6237 SelectStore(Node, 2, AArch64::ST2Twov16b);
6238 return;
6239 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6240 VT == MVT::v4bf16) {
6241 SelectStore(Node, 2, AArch64::ST2Twov4h);
6242 return;
6243 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6244 VT == MVT::v8bf16) {
6245 SelectStore(Node, 2, AArch64::ST2Twov8h);
6246 return;
6247 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6248 SelectStore(Node, 2, AArch64::ST2Twov2s);
6249 return;
6250 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6251 SelectStore(Node, 2, AArch64::ST2Twov4s);
6252 return;
6253 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6254 SelectStore(Node, 2, AArch64::ST2Twov2d);
6255 return;
6256 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6257 SelectStore(Node, 2, AArch64::ST1Twov1d);
6258 return;
6259 }
6260 break;
6261 }
6262 case Intrinsic::aarch64_neon_st3: {
6263 if (VT == MVT::v8i8) {
6264 SelectStore(Node, 3, AArch64::ST3Threev8b);
6265 return;
6266 } else if (VT == MVT::v16i8) {
6267 SelectStore(Node, 3, AArch64::ST3Threev16b);
6268 return;
6269 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6270 VT == MVT::v4bf16) {
6271 SelectStore(Node, 3, AArch64::ST3Threev4h);
6272 return;
6273 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6274 VT == MVT::v8bf16) {
6275 SelectStore(Node, 3, AArch64::ST3Threev8h);
6276 return;
6277 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6278 SelectStore(Node, 3, AArch64::ST3Threev2s);
6279 return;
6280 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6281 SelectStore(Node, 3, AArch64::ST3Threev4s);
6282 return;
6283 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6284 SelectStore(Node, 3, AArch64::ST3Threev2d);
6285 return;
6286 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6287 SelectStore(Node, 3, AArch64::ST1Threev1d);
6288 return;
6289 }
6290 break;
6291 }
6292 case Intrinsic::aarch64_neon_st4: {
6293 if (VT == MVT::v8i8) {
6294 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6295 return;
6296 } else if (VT == MVT::v16i8) {
6297 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6298 return;
6299 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6300 VT == MVT::v4bf16) {
6301 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6302 return;
6303 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6304 VT == MVT::v8bf16) {
6305 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6306 return;
6307 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6308 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6309 return;
6310 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6311 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6312 return;
6313 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6314 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6315 return;
6316 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6317 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6318 return;
6319 }
6320 break;
6321 }
6322 case Intrinsic::aarch64_neon_st2lane: {
6323 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6324 SelectStoreLane(Node, 2, AArch64::ST2i8);
6325 return;
6326 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6327 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6328 SelectStoreLane(Node, 2, AArch64::ST2i16);
6329 return;
6330 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6331 VT == MVT::v2f32) {
6332 SelectStoreLane(Node, 2, AArch64::ST2i32);
6333 return;
6334 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6335 VT == MVT::v1f64) {
6336 SelectStoreLane(Node, 2, AArch64::ST2i64);
6337 return;
6338 }
6339 break;
6340 }
6341 case Intrinsic::aarch64_neon_st3lane: {
6342 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6343 SelectStoreLane(Node, 3, AArch64::ST3i8);
6344 return;
6345 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6346 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6347 SelectStoreLane(Node, 3, AArch64::ST3i16);
6348 return;
6349 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6350 VT == MVT::v2f32) {
6351 SelectStoreLane(Node, 3, AArch64::ST3i32);
6352 return;
6353 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6354 VT == MVT::v1f64) {
6355 SelectStoreLane(Node, 3, AArch64::ST3i64);
6356 return;
6357 }
6358 break;
6359 }
6360 case Intrinsic::aarch64_neon_st4lane: {
6361 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6362 SelectStoreLane(Node, 4, AArch64::ST4i8);
6363 return;
6364 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6365 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6366 SelectStoreLane(Node, 4, AArch64::ST4i16);
6367 return;
6368 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6369 VT == MVT::v2f32) {
6370 SelectStoreLane(Node, 4, AArch64::ST4i32);
6371 return;
6372 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6373 VT == MVT::v1f64) {
6374 SelectStoreLane(Node, 4, AArch64::ST4i64);
6375 return;
6376 }
6377 break;
6378 }
6379 case Intrinsic::aarch64_sve_st2q: {
6380 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6381 return;
6382 }
6383 case Intrinsic::aarch64_sve_st3q: {
6384 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6385 return;
6386 }
6387 case Intrinsic::aarch64_sve_st4q: {
6388 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
6389 return;
6390 }
6391 case Intrinsic::aarch64_sve_st2: {
6392 if (VT == MVT::nxv16i8) {
6393 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
6394 return;
6395 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6396 VT == MVT::nxv8bf16) {
6397 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
6398 return;
6399 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6400 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
6401 return;
6402 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6403 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
6404 return;
6405 }
6406 break;
6407 }
6408 case Intrinsic::aarch64_sve_st3: {
6409 if (VT == MVT::nxv16i8) {
6410 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
6411 return;
6412 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6413 VT == MVT::nxv8bf16) {
6414 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
6415 return;
6416 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6417 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
6418 return;
6419 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6420 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
6421 return;
6422 }
6423 break;
6424 }
6425 case Intrinsic::aarch64_sve_st4: {
6426 if (VT == MVT::nxv16i8) {
6427 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
6428 return;
6429 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6430 VT == MVT::nxv8bf16) {
6431 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
6432 return;
6433 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6434 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
6435 return;
6436 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6437 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
6438 return;
6439 }
6440 break;
6441 }
6442 }
6443 break;
6444 }
6445 case AArch64ISD::LD2post: {
6446 if (VT == MVT::v8i8) {
6447 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
6448 return;
6449 } else if (VT == MVT::v16i8) {
6450 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
6451 return;
6452 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6453 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
6454 return;
6455 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6456 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
6457 return;
6458 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6459 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
6460 return;
6461 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6462 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
6463 return;
6464 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6465 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6466 return;
6467 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6468 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
6469 return;
6470 }
6471 break;
6472 }
6473 case AArch64ISD::LD3post: {
6474 if (VT == MVT::v8i8) {
6475 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
6476 return;
6477 } else if (VT == MVT::v16i8) {
6478 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
6479 return;
6480 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6481 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
6482 return;
6483 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6484 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
6485 return;
6486 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6487 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
6488 return;
6489 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6490 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
6491 return;
6492 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6493 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6494 return;
6495 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6496 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
6497 return;
6498 }
6499 break;
6500 }
6501 case AArch64ISD::LD4post: {
6502 if (VT == MVT::v8i8) {
6503 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
6504 return;
6505 } else if (VT == MVT::v16i8) {
6506 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
6507 return;
6508 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6509 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
6510 return;
6511 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6512 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
6513 return;
6514 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6515 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
6516 return;
6517 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6518 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
6519 return;
6520 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6521 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6522 return;
6523 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6524 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
6525 return;
6526 }
6527 break;
6528 }
6529 case AArch64ISD::LD1x2post: {
6530 if (VT == MVT::v8i8) {
6531 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
6532 return;
6533 } else if (VT == MVT::v16i8) {
6534 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
6535 return;
6536 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6537 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
6538 return;
6539 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6540 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
6541 return;
6542 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6543 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
6544 return;
6545 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6546 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
6547 return;
6548 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6549 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6550 return;
6551 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6552 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
6553 return;
6554 }
6555 break;
6556 }
6557 case AArch64ISD::LD1x3post: {
6558 if (VT == MVT::v8i8) {
6559 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
6560 return;
6561 } else if (VT == MVT::v16i8) {
6562 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
6563 return;
6564 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6565 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
6566 return;
6567 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6568 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
6569 return;
6570 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6571 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
6572 return;
6573 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6574 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
6575 return;
6576 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6577 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6578 return;
6579 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6580 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
6581 return;
6582 }
6583 break;
6584 }
6585 case AArch64ISD::LD1x4post: {
6586 if (VT == MVT::v8i8) {
6587 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
6588 return;
6589 } else if (VT == MVT::v16i8) {
6590 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
6591 return;
6592 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6593 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
6594 return;
6595 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6596 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
6597 return;
6598 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6599 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
6600 return;
6601 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6602 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
6603 return;
6604 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6605 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6606 return;
6607 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6608 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
6609 return;
6610 }
6611 break;
6612 }
6613 case AArch64ISD::LD1DUPpost: {
6614 if (VT == MVT::v8i8) {
6615 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
6616 return;
6617 } else if (VT == MVT::v16i8) {
6618 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
6619 return;
6620 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6621 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
6622 return;
6623 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6624 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
6625 return;
6626 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6627 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
6628 return;
6629 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6630 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
6631 return;
6632 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6633 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
6634 return;
6635 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6636 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
6637 return;
6638 }
6639 break;
6640 }
6641 case AArch64ISD::LD2DUPpost: {
6642 if (VT == MVT::v8i8) {
6643 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
6644 return;
6645 } else if (VT == MVT::v16i8) {
6646 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
6647 return;
6648 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6649 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
6650 return;
6651 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6652 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
6653 return;
6654 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6655 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
6656 return;
6657 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6658 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
6659 return;
6660 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6661 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
6662 return;
6663 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6664 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
6665 return;
6666 }
6667 break;
6668 }
6669 case AArch64ISD::LD3DUPpost: {
6670 if (VT == MVT::v8i8) {
6671 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
6672 return;
6673 } else if (VT == MVT::v16i8) {
6674 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
6675 return;
6676 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6677 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
6678 return;
6679 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6680 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
6681 return;
6682 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6683 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
6684 return;
6685 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6686 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
6687 return;
6688 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6689 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
6690 return;
6691 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6692 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
6693 return;
6694 }
6695 break;
6696 }
6697 case AArch64ISD::LD4DUPpost: {
6698 if (VT == MVT::v8i8) {
6699 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
6700 return;
6701 } else if (VT == MVT::v16i8) {
6702 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
6703 return;
6704 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6705 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
6706 return;
6707 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6708 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
6709 return;
6710 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6711 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
6712 return;
6713 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6714 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
6715 return;
6716 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6717 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
6718 return;
6719 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6720 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
6721 return;
6722 }
6723 break;
6724 }
6725 case AArch64ISD::LD1LANEpost: {
6726 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6727 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
6728 return;
6729 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6730 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6731 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
6732 return;
6733 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6734 VT == MVT::v2f32) {
6735 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
6736 return;
6737 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6738 VT == MVT::v1f64) {
6739 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
6740 return;
6741 }
6742 break;
6743 }
6744 case AArch64ISD::LD2LANEpost: {
6745 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6746 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
6747 return;
6748 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6749 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6750 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
6751 return;
6752 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6753 VT == MVT::v2f32) {
6754 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
6755 return;
6756 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6757 VT == MVT::v1f64) {
6758 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
6759 return;
6760 }
6761 break;
6762 }
6763 case AArch64ISD::LD3LANEpost: {
6764 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6765 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
6766 return;
6767 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6768 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6769 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
6770 return;
6771 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6772 VT == MVT::v2f32) {
6773 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
6774 return;
6775 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6776 VT == MVT::v1f64) {
6777 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
6778 return;
6779 }
6780 break;
6781 }
6782 case AArch64ISD::LD4LANEpost: {
6783 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6784 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
6785 return;
6786 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6787 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6788 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
6789 return;
6790 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6791 VT == MVT::v2f32) {
6792 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
6793 return;
6794 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6795 VT == MVT::v1f64) {
6796 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
6797 return;
6798 }
6799 break;
6800 }
6801 case AArch64ISD::ST2post: {
6802 VT = Node->getOperand(1).getValueType();
6803 if (VT == MVT::v8i8) {
6804 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
6805 return;
6806 } else if (VT == MVT::v16i8) {
6807 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
6808 return;
6809 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6810 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
6811 return;
6812 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6813 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
6814 return;
6815 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6816 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
6817 return;
6818 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6819 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
6820 return;
6821 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6822 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
6823 return;
6824 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6825 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
6826 return;
6827 }
6828 break;
6829 }
6830 case AArch64ISD::ST3post: {
6831 VT = Node->getOperand(1).getValueType();
6832 if (VT == MVT::v8i8) {
6833 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
6834 return;
6835 } else if (VT == MVT::v16i8) {
6836 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
6837 return;
6838 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6839 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
6840 return;
6841 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6842 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
6843 return;
6844 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6845 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
6846 return;
6847 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6848 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
6849 return;
6850 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6851 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
6852 return;
6853 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6854 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
6855 return;
6856 }
6857 break;
6858 }
6859 case AArch64ISD::ST4post: {
6860 VT = Node->getOperand(1).getValueType();
6861 if (VT == MVT::v8i8) {
6862 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
6863 return;
6864 } else if (VT == MVT::v16i8) {
6865 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
6866 return;
6867 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6868 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
6869 return;
6870 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6871 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
6872 return;
6873 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6874 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
6875 return;
6876 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6877 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
6878 return;
6879 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6880 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
6881 return;
6882 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6883 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
6884 return;
6885 }
6886 break;
6887 }
6888 case AArch64ISD::ST1x2post: {
6889 VT = Node->getOperand(1).getValueType();
6890 if (VT == MVT::v8i8) {
6891 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
6892 return;
6893 } else if (VT == MVT::v16i8) {
6894 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
6895 return;
6896 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6897 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
6898 return;
6899 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6900 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
6901 return;
6902 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6903 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
6904 return;
6905 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6906 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
6907 return;
6908 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6909 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
6910 return;
6911 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6912 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
6913 return;
6914 }
6915 break;
6916 }
6917 case AArch64ISD::ST1x3post: {
6918 VT = Node->getOperand(1).getValueType();
6919 if (VT == MVT::v8i8) {
6920 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
6921 return;
6922 } else if (VT == MVT::v16i8) {
6923 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
6924 return;
6925 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6926 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
6927 return;
6928 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6929 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
6930 return;
6931 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6932 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
6933 return;
6934 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6935 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
6936 return;
6937 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6938 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
6939 return;
6940 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6941 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
6942 return;
6943 }
6944 break;
6945 }
6946 case AArch64ISD::ST1x4post: {
6947 VT = Node->getOperand(1).getValueType();
6948 if (VT == MVT::v8i8) {
6949 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
6950 return;
6951 } else if (VT == MVT::v16i8) {
6952 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
6953 return;
6954 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6955 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
6956 return;
6957 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6958 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
6959 return;
6960 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6961 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
6962 return;
6963 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6964 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
6965 return;
6966 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6967 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
6968 return;
6969 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6970 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
6971 return;
6972 }
6973 break;
6974 }
6975 case AArch64ISD::ST2LANEpost: {
6976 VT = Node->getOperand(1).getValueType();
6977 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6978 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
6979 return;
6980 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6981 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6982 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
6983 return;
6984 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6985 VT == MVT::v2f32) {
6986 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
6987 return;
6988 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6989 VT == MVT::v1f64) {
6990 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
6991 return;
6992 }
6993 break;
6994 }
6995 case AArch64ISD::ST3LANEpost: {
6996 VT = Node->getOperand(1).getValueType();
6997 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6998 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
6999 return;
7000 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7001 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7002 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
7003 return;
7004 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7005 VT == MVT::v2f32) {
7006 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
7007 return;
7008 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7009 VT == MVT::v1f64) {
7010 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
7011 return;
7012 }
7013 break;
7014 }
7015 case AArch64ISD::ST4LANEpost: {
7016 VT = Node->getOperand(1).getValueType();
7017 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7018 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
7019 return;
7020 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7021 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7022 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
7023 return;
7024 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7025 VT == MVT::v2f32) {
7026 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
7027 return;
7028 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7029 VT == MVT::v1f64) {
7030 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
7031 return;
7032 }
7033 break;
7034 }
7035 case AArch64ISD::SVE_LD2_MERGE_ZERO: {
7036 if (VT == MVT::nxv16i8) {
7037 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
7038 return;
7039 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7040 VT == MVT::nxv8bf16) {
7041 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
7042 return;
7043 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7044 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
7045 return;
7046 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7047 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
7048 return;
7049 }
7050 break;
7051 }
7052 case AArch64ISD::SVE_LD3_MERGE_ZERO: {
7053 if (VT == MVT::nxv16i8) {
7054 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
7055 return;
7056 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7057 VT == MVT::nxv8bf16) {
7058 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
7059 return;
7060 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7061 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
7062 return;
7063 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7064 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
7065 return;
7066 }
7067 break;
7068 }
7069 case AArch64ISD::SVE_LD4_MERGE_ZERO: {
7070 if (VT == MVT::nxv16i8) {
7071 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
7072 return;
7073 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7074 VT == MVT::nxv8bf16) {
7075 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
7076 return;
7077 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7078 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
7079 return;
7080 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7081 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
7082 return;
7083 }
7084 break;
7085 }
7086 }
7087
7088 // Select the default instruction
7089 SelectCode(Node);
7090}
7091
7092 /// createAArch64ISelDag - This pass converts a legalized DAG into an
7093/// AArch64-specific DAG, ready for instruction scheduling.
7094 FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
7095 CodeGenOptLevel OptLevel) {
7096 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7097}
7098
7099/// When \p PredVT is a scalable vector predicate in the form
7100 /// MVT::nx<M>xi1, it builds the corresponding scalable vector of
7101/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7102/// structured vectors (NumVec >1), the output data type is
7103/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7104/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7105/// EVT.
7106 static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
7107 unsigned NumVec) {
7108 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
7109 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
7110 return EVT();
7111
7112 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7113 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7114 return EVT();
7115
7116 ElementCount EC = PredVT.getVectorElementCount();
7117 EVT ScalarVT =
7118 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7119 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
7120
7121 return MemVT;
7122}
7123
7124/// Return the EVT of the data associated to a memory operation in \p
7125 /// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
7126 static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
7127 if (isa<MemSDNode>(Root))
7128 return cast<MemSDNode>(Root)->getMemoryVT();
7129
7130 if (isa<MemIntrinsicSDNode>(Root))
7131 return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();
7132
7133 const unsigned Opcode = Root->getOpcode();
7134 // For custom ISD nodes, we have to look at them individually to extract the
7135 // type of the data moved to/from memory.
7136 switch (Opcode) {
7137 case AArch64ISD::LD1_MERGE_ZERO:
7138 case AArch64ISD::LD1S_MERGE_ZERO:
7139 case AArch64ISD::LDNF1_MERGE_ZERO:
7140 case AArch64ISD::LDNF1S_MERGE_ZERO:
7141 return cast<VTSDNode>(Root->getOperand(3))->getVT();
7142 case AArch64ISD::ST1_PRED:
7143 return cast<VTSDNode>(Root->getOperand(4))->getVT();
7144 case AArch64ISD::SVE_LD2_MERGE_ZERO:
7145 return getPackedVectorTypeFromPredicateType(
7146 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
7147 case AArch64ISD::SVE_LD3_MERGE_ZERO:
7148 return getPackedVectorTypeFromPredicateType(
7149 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
7150 case AArch64ISD::SVE_LD4_MERGE_ZERO:
7151 return getPackedVectorTypeFromPredicateType(
7152 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
7153 default:
7154 break;
7155 }
7156
7157 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7158 return EVT();
7159
7160 switch (Root->getConstantOperandVal(1)) {
7161 default:
7162 return EVT();
7163 case Intrinsic::aarch64_sme_ldr:
7164 case Intrinsic::aarch64_sme_str:
7165 return MVT::nxv16i8;
7166 case Intrinsic::aarch64_sve_prf:
7167 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7168 // width of the predicate.
7169 return getPackedVectorTypeFromPredicateType(
7170 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7171 case Intrinsic::aarch64_sve_ld2_sret:
7172 case Intrinsic::aarch64_sve_ld2q_sret:
7173 return getPackedVectorTypeFromPredicateType(
7174 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7175 case Intrinsic::aarch64_sve_st2q:
7176 return getPackedVectorTypeFromPredicateType(
7177 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7178 case Intrinsic::aarch64_sve_ld3_sret:
7179 case Intrinsic::aarch64_sve_ld3q_sret:
7180 return getPackedVectorTypeFromPredicateType(
7181 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7182 case Intrinsic::aarch64_sve_st3q:
7183 return getPackedVectorTypeFromPredicateType(
7184 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7185 case Intrinsic::aarch64_sve_ld4_sret:
7186 case Intrinsic::aarch64_sve_ld4q_sret:
7187 return getPackedVectorTypeFromPredicateType(
7188 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7189 case Intrinsic::aarch64_sve_st4q:
7190 return getPackedVectorTypeFromPredicateType(
7191 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
7192 case Intrinsic::aarch64_sve_ld1udq:
7193 case Intrinsic::aarch64_sve_st1dq:
7194 return EVT(MVT::nxv1i64);
7195 case Intrinsic::aarch64_sve_ld1uwq:
7196 case Intrinsic::aarch64_sve_st1wq:
7197 return EVT(MVT::nxv1i32);
7198 }
7199}
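// Editor's worked example (not part of the original source): a plain
// MemSDNode simply reports its own memory VT, while for
// Intrinsic::aarch64_sve_ld2_sret with an nxv16i1 governing predicate
// (operand 2) the helper above produces nxv32i8, the packed type covering
// both structure registers of the LD2.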
7200
7201/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7202 /// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
7203/// where Root is the memory access using N for its address.
7204template <int64_t Min, int64_t Max>
7205bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7206 SDValue &Base,
7207 SDValue &OffImm) {
7208 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
7209 const DataLayout &DL = CurDAG->getDataLayout();
7210 const MachineFrameInfo &MFI = MF->getFrameInfo();
7211
7212 if (N.getOpcode() == ISD::FrameIndex) {
7213 int FI = cast<FrameIndexSDNode>(N)->getIndex();
7214 // We can only encode VL scaled offsets, so only fold in frame indexes
7215 // referencing SVE objects.
7216 if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
7217 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7218 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7219 return true;
7220 }
7221
7222 return false;
7223 }
7224
7225 if (MemVT == EVT())
7226 return false;
7227
7228 if (N.getOpcode() != ISD::ADD)
7229 return false;
7230
7231 SDValue VScale = N.getOperand(1);
7232 if (VScale.getOpcode() != ISD::VSCALE)
7233 return false;
7234
7235 TypeSize TS = MemVT.getSizeInBits();
7236 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7237 int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7238
7239 if ((MulImm % MemWidthBytes) != 0)
7240 return false;
7241
7242 int64_t Offset = MulImm / MemWidthBytes;
7243 if (Offset < Min || Offset > Max)
7244 return false;
7245
7246 Base = N.getOperand(0);
7247 if (Base.getOpcode() == ISD::FrameIndex) {
7248 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7249 // We can only encode VL scaled offsets, so only fold in frame indexes
7250 // referencing SVE objects.
7251 if (MFI.getStackID(FI) == TargetStackID::ScalableVector)
7252 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7253 }
7254
7255 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7256 return true;
7257}
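// Editor's worked example (not part of the original source): for an access
// whose MemVT is nxv4i32 (128 bits, so MemWidthBytes == 16), an address of
// the form (add x, (vscale 32)) folds to Base = x and OffImm = 2 when 2
// lies within [Min, Max]; a vscale multiplier of 24 is rejected because it
// is not a multiple of 16.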
7258
7259/// Select register plus register addressing mode for SVE, with scaled
7260/// offset.
7261bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7262 SDValue &Base,
7263 SDValue &Offset) {
7264 if (N.getOpcode() != ISD::ADD)
7265 return false;
7266
7267 // Process an ADD node.
7268 const SDValue LHS = N.getOperand(0);
7269 const SDValue RHS = N.getOperand(1);
7270
7271 // 8-bit data does not come with the SHL node, so it is treated
7272 // separately.
7273 if (Scale == 0) {
7274 Base = LHS;
7275 Offset = RHS;
7276 return true;
7277 }
7278
7279 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7280 int64_t ImmOff = C->getSExtValue();
7281 unsigned Size = 1 << Scale;
7282
7283 // To use the reg+reg addressing mode, the immediate must be a multiple of
7284 // the vector element's byte size.
7285 if (ImmOff % Size)
7286 return false;
7287
7288 SDLoc DL(N);
7289 Base = LHS;
7290 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7291 SDValue Ops[] = {Offset};
7292 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7293 Offset = SDValue(MI, 0);
7294 return true;
7295 }
7296
7297 // Check if the RHS is a shift node with a constant.
7298 if (RHS.getOpcode() != ISD::SHL)
7299 return false;
7300
7301 const SDValue ShiftRHS = RHS.getOperand(1);
7302 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7303 if (C->getZExtValue() == Scale) {
7304 Base = LHS;
7305 Offset = RHS.getOperand(0);
7306 return true;
7307 }
7308
7309 return false;
7310}
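// Editor's worked example (not part of the original source): with
// Scale == 2 (word elements), (add x, (shl y, 2)) selects Base = x and
// Offset = y; (add x, 8) is also accepted, materialising 8 >> 2 == 2 into a
// register via MOVi64imm, whereas (add x, 6) fails because 6 is not a
// multiple of the 4-byte element size.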
7311
7312bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7313 const AArch64TargetLowering *TLI =
7314 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7315
7316 return TLI->isAllActivePredicate(*CurDAG, N);
7317}
7318
7319bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7320 EVT VT = N.getValueType();
7321 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7322}
7323
7324bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7325 SDValue &Base, SDValue &Offset,
7326 unsigned Scale) {
7327 // Try to untangle an ADD node into a 'reg + offset'
7328 if (N.getOpcode() == ISD::ADD)
7329 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
7330 int64_t ImmOff = C->getSExtValue();
7331 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0))) {
7332 Base = N.getOperand(0);
7333 Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7334 return true;
7335 }
7336 }
7337
7338 // By default, just match reg + 0.
7339 Base = N;
7340 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7341 return true;
7342}
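// Editor's worked example (not part of the original source; the MaxSize and
// Scale values are chosen for illustration): with MaxSize == 15 and
// Scale == 1, (add x, 7) yields Base = x and Offset = 7, while (add x, 17)
// exceeds MaxSize and falls back to the default match, with Base set to the
// whole ADD node and Offset = 0.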
unsigned SubReg
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms)
static int getIntOperandFromRegisterString(StringRef RegString)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, unsigned NumberOfIgnoredHighBits, EVT VT)
Does DstMask form a complementary pair with the mask provided by BitsToBeInserted,...
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N)
Instructions that accept extend modifiers like UXTW expect the register being extended to be a GPR32,...
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op, SDValue &Src, int &DstLSB, int &Width)
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, SDValue &Src, int &DstLSB, int &Width)
Does this tree qualify as an attempt to move a bitfield into position, essentially "(and (shl VAL,...
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB, unsigned NumberOfIgnoredLowBits, bool BiggerPattern)
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits=0, bool BiggerPattern=false)
static bool isShiftedMask(uint64_t Mask, EVT VT)
bool SelectSMETile(unsigned &BaseReg, unsigned TileNum)
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root)
Return the EVT of the data associated to a memory operation in Root.
static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isWorthFoldingADDlow(SDValue N)
If there's a use of this ADDlow that's not itself a load/store then we'll need to create a real ADD i...
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N)
getShiftTypeForNode - Translate a shift node to the corresponding ShiftType value.
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB)
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef< unsigned > Opcodes)
This function selects an opcode from a list of opcodes, which is expected to be the opcode for { 8-bi...
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, unsigned NumVec)
When PredVT is a scalable vector predicate in the form MVT::nx<M>xi1, it builds the correspondent sca...
static bool isPreferredADD(int64_t ImmOff)
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, uint64_t Imm, uint64_t MSB, unsigned Depth)
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount)
Create a machine node performing a notional SHL of Op by ShlAmount.
static bool isWorthFoldingSHL(SDValue V)
Determine whether it is worth it to fold SHL into the addressing mode.
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, bool BiggerPattern)
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, SDValue Src, SDValue Dst, SelectionDAG *CurDAG, const bool BiggerPattern)
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, SDValue Orig, unsigned Depth)
static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG)
static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth)
#define PASS_NAME
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth=0)
#define DEBUG_TYPE
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected)
static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore=false)
getExtendTypeForNode - Translate an extend node to the corresponding ExtendType value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, SDValue &ShiftedOperand, uint64_t &EncodedShiftImm)
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range, unsigned Size)
Check if the immediate offset is valid as a scaled immediate.
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
aarch64 promote const
amdgpu AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Size
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
#define R2(n)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t High
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Value * RHS
Value * LHS
support::ulittle16_t & Lo
Definition: aarch32.cpp:206
support::ulittle16_t & Hi
Definition: aarch32.cpp:205
DEMANGLE_DUMP_METHOD void dump() const
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
bool getExactInverse(APFloat *inv) const
Definition: APFloat.h:1399
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1241
Class for arbitrary precision integers.
Definition: APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1500
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1629
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:238
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1448
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1598
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1557
void flipAllBits()
Toggle every bit to its opposite value.
Definition: APInt.h:1414
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition: APInt.h:490
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:838
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
const GlobalValue * getGlobal() const
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint8_t getStackID(int ObjectIdx) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
A description of a memory reference used in the backend.
An SDNode that represents everything that will be needed to construct a MachineInstr.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
iterator_range< use_iterator > uses()
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps)
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:226
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:451
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:690
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
size_t size() const
Definition: SmallVector.h:92
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:685
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
unsigned getID() const
Return the register class ID number.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:927
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint32_t parseGenericRegister(StringRef Name)
const SysReg * lookupSysRegByName(StringRef)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static bool processLogicalImmediate(uint64_t Imm, unsigned RegSize, uint64_t &Encoding)
processLogicalImmediate - Determine if an immediate value can be encoded as the immediate operand of ...
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static constexpr unsigned SVEBitsPerBlock
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:573
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1309
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1099
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:813
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:953
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:804
@ WRITE_REGISTER
Definition: ISDOpcodes.h:125
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1305
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:641
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition: ISDOpcodes.h:68
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:734
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:587
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:124
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:810
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1397
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1316
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:848
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:708
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition: ISDOpcodes.h:223
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:816
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ AssertZext
Definition: ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1552
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1583
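As a hedged example of how LoadExtType and MemIndexedMode are typically queried, the following sketch (the helper name isPlainLoad is hypothetical) checks for the simplest form of load:
  #include "llvm/CodeGen/SelectionDAGNodes.h"
  // Sketch only: true if N is a non-extending, non-indexed load.
  bool isPlainLoad(const llvm::SDNode *N) {
    const auto *LD = llvm::dyn_cast<llvm::LoadSDNode>(N);
    return LD && LD->getExtensionType() == llvm::ISD::NON_EXTLOAD && // LoadExtType
           LD->getAddressingMode() == llvm::ISD::UNINDEXED;          // MemIndexedMode
  }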
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
Reg
All possible values of the reg field in the ModR/M byte.
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
@ Offset
Definition: DWP.cpp:480
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
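A hedged usage sketch for this predicate (the helper name isNegation is hypothetical), recognizing a subtraction from the constant zero:
  #include "llvm/CodeGen/SelectionDAGNodes.h"
  // Sketch only: does N compute (0 - X), i.e. an integer negation?
  bool isNegation(llvm::SDValue N) {
    return N.getOpcode() == llvm::ISD::SUB &&
           llvm::isNullConstant(N.getOperand(0));
  }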
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition: MathExtras.h:279
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, or -1 if the value is zero.
Definition: MathExtras.h:346
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:285
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1935
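A short usage example for llvm::transform (the function name squareAll and the container contents are illustrative):
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallVector.h"
  // Example: square every element of In into Out.
  void squareAll(const llvm::SmallVectorImpl<int> &In,
                 llvm::SmallVectorImpl<int> &Out) {
    Out.resize(In.size());
    llvm::transform(In, Out.begin(), [](int V) { return V * V; });
  }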
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, or -1 if the value is zero.
Definition: MathExtras.h:340
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:273
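The bit-manipulation helpers above are easiest to read with concrete values; the asserts below are illustrative only:
  #include "llvm/ADT/bit.h"
  #include "llvm/Support/MathExtras.h"
  #include <cassert>
  void bitUtilityExamples() {
    assert(llvm::countr_zero(0x08u) == 3);   // three trailing zeros
    assert(llvm::countr_one(0x0Fu) == 4);    // four trailing ones
    assert(llvm::Log2_32(32u) == 5);         // floor(log2(32))
    assert(llvm::Log2_64(1ull << 40) == 40);
    assert(llvm::isMask_64(0xFFull));                  // ones starting at bit 0
    assert(llvm::isShiftedMask_32(0x0FF0u));           // contiguous ones, shifted
    assert(llvm::isShiftedMask_64(0x00FF000000000000ull));
  }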
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
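A hedged sketch of the usual output pattern around dbgs() and errs(); the DEBUG_TYPE string "my-pass" and the helper name reportExamples are hypothetical:
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/raw_ostream.h"
  #define DEBUG_TYPE "my-pass"  // hypothetical name for -debug-only= filtering
  void reportExamples(unsigned Count) {
    // Emitted only in builds with assertions, when -debug or a matching
    // -debug-only= is given.
    LLVM_DEBUG(llvm::dbgs() << "selected " << Count << " nodes\n");
    // Emitted unconditionally to standard error.
    llvm::errs() << "note: example diagnostic\n";
  }
  #undef DEBUG_TYPE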
FunctionPass * createAArch64ISelDag(AArch64TargetMachine &TM, CodeGenOptLevel OptLevel)
createAArch64ISelDag - This pass converts a legalized DAG into an AArch64-specific DAG,...
@ And
Bitwise or logical AND of integers.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
Extended Value Type.
Definition: ValueTypes.h:35
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:341
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:359
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:350
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:371
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:307
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:204
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:367
bool isFixedLengthVector() const
Definition: ValueTypes.h:178
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:319
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:327
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:199
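A hedged sketch exercising several of the EVT queries listed above (the LLVMContext is assumed to come from elsewhere; expected results are noted in comments):
  #include "llvm/CodeGen/ValueTypes.h"
  #include "llvm/IR/LLVMContext.h"
  void evtExamples(llvm::LLVMContext &Ctx) {
    llvm::EVT I32 = llvm::EVT::getIntegerVT(Ctx, 32);
    llvm::EVT V4I32 = llvm::EVT::getVectorVT(Ctx, I32, 4);         // <4 x i32>
    llvm::EVT NxV4I32 =
        llvm::EVT::getVectorVT(Ctx, I32, 4, /*IsScalable=*/true);  // <vscale x 4 x i32>
    (void)V4I32.is128BitVector();            // true: 4 * 32 == 128 bits
    (void)V4I32.getVectorNumElements();      // 4 (fixed-length vectors only)
    (void)V4I32.getVectorElementType();      // i32
    (void)V4I32.getScalarSizeInBits();       // 32
    (void)NxV4I32.isScalableVector();        // true
    (void)NxV4I32.getVectorMinNumElements(); // 4, the known minimum lane count
  }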
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40