1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64MachineFunctionInfo.h"
14#include "AArch64TargetMachine.h"
15#include "MCTargetDesc/AArch64AddressingModes.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/CodeGen/ISDOpcodes.h"
18#include "llvm/CodeGen/SelectionDAGISel.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/ErrorHandling.h"
25#include "llvm/Support/KnownBits.h"
26#include "llvm/Support/MathExtras.h"
27#include "llvm/Support/raw_ostream.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34// https://github.com/llvm/llvm-project/issues/114425
35#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
36#pragma inline_depth(0)
37#endif
38
39//===--------------------------------------------------------------------===//
40/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
41/// instructions for SelectionDAG operations.
42///
43namespace {
44
45class AArch64DAGToDAGISel : public SelectionDAGISel {
46
47 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
48 /// make the right decision when generating code for different targets.
49 const AArch64Subtarget *Subtarget;
50
51public:
52 AArch64DAGToDAGISel() = delete;
53
54 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
55 CodeGenOptLevel OptLevel)
56 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
57
58 bool runOnMachineFunction(MachineFunction &MF) override {
59 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
60 return SelectionDAGISel::runOnMachineFunction(MF);
61 }
62
63 void Select(SDNode *Node) override;
64
65 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
66 /// inline asm expressions.
67 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
68 InlineAsm::ConstraintCode ConstraintID,
69 std::vector<SDValue> &OutOps) override;
70
71 template <signed Low, signed High, signed Scale>
72 bool SelectRDVLImm(SDValue N, SDValue &Imm);
73
74 template <signed Low, signed High>
75 bool SelectRDSVLShiftImm(SDValue N, SDValue &Imm);
76
77 bool SelectAddUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
78
79 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
80 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
81 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
82 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
83 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
84 return SelectShiftedRegister(N, false, Reg, Shift);
85 }
86 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
87 return SelectShiftedRegister(N, true, Reg, Shift);
88 }
89 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
90 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
91 }
92 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
93 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
94 }
95 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
96 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
97 }
98 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
99 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
100 }
101 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
102 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
103 }
104 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
105 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
106 }
107 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
108 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
109 }
110 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
111 return SelectAddrModeIndexed(N, 1, Base, OffImm);
112 }
113 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
114 return SelectAddrModeIndexed(N, 2, Base, OffImm);
115 }
116 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
117 return SelectAddrModeIndexed(N, 4, Base, OffImm);
118 }
119 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
120 return SelectAddrModeIndexed(N, 8, Base, OffImm);
121 }
122 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
123 return SelectAddrModeIndexed(N, 16, Base, OffImm);
124 }
125 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
126 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
127 }
128 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
129 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
130 }
131 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
132 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
133 }
134 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
135 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
136 }
137 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
138 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
139 }
140 template <unsigned Size, unsigned Max>
141 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
142 // Test if there is an appropriate addressing mode and check if the
143 // immediate fits.
144 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
145 if (Found) {
146 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
147 int64_t C = CI->getSExtValue();
148 if (C <= Max)
149 return true;
150 }
151 }
152
153 // Otherwise, base only, materialize address in register.
154 Base = N;
155 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
156 return true;
157 }
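  // For illustration only (a hypothetical instantiation, not taken from the
  // patterns that use this ComplexPattern): with Size = 8 and Max = 7, a
  // base+40 address is folded as OffImm = 5, while base+80 would give
  // OffImm = 10 > Max and falls back to Base = N, OffImm = 0.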
158
159 template<int Width>
160 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
161 SDValue &SignExtend, SDValue &DoShift) {
162 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
163 }
164
165 template<int Width>
166 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
167 SDValue &SignExtend, SDValue &DoShift) {
168 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
169 }
170
171 bool SelectExtractHigh(SDValue N, SDValue &Res) {
172 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
173 N = N->getOperand(0);
174 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
175 !isa<ConstantSDNode>(N->getOperand(1)))
176 return false;
177 EVT VT = N->getValueType(0);
178 EVT LVT = N->getOperand(0).getValueType();
179 unsigned Index = N->getConstantOperandVal(1);
180 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
181 Index != VT.getVectorNumElements())
182 return false;
183 Res = N->getOperand(0);
184 return true;
185 }
186
187 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
188 if (N.getOpcode() != AArch64ISD::VLSHR)
189 return false;
190 SDValue Op = N->getOperand(0);
191 EVT VT = Op.getValueType();
192 unsigned ShtAmt = N->getConstantOperandVal(1);
193 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
194 return false;
195
196 APInt Imm;
197 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
198 Imm = APInt(VT.getScalarSizeInBits(),
199 Op.getOperand(1).getConstantOperandVal(0)
200 << Op.getOperand(1).getConstantOperandVal(1));
201 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
202 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
203 Imm = APInt(VT.getScalarSizeInBits(),
204 Op.getOperand(1).getConstantOperandVal(0));
205 else
206 return false;
207
208 if (Imm != 1ULL << (ShtAmt - 1))
209 return false;
210
211 Res1 = Op.getOperand(0);
212 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
213 return true;
214 }
215
216 bool SelectDupZeroOrUndef(SDValue N) {
217 switch(N->getOpcode()) {
218 case ISD::UNDEF:
219 return true;
220 case AArch64ISD::DUP:
221 case ISD::SPLAT_VECTOR: {
222 auto Opnd0 = N->getOperand(0);
223 if (isNullConstant(Opnd0))
224 return true;
225 if (isNullFPConstant(Opnd0))
226 return true;
227 break;
228 }
229 default:
230 break;
231 }
232
233 return false;
234 }
235
236 bool SelectAny(SDValue) { return true; }
237
238 bool SelectDupZero(SDValue N) {
239 switch(N->getOpcode()) {
240 case AArch64ISD::DUP:
241 case ISD::SPLAT_VECTOR: {
242 auto Opnd0 = N->getOperand(0);
243 if (isNullConstant(Opnd0))
244 return true;
245 if (isNullFPConstant(Opnd0))
246 return true;
247 break;
248 }
249 }
250
251 return false;
252 }
253
254 template <MVT::SimpleValueType VT, bool Negate>
255 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
256 return SelectSVEAddSubImm(N, VT, Imm, Shift, Negate);
257 }
258
259 template <MVT::SimpleValueType VT, bool Negate>
260 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
261 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
262 }
263
264 template <MVT::SimpleValueType VT>
265 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
266 return SelectSVECpyDupImm(N, VT, Imm, Shift);
267 }
268
269 template <MVT::SimpleValueType VT, bool Invert = false>
270 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
271 return SelectSVELogicalImm(N, VT, Imm, Invert);
272 }
273
274 template <MVT::SimpleValueType VT>
275 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
276 return SelectSVEArithImm(N, VT, Imm);
277 }
278
279 template <unsigned Low, unsigned High, bool AllowSaturation = false>
280 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
281 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
282 }
283
284 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
285 if (N->getOpcode() != ISD::SPLAT_VECTOR)
286 return false;
287
288 EVT EltVT = N->getValueType(0).getVectorElementType();
289 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
290 /* High */ EltVT.getFixedSizeInBits(),
291 /* AllowSaturation */ true, Imm);
292 }
293
294 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
295 template<signed Min, signed Max, signed Scale, bool Shift>
296 bool SelectCntImm(SDValue N, SDValue &Imm) {
297 if (!isa<ConstantSDNode>(N))
298 return false;
299
300 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
301 if (Shift)
302 MulImm = 1LL << MulImm;
303
304 if ((MulImm % std::abs(Scale)) != 0)
305 return false;
306
307 MulImm /= Scale;
308 if ((MulImm >= Min) && (MulImm <= Max)) {
309 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
310 return true;
311 }
312
313 return false;
314 }
315
316 template <signed Max, signed Scale>
317 bool SelectEXTImm(SDValue N, SDValue &Imm) {
318 if (!isa<ConstantSDNode>(N))
319 return false;
320
321 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
322
323 if (MulImm >= 0 && MulImm <= Max) {
324 MulImm *= Scale;
325 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
326 return true;
327 }
328
329 return false;
330 }
331
332 template <unsigned BaseReg, unsigned Max>
333 bool ImmToReg(SDValue N, SDValue &Imm) {
334 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
335 uint64_t C = CI->getZExtValue();
336
337 if (C > Max)
338 return false;
339
340 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
341 return true;
342 }
343 return false;
344 }
345
346 /// Form sequences of consecutive 64/128-bit registers for use in NEON
347 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
348 /// between 1 and 4 elements. If it contains a single element, that element is
349 /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
350 SDValue createDTuple(ArrayRef<SDValue> Vecs);
351 SDValue createQTuple(ArrayRef<SDValue> Vecs);
352 // Form a sequence of SVE registers for instructions using list of vectors,
353 // e.g. structured loads and stores (ldN, stN).
354 SDValue createZTuple(ArrayRef<SDValue> Vecs);
355
356 // Similar to above, except the register must start at a multiple of the
357 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
358 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
359
360 /// Generic helper for the createDTuple/createQTuple
361 /// functions. Those should almost always be called instead.
362 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
363 const unsigned SubRegs[]);
364
365 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
366
367 bool tryIndexedLoad(SDNode *N);
368
369 void SelectPtrauthAuth(SDNode *N);
370 void SelectPtrauthResign(SDNode *N);
371
372 bool trySelectStackSlotTagP(SDNode *N);
373 void SelectTagP(SDNode *N);
374
375 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
376 unsigned SubRegIdx);
377 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
378 unsigned SubRegIdx);
379 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
380 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
381 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
382 unsigned Opc_rr, unsigned Opc_ri,
383 bool IsIntr = false);
384 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
385 unsigned Scale, unsigned Opc_ri,
386 unsigned Opc_rr);
387 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
388 bool IsZmMulti, unsigned Opcode,
389 bool HasPred = false);
390 void SelectPExtPair(SDNode *N, unsigned Opc);
391 void SelectWhilePair(SDNode *N, unsigned Opc);
392 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
393 void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
394 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
395 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
396 bool IsTupleInput, unsigned Opc);
397 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
398
399 template <unsigned MaxIdx, unsigned Scale>
400 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
401 unsigned Op);
402 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
403 unsigned Op, unsigned MaxIdx, unsigned Scale,
404 unsigned BaseReg = 0);
405 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
406 /// SVE Reg+Imm addressing mode.
407 template <int64_t Min, int64_t Max>
408 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
409 SDValue &OffImm);
410 /// SVE Reg+Reg address mode.
411 template <unsigned Scale>
412 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
413 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
414 }
415
416 void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
417 unsigned Opc, uint32_t MaxImm);
418
419 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc);
420
421 template <unsigned MaxIdx, unsigned Scale>
422 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
423 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
424 }
425
426 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
427 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
428 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
429 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
430 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
431 unsigned Opc_rr, unsigned Opc_ri);
432 std::tuple<unsigned, SDValue, SDValue>
433 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
434 const SDValue &OldBase, const SDValue &OldOffset,
435 unsigned Scale);
436
437 bool tryBitfieldExtractOp(SDNode *N);
438 bool tryBitfieldExtractOpFromSExt(SDNode *N);
439 bool tryBitfieldInsertOp(SDNode *N);
440 bool tryBitfieldInsertInZeroOp(SDNode *N);
441 bool tryShiftAmountMod(SDNode *N);
442
443 bool tryReadRegister(SDNode *N);
444 bool tryWriteRegister(SDNode *N);
445
446 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
447 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
448
449 bool trySelectXAR(SDNode *N);
450
451// Include the pieces autogenerated from the target description.
452#include "AArch64GenDAGISel.inc"
453
454private:
455 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
456 SDValue &Shift);
457 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
458 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
459 SDValue &OffImm) {
460 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
461 }
462 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
463 unsigned Size, SDValue &Base,
464 SDValue &OffImm);
465 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
466 SDValue &OffImm);
467 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
468 SDValue &OffImm);
469 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
470 SDValue &Offset, SDValue &SignExtend,
471 SDValue &DoShift);
472 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
473 SDValue &Offset, SDValue &SignExtend,
474 SDValue &DoShift);
475 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
476 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
477 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
478 SDValue &Offset, SDValue &SignExtend);
479
480 template<unsigned RegWidth>
481 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
482 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
483 }
484
485 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
486
487 template<unsigned RegWidth>
488 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
489 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
490 }
491
492 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
493 unsigned Width);
494
495 bool SelectCMP_SWAP(SDNode *N);
496
497 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
498 bool Negate);
499 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
500 bool Negate);
501 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
502 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
503
504 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
505 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
506 bool AllowSaturation, SDValue &Imm);
507
508 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
509 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
510 SDValue &Offset);
511 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
512 SDValue &Offset, unsigned Scale = 1);
513
514 bool SelectAllActivePredicate(SDValue N);
515 bool SelectAnyPredicate(SDValue N);
516
517 bool SelectCmpBranchUImm6Operand(SDNode *P, SDValue N, SDValue &Imm);
518
519 template <bool MatchCBB>
520 bool SelectCmpBranchExtOperand(SDValue N, SDValue &Reg, SDValue &ExtType);
521};
522
523class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
524public:
525 static char ID;
526 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
527 CodeGenOptLevel OptLevel)
528 : SelectionDAGISelLegacy(
529 ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
530};
531} // end anonymous namespace
532
533char AArch64DAGToDAGISelLegacy::ID = 0;
534
535INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
536
537/// isIntImmediate - This method tests to see if the node is a constant
538/// operand. If so, Imm will receive the 32-bit value.
539static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
540 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
541 Imm = C->getZExtValue();
542 return true;
543 }
544 return false;
545}
546
547// isIntImmediate - This method tests to see if N is a constant operand.
548// If so, Imm will receive the value.
549static bool isIntImmediate(SDValue N, uint64_t &Imm) {
550 return isIntImmediate(N.getNode(), Imm);
551}
552
553// isOpcWithIntImmediate - This method tests to see if the node is a specific
554// opcode and that it has an immediate integer right operand.
555// If so, Imm will receive the 32-bit value.
556static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
557 uint64_t &Imm) {
558 return N->getOpcode() == Opc &&
559 isIntImmediate(N->getOperand(1).getNode(), Imm);
560}
561
562// isIntImmediateEq - This method tests to see if N is a constant operand that
563// is equivalent to 'ImmExpected'.
564#ifndef NDEBUG
565static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
566 uint64_t Imm;
567 if (!isIntImmediate(N.getNode(), Imm))
568 return false;
569 return Imm == ImmExpected;
570}
571#endif
572
573bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
574 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
575 std::vector<SDValue> &OutOps) {
576 switch(ConstraintID) {
577 default:
578 llvm_unreachable("Unexpected asm memory constraint");
579 case InlineAsm::ConstraintCode::m:
580 case InlineAsm::ConstraintCode::o:
581 case InlineAsm::ConstraintCode::Q:
582 // We need to make sure that this one operand does not end up in XZR, thus
583 // require the address to be in a PointerRegClass register.
584 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
585 const TargetRegisterClass *TRC = TRI->getPointerRegClass();
586 SDLoc dl(Op);
587 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
588 SDValue NewOp =
589 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
590 dl, Op.getValueType(),
591 Op, RC), 0);
592 OutOps.push_back(NewOp);
593 return false;
594 }
595 return true;
596}
597
598/// SelectArithImmed - Select an immediate value that can be represented as
599/// a 12-bit value shifted left by either 0 or 12. If so, return true with
600/// Val set to the 12-bit value and Shift set to the shifter operand.
601bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
602 SDValue &Shift) {
603 // This function is called from the addsub_shifted_imm ComplexPattern,
604 // which lists [imm] as the list of opcodes it is interested in; however,
605 // we still need to check whether the operand is actually an immediate
606 // here because the ComplexPattern opcode list is only used in
607 // root-level opcode matching.
608 if (!isa<ConstantSDNode>(N.getNode()))
609 return false;
610
611 uint64_t Immed = N.getNode()->getAsZExtVal();
612 unsigned ShiftAmt;
613
614 if (Immed >> 12 == 0) {
615 ShiftAmt = 0;
616 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
617 ShiftAmt = 12;
618 Immed = Immed >> 12;
619 } else
620 return false;
621
622 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
623 SDLoc dl(N);
624 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
625 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
626 return true;
627}
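// For illustration: an immediate of 0x123000 is selectable here as Val = 0x123
// with Shift = LSL #12 (i.e. "add x0, x1, #0x123, lsl #12"), while 0x123456 is
// rejected because neither the low nor the high 12-bit slice is zero.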
628
629/// SelectNegArithImmed - As above, but negates the value before trying to
630/// select it.
631bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
632 SDValue &Shift) {
633 // This function is called from the addsub_shifted_imm ComplexPattern,
634 // which lists [imm] as the list of opcodes it is interested in; however,
635 // we still need to check whether the operand is actually an immediate
636 // here because the ComplexPattern opcode list is only used in
637 // root-level opcode matching.
638 if (!isa<ConstantSDNode>(N.getNode()))
639 return false;
640
641 // The immediate operand must be a 24-bit zero-extended immediate.
642 uint64_t Immed = N.getNode()->getAsZExtVal();
643
644 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
645 // have the opposite effect on the C flag, so this pattern mustn't match under
646 // those circumstances.
647 if (Immed == 0)
648 return false;
649
650 if (N.getValueType() == MVT::i32)
651 Immed = ~((uint32_t)Immed) + 1;
652 else
653 Immed = ~Immed + 1ULL;
654 if (Immed & 0xFFFFFFFFFF000000ULL)
655 return false;
656
657 Immed &= 0xFFFFFFULL;
658 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
659 Shift);
660}
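// For illustration: an i32 constant -5 (0xFFFFFFFB) negates to 5 and is
// selected as immediate #5 with LSL #0, letting the matching patterns flip
// between the ADD and SUB encodings; 0 is rejected so that cmp/cmn #0 keep
// their distinct effect on the C flag.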
661
662/// getShiftTypeForNode - Translate a shift node to the corresponding
663/// ShiftType value.
664static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
665 switch (N.getOpcode()) {
666 default:
667 return AArch64_AM::InvalidShiftExtend;
668 case ISD::SHL:
669 return AArch64_AM::LSL;
670 case ISD::SRL:
671 return AArch64_AM::LSR;
672 case ISD::SRA:
673 return AArch64_AM::ASR;
674 case ISD::ROTR:
675 return AArch64_AM::ROR;
676 }
677}
678
679static bool isMemOpOrPrefetch(SDNode *N) {
680 return isa<MemSDNode>(*N) || N->getOpcode() == AArch64ISD::PREFETCH;
681}
682
683/// Determine whether it is worth it to fold SHL into the addressing
684/// mode.
685static bool isWorthFoldingSHL(SDValue V) {
686 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
687 // It is worth folding a logical shift of up to three places.
688 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
689 if (!CSD)
690 return false;
691 unsigned ShiftVal = CSD->getZExtValue();
692 if (ShiftVal > 3)
693 return false;
694
695 // Check if this particular node is reused in any non-memory related
696 // operation. If yes, do not try to fold this node into the address
697 // computation, since the computation will be kept.
698 const SDNode *Node = V.getNode();
699 for (SDNode *UI : Node->users())
700 if (!isMemOpOrPrefetch(UI))
701 for (SDNode *UII : UI->users())
702 if (!isMemOpOrPrefetch(UII))
703 return false;
704 return true;
705}
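// For illustration: a (shl x, 3) that only feeds loads, stores, or prefetches
// is considered worth folding (e.g. into [x0, x1, lsl #3]); a shift amount of
// 4, or a user of the shift whose own users are not memory operations, makes
// it not worth folding.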
706
707/// Determine whether it is worth folding V into an extended register
708/// addressing mode.
709bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
710 // Trivial if we are optimizing for code size or if there is only
711 // one use of the value.
712 if (CurDAG->shouldOptForSize() || V.hasOneUse())
713 return true;
714
715 // If a subtarget has a slow shift, folding a shift into multiple loads
716 // costs additional micro-ops.
717 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
718 return false;
719
720 // Check whether we're going to emit the address arithmetic anyway because
721 // it's used by a non-address operation.
722 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
723 return true;
724 if (V.getOpcode() == ISD::ADD) {
725 const SDValue LHS = V.getOperand(0);
726 const SDValue RHS = V.getOperand(1);
727 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
728 return true;
729 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
730 return true;
731 }
732
733 // It hurts otherwise, since the value will be reused.
734 return false;
735}
736
737/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
738/// to select more shifted register
739bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
740 SDValue &Shift) {
741 EVT VT = N.getValueType();
742 if (VT != MVT::i32 && VT != MVT::i64)
743 return false;
744
745 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
746 return false;
747 SDValue LHS = N.getOperand(0);
748 if (!LHS->hasOneUse())
749 return false;
750
751 unsigned LHSOpcode = LHS->getOpcode();
752 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
753 return false;
754
755 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
756 if (!ShiftAmtNode)
757 return false;
758
759 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
760 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
761 if (!RHSC)
762 return false;
763
764 APInt AndMask = RHSC->getAPIntValue();
765 unsigned LowZBits, MaskLen;
766 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
767 return false;
768
769 unsigned BitWidth = N.getValueSizeInBits();
770 SDLoc DL(LHS);
771 uint64_t NewShiftC;
772 unsigned NewShiftOp;
773 if (LHSOpcode == ISD::SHL) {
774 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
775 // BitWidth != LowZBits + MaskLen doesn't match the pattern
776 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
777 return false;
778
779 NewShiftC = LowZBits - ShiftAmtC;
780 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
781 } else {
782 if (LowZBits == 0)
783 return false;
784
785 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
786 NewShiftC = LowZBits + ShiftAmtC;
787 if (NewShiftC >= BitWidth)
788 return false;
789
790 // SRA needs all high bits
791 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
792 return false;
793
794 // SRL high bits can be 0 or 1
795 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
796 return false;
797
798 if (LHSOpcode == ISD::SRL)
799 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
800 else
801 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
802 }
803
804 assert(NewShiftC < BitWidth && "Invalid shift amount");
805 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
806 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
807 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
808 NewShiftAmt, BitWidthMinus1),
809 0);
810 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
811 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
812 return true;
813}
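// For illustration: on i32, (and (srl X, 2), 0xFFFFFF00) is rewritten here as
// UBFMWri(X, #10, #31) paired with an LSL #8 shifted-register operand, which
// computes the same value as the original and/srl pair.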
814
815/// getExtendTypeForNode - Translate an extend node to the corresponding
816/// ExtendType value.
817static AArch64_AM::ShiftExtendType
818getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
819 if (N.getOpcode() == ISD::SIGN_EXTEND ||
820 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
821 EVT SrcVT;
822 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
823 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
824 else
825 SrcVT = N.getOperand(0).getValueType();
826
827 if (!IsLoadStore && SrcVT == MVT::i8)
828 return AArch64_AM::SXTB;
829 else if (!IsLoadStore && SrcVT == MVT::i16)
830 return AArch64_AM::SXTH;
831 else if (SrcVT == MVT::i32)
832 return AArch64_AM::SXTW;
833 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
834
835 return AArch64_AM::InvalidShiftExtend;
836 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
837 N.getOpcode() == ISD::ANY_EXTEND) {
838 EVT SrcVT = N.getOperand(0).getValueType();
839 if (!IsLoadStore && SrcVT == MVT::i8)
840 return AArch64_AM::UXTB;
841 else if (!IsLoadStore && SrcVT == MVT::i16)
842 return AArch64_AM::UXTH;
843 else if (SrcVT == MVT::i32)
844 return AArch64_AM::UXTW;
845 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
846
847 return AArch64_AM::InvalidShiftExtend;
848 } else if (N.getOpcode() == ISD::AND) {
849 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
850 if (!CSD)
851 return AArch64_AM::InvalidShiftExtend;
852 uint64_t AndMask = CSD->getZExtValue();
853
854 switch (AndMask) {
855 default:
856 return AArch64_AM::InvalidShiftExtend;
857 case 0xFF:
858 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
859 case 0xFFFF:
860 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
861 case 0xFFFFFFFF:
862 return AArch64_AM::UXTW;
863 }
864 }
865
866 return AArch64_AM::InvalidShiftExtend;
867}
868
869/// Determine whether it is worth folding V into an extended register of an
870/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
871/// instruction, and the shift should be treated as worth folding even if it has
872/// multiple uses.
873bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
874 // Trivial if we are optimizing for code size or if there is only
875 // one use of the value.
876 if (CurDAG->shouldOptForSize() || V.hasOneUse())
877 return true;
878
879 // If a subtarget has a fastpath LSL we can fold a logical shift into
880 // the add/sub and save a cycle.
881 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
882 V.getConstantOperandVal(1) <= 4 &&
883 getExtendTypeForNode(V.getOperand(0)) == AArch64_AM::InvalidShiftExtend)
884 return true;
885
886 // It hurts otherwise, since the value will be reused.
887 return false;
888}
889
890/// SelectShiftedRegister - Select a "shifted register" operand. If the value
891/// is not shifted, set the Shift operand to default of "LSL 0". The logical
892/// instructions allow the shifted register to be rotated, but the arithmetic
893/// instructions do not. The AllowROR parameter specifies whether ROR is
894/// supported.
895bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
896 SDValue &Reg, SDValue &Shift) {
897 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
898 return true;
899
900 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
901 if (ShType == AArch64_AM::InvalidShiftExtend)
902 return false;
903 if (!AllowROR && ShType == AArch64_AM::ROR)
904 return false;
905
906 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
907 unsigned BitSize = N.getValueSizeInBits();
908 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
909 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
910
911 Reg = N.getOperand(0);
912 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
913 return isWorthFoldingALU(N, true);
914 }
915
916 return false;
917}
918
919/// Instructions that accept extend modifiers like UXTW expect the register
920/// being extended to be a GPR32, but the incoming DAG might be acting on a
921/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
922/// this is the case.
923static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
924 if (N.getValueType() == MVT::i32)
925 return N;
926
927 SDLoc dl(N);
928 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
929}
930
931// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
932template<signed Low, signed High, signed Scale>
933bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
934 if (!isa<ConstantSDNode>(N))
935 return false;
936
937 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
938 if ((MulImm % std::abs(Scale)) == 0) {
939 int64_t RDVLImm = MulImm / Scale;
940 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
941 Imm = CurDAG->getSignedTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
942 return true;
943 }
944 }
945
946 return false;
947}
948
949// Returns a suitable RDSVL multiplier from a left shift.
950template <signed Low, signed High>
951bool AArch64DAGToDAGISel::SelectRDSVLShiftImm(SDValue N, SDValue &Imm) {
952 if (!isa<ConstantSDNode>(N))
953 return false;
954
955 int64_t MulImm = 1LL << cast<ConstantSDNode>(N)->getSExtValue();
956 if (MulImm >= Low && MulImm <= High) {
957 Imm = CurDAG->getSignedTargetConstant(MulImm, SDLoc(N), MVT::i32);
958 return true;
959 }
960
961 return false;
962}
963
964/// SelectAddUXTXRegister - Select a "UXTX register" operand. This
965/// operand is used by instructions that have an SP operand.
966bool AArch64DAGToDAGISel::SelectAddUXTXRegister(SDValue N, SDValue &Reg,
967 SDValue &Shift) {
968 // TODO: Relax condition to apply to more scenarios
969 if (N.getOpcode() != ISD::LOAD)
970 return false;
971 Reg = N;
972 Shift = CurDAG->getTargetConstant(getArithExtendImm(AArch64_AM::UXTX, 0),
973 SDLoc(N), MVT::i32);
974 return true;
975}
976
977/// SelectArithExtendedRegister - Select an "extended register" operand. This
978/// operand folds in an extend followed by an optional left shift.
979bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
980 SDValue &Shift) {
981 unsigned ShiftVal = 0;
982 AArch64_AM::ShiftExtendType Ext;
983
984 if (N.getOpcode() == ISD::SHL) {
985 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
986 if (!CSD)
987 return false;
988 ShiftVal = CSD->getZExtValue();
989 if (ShiftVal > 4)
990 return false;
991
992 Ext = getExtendTypeForNode(N.getOperand(0));
993 if (Ext == AArch64_AM::InvalidShiftExtend)
994 return false;
995
996 Reg = N.getOperand(0).getOperand(0);
997 } else {
998 Ext = getExtendTypeForNode(N);
999 if (Ext == AArch64_AM::InvalidShiftExtend)
1000 return false;
1001
1002 Reg = N.getOperand(0);
1003
1004 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
1005 // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
1006 auto isDef32 = [](SDValue N) {
1007 unsigned Opc = N.getOpcode();
1008 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
1009 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
1010 Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
1011 Opc != ISD::FREEZE;
1012 };
1013 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
1014 isDef32(Reg))
1015 return false;
1016 }
1017
1018 // AArch64 mandates that the RHS of the operation must use the smallest
1019 // register class that could contain the size being extended from. Thus,
1020 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
1021 // there might not be an actual 32-bit value in the program. We can
1022 // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
1023 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
1024 Reg = narrowIfNeeded(CurDAG, Reg);
1025 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1026 MVT::i32);
1027 return isWorthFoldingALU(N);
1028}
1029
1030/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
1031/// operand is used by instructions that have an SP operand.
1032bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
1033 SDValue &Shift) {
1034 unsigned ShiftVal = 0;
1035 AArch64_AM::ShiftExtendType Ext;
1036
1037 if (N.getOpcode() != ISD::SHL)
1038 return false;
1039
1040 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1041 if (!CSD)
1042 return false;
1043 ShiftVal = CSD->getZExtValue();
1044 if (ShiftVal > 4)
1045 return false;
1046
1047 Ext = AArch64_AM::UXTX;
1048 Reg = N.getOperand(0);
1049 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1050 MVT::i32);
1051 return isWorthFoldingALU(N);
1052}
1053
1054/// If there's a use of this ADDlow that's not itself a load/store then we'll
1055/// need to create a real ADD instruction from it anyway and there's no point in
1056/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1057/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1058/// leads to duplicated ADRP instructions.
1059static bool isWorthFoldingADDlow(SDValue N) {
1060 for (auto *User : N->users()) {
1061 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
1062 User->getOpcode() != ISD::ATOMIC_LOAD &&
1063 User->getOpcode() != ISD::ATOMIC_STORE)
1064 return false;
1065
1066 // ldar and stlr have much more restrictive addressing modes (just a
1067 // register).
1068 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
1069 return false;
1070 }
1071
1072 return true;
1073}
1074
1075/// Check if the immediate offset is valid as a scaled immediate.
1076static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1077 unsigned Size) {
1078 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1079 Offset < (Range << Log2_32(Size)))
1080 return true;
1081 return false;
1082}
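// For illustration: with Range = 0x1000 and Size = 8, offsets 0, 8, ..., 32760
// are valid scaled immediates, while 32768 (out of range) and 12 (not a
// multiple of 8) are not.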
1083
1084/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1085/// immediate" address. The "Size" argument is the size in bytes of the memory
1086/// reference, which determines the scale.
1087bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1088 unsigned BW, unsigned Size,
1089 SDValue &Base,
1090 SDValue &OffImm) {
1091 SDLoc dl(N);
1092 const DataLayout &DL = CurDAG->getDataLayout();
1093 const TargetLowering *TLI = getTargetLowering();
1094 if (N.getOpcode() == ISD::FrameIndex) {
1095 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1096 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1097 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1098 return true;
1099 }
1100
1101 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
1102 // addressing mode selected here doesn't support labels/immediates, only base+offset.
1103 if (CurDAG->isBaseWithConstantOffset(N)) {
1104 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1105 if (IsSignedImm) {
1106 int64_t RHSC = RHS->getSExtValue();
1107 unsigned Scale = Log2_32(Size);
1108 int64_t Range = 0x1LL << (BW - 1);
1109
1110 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1111 RHSC < (Range << Scale)) {
1112 Base = N.getOperand(0);
1113 if (Base.getOpcode() == ISD::FrameIndex) {
1114 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1115 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1116 }
1117 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1118 return true;
1119 }
1120 } else {
1121 // unsigned Immediate
1122 uint64_t RHSC = RHS->getZExtValue();
1123 unsigned Scale = Log2_32(Size);
1124 uint64_t Range = 0x1ULL << BW;
1125
1126 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1127 Base = N.getOperand(0);
1128 if (Base.getOpcode() == ISD::FrameIndex) {
1129 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1130 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1131 }
1132 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1133 return true;
1134 }
1135 }
1136 }
1137 }
1138 // Base only. The address will be materialized into a register before
1139 // the memory is accessed.
1140 // add x0, Xbase, #offset
1141 // stp x1, x2, [x0]
1142 Base = N;
1143 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1144 return true;
1145}
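// For illustration: for a signed 7-bit, 8-byte-scaled access (as used by the
// SelectAddrModeIndexed7S64 wrapper above), base+24 is selected with
// OffImm = 3, while base+4 (misaligned) or base+520 (outside the [-512, 504]
// range) falls back to the base-only form.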
1146
1147/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1148/// immediate" address. The "Size" argument is the size in bytes of the memory
1149/// reference, which determines the scale.
1150bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1151 SDValue &Base, SDValue &OffImm) {
1152 SDLoc dl(N);
1153 const DataLayout &DL = CurDAG->getDataLayout();
1154 const TargetLowering *TLI = getTargetLowering();
1155 if (N.getOpcode() == ISD::FrameIndex) {
1156 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1157 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1158 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1159 return true;
1160 }
1161
1162 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1163 GlobalAddressSDNode *GAN =
1164 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1165 Base = N.getOperand(0);
1166 OffImm = N.getOperand(1);
1167 if (!GAN)
1168 return true;
1169
1170 if (GAN->getOffset() % Size == 0 &&
1171 GAN->getGlobal()->getPointerAlignment(DL) >= Size)
1172 return true;
1173 }
1174
1175 if (CurDAG->isBaseWithConstantOffset(N)) {
1176 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1177 int64_t RHSC = (int64_t)RHS->getZExtValue();
1178 unsigned Scale = Log2_32(Size);
1179 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1180 Base = N.getOperand(0);
1181 if (Base.getOpcode() == ISD::FrameIndex) {
1182 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1183 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1184 }
1185 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1186 return true;
1187 }
1188 }
1189 }
1190
1191 // Before falling back to our general case, check if the unscaled
1192 // instructions can handle this. If so, that's preferable.
1193 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1194 return false;
1195
1196 // Base only. The address will be materialized into a register before
1197 // the memory is accessed.
1198 // add x0, Xbase, #offset
1199 // ldr x0, [x0]
1200 Base = N;
1201 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1202 return true;
1203}
1204
1205/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1206/// immediate" address. This should only match when there is an offset that
1207/// is not valid for a scaled immediate addressing mode. The "Size" argument
1208/// is the size in bytes of the memory reference, which is needed here to know
1209/// what is valid for a scaled immediate.
1210bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1211 SDValue &Base,
1212 SDValue &OffImm) {
1213 if (!CurDAG->isBaseWithConstantOffset(N))
1214 return false;
1215 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1216 int64_t RHSC = RHS->getSExtValue();
1217 if (RHSC >= -256 && RHSC < 256) {
1218 Base = N.getOperand(0);
1219 if (Base.getOpcode() == ISD::FrameIndex) {
1220 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1221 const TargetLowering *TLI = getTargetLowering();
1222 Base = CurDAG->getTargetFrameIndex(
1223 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1224 }
1225 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1226 return true;
1227 }
1228 }
1229 return false;
1230}
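// For illustration: an 8-byte access at base+17 cannot use the scaled 12-bit
// form (17 is not a multiple of 8) but fits the signed 9-bit range [-256, 255],
// so it is selected here with OffImm = 17 and typically ends up as an
// LDUR/STUR-style instruction.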
1231
1232static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
1233 SDLoc dl(N);
1234 SDValue ImpDef = SDValue(
1235 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1236 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1237 N);
1238}
1239
1240/// Check if the given SHL node (\p N), can be used to form an
1241/// extended register for an addressing mode.
1242bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1243 bool WantExtend, SDValue &Offset,
1244 SDValue &SignExtend) {
1245 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1246 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1247 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1248 return false;
1249
1250 SDLoc dl(N);
1251 if (WantExtend) {
1252 AArch64_AM::ShiftExtendType Ext =
1253 getExtendTypeForNode(N.getOperand(0), true);
1254 if (Ext == AArch64_AM::InvalidShiftExtend)
1255 return false;
1256
1257 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1258 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1259 MVT::i32);
1260 } else {
1261 Offset = N.getOperand(0);
1262 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1263 }
1264
1265 unsigned LegalShiftVal = Log2_32(Size);
1266 unsigned ShiftVal = CSD->getZExtValue();
1267
1268 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1269 return false;
1270
1271 return isWorthFoldingAddr(N, Size);
1272}
1273
1274bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1275 SDValue &Base, SDValue &Offset,
1276 SDValue &SignExtend,
1277 SDValue &DoShift) {
1278 if (N.getOpcode() != ISD::ADD)
1279 return false;
1280 SDValue LHS = N.getOperand(0);
1281 SDValue RHS = N.getOperand(1);
1282 SDLoc dl(N);
1283
1284 // We don't want to match immediate adds here, because they are better lowered
1285 // to the register-immediate addressing modes.
1286 if (isa<ConstantSDNode>(RHS))
1287 return false;
1288
1289 // Check if this particular node is reused in any non-memory related
1290 // operation. If yes, do not try to fold this node into the address
1291 // computation, since the computation will be kept.
1292 const SDNode *Node = N.getNode();
1293 for (SDNode *UI : Node->users()) {
1294 if (!isMemOpOrPrefetch(UI))
1295 return false;
1296 }
1297
1298 // Remember if it is worth folding N when it produces extended register.
1299 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1300
1301 // Try to match a shifted extend on the RHS.
1302 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1303 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1304 Base = LHS;
1305 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1306 return true;
1307 }
1308
1309 // Try to match a shifted extend on the LHS.
1310 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1311 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1312 Base = RHS;
1313 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1314 return true;
1315 }
1316
1317 // There was no shift, whatever else we find.
1318 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1319
1320 AArch64_AM::ShiftExtendType Ext;
1321 // Try to match an unshifted extend on the LHS.
1322 if (IsExtendedRegisterWorthFolding &&
1323 (Ext = getExtendTypeForNode(LHS, true)) !=
1324 AArch64_AM::InvalidShiftExtend) {
1325 Base = RHS;
1326 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1327 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1328 MVT::i32);
1329 if (isWorthFoldingAddr(LHS, Size))
1330 return true;
1331 }
1332
1333 // Try to match an unshifted extend on the RHS.
1334 if (IsExtendedRegisterWorthFolding &&
1335 (Ext = getExtendTypeForNode(RHS, true)) !=
1336 AArch64_AM::InvalidShiftExtend) {
1337 Base = LHS;
1338 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1339 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1340 MVT::i32);
1341 if (isWorthFoldingAddr(RHS, Size))
1342 return true;
1343 }
1344
1345 return false;
1346}
1347
1348// Check if the given immediate is preferred by ADD. If an immediate can be
1349// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
1350// encoded by one MOVZ, return true.
1351static bool isPreferredADD(int64_t ImmOff) {
1352 // Constant in [0x0, 0xfff] can be encoded in ADD.
1353 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1354 return true;
1355 // Check if it can be encoded in an "ADD LSL #12".
1356 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1357 // As a single MOVZ is faster than an "ADD of LSL #12", ignore such constants.
1358 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1359 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1360 return false;
1361}
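// For illustration: 0x123000 is "preferred" (it needs ADD ..., LSL #12 and
// cannot be built with a single MOVZ), whereas 0xFF0000 is not, because a
// single "movz xN, #0xff, lsl #16" is cheaper than the ADD form.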
1362
1363bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1364 SDValue &Base, SDValue &Offset,
1365 SDValue &SignExtend,
1366 SDValue &DoShift) {
1367 if (N.getOpcode() != ISD::ADD)
1368 return false;
1369 SDValue LHS = N.getOperand(0);
1370 SDValue RHS = N.getOperand(1);
1371 SDLoc DL(N);
1372
1373 // Check if this particular node is reused in any non-memory related
1374 // operation. If yes, do not try to fold this node into the address
1375 // computation, since the computation will be kept.
1376 const SDNode *Node = N.getNode();
1377 for (SDNode *UI : Node->users()) {
1378 if (!isMemOpOrPrefetch(UI))
1379 return false;
1380 }
1381
1382 // Watch out if RHS is a wide immediate: it cannot be selected into the
1383 // [BaseReg+Imm] addressing mode, and it may also not be encodable in an
1384 // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate
1385 // instructions like:
1386 // MOV X0, WideImmediate
1387 // ADD X1, BaseReg, X0
1388 // LDR X2, [X1, 0]
1389 // For such situation, using [BaseReg, XReg] addressing mode can save one
1390 // ADD/SUB:
1391 // MOV X0, WideImmediate
1392 // LDR X2, [BaseReg, X0]
1393 if (isa<ConstantSDNode>(RHS)) {
1394 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1395 // Skip if the immediate can be selected by a load/store addressing mode.
1396 // Also skip if the immediate can be encoded by a single ADD (SUB is also
1397 // checked by using -ImmOff).
1398 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1399 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1400 return false;
1401
1402 SDValue Ops[] = { RHS };
1403 SDNode *MOVI =
1404 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1405 SDValue MOVIV = SDValue(MOVI, 0);
1406 // This ADD of two X register will be selected into [Reg+Reg] mode.
1407 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1408 }
1409
1410 // Remember if it is worth folding N when it produces extended register.
1411 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1412
1413 // Try to match a shifted extend on the RHS.
1414 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1415 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1416 Base = LHS;
1417 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1418 return true;
1419 }
1420
1421 // Try to match a shifted extend on the LHS.
1422 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1423 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1424 Base = RHS;
1425 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1426 return true;
1427 }
1428
1429 // Match any non-shifted, non-extend, non-immediate add expression.
1430 Base = LHS;
1431 Offset = RHS;
1432 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1433 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1434 // Reg1 + Reg2 is free: no check needed.
1435 return true;
1436}
1437
1438SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1439 static const unsigned RegClassIDs[] = {
1440 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1441 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1442 AArch64::dsub2, AArch64::dsub3};
1443
1444 return createTuple(Regs, RegClassIDs, SubRegs);
1445}
1446
1447SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1448 static const unsigned RegClassIDs[] = {
1449 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1450 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1451 AArch64::qsub2, AArch64::qsub3};
1452
1453 return createTuple(Regs, RegClassIDs, SubRegs);
1454}
1455
1456SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1457 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1458 AArch64::ZPR3RegClassID,
1459 AArch64::ZPR4RegClassID};
1460 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1461 AArch64::zsub2, AArch64::zsub3};
1462
1463 return createTuple(Regs, RegClassIDs, SubRegs);
1464}
1465
1466SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1467 assert(Regs.size() == 2 || Regs.size() == 4);
1468
1469 // The createTuple interface requires 3 RegClassIDs for each possible
1470 // tuple type even though we only have them for ZPR2 and ZPR4.
1471 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1472 AArch64::ZPR4Mul4RegClassID};
1473 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1474 AArch64::zsub2, AArch64::zsub3};
1475 return createTuple(Regs, RegClassIDs, SubRegs);
1476}
1477
1478SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1479 const unsigned RegClassIDs[],
1480 const unsigned SubRegs[]) {
1481 // There's no special register-class for a vector-list of 1 element: it's just
1482 // a vector.
1483 if (Regs.size() == 1)
1484 return Regs[0];
1485
1486 assert(Regs.size() >= 2 && Regs.size() <= 4);
1487
1488 SDLoc DL(Regs[0]);
1489
1490 SmallVector<SDValue, 4> Ops;
1491
1492 // First operand of REG_SEQUENCE is the desired RegClass.
1493 Ops.push_back(
1494 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1495
1496 // Then we get pairs of source & subregister-position for the components.
1497 for (unsigned i = 0; i < Regs.size(); ++i) {
1498 Ops.push_back(Regs[i]);
1499 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1500 }
1501
1502 SDNode *N =
1503 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1504 return SDValue(N, 0);
1505}
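// For illustration: createDTuple with three 64-bit vector values a, b, c emits
// REG_SEQUENCE(DDDRegClassID, a, dsub0, b, dsub1, c, dsub2), yielding a single
// untyped value that represents the consecutive D-register list.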
1506
1507void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1508 bool isExt) {
1509 SDLoc dl(N);
1510 EVT VT = N->getValueType(0);
1511
1512 unsigned ExtOff = isExt;
1513
1514 // Form a REG_SEQUENCE to force register allocation.
1515 unsigned Vec0Off = ExtOff + 1;
1516 SmallVector<SDValue, 4> Regs(N->ops().slice(Vec0Off, NumVecs));
1517 SDValue RegSeq = createQTuple(Regs);
1518
1519 SmallVector<SDValue, 6> Ops;
1520 if (isExt)
1521 Ops.push_back(N->getOperand(1));
1522 Ops.push_back(RegSeq);
1523 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1524 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1525}
1526
1527static std::tuple<SDValue, SDValue>
1528extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
1529 SDLoc DL(Disc);
1530 SDValue AddrDisc;
1531 SDValue ConstDisc;
1532
1533 // If this is a blend, remember the constant and address discriminators.
1534 // Otherwise, it's either a constant discriminator, or a non-blended
1535 // address discriminator.
1536 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1537 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
1538 AddrDisc = Disc->getOperand(1);
1539 ConstDisc = Disc->getOperand(2);
1540 } else {
1541 ConstDisc = Disc;
1542 }
1543
1544 // If the constant discriminator (either the blend RHS, or the entire
1545 // discriminator value) isn't a 16-bit constant, bail out, and let the
1546 // discriminator be computed separately.
1547 auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
1548 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
1549 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
1550
1551 // If there's no address discriminator, use XZR directly.
1552 if (!AddrDisc)
1553 AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
1554
1555 return std::make_tuple(
1556 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
1557 AddrDisc);
1558}
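// For illustration: a discriminator of the form llvm.ptrauth.blend(%addr, 1234)
// splits into (constant 1234, %addr); a plain constant 42 yields (42, XZR); any
// other value yields (0, Disc) and the full discriminator is computed
// separately.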
1559
1560void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1561 SDLoc DL(N);
1562 // IntrinsicID is operand #0
1563 SDValue Val = N->getOperand(1);
1564 SDValue AUTKey = N->getOperand(2);
1565 SDValue AUTDisc = N->getOperand(3);
1566
1567 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1568 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1569
1570 SDValue AUTAddrDisc, AUTConstDisc;
1571 std::tie(AUTConstDisc, AUTAddrDisc) =
1572 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1573
1574 if (!Subtarget->isX16X17Safer()) {
1575 std::vector<SDValue> Ops = {Val, AUTKey, AUTConstDisc, AUTAddrDisc};
1576 // Copy deactivation symbol if present.
1577 if (N->getNumOperands() > 4)
1578 Ops.push_back(N->getOperand(4));
1579
1580 SDNode *AUT =
1581 CurDAG->getMachineNode(AArch64::AUTxMxN, DL, MVT::i64, MVT::i64, Ops);
1582 ReplaceNode(N, AUT);
1583 } else {
1584 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1585 AArch64::X16, Val, SDValue());
1586 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
1587
1588 SDNode *AUT = CurDAG->getMachineNode(AArch64::AUTx16x17, DL, MVT::i64, Ops);
1589 ReplaceNode(N, AUT);
1590 }
1591}
1592
1593void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
1594 SDLoc DL(N);
1595 // IntrinsicID is operand #0
1596 SDValue Val = N->getOperand(1);
1597 SDValue AUTKey = N->getOperand(2);
1598 SDValue AUTDisc = N->getOperand(3);
1599 SDValue PACKey = N->getOperand(4);
1600 SDValue PACDisc = N->getOperand(5);
1601
1602 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1603 unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();
1604
1605 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1606 PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);
1607
1608 SDValue AUTAddrDisc, AUTConstDisc;
1609 std::tie(AUTConstDisc, AUTAddrDisc) =
1610 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1611
1612 SDValue PACAddrDisc, PACConstDisc;
1613 std::tie(PACConstDisc, PACAddrDisc) =
1614 extractPtrauthBlendDiscriminators(PACDisc, CurDAG);
1615
1616 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1617 AArch64::X16, Val, SDValue());
1618
1619 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
1620 PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};
1621
1622 SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
1623 ReplaceNode(N, AUTPAC);
1624}
1625
1626bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1627 LoadSDNode *LD = cast<LoadSDNode>(N);
1628 if (LD->isUnindexed())
1629 return false;
1630 EVT VT = LD->getMemoryVT();
1631 EVT DstVT = N->getValueType(0);
1632 ISD::MemIndexedMode AM = LD->getAddressingMode();
1633 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1634 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1635 int OffsetVal = (int)OffsetOp->getZExtValue();
1636
1637 // We're not doing validity checking here. That was done when checking
1638 // if we should mark the load as indexed or not. We're just selecting
1639 // the right instruction.
1640 unsigned Opcode = 0;
1641
1642 ISD::LoadExtType ExtType = LD->getExtensionType();
1643 bool InsertTo64 = false;
1644 if (VT == MVT::i64)
1645 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1646 else if (VT == MVT::i32) {
1647 if (ExtType == ISD::NON_EXTLOAD)
1648 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1649 else if (ExtType == ISD::SEXTLOAD)
1650 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1651 else {
1652 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1653 InsertTo64 = true;
1654 // The result of the load is only i32. It's the subreg_to_reg that makes
1655 // it into an i64.
1656 DstVT = MVT::i32;
1657 }
1658 } else if (VT == MVT::i16) {
1659 if (ExtType == ISD::SEXTLOAD) {
1660 if (DstVT == MVT::i64)
1661 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1662 else
1663 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1664 } else {
1665 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1666 InsertTo64 = DstVT == MVT::i64;
1667 // The result of the load is only i32. It's the subreg_to_reg that makes
1668 // it into an i64.
1669 DstVT = MVT::i32;
1670 }
1671 } else if (VT == MVT::i8) {
1672 if (ExtType == ISD::SEXTLOAD) {
1673 if (DstVT == MVT::i64)
1674 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1675 else
1676 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1677 } else {
1678 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1679 InsertTo64 = DstVT == MVT::i64;
1680 // The result of the load is only i32. It's the subreg_to_reg that makes
1681 // it into an i64.
1682 DstVT = MVT::i32;
1683 }
1684 } else if (VT == MVT::f16) {
1685 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1686 } else if (VT == MVT::bf16) {
1687 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1688 } else if (VT == MVT::f32) {
1689 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1690 } else if (VT == MVT::f64 ||
1691 (VT.is64BitVector() && Subtarget->isLittleEndian())) {
1692 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1693 } else if (VT.is128BitVector() && Subtarget->isLittleEndian()) {
1694 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1695 } else if (VT.is64BitVector()) {
1696 if (IsPre || OffsetVal != 8)
1697 return false;
1698 switch (VT.getScalarSizeInBits()) {
1699 case 8:
1700 Opcode = AArch64::LD1Onev8b_POST;
1701 break;
1702 case 16:
1703 Opcode = AArch64::LD1Onev4h_POST;
1704 break;
1705 case 32:
1706 Opcode = AArch64::LD1Onev2s_POST;
1707 break;
1708 case 64:
1709 Opcode = AArch64::LD1Onev1d_POST;
1710 break;
1711 default:
1712 llvm_unreachable("Expected vector element to be a power of 2");
1713 }
1714 } else if (VT.is128BitVector()) {
1715 if (IsPre || OffsetVal != 16)
1716 return false;
1717 switch (VT.getScalarSizeInBits()) {
1718 case 8:
1719 Opcode = AArch64::LD1Onev16b_POST;
1720 break;
1721 case 16:
1722 Opcode = AArch64::LD1Onev8h_POST;
1723 break;
1724 case 32:
1725 Opcode = AArch64::LD1Onev4s_POST;
1726 break;
1727 case 64:
1728 Opcode = AArch64::LD1Onev2d_POST;
1729 break;
1730 default:
1731 llvm_unreachable("Expected vector element to be a power of 2");
1732 }
1733 } else
1734 return false;
1735 SDValue Chain = LD->getChain();
1736 SDValue Base = LD->getBasePtr();
1737 SDLoc dl(N);
1738 // LD1 encodes an immediate offset by using XZR as the offset register.
1739 SDValue Offset = (VT.isVector() && !Subtarget->isLittleEndian())
1740 ? CurDAG->getRegister(AArch64::XZR, MVT::i64)
1741 : CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1742 SDValue Ops[] = { Base, Offset, Chain };
1743 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1744 MVT::Other, Ops);
1745
1746 // Transfer memoperands.
1747 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1748 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1749
1750 // Either way, we're replacing the node, so tell the caller that.
1751 SDValue LoadedVal = SDValue(Res, 1);
1752 if (InsertTo64) {
1753 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1754 LoadedVal =
1755 SDValue(CurDAG->getMachineNode(
1756 AArch64::SUBREG_TO_REG, dl, MVT::i64,
1757 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1758 SubReg),
1759 0);
1760 }
1761
1762 ReplaceUses(SDValue(N, 0), LoadedVal);
1763 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1764 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1765 CurDAG->RemoveDeadNode(N);
1766 return true;
1767}
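// As a rough example of the selection above: a post-incremented zero-extending
// i8 load whose result is used as i64 selects LDRBBpost (which produces an i32
// value) and then wraps result 1 in SUBREG_TO_REG with sub_32 to form the i64.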
1768
1769void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1770 unsigned SubRegIdx) {
1771 SDLoc dl(N);
1772 EVT VT = N->getValueType(0);
1773 SDValue Chain = N->getOperand(0);
1774
1775 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1776 Chain};
1777
1778 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1779
1780 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1781 SDValue SuperReg = SDValue(Ld, 0);
1782 for (unsigned i = 0; i < NumVecs; ++i)
1783 ReplaceUses(SDValue(N, i),
1784 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1785
1786 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1787
1788 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1789 // because it's too simple to have needed special treatment during lowering.
1790 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1791 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1792 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1793 }
1794
1795 CurDAG->RemoveDeadNode(N);
1796}
1797
1798void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1799 unsigned Opc, unsigned SubRegIdx) {
1800 SDLoc dl(N);
1801 EVT VT = N->getValueType(0);
1802 SDValue Chain = N->getOperand(0);
1803
1804 SDValue Ops[] = {N->getOperand(1), // Mem operand
1805 N->getOperand(2), // Incremental
1806 Chain};
1807
1808 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1809 MVT::Untyped, MVT::Other};
1810
1811 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1812
1813 // Update uses of write back register
1814 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1815
1816 // Update uses of vector list
1817 SDValue SuperReg = SDValue(Ld, 1);
1818 if (NumVecs == 1)
1819 ReplaceUses(SDValue(N, 0), SuperReg);
1820 else
1821 for (unsigned i = 0; i < NumVecs; ++i)
1822 ReplaceUses(SDValue(N, i),
1823 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1824
1825 // Update the chain
1826 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1827 CurDAG->RemoveDeadNode(N);
1828}
1829
1830/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1831/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1832/// new Base and an SDValue representing the new offset.
1833std::tuple<unsigned, SDValue, SDValue>
1834AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1835 unsigned Opc_ri,
1836 const SDValue &OldBase,
1837 const SDValue &OldOffset,
1838 unsigned Scale) {
1839 SDValue NewBase = OldBase;
1840 SDValue NewOffset = OldOffset;
1841 // Detect a possible Reg+Imm addressing mode.
1842 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1843 N, OldBase, NewBase, NewOffset);
1844
1845 // Detect a possible reg+reg addressing mode, but only if we haven't already
1846 // detected a Reg+Imm one.
1847 const bool IsRegReg =
1848 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1849
1850 // Select the instruction.
1851 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1852}
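// Note: the reg+imm form (Opc_ri) is preferred whenever the base decomposes
// into a base register plus a VL-scaled immediate in [-8, 7]; the reg+reg form
// (Opc_rr) is only considered when that fails.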
1853
1854enum class SelectTypeKind {
1855 Int1 = 0,
1856 Int = 1,
1857 FP = 2,
1858  AnyType = 3,
1859 };
1860
1861 /// This function selects an opcode from a list of opcodes, which is
1862 /// expected to contain the opcodes for { 8-bit, 16-bit, 32-bit, 64-bit }
1863 /// element types, in this order.
1864template <SelectTypeKind Kind>
1865static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1866 // Only match scalable vector VTs
1867 if (!VT.isScalableVector())
1868 return 0;
1869
1870 EVT EltVT = VT.getVectorElementType();
1871 unsigned Key = VT.getVectorMinNumElements();
1872 switch (Kind) {
1873  case SelectTypeKind::AnyType:
1874    break;
1875  case SelectTypeKind::Int:
1876    if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1877        EltVT != MVT::i64)
1878      return 0;
1879    break;
1880  case SelectTypeKind::Int1:
1881    if (EltVT != MVT::i1)
1882      return 0;
1883    break;
1884 case SelectTypeKind::FP:
1885 if (EltVT == MVT::bf16)
1886 Key = 16;
1887 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
1888 EltVT != MVT::f64)
1889 return 0;
1890 break;
1891 }
1892
1893 unsigned Offset;
1894 switch (Key) {
1895 case 16: // 8-bit or bf16
1896 Offset = 0;
1897 break;
1898 case 8: // 16-bit
1899 Offset = 1;
1900 break;
1901 case 4: // 32-bit
1902 Offset = 2;
1903 break;
1904 case 2: // 64-bit
1905 Offset = 3;
1906 break;
1907 default:
1908 return 0;
1909 }
1910
1911 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1912}
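// For example (with hypothetical opcodes {Op8, Op16, Op32, Op64}): nxv16i8
// selects Op8, nxv8i16 selects Op16, nxv4i32 selects Op32 and nxv2i64 selects
// Op64, while a non-scalable or otherwise unexpected type yields 0.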
1913
1914// This function is almost identical to SelectWhilePair, but has an
1915// extra check on the range of the immediate operand.
1916// TODO: Merge these two functions together at some point?
1917void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
1918 // Immediate can be either 0 or 1.
1919 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
1920 if (Imm->getZExtValue() > 1)
1921 return;
1922
1923 SDLoc DL(N);
1924 EVT VT = N->getValueType(0);
1925 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1926 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1927 SDValue SuperReg = SDValue(WhilePair, 0);
1928
1929 for (unsigned I = 0; I < 2; ++I)
1930 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1931 AArch64::psub0 + I, DL, VT, SuperReg));
1932
1933 CurDAG->RemoveDeadNode(N);
1934}
1935
1936void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1937 SDLoc DL(N);
1938 EVT VT = N->getValueType(0);
1939
1940 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1941
1942 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1943 SDValue SuperReg = SDValue(WhilePair, 0);
1944
1945 for (unsigned I = 0; I < 2; ++I)
1946 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1947 AArch64::psub0 + I, DL, VT, SuperReg));
1948
1949 CurDAG->RemoveDeadNode(N);
1950}
1951
1952void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1953 unsigned Opcode) {
1954 EVT VT = N->getValueType(0);
1955 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
1956 SDValue Ops = createZTuple(Regs);
1957 SDLoc DL(N);
1958 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1959 SDValue SuperReg = SDValue(Intrinsic, 0);
1960 for (unsigned i = 0; i < NumVecs; ++i)
1961 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1962 AArch64::zsub0 + i, DL, VT, SuperReg));
1963
1964 CurDAG->RemoveDeadNode(N);
1965}
1966
1967void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
1968 unsigned Opcode) {
1969 SDLoc DL(N);
1970 EVT VT = N->getValueType(0);
1971 SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
1972 Ops.push_back(/*Chain*/ N->getOperand(0));
1973
1974 SDNode *Instruction =
1975 CurDAG->getMachineNode(Opcode, DL, {MVT::Untyped, MVT::Other}, Ops);
1976 SDValue SuperReg = SDValue(Instruction, 0);
1977
1978 for (unsigned i = 0; i < NumVecs; ++i)
1979 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1980 AArch64::zsub0 + i, DL, VT, SuperReg));
1981
1982 // Copy chain
1983 unsigned ChainIdx = NumVecs;
1984 ReplaceUses(SDValue(N, ChainIdx), SDValue(Instruction, 1));
1985 CurDAG->RemoveDeadNode(N);
1986}
1987
1988void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
1989 unsigned NumVecs,
1990 bool IsZmMulti,
1991 unsigned Opcode,
1992 bool HasPred) {
1993 assert(Opcode != 0 && "Unexpected opcode");
1994
1995 SDLoc DL(N);
1996 EVT VT = N->getValueType(0);
1997 unsigned FirstVecIdx = HasPred ? 2 : 1;
1998
1999 auto GetMultiVecOperand = [=](unsigned StartIdx) {
2000 SmallVector<SDValue, 4> Regs(N->ops().slice(StartIdx, NumVecs));
2001 return createZMulTuple(Regs);
2002 };
2003
2004 SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
2005
2006 SDValue Zm;
2007 if (IsZmMulti)
2008 Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
2009 else
2010 Zm = N->getOperand(NumVecs + FirstVecIdx);
2011
2012 SDNode *Intrinsic;
2013 if (HasPred)
2014 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
2015 N->getOperand(1), Zdn, Zm);
2016 else
2017 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
2018 SDValue SuperReg = SDValue(Intrinsic, 0);
2019 for (unsigned i = 0; i < NumVecs; ++i)
2020 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2021 AArch64::zsub0 + i, DL, VT, SuperReg));
2022
2023 CurDAG->RemoveDeadNode(N);
2024}
2025
2026void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
2027 unsigned Scale, unsigned Opc_ri,
2028 unsigned Opc_rr, bool IsIntr) {
2029 assert(Scale < 5 && "Invalid scaling value.");
2030 SDLoc DL(N);
2031 EVT VT = N->getValueType(0);
2032 SDValue Chain = N->getOperand(0);
2033
2034 // Optimize addressing mode.
2035  SDValue Base, Offset;
2036  unsigned Opc;
2037 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2038 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
2039 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
2040
2041 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
2042 Base, // Memory operand
2043 Offset, Chain};
2044
2045 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2046
2047 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2048 SDValue SuperReg = SDValue(Load, 0);
2049 for (unsigned i = 0; i < NumVecs; ++i)
2050 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2051 AArch64::zsub0 + i, DL, VT, SuperReg));
2052
2053 // Copy chain
2054 unsigned ChainIdx = NumVecs;
2055 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2056 CurDAG->RemoveDeadNode(N);
2057}
2058
2059void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
2060 unsigned NumVecs,
2061 unsigned Scale,
2062 unsigned Opc_ri,
2063 unsigned Opc_rr) {
2064 assert(Scale < 4 && "Invalid scaling value.");
2065 SDLoc DL(N);
2066 EVT VT = N->getValueType(0);
2067 SDValue Chain = N->getOperand(0);
2068
2069 SDValue PNg = N->getOperand(2);
2070 SDValue Base = N->getOperand(3);
2071 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
2072 unsigned Opc;
2073 std::tie(Opc, Base, Offset) =
2074 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
2075
2076 SDValue Ops[] = {PNg, // Predicate-as-counter
2077 Base, // Memory operand
2078 Offset, Chain};
2079
2080 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2081
2082 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2083 SDValue SuperReg = SDValue(Load, 0);
2084 for (unsigned i = 0; i < NumVecs; ++i)
2085 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2086 AArch64::zsub0 + i, DL, VT, SuperReg));
2087
2088 // Copy chain
2089 unsigned ChainIdx = NumVecs;
2090 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2091 CurDAG->RemoveDeadNode(N);
2092}
2093
2094void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
2095 unsigned Opcode) {
2096 if (N->getValueType(0) != MVT::nxv4f32)
2097 return;
2098 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
2099}
2100
2101void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
2102 unsigned NumOutVecs,
2103 unsigned Opc,
2104 uint32_t MaxImm) {
2105 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
2106 if (Imm->getZExtValue() > MaxImm)
2107 return;
2108
2109 SDValue ZtValue;
2110 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2111 return;
2112
2113 SDValue Chain = Node->getOperand(0);
2114 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4), Chain};
2115 SDLoc DL(Node);
2116 EVT VT = Node->getValueType(0);
2117
2118 SDNode *Instruction =
2119 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2120 SDValue SuperReg = SDValue(Instruction, 0);
2121
2122 for (unsigned I = 0; I < NumOutVecs; ++I)
2123 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2124 AArch64::zsub0 + I, DL, VT, SuperReg));
2125
2126 // Copy chain
2127 unsigned ChainIdx = NumOutVecs;
2128 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2129 CurDAG->RemoveDeadNode(Node);
2130}
2131
2132void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
2133 unsigned NumOutVecs,
2134 unsigned Opc) {
2135 SDValue ZtValue;
2136 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2137 return;
2138
2139 SDValue Chain = Node->getOperand(0);
2140 SDValue Ops[] = {ZtValue,
2141 createZMulTuple({Node->getOperand(3), Node->getOperand(4)}),
2142 Chain};
2143
2144 SDLoc DL(Node);
2145 EVT VT = Node->getValueType(0);
2146
2147 SDNode *Instruction =
2148 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2149 SDValue SuperReg = SDValue(Instruction, 0);
2150
2151 for (unsigned I = 0; I < NumOutVecs; ++I)
2152 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2153 AArch64::zsub0 + I, DL, VT, SuperReg));
2154
2155 // Copy chain
2156 unsigned ChainIdx = NumOutVecs;
2157 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2158 CurDAG->RemoveDeadNode(Node);
2159}
2160
2161void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2162 unsigned Op) {
2163 SDLoc DL(N);
2164 EVT VT = N->getValueType(0);
2165
2166 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2167 SDValue Zd = createZMulTuple(Regs);
2168 SDValue Zn = N->getOperand(1 + NumVecs);
2169 SDValue Zm = N->getOperand(2 + NumVecs);
2170
2171 SDValue Ops[] = {Zd, Zn, Zm};
2172
2173 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
2174 SDValue SuperReg = SDValue(Intrinsic, 0);
2175 for (unsigned i = 0; i < NumVecs; ++i)
2176 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2177 AArch64::zsub0 + i, DL, VT, SuperReg));
2178
2179 CurDAG->RemoveDeadNode(N);
2180}
2181
2182bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2183 switch (BaseReg) {
2184 default:
2185 return false;
2186 case AArch64::ZA:
2187 case AArch64::ZAB0:
2188 if (TileNum == 0)
2189 break;
2190 return false;
2191 case AArch64::ZAH0:
2192 if (TileNum <= 1)
2193 break;
2194 return false;
2195 case AArch64::ZAS0:
2196 if (TileNum <= 3)
2197 break;
2198 return false;
2199 case AArch64::ZAD0:
2200 if (TileNum <= 7)
2201 break;
2202 return false;
2203 }
2204
2205 BaseReg += TileNum;
2206 return true;
2207}
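// For example, BaseReg == AArch64::ZAS0 with TileNum == 2 is rewritten to
// ZAS2; this relies on the ZA tile registers of each element size having
// consecutive register numbers.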
2208
2209template <unsigned MaxIdx, unsigned Scale>
2210void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
2211 unsigned BaseReg, unsigned Op) {
2212 unsigned TileNum = 0;
2213 if (BaseReg != AArch64::ZA)
2214 TileNum = N->getConstantOperandVal(2);
2215
2216 if (!SelectSMETile(BaseReg, TileNum))
2217 return;
2218
2219 SDValue SliceBase, Base, Offset;
2220 if (BaseReg == AArch64::ZA)
2221 SliceBase = N->getOperand(2);
2222 else
2223 SliceBase = N->getOperand(3);
2224
2225 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2226 return;
2227
2228 SDLoc DL(N);
2229 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
2230 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
2231 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2232
2233 EVT VT = N->getValueType(0);
2234 for (unsigned I = 0; I < NumVecs; ++I)
2235 ReplaceUses(SDValue(N, I),
2236 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2237 SDValue(Mov, 0)));
2238 // Copy chain
2239 unsigned ChainIdx = NumVecs;
2240 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2241 CurDAG->RemoveDeadNode(N);
2242}
2243
2244void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2245 unsigned Op, unsigned MaxIdx,
2246 unsigned Scale, unsigned BaseReg) {
2247 // Slice can be in different positions
2248 // The array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2249 // The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2250 SDValue SliceBase = N->getOperand(2);
2251 if (BaseReg != AArch64::ZA)
2252 SliceBase = N->getOperand(3);
2253
2254  SDValue Base, Offset;
2255  if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2256 return;
2257  // The correct ZA tile number is computed when the machine instruction is
2258  // emitted; see EmitZAInstr. The DAG cannot select a ZA tile as an output
2259  // register with ZReg.
2260 SDLoc DL(N);
2261  SmallVector<SDValue, 6> Ops;
2262  if (BaseReg != AArch64::ZA)
2263 Ops.push_back(N->getOperand(2));
2264 Ops.push_back(Base);
2265 Ops.push_back(Offset);
2266 Ops.push_back(N->getOperand(0)); //Chain
2267 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2268
2269 EVT VT = N->getValueType(0);
2270 for (unsigned I = 0; I < NumVecs; ++I)
2271 ReplaceUses(SDValue(N, I),
2272 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2273 SDValue(Mov, 0)));
2274
2275 // Copy chain
2276 unsigned ChainIdx = NumVecs;
2277 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2278 CurDAG->RemoveDeadNode(N);
2279}
2280
2281void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2282 unsigned NumOutVecs,
2283 bool IsTupleInput,
2284 unsigned Opc) {
2285 SDLoc DL(N);
2286 EVT VT = N->getValueType(0);
2287 unsigned NumInVecs = N->getNumOperands() - 1;
2288
2289  SmallVector<SDValue, 6> Ops;
2290  if (IsTupleInput) {
2291 assert((NumInVecs == 2 || NumInVecs == 4) &&
2292 "Don't know how to handle multi-register input!");
2293 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumInVecs));
2294 Ops.push_back(createZMulTuple(Regs));
2295 } else {
2296 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2297 for (unsigned I = 0; I < NumInVecs; I++)
2298 Ops.push_back(N->getOperand(1 + I));
2299 }
2300
2301 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2302 SDValue SuperReg = SDValue(Res, 0);
2303
2304 for (unsigned I = 0; I < NumOutVecs; I++)
2305 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2306 AArch64::zsub0 + I, DL, VT, SuperReg));
2307 CurDAG->RemoveDeadNode(N);
2308}
2309
2310void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2311 unsigned Opc) {
2312 SDLoc dl(N);
2313 EVT VT = N->getOperand(2)->getValueType(0);
2314
2315 // Form a REG_SEQUENCE to force register allocation.
2316 bool Is128Bit = VT.getSizeInBits() == 128;
2317 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2318 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2319
2320 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2321 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2322
2323 // Transfer memoperands.
2324 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2325 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2326
2327 ReplaceNode(N, St);
2328}
2329
2330void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2331 unsigned Scale, unsigned Opc_rr,
2332 unsigned Opc_ri) {
2333 SDLoc dl(N);
2334
2335 // Form a REG_SEQUENCE to force register allocation.
2336 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2337 SDValue RegSeq = createZTuple(Regs);
2338
2339 // Optimize addressing mode.
2340 unsigned Opc;
2341  SDValue Base, Offset;
2342  std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2343 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2344 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2345
2346 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2347 Base, // address
2348 Offset, // offset
2349 N->getOperand(0)}; // chain
2350 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2351
2352 ReplaceNode(N, St);
2353}
2354
2355bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2356 SDValue &OffImm) {
2357 SDLoc dl(N);
2358 const DataLayout &DL = CurDAG->getDataLayout();
2359 const TargetLowering *TLI = getTargetLowering();
2360
2361 // Try to match it for the frame address
2362 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2363 int FI = FINode->getIndex();
2364 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2365 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2366 return true;
2367 }
2368
2369 return false;
2370}
2371
2372void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2373 unsigned Opc) {
2374 SDLoc dl(N);
2375 EVT VT = N->getOperand(2)->getValueType(0);
2376 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2377 MVT::Other}; // Type for the Chain
2378
2379 // Form a REG_SEQUENCE to force register allocation.
2380 bool Is128Bit = VT.getSizeInBits() == 128;
2381 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2382 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2383
2384 SDValue Ops[] = {RegSeq,
2385 N->getOperand(NumVecs + 1), // base register
2386 N->getOperand(NumVecs + 2), // Incremental
2387 N->getOperand(0)}; // Chain
2388 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2389
2390 ReplaceNode(N, St);
2391}
2392
2393namespace {
2394/// WidenVector - Given a value in the V64 register class, produce the
2395/// equivalent value in the V128 register class.
2396class WidenVector {
2397 SelectionDAG &DAG;
2398
2399public:
2400 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2401
2402 SDValue operator()(SDValue V64Reg) {
2403 EVT VT = V64Reg.getValueType();
2404 unsigned NarrowSize = VT.getVectorNumElements();
2405 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2406 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2407 SDLoc DL(V64Reg);
2408
2409 SDValue Undef =
2410 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2411 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2412 }
2413};
2414} // namespace
2415
2416/// NarrowVector - Given a value in the V128 register class, produce the
2417/// equivalent value in the V64 register class.
2418 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2419  EVT VT = V128Reg.getValueType();
2420 unsigned WideSize = VT.getVectorNumElements();
2421 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2422 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2423
2424 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2425 V128Reg);
2426}
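// For example, a v4i32 value held in a Q register is narrowed to v2i32 by
// extracting its dsub subregister; WidenVector above performs the inverse.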
2427
2428void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2429 unsigned Opc) {
2430 SDLoc dl(N);
2431 EVT VT = N->getValueType(0);
2432 bool Narrow = VT.getSizeInBits() == 64;
2433
2434 // Form a REG_SEQUENCE to force register allocation.
2435 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2436
2437 if (Narrow)
2438 transform(Regs, Regs.begin(),
2439 WidenVector(*CurDAG));
2440
2441 SDValue RegSeq = createQTuple(Regs);
2442
2443 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2444
2445 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2446
2447 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2448 N->getOperand(NumVecs + 3), N->getOperand(0)};
2449 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2450 SDValue SuperReg = SDValue(Ld, 0);
2451
2452 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2453 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2454 AArch64::qsub2, AArch64::qsub3 };
2455 for (unsigned i = 0; i < NumVecs; ++i) {
2456 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2457 if (Narrow)
2458 NV = NarrowVector(NV, *CurDAG);
2459 ReplaceUses(SDValue(N, i), NV);
2460 }
2461
2462 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2463 CurDAG->RemoveDeadNode(N);
2464}
2465
2466void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2467 unsigned Opc) {
2468 SDLoc dl(N);
2469 EVT VT = N->getValueType(0);
2470 bool Narrow = VT.getSizeInBits() == 64;
2471
2472 // Form a REG_SEQUENCE to force register allocation.
2473 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2474
2475 if (Narrow)
2476 transform(Regs, Regs.begin(),
2477 WidenVector(*CurDAG));
2478
2479 SDValue RegSeq = createQTuple(Regs);
2480
2481 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2482 RegSeq->getValueType(0), MVT::Other};
2483
2484 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2485
2486 SDValue Ops[] = {RegSeq,
2487 CurDAG->getTargetConstant(LaneNo, dl,
2488 MVT::i64), // Lane Number
2489 N->getOperand(NumVecs + 2), // Base register
2490 N->getOperand(NumVecs + 3), // Incremental
2491 N->getOperand(0)};
2492 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2493
2494 // Update uses of the write back register
2495 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2496
2497 // Update uses of the vector list
2498 SDValue SuperReg = SDValue(Ld, 1);
2499 if (NumVecs == 1) {
2500 ReplaceUses(SDValue(N, 0),
2501 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2502 } else {
2503 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2504 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2505 AArch64::qsub2, AArch64::qsub3 };
2506 for (unsigned i = 0; i < NumVecs; ++i) {
2507 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2508 SuperReg);
2509 if (Narrow)
2510 NV = NarrowVector(NV, *CurDAG);
2511 ReplaceUses(SDValue(N, i), NV);
2512 }
2513 }
2514
2515 // Update the Chain
2516 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2517 CurDAG->RemoveDeadNode(N);
2518}
2519
2520void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2521 unsigned Opc) {
2522 SDLoc dl(N);
2523 EVT VT = N->getOperand(2)->getValueType(0);
2524 bool Narrow = VT.getSizeInBits() == 64;
2525
2526 // Form a REG_SEQUENCE to force register allocation.
2527 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2528
2529 if (Narrow)
2530 transform(Regs, Regs.begin(),
2531 WidenVector(*CurDAG));
2532
2533 SDValue RegSeq = createQTuple(Regs);
2534
2535 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2536
2537 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2538 N->getOperand(NumVecs + 3), N->getOperand(0)};
2539 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2540
2541 // Transfer memoperands.
2542 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2543 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2544
2545 ReplaceNode(N, St);
2546}
2547
2548void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2549 unsigned Opc) {
2550 SDLoc dl(N);
2551 EVT VT = N->getOperand(2)->getValueType(0);
2552 bool Narrow = VT.getSizeInBits() == 64;
2553
2554 // Form a REG_SEQUENCE to force register allocation.
2555 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2556
2557 if (Narrow)
2558 transform(Regs, Regs.begin(),
2559 WidenVector(*CurDAG));
2560
2561 SDValue RegSeq = createQTuple(Regs);
2562
2563 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2564 MVT::Other};
2565
2566 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2567
2568 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2569 N->getOperand(NumVecs + 2), // Base Register
2570 N->getOperand(NumVecs + 3), // Incremental
2571 N->getOperand(0)};
2572 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2573
2574 // Transfer memoperands.
2575 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2576 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2577
2578 ReplaceNode(N, St);
2579}
2580
2581 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2582                                        unsigned &Opc, SDValue &Opd0,
2583 unsigned &LSB, unsigned &MSB,
2584 unsigned NumberOfIgnoredLowBits,
2585 bool BiggerPattern) {
2586 assert(N->getOpcode() == ISD::AND &&
2587 "N must be a AND operation to call this function");
2588
2589 EVT VT = N->getValueType(0);
2590
2591  // Here we could test the type of VT and return false when the type does not
2592  // match, but since that check is done prior to this call in the current
2593  // context, we turn it into an assert to avoid redundant code.
2594 assert((VT == MVT::i32 || VT == MVT::i64) &&
2595 "Type checking must have been done before calling this function");
2596
2597 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2598 // changed the AND node to a 32-bit mask operation. We'll have to
2599 // undo that as part of the transform here if we want to catch all
2600 // the opportunities.
2601 // Currently the NumberOfIgnoredLowBits argument helps to recover
2602 // from these situations when matching bigger pattern (bitfield insert).
2603
2604 // For unsigned extracts, check for a shift right and mask
2605 uint64_t AndImm = 0;
2606 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2607 return false;
2608
2609 const SDNode *Op0 = N->getOperand(0).getNode();
2610
2611 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2612 // simplified. Try to undo that
2613 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2614
2615 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2616 if (AndImm & (AndImm + 1))
2617 return false;
2618
2619 bool ClampMSB = false;
2620 uint64_t SrlImm = 0;
2621 // Handle the SRL + ANY_EXTEND case.
2622 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2623 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2624 // Extend the incoming operand of the SRL to 64-bit.
2625 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2626 // Make sure to clamp the MSB so that we preserve the semantics of the
2627 // original operations.
2628 ClampMSB = true;
2629 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2630              isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
2631                                    SrlImm)) {
2632 // If the shift result was truncated, we can still combine them.
2633 Opd0 = Op0->getOperand(0).getOperand(0);
2634
2635 // Use the type of SRL node.
2636 VT = Opd0->getValueType(0);
2637 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2638 Opd0 = Op0->getOperand(0);
2639 ClampMSB = (VT == MVT::i32);
2640 } else if (BiggerPattern) {
2641 // Let's pretend a 0 shift right has been performed.
2642 // The resulting code will be at least as good as the original one
2643 // plus it may expose more opportunities for bitfield insert pattern.
2644 // FIXME: Currently we limit this to the bigger pattern, because
2645 // some optimizations expect AND and not UBFM.
2646 Opd0 = N->getOperand(0);
2647 } else
2648 return false;
2649
2650 // Bail out on large immediates. This happens when no proper
2651 // combining/constant folding was performed.
2652 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2653 LLVM_DEBUG(
2654 (dbgs() << N
2655 << ": Found large shift immediate, this should not happen\n"));
2656 return false;
2657 }
2658
2659 LSB = SrlImm;
2660 MSB = SrlImm +
2661 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2662 : llvm::countr_one<uint64_t>(AndImm)) -
2663 1;
2664 if (ClampMSB)
2665 // Since we're moving the extend before the right shift operation, we need
2666 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2667 // the zeros which would get shifted in with the original right shift
2668 // operation.
2669 MSB = MSB > 31 ? 31 : MSB;
2670
2671 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2672 return true;
2673}
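// A worked example of the pattern above (illustrative only):
//   (i32 (and (srl x, 3), 0xff))
// has AndImm == 0xff and SrlImm == 3, so LSB = 3 and MSB = 3 + 8 - 1 = 10, and
// the node is selected as UBFMWri x, 3, 10, an unsigned extract of bits [10:3].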
2674
2675 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2676                                              SDValue &Opd0, unsigned &Immr,
2677 unsigned &Imms) {
2678 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2679
2680 EVT VT = N->getValueType(0);
2681 unsigned BitWidth = VT.getSizeInBits();
2682 assert((VT == MVT::i32 || VT == MVT::i64) &&
2683 "Type checking must have been done before calling this function");
2684
2685 SDValue Op = N->getOperand(0);
2686 if (Op->getOpcode() == ISD::TRUNCATE) {
2687 Op = Op->getOperand(0);
2688 VT = Op->getValueType(0);
2689 BitWidth = VT.getSizeInBits();
2690 }
2691
2692 uint64_t ShiftImm;
2693 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2694 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2695 return false;
2696
2697 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2698 if (ShiftImm + Width > BitWidth)
2699 return false;
2700
2701 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2702 Opd0 = Op.getOperand(0);
2703 Immr = ShiftImm;
2704 Imms = ShiftImm + Width - 1;
2705 return true;
2706}
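// For example, (i32 (sign_extend_inreg (srl x, 8), i8)) gives ShiftImm == 8
// and Width == 8, so it is selected as SBFMWri x, 8, 15, a signed extract of
// bits [15:8].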
2707
2708 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2709                                           SDValue &Opd0, unsigned &LSB,
2710 unsigned &MSB) {
2711 // We are looking for the following pattern which basically extracts several
2712  // contiguous bits from the source value and places them at the LSB of the
2713  // destination value; all other bits of the destination value are set to zero:
2714 //
2715 // Value2 = AND Value, MaskImm
2716 // SRL Value2, ShiftImm
2717 //
2718  // where MaskImm >> ShiftImm determines the extracted bit width.
2719 //
2720 // This gets selected into a single UBFM:
2721 //
2722 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2723 //
2724
2725 if (N->getOpcode() != ISD::SRL)
2726 return false;
2727
2728 uint64_t AndMask = 0;
2729 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2730 return false;
2731
2732 Opd0 = N->getOperand(0).getOperand(0);
2733
2734 uint64_t SrlImm = 0;
2735 if (!isIntImmediate(N->getOperand(1), SrlImm))
2736 return false;
2737
2738 // Check whether we really have several bits extract here.
2739 if (!isMask_64(AndMask >> SrlImm))
2740 return false;
2741
2742 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2743 LSB = SrlImm;
2744 MSB = llvm::Log2_64(AndMask);
2745 return true;
2746}
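// For example, (srl (and x, 0xff0), 4) has AndMask >> SrlImm == 0xff, which is
// a mask, so it is selected as UBFM x, 4, 11 (LSB = 4, MSB = Log2_64(0xff0)).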
2747
2748static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2749 unsigned &Immr, unsigned &Imms,
2750 bool BiggerPattern) {
2751 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2752 "N must be a SHR/SRA operation to call this function");
2753
2754 EVT VT = N->getValueType(0);
2755
2756  // Here we could test the type of VT and return false when the type does not
2757  // match, but since that check is done prior to this call in the current
2758  // context, we turn it into an assert to avoid redundant code.
2759 assert((VT == MVT::i32 || VT == MVT::i64) &&
2760 "Type checking must have been done before calling this function");
2761
2762 // Check for AND + SRL doing several bits extract.
2763 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2764 return true;
2765
2766 // We're looking for a shift of a shift.
2767 uint64_t ShlImm = 0;
2768 uint64_t TruncBits = 0;
2769 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2770 Opd0 = N->getOperand(0).getOperand(0);
2771 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2772 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2773 // We are looking for a shift of truncate. Truncate from i64 to i32 could
2774 // be considered as setting high 32 bits as zero. Our strategy here is to
2775 // always generate 64bit UBFM. This consistency will help the CSE pass
2776 // later find more redundancy.
2777 Opd0 = N->getOperand(0).getOperand(0);
2778 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2779 VT = Opd0.getValueType();
2780 assert(VT == MVT::i64 && "the promoted type should be i64");
2781 } else if (BiggerPattern) {
2782 // Let's pretend a 0 shift left has been performed.
2783 // FIXME: Currently we limit this to the bigger pattern case,
2784 // because some optimizations expect AND and not UBFM
2785 Opd0 = N->getOperand(0);
2786 } else
2787 return false;
2788
2789 // Missing combines/constant folding may have left us with strange
2790 // constants.
2791 if (ShlImm >= VT.getSizeInBits()) {
2792 LLVM_DEBUG(
2793 (dbgs() << N
2794 << ": Found large shift immediate, this should not happen\n"));
2795 return false;
2796 }
2797
2798 uint64_t SrlImm = 0;
2799 if (!isIntImmediate(N->getOperand(1), SrlImm))
2800 return false;
2801
2802 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2803 "bad amount in shift node!");
2804 int immr = SrlImm - ShlImm;
2805 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2806 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2807 // SRA requires a signed extraction
2808 if (VT == MVT::i32)
2809 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2810 else
2811 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2812 return true;
2813}
2814
2815bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2816 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2817
2818 EVT VT = N->getValueType(0);
2819 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2820 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2821 return false;
2822
2823 uint64_t ShiftImm;
2824 SDValue Op = N->getOperand(0);
2825 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2826 return false;
2827
2828 SDLoc dl(N);
2829 // Extend the incoming operand of the shift to 64-bits.
2830 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2831 unsigned Immr = ShiftImm;
2832 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2833 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2834 CurDAG->getTargetConstant(Imms, dl, VT)};
2835 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2836 return true;
2837}
2838
2839static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2840 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2841 unsigned NumberOfIgnoredLowBits = 0,
2842 bool BiggerPattern = false) {
2843 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2844 return false;
2845
2846 switch (N->getOpcode()) {
2847 default:
2848 if (!N->isMachineOpcode())
2849 return false;
2850 break;
2851 case ISD::AND:
2852 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2853 NumberOfIgnoredLowBits, BiggerPattern);
2854 case ISD::SRL:
2855 case ISD::SRA:
2856 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2857
2858  case ISD::SIGN_EXTEND_INREG:
2859    return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2860 }
2861
2862 unsigned NOpc = N->getMachineOpcode();
2863 switch (NOpc) {
2864 default:
2865 return false;
2866 case AArch64::SBFMWri:
2867 case AArch64::UBFMWri:
2868 case AArch64::SBFMXri:
2869 case AArch64::UBFMXri:
2870 Opc = NOpc;
2871 Opd0 = N->getOperand(0);
2872 Immr = N->getConstantOperandVal(1);
2873 Imms = N->getConstantOperandVal(2);
2874 return true;
2875 }
2876 // Unreachable
2877 return false;
2878}
2879
2880bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2881 unsigned Opc, Immr, Imms;
2882 SDValue Opd0;
2883 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2884 return false;
2885
2886 EVT VT = N->getValueType(0);
2887 SDLoc dl(N);
2888
2889 // If the bit extract operation is 64bit but the original type is 32bit, we
2890 // need to add one EXTRACT_SUBREG.
2891 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2892 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2893 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2894
2895 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2896 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
2897 MVT::i32, SDValue(BFM, 0));
2898 ReplaceNode(N, Inner.getNode());
2899 return true;
2900 }
2901
2902 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2903 CurDAG->getTargetConstant(Imms, dl, VT)};
2904 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2905 return true;
2906}
2907
2908/// Does DstMask form a complementary pair with the mask provided by
2909/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2910/// this asks whether DstMask zeroes precisely those bits that will be set by
2911/// the other half.
2912static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2913 unsigned NumberOfIgnoredHighBits, EVT VT) {
2914 assert((VT == MVT::i32 || VT == MVT::i64) &&
2915 "i32 or i64 mask type expected!");
2916 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2917
2918 // Enable implicitTrunc as we're intentionally ignoring high bits.
2919 APInt SignificantDstMask =
2920 APInt(BitWidth, DstMask, /*isSigned=*/false, /*implicitTrunc=*/true);
2921 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2922
2923 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2924 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2925}
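// For example, with VT == i32, DstMask == 0xffff0000 and BitsToBeInserted ==
// 0x0000ffff form a complementary pair: the masks do not overlap and together
// cover all 32 bits, so a BFI/BFXIL can merge the two halves.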
2926
2927// Look for bits that will be useful for later uses.
2928 // A bit is considered useless as soon as it is dropped and never used
2929 // before it has been dropped.
2930 // E.g., looking for the useful bits of x:
2931 // 1. y = x & 0x7
2932 // 2. z = y >> 2
2933 // After #1, the useful bits of x are 0x7; these useful bits of x live
2934 // through y.
2935// After #2, the useful bits of x are 0x4.
2936// However, if x is used on an unpredictable instruction, then all its bits
2937// are useful.
2938// E.g.
2939// 1. y = x & 0x7
2940// 2. z = y >> 2
2941// 3. str x, [@x]
2942static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2943
2944 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2945                                               unsigned Depth) {
2946 uint64_t Imm =
2947 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2948 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2949 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2950 getUsefulBits(Op, UsefulBits, Depth + 1);
2951}
2952
2953 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2954                                              uint64_t Imm, uint64_t MSB,
2955 unsigned Depth) {
2956 // inherit the bitwidth value
2957 APInt OpUsefulBits(UsefulBits);
2958 OpUsefulBits = 1;
2959
2960 if (MSB >= Imm) {
2961 OpUsefulBits <<= MSB - Imm + 1;
2962 --OpUsefulBits;
2963 // The interesting part will be in the lower part of the result
2964 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2965 // The interesting part was starting at Imm in the argument
2966 OpUsefulBits <<= Imm;
2967 } else {
2968 OpUsefulBits <<= MSB + 1;
2969 --OpUsefulBits;
2970 // The interesting part will be shifted in the result
2971 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2972 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2973 // The interesting part was at zero in the argument
2974 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2975 }
2976
2977 UsefulBits &= OpUsefulBits;
2978}
2979
2980static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2981 unsigned Depth) {
2982 uint64_t Imm =
2983 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2984 uint64_t MSB =
2985 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2986
2987 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2988}
2989
2990 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2991                                               unsigned Depth) {
2992 uint64_t ShiftTypeAndValue =
2993 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2994 APInt Mask(UsefulBits);
2995 Mask.clearAllBits();
2996 Mask.flipAllBits();
2997
2998 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2999 // Shift Left
3000 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
3001 Mask <<= ShiftAmt;
3002 getUsefulBits(Op, Mask, Depth + 1);
3003 Mask.lshrInPlace(ShiftAmt);
3004 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
3005 // Shift Right
3006 // We do not handle AArch64_AM::ASR, because the sign will change the
3007 // number of useful bits
3008 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
3009 Mask.lshrInPlace(ShiftAmt);
3010 getUsefulBits(Op, Mask, Depth + 1);
3011 Mask <<= ShiftAmt;
3012 } else
3013 return;
3014
3015 UsefulBits &= Mask;
3016}
3017
3018static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
3019 unsigned Depth) {
3020 uint64_t Imm =
3021 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3022 uint64_t MSB =
3023 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
3024
3025 APInt OpUsefulBits(UsefulBits);
3026 OpUsefulBits = 1;
3027
3028 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
3029 ResultUsefulBits.flipAllBits();
3030 APInt Mask(UsefulBits.getBitWidth(), 0);
3031
3032 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
3033
3034 if (MSB >= Imm) {
3035 // The instruction is a BFXIL.
3036 uint64_t Width = MSB - Imm + 1;
3037 uint64_t LSB = Imm;
3038
3039 OpUsefulBits <<= Width;
3040 --OpUsefulBits;
3041
3042 if (Op.getOperand(1) == Orig) {
3043 // Copy the low bits from the result to bits starting from LSB.
3044 Mask = ResultUsefulBits & OpUsefulBits;
3045 Mask <<= LSB;
3046 }
3047
3048 if (Op.getOperand(0) == Orig)
3049 // Bits starting from LSB in the input contribute to the result.
3050 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3051 } else {
3052 // The instruction is a BFI.
3053 uint64_t Width = MSB + 1;
3054 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
3055
3056 OpUsefulBits <<= Width;
3057 --OpUsefulBits;
3058 OpUsefulBits <<= LSB;
3059
3060 if (Op.getOperand(1) == Orig) {
3061 // Copy the bits from the result to the zero bits.
3062 Mask = ResultUsefulBits & OpUsefulBits;
3063 Mask.lshrInPlace(LSB);
3064 }
3065
3066 if (Op.getOperand(0) == Orig)
3067 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3068 }
3069
3070 UsefulBits &= Mask;
3071}
3072
3073static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
3074 SDValue Orig, unsigned Depth) {
3075
3076 // Users of this node should have already been instruction selected
3077 // FIXME: Can we turn that into an assert?
3078 if (!UserNode->isMachineOpcode())
3079 return;
3080
3081 switch (UserNode->getMachineOpcode()) {
3082 default:
3083 return;
3084 case AArch64::ANDSWri:
3085 case AArch64::ANDSXri:
3086 case AArch64::ANDWri:
3087 case AArch64::ANDXri:
3088 // We increment Depth only when we call the getUsefulBits
3089 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
3090 Depth);
3091 case AArch64::UBFMWri:
3092 case AArch64::UBFMXri:
3093 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
3094
3095 case AArch64::ORRWrs:
3096 case AArch64::ORRXrs:
3097 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
3098 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
3099 Depth);
3100 return;
3101 case AArch64::BFMWri:
3102 case AArch64::BFMXri:
3103 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
3104
3105 case AArch64::STRBBui:
3106 case AArch64::STURBBi:
3107 if (UserNode->getOperand(0) != Orig)
3108 return;
3109 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
3110 return;
3111
3112 case AArch64::STRHHui:
3113 case AArch64::STURHHi:
3114 if (UserNode->getOperand(0) != Orig)
3115 return;
3116 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
3117 return;
3118 }
3119}
3120
3121static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
3122  if (Depth >= SelectionDAG::MaxRecursionDepth)
3123    return;
3124 // Initialize UsefulBits
3125 if (!Depth) {
3126 unsigned Bitwidth = Op.getScalarValueSizeInBits();
3127 // At the beginning, assume every produced bits is useful
3128 UsefulBits = APInt(Bitwidth, 0);
3129 UsefulBits.flipAllBits();
3130 }
3131 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
3132
3133 for (SDNode *Node : Op.getNode()->users()) {
3134 // A use cannot produce useful bits
3135 APInt UsefulBitsForUse = APInt(UsefulBits);
3136 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
3137 UsersUsefulBits |= UsefulBitsForUse;
3138 }
3139 // UsefulBits contains the produced bits that are meaningful for the
3140 // current definition, thus a user cannot make a bit meaningful at
3141 // this point
3142 UsefulBits &= UsersUsefulBits;
3143}
3144
3145/// Create a machine node performing a notional SHL of Op by ShlAmount. If
3146/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3147/// 0, return Op unchanged.
3148static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
3149 if (ShlAmount == 0)
3150 return Op;
3151
3152 EVT VT = Op.getValueType();
3153 SDLoc dl(Op);
3154 unsigned BitWidth = VT.getSizeInBits();
3155 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
3156
3157 SDNode *ShiftNode;
3158 if (ShlAmount > 0) {
3159 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
3160 ShiftNode = CurDAG->getMachineNode(
3161 UBFMOpc, dl, VT, Op,
3162 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
3163 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
3164 } else {
3165 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3166 assert(ShlAmount < 0 && "expected right shift");
3167 int ShrAmount = -ShlAmount;
3168 ShiftNode = CurDAG->getMachineNode(
3169 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
3170 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
3171 }
3172
3173 return SDValue(ShiftNode, 0);
3174}
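// For example, on i32 a ShlAmount of 4 emits UBFMWri Op, 28, 27 (i.e. LSL #4),
// while a ShlAmount of -4 emits UBFMWri Op, 4, 31 (i.e. LSR #4).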
3175
3176// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3177static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3178 bool BiggerPattern,
3179 const uint64_t NonZeroBits,
3180 SDValue &Src, int &DstLSB,
3181 int &Width);
3182
3183// For bit-field-positioning pattern "shl VAL, N)".
3184static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3185 bool BiggerPattern,
3186 const uint64_t NonZeroBits,
3187 SDValue &Src, int &DstLSB,
3188 int &Width);
3189
3190/// Does this tree qualify as an attempt to move a bitfield into position,
3191/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
3192 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
3193                                     bool BiggerPattern, SDValue &Src,
3194 int &DstLSB, int &Width) {
3195 EVT VT = Op.getValueType();
3196 unsigned BitWidth = VT.getSizeInBits();
3197 (void)BitWidth;
3198 assert(BitWidth == 32 || BitWidth == 64);
3199
3200 KnownBits Known = CurDAG->computeKnownBits(Op);
3201
3202 // Non-zero in the sense that they're not provably zero, which is the key
3203 // point if we want to use this value
3204 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
3205 if (!isShiftedMask_64(NonZeroBits))
3206 return false;
3207
3208 switch (Op.getOpcode()) {
3209 default:
3210 break;
3211 case ISD::AND:
3212 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3213 NonZeroBits, Src, DstLSB, Width);
3214 case ISD::SHL:
3215 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3216 NonZeroBits, Src, DstLSB, Width);
3217 }
3218
3219 return false;
3220}
3221
3222 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3223                                            bool BiggerPattern,
3224 const uint64_t NonZeroBits,
3225 SDValue &Src, int &DstLSB,
3226 int &Width) {
3227 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3228
3229 EVT VT = Op.getValueType();
3230 assert((VT == MVT::i32 || VT == MVT::i64) &&
3231 "Caller guarantees VT is one of i32 or i64");
3232 (void)VT;
3233
3234 uint64_t AndImm;
3235 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
3236 return false;
3237
3238 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3239 // 1) (AndImm & (1 << POS) == 0)
3240 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3241 //
3242 // 1) and 2) don't agree so something must be wrong (e.g., in
3243 // 'SelectionDAG::computeKnownBits')
3244 assert((~AndImm & NonZeroBits) == 0 &&
3245 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3246
3247 SDValue AndOp0 = Op.getOperand(0);
3248
3249 uint64_t ShlImm;
3250 SDValue ShlOp0;
3251 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3252 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3253 ShlOp0 = AndOp0.getOperand(0);
3254 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3255 isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
3256 ShlImm)) {
3257 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3258
3259 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3260 SDValue ShlVal = AndOp0.getOperand(0);
3261
3262 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3263 // expect ShlVal's type to be MVT::i32.
3264 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3265
3266 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3267 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3268 } else
3269 return false;
3270
3271 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3272 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3273 // AndOp0+AND.
3274 if (!BiggerPattern && !AndOp0.hasOneUse())
3275 return false;
3276
3277 DstLSB = llvm::countr_zero(NonZeroBits);
3278 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3279
3280 // Bail out on large Width. This happens when no proper combining / constant
3281 // folding was performed.
3282 if (Width >= (int)VT.getSizeInBits()) {
3283 // If VT is i64, Width > 64 is not possible since NonZeroBits is uint64_t, and
3284 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3285 // "val".
3286 // If VT is i32, Width >= 32 means:
3287 // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
3288 // demands at least 'Width' bits (after dag-combiner). This together with
3289 // the `any_extend` Op (undefined higher bits) indicates a missed combination
3290 // when lowering the 'and' IR instruction to a machine IR instruction.
3291 LLVM_DEBUG(
3292 dbgs()
3293 << "Found large Width in bit-field-positioning -- this indicates no "
3294 "proper combining / constant folding was performed\n");
3295 return false;
3296 }
3297
3298 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3299 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3300 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3301 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3302 // which case it is not profitable to insert an extra shift.
3303 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3304 return false;
3305
3306 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3307 return true;
3308}
3309
3310// For the node (shl (and val, mask), N), returns true if the node is
3311// equivalent to UBFIZ.
3312static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3313 SDValue &Src, int &DstLSB,
3314 int &Width) {
3315 // The caller should have verified that Op is a left shift by a constant
3316 // shift amount; the asserts below check that.
3317 assert(Op.getOpcode() == ISD::SHL &&
3318 "Op.getNode() should be a SHL node to call this function");
3319 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3320 "Op.getNode() should shift ShlImm to call this function");
3321
3322 uint64_t AndImm = 0;
3323 SDValue Op0 = Op.getOperand(0);
3324 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3325 return false;
3326
3327 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3328 if (isMask_64(ShiftedAndImm)) {
3329 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3330 // should end with Mask, and could be prefixed with random bits if those
3331 // bits are shifted out.
3332 //
3333 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3334 // the AND result corresponding to those bits are shifted out, so it's fine
3335 // to not extract them.
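    // Concretely, for "(shl (and val, 0xff), 8)" on i64 this yields Width == 8,
    // DstLSB == 8 and Src == val, i.e. a "ubfiz xd, val, #8, #8".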
3336 Width = llvm::countr_one(ShiftedAndImm);
3337 DstLSB = ShlImm;
3338 Src = Op0.getOperand(0);
3339 return true;
3340 }
3341 return false;
3342}
3343
3344static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3345 bool BiggerPattern,
3346 const uint64_t NonZeroBits,
3347 SDValue &Src, int &DstLSB,
3348 int &Width) {
3349 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3350
3351 EVT VT = Op.getValueType();
3352 assert((VT == MVT::i32 || VT == MVT::i64) &&
3353 "Caller guarantees that type is i32 or i64");
3354 (void)VT;
3355
3356 uint64_t ShlImm;
3357 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3358 return false;
3359
3360 if (!BiggerPattern && !Op.hasOneUse())
3361 return false;
3362
3363 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3364 return true;
3365
3366 DstLSB = llvm::countr_zero(NonZeroBits);
3367 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3368
3369 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3370 return false;
3371
3372 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3373 return true;
3374}
3375
3376static bool isShiftedMask(uint64_t Mask, EVT VT) {
3377 assert(VT == MVT::i32 || VT == MVT::i64);
3378 if (VT == MVT::i32)
3379 return isShiftedMask_32(Mask);
3380 return isShiftedMask_64(Mask);
3381}
3382
3383// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3384// inserted only sets known zero bits.
3385static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
3386 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3387
3388 EVT VT = N->getValueType(0);
3389 if (VT != MVT::i32 && VT != MVT::i64)
3390 return false;
3391
3392 unsigned BitWidth = VT.getSizeInBits();
3393
3394 uint64_t OrImm;
3395 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3396 return false;
3397
3398 // Skip this transformation if the ORR immediate can be encoded in the ORR.
3399 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3400 // performance neutral.
3401 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
3402 return false;
3403
3404 uint64_t MaskImm;
3405 SDValue And = N->getOperand(0);
3406 // Must be a single use AND with an immediate operand.
3407 if (!And.hasOneUse() ||
3408 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3409 return false;
3410
3411 // Compute the Known Zero for the AND as this allows us to catch more general
3412 // cases than just looking for AND with imm.
3413 KnownBits Known = CurDAG->computeKnownBits(And);
3414
3415 // Non-zero in the sense that they're not provably zero, which is the key
3416 // point if we want to use this value.
3417 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3418
3419 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3420 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3421 return false;
3422
3423 // The bits being inserted must only set those bits that are known to be zero.
3424 if ((OrImm & NotKnownZero) != 0) {
3425 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3426 // currently handle this case.
3427 return false;
3428 }
3429
3430 // BFI/BFXIL dst, src, #lsb, #width.
3431 int LSB = llvm::countr_one(NotKnownZero);
3432 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3433
3434 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3435 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3436 unsigned ImmS = Width - 1;
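  // For example, with BitWidth == 32, LSB == 8 and Width == 8 this gives
  // ImmR == 24 and ImmS == 7, i.e. "bfi wd, wn, #8, #8" expressed as
  // "bfm wd, wn, #24, #7".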
3437
3438 // If we're creating a BFI instruction, avoid cases where we need more
3439 // instructions to materialize the BFI constant as compared to the original
3440 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3441 // should be no worse in this case.
3442 bool IsBFI = LSB != 0;
3443 uint64_t BFIImm = OrImm >> LSB;
3444 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3445 // We have a BFI instruction and we know the constant can't be materialized
3446 // with a ORR-immediate with the zero register.
3447 unsigned OrChunks = 0, BFIChunks = 0;
3448 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3449 if (((OrImm >> Shift) & 0xFFFF) != 0)
3450 ++OrChunks;
3451 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3452 ++BFIChunks;
3453 }
3454 if (BFIChunks > OrChunks)
3455 return false;
3456 }
3457
3458 // Materialize the constant to be inserted.
3459 SDLoc DL(N);
3460 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3461 SDNode *MOVI = CurDAG->getMachineNode(
3462 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3463
3464 // Create the BFI/BFXIL instruction.
3465 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3466 CurDAG->getTargetConstant(ImmR, DL, VT),
3467 CurDAG->getTargetConstant(ImmS, DL, VT)};
3468 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3469 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3470 return true;
3471}
3472
3473static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
3474 SDValue &ShiftedOperand,
3475 uint64_t &EncodedShiftImm) {
3476 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3477 if (!Dst.hasOneUse())
3478 return false;
3479
3480 EVT VT = Dst.getValueType();
3481 assert((VT == MVT::i32 || VT == MVT::i64) &&
3482 "Caller should guarantee that VT is one of i32 or i64");
3483 const unsigned SizeInBits = VT.getSizeInBits();
3484
3485 SDLoc DL(Dst.getNode());
3486 uint64_t AndImm, ShlImm;
3487 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3488 isShiftedMask_64(AndImm)) {
3489 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3490 SDValue DstOp0 = Dst.getOperand(0);
3491 if (!DstOp0.hasOneUse())
3492 return false;
3493
3494 // An example to illustrate the transformation
3495 // From:
3496 // lsr x8, x1, #1
3497 // and x8, x8, #0x3f80
3498 // bfxil x8, x1, #0, #7
3499 // To:
3500 // and x8, x23, #0x7f
3501 // ubfx x9, x23, #8, #7
3502 // orr x23, x8, x9, lsl #7
3503 //
3504 // The number of instructions remains the same, but ORR is faster than BFXIL
3505 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3506 // the dependency chain is improved after the transformation.
3507 uint64_t SrlImm;
3508 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3509 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3510 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3511 unsigned MaskWidth =
3512 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3513 unsigned UBFMOpc =
3514 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3515 SDNode *UBFMNode = CurDAG->getMachineNode(
3516 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3517 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3518 VT),
3519 CurDAG->getTargetConstant(
3520 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3521 ShiftedOperand = SDValue(UBFMNode, 0);
3522 EncodedShiftImm = AArch64_AM::getShifterImm(
3523 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3524 return true;
3525 }
3526 }
3527 return false;
3528 }
3529
3530 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3531 ShiftedOperand = Dst.getOperand(0);
3532 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3533 return true;
3534 }
3535
3536 uint64_t SrlImm;
3537 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3538 ShiftedOperand = Dst.getOperand(0);
3539 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3540 return true;
3541 }
3542 return false;
3543}
3544
3545// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3546// the operands and select it to AArch64::ORR with shifted registers if
3547// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3548static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3549 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3550 const bool BiggerPattern) {
3551 EVT VT = N->getValueType(0);
3552 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3553 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3554 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3555 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3556 assert((VT == MVT::i32 || VT == MVT::i64) &&
3557 "Expect result type to be i32 or i64 since N is combinable to BFM");
3558 SDLoc DL(N);
3559
3560 // Bail out if BFM simplifies away one node in BFM Dst.
3561 if (OrOpd1 != Dst)
3562 return false;
3563
3564 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3565 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3566 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3567 if (BiggerPattern) {
3568 uint64_t SrcAndImm;
3569 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3570 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3571 // OrOpd0 = AND Src, #Mask
3572 // So BFM simplifies away one AND node from Src and doesn't simplify away
3573 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3574 // one node (from Rd), ORR is better since it has higher throughput and
3575 // smaller latency than BFM on many AArch64 processors (and for the rest
3576 // ORR is at least as good as BFM).
3577 SDValue ShiftedOperand;
3578 uint64_t EncodedShiftImm;
3579 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3580 EncodedShiftImm)) {
3581 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3582 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3583 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3584 return true;
3585 }
3586 }
3587 return false;
3588 }
3589
3590 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3591
3592 uint64_t ShlImm;
3593 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3594 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3595 SDValue Ops[] = {
3596 Dst, Src,
3597 CurDAG->getTargetConstant(
3598 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3599 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3600 return true;
3601 }
3602
3603 // Select the following pattern to left-shifted operand rather than BFI.
3604 // %val1 = op ..
3605 // %val2 = shl %val1, #imm
3606 // %res = or %val1, %val2
3607 //
3608 // If N is selected to be BFI, we know that
3609 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3610 // BFI) 2) OrOpd1 would be the destination operand (i.e., preserved)
3611 //
3612 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
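    // For example, "%res = or %val1, (shl %val1, 4)" becomes
    // "orr xd, x1, x1, lsl #4" instead of a shift followed by a BFI.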
3613 if (OrOpd0.getOperand(0) == OrOpd1) {
3614 SDValue Ops[] = {
3615 OrOpd1, OrOpd1,
3616 CurDAG->getTargetConstant(
3617 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3618 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3619 return true;
3620 }
3621 }
3622
3623 uint64_t SrlImm;
3624 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3625 // Select the following pattern to right-shifted operand rather than BFXIL.
3626 // %val1 = op ..
3627 // %val2 = lshr %val1, #imm
3628 // %res = or %val1, %val2
3629 //
3630 // If N is selected to be BFXIL, we know that
3631 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3632 // BFXIL) 2) OrOpd1 would be the destination operand (i.e., preserved)
3633 //
3634 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3635 if (OrOpd0.getOperand(0) == OrOpd1) {
3636 SDValue Ops[] = {
3637 OrOpd1, OrOpd1,
3638 CurDAG->getTargetConstant(
3639 AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
3640 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3641 return true;
3642 }
3643 }
3644
3645 return false;
3646}
3647
3648static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3649 SelectionDAG *CurDAG) {
3650 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3651
3652 EVT VT = N->getValueType(0);
3653 if (VT != MVT::i32 && VT != MVT::i64)
3654 return false;
3655
3656 unsigned BitWidth = VT.getSizeInBits();
3657
3658 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3659 // have the expected shape. Try to undo that.
3660
3661 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3662 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3663
3664 // Given an OR operation, check if we have the following pattern
3665 // ubfm c, b, imm, imm2 (or something that does the same job, see
3666 // isBitfieldExtractOp)
3667 // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
3668 // countTrailingZeros(mask2) == imm2 - imm + 1
3669 // f = d | c
3670 // if yes, replace the OR instruction with:
3671 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3672
3673 // OR is commutative, check all combinations of operand order and values of
3674 // BiggerPattern, i.e.
3675 // Opd0, Opd1, BiggerPattern=false
3676 // Opd1, Opd0, BiggerPattern=false
3677 // Opd0, Opd1, BiggerPattern=true
3678 // Opd1, Opd0, BiggerPattern=true
3679 // Several of these combinations may match, so check with BiggerPattern=false
3680 // first since that will produce better results by matching more instructions
3681 // and/or inserting fewer extra instructions.
3682 for (int I = 0; I < 4; ++I) {
3683
3684 SDValue Dst, Src;
3685 unsigned ImmR, ImmS;
3686 bool BiggerPattern = I / 2;
3687 SDValue OrOpd0Val = N->getOperand(I % 2);
3688 SDNode *OrOpd0 = OrOpd0Val.getNode();
3689 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3690 SDNode *OrOpd1 = OrOpd1Val.getNode();
3691
3692 unsigned BFXOpc;
3693 int DstLSB, Width;
3694 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3695 NumberOfIgnoredLowBits, BiggerPattern)) {
3696 // Check that the returned opcode is compatible with the pattern,
3697 // i.e., same type and zero extended (U and not S)
3698 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3699 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3700 continue;
3701
3702 // Compute the width of the bitfield insertion
3703 DstLSB = 0;
3704 Width = ImmS - ImmR + 1;
3705 // FIXME: This constraint is to catch bitfield insertion; we may
3706 // want to widen the pattern if we want to grab the general bitfield
3707 // move case.
3708 if (Width <= 0)
3709 continue;
3710
3711 // If the mask on the insertee is correct, we have a BFXIL operation. We
3712 // can share the ImmR and ImmS values from the already-computed UBFM.
3713 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3714 BiggerPattern,
3715 Src, DstLSB, Width)) {
3716 ImmR = (BitWidth - DstLSB) % BitWidth;
3717 ImmS = Width - 1;
3718 } else
3719 continue;
3720
3721 // Check the second part of the pattern
3722 EVT VT = OrOpd1Val.getValueType();
3723 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3724
3725 // Compute the Known Zero for the candidate of the first operand.
3726 // This allows us to catch more general cases than just looking for
3727 // an AND with an immediate. Indeed, simplify-demanded-bits may have removed
3728 // the AND instruction because it proved it was useless.
3729 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3730
3731 // Check if there is enough room for the second operand to appear
3732 // in the first one
3733 APInt BitsToBeInserted =
3734 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3735
3736 if ((BitsToBeInserted & ~Known.Zero) != 0)
3737 continue;
3738
3739 // Set the first operand
3740 uint64_t Imm;
3741 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3742 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3743 // In that case, we can eliminate the AND
3744 Dst = OrOpd1->getOperand(0);
3745 else
3746 // Maybe the AND has been removed by simplify-demanded-bits
3747 // or is useful because it discards more bits
3748 Dst = OrOpd1Val;
3749
3750 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3751 // with shifted operand is more efficient.
3752 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3753 BiggerPattern))
3754 return true;
3755
3756 // both parts match
3757 SDLoc DL(N);
3758 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3759 CurDAG->getTargetConstant(ImmS, DL, VT)};
3760 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3761 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3762 return true;
3763 }
3764
3765 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3766 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3767 // mask (e.g., 0x000ffff0).
3768 uint64_t Mask0Imm, Mask1Imm;
3769 SDValue And0 = N->getOperand(0);
3770 SDValue And1 = N->getOperand(1);
3771 if (And0.hasOneUse() && And1.hasOneUse() &&
3772 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3773 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3774 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3775 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3776
3777 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3778 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3779 // bits to be inserted.
3780 if (isShiftedMask(Mask0Imm, VT)) {
3781 std::swap(And0, And1);
3782 std::swap(Mask0Imm, Mask1Imm);
3783 }
3784
3785 SDValue Src = And1->getOperand(0);
3786 SDValue Dst = And0->getOperand(0);
3787 unsigned LSB = llvm::countr_zero(Mask1Imm);
3788 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3789
3790 // The BFXIL inserts the low-order bits from a source register, so right
3791 // shift the needed bits into place.
3792 SDLoc DL(N);
3793 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3794 uint64_t LsrImm = LSB;
3795 if (Src->hasOneUse() &&
3796 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3797 (LsrImm + LSB) < BitWidth) {
3798 Src = Src->getOperand(0);
3799 LsrImm += LSB;
3800 }
3801
3802 SDNode *LSR = CurDAG->getMachineNode(
3803 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3804 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3805
3806 // BFXIL is an alias of BFM, so translate to BFM operands.
3807 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3808 unsigned ImmS = Width - 1;
3809
3810 // Create the BFXIL instruction.
3811 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3812 CurDAG->getTargetConstant(ImmR, DL, VT),
3813 CurDAG->getTargetConstant(ImmS, DL, VT)};
3814 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3815 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3816 return true;
3817 }
3818
3819 return false;
3820}
3821
3822bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3823 if (N->getOpcode() != ISD::OR)
3824 return false;
3825
3826 APInt NUsefulBits;
3827 getUsefulBits(SDValue(N, 0), NUsefulBits);
3828
3829 // If none of the bits are useful, just return UNDEF.
3830 if (!NUsefulBits) {
3831 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3832 return true;
3833 }
3834
3835 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3836 return true;
3837
3838 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3839}
3840
3841/// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3842/// equivalent of a left shift by a constant amount followed by an and masking
3843/// out a contiguous set of bits.
3844bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3845 if (N->getOpcode() != ISD::AND)
3846 return false;
3847
3848 EVT VT = N->getValueType(0);
3849 if (VT != MVT::i32 && VT != MVT::i64)
3850 return false;
3851
3852 SDValue Op0;
3853 int DstLSB, Width;
3854 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3855 Op0, DstLSB, Width))
3856 return false;
3857
3858 // ImmR is the rotate right amount.
3859 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3860 // ImmS is the most significant bit of the source to be moved.
3861 unsigned ImmS = Width - 1;
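  // For example, "(and (shl x, 3), 0x1f8)" on i32 gives DstLSB == 3 and
  // Width == 6, so ImmR == 29 and ImmS == 5, i.e. "ubfiz wd, x, #3, #6".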
3862
3863 SDLoc DL(N);
3864 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3865 CurDAG->getTargetConstant(ImmS, DL, VT)};
3866 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3867 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3868 return true;
3869}
3870
3871/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3872/// variable shift/rotate instructions.
3873bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3874 EVT VT = N->getValueType(0);
3875
3876 unsigned Opc;
3877 switch (N->getOpcode()) {
3878 case ISD::ROTR:
3879 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3880 break;
3881 case ISD::SHL:
3882 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3883 break;
3884 case ISD::SRL:
3885 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3886 break;
3887 case ISD::SRA:
3888 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3889 break;
3890 default:
3891 return false;
3892 }
3893
3894 uint64_t Size;
3895 uint64_t Bits;
3896 if (VT == MVT::i32) {
3897 Bits = 5;
3898 Size = 32;
3899 } else if (VT == MVT::i64) {
3900 Bits = 6;
3901 Size = 64;
3902 } else
3903 return false;
3904
3905 SDValue ShiftAmt = N->getOperand(1);
3906 SDLoc DL(N);
3907 SDValue NewShiftAmt;
3908
3909 // Skip over an extend of the shift amount.
3910 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3911 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3912 ShiftAmt = ShiftAmt->getOperand(0);
3913
3914 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3915 SDValue Add0 = ShiftAmt->getOperand(0);
3916 SDValue Add1 = ShiftAmt->getOperand(1);
3917 uint64_t Add0Imm;
3918 uint64_t Add1Imm;
3919 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
3920 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3921 // to avoid the ADD/SUB.
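      // For example, "(srl x, (add y, 64))" on i64 selects to "lsrv x, y",
      // since LSRV only reads the low 6 bits of the shift amount.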
3922 NewShiftAmt = Add0;
3923 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3924 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
3925 (Add0Imm % Size == 0)) {
3926 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3927 // to generate a NEG instead of a SUB from a constant.
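      // For example, "(shl x, (sub 64, y))" on i64 becomes "lslv x, (neg y)",
      // because 64 - y and -y are equal modulo 64.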
3928 unsigned NegOpc;
3929 unsigned ZeroReg;
3930 EVT SubVT = ShiftAmt->getValueType(0);
3931 if (SubVT == MVT::i32) {
3932 NegOpc = AArch64::SUBWrr;
3933 ZeroReg = AArch64::WZR;
3934 } else {
3935 assert(SubVT == MVT::i64);
3936 NegOpc = AArch64::SUBXrr;
3937 ZeroReg = AArch64::XZR;
3938 }
3939 SDValue Zero =
3940 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3941 MachineSDNode *Neg =
3942 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
3943 NewShiftAmt = SDValue(Neg, 0);
3944 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3945 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
3946 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3947 // to generate a NOT instead of a SUB from a constant.
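      // For example, "(sra x, (sub 63, y))" on i64 becomes "asrv x, (orn xzr, y)",
      // because 63 - y and ~y are equal modulo 64.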
3948 unsigned NotOpc;
3949 unsigned ZeroReg;
3950 EVT SubVT = ShiftAmt->getValueType(0);
3951 if (SubVT == MVT::i32) {
3952 NotOpc = AArch64::ORNWrr;
3953 ZeroReg = AArch64::WZR;
3954 } else {
3955 assert(SubVT == MVT::i64);
3956 NotOpc = AArch64::ORNXrr;
3957 ZeroReg = AArch64::XZR;
3958 }
3959 SDValue Zero =
3960 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3961 MachineSDNode *Not =
3962 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
3963 NewShiftAmt = SDValue(Not, 0);
3964 } else
3965 return false;
3966 } else {
3967 // If the shift amount is masked with an AND, check that the mask covers the
3968 // bits that are implicitly ANDed off by the above opcodes and if so, skip
3969 // the AND.
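    // For example, "(srl x, (and y, 63))" on i64 can drop the AND and select
    // to "lsrv x, y", since countr_one(63) == 6 covers all demanded bits.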
3970 uint64_t MaskImm;
3971 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
3972 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
3973 return false;
3974
3975 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
3976 return false;
3977
3978 NewShiftAmt = ShiftAmt->getOperand(0);
3979 }
3980
3981 // Narrow/widen the shift amount to match the size of the shift operation.
3982 if (VT == MVT::i32)
3983 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
3984 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
3985 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
3986 MachineSDNode *Ext = CurDAG->getMachineNode(
3987 AArch64::SUBREG_TO_REG, DL, VT,
3988 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
3989 NewShiftAmt = SDValue(Ext, 0);
3990 }
3991
3992 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
3993 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3994 return true;
3995}
3996
3997static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
3998 SDValue &FixedPos,
3999 unsigned RegWidth,
4000 bool isReciprocal) {
4001 APFloat FVal(0.0);
4002 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
4003 FVal = CN->getValueAPF();
4004 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
4005 // Some otherwise illegal constants are allowed in this case.
4006 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
4007 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
4008 return false;
4009
4010 ConstantPoolSDNode *CN =
4011 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
4012 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
4013 } else
4014 return false;
4015
4016 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
4017 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
4018 // x-register.
4019 //
4020 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
4021 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
4022 // integers.
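  // For example, "fcvtzs w0, s0, #4" computes convertToInt(Val * 16), so for
  // fbits == 4 the multiplier matched here must be 16.0 (or its reciprocal
  // 0.0625 when isReciprocal is set, since the exact inverse is taken below).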
4023 bool IsExact;
4024
4025 if (isReciprocal)
4026 if (!FVal.getExactInverse(&FVal))
4027 return false;
4028
4029 // fbits is between 1 and 64 in the worst-case, which means the fmul
4030 // could have 2^64 as an actual operand. Need 65 bits of precision.
4031 APSInt IntVal(65, true);
4032 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
4033
4034 // N.b. isPowerOf2 also checks for > 0.
4035 if (!IsExact || !IntVal.isPowerOf2())
4036 return false;
4037 unsigned FBits = IntVal.logBase2();
4038
4039 // Checks above should have guaranteed that we haven't lost information in
4040 // finding FBits, but it must still be in range.
4041 if (FBits == 0 || FBits > RegWidth) return false;
4042
4043 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
4044 return true;
4045}
4046
4047bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
4048 unsigned RegWidth) {
4049 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4050 false);
4051}
4052
4053bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
4054 SDValue &FixedPos,
4055 unsigned RegWidth) {
4056 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4057 true);
4058}
4059
4060// Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
4061// fields of the string, obtains the integer values from them, and combines
4062// these into a single value to be used in the MRS/MSR instruction.
4063static int getIntOperandFromRegisterString(StringRef RegString) {
4064 SmallVector<StringRef, 5> Fields;
4065 RegString.split(Fields, ':');
4066
4067 if (Fields.size() == 1)
4068 return -1;
4069
4070 assert(Fields.size() == 5
4071 && "Invalid number of fields in read register string");
4072
4073 SmallVector<int, 5> Ops;
4074 bool AllIntFields = true;
4075
4076 for (StringRef Field : Fields) {
4077 unsigned IntField;
4078 AllIntFields &= !Field.getAsInteger(10, IntField);
4079 Ops.push_back(IntField);
4080 }
4081
4082 assert(AllIntFields &&
4083 "Unexpected non-integer value in special register string.");
4084 (void)AllIntFields;
4085
4086 // Need to combine the integer fields of the string into a single value
4087 // based on the bit encoding of MRS/MSR instruction.
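  // For example, the string "3:3:13:0:2" (TPIDR_EL0) combines to
  // (3 << 14) | (3 << 11) | (13 << 7) | (0 << 3) | 2 == 0xde82.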
4088 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
4089 (Ops[3] << 3) | (Ops[4]);
4090}
4091
4092// Lower the read_register intrinsic to an MRS instruction node if the special
4093// register string argument is either of the form detailed in the ACLE (the
4094// form described in getIntOperandFromRegisterString) or is a named register
4095// known by the MRS SysReg mapper.
4096bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
4097 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4098 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4099 SDLoc DL(N);
4100
4101 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
4102
4103 unsigned Opcode64Bit = AArch64::MRS;
4104 int Imm = getIntOperandFromRegisterString(RegString->getString());
4105 if (Imm == -1) {
4106 // No match, so use the sysreg mapper to map the remaining possible strings
4107 // to the value for the register to be used for the instruction operand.
4108 const auto *TheReg =
4109 AArch64SysReg::lookupSysRegByName(RegString->getString());
4110 if (TheReg && TheReg->Readable &&
4111 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4112 Imm = TheReg->Encoding;
4113 else
4114 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4115
4116 if (Imm == -1) {
4117 // Still no match, see if this is "pc" or give up.
4118 if (!ReadIs128Bit && RegString->getString() == "pc") {
4119 Opcode64Bit = AArch64::ADR;
4120 Imm = 0;
4121 } else {
4122 return false;
4123 }
4124 }
4125 }
4126
4127 SDValue InChain = N->getOperand(0);
4128 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
4129 if (!ReadIs128Bit) {
4130 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
4131 {SysRegImm, InChain});
4132 } else {
4133 SDNode *MRRS = CurDAG->getMachineNode(
4134 AArch64::MRRS, DL,
4135 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
4136 {SysRegImm, InChain});
4137
4138 // Sysregs are not endian. The even register always contains the low half
4139 // of the register.
4140 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
4141 SDValue(MRRS, 0));
4142 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
4143 SDValue(MRRS, 0));
4144 SDValue OutChain = SDValue(MRRS, 1);
4145
4146 ReplaceUses(SDValue(N, 0), Lo);
4147 ReplaceUses(SDValue(N, 1), Hi);
4148 ReplaceUses(SDValue(N, 2), OutChain);
4149 };
4150 return true;
4151}
4152
4153// Lower the write_register intrinsic to an MSR instruction node if the special
4154// register string argument is either of the form detailed in the ACLE (the
4155// form described in getIntOperandFromRegisterString) or is a named register
4156// known by the MSR SysReg mapper.
4157bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
4158 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4159 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4160 SDLoc DL(N);
4161
4162 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
4163
4164 if (!WriteIs128Bit) {
4165 // Check if the register was one of those allowed as the pstatefield value
4166 // in the MSR (immediate) instruction. To accept the values allowed in the
4167 // pstatefield for the MSR (immediate) instruction, we also require that an
4168 // immediate value has been provided as an argument; we know that this is
4169 // the case as it has been ensured by semantic checking.
4170 auto trySelectPState = [&](auto PMapper, unsigned State) {
4171 if (PMapper) {
4172 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
4173 "Expected a constant integer expression.");
4174 unsigned Reg = PMapper->Encoding;
4175 uint64_t Immed = N->getConstantOperandVal(2);
4176 CurDAG->SelectNodeTo(
4177 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
4178 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
4179 return true;
4180 }
4181 return false;
4182 };
4183
4184 if (trySelectPState(
4185 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
4186 AArch64::MSRpstateImm4))
4187 return true;
4188 if (trySelectPState(
4189 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
4190 AArch64::MSRpstateImm1))
4191 return true;
4192 }
4193
4194 int Imm = getIntOperandFromRegisterString(RegString->getString());
4195 if (Imm == -1) {
4196 // Use the sysreg mapper to attempt to map the remaining possible strings
4197 // to the value for the register to be used for the MSR (register)
4198 // instruction operand.
4199 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
4200 if (TheReg && TheReg->Writeable &&
4201 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4202 Imm = TheReg->Encoding;
4203 else
4204 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4205
4206 if (Imm == -1)
4207 return false;
4208 }
4209
4210 SDValue InChain = N->getOperand(0);
4211 if (!WriteIs128Bit) {
4212 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
4213 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4214 N->getOperand(2), InChain);
4215 } else {
4216 // No endian swap. The lower half always goes into the even subreg, and the
4217 // higher half always into the odd subreg.
4218 SDNode *Pair = CurDAG->getMachineNode(
4219 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
4220 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
4221 MVT::i32),
4222 N->getOperand(2),
4223 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
4224 N->getOperand(3),
4225 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
4226
4227 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
4228 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4229 SDValue(Pair, 0), InChain);
4230 }
4231
4232 return true;
4233}
4234
4235/// We've got special pseudo-instructions for these
4236bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4237 unsigned Opcode;
4238 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4239
4240 // Leave IR for LSE if subtarget supports it.
4241 if (Subtarget->hasLSE()) return false;
4242
4243 if (MemTy == MVT::i8)
4244 Opcode = AArch64::CMP_SWAP_8;
4245 else if (MemTy == MVT::i16)
4246 Opcode = AArch64::CMP_SWAP_16;
4247 else if (MemTy == MVT::i32)
4248 Opcode = AArch64::CMP_SWAP_32;
4249 else if (MemTy == MVT::i64)
4250 Opcode = AArch64::CMP_SWAP_64;
4251 else
4252 llvm_unreachable("Unknown AtomicCmpSwap type");
4253
4254 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4255 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4256 N->getOperand(0)};
4257 SDNode *CmpSwap = CurDAG->getMachineNode(
4258 Opcode, SDLoc(N),
4259 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4260
4261 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4262 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4263
4264 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4265 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4266 CurDAG->RemoveDeadNode(N);
4267
4268 return true;
4269}
4270
4271bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4272 SDValue &Shift, bool Negate) {
4273 if (!isa<ConstantSDNode>(N))
4274 return false;
4275
4276 SDLoc DL(N);
4277 APInt Val =
4278 cast<ConstantSDNode>(N)->getAPIntValue().trunc(VT.getFixedSizeInBits());
4279
4280 if (Negate)
4281 Val = -Val;
4282
4283 switch (VT.SimpleTy) {
4284 case MVT::i8:
4285 // All immediates are supported.
4286 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4287 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4288 return true;
4289 case MVT::i16:
4290 case MVT::i32:
4291 case MVT::i64:
4292 // Support 8bit unsigned immediates.
4293 if ((Val & ~0xff) == 0) {
4294 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4295 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4296 return true;
4297 }
4298 // Support 16bit unsigned immediates that are a multiple of 256.
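    // For example, 0x2300 is selected as Imm == 0x23 with Shift == 8.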
4299 if ((Val & ~0xff00) == 0) {
4300 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4301 Imm = CurDAG->getTargetConstant(Val.lshr(8).getZExtValue(), DL, MVT::i32);
4302 return true;
4303 }
4304 break;
4305 default:
4306 break;
4307 }
4308
4309 return false;
4310}
4311
4312bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4313 SDValue &Imm, SDValue &Shift,
4314 bool Negate) {
4315 if (!isa<ConstantSDNode>(N))
4316 return false;
4317
4318 SDLoc DL(N);
4319 int64_t Val = cast<ConstantSDNode>(N)
4320 ->getAPIntValue()
4321 .trunc(VT.getFixedSizeInBits())
4322 .getSExtValue();
4323
4324 if (Negate)
4325 Val = -Val;
4326
4327 // Signed saturating instructions treat their immediate operand as unsigned,
4328 // whereas the related intrinsics define their operands to be signed. This
4329 // means we can only use the immediate form when the operand is non-negative.
4330 if (Val < 0)
4331 return false;
4332
4333 switch (VT.SimpleTy) {
4334 case MVT::i8:
4335 // All positive immediates are supported.
4336 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4337 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4338 return true;
4339 case MVT::i16:
4340 case MVT::i32:
4341 case MVT::i64:
4342 // Support 8bit positive immediates.
4343 if (Val <= 255) {
4344 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4345 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4346 return true;
4347 }
4348 // Support 16bit positive immediates that are a multiple of 256.
4349 if (Val <= 65280 && Val % 256 == 0) {
4350 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4351 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4352 return true;
4353 }
4354 break;
4355 default:
4356 break;
4357 }
4358
4359 return false;
4360}
4361
4362bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4363 SDValue &Shift) {
4364 if (!isa<ConstantSDNode>(N))
4365 return false;
4366
4367 SDLoc DL(N);
4368 int64_t Val = cast<ConstantSDNode>(N)
4369 ->getAPIntValue()
4370 .trunc(VT.getFixedSizeInBits())
4371 .getSExtValue();
4372 int32_t ImmVal, ShiftVal;
4373 if (!AArch64_AM::isSVECpyDupImm(VT.getScalarSizeInBits(), Val, ImmVal,
4374 ShiftVal))
4375 return false;
4376
4377 Shift = CurDAG->getTargetConstant(ShiftVal, DL, MVT::i32);
4378 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
4379 return true;
4380}
4381
4382bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4383 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4384 int64_t ImmVal = CNode->getSExtValue();
4385 SDLoc DL(N);
4386 if (ImmVal >= -128 && ImmVal < 128) {
4387 Imm = CurDAG->getSignedTargetConstant(ImmVal, DL, MVT::i32);
4388 return true;
4389 }
4390 }
4391 return false;
4392}
4393
4394bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4395 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4396 uint64_t ImmVal = CNode->getZExtValue();
4397
4398 switch (VT.SimpleTy) {
4399 case MVT::i8:
4400 ImmVal &= 0xFF;
4401 break;
4402 case MVT::i16:
4403 ImmVal &= 0xFFFF;
4404 break;
4405 case MVT::i32:
4406 ImmVal &= 0xFFFFFFFF;
4407 break;
4408 case MVT::i64:
4409 break;
4410 default:
4411 llvm_unreachable("Unexpected type");
4412 }
4413
4414 if (ImmVal < 256) {
4415 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4416 return true;
4417 }
4418 }
4419 return false;
4420}
4421
4422bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4423 bool Invert) {
4424 uint64_t ImmVal;
4425 if (auto CI = dyn_cast<ConstantSDNode>(N))
4426 ImmVal = CI->getZExtValue();
4427 else if (auto CFP = dyn_cast<ConstantFPSDNode>(N))
4428 ImmVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
4429 else
4430 return false;
4431
4432 if (Invert)
4433 ImmVal = ~ImmVal;
4434
4435 // Shift mask depending on type size.
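  // For example, an i16 immediate 0x1234 is replicated to 0x1234123412341234
  // before computing the 64-bit logical-immediate encoding.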
4436 switch (VT.SimpleTy) {
4437 case MVT::i8:
4438 ImmVal &= 0xFF;
4439 ImmVal |= ImmVal << 8;
4440 ImmVal |= ImmVal << 16;
4441 ImmVal |= ImmVal << 32;
4442 break;
4443 case MVT::i16:
4444 ImmVal &= 0xFFFF;
4445 ImmVal |= ImmVal << 16;
4446 ImmVal |= ImmVal << 32;
4447 break;
4448 case MVT::i32:
4449 ImmVal &= 0xFFFFFFFF;
4450 ImmVal |= ImmVal << 32;
4451 break;
4452 case MVT::i64:
4453 break;
4454 default:
4455 llvm_unreachable("Unexpected type");
4456 }
4457
4458 uint64_t encoding;
4459 if (!AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding))
4460 return false;
4461
4462 Imm = CurDAG->getTargetConstant(encoding, SDLoc(N), MVT::i64);
4463 return true;
4464}
4465
4466// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4467// Rather than attempt to normalise everything we can sometimes saturate the
4468// shift amount during selection. This function also allows for consistent
4469// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4470// required by the instructions.
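// For example, with Low == 1, High == 8 and AllowSaturation set, a shift
// amount of 200 saturates to the immediate #8.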
4471bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4472 uint64_t High, bool AllowSaturation,
4473 SDValue &Imm) {
4474 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4475 uint64_t ImmVal = CN->getZExtValue();
4476
4477 // Reject shift amounts that are too small.
4478 if (ImmVal < Low)
4479 return false;
4480
4481 // Reject or saturate shift amounts that are too big.
4482 if (ImmVal > High) {
4483 if (!AllowSaturation)
4484 return false;
4485 ImmVal = High;
4486 }
4487
4488 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4489 return true;
4490 }
4491
4492 return false;
4493}
4494
4495bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4496 // tagp(FrameIndex, IRGstack, tag_offset):
4497 // since the offset between FrameIndex and IRGstack is a compile-time
4498 // constant, this can be lowered to a single ADDG instruction.
4499 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4500 return false;
4501 }
4502
4503 SDValue IRG_SP = N->getOperand(2);
4504 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4505 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4506 return false;
4507 }
4508
4509 const TargetLowering *TLI = getTargetLowering();
4510 SDLoc DL(N);
4511 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4512 SDValue FiOp = CurDAG->getTargetFrameIndex(
4513 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4514 int TagOffset = N->getConstantOperandVal(3);
4515
4516 SDNode *Out = CurDAG->getMachineNode(
4517 AArch64::TAGPstack, DL, MVT::i64,
4518 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4519 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4520 ReplaceNode(N, Out);
4521 return true;
4522}
4523
4524void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4525 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4526 "llvm.aarch64.tagp third argument must be an immediate");
4527 if (trySelectStackSlotTagP(N))
4528 return;
4529 // FIXME: above applies in any case when offset between Op1 and Op2 is a
4530 // compile-time constant, not just for stack allocations.
4531
4532 // General case for unrelated pointers in Op1 and Op2.
4533 SDLoc DL(N);
4534 int TagOffset = N->getConstantOperandVal(3);
4535 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4536 {N->getOperand(1), N->getOperand(2)});
4537 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4538 {SDValue(N1, 0), N->getOperand(2)});
4539 SDNode *N3 = CurDAG->getMachineNode(
4540 AArch64::ADDG, DL, MVT::i64,
4541 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4542 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4543 ReplaceNode(N, N3);
4544}
4545
4546bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4547 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4548
4549 // Bail when not a "cast" like insert_subvector.
4550 if (N->getConstantOperandVal(2) != 0)
4551 return false;
4552 if (!N->getOperand(0).isUndef())
4553 return false;
4554
4555 // Bail when normal isel should do the job.
4556 EVT VT = N->getValueType(0);
4557 EVT InVT = N->getOperand(1).getValueType();
4558 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4559 return false;
4560 if (InVT.getSizeInBits() <= 128)
4561 return false;
4562
4563 // NOTE: We can only get here when doing fixed length SVE code generation.
4564 // We do manual selection because the types involved are not linked to real
4565 // registers (despite being legal) and must be coerced into SVE registers.
4566
4568 "Expected to insert into a packed scalable vector!");
4569
4570 SDLoc DL(N);
4571 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4572 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4573 N->getOperand(1), RC));
4574 return true;
4575}
4576
4577bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4578 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4579
4580 // Bail when not a "cast" like extract_subvector.
4581 if (N->getConstantOperandVal(1) != 0)
4582 return false;
4583
4584 // Bail when normal isel can do the job.
4585 EVT VT = N->getValueType(0);
4586 EVT InVT = N->getOperand(0).getValueType();
4587 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4588 return false;
4589 if (VT.getSizeInBits() <= 128)
4590 return false;
4591
4592 // NOTE: We can only get here when doing fixed length SVE code generation.
4593 // We do manual selection because the types involved are not linked to real
4594 // registers (despite being legal) and must be coerced into SVE registers.
4595
4597 "Expected to extract from a packed scalable vector!");
4598
4599 SDLoc DL(N);
4600 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4601 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4602 N->getOperand(0), RC));
4603 return true;
4604}
4605
4606bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4607 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4608
4609 SDValue N0 = N->getOperand(0);
4610 SDValue N1 = N->getOperand(1);
4611
4612 EVT VT = N->getValueType(0);
4613 SDLoc DL(N);
4614
4615 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4616 // A rotate by a constant is a funnel shift in IR, which is expanded to
4617 // an OR with shifted operands.
4618 // We do the following transform:
4619 // OR N0, N1 -> xar (x, y, imm)
4620 // Where:
4621 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4622 // N0 = SHL_PRED true, V, splat(bits-imm)
4623 // V = (xor x, y)
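  // For example, for nxv2i64 with a rotate amount of 10, N1 is
  // SRL_PRED(true, V, splat(10)) and N0 is SHL_PRED(true, V, splat(54)),
  // which selects to XAR_ZZZI_D with Imm == 10.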
4624 if (VT.isScalableVector() &&
4625 (Subtarget->hasSVE2() ||
4626 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4627 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4628 N1.getOpcode() != AArch64ISD::SRL_PRED)
4629 std::swap(N0, N1);
4630 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4631 N1.getOpcode() != AArch64ISD::SRL_PRED)
4632 return false;
4633
4634 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4635 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4636 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4637 return false;
4638
4639 if (N0.getOperand(1) != N1.getOperand(1))
4640 return false;
4641
4642 SDValue R1, R2;
4643 bool IsXOROperand = true;
4644 if (N0.getOperand(1).getOpcode() != ISD::XOR) {
4645 IsXOROperand = false;
4646 } else {
4647 R1 = N0.getOperand(1).getOperand(0);
4648 R2 = N1.getOperand(1).getOperand(1);
4649 }
4650
4651 APInt ShlAmt, ShrAmt;
4652 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4653 !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
4654 return false;
4655
4656 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4657 return false;
4658
4659 if (!IsXOROperand) {
4660 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4661 SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, VT, Zero);
4662 SDValue MOVIV = SDValue(MOV, 0);
4663
4664 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4665 SDNode *SubRegToReg = CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL,
4666 VT, Zero, MOVIV, ZSub);
4667
4668 R1 = N1->getOperand(1);
4669 R2 = SDValue(SubRegToReg, 0);
4670 }
4671
4672 SDValue Imm =
4673 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4674
4675 SDValue Ops[] = {R1, R2, Imm};
4676 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4677 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4678 AArch64::XAR_ZZZI_D})) {
4679 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4680 return true;
4681 }
4682 return false;
4683 }
4684
4685 // We have Neon SHA3 XAR operation for v2i64 but for types
4686 // v4i32, v8i16, v16i8 we can use SVE operations when SVE2-SHA3
4687 // is available.
4688 EVT SVT;
4689 switch (VT.getSimpleVT().SimpleTy) {
4690 case MVT::v4i32:
4691 case MVT::v2i32:
4692 SVT = MVT::nxv4i32;
4693 break;
4694 case MVT::v8i16:
4695 case MVT::v4i16:
4696 SVT = MVT::nxv8i16;
4697 break;
4698 case MVT::v16i8:
4699 case MVT::v8i8:
4700 SVT = MVT::nxv16i8;
4701 break;
4702 case MVT::v2i64:
4703 case MVT::v1i64:
4704 SVT = Subtarget->hasSHA3() ? MVT::v2i64 : MVT::nxv2i64;
4705 break;
4706 default:
4707 return false;
4708 }
4709
4710 if ((!SVT.isScalableVector() && !Subtarget->hasSHA3()) ||
4711 (SVT.isScalableVector() && !Subtarget->hasSVE2()))
4712 return false;
4713
4714 if (N0->getOpcode() != AArch64ISD::VSHL ||
4715 N1->getOpcode() != AArch64ISD::VLSHR)
4716 return false;
4717
4718 if (N0->getOperand(0) != N1->getOperand(0))
4719 return false;
4720
4721 SDValue R1, R2;
4722 bool IsXOROperand = true;
4723 if (N1->getOperand(0)->getOpcode() != ISD::XOR) {
4724 IsXOROperand = false;
4725 } else {
4726 SDValue XOR = N0.getOperand(0);
4727 R1 = XOR.getOperand(0);
4728 R2 = XOR.getOperand(1);
4729 }
4730
4731 unsigned HsAmt = N0.getConstantOperandVal(1);
4732 unsigned ShAmt = N1.getConstantOperandVal(1);
4733
4734 SDValue Imm = CurDAG->getTargetConstant(
4735 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4736
4737 unsigned VTSizeInBits = VT.getScalarSizeInBits();
4738 if (ShAmt + HsAmt != VTSizeInBits)
4739 return false;
4740
4741 if (!IsXOROperand) {
4742 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4743 SDNode *MOV =
4744 CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, MVT::v2i64, Zero);
4745 SDValue MOVIV = SDValue(MOV, 0);
4746
4747 R1 = N1->getOperand(0);
4748 R2 = MOVIV;
4749 }
4750
4751 if (SVT != VT) {
4752 SDValue Undef =
4753 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, SVT), 0);
4754
4755 if (SVT.isScalableVector() && VT.is64BitVector()) {
4756 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4757
4758 SDValue UndefQ = SDValue(
4759 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, QVT), 0);
4760 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4761
4762 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4763 UndefQ, R1, DSub),
4764 0);
4765 if (R2.getValueType() == VT)
4766 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4767 UndefQ, R2, DSub),
4768 0);
4769 }
4770
4771 SDValue SubReg = CurDAG->getTargetConstant(
4772 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL, MVT::i32);
4773
4774 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, Undef,
4775 R1, SubReg),
4776 0);
4777
4778 if (SVT.isScalableVector() || R2.getValueType() != SVT)
4779 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT,
4780 Undef, R2, SubReg),
4781 0);
4782 }
4783
4784 SDValue Ops[] = {R1, R2, Imm};
4785 SDNode *XAR = nullptr;
4786
4787 if (SVT.isScalableVector()) {
4788 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4789 SVT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4790 AArch64::XAR_ZZZI_D}))
4791 XAR = CurDAG->getMachineNode(Opc, DL, SVT, Ops);
4792 } else {
4793 XAR = CurDAG->getMachineNode(AArch64::XAR, DL, SVT, Ops);
4794 }
4795
4796 assert(XAR && "Unexpected NULL value for XAR instruction in DAG");
4797
4798 if (SVT != VT) {
4799 if (VT.is64BitVector() && SVT.isScalableVector()) {
4800 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4801
4802 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4803 SDNode *Q = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, QVT,
4804 SDValue(XAR, 0), ZSub);
4805
4806 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4807 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4808 SDValue(Q, 0), DSub);
4809 } else {
4810 SDValue SubReg = CurDAG->getTargetConstant(
4811 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL,
4812 MVT::i32);
4813 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4814 SDValue(XAR, 0), SubReg);
4815 }
4816 }
4817 ReplaceNode(N, XAR);
4818 return true;
4819}
4820
4821void AArch64DAGToDAGISel::Select(SDNode *Node) {
4822 // If we have a custom node, we already have selected!
4823 if (Node->isMachineOpcode()) {
4824 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4825 Node->setNodeId(-1);
4826 return;
4827 }
4828
4829 // A few custom selection cases.
4830 EVT VT = Node->getValueType(0);
4831
4832 switch (Node->getOpcode()) {
4833 default:
4834 break;
4835
4836 case ISD::ATOMIC_CMP_SWAP:
4837 if (SelectCMP_SWAP(Node))
4838 return;
4839 break;
4840
4841 case ISD::READ_REGISTER:
4842 case AArch64ISD::MRRS:
4843 if (tryReadRegister(Node))
4844 return;
4845 break;
4846
4847 case ISD::WRITE_REGISTER:
4848 case AArch64ISD::MSRR:
4849 if (tryWriteRegister(Node))
4850 return;
4851 break;
4852
4853 case ISD::LOAD: {
4854 // Try to select as an indexed load. Fall through to normal processing
4855 // if we can't.
4856 if (tryIndexedLoad(Node))
4857 return;
4858 break;
4859 }
4860
4861 case ISD::SRL:
4862 case ISD::AND:
4863 case ISD::SRA:
4864 case ISD::SIGN_EXTEND_INREG:
4865 if (tryBitfieldExtractOp(Node))
4866 return;
4867 if (tryBitfieldInsertInZeroOp(Node))
4868 return;
4869 [[fallthrough]];
4870 case ISD::ROTR:
4871 case ISD::SHL:
4872 if (tryShiftAmountMod(Node))
4873 return;
4874 break;
4875
4876 case ISD::SIGN_EXTEND:
4877 if (tryBitfieldExtractOpFromSExt(Node))
4878 return;
4879 break;
4880
4881 case ISD::OR:
4882 if (tryBitfieldInsertOp(Node))
4883 return;
4884 if (trySelectXAR(Node))
4885 return;
4886 break;
4887
4888 case ISD::EXTRACT_SUBVECTOR: {
4889 if (trySelectCastScalableToFixedLengthVector(Node))
4890 return;
4891 break;
4892 }
4893
4894 case ISD::INSERT_SUBVECTOR: {
4895 if (trySelectCastFixedLengthToScalableVector(Node))
4896 return;
4897 break;
4898 }
4899
4900 case ISD::Constant: {
4901 // Materialize zero constants as copies from WZR/XZR. This allows
4902 // the coalescer to propagate these into other instructions.
4903 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4904 if (ConstNode->isZero()) {
4905 if (VT == MVT::i32) {
4906 SDValue New = CurDAG->getCopyFromReg(
4907 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4908 ReplaceNode(Node, New.getNode());
4909 return;
4910 } else if (VT == MVT::i64) {
4911 SDValue New = CurDAG->getCopyFromReg(
4912 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4913 ReplaceNode(Node, New.getNode());
4914 return;
4915 }
4916 }
4917 break;
4918 }
4919
4920 case ISD::FrameIndex: {
4921 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4922 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4923 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4924 const TargetLowering *TLI = getTargetLowering();
4925 SDValue TFI = CurDAG->getTargetFrameIndex(
4926 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4927 SDLoc DL(Node);
4928 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4929 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4930 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4931 return;
4932 }
4933 case ISD::INTRINSIC_W_CHAIN: {
4934 unsigned IntNo = Node->getConstantOperandVal(1);
4935 switch (IntNo) {
4936 default:
4937 break;
4938 case Intrinsic::aarch64_gcsss: {
4939 SDLoc DL(Node);
4940 SDValue Chain = Node->getOperand(0);
4941 SDValue Val = Node->getOperand(2);
4942 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
4943 SDNode *SS1 =
4944 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
4945 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
4946 MVT::Other, Zero, SDValue(SS1, 0));
4947 ReplaceNode(Node, SS2);
4948 return;
4949 }
4950 case Intrinsic::aarch64_ldaxp:
4951 case Intrinsic::aarch64_ldxp: {
4952 unsigned Op =
4953 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4954 SDValue MemAddr = Node->getOperand(2);
4955 SDLoc DL(Node);
4956 SDValue Chain = Node->getOperand(0);
4957
4958 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4959 MVT::Other, MemAddr, Chain);
4960
4961 // Transfer memoperands.
4962 MachineMemOperand *MemOp =
4963 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4964 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4965 ReplaceNode(Node, Ld);
4966 return;
4967 }
4968 case Intrinsic::aarch64_stlxp:
4969 case Intrinsic::aarch64_stxp: {
4970 unsigned Op =
4971 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4972 SDLoc DL(Node);
4973 SDValue Chain = Node->getOperand(0);
4974 SDValue ValLo = Node->getOperand(2);
4975 SDValue ValHi = Node->getOperand(3);
4976 SDValue MemAddr = Node->getOperand(4);
4977
4978 // Place arguments in the right order.
4979 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4980
4981 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4982 // Transfer memoperands.
4983 MachineMemOperand *MemOp =
4984 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4985 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4986
4987 ReplaceNode(Node, St);
4988 return;
4989 }
4990 case Intrinsic::aarch64_neon_ld1x2:
4991 if (VT == MVT::v8i8) {
4992 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4993 return;
4994 } else if (VT == MVT::v16i8) {
4995 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4996 return;
4997 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4998 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4999 return;
5000 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5001 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
5002 return;
5003 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5004 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
5005 return;
5006 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5007 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
5008 return;
5009 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5010 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5011 return;
5012 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5013 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
5014 return;
5015 }
5016 break;
5017 case Intrinsic::aarch64_neon_ld1x3:
5018 if (VT == MVT::v8i8) {
5019 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
5020 return;
5021 } else if (VT == MVT::v16i8) {
5022 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
5023 return;
5024 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5025 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
5026 return;
5027 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5028 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
5029 return;
5030 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5031 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
5032 return;
5033 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5034 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
5035 return;
5036 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5037 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5038 return;
5039 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5040 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
5041 return;
5042 }
5043 break;
5044 case Intrinsic::aarch64_neon_ld1x4:
5045 if (VT == MVT::v8i8) {
5046 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
5047 return;
5048 } else if (VT == MVT::v16i8) {
5049 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
5050 return;
5051 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5052 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
5053 return;
5054 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5055 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
5056 return;
5057 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5058 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
5059 return;
5060 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5061 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
5062 return;
5063 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5064 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5065 return;
5066 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5067 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
5068 return;
5069 }
5070 break;
5071 case Intrinsic::aarch64_neon_ld2:
5072 if (VT == MVT::v8i8) {
5073 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
5074 return;
5075 } else if (VT == MVT::v16i8) {
5076 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
5077 return;
5078 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5079 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
5080 return;
5081 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5082 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
5083 return;
5084 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5085 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
5086 return;
5087 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5088 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
5089 return;
5090 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5091 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5092 return;
5093 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5094 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
5095 return;
5096 }
5097 break;
5098 case Intrinsic::aarch64_neon_ld3:
5099 if (VT == MVT::v8i8) {
5100 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
5101 return;
5102 } else if (VT == MVT::v16i8) {
5103 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
5104 return;
5105 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5106 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
5107 return;
5108 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5109 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
5110 return;
5111 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5112 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
5113 return;
5114 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5115 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
5116 return;
5117 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5118 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5119 return;
5120 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5121 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
5122 return;
5123 }
5124 break;
5125 case Intrinsic::aarch64_neon_ld4:
5126 if (VT == MVT::v8i8) {
5127 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
5128 return;
5129 } else if (VT == MVT::v16i8) {
5130 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
5131 return;
5132 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5133 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
5134 return;
5135 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5136 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
5137 return;
5138 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5139 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
5140 return;
5141 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5142 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
5143 return;
5144 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5145 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5146 return;
5147 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5148 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
5149 return;
5150 }
5151 break;
5152 case Intrinsic::aarch64_neon_ld2r:
5153 if (VT == MVT::v8i8) {
5154 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
5155 return;
5156 } else if (VT == MVT::v16i8) {
5157 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
5158 return;
5159 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5160 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
5161 return;
5162 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5163 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
5164 return;
5165 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5166 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
5167 return;
5168 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5169 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
5170 return;
5171 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5172 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
5173 return;
5174 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5175 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
5176 return;
5177 }
5178 break;
5179 case Intrinsic::aarch64_neon_ld3r:
5180 if (VT == MVT::v8i8) {
5181 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
5182 return;
5183 } else if (VT == MVT::v16i8) {
5184 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
5185 return;
5186 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5187 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
5188 return;
5189 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5190 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
5191 return;
5192 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5193 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
5194 return;
5195 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5196 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
5197 return;
5198 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5199 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
5200 return;
5201 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5202 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
5203 return;
5204 }
5205 break;
5206 case Intrinsic::aarch64_neon_ld4r:
5207 if (VT == MVT::v8i8) {
5208 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
5209 return;
5210 } else if (VT == MVT::v16i8) {
5211 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
5212 return;
5213 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5214 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
5215 return;
5216 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5217 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
5218 return;
5219 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5220 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
5221 return;
5222 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5223 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
5224 return;
5225 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5226 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
5227 return;
5228 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5229 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
5230 return;
5231 }
5232 break;
5233 case Intrinsic::aarch64_neon_ld2lane:
5234 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5235 SelectLoadLane(Node, 2, AArch64::LD2i8);
5236 return;
5237 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5238 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5239 SelectLoadLane(Node, 2, AArch64::LD2i16);
5240 return;
5241 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5242 VT == MVT::v2f32) {
5243 SelectLoadLane(Node, 2, AArch64::LD2i32);
5244 return;
5245 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5246 VT == MVT::v1f64) {
5247 SelectLoadLane(Node, 2, AArch64::LD2i64);
5248 return;
5249 }
5250 break;
5251 case Intrinsic::aarch64_neon_ld3lane:
5252 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5253 SelectLoadLane(Node, 3, AArch64::LD3i8);
5254 return;
5255 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5256 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5257 SelectLoadLane(Node, 3, AArch64::LD3i16);
5258 return;
5259 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5260 VT == MVT::v2f32) {
5261 SelectLoadLane(Node, 3, AArch64::LD3i32);
5262 return;
5263 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5264 VT == MVT::v1f64) {
5265 SelectLoadLane(Node, 3, AArch64::LD3i64);
5266 return;
5267 }
5268 break;
5269 case Intrinsic::aarch64_neon_ld4lane:
5270 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5271 SelectLoadLane(Node, 4, AArch64::LD4i8);
5272 return;
5273 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5274 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5275 SelectLoadLane(Node, 4, AArch64::LD4i16);
5276 return;
5277 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5278 VT == MVT::v2f32) {
5279 SelectLoadLane(Node, 4, AArch64::LD4i32);
5280 return;
5281 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5282 VT == MVT::v1f64) {
5283 SelectLoadLane(Node, 4, AArch64::LD4i64);
5284 return;
5285 }
5286 break;
5287 case Intrinsic::aarch64_ld64b:
5288 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
5289 return;
5290 case Intrinsic::aarch64_sve_ld2q_sret: {
5291 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
5292 return;
5293 }
5294 case Intrinsic::aarch64_sve_ld3q_sret: {
5295 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
5296 return;
5297 }
5298 case Intrinsic::aarch64_sve_ld4q_sret: {
5299 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
5300 return;
5301 }
5302 case Intrinsic::aarch64_sve_ld2_sret: {
5303 if (VT == MVT::nxv16i8) {
5304 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
5305 true);
5306 return;
5307 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5308 VT == MVT::nxv8bf16) {
5309 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
5310 true);
5311 return;
5312 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5313 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
5314 true);
5315 return;
5316 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5317 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
5318 true);
5319 return;
5320 }
5321 break;
5322 }
5323 case Intrinsic::aarch64_sve_ld1_pn_x2: {
5324 if (VT == MVT::nxv16i8) {
5325 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5326 SelectContiguousMultiVectorLoad(
5327 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
5328 else if (Subtarget->hasSVE2p1())
5329 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
5330 AArch64::LD1B_2Z);
5331 else
5332 break;
5333 return;
5334 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5335 VT == MVT::nxv8bf16) {
5336 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5337 SelectContiguousMultiVectorLoad(
5338 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
5339 else if (Subtarget->hasSVE2p1())
5340 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
5341 AArch64::LD1H_2Z);
5342 else
5343 break;
5344 return;
5345 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5346 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5347 SelectContiguousMultiVectorLoad(
5348 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
5349 else if (Subtarget->hasSVE2p1())
5350 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
5351 AArch64::LD1W_2Z);
5352 else
5353 break;
5354 return;
5355 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5356 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5357 SelectContiguousMultiVectorLoad(
5358 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
5359 else if (Subtarget->hasSVE2p1())
5360 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
5361 AArch64::LD1D_2Z);
5362 else
5363 break;
5364 return;
5365 }
5366 break;
5367 }
5368 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5369 if (VT == MVT::nxv16i8) {
5370 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5371 SelectContiguousMultiVectorLoad(
5372 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5373 else if (Subtarget->hasSVE2p1())
5374 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5375 AArch64::LD1B_4Z);
5376 else
5377 break;
5378 return;
5379 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5380 VT == MVT::nxv8bf16) {
5381 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5382 SelectContiguousMultiVectorLoad(
5383 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5384 else if (Subtarget->hasSVE2p1())
5385 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5386 AArch64::LD1H_4Z);
5387 else
5388 break;
5389 return;
5390 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5391 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5392 SelectContiguousMultiVectorLoad(
5393 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5394 else if (Subtarget->hasSVE2p1())
5395 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5396 AArch64::LD1W_4Z);
5397 else
5398 break;
5399 return;
5400 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5401 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5402 SelectContiguousMultiVectorLoad(
5403 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5404 else if (Subtarget->hasSVE2p1())
5405 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5406 AArch64::LD1D_4Z);
5407 else
5408 break;
5409 return;
5410 }
5411 break;
5412 }
5413 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5414 if (VT == MVT::nxv16i8) {
5415 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5416 SelectContiguousMultiVectorLoad(Node, 2, 0,
5417 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5418 AArch64::LDNT1B_2Z_PSEUDO);
5419 else if (Subtarget->hasSVE2p1())
5420 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5421 AArch64::LDNT1B_2Z);
5422 else
5423 break;
5424 return;
5425 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5426 VT == MVT::nxv8bf16) {
5427 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5428 SelectContiguousMultiVectorLoad(Node, 2, 1,
5429 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5430 AArch64::LDNT1H_2Z_PSEUDO);
5431 else if (Subtarget->hasSVE2p1())
5432 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5433 AArch64::LDNT1H_2Z);
5434 else
5435 break;
5436 return;
5437 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5438 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5439 SelectContiguousMultiVectorLoad(Node, 2, 2,
5440 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5441 AArch64::LDNT1W_2Z_PSEUDO);
5442 else if (Subtarget->hasSVE2p1())
5443 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5444 AArch64::LDNT1W_2Z);
5445 else
5446 break;
5447 return;
5448 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5449 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5450 SelectContiguousMultiVectorLoad(Node, 2, 3,
5451 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5452 AArch64::LDNT1D_2Z_PSEUDO);
5453 else if (Subtarget->hasSVE2p1())
5454 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5455 AArch64::LDNT1D_2Z);
5456 else
5457 break;
5458 return;
5459 }
5460 break;
5461 }
5462 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5463 if (VT == MVT::nxv16i8) {
5464 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5465 SelectContiguousMultiVectorLoad(Node, 4, 0,
5466 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5467 AArch64::LDNT1B_4Z_PSEUDO);
5468 else if (Subtarget->hasSVE2p1())
5469 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5470 AArch64::LDNT1B_4Z);
5471 else
5472 break;
5473 return;
5474 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5475 VT == MVT::nxv8bf16) {
5476 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5477 SelectContiguousMultiVectorLoad(Node, 4, 1,
5478 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5479 AArch64::LDNT1H_4Z_PSEUDO);
5480 else if (Subtarget->hasSVE2p1())
5481 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5482 AArch64::LDNT1H_4Z);
5483 else
5484 break;
5485 return;
5486 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5487 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5488 SelectContiguousMultiVectorLoad(Node, 4, 2,
5489 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5490 AArch64::LDNT1W_4Z_PSEUDO);
5491 else if (Subtarget->hasSVE2p1())
5492 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5493 AArch64::LDNT1W_4Z);
5494 else
5495 break;
5496 return;
5497 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5498 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5499 SelectContiguousMultiVectorLoad(Node, 4, 3,
5500 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5501 AArch64::LDNT1D_4Z_PSEUDO);
5502 else if (Subtarget->hasSVE2p1())
5503 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5504 AArch64::LDNT1D_4Z);
5505 else
5506 break;
5507 return;
5508 }
5509 break;
5510 }
5511 case Intrinsic::aarch64_sve_ld3_sret: {
5512 if (VT == MVT::nxv16i8) {
5513 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5514 true);
5515 return;
5516 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5517 VT == MVT::nxv8bf16) {
5518 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5519 true);
5520 return;
5521 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5522 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5523 true);
5524 return;
5525 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5526 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5527 true);
5528 return;
5529 }
5530 break;
5531 }
5532 case Intrinsic::aarch64_sve_ld4_sret: {
5533 if (VT == MVT::nxv16i8) {
5534 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5535 true);
5536 return;
5537 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5538 VT == MVT::nxv8bf16) {
5539 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5540 true);
5541 return;
5542 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5543 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5544 true);
5545 return;
5546 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5547 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5548 true);
5549 return;
5550 }
5551 break;
5552 }
5553 case Intrinsic::aarch64_sme_read_hor_vg2: {
5554 if (VT == MVT::nxv16i8) {
5555 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5556 AArch64::MOVA_2ZMXI_H_B);
5557 return;
5558 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5559 VT == MVT::nxv8bf16) {
5560 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5561 AArch64::MOVA_2ZMXI_H_H);
5562 return;
5563 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5564 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5565 AArch64::MOVA_2ZMXI_H_S);
5566 return;
5567 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5568 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5569 AArch64::MOVA_2ZMXI_H_D);
5570 return;
5571 }
5572 break;
5573 }
5574 case Intrinsic::aarch64_sme_read_ver_vg2: {
5575 if (VT == MVT::nxv16i8) {
5576 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5577 AArch64::MOVA_2ZMXI_V_B);
5578 return;
5579 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5580 VT == MVT::nxv8bf16) {
5581 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5582 AArch64::MOVA_2ZMXI_V_H);
5583 return;
5584 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5585 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5586 AArch64::MOVA_2ZMXI_V_S);
5587 return;
5588 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5589 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5590 AArch64::MOVA_2ZMXI_V_D);
5591 return;
5592 }
5593 break;
5594 }
5595 case Intrinsic::aarch64_sme_read_hor_vg4: {
5596 if (VT == MVT::nxv16i8) {
5597 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5598 AArch64::MOVA_4ZMXI_H_B);
5599 return;
5600 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5601 VT == MVT::nxv8bf16) {
5602 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5603 AArch64::MOVA_4ZMXI_H_H);
5604 return;
5605 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5606 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5607 AArch64::MOVA_4ZMXI_H_S);
5608 return;
5609 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5610 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5611 AArch64::MOVA_4ZMXI_H_D);
5612 return;
5613 }
5614 break;
5615 }
5616 case Intrinsic::aarch64_sme_read_ver_vg4: {
5617 if (VT == MVT::nxv16i8) {
5618 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5619 AArch64::MOVA_4ZMXI_V_B);
5620 return;
5621 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5622 VT == MVT::nxv8bf16) {
5623 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5624 AArch64::MOVA_4ZMXI_V_H);
5625 return;
5626 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5627 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5628 AArch64::MOVA_4ZMXI_V_S);
5629 return;
5630 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5631 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5632 AArch64::MOVA_4ZMXI_V_D);
5633 return;
5634 }
5635 break;
5636 }
5637 case Intrinsic::aarch64_sme_read_vg1x2: {
5638 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5639 AArch64::MOVA_VG2_2ZMXI);
5640 return;
5641 }
5642 case Intrinsic::aarch64_sme_read_vg1x4: {
5643 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5644 AArch64::MOVA_VG4_4ZMXI);
5645 return;
5646 }
5647 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5648 if (VT == MVT::nxv16i8) {
5649 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5650 return;
5651 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5652 VT == MVT::nxv8bf16) {
5653 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5654 return;
5655 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5656 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5657 return;
5658 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5659 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5660 return;
5661 }
5662 break;
5663 }
5664 case Intrinsic::aarch64_sme_readz_vert_x2: {
5665 if (VT == MVT::nxv16i8) {
5666 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5667 return;
5668 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5669 VT == MVT::nxv8bf16) {
5670 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5671 return;
5672 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5673 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5674 return;
5675 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5676 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5677 return;
5678 }
5679 break;
5680 }
5681 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5682 if (VT == MVT::nxv16i8) {
5683 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5684 return;
5685 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5686 VT == MVT::nxv8bf16) {
5687 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5688 return;
5689 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5690 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5691 return;
5692 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5693 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5694 return;
5695 }
5696 break;
5697 }
5698 case Intrinsic::aarch64_sme_readz_vert_x4: {
5699 if (VT == MVT::nxv16i8) {
5700 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5701 return;
5702 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5703 VT == MVT::nxv8bf16) {
5704 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5705 return;
5706 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5707 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5708 return;
5709 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5710 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5711 return;
5712 }
5713 break;
5714 }
5715 case Intrinsic::aarch64_sme_readz_x2: {
5716 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
5717 AArch64::ZA);
5718 return;
5719 }
5720 case Intrinsic::aarch64_sme_readz_x4: {
5721 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
5722 AArch64::ZA);
5723 return;
5724 }
5725 case Intrinsic::swift_async_context_addr: {
5726 SDLoc DL(Node);
5727 SDValue Chain = Node->getOperand(0);
5728 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5729 SDValue Res = SDValue(
5730 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5731 CurDAG->getTargetConstant(8, DL, MVT::i32),
5732 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5733 0);
5734 ReplaceUses(SDValue(Node, 0), Res);
5735 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5736 CurDAG->RemoveDeadNode(Node);
5737
5738 auto &MF = CurDAG->getMachineFunction();
5739 MF.getFrameInfo().setFrameAddressIsTaken(true);
5740 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5741 return;
5742 }
5743 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5745 Node->getValueType(0),
5746 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5747 AArch64::LUTI2_4ZTZI_S}))
5748 // Second Immediate must be <= 3:
5749 SelectMultiVectorLutiLane(Node, 4, Opc, 3);
5750 return;
5751 }
5752 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5754 Node->getValueType(0),
5755 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5756 // Second Immediate must be <= 1:
5757 SelectMultiVectorLutiLane(Node, 4, Opc, 1);
5758 return;
5759 }
5760 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5762 Node->getValueType(0),
5763 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5764 AArch64::LUTI2_2ZTZI_S}))
5765 // Second Immediate must be <= 7:
5766 SelectMultiVectorLutiLane(Node, 2, Opc, 7);
5767 return;
5768 }
5769 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5771 Node->getValueType(0),
5772 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5773 AArch64::LUTI4_2ZTZI_S}))
5774 // Second Immediate must be <= 3:
5775 SelectMultiVectorLutiLane(Node, 2, Opc, 3);
5776 return;
5777 }
5778 case Intrinsic::aarch64_sme_luti4_zt_x4: {
5779 SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z);
5780 return;
5781 }
5782 case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
5784 Node->getValueType(0),
5785 {AArch64::BF1CVTL_2ZZ_BtoH, AArch64::F1CVTL_2ZZ_BtoH}))
5786 SelectCVTIntrinsicFP8(Node, 2, Opc);
5787 return;
5788 case Intrinsic::aarch64_sve_fp8_cvtl2_x2:
5790 Node->getValueType(0),
5791 {AArch64::BF2CVTL_2ZZ_BtoH, AArch64::F2CVTL_2ZZ_BtoH}))
5792 SelectCVTIntrinsicFP8(Node, 2, Opc);
5793 return;
5794 case Intrinsic::aarch64_sve_fp8_cvt1_x2:
5796 Node->getValueType(0),
5797 {AArch64::BF1CVT_2ZZ_BtoH, AArch64::F1CVT_2ZZ_BtoH}))
5798 SelectCVTIntrinsicFP8(Node, 2, Opc);
5799 return;
5800 case Intrinsic::aarch64_sve_fp8_cvt2_x2:
5802 Node->getValueType(0),
5803 {AArch64::BF2CVT_2ZZ_BtoH, AArch64::F2CVT_2ZZ_BtoH}))
5804 SelectCVTIntrinsicFP8(Node, 2, Opc);
5805 return;
5806 }
5807 } break;
5808 case ISD::INTRINSIC_WO_CHAIN: {
5809 unsigned IntNo = Node->getConstantOperandVal(0);
5810 switch (IntNo) {
5811 default:
5812 break;
5813 case Intrinsic::aarch64_tagp:
5814 SelectTagP(Node);
5815 return;
5816
5817 case Intrinsic::ptrauth_auth:
5818 SelectPtrauthAuth(Node);
5819 return;
5820
5821 case Intrinsic::ptrauth_resign:
5822 SelectPtrauthResign(Node);
5823 return;
5824
5825 case Intrinsic::aarch64_neon_tbl2:
5826 SelectTable(Node, 2,
5827 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5828 false);
5829 return;
5830 case Intrinsic::aarch64_neon_tbl3:
5831 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5832 : AArch64::TBLv16i8Three,
5833 false);
5834 return;
5835 case Intrinsic::aarch64_neon_tbl4:
5836 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5837 : AArch64::TBLv16i8Four,
5838 false);
5839 return;
5840 case Intrinsic::aarch64_neon_tbx2:
5841 SelectTable(Node, 2,
5842 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5843 true);
5844 return;
5845 case Intrinsic::aarch64_neon_tbx3:
5846 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5847 : AArch64::TBXv16i8Three,
5848 true);
5849 return;
5850 case Intrinsic::aarch64_neon_tbx4:
5851 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5852 : AArch64::TBXv16i8Four,
5853 true);
5854 return;
5855 case Intrinsic::aarch64_sve_srshl_single_x2:
5857 Node->getValueType(0),
5858 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5859 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
5860 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5861 return;
5862 case Intrinsic::aarch64_sve_srshl_single_x4:
5864 Node->getValueType(0),
5865 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
5866 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
5867 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5868 return;
5869 case Intrinsic::aarch64_sve_urshl_single_x2:
5871 Node->getValueType(0),
5872 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
5873 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
5874 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5875 return;
5876 case Intrinsic::aarch64_sve_urshl_single_x4:
5878 Node->getValueType(0),
5879 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
5880 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
5881 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5882 return;
5883 case Intrinsic::aarch64_sve_srshl_x2:
5885 Node->getValueType(0),
5886 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
5887 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
5888 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5889 return;
5890 case Intrinsic::aarch64_sve_srshl_x4:
5892 Node->getValueType(0),
5893 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
5894 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
5895 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5896 return;
5897 case Intrinsic::aarch64_sve_urshl_x2:
5899 Node->getValueType(0),
5900 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
5901 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
5902 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5903 return;
5904 case Intrinsic::aarch64_sve_urshl_x4:
5906 Node->getValueType(0),
5907 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
5908 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
5909 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5910 return;
5911 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
5913 Node->getValueType(0),
5914 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
5915 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
5916 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5917 return;
5918 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
5920 Node->getValueType(0),
5921 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
5922 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
5923 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5924 return;
5925 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
5927 Node->getValueType(0),
5928 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
5929 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
5930 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5931 return;
5932 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
5934 Node->getValueType(0),
5935 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
5936 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
5937 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5938 return;
5939 case Intrinsic::aarch64_sme_fp8_scale_single_x2:
5941 Node->getValueType(0),
5942 {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S,
5943 AArch64::FSCALE_2ZZ_D}))
5944 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5945 return;
5946 case Intrinsic::aarch64_sme_fp8_scale_single_x4:
5948 Node->getValueType(0),
5949 {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S,
5950 AArch64::FSCALE_4ZZ_D}))
5951 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5952 return;
5953 case Intrinsic::aarch64_sme_fp8_scale_x2:
5955 Node->getValueType(0),
5956 {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S,
5957 AArch64::FSCALE_2Z2Z_D}))
5958 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5959 return;
5960 case Intrinsic::aarch64_sme_fp8_scale_x4:
5962 Node->getValueType(0),
5963 {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S,
5964 AArch64::FSCALE_4Z4Z_D}))
5965 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5966 return;
5967 case Intrinsic::aarch64_sve_whilege_x2:
5969 Node->getValueType(0),
5970 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
5971 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
5972 SelectWhilePair(Node, Op);
5973 return;
5974 case Intrinsic::aarch64_sve_whilegt_x2:
5976 Node->getValueType(0),
5977 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
5978 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
5979 SelectWhilePair(Node, Op);
5980 return;
5981 case Intrinsic::aarch64_sve_whilehi_x2:
5983 Node->getValueType(0),
5984 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
5985 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
5986 SelectWhilePair(Node, Op);
5987 return;
5988 case Intrinsic::aarch64_sve_whilehs_x2:
5990 Node->getValueType(0),
5991 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
5992 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
5993 SelectWhilePair(Node, Op);
5994 return;
5995 case Intrinsic::aarch64_sve_whilele_x2:
5997 Node->getValueType(0),
5998 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
5999 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
6000 SelectWhilePair(Node, Op);
6001 return;
6002 case Intrinsic::aarch64_sve_whilelo_x2:
6004 Node->getValueType(0),
6005 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
6006 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
6007 SelectWhilePair(Node, Op);
6008 return;
6009 case Intrinsic::aarch64_sve_whilels_x2:
6011 Node->getValueType(0),
6012 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
6013 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
6014 SelectWhilePair(Node, Op);
6015 return;
6016 case Intrinsic::aarch64_sve_whilelt_x2:
6018 Node->getValueType(0),
6019 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
6020 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
6021 SelectWhilePair(Node, Op);
6022 return;
6023 case Intrinsic::aarch64_sve_smax_single_x2:
6025 Node->getValueType(0),
6026 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
6027 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
6028 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6029 return;
6030 case Intrinsic::aarch64_sve_umax_single_x2:
6032 Node->getValueType(0),
6033 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
6034 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
6035 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6036 return;
6037 case Intrinsic::aarch64_sve_fmax_single_x2:
6039 Node->getValueType(0),
6040 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
6041 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
6042 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6043 return;
6044 case Intrinsic::aarch64_sve_smax_single_x4:
6046 Node->getValueType(0),
6047 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
6048 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
6049 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6050 return;
6051 case Intrinsic::aarch64_sve_umax_single_x4:
6053 Node->getValueType(0),
6054 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
6055 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
6056 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6057 return;
6058 case Intrinsic::aarch64_sve_fmax_single_x4:
6060 Node->getValueType(0),
6061 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
6062 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
6063 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6064 return;
6065 case Intrinsic::aarch64_sve_smin_single_x2:
6067 Node->getValueType(0),
6068 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
6069 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
6070 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6071 return;
6072 case Intrinsic::aarch64_sve_umin_single_x2:
6074 Node->getValueType(0),
6075 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
6076 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
6077 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6078 return;
6079 case Intrinsic::aarch64_sve_fmin_single_x2:
6081 Node->getValueType(0),
6082 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
6083 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
6084 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6085 return;
6086 case Intrinsic::aarch64_sve_smin_single_x4:
6088 Node->getValueType(0),
6089 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
6090 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
6091 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6092 return;
6093 case Intrinsic::aarch64_sve_umin_single_x4:
6095 Node->getValueType(0),
6096 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
6097 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
6098 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6099 return;
6100 case Intrinsic::aarch64_sve_fmin_single_x4:
6102 Node->getValueType(0),
6103 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
6104 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
6105 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6106 return;
6107 case Intrinsic::aarch64_sve_smax_x2:
6109 Node->getValueType(0),
6110 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
6111 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
6112 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6113 return;
6114 case Intrinsic::aarch64_sve_umax_x2:
6116 Node->getValueType(0),
6117 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
6118 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
6119 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6120 return;
6121 case Intrinsic::aarch64_sve_fmax_x2:
6123 Node->getValueType(0),
6124 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
6125 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
6126 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6127 return;
6128 case Intrinsic::aarch64_sve_smax_x4:
6130 Node->getValueType(0),
6131 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
6132 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
6133 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6134 return;
6135 case Intrinsic::aarch64_sve_umax_x4:
6137 Node->getValueType(0),
6138 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
6139 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
6140 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6141 return;
6142 case Intrinsic::aarch64_sve_fmax_x4:
6144 Node->getValueType(0),
6145 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
6146 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
6147 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6148 return;
6149 case Intrinsic::aarch64_sme_famax_x2:
6151 Node->getValueType(0),
6152 {0, AArch64::FAMAX_2Z2Z_H, AArch64::FAMAX_2Z2Z_S,
6153 AArch64::FAMAX_2Z2Z_D}))
6154 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6155 return;
6156 case Intrinsic::aarch64_sme_famax_x4:
6158 Node->getValueType(0),
6159 {0, AArch64::FAMAX_4Z4Z_H, AArch64::FAMAX_4Z4Z_S,
6160 AArch64::FAMAX_4Z4Z_D}))
6161 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6162 return;
6163 case Intrinsic::aarch64_sme_famin_x2:
6165 Node->getValueType(0),
6166 {0, AArch64::FAMIN_2Z2Z_H, AArch64::FAMIN_2Z2Z_S,
6167 AArch64::FAMIN_2Z2Z_D}))
6168 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6169 return;
6170 case Intrinsic::aarch64_sme_famin_x4:
6172 Node->getValueType(0),
6173 {0, AArch64::FAMIN_4Z4Z_H, AArch64::FAMIN_4Z4Z_S,
6174 AArch64::FAMIN_4Z4Z_D}))
6175 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6176 return;
6177 case Intrinsic::aarch64_sve_smin_x2:
6179 Node->getValueType(0),
6180 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
6181 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
6182 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6183 return;
6184 case Intrinsic::aarch64_sve_umin_x2:
6186 Node->getValueType(0),
6187 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
6188 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
6189 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6190 return;
6191 case Intrinsic::aarch64_sve_fmin_x2:
6193 Node->getValueType(0),
6194 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
6195 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
6196 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6197 return;
6198 case Intrinsic::aarch64_sve_smin_x4:
6200 Node->getValueType(0),
6201 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
6202 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
6203 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6204 return;
6205 case Intrinsic::aarch64_sve_umin_x4:
6207 Node->getValueType(0),
6208 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
6209 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
6210 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6211 return;
6212 case Intrinsic::aarch64_sve_fmin_x4:
6214 Node->getValueType(0),
6215 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
6216 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
6217 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6218 return;
6219 case Intrinsic::aarch64_sve_fmaxnm_single_x2 :
6221 Node->getValueType(0),
6222 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
6223 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
6224 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6225 return;
6226 case Intrinsic::aarch64_sve_fmaxnm_single_x4 :
6228 Node->getValueType(0),
6229 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
6230 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
6231 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6232 return;
6233 case Intrinsic::aarch64_sve_fminnm_single_x2:
6235 Node->getValueType(0),
6236 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
6237 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
6238 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6239 return;
6240 case Intrinsic::aarch64_sve_fminnm_single_x4:
6242 Node->getValueType(0),
6243 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
6244 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
6245 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6246 return;
6247 case Intrinsic::aarch64_sve_fscale_single_x4:
6248 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::BFSCALE_4ZZ);
6249 return;
6250 case Intrinsic::aarch64_sve_fscale_single_x2:
6251 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::BFSCALE_2ZZ);
6252 return;
6253 case Intrinsic::aarch64_sve_fmul_single_x4:
6255 Node->getValueType(0),
6256 {AArch64::BFMUL_4ZZ, AArch64::FMUL_4ZZ_H, AArch64::FMUL_4ZZ_S,
6257 AArch64::FMUL_4ZZ_D}))
6258 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6259 return;
6260 case Intrinsic::aarch64_sve_fmul_single_x2:
6262 Node->getValueType(0),
6263 {AArch64::BFMUL_2ZZ, AArch64::FMUL_2ZZ_H, AArch64::FMUL_2ZZ_S,
6264 AArch64::FMUL_2ZZ_D}))
6265 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6266 return;
6267 case Intrinsic::aarch64_sve_fmaxnm_x2:
6269 Node->getValueType(0),
6270 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
6271 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
6272 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6273 return;
6274 case Intrinsic::aarch64_sve_fmaxnm_x4:
6276 Node->getValueType(0),
6277 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
6278 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
6279 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6280 return;
6281 case Intrinsic::aarch64_sve_fminnm_x2:
6283 Node->getValueType(0),
6284 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
6285 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
6286 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6287 return;
6288 case Intrinsic::aarch64_sve_fminnm_x4:
6290 Node->getValueType(0),
6291 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
6292 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
6293 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6294 return;
6295 case Intrinsic::aarch64_sve_fscale_x4:
6296 SelectDestructiveMultiIntrinsic(Node, 4, true, AArch64::BFSCALE_4Z4Z);
6297 return;
6298 case Intrinsic::aarch64_sve_fscale_x2:
6299 SelectDestructiveMultiIntrinsic(Node, 2, true, AArch64::BFSCALE_2Z2Z);
6300 return;
6301 case Intrinsic::aarch64_sve_fmul_x4:
6303 Node->getValueType(0),
6304 {AArch64::BFMUL_4Z4Z, AArch64::FMUL_4Z4Z_H, AArch64::FMUL_4Z4Z_S,
6305 AArch64::FMUL_4Z4Z_D}))
6306 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6307 return;
6308 case Intrinsic::aarch64_sve_fmul_x2:
6310 Node->getValueType(0),
6311 {AArch64::BFMUL_2Z2Z, AArch64::FMUL_2Z2Z_H, AArch64::FMUL_2Z2Z_S,
6312 AArch64::FMUL_2Z2Z_D}))
6313 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6314 return;
6315 case Intrinsic::aarch64_sve_fcvtzs_x2:
6316 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
6317 return;
6318 case Intrinsic::aarch64_sve_scvtf_x2:
6319 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
6320 return;
6321 case Intrinsic::aarch64_sve_fcvtzu_x2:
6322 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
6323 return;
6324 case Intrinsic::aarch64_sve_ucvtf_x2:
6325 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
6326 return;
6327 case Intrinsic::aarch64_sve_fcvtzs_x4:
6328 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
6329 return;
6330 case Intrinsic::aarch64_sve_scvtf_x4:
6331 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
6332 return;
6333 case Intrinsic::aarch64_sve_fcvtzu_x4:
6334 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
6335 return;
6336 case Intrinsic::aarch64_sve_ucvtf_x4:
6337 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
6338 return;
6339 case Intrinsic::aarch64_sve_fcvt_widen_x2:
6340 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
6341 return;
6342 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
6343 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
6344 return;
6345 case Intrinsic::aarch64_sve_sclamp_single_x2:
6347 Node->getValueType(0),
6348 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
6349 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
6350 SelectClamp(Node, 2, Op);
6351 return;
6352 case Intrinsic::aarch64_sve_uclamp_single_x2:
6354 Node->getValueType(0),
6355 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
6356 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
6357 SelectClamp(Node, 2, Op);
6358 return;
6359 case Intrinsic::aarch64_sve_fclamp_single_x2:
6361 Node->getValueType(0),
6362 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
6363 AArch64::FCLAMP_VG2_2Z2Z_D}))
6364 SelectClamp(Node, 2, Op);
6365 return;
6366 case Intrinsic::aarch64_sve_bfclamp_single_x2:
6367 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
6368 return;
6369 case Intrinsic::aarch64_sve_sclamp_single_x4:
6371 Node->getValueType(0),
6372 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
6373 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
6374 SelectClamp(Node, 4, Op);
6375 return;
6376 case Intrinsic::aarch64_sve_uclamp_single_x4:
6378 Node->getValueType(0),
6379 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6380 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6381 SelectClamp(Node, 4, Op);
6382 return;
6383 case Intrinsic::aarch64_sve_fclamp_single_x4:
6385 Node->getValueType(0),
6386 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6387 AArch64::FCLAMP_VG4_4Z4Z_D}))
6388 SelectClamp(Node, 4, Op);
6389 return;
6390 case Intrinsic::aarch64_sve_bfclamp_single_x4:
6391 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
6392 return;
6393 case Intrinsic::aarch64_sve_add_single_x2:
6395 Node->getValueType(0),
6396 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6397 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6398 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6399 return;
6400 case Intrinsic::aarch64_sve_add_single_x4:
6402 Node->getValueType(0),
6403 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6404 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6405 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6406 return;
6407 case Intrinsic::aarch64_sve_zip_x2:
6409 Node->getValueType(0),
6410 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6411 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6412 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6413 return;
6414 case Intrinsic::aarch64_sve_zipq_x2:
6415 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6416 AArch64::ZIP_VG2_2ZZZ_Q);
6417 return;
6418 case Intrinsic::aarch64_sve_zip_x4:
6420 Node->getValueType(0),
6421 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6422 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6423 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6424 return;
6425 case Intrinsic::aarch64_sve_zipq_x4:
6426 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6427 AArch64::ZIP_VG4_4Z4Z_Q);
6428 return;
6429 case Intrinsic::aarch64_sve_uzp_x2:
6431 Node->getValueType(0),
6432 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6433 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6434 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6435 return;
6436 case Intrinsic::aarch64_sve_uzpq_x2:
6437 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6438 AArch64::UZP_VG2_2ZZZ_Q);
6439 return;
6440 case Intrinsic::aarch64_sve_uzp_x4:
6442 Node->getValueType(0),
6443 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6444 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6445 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6446 return;
6447 case Intrinsic::aarch64_sve_uzpq_x4:
6448 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6449 AArch64::UZP_VG4_4Z4Z_Q);
6450 return;
6451 case Intrinsic::aarch64_sve_sel_x2:
6452 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6453 Node->getValueType(0),
6454 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6455 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6456 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
6457 return;
6458 case Intrinsic::aarch64_sve_sel_x4:
6459 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6460 Node->getValueType(0),
6461 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6462 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6463 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
6464 return;
6465 case Intrinsic::aarch64_sve_frinta_x2:
6466 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
6467 return;
6468 case Intrinsic::aarch64_sve_frinta_x4:
6469 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
6470 return;
6471 case Intrinsic::aarch64_sve_frintm_x2:
6472 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
6473 return;
6474 case Intrinsic::aarch64_sve_frintm_x4:
6475 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
6476 return;
6477 case Intrinsic::aarch64_sve_frintn_x2:
6478 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
6479 return;
6480 case Intrinsic::aarch64_sve_frintn_x4:
6481 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
6482 return;
6483 case Intrinsic::aarch64_sve_frintp_x2:
6484 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
6485 return;
6486 case Intrinsic::aarch64_sve_frintp_x4:
6487 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
6488 return;
6489 case Intrinsic::aarch64_sve_sunpk_x2:
6490 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6491 Node->getValueType(0),
6492 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6493 AArch64::SUNPK_VG2_2ZZ_D}))
6494 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6495 return;
6496 case Intrinsic::aarch64_sve_uunpk_x2:
6497 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6498 Node->getValueType(0),
6499 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6500 AArch64::UUNPK_VG2_2ZZ_D}))
6501 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6502 return;
6503 case Intrinsic::aarch64_sve_sunpk_x4:
6504 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6505 Node->getValueType(0),
6506 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6507 AArch64::SUNPK_VG4_4Z2Z_D}))
6508 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6509 return;
6510 case Intrinsic::aarch64_sve_uunpk_x4:
6511 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6512 Node->getValueType(0),
6513 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6514 AArch64::UUNPK_VG4_4Z2Z_D}))
6515 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6516 return;
6517 case Intrinsic::aarch64_sve_pext_x2: {
6518 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6519 Node->getValueType(0),
6520 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6521 AArch64::PEXT_2PCI_D}))
6522 SelectPExtPair(Node, Op);
6523 return;
6524 }
6525 }
6526 break;
6527 }
6528 case ISD::INTRINSIC_VOID: {
6529 unsigned IntNo = Node->getConstantOperandVal(1);
6530 if (Node->getNumOperands() >= 3)
6531 VT = Node->getOperand(2)->getValueType(0);
6532 switch (IntNo) {
6533 default:
6534 break;
6535 case Intrinsic::aarch64_neon_st1x2: {
6536 if (VT == MVT::v8i8) {
6537 SelectStore(Node, 2, AArch64::ST1Twov8b);
6538 return;
6539 } else if (VT == MVT::v16i8) {
6540 SelectStore(Node, 2, AArch64::ST1Twov16b);
6541 return;
6542 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6543 VT == MVT::v4bf16) {
6544 SelectStore(Node, 2, AArch64::ST1Twov4h);
6545 return;
6546 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6547 VT == MVT::v8bf16) {
6548 SelectStore(Node, 2, AArch64::ST1Twov8h);
6549 return;
6550 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6551 SelectStore(Node, 2, AArch64::ST1Twov2s);
6552 return;
6553 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6554 SelectStore(Node, 2, AArch64::ST1Twov4s);
6555 return;
6556 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6557 SelectStore(Node, 2, AArch64::ST1Twov2d);
6558 return;
6559 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6560 SelectStore(Node, 2, AArch64::ST1Twov1d);
6561 return;
6562 }
6563 break;
6564 }
6565 case Intrinsic::aarch64_neon_st1x3: {
6566 if (VT == MVT::v8i8) {
6567 SelectStore(Node, 3, AArch64::ST1Threev8b);
6568 return;
6569 } else if (VT == MVT::v16i8) {
6570 SelectStore(Node, 3, AArch64::ST1Threev16b);
6571 return;
6572 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6573 VT == MVT::v4bf16) {
6574 SelectStore(Node, 3, AArch64::ST1Threev4h);
6575 return;
6576 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6577 VT == MVT::v8bf16) {
6578 SelectStore(Node, 3, AArch64::ST1Threev8h);
6579 return;
6580 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6581 SelectStore(Node, 3, AArch64::ST1Threev2s);
6582 return;
6583 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6584 SelectStore(Node, 3, AArch64::ST1Threev4s);
6585 return;
6586 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6587 SelectStore(Node, 3, AArch64::ST1Threev2d);
6588 return;
6589 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6590 SelectStore(Node, 3, AArch64::ST1Threev1d);
6591 return;
6592 }
6593 break;
6594 }
6595 case Intrinsic::aarch64_neon_st1x4: {
6596 if (VT == MVT::v8i8) {
6597 SelectStore(Node, 4, AArch64::ST1Fourv8b);
6598 return;
6599 } else if (VT == MVT::v16i8) {
6600 SelectStore(Node, 4, AArch64::ST1Fourv16b);
6601 return;
6602 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6603 VT == MVT::v4bf16) {
6604 SelectStore(Node, 4, AArch64::ST1Fourv4h);
6605 return;
6606 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6607 VT == MVT::v8bf16) {
6608 SelectStore(Node, 4, AArch64::ST1Fourv8h);
6609 return;
6610 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6611 SelectStore(Node, 4, AArch64::ST1Fourv2s);
6612 return;
6613 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6614 SelectStore(Node, 4, AArch64::ST1Fourv4s);
6615 return;
6616 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6617 SelectStore(Node, 4, AArch64::ST1Fourv2d);
6618 return;
6619 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6620 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6621 return;
6622 }
6623 break;
6624 }
6625 case Intrinsic::aarch64_neon_st2: {
6626 if (VT == MVT::v8i8) {
6627 SelectStore(Node, 2, AArch64::ST2Twov8b);
6628 return;
6629 } else if (VT == MVT::v16i8) {
6630 SelectStore(Node, 2, AArch64::ST2Twov16b);
6631 return;
6632 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6633 VT == MVT::v4bf16) {
6634 SelectStore(Node, 2, AArch64::ST2Twov4h);
6635 return;
6636 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6637 VT == MVT::v8bf16) {
6638 SelectStore(Node, 2, AArch64::ST2Twov8h);
6639 return;
6640 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6641 SelectStore(Node, 2, AArch64::ST2Twov2s);
6642 return;
6643 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6644 SelectStore(Node, 2, AArch64::ST2Twov4s);
6645 return;
6646 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6647 SelectStore(Node, 2, AArch64::ST2Twov2d);
6648 return;
6649 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6650 SelectStore(Node, 2, AArch64::ST1Twov1d);
6651 return;
6652 }
6653 break;
6654 }
6655 case Intrinsic::aarch64_neon_st3: {
6656 if (VT == MVT::v8i8) {
6657 SelectStore(Node, 3, AArch64::ST3Threev8b);
6658 return;
6659 } else if (VT == MVT::v16i8) {
6660 SelectStore(Node, 3, AArch64::ST3Threev16b);
6661 return;
6662 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6663 VT == MVT::v4bf16) {
6664 SelectStore(Node, 3, AArch64::ST3Threev4h);
6665 return;
6666 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6667 VT == MVT::v8bf16) {
6668 SelectStore(Node, 3, AArch64::ST3Threev8h);
6669 return;
6670 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6671 SelectStore(Node, 3, AArch64::ST3Threev2s);
6672 return;
6673 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6674 SelectStore(Node, 3, AArch64::ST3Threev4s);
6675 return;
6676 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6677 SelectStore(Node, 3, AArch64::ST3Threev2d);
6678 return;
6679 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6680 SelectStore(Node, 3, AArch64::ST1Threev1d);
6681 return;
6682 }
6683 break;
6684 }
6685 case Intrinsic::aarch64_neon_st4: {
6686 if (VT == MVT::v8i8) {
6687 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6688 return;
6689 } else if (VT == MVT::v16i8) {
6690 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6691 return;
6692 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6693 VT == MVT::v4bf16) {
6694 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6695 return;
6696 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6697 VT == MVT::v8bf16) {
6698 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6699 return;
6700 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6701 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6702 return;
6703 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6704 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6705 return;
6706 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6707 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6708 return;
6709 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6710 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6711 return;
6712 }
6713 break;
6714 }
6715 case Intrinsic::aarch64_neon_st2lane: {
6716 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6717 SelectStoreLane(Node, 2, AArch64::ST2i8);
6718 return;
6719 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6720 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6721 SelectStoreLane(Node, 2, AArch64::ST2i16);
6722 return;
6723 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6724 VT == MVT::v2f32) {
6725 SelectStoreLane(Node, 2, AArch64::ST2i32);
6726 return;
6727 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6728 VT == MVT::v1f64) {
6729 SelectStoreLane(Node, 2, AArch64::ST2i64);
6730 return;
6731 }
6732 break;
6733 }
6734 case Intrinsic::aarch64_neon_st3lane: {
6735 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6736 SelectStoreLane(Node, 3, AArch64::ST3i8);
6737 return;
6738 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6739 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6740 SelectStoreLane(Node, 3, AArch64::ST3i16);
6741 return;
6742 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6743 VT == MVT::v2f32) {
6744 SelectStoreLane(Node, 3, AArch64::ST3i32);
6745 return;
6746 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6747 VT == MVT::v1f64) {
6748 SelectStoreLane(Node, 3, AArch64::ST3i64);
6749 return;
6750 }
6751 break;
6752 }
6753 case Intrinsic::aarch64_neon_st4lane: {
6754 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6755 SelectStoreLane(Node, 4, AArch64::ST4i8);
6756 return;
6757 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6758 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6759 SelectStoreLane(Node, 4, AArch64::ST4i16);
6760 return;
6761 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6762 VT == MVT::v2f32) {
6763 SelectStoreLane(Node, 4, AArch64::ST4i32);
6764 return;
6765 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6766 VT == MVT::v1f64) {
6767 SelectStoreLane(Node, 4, AArch64::ST4i64);
6768 return;
6769 }
6770 break;
6771 }
6772 case Intrinsic::aarch64_sve_st2q: {
6773 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6774 return;
6775 }
6776 case Intrinsic::aarch64_sve_st3q: {
6777 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6778 return;
6779 }
6780 case Intrinsic::aarch64_sve_st4q: {
6781 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
6782 return;
6783 }
6784 case Intrinsic::aarch64_sve_st2: {
6785 if (VT == MVT::nxv16i8) {
6786 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
6787 return;
6788 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6789 VT == MVT::nxv8bf16) {
6790 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
6791 return;
6792 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6793 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
6794 return;
6795 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6796 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
6797 return;
6798 }
6799 break;
6800 }
6801 case Intrinsic::aarch64_sve_st3: {
6802 if (VT == MVT::nxv16i8) {
6803 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
6804 return;
6805 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6806 VT == MVT::nxv8bf16) {
6807 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
6808 return;
6809 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6810 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
6811 return;
6812 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6813 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
6814 return;
6815 }
6816 break;
6817 }
6818 case Intrinsic::aarch64_sve_st4: {
6819 if (VT == MVT::nxv16i8) {
6820 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
6821 return;
6822 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6823 VT == MVT::nxv8bf16) {
6824 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
6825 return;
6826 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6827 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
6828 return;
6829 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6830 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
6831 return;
6832 }
6833 break;
6834 }
6835 }
6836 break;
6837 }
6838 case AArch64ISD::LD2post: {
6839 if (VT == MVT::v8i8) {
6840 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
6841 return;
6842 } else if (VT == MVT::v16i8) {
6843 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
6844 return;
6845 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6846 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
6847 return;
6848 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6849 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
6850 return;
6851 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6852 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
6853 return;
6854 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6855 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
6856 return;
6857 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6858 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6859 return;
6860 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6861 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
6862 return;
6863 }
6864 break;
6865 }
6866 case AArch64ISD::LD3post: {
6867 if (VT == MVT::v8i8) {
6868 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
6869 return;
6870 } else if (VT == MVT::v16i8) {
6871 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
6872 return;
6873 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6874 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
6875 return;
6876 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6877 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
6878 return;
6879 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6880 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
6881 return;
6882 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6883 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
6884 return;
6885 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6886 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6887 return;
6888 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6889 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
6890 return;
6891 }
6892 break;
6893 }
6894 case AArch64ISD::LD4post: {
6895 if (VT == MVT::v8i8) {
6896 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
6897 return;
6898 } else if (VT == MVT::v16i8) {
6899 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
6900 return;
6901 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6902 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
6903 return;
6904 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6905 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
6906 return;
6907 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6908 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
6909 return;
6910 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6911 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
6912 return;
6913 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6914 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6915 return;
6916 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6917 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
6918 return;
6919 }
6920 break;
6921 }
6922 case AArch64ISD::LD1x2post: {
6923 if (VT == MVT::v8i8) {
6924 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
6925 return;
6926 } else if (VT == MVT::v16i8) {
6927 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
6928 return;
6929 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6930 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
6931 return;
6932 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6933 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
6934 return;
6935 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6936 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
6937 return;
6938 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6939 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
6940 return;
6941 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6942 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6943 return;
6944 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6945 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
6946 return;
6947 }
6948 break;
6949 }
6950 case AArch64ISD::LD1x3post: {
6951 if (VT == MVT::v8i8) {
6952 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
6953 return;
6954 } else if (VT == MVT::v16i8) {
6955 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
6956 return;
6957 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6958 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
6959 return;
6960 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6961 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
6962 return;
6963 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6964 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
6965 return;
6966 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6967 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
6968 return;
6969 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6970 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6971 return;
6972 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6973 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
6974 return;
6975 }
6976 break;
6977 }
6978 case AArch64ISD::LD1x4post: {
6979 if (VT == MVT::v8i8) {
6980 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
6981 return;
6982 } else if (VT == MVT::v16i8) {
6983 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
6984 return;
6985 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6986 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
6987 return;
6988 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6989 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
6990 return;
6991 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6992 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
6993 return;
6994 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6995 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
6996 return;
6997 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6998 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6999 return;
7000 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7001 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
7002 return;
7003 }
7004 break;
7005 }
7006 case AArch64ISD::LD1DUPpost: {
7007 if (VT == MVT::v8i8) {
7008 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
7009 return;
7010 } else if (VT == MVT::v16i8) {
7011 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
7012 return;
7013 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7014 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
7015 return;
7016 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7017 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
7018 return;
7019 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7020 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
7021 return;
7022 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7023 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
7024 return;
7025 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7026 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
7027 return;
7028 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7029 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
7030 return;
7031 }
7032 break;
7033 }
7034 case AArch64ISD::LD2DUPpost: {
7035 if (VT == MVT::v8i8) {
7036 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
7037 return;
7038 } else if (VT == MVT::v16i8) {
7039 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
7040 return;
7041 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7042 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
7043 return;
7044 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7045 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
7046 return;
7047 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7048 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
7049 return;
7050 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7051 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
7052 return;
7053 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7054 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
7055 return;
7056 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7057 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
7058 return;
7059 }
7060 break;
7061 }
7062 case AArch64ISD::LD3DUPpost: {
7063 if (VT == MVT::v8i8) {
7064 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
7065 return;
7066 } else if (VT == MVT::v16i8) {
7067 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
7068 return;
7069 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7070 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
7071 return;
7072 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7073 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
7074 return;
7075 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7076 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
7077 return;
7078 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7079 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
7080 return;
7081 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7082 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
7083 return;
7084 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7085 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
7086 return;
7087 }
7088 break;
7089 }
7090 case AArch64ISD::LD4DUPpost: {
7091 if (VT == MVT::v8i8) {
7092 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
7093 return;
7094 } else if (VT == MVT::v16i8) {
7095 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
7096 return;
7097 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7098 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
7099 return;
7100 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7101 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
7102 return;
7103 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7104 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
7105 return;
7106 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7107 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
7108 return;
7109 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7110 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
7111 return;
7112 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7113 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
7114 return;
7115 }
7116 break;
7117 }
7118 case AArch64ISD::LD1LANEpost: {
7119 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7120 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
7121 return;
7122 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7123 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7124 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
7125 return;
7126 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7127 VT == MVT::v2f32) {
7128 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
7129 return;
7130 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7131 VT == MVT::v1f64) {
7132 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
7133 return;
7134 }
7135 break;
7136 }
7137 case AArch64ISD::LD2LANEpost: {
7138 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7139 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
7140 return;
7141 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7142 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7143 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
7144 return;
7145 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7146 VT == MVT::v2f32) {
7147 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
7148 return;
7149 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7150 VT == MVT::v1f64) {
7151 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
7152 return;
7153 }
7154 break;
7155 }
7156 case AArch64ISD::LD3LANEpost: {
7157 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7158 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
7159 return;
7160 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7161 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7162 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
7163 return;
7164 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7165 VT == MVT::v2f32) {
7166 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
7167 return;
7168 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7169 VT == MVT::v1f64) {
7170 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
7171 return;
7172 }
7173 break;
7174 }
7175 case AArch64ISD::LD4LANEpost: {
7176 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7177 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
7178 return;
7179 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7180 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7181 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
7182 return;
7183 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7184 VT == MVT::v2f32) {
7185 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
7186 return;
7187 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7188 VT == MVT::v1f64) {
7189 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
7190 return;
7191 }
7192 break;
7193 }
7194 case AArch64ISD::ST2post: {
7195 VT = Node->getOperand(1).getValueType();
7196 if (VT == MVT::v8i8) {
7197 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
7198 return;
7199 } else if (VT == MVT::v16i8) {
7200 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
7201 return;
7202 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7203 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
7204 return;
7205 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7206 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
7207 return;
7208 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7209 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
7210 return;
7211 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7212 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
7213 return;
7214 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7215 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
7216 return;
7217 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7218 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7219 return;
7220 }
7221 break;
7222 }
7223 case AArch64ISD::ST3post: {
7224 VT = Node->getOperand(1).getValueType();
7225 if (VT == MVT::v8i8) {
7226 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
7227 return;
7228 } else if (VT == MVT::v16i8) {
7229 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
7230 return;
7231 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7232 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
7233 return;
7234 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7235 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
7236 return;
7237 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7238 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
7239 return;
7240 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7241 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
7242 return;
7243 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7244 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
7245 return;
7246 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7247 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7248 return;
7249 }
7250 break;
7251 }
7252 case AArch64ISD::ST4post: {
7253 VT = Node->getOperand(1).getValueType();
7254 if (VT == MVT::v8i8) {
7255 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
7256 return;
7257 } else if (VT == MVT::v16i8) {
7258 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
7259 return;
7260 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7261 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
7262 return;
7263 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7264 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
7265 return;
7266 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7267 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
7268 return;
7269 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7270 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
7271 return;
7272 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7273 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
7274 return;
7275 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7276 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7277 return;
7278 }
7279 break;
7280 }
7281 case AArch64ISD::ST1x2post: {
7282 VT = Node->getOperand(1).getValueType();
7283 if (VT == MVT::v8i8) {
7284 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
7285 return;
7286 } else if (VT == MVT::v16i8) {
7287 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
7288 return;
7289 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7290 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
7291 return;
7292 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7293 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
7294 return;
7295 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7296 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
7297 return;
7298 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7299 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
7300 return;
7301 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7302 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7303 return;
7304 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7305 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
7306 return;
7307 }
7308 break;
7309 }
7310 case AArch64ISD::ST1x3post: {
7311 VT = Node->getOperand(1).getValueType();
7312 if (VT == MVT::v8i8) {
7313 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
7314 return;
7315 } else if (VT == MVT::v16i8) {
7316 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
7317 return;
7318 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7319 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
7320 return;
7321 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7322 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
7323 return;
7324 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7325 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
7326 return;
7327 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7328 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
7329 return;
7330 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7331 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7332 return;
7333 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7334 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
7335 return;
7336 }
7337 break;
7338 }
7339 case AArch64ISD::ST1x4post: {
7340 VT = Node->getOperand(1).getValueType();
7341 if (VT == MVT::v8i8) {
7342 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
7343 return;
7344 } else if (VT == MVT::v16i8) {
7345 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
7346 return;
7347 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7348 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
7349 return;
7350 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7351 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
7352 return;
7353 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7354 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
7355 return;
7356 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7357 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
7358 return;
7359 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7360 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7361 return;
7362 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7363 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
7364 return;
7365 }
7366 break;
7367 }
7368 case AArch64ISD::ST2LANEpost: {
7369 VT = Node->getOperand(1).getValueType();
7370 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7371 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
7372 return;
7373 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7374 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7375 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
7376 return;
7377 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7378 VT == MVT::v2f32) {
7379 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
7380 return;
7381 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7382 VT == MVT::v1f64) {
7383 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
7384 return;
7385 }
7386 break;
7387 }
7388 case AArch64ISD::ST3LANEpost: {
7389 VT = Node->getOperand(1).getValueType();
7390 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7391 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
7392 return;
7393 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7394 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7395 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
7396 return;
7397 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7398 VT == MVT::v2f32) {
7399 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
7400 return;
7401 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7402 VT == MVT::v1f64) {
7403 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
7404 return;
7405 }
7406 break;
7407 }
7408 case AArch64ISD::ST4LANEpost: {
7409 VT = Node->getOperand(1).getValueType();
7410 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7411 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
7412 return;
7413 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7414 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7415 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
7416 return;
7417 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7418 VT == MVT::v2f32) {
7419 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
7420 return;
7421 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7422 VT == MVT::v1f64) {
7423 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
7424 return;
7425 }
7426 break;
7427 }
7428 }
7429
7430 // Select the default instruction
7431 SelectCode(Node);
7432}
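// --- Illustrative sketch (editorial addition, not part of the LLVM source) ---
// Most intrinsic cases above pick one machine opcode out of an
// {8-bit, 16-bit, 32-bit, 64-bit} element-type table (the SelectOpcodeFromVT
// calls). A minimal, self-contained recreation of that lookup, assuming a 0
// entry means "no instruction for this element size":
#include <array>

constexpr unsigned opcodeForElementBits(unsigned ElementBits,
                                        std::array<unsigned, 4> Opcodes) {
  switch (ElementBits) {
  case 8:  return Opcodes[0];
  case 16: return Opcodes[1];
  case 32: return Opcodes[2];
  case 64: return Opcodes[3];
  default: return 0; // unsupported element size
  }
}

// E.g. with a table shaped like {0, FCLAMP_H, FCLAMP_S, FCLAMP_D}, byte
// elements select nothing while 32-bit elements select the "_S" variant:
static_assert(opcodeForElementBits(8, {0, 11, 12, 13}) == 0);
static_assert(opcodeForElementBits(32, {0, 11, 12, 13}) == 12);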
7433
7434 /// createAArch64ISelDag - This pass converts a legalized DAG into an
7435 /// AArch64-specific DAG, ready for instruction scheduling.
7436 FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
7437 CodeGenOptLevel OptLevel) {
7438 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7439}
7440
7441/// When \p PredVT is a scalable vector predicate in the form
7442 /// MVT::nx<M>xi1, it builds the corresponding scalable vector of
7443 /// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7444 /// structured vectors (NumVec > 1), the output data type is
7445/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7446/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7447 /// EVT.
7448 static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
7449 unsigned NumVec) {
7450 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
7451 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
7452 return EVT();
7453
7454 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7455 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7456 return EVT();
7457
7458 ElementCount EC = PredVT.getVectorElementCount();
7459 EVT ScalarVT =
7460 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7461 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
7462
7463 return MemVT;
7464}
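// --- Illustrative sketch (editorial addition, not part of the LLVM source) ---
// The helper above packs a predicate MVT::nx<M>xi1 into a data vector whose
// element width is 128 / M bits (SVEBitsPerBlock over the known-minimum
// element count), with M * NumVec elements for structured accesses. A
// self-contained recreation of that arithmetic:
constexpr unsigned packedElementBits(unsigned M) { return 128 / M; }
constexpr unsigned packedNumElements(unsigned M, unsigned NumVec) {
  return M * NumVec;
}

// E.g. nxv16i1 maps to nxv16i8, and nxv4i1 with NumVec == 2 maps to nxv8i32:
static_assert(packedElementBits(16) == 8 && packedNumElements(16, 1) == 16);
static_assert(packedElementBits(4) == 32 && packedNumElements(4, 2) == 8);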
7465
7466 /// Return the EVT of the data associated with a memory operation in \p
7467 /// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
7468 static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
7469 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Root))
7470 return MemIntr->getMemoryVT();
7471
7472 if (isa<MemSDNode>(Root)) {
7473 EVT MemVT = cast<MemSDNode>(Root)->getMemoryVT();
7474
7475 EVT DataVT;
7476 if (auto *Load = dyn_cast<LoadSDNode>(Root))
7477 DataVT = Load->getValueType(0);
7478 else if (auto *Load = dyn_cast<MaskedLoadSDNode>(Root))
7479 DataVT = Load->getValueType(0);
7480 else if (auto *Store = dyn_cast<StoreSDNode>(Root))
7481 DataVT = Store->getValue().getValueType();
7482 else if (auto *Store = dyn_cast<MaskedStoreSDNode>(Root))
7483 DataVT = Store->getValue().getValueType();
7484 else
7485 llvm_unreachable("Unexpected MemSDNode!");
7486
7487 return DataVT.changeVectorElementType(MemVT.getVectorElementType());
7488 }
7489
7490 const unsigned Opcode = Root->getOpcode();
7491 // For custom ISD nodes, we have to look at them individually to extract the
7492 // type of the data moved to/from memory.
7493 switch (Opcode) {
7494 case AArch64ISD::LD1_MERGE_ZERO:
7495 case AArch64ISD::LD1S_MERGE_ZERO:
7496 case AArch64ISD::LDNF1_MERGE_ZERO:
7497 case AArch64ISD::LDNF1S_MERGE_ZERO:
7498 return cast<VTSDNode>(Root->getOperand(3))->getVT();
7499 case AArch64ISD::ST1_PRED:
7500 return cast<VTSDNode>(Root->getOperand(4))->getVT();
7501 default:
7502 break;
7503 }
7504
7505 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7506 return EVT();
7507
7508 switch (Root->getConstantOperandVal(1)) {
7509 default:
7510 return EVT();
7511 case Intrinsic::aarch64_sme_ldr:
7512 case Intrinsic::aarch64_sme_str:
7513 return MVT::nxv16i8;
7514 case Intrinsic::aarch64_sve_prf:
7515 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7516 // width of the predicate.
7517 return getPackedVectorTypeFromPredicateType(
7518 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7519 case Intrinsic::aarch64_sve_ld2_sret:
7520 case Intrinsic::aarch64_sve_ld2q_sret:
7521 return getPackedVectorTypeFromPredicateType(
7522 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7523 case Intrinsic::aarch64_sve_st2q:
7524 return getPackedVectorTypeFromPredicateType(
7525 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7526 case Intrinsic::aarch64_sve_ld3_sret:
7527 case Intrinsic::aarch64_sve_ld3q_sret:
7528 return getPackedVectorTypeFromPredicateType(
7529 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7530 case Intrinsic::aarch64_sve_st3q:
7531 return getPackedVectorTypeFromPredicateType(
7532 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7533 case Intrinsic::aarch64_sve_ld4_sret:
7534 case Intrinsic::aarch64_sve_ld4q_sret:
7535 return getPackedVectorTypeFromPredicateType(
7536 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7537 case Intrinsic::aarch64_sve_st4q:
7538 return getPackedVectorTypeFromPredicateType(
7539 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
7540 case Intrinsic::aarch64_sve_ld1udq:
7541 case Intrinsic::aarch64_sve_st1dq:
7542 return EVT(MVT::nxv1i64);
7543 case Intrinsic::aarch64_sve_ld1uwq:
7544 case Intrinsic::aarch64_sve_st1wq:
7545 return EVT(MVT::nxv1i32);
7546 }
7547}
7548
7549/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7550 /// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
7551/// where Root is the memory access using N for its address.
7552template <int64_t Min, int64_t Max>
7553bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7554 SDValue &Base,
7555 SDValue &OffImm) {
7556 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
7557 const DataLayout &DL = CurDAG->getDataLayout();
7558 const MachineFrameInfo &MFI = MF->getFrameInfo();
7559
7560 if (N.getOpcode() == ISD::FrameIndex) {
7561 int FI = cast<FrameIndexSDNode>(N)->getIndex();
7562 // We can only encode VL scaled offsets, so only fold in frame indexes
7563 // referencing SVE objects.
7564 if (MFI.hasScalableStackID(FI)) {
7565 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7566 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7567 return true;
7568 }
7569
7570 return false;
7571 }
7572
7573 if (MemVT == EVT())
7574 return false;
7575
7576 if (N.getOpcode() != ISD::ADD)
7577 return false;
7578
7579 SDValue VScale = N.getOperand(1);
7580 int64_t MulImm = std::numeric_limits<int64_t>::max();
7581 if (VScale.getOpcode() == ISD::VSCALE) {
7582 MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7583 } else if (auto C = dyn_cast<ConstantSDNode>(VScale)) {
7584 int64_t ByteOffset = C->getSExtValue();
7585 const auto KnownVScale =
7586 Subtarget->getSVEVectorSizeInBits() / AArch64::SVEBitsPerBlock;
7587
7588 if (!KnownVScale || ByteOffset % KnownVScale != 0)
7589 return false;
7590
7591 MulImm = ByteOffset / KnownVScale;
7592 } else
7593 return false;
7594
7595 TypeSize TS = MemVT.getSizeInBits();
7596 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7597
7598 if ((MulImm % MemWidthBytes) != 0)
7599 return false;
7600
7601 int64_t Offset = MulImm / MemWidthBytes;
7602 if ((Offset < Min) || (Offset > Max))
7603 return false;
7604
7605 Base = N.getOperand(0);
7606 if (Base.getOpcode() == ISD::FrameIndex) {
7607 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7608 // We can only encode VL scaled offsets, so only fold in frame indexes
7609 // referencing SVE objects.
7610 if (MFI.hasScalableStackID(FI))
7611 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7612 }
7613
7614 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7615 return true;
7616}
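// --- Illustrative sketch (editorial addition, not part of the LLVM source) ---
// The routine above folds "base + vscale * MulImm" into a VL-scaled immediate:
// OffImm = MulImm / sizeof(MemVT), accepted only when the division is exact
// and the result lies in [Min, Max]. A self-contained recreation of that
// check (MemWidthBytes stands for the known-minimum size of MemVT in bytes):
#include <cstdint>
#include <optional>

constexpr std::optional<int64_t> foldVLOffset(int64_t MulImm,
                                              int64_t MemWidthBytes,
                                              int64_t Min, int64_t Max) {
  if (MulImm % MemWidthBytes != 0)
    return std::nullopt; // not a whole number of vectors
  int64_t Offset = MulImm / MemWidthBytes;
  if (Offset < Min || Offset > Max)
    return std::nullopt; // outside the encodable range
  return Offset;
}

// E.g. an nxv4i32 access (16 bytes per vscale) at base + vscale * 32 folds to
// "[base, #2, mul vl]" for an instruction that accepts offsets in [-8, 7]:
static_assert(foldVLOffset(32, 16, -8, 7) == 2);
static_assert(!foldVLOffset(40, 16, -8, 7).has_value());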
7617
7618/// Select register plus register addressing mode for SVE, with scaled
7619/// offset.
7620bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7621 SDValue &Base,
7622 SDValue &Offset) {
7623 if (N.getOpcode() != ISD::ADD)
7624 return false;
7625
7626 // Process an ADD node.
7627 const SDValue LHS = N.getOperand(0);
7628 const SDValue RHS = N.getOperand(1);
7629
7630 // 8-bit data does not come with an SHL node, so it is treated
7631 // separately.
7632 if (Scale == 0) {
7633 Base = LHS;
7634 Offset = RHS;
7635 return true;
7636 }
7637
7638 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7639 int64_t ImmOff = C->getSExtValue();
7640 unsigned Size = 1 << Scale;
7641
7642 // To use the reg+reg addressing mode, the immediate must be a multiple of
7643 // the vector element's byte size.
7644 if (ImmOff % Size)
7645 return false;
7646
7647 SDLoc DL(N);
7648 Base = LHS;
7649 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7650 SDValue Ops[] = {Offset};
7651 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7652 Offset = SDValue(MI, 0);
7653 return true;
7654 }
7655
7656 // Check if the RHS is a shift node with a constant.
7657 if (RHS.getOpcode() != ISD::SHL)
7658 return false;
7659
7660 const SDValue ShiftRHS = RHS.getOperand(1);
7661 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7662 if (C->getZExtValue() == Scale) {
7663 Base = LHS;
7664 Offset = RHS.getOperand(0);
7665 return true;
7666 }
7667
7668 return false;
7669}
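// --- Illustrative sketch (editorial addition, not part of the LLVM source) ---
// In the reg+reg form above, a constant byte offset is still usable when it
// is a multiple of the element size (1 << Scale); it is then materialised in
// a register as an element index (ImmOff >> Scale). A self-contained
// recreation of that conversion:
#include <cstdint>
#include <optional>

constexpr std::optional<int64_t> regRegIndexFromByteOffset(int64_t ImmOff,
                                                           unsigned Scale) {
  const int64_t Size = int64_t{1} << Scale; // element size in bytes
  if (ImmOff % Size != 0)
    return std::nullopt; // reg+reg addressing cannot express this offset
  return ImmOff >> Scale; // index to place in the offset register
}

// E.g. a 24-byte offset with 64-bit elements (Scale == 3) becomes index 3,
// i.e. the kind of operand used by "ld1d { z0.d }, p0/z, [x0, x1, lsl #3]":
static_assert(regRegIndexFromByteOffset(24, 3) == 3);
static_assert(!regRegIndexFromByteOffset(20, 3).has_value());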
7670
7671bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7672 const AArch64TargetLowering *TLI =
7673 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7674
7675 return TLI->isAllActivePredicate(*CurDAG, N);
7676}
7677
7678bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7679 EVT VT = N.getValueType();
7680 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7681}
7682
7683 bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7684 SDValue &Base, SDValue &Offset,
7685 unsigned Scale) {
7686 auto MatchConstantOffset = [&](SDValue CN) -> SDValue {
7687 if (auto *C = dyn_cast<ConstantSDNode>(CN)) {
7688 int64_t ImmOff = C->getSExtValue();
7689 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)))
7690 return CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7691 }
7692 return SDValue();
7693 };
7694
7695 if (SDValue C = MatchConstantOffset(N)) {
7696 Base = CurDAG->getConstant(0, SDLoc(N), MVT::i32);
7697 Offset = C;
7698 return true;
7699 }
7700
7701 // Try to untangle an ADD node into a 'reg + offset'
7702 if (CurDAG->isBaseWithConstantOffset(N)) {
7703 if (SDValue C = MatchConstantOffset(N.getOperand(1))) {
7704 Base = N.getOperand(0);
7705 Offset = C;
7706 return true;
7707 }
7708 }
7709
7710 // By default, just match reg + 0.
7711 Base = N;
7712 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7713 return true;
7714}
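// --- Illustrative sketch (editorial addition, not part of the LLVM source) ---
// The tile-slice selection above folds a constant only when it is positive,
// no larger than MaxSize and a multiple of Scale, encoding ImmOff / Scale;
// anything else falls back to "base = N, offset = 0". A self-contained
// recreation of the constant check:
#include <cstdint>
#include <optional>

constexpr std::optional<int64_t> smeSliceImm(int64_t ImmOff, int64_t MaxSize,
                                             int64_t Scale) {
  if (ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0))
    return ImmOff / Scale; // encodable immediate part of the slice
  return std::nullopt;     // caller falls back to reg + 0
}

static_assert(smeSliceImm(14, 15, 2) == 7);        // folded into the slice
static_assert(!smeSliceImm(5, 15, 2).has_value()); // not a multiple of Scale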
7715
7716bool AArch64DAGToDAGISel::SelectCmpBranchUImm6Operand(SDNode *P, SDValue N,
7717 SDValue &Imm) {
7718 AArch64CC::CondCode CC =
7719 static_cast<AArch64CC::CondCode>(P->getConstantOperandVal(1));
7720 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
7721 // Check conservatively if the immediate fits the valid range [0, 64).
7722 // Immediate variants for GE and HS definitely need to be decremented
7723 // when lowering the pseudos later, so an immediate of 1 would become 0.
7724 // For the inverse conditions LT and LO we don't know for sure if they
7725 // will need a decrement but should the decision be made to reverse the
7726 // branch condition, we again end up with the need to decrement.
7727 // The same argument holds for LE, LS, GT and HI and possibly
7728 // incremented immediates. This can lead to slightly less optimal
7729 // codegen, e.g. we never codegen the legal case
7730 // cblt w0, #63, A
7731 // because we could end up with the illegal case
7732 // cbge w0, #64, B
7733 // should the decision to reverse the branch direction be made. For the
7734 // lower bound cases this is no problem since we can express comparisons
7735 // against 0 with either tbz/tbnz or using wzr/xzr.
7736 uint64_t LowerBound = 0, UpperBound = 64;
7737 switch (CC) {
7738 case AArch64CC::GE:
7739 case AArch64CC::HS:
7740 case AArch64CC::LT:
7741 case AArch64CC::LO:
7742 LowerBound = 1;
7743 break;
7744 case AArch64CC::LE:
7745 case AArch64CC::LS:
7746 case AArch64CC::GT:
7747 case AArch64CC::HI:
7748 UpperBound = 63;
7749 break;
7750 default:
7751 break;
7752 }
7753
7754 if (CN->getAPIntValue().uge(LowerBound) &&
7755 CN->getAPIntValue().ult(UpperBound)) {
7756 SDLoc DL(N);
7757 Imm = CurDAG->getTargetConstant(CN->getZExtValue(), DL, N.getValueType());
7758 return true;
7759 }
7760 }
7761
7762 return false;
7763}
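// --- Illustrative sketch (editorial addition, not part of the LLVM source) ---
// The bounds chosen above shrink the nominal [0, 64) range so that a later
// +/-1 adjustment of the immediate (if the branch condition is reversed when
// lowering the pseudo) can never leave the encodable range. A self-contained
// recreation of the bound selection, with plain enumerators standing in for
// AArch64CC::CondCode:
enum class CC { GE, HS, LT, LO, LE, LS, GT, HI, Other };

constexpr bool cbImmInRange(CC C, unsigned Imm) {
  unsigned Lower = 0, Upper = 64;
  if (C == CC::GE || C == CC::HS || C == CC::LT || C == CC::LO)
    Lower = 1;  // may need a decrement later
  else if (C == CC::LE || C == CC::LS || C == CC::GT || C == CC::HI)
    Upper = 63; // may need an increment later
  return Imm >= Lower && Imm < Upper;
}

static_assert(cbImmInRange(CC::Other, 63)); // full [0, 64) range
static_assert(!cbImmInRange(CC::GT, 63));   // could become 64 after reversal
static_assert(!cbImmInRange(CC::GE, 0));    // could need a decrement below 0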
7764
7765template <bool MatchCBB>
7766bool AArch64DAGToDAGISel::SelectCmpBranchExtOperand(SDValue N, SDValue &Reg,
7767 SDValue &ExtType) {
7768
7769 // Use an invalid shift-extend value to indicate we don't need to extend later
7770 if (N.getOpcode() == ISD::AssertZext || N.getOpcode() == ISD::AssertSext) {
7771 EVT Ty = cast<VTSDNode>(N.getOperand(1))->getVT();
7772 if (Ty != (MatchCBB ? MVT::i8 : MVT::i16))
7773 return false;
7774 Reg = N.getOperand(0);
7775 ExtType = CurDAG->getSignedTargetConstant(AArch64_AM::InvalidShiftExtend,
7776 SDLoc(N), MVT::i32);
7777 return true;
7778 }
7779
7780 AArch64_AM::ShiftExtendType ET = getExtendTypeForNode(N);
7781
7782 if ((MatchCBB && (ET == AArch64_AM::UXTB || ET == AArch64_AM::SXTB)) ||
7783 (!MatchCBB && (ET == AArch64_AM::UXTH || ET == AArch64_AM::SXTH))) {
7784 Reg = N.getOperand(0);
7785 ExtType =
7786 CurDAG->getTargetConstant(getExtendEncoding(ET), SDLoc(N), MVT::i32);
7787 return true;
7788 }
7789
7790 return false;
7791}
unsigned SubReg
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms)
static int getIntOperandFromRegisterString(StringRef RegString)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, unsigned NumberOfIgnoredHighBits, EVT VT)
Does DstMask form a complementary pair with the mask provided by BitsToBeInserted,...
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N)
Instructions that accept extend modifiers like UXTW expect the register being extended to be a GPR32,...
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op, SDValue &Src, int &DstLSB, int &Width)
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, SDValue &Src, int &DstLSB, int &Width)
Does this tree qualify as an attempt to move a bitfield into position, essentially "(and (shl VAL,...
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB, unsigned NumberOfIgnoredLowBits, bool BiggerPattern)
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits=0, bool BiggerPattern=false)
static bool isShiftedMask(uint64_t Mask, EVT VT)
bool SelectSMETile(unsigned &BaseReg, unsigned TileNum)
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root)
Return the EVT of the data associated to a memory operation in Root.
static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isWorthFoldingADDlow(SDValue N)
If there's a use of this ADDlow that's not itself a load/store then we'll need to create a real ADD i...
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N)
getShiftTypeForNode - Translate a shift node to the corresponding ShiftType value.
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB)
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef< unsigned > Opcodes)
This function selects an opcode from a list of opcodes, which is expected to be the opcode for { 8-bi...
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, unsigned NumVec)
When PredVT is a scalable vector predicate in the form MVT::nx<M>xi1, it builds the correspondent sca...
static bool isPreferredADD(int64_t ImmOff)
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, uint64_t Imm, uint64_t MSB, unsigned Depth)
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount)
Create a machine node performing a notional SHL of Op by ShlAmount.
static bool isWorthFoldingSHL(SDValue V)
Determine whether it is worth it to fold SHL into the addressing mode.
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, bool BiggerPattern)
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, SDValue Src, SDValue Dst, SelectionDAG *CurDAG, const bool BiggerPattern)
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, SDValue Orig, unsigned Depth)
static bool isMemOpOrPrefetch(SDNode *N)
static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG)
static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth)
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth=0)
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected)
static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore=false)
getExtendTypeForNode - Translate an extend node to the corresponding ExtendType value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, SDValue &ShiftedOperand, uint64_t &EncodedShiftImm)
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range, unsigned Size)
Check if the immediate offset is valid as a scaled immediate.
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
return SDValue()
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG)
WidenVector - Given a value in the V64 register class, produce the equivalent value in the V128 regis...
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define DEBUG_TYPE
IRTranslator LLVM IR MI
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t High
OptimizedStructLayoutField Field
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
#define LLVM_DEBUG(...)
Definition Debug.h:114
#define PASS_NAME
Value * RHS
Value * LHS
const AArch64RegisterInfo * getRegisterInfo() const override
bool isStreaming() const
Returns true if the function has a streaming body.
bool isX16X17Safer() const
Returns whether the operating system makes it safer to store sensitive values in x16 and x17 as oppos...
unsigned getSVEVectorSizeInBits() const
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
LLVM_ABI bool getExactInverse(APFloat *Inv) const
If this value is normal and has an exact, normal, multiplicative inverse, store it in inv and return ...
Definition APFloat.cpp:5995
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1314
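A sketch, under assumptions, of how getExactInverse and convertToInteger combine to test that a floating-point constant equals 1/2^N, the kind of check used when folding multiplies into fixed-point converts; isReciprocalPowerOf2 is a hypothetical name.
  static bool isReciprocalPowerOf2(const APFloat &FVal, unsigned RegWidth,
                                   unsigned &FBits) {
    APFloat Inv(FVal.getSemantics());
    if (!FVal.getExactInverse(&Inv))
      return false;
    // The exact inverse must itself be an integer power of two that fits the
    // register width.
    bool IsExact;
    uint64_t IntVal = 0;
    if (Inv.convertToInteger(MutableArrayRef<uint64_t>(IntVal), 64,
                             /*IsSigned=*/false, APFloat::rmTowardZero,
                             &IsExact) != APFloat::opOK ||
        !IsExact || !isPowerOf2_64(IntVal))
      return false;
    FBits = Log2_64(IntVal);
    return FBits > 0 && FBits <= RegWidth;
  }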
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1671
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1489
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1640
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1599
void flipAllBits()
Toggle every bit to its opposite value.
Definition APInt.h:1453
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:511
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1563
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:859
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852
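A minimal sketch of how these APInt queries combine when a bitfield pattern needs the (LSB, width) pair of a contiguous mask; getMaskLSBAndWidth is a hypothetical helper.
  static bool getMaskLSBAndWidth(const APInt &Mask, unsigned &LSB, unsigned &Width) {
    if (!Mask.isShiftedMask())
      return false;              // require one contiguous run of ones
    LSB = Mask.countr_zero();    // trailing zeros give the start of the run
    Width = Mask.popcount();     // population count gives its length
    return true;
  }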
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const GlobalValue * getGlobal() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
bool hasScalableStackID(int ObjectIdx) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
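A sketch of the accessor pattern these SDNode/SDValue members support: match a single-use (shl X, C) operand. matchOneUseShl is a hypothetical helper.
  static bool matchOneUseShl(SDValue V, SDValue &X, uint64_t &ShiftAmt) {
    if (V.getOpcode() != ISD::SHL || !V.hasOneUse())
      return false;
    const auto *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
    if (!C)
      return false;
    X = V.getOperand(0);
    ShiftAmt = C->getZExtValue();
    return true;
  }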
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
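A hedged sketch of how these node-creation hooks are typically used to rewrite a node into a UBFM (ubfx) machine instruction; emitUBFX64 is a hypothetical helper and assumes LSB and Width come from an earlier bitfield analysis.
  static void emitUBFX64(SDNode *N, unsigned LSB, unsigned Width,
                         SelectionDAG *CurDAG) {
    SDLoc DL(N);
    SDValue Ops[] = {N->getOperand(0),
                     CurDAG->getTargetConstant(LSB, DL, MVT::i64),
                     CurDAG->getTargetConstant(LSB + Width - 1, DL, MVT::i64)};
    // Mutate N in place into the target machine node (immr = LSB, imms = MSB).
    CurDAG->SelectNodeTo(N, AArch64::UBFMXri, MVT::i64, Ops);
  }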
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:702
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
unsigned getID() const
Return the register class ID number.
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:956
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint32_t parseGenericRegister(StringRef Name)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static bool processLogicalImmediate(uint64_t Imm, unsigned RegSize, uint64_t &Encoding)
processLogicalImmediate - Determine if an immediate value can be encoded as the immediate operand of ...
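A small sketch of the encode/decode round trip these logical-immediate helpers provide; the constant is just an illustrative encodable bit pattern.
  static void logicalImmRoundTrip() {
    uint64_t Imm = 0x00ff00ff00ff00ffULL; // repeating 16-bit element 0x00ff
    uint64_t Encoding = 0;
    if (AArch64_AM::isLogicalImmediate(Imm, 64) &&
        AArch64_AM::processLogicalImmediate(Imm, 64, Encoding))
      assert(AArch64_AM::decodeLogicalImmediate(Encoding, 64) == Imm &&
             "decode should invert the N:immr:imms encoding");
  }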
unsigned getExtendEncoding(AArch64_AM::ShiftExtendType ET)
Mapping from extend bits to required operation: shifter: 000 ==> uxtb 001 ==> uxth 010 ==> uxtw 011 ==> uxtx 100 ==> sxtb 101 ==> sxth 110 ==> sxtw 111 ==> sxtx
static bool isSVECpyDupImm(int SizeInBits, int64_t Val, int32_t &Imm, int32_t &Shift)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==> lsr 010 ==> asr 011 ==> ror 100 ==> msl
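A tiny sketch of the shifter-operand packing these helpers implement, using LSL #16 as the example value.
  static void shifterImmExample() {
    unsigned Packed = AArch64_AM::getShifterImm(AArch64_AM::LSL, 16);
    assert(AArch64_AM::getShiftType(Packed) == AArch64_AM::LSL);
    assert(AArch64_AM::getShiftValue(Packed) == 16);
    (void)Packed; // silence unused-variable warnings in release builds
  }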
static constexpr unsigned SVEBitsPerBlock
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:593
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:981
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:669
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition ISDOpcodes.h:69
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:225
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:607
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:134
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:532
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition MathExtras.h:267
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:1989
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAArch64ISelDag(AArch64TargetMachine &TM, CodeGenOptLevel OptLevel)
createAArch64ISelDag - This pass converts a legalized DAG into a AArch64-specific DAG,...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
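A scalar counterpart of the APInt sketch earlier in this index, assuming a 64-bit mask; the constant is only illustrative.
  static void shiftedMaskExample() {
    uint64_t Mask = 0x000000ffff000000ULL; // ones in bits [24, 39]
    if (isShiftedMask_64(Mask)) {
      unsigned LSB = countr_zero(Mask);          // 24
      unsigned Width = countr_one(Mask >> LSB);  // 16
      uint64_t Rebuilt = maskTrailingOnes<uint64_t>(Width) << LSB;
      assert(Rebuilt == Mask && "round trip should reproduce the mask");
      (void)Rebuilt;
    }
  }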
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
Extended Value Type.
Definition ValueTypes.h:35
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:463
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isFixedLengthVector() const
Definition ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition ValueTypes.h:202
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
Matching combinators.