1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64MachineFunctionInfo.h"
14#include "AArch64TargetMachine.h"
15#include "MCTargetDesc/AArch64AddressingModes.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/CodeGen/ISDOpcodes.h"
18#include "llvm/CodeGen/SelectionDAGISel.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/ErrorHandling.h"
25#include "llvm/Support/KnownBits.h"
26#include "llvm/Support/MathExtras.h"
27#include "llvm/Support/raw_ostream.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34// https://github.com/llvm/llvm-project/issues/114425
35#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
36#pragma inline_depth(0)
37#endif
38
39//===--------------------------------------------------------------------===//
40/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
41/// instructions for SelectionDAG operations.
42///
43namespace {
44
45class AArch64DAGToDAGISel : public SelectionDAGISel {
46
47 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
48 /// make the right decision when generating code for different targets.
49 const AArch64Subtarget *Subtarget;
50
51public:
52 AArch64DAGToDAGISel() = delete;
53
54 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
55 CodeGenOptLevel OptLevel)
56 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
57
58 bool runOnMachineFunction(MachineFunction &MF) override {
59 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
60 return SelectionDAGISel::runOnMachineFunction(MF);
61 }
62
63 void Select(SDNode *Node) override;
64
65 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
66 /// inline asm expressions.
67 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
68 InlineAsm::ConstraintCode ConstraintID,
69 std::vector<SDValue> &OutOps) override;
70
71 template <signed Low, signed High, signed Scale>
72 bool SelectRDVLImm(SDValue N, SDValue &Imm);
73
74 template <signed Low, signed High>
75 bool SelectRDSVLShiftImm(SDValue N, SDValue &Imm);
76
77 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
78 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
79 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
80 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
81 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
82 return SelectShiftedRegister(N, false, Reg, Shift);
83 }
84 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
85 return SelectShiftedRegister(N, true, Reg, Shift);
86 }
87 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
88 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
89 }
90 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
91 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
92 }
93 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
94 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
95 }
96 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
97 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
98 }
99 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
100 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
101 }
102 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
103 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
104 }
105 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
106 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
107 }
108 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
109 return SelectAddrModeIndexed(N, 1, Base, OffImm);
110 }
111 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
112 return SelectAddrModeIndexed(N, 2, Base, OffImm);
113 }
114 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
115 return SelectAddrModeIndexed(N, 4, Base, OffImm);
116 }
117 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
118 return SelectAddrModeIndexed(N, 8, Base, OffImm);
119 }
120 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
121 return SelectAddrModeIndexed(N, 16, Base, OffImm);
122 }
123 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
124 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
125 }
126 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
127 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
128 }
129 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
130 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
131 }
132 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
133 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
134 }
135 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
136 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
137 }
138 template <unsigned Size, unsigned Max>
139 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
140 // Test if there is an appropriate addressing mode and check if the
141 // immediate fits.
142 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
143 if (Found) {
144 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
145 int64_t C = CI->getSExtValue();
146 if (C <= Max)
147 return true;
148 }
149 }
150
151 // Otherwise, base only, materialize address in register.
152 Base = N;
153 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
154 return true;
155 }
156
157 template<int Width>
158 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
159 SDValue &SignExtend, SDValue &DoShift) {
160 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
161 }
162
163 template<int Width>
164 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
165 SDValue &SignExtend, SDValue &DoShift) {
166 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
167 }
168
169 bool SelectExtractHigh(SDValue N, SDValue &Res) {
170 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
171 N = N->getOperand(0);
172 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
173 !isa<ConstantSDNode>(N->getOperand(1)))
174 return false;
175 EVT VT = N->getValueType(0);
176 EVT LVT = N->getOperand(0).getValueType();
177 unsigned Index = N->getConstantOperandVal(1);
178 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
179 Index != VT.getVectorNumElements())
180 return false;
181 Res = N->getOperand(0);
182 return true;
183 }
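// For example, with a little-endian v8i16 input, (extract_subvector v8i16:%x, 4)
// producing v4i16 satisfies the checks above (64-bit result, 128-bit source,
// index equal to the result element count), so Res is set to %x and the
// pattern can use the high half of the Q register directly.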
184
185 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
186 if (N.getOpcode() != AArch64ISD::VLSHR)
187 return false;
188 SDValue Op = N->getOperand(0);
189 EVT VT = Op.getValueType();
190 unsigned ShtAmt = N->getConstantOperandVal(1);
191 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
192 return false;
193
194 APInt Imm;
195 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
196 Imm = APInt(VT.getScalarSizeInBits(),
197 Op.getOperand(1).getConstantOperandVal(0)
198 << Op.getOperand(1).getConstantOperandVal(1));
199 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
200 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
201 Imm = APInt(VT.getScalarSizeInBits(),
202 Op.getOperand(1).getConstantOperandVal(0));
203 else
204 return false;
205
206 if (Imm != 1ULL << (ShtAmt - 1))
207 return false;
208
209 Res1 = Op.getOperand(0);
210 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
211 return true;
212 }
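// For example, on v8i16 a node of the form
//   (AArch64ISD::VLSHR (add %a, (MOVIshift 8, 0)), 4)
// matches: the addend 8 equals 1 << (4 - 1), so Res1 = %a and Res2 = 4, which
// is the add-rounding-constant-then-shift shape the rounding-shift patterns
// look for.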
213
214 bool SelectDupZeroOrUndef(SDValue N) {
215 switch(N->getOpcode()) {
216 case ISD::UNDEF:
217 return true;
218 case AArch64ISD::DUP:
219 case ISD::SPLAT_VECTOR: {
220 auto Opnd0 = N->getOperand(0);
221 if (isNullConstant(Opnd0))
222 return true;
223 if (isNullFPConstant(Opnd0))
224 return true;
225 break;
226 }
227 default:
228 break;
229 }
230
231 return false;
232 }
233
234 bool SelectAny(SDValue) { return true; }
235
236 bool SelectDupZero(SDValue N) {
237 switch(N->getOpcode()) {
238 case AArch64ISD::DUP:
239 case ISD::SPLAT_VECTOR: {
240 auto Opnd0 = N->getOperand(0);
241 if (isNullConstant(Opnd0))
242 return true;
243 if (isNullFPConstant(Opnd0))
244 return true;
245 break;
246 }
247 }
248
249 return false;
250 }
251
252 template <MVT::SimpleValueType VT, bool Negate>
253 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
254 return SelectSVEAddSubImm(N, VT, Imm, Shift, Negate);
255 }
256
257 template <MVT::SimpleValueType VT, bool Negate>
258 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
259 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
260 }
261
262 template <MVT::SimpleValueType VT>
263 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
264 return SelectSVECpyDupImm(N, VT, Imm, Shift);
265 }
266
267 template <MVT::SimpleValueType VT, bool Invert = false>
268 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
269 return SelectSVELogicalImm(N, VT, Imm, Invert);
270 }
271
272 template <MVT::SimpleValueType VT>
273 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
274 return SelectSVEArithImm(N, VT, Imm);
275 }
276
277 template <unsigned Low, unsigned High, bool AllowSaturation = false>
278 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
279 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
280 }
281
282 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
283 if (N->getOpcode() != ISD::SPLAT_VECTOR)
284 return false;
285
286 EVT EltVT = N->getValueType(0).getVectorElementType();
287 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
288 /* High */ EltVT.getFixedSizeInBits(),
289 /* AllowSaturation */ true, Imm);
290 }
291
292 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
293 template<signed Min, signed Max, signed Scale, bool Shift>
294 bool SelectCntImm(SDValue N, SDValue &Imm) {
295 if (!isa<ConstantSDNode>(N))
296 return false;
297
298 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
299 if (Shift)
300 MulImm = 1LL << MulImm;
301
302 if ((MulImm % std::abs(Scale)) != 0)
303 return false;
304
305 MulImm /= Scale;
306 if ((MulImm >= Min) && (MulImm <= Max)) {
307 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
308 return true;
309 }
310
311 return false;
312 }
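// For example, if a pattern instantiates this with Scale = 2 (the per-granule
// element count for 64-bit elements, as counted by CNTD/INCD) and Shift is
// false, a request for VSCALE * 8 gives MulImm = 8 / 2 = 4, so Imm becomes 4
// and a "mul #4" form can be emitted, assuming 4 lies within [Min, Max].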
313
314 template <signed Max, signed Scale>
315 bool SelectEXTImm(SDValue N, SDValue &Imm) {
316 if (!isa<ConstantSDNode>(N))
317 return false;
318
319 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
320
321 if (MulImm >= 0 && MulImm <= Max) {
322 MulImm *= Scale;
323 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
324 return true;
325 }
326
327 return false;
328 }
329
330 template <unsigned BaseReg, unsigned Max>
331 bool ImmToReg(SDValue N, SDValue &Imm) {
332 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
333 uint64_t C = CI->getZExtValue();
334
335 if (C > Max)
336 return false;
337
338 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
339 return true;
340 }
341 return false;
342 }
343
344 /// Form sequences of consecutive 64/128-bit registers for use in NEON
345 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
346 /// between 1 and 4 elements. If it contains a single element, that is
347 /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
348 SDValue createDTuple(ArrayRef<SDValue> Vecs);
349 SDValue createQTuple(ArrayRef<SDValue> Vecs);
350 // Form a sequence of SVE registers for instructions using list of vectors,
351 // e.g. structured loads and stores (ldN, stN).
352 SDValue createZTuple(ArrayRef<SDValue> Vecs);
353
354 // Similar to above, except the register must start at a multiple of the
355 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
356 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
357
358 /// Generic helper for the createDTuple/createQTuple
359 /// functions. Those should almost always be called instead.
360 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
361 const unsigned SubRegs[]);
362
363 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
364
365 bool tryIndexedLoad(SDNode *N);
366
367 void SelectPtrauthAuth(SDNode *N);
368 void SelectPtrauthResign(SDNode *N);
369
370 bool trySelectStackSlotTagP(SDNode *N);
371 void SelectTagP(SDNode *N);
372
373 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
374 unsigned SubRegIdx);
375 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
376 unsigned SubRegIdx);
377 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
378 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
379 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
380 unsigned Opc_rr, unsigned Opc_ri,
381 bool IsIntr = false);
382 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
383 unsigned Scale, unsigned Opc_ri,
384 unsigned Opc_rr);
385 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
386 bool IsZmMulti, unsigned Opcode,
387 bool HasPred = false);
388 void SelectPExtPair(SDNode *N, unsigned Opc);
389 void SelectWhilePair(SDNode *N, unsigned Opc);
390 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
391 void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
392 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
393 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
394 bool IsTupleInput, unsigned Opc);
395 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
396
397 template <unsigned MaxIdx, unsigned Scale>
398 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
399 unsigned Op);
400 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
401 unsigned Op, unsigned MaxIdx, unsigned Scale,
402 unsigned BaseReg = 0);
403 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
404 /// SVE Reg+Imm addressing mode.
405 template <int64_t Min, int64_t Max>
406 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
407 SDValue &OffImm);
408 /// SVE Reg+Reg address mode.
409 template <unsigned Scale>
410 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
411 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
412 }
413
414 void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
415 unsigned Opc, uint32_t MaxImm);
416
417 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc);
418
419 template <unsigned MaxIdx, unsigned Scale>
420 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
421 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
422 }
423
424 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
425 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
426 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
427 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
428 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
429 unsigned Opc_rr, unsigned Opc_ri);
430 std::tuple<unsigned, SDValue, SDValue>
431 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
432 const SDValue &OldBase, const SDValue &OldOffset,
433 unsigned Scale);
434
435 bool tryBitfieldExtractOp(SDNode *N);
436 bool tryBitfieldExtractOpFromSExt(SDNode *N);
437 bool tryBitfieldInsertOp(SDNode *N);
438 bool tryBitfieldInsertInZeroOp(SDNode *N);
439 bool tryShiftAmountMod(SDNode *N);
440
441 bool tryReadRegister(SDNode *N);
442 bool tryWriteRegister(SDNode *N);
443
444 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
445 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
446
447 bool trySelectXAR(SDNode *N);
448
449// Include the pieces autogenerated from the target description.
450#include "AArch64GenDAGISel.inc"
451
452private:
453 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
454 SDValue &Shift);
455 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
456 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
457 SDValue &OffImm) {
458 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
459 }
460 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
461 unsigned Size, SDValue &Base,
462 SDValue &OffImm);
463 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
464 SDValue &OffImm);
465 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
466 SDValue &OffImm);
467 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
468 SDValue &Offset, SDValue &SignExtend,
469 SDValue &DoShift);
470 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
471 SDValue &Offset, SDValue &SignExtend,
472 SDValue &DoShift);
473 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
474 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
475 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
476 SDValue &Offset, SDValue &SignExtend);
477
478 template<unsigned RegWidth>
479 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
480 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
481 }
482
483 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
484
485 template<unsigned RegWidth>
486 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
487 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
488 }
489
490 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
491 unsigned Width);
492
493 bool SelectCMP_SWAP(SDNode *N);
494
495 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
496 bool Negate);
497 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
498 bool Negate);
499 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
500 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
501
502 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
503 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
504 bool AllowSaturation, SDValue &Imm);
505
506 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
507 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
508 SDValue &Offset);
509 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
510 SDValue &Offset, unsigned Scale = 1);
511
512 bool SelectAllActivePredicate(SDValue N);
513 bool SelectAnyPredicate(SDValue N);
514
515 bool SelectCmpBranchUImm6Operand(SDNode *P, SDValue N, SDValue &Imm);
516
517 template <bool MatchCBB>
518 bool SelectCmpBranchExtOperand(SDValue N, SDValue &Reg, SDValue &ExtType);
519};
520
521class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
522public:
523 static char ID;
524 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
525 CodeGenOptLevel OptLevel)
526 : SelectionDAGISelLegacy(
527 ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
528};
529} // end anonymous namespace
530
531char AArch64DAGToDAGISelLegacy::ID = 0;
532
533INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
534
535/// isIntImmediate - This method tests to see if the node is a constant
536/// operand. If so Imm will receive the 32-bit value.
537static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
538 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
539 Imm = C->getZExtValue();
540 return true;
541 }
542 return false;
543}
544
545// isIntImmediate - This method tests to see if N is a constant operand.
546// If so Imm will receive the value.
547static bool isIntImmediate(SDValue N, uint64_t &Imm) {
548 return isIntImmediate(N.getNode(), Imm);
549}
550
551// isOpcWithIntImmediate - This method tests to see if the node is a specific
552// opcode and that it has an immediate integer right operand.
553// If so Imm will receive the 32 bit value.
554static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
555 uint64_t &Imm) {
556 return N->getOpcode() == Opc &&
557 isIntImmediate(N->getOperand(1).getNode(), Imm);
558}
559
560// isIntImmediateEq - This method tests to see if N is a constant operand that
561// is equivalent to 'ImmExpected'.
562#ifndef NDEBUG
563static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
564 uint64_t Imm;
565 if (!isIntImmediate(N.getNode(), Imm))
566 return false;
567 return Imm == ImmExpected;
568}
569#endif
570
571bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
572 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
573 std::vector<SDValue> &OutOps) {
574 switch(ConstraintID) {
575 default:
576 llvm_unreachable("Unexpected asm memory constraint");
577 case InlineAsm::ConstraintCode::m:
578 case InlineAsm::ConstraintCode::o:
579 case InlineAsm::ConstraintCode::Q:
580 // We need to make sure that this one operand does not end up in XZR, thus
581 // require the address to be in a PointerRegClass register.
582 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
583 const TargetRegisterClass *TRC = TRI->getPointerRegClass();
584 SDLoc dl(Op);
585 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
586 SDValue NewOp =
587 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
588 dl, Op.getValueType(),
589 Op, RC), 0);
590 OutOps.push_back(NewOp);
591 return false;
592 }
593 return true;
594}
595
596/// SelectArithImmed - Select an immediate value that can be represented as
597/// a 12-bit value shifted left by either 0 or 12. If so, return true with
598/// Val set to the 12-bit value and Shift set to the shifter operand.
599bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
600 SDValue &Shift) {
601 // This function is called from the addsub_shifted_imm ComplexPattern,
602 // which lists [imm] as the list of opcodes it's interested in; however,
603 // we still need to check whether the operand is actually an immediate
604 // here because the ComplexPattern opcode list is only used in
605 // root-level opcode matching.
606 if (!isa<ConstantSDNode>(N.getNode()))
607 return false;
608
609 uint64_t Immed = N.getNode()->getAsZExtVal();
610 unsigned ShiftAmt;
611
612 if (Immed >> 12 == 0) {
613 ShiftAmt = 0;
614 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
615 ShiftAmt = 12;
616 Immed = Immed >> 12;
617 } else
618 return false;
619
620 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
621 SDLoc dl(N);
622 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
623 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
624 return true;
625}
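// Worked example: Immed = 0xabc000 has its low 12 bits clear and fits in 24
// bits, so Val = 0xabc and Shift = LSL #12 are returned, e.g.
//   add x0, x1, #0xabc, lsl #12
// whereas Immed = 0x1001000 is rejected because the value shifted right by 12
// no longer fits in 12 bits.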
626
627/// SelectNegArithImmed - As above, but negates the value before trying to
628/// select it.
629bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
630 SDValue &Shift) {
631 // This function is called from the addsub_shifted_imm ComplexPattern,
632 // which lists [imm] as the list of opcodes it's interested in; however,
633 // we still need to check whether the operand is actually an immediate
634 // here because the ComplexPattern opcode list is only used in
635 // root-level opcode matching.
636 if (!isa<ConstantSDNode>(N.getNode()))
637 return false;
638
639 // The immediate operand must be a 24-bit zero-extended immediate.
640 uint64_t Immed = N.getNode()->getAsZExtVal();
641
642 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
643 // have the opposite effect on the C flag, so this pattern mustn't match under
644 // those circumstances.
645 if (Immed == 0)
646 return false;
647
648 if (N.getValueType() == MVT::i32)
649 Immed = ~((uint32_t)Immed) + 1;
650 else
651 Immed = ~Immed + 1ULL;
652 if (Immed & 0xFFFFFFFFFF000000ULL)
653 return false;
654
655 Immed &= 0xFFFFFFULL;
656 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
657 Shift);
658}
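// Worked example: for an i32 node with Immed = -4 the negation yields 4,
// which SelectArithImmed accepts, so a pattern like (add w1, #-4) can be
// selected as "sub w0, w1, #4"; Immed = 0 is rejected above because cmp/cmn
// with #0 differ in their effect on the C flag.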
659
660/// getShiftTypeForNode - Translate a shift node to the corresponding
661/// ShiftType value.
662static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
663 switch (N.getOpcode()) {
664 default:
665 return AArch64_AM::InvalidShiftExtend;
666 case ISD::SHL:
667 return AArch64_AM::LSL;
668 case ISD::SRL:
669 return AArch64_AM::LSR;
670 case ISD::SRA:
671 return AArch64_AM::ASR;
672 case ISD::ROTR:
673 return AArch64_AM::ROR;
674 }
675}
676
677static bool isMemOpOrPrefetch(SDNode *N) {
678 return isa<MemSDNode>(*N) || N->getOpcode() == AArch64ISD::PREFETCH;
679}
680
681/// Determine whether it is worth it to fold SHL into the addressing
682/// mode.
683static bool isWorthFoldingSHL(SDValue V) {
684 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
685 // It is worth folding logical shift of up to three places.
686 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
687 if (!CSD)
688 return false;
689 unsigned ShiftVal = CSD->getZExtValue();
690 if (ShiftVal > 3)
691 return false;
692
693 // Check if this particular node is reused in any non-memory related
694 // operation. If yes, do not try to fold this node into the address
695 // computation, since the computation will be kept.
696 const SDNode *Node = V.getNode();
697 for (SDNode *UI : Node->users())
698 if (!isMemOpOrPrefetch(UI))
699 for (SDNode *UII : UI->users())
700 if (!isMemOpOrPrefetch(UII))
701 return false;
702 return true;
703}
704
705/// Determine whether it is worth folding V into an extended register addressing
706/// mode.
707bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
708 // Trivial if we are optimizing for code size or if there is only
709 // one use of the value.
710 if (CurDAG->shouldOptForSize() || V.hasOneUse())
711 return true;
712
713 // If a subtarget has a slow shift, folding a shift into multiple loads
714 // costs additional micro-ops.
715 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
716 return false;
717
718 // Check whether we're going to emit the address arithmetic anyway because
719 // it's used by a non-address operation.
720 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
721 return true;
722 if (V.getOpcode() == ISD::ADD) {
723 const SDValue LHS = V.getOperand(0);
724 const SDValue RHS = V.getOperand(1);
725 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
726 return true;
727 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
728 return true;
729 }
730
731 // It hurts otherwise, since the value will be reused.
732 return false;
733}
734
735/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
736/// to select more shifted-register forms.
737bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
738 SDValue &Shift) {
739 EVT VT = N.getValueType();
740 if (VT != MVT::i32 && VT != MVT::i64)
741 return false;
742
743 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
744 return false;
745 SDValue LHS = N.getOperand(0);
746 if (!LHS->hasOneUse())
747 return false;
748
749 unsigned LHSOpcode = LHS->getOpcode();
750 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
751 return false;
752
753 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
754 if (!ShiftAmtNode)
755 return false;
756
757 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
758 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
759 if (!RHSC)
760 return false;
761
762 APInt AndMask = RHSC->getAPIntValue();
763 unsigned LowZBits, MaskLen;
764 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
765 return false;
766
767 unsigned BitWidth = N.getValueSizeInBits();
768 SDLoc DL(LHS);
769 uint64_t NewShiftC;
770 unsigned NewShiftOp;
771 if (LHSOpcode == ISD::SHL) {
772 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
773 // BitWidth != LowZBits + MaskLen doesn't match the pattern
774 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
775 return false;
776
777 NewShiftC = LowZBits - ShiftAmtC;
778 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
779 } else {
780 if (LowZBits == 0)
781 return false;
782
783 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
784 NewShiftC = LowZBits + ShiftAmtC;
785 if (NewShiftC >= BitWidth)
786 return false;
787
788 // SRA need all high bits
789 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
790 return false;
791
792 // SRL high bits can be 0 or 1
793 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
794 return false;
795
796 if (LHSOpcode == ISD::SRL)
797 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
798 else
799 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
800 }
801
802 assert(NewShiftC < BitWidth && "Invalid shift amount");
803 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
804 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
805 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
806 NewShiftAmt, BitWidthMinus1),
807 0);
808 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
809 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
810 return true;
811}
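// Worked example (i32): (and (shl %x, 2), 0xfffffff0) has LowZBits = 4 and
// MaskLen = 28, so Reg becomes (UBFMWri %x, 2, 31), i.e. a logical shift
// right by 2, and Shift reports LSL #4; an add user can then be selected as
//   add w0, w1, w2, lsl #4   // with w2 = lsr(%x, #2)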
812
813/// getExtendTypeForNode - Translate an extend node to the corresponding
814/// ExtendType value.
815static AArch64_AM::ShiftExtendType
816getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
817 if (N.getOpcode() == ISD::SIGN_EXTEND ||
818 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
819 EVT SrcVT;
820 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
821 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
822 else
823 SrcVT = N.getOperand(0).getValueType();
824
825 if (!IsLoadStore && SrcVT == MVT::i8)
826 return AArch64_AM::SXTB;
827 else if (!IsLoadStore && SrcVT == MVT::i16)
828 return AArch64_AM::SXTH;
829 else if (SrcVT == MVT::i32)
830 return AArch64_AM::SXTW;
831 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
832
833 return AArch64_AM::InvalidShiftExtend;
834 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
835 N.getOpcode() == ISD::ANY_EXTEND) {
836 EVT SrcVT = N.getOperand(0).getValueType();
837 if (!IsLoadStore && SrcVT == MVT::i8)
838 return AArch64_AM::UXTB;
839 else if (!IsLoadStore && SrcVT == MVT::i16)
840 return AArch64_AM::UXTH;
841 else if (SrcVT == MVT::i32)
842 return AArch64_AM::UXTW;
843 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
844
845 return AArch64_AM::InvalidShiftExtend;
846 } else if (N.getOpcode() == ISD::AND) {
847 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
848 if (!CSD)
849 return AArch64_AM::InvalidShiftExtend;
850 uint64_t AndMask = CSD->getZExtValue();
851
852 switch (AndMask) {
853 default:
854 return AArch64_AM::InvalidShiftExtend;
855 case 0xFF:
856 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
857 case 0xFFFF:
858 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
859 case 0xFFFFFFFF:
860 return AArch64_AM::UXTW;
861 }
862 }
863
864 return AArch64_AM::InvalidShiftExtend;
865}
866
867/// Determine whether it is worth folding V into an extended register of an
868/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
869/// instruction, and the shift should be treated as worth folding even if it has
870/// multiple uses.
871bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
872 // Trivial if we are optimizing for code size or if there is only
873 // one use of the value.
874 if (CurDAG->shouldOptForSize() || V.hasOneUse())
875 return true;
876
877 // If a subtarget has a fastpath LSL we can fold a logical shift into
878 // the add/sub and save a cycle.
879 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
880 V.getConstantOperandVal(1) <= 4 &&
881 getExtendTypeForNode(V.getOperand(0)) == AArch64_AM::InvalidShiftExtend)
882 return true;
883
884 // It hurts otherwise, since the value will be reused.
885 return false;
886}
887
888/// SelectShiftedRegister - Select a "shifted register" operand. If the value
889/// is not shifted, set the Shift operand to default of "LSL 0". The logical
890/// instructions allow the shifted register to be rotated, but the arithmetic
891/// instructions do not. The AllowROR parameter specifies whether ROR is
892/// supported.
893bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
894 SDValue &Reg, SDValue &Shift) {
895 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
896 return true;
897
898 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
899 if (ShType == AArch64_AM::InvalidShiftExtend)
900 return false;
901 if (!AllowROR && ShType == AArch64_AM::ROR)
902 return false;
903
904 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
905 unsigned BitSize = N.getValueSizeInBits();
906 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
907 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
908
909 Reg = N.getOperand(0);
910 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
911 return isWorthFoldingALU(N, true);
912 }
913
914 return false;
915}
916
917/// Instructions that accept extend modifiers like UXTW expect the register
918/// being extended to be a GPR32, but the incoming DAG might be acting on a
919/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
920/// this is the case.
921static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
922 if (N.getValueType() == MVT::i32)
923 return N;
924
925 SDLoc dl(N);
926 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
927}
928
929// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
930template<signed Low, signed High, signed Scale>
931bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
932 if (!isa<ConstantSDNode>(N))
933 return false;
934
935 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
936 if ((MulImm % std::abs(Scale)) == 0) {
937 int64_t RDVLImm = MulImm / Scale;
938 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
939 Imm = CurDAG->getSignedTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
940 return true;
941 }
942 }
943
944 return false;
945}
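// For example, RDVL Xd, #imm computes imm * VL bytes, i.e. imm * 16 * VSCALE,
// so with Scale = 16 a multiplier of VSCALE * 32 yields RDVLImm = 2 and can
// be selected as "rdvl x0, #2", provided 2 lies within [Low, High].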
946
947// Returns a suitable RDSVL multiplier from a left shift.
948template <signed Low, signed High>
949bool AArch64DAGToDAGISel::SelectRDSVLShiftImm(SDValue N, SDValue &Imm) {
950 if (!isa<ConstantSDNode>(N))
951 return false;
952
953 int64_t MulImm = 1LL << cast<ConstantSDNode>(N)->getSExtValue();
954 if (MulImm >= Low && MulImm <= High) {
955 Imm = CurDAG->getSignedTargetConstant(MulImm, SDLoc(N), MVT::i32);
956 return true;
957 }
958
959 return false;
960}
961
962/// SelectArithExtendedRegister - Select a "extended register" operand. This
963/// operand folds in an extend followed by an optional left shift.
964bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
965 SDValue &Shift) {
966 unsigned ShiftVal = 0;
967 AArch64_AM::ShiftExtendType Ext;
968
969 if (N.getOpcode() == ISD::SHL) {
970 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
971 if (!CSD)
972 return false;
973 ShiftVal = CSD->getZExtValue();
974 if (ShiftVal > 4)
975 return false;
976
977 Ext = getExtendTypeForNode(N.getOperand(0));
978 if (Ext == AArch64_AM::InvalidShiftExtend)
979 return false;
980
981 Reg = N.getOperand(0).getOperand(0);
982 } else {
983 Ext = getExtendTypeForNode(N);
984 if (Ext == AArch64_AM::InvalidShiftExtend)
985 return false;
986
987 Reg = N.getOperand(0);
988
989 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
990 // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
991 auto isDef32 = [](SDValue N) {
992 unsigned Opc = N.getOpcode();
993 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
994 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
995 Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
996 Opc != ISD::FREEZE;
997 };
998 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
999 isDef32(Reg))
1000 return false;
1001 }
1002
1003 // AArch64 mandates that the RHS of the operation must use the smallest
1004 // register class that could contain the size being extended from. Thus,
1005 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
1006 // there might not be an actual 32-bit value in the program. We can
1007 // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
1008 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
1009 Reg = narrowIfNeeded(CurDAG, Reg);
1010 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1011 MVT::i32);
1012 return isWorthFoldingALU(N);
1013}
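// For example, an add operand of the form
//   (shl (sign_extend_inreg %w, i16), 2)
// matches with Ext = SXTH and ShiftVal = 2, so the add can be selected as
//   add x0, x1, w2, sxth #2
// with the register operand narrowed to its 32-bit sub_32 form as required.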
1014
1015/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
1016/// operand is used by instructions that have an SP operand.
1017bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
1018 SDValue &Shift) {
1019 unsigned ShiftVal = 0;
1020 AArch64_AM::ShiftExtendType Ext;
1021
1022 if (N.getOpcode() != ISD::SHL)
1023 return false;
1024
1025 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1026 if (!CSD)
1027 return false;
1028 ShiftVal = CSD->getZExtValue();
1029 if (ShiftVal > 4)
1030 return false;
1031
1032 Ext = AArch64_AM::UXTX;
1033 Reg = N.getOperand(0);
1034 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1035 MVT::i32);
1036 return isWorthFoldingALU(N);
1037}
1038
1039/// If there's a use of this ADDlow that's not itself a load/store then we'll
1040/// need to create a real ADD instruction from it anyway and there's no point in
1041/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1042/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1043/// leads to duplicated ADRP instructions.
1044static bool isWorthFoldingADDlow(SDValue N) {
1045 for (auto *User : N->users()) {
1046 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
1047 User->getOpcode() != ISD::ATOMIC_LOAD &&
1048 User->getOpcode() != ISD::ATOMIC_STORE)
1049 return false;
1050
1051 // ldar and stlr have much more restrictive addressing modes (just a
1052 // register).
1053 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
1054 return false;
1055 }
1056
1057 return true;
1058}
1059
1060/// Check if the immediate offset is valid as a scaled immediate.
1061static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1062 unsigned Size) {
1063 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1064 Offset < (Range << Log2_32(Size)))
1065 return true;
1066 return false;
1067}
1068
1069/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1070/// immediate" address. The "Size" argument is the size in bytes of the memory
1071/// reference, which determines the scale.
1072bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1073 unsigned BW, unsigned Size,
1074 SDValue &Base,
1075 SDValue &OffImm) {
1076 SDLoc dl(N);
1077 const DataLayout &DL = CurDAG->getDataLayout();
1078 const TargetLowering *TLI = getTargetLowering();
1079 if (N.getOpcode() == ISD::FrameIndex) {
1080 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1081 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1082 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1083 return true;
1084 }
1085
1086 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
1087 // selected here doesn't support labels/immediates, only base+offset.
1088 if (CurDAG->isBaseWithConstantOffset(N)) {
1089 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1090 if (IsSignedImm) {
1091 int64_t RHSC = RHS->getSExtValue();
1092 unsigned Scale = Log2_32(Size);
1093 int64_t Range = 0x1LL << (BW - 1);
1094
1095 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1096 RHSC < (Range << Scale)) {
1097 Base = N.getOperand(0);
1098 if (Base.getOpcode() == ISD::FrameIndex) {
1099 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1100 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1101 }
1102 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1103 return true;
1104 }
1105 } else {
1106 // unsigned Immediate
1107 uint64_t RHSC = RHS->getZExtValue();
1108 unsigned Scale = Log2_32(Size);
1109 uint64_t Range = 0x1ULL << BW;
1110
1111 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1112 Base = N.getOperand(0);
1113 if (Base.getOpcode() == ISD::FrameIndex) {
1114 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1115 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1116 }
1117 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1118 return true;
1119 }
1120 }
1121 }
1122 }
1123 // Base only. The address will be materialized into a register before
1124 // the memory is accessed.
1125 // add x0, Xbase, #offset
1126 // stp x1, x2, [x0]
1127 Base = N;
1128 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1129 return true;
1130}
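// For example, the signed 7-bit form with Size = 8 (the LDP/STP-style range)
// accepts offsets that are multiples of 8 in [-512, 504]: (add %base, 496)
// yields OffImm = 62, while an offset such as 492 is not a multiple of 8 and
// falls through to the base-only case at the end.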
1131
1132/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1133/// immediate" address. The "Size" argument is the size in bytes of the memory
1134/// reference, which determines the scale.
1135bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1136 SDValue &Base, SDValue &OffImm) {
1137 SDLoc dl(N);
1138 const DataLayout &DL = CurDAG->getDataLayout();
1139 const TargetLowering *TLI = getTargetLowering();
1140 if (N.getOpcode() == ISD::FrameIndex) {
1141 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1142 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1143 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1144 return true;
1145 }
1146
1147 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1148 GlobalAddressSDNode *GAN =
1149 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1150 Base = N.getOperand(0);
1151 OffImm = N.getOperand(1);
1152 if (!GAN)
1153 return true;
1154
1155 if (GAN->getOffset() % Size == 0 &&
1156 GAN->getGlobal()->getPointerAlignment(DL) >= Size)
1157 return true;
1158 }
1159
1160 if (CurDAG->isBaseWithConstantOffset(N)) {
1161 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1162 int64_t RHSC = (int64_t)RHS->getZExtValue();
1163 unsigned Scale = Log2_32(Size);
1164 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1165 Base = N.getOperand(0);
1166 if (Base.getOpcode() == ISD::FrameIndex) {
1167 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1168 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1169 }
1170 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1171 return true;
1172 }
1173 }
1174 }
1175
1176 // Before falling back to our general case, check if the unscaled
1177 // instructions can handle this. If so, that's preferable.
1178 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1179 return false;
1180
1181 // Base only. The address will be materialized into a register before
1182 // the memory is accessed.
1183 // add x0, Xbase, #offset
1184 // ldr x0, [x0]
1185 Base = N;
1186 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1187 return true;
1188}
1189
1190/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1191/// immediate" address. This should only match when there is an offset that
1192/// is not valid for a scaled immediate addressing mode. The "Size" argument
1193/// is the size in bytes of the memory reference, which is needed here to know
1194/// what is valid for a scaled immediate.
1195bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1196 SDValue &Base,
1197 SDValue &OffImm) {
1198 if (!CurDAG->isBaseWithConstantOffset(N))
1199 return false;
1200 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1201 int64_t RHSC = RHS->getSExtValue();
1202 if (RHSC >= -256 && RHSC < 256) {
1203 Base = N.getOperand(0);
1204 if (Base.getOpcode() == ISD::FrameIndex) {
1205 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1206 const TargetLowering *TLI = getTargetLowering();
1207 Base = CurDAG->getTargetFrameIndex(
1208 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1209 }
1210 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1211 return true;
1212 }
1213 }
1214 return false;
1215}
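// For example, with Size = 8 an offset of -16 is rejected by the scaled
// unsigned form (negative), but it lies in [-256, 255], so (add %base, -16)
// is selected here and typically becomes "ldur x0, [xN, #-16]".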
1216
1217static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
1218 SDLoc dl(N);
1219 SDValue ImpDef = SDValue(
1220 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1221 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1222 N);
1223}
1224
1225/// Check if the given SHL node (\p N), can be used to form an
1226/// extended register for an addressing mode.
1227bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1228 bool WantExtend, SDValue &Offset,
1229 SDValue &SignExtend) {
1230 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1231 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1232 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1233 return false;
1234
1235 SDLoc dl(N);
1236 if (WantExtend) {
1237 AArch64_AM::ShiftExtendType Ext =
1238 getExtendTypeForNode(N.getOperand(0), true);
1239 if (Ext == AArch64_AM::InvalidShiftExtend)
1240 return false;
1241
1241
1242 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1243 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1244 MVT::i32);
1245 } else {
1246 Offset = N.getOperand(0);
1247 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1248 }
1249
1250 unsigned LegalShiftVal = Log2_32(Size);
1251 unsigned ShiftVal = CSD->getZExtValue();
1252
1253 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1254 return false;
1255
1256 return isWorthFoldingAddr(N, Size);
1257}
1258
1259bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1260 SDValue &Base, SDValue &Offset,
1261 SDValue &SignExtend,
1262 SDValue &DoShift) {
1263 if (N.getOpcode() != ISD::ADD)
1264 return false;
1265 SDValue LHS = N.getOperand(0);
1266 SDValue RHS = N.getOperand(1);
1267 SDLoc dl(N);
1268
1269 // We don't want to match immediate adds here, because they are better lowered
1270 // to the register-immediate addressing modes.
1271 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
1272 return false;
1273
1274 // Check if this particular node is reused in any non-memory related
1275 // operation. If yes, do not try to fold this node into the address
1276 // computation, since the computation will be kept.
1277 const SDNode *Node = N.getNode();
1278 for (SDNode *UI : Node->users()) {
1279 if (!isMemOpOrPrefetch(UI))
1280 return false;
1281 }
1282
1283 // Remember if it is worth folding N when it produces extended register.
1284 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1285
1286 // Try to match a shifted extend on the RHS.
1287 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1288 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1289 Base = LHS;
1290 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1291 return true;
1292 }
1293
1294 // Try to match a shifted extend on the LHS.
1295 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1296 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1297 Base = RHS;
1298 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1299 return true;
1300 }
1301
1302 // There was no shift, whatever else we find.
1303 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1304
1305 AArch64_AM::ShiftExtendType Ext;
1306 // Try to match an unshifted extend on the LHS.
1307 if (IsExtendedRegisterWorthFolding &&
1308 (Ext = getExtendTypeForNode(LHS, true)) !=
1309 AArch64_AM::InvalidShiftExtend) {
1310 Base = RHS;
1311 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1312 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1313 MVT::i32);
1314 if (isWorthFoldingAddr(LHS, Size))
1315 return true;
1316 }
1317
1318 // Try to match an unshifted extend on the RHS.
1319 if (IsExtendedRegisterWorthFolding &&
1320 (Ext = getExtendTypeForNode(RHS, true)) !=
1321 AArch64_AM::InvalidShiftExtend) {
1322 Base = LHS;
1323 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1324 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1325 MVT::i32);
1326 if (isWorthFoldingAddr(RHS, Size))
1327 return true;
1328 }
1329
1330 return false;
1331}
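// For example, (add %x0, (shl (sext %w1 to i64), 3)) feeding an 8-byte load
// matches the shifted-extend path above and can be selected as
//   ldr x2, [x0, w1, sxtw #3]
// If neither operand provides a suitable extend, this returns false and the
// other addressing-mode selectors (e.g. XRO or the immediate forms) are tried.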
1332
1333// Check if the given immediate is preferred by ADD. If an immediate can be
1334// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
1335// encoded by one MOVZ, return true.
1336static bool isPreferredADD(int64_t ImmOff) {
1337 // Constant in [0x0, 0xfff] can be encoded in ADD.
1338 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1339 return true;
1340 // Check if it can be encoded in an "ADD LSL #12".
1341 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1342 // As a single MOVZ is faster than an "ADD with LSL #12", ignore such constants.
1343 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1344 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1345 return false;
1346}
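// Worked examples: 0x123000 only has bits in [12, 23] and is not a single
// MOVZ immediate, so it is preferred as an "add ..., #0x123, lsl #12";
// 0x20000 also fits the LSL #12 form but equals 0x2 << 16, which a single
// MOVZ can materialize, so it is not preferred.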
1347
1348bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1349 SDValue &Base, SDValue &Offset,
1350 SDValue &SignExtend,
1351 SDValue &DoShift) {
1352 if (N.getOpcode() != ISD::ADD)
1353 return false;
1354 SDValue LHS = N.getOperand(0);
1355 SDValue RHS = N.getOperand(1);
1356 SDLoc DL(N);
1357
1358 // Check if this particular node is reused in any non-memory related
1359 // operation. If yes, do not try to fold this node into the address
1360 // computation, since the computation will be kept.
1361 const SDNode *Node = N.getNode();
1362 for (SDNode *UI : Node->users()) {
1363 if (!isMemOpOrPrefetch(UI))
1364 return false;
1365 }
1366
1367 // Watch out if RHS is a wide immediate, it can not be selected into
1368 // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into
1369 // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate
1370 // instructions like:
1371 // MOV X0, WideImmediate
1372 // ADD X1, BaseReg, X0
1373 // LDR X2, [X1, 0]
1374 // For such situation, using [BaseReg, XReg] addressing mode can save one
1375 // ADD/SUB:
1376 // MOV X0, WideImmediate
1377 // LDR X2, [BaseReg, X0]
1378 if (isa<ConstantSDNode>(RHS)) {
1379 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1380 // Skip if the immediate can be selected by a load/store addressing mode.
1381 // Also skip if the immediate can be encoded by a single ADD (SUB is also
1382 // checked by using -ImmOff).
1383 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1384 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1385 return false;
1386
1387 SDValue Ops[] = { RHS };
1388 SDNode *MOVI =
1389 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1390 SDValue MOVIV = SDValue(MOVI, 0);
1391 // This ADD of two X register will be selected into [Reg+Reg] mode.
1392 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1393 }
1394
1395 // Remember if it is worth folding N when it produces extended register.
1396 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1397
1398 // Try to match a shifted extend on the RHS.
1399 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1400 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1401 Base = LHS;
1402 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1403 return true;
1404 }
1405
1406 // Try to match a shifted extend on the LHS.
1407 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1408 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1409 Base = RHS;
1410 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1411 return true;
1412 }
1413
1414 // Match any non-shifted, non-extend, non-immediate add expression.
1415 Base = LHS;
1416 Offset = RHS;
1417 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1418 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1419 // Reg1 + Reg2 is free: no check needed.
1420 return true;
1421}
1422
1423SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1424 static const unsigned RegClassIDs[] = {
1425 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1426 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1427 AArch64::dsub2, AArch64::dsub3};
1428
1429 return createTuple(Regs, RegClassIDs, SubRegs);
1430}
1431
1432SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1433 static const unsigned RegClassIDs[] = {
1434 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1435 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1436 AArch64::qsub2, AArch64::qsub3};
1437
1438 return createTuple(Regs, RegClassIDs, SubRegs);
1439}
1440
1441SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1442 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1443 AArch64::ZPR3RegClassID,
1444 AArch64::ZPR4RegClassID};
1445 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1446 AArch64::zsub2, AArch64::zsub3};
1447
1448 return createTuple(Regs, RegClassIDs, SubRegs);
1449}
1450
1451SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1452 assert(Regs.size() == 2 || Regs.size() == 4);
1453
1454 // The createTuple interface requires 3 RegClassIDs for each possible
1455 // tuple type even though we only have them for ZPR2 and ZPR4.
1456 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1457 AArch64::ZPR4Mul4RegClassID};
1458 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1459 AArch64::zsub2, AArch64::zsub3};
1460 return createTuple(Regs, RegClassIDs, SubRegs);
1461}
1462
1463SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1464 const unsigned RegClassIDs[],
1465 const unsigned SubRegs[]) {
1466 // There's no special register-class for a vector-list of 1 element: it's just
1467 // a vector.
1468 if (Regs.size() == 1)
1469 return Regs[0];
1470
1471 assert(Regs.size() >= 2 && Regs.size() <= 4);
1472
1473 SDLoc DL(Regs[0]);
1474
1475 SmallVector<SDValue, 4> Ops;
1476
1477 // First operand of REG_SEQUENCE is the desired RegClass.
1478 Ops.push_back(
1479 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1480
1481 // Then we get pairs of source & subregister-position for the components.
1482 for (unsigned i = 0; i < Regs.size(); ++i) {
1483 Ops.push_back(Regs[i]);
1484 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1485 }
1486
1487 SDNode *N =
1488 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1489 return SDValue(N, 0);
1490}
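// For example, createQTuple({%q0, %q1}) builds
//   REG_SEQUENCE QQRegClassID, %q0, qsub0, %q1, qsub1
// with an Untyped result, which the register allocator constrains to a
// consecutive Q-register pair as required by patterns such as ldN/tbl.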
1491
1492void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1493 bool isExt) {
1494 SDLoc dl(N);
1495 EVT VT = N->getValueType(0);
1496
1497 unsigned ExtOff = isExt;
1498
1499 // Form a REG_SEQUENCE to force register allocation.
1500 unsigned Vec0Off = ExtOff + 1;
1501 SmallVector<SDValue, 4> Regs(N->ops().slice(Vec0Off, NumVecs));
1502 SDValue RegSeq = createQTuple(Regs);
1503
1504 SmallVector<SDValue, 6> Ops;
1505 if (isExt)
1506 Ops.push_back(N->getOperand(1));
1507 Ops.push_back(RegSeq);
1508 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1509 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1510}
1511
1512static std::tuple<SDValue, SDValue>
1513extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
1514 SDLoc DL(Disc);
1515 SDValue AddrDisc;
1516 SDValue ConstDisc;
1517
1518 // If this is a blend, remember the constant and address discriminators.
1519 // Otherwise, it's either a constant discriminator, or a non-blended
1520 // address discriminator.
1521 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1522 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
1523 AddrDisc = Disc->getOperand(1);
1524 ConstDisc = Disc->getOperand(2);
1525 } else {
1526 ConstDisc = Disc;
1527 }
1528
1529 // If the constant discriminator (either the blend RHS, or the entire
1530 // discriminator value) isn't a 16-bit constant, bail out, and let the
1531 // discriminator be computed separately.
1532 auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
1533 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
1534 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
1535
1536 // If there's no address discriminator, use XZR directly.
1537 if (!AddrDisc)
1538 AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
1539
1540 return std::make_tuple(
1541 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
1542 AddrDisc);
1543}
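// For example, a discriminator of the form
//   @llvm.ptrauth.blend(%addr, 1234)
// splits into AddrDisc = %addr and a constant discriminator of 1234; a bare
// constant 42 yields (42, XZR), and anything else (including a blend whose
// constant does not fit in 16 bits) falls back to (0, Disc) so the
// discriminator is computed separately.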
1544
1545void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1546 SDLoc DL(N);
1547 // IntrinsicID is operand #0
1548 SDValue Val = N->getOperand(1);
1549 SDValue AUTKey = N->getOperand(2);
1550 SDValue AUTDisc = N->getOperand(3);
1551
1552 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1553 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1554
1555 SDValue AUTAddrDisc, AUTConstDisc;
1556 std::tie(AUTConstDisc, AUTAddrDisc) =
1557 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1558
1559 if (!Subtarget->isX16X17Safer()) {
1560 std::vector<SDValue> Ops = {Val, AUTKey, AUTConstDisc, AUTAddrDisc};
1561 // Copy deactivation symbol if present.
1562 if (N->getNumOperands() > 4)
1563 Ops.push_back(N->getOperand(4));
1564
1565 SDNode *AUT =
1566 CurDAG->getMachineNode(AArch64::AUTxMxN, DL, MVT::i64, MVT::i64, Ops);
1567 ReplaceNode(N, AUT);
1568 } else {
1569 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1570 AArch64::X16, Val, SDValue());
1571 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
1572
1573 SDNode *AUT = CurDAG->getMachineNode(AArch64::AUTx16x17, DL, MVT::i64, Ops);
1574 ReplaceNode(N, AUT);
1575 }
1576}
1577
1578void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
1579 SDLoc DL(N);
1580 // IntrinsicID is operand #0
1581 SDValue Val = N->getOperand(1);
1582 SDValue AUTKey = N->getOperand(2);
1583 SDValue AUTDisc = N->getOperand(3);
1584 SDValue PACKey = N->getOperand(4);
1585 SDValue PACDisc = N->getOperand(5);
1586
1587 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1588 unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();
1589
1590 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1591 PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);
1592
1593 SDValue AUTAddrDisc, AUTConstDisc;
1594 std::tie(AUTConstDisc, AUTAddrDisc) =
1595 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1596
1597 SDValue PACAddrDisc, PACConstDisc;
1598 std::tie(PACConstDisc, PACAddrDisc) =
1599 extractPtrauthBlendDiscriminators(PACDisc, CurDAG);
1600
1601 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1602 AArch64::X16, Val, SDValue());
1603
1604 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
1605 PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};
1606
1607 SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
1608 ReplaceNode(N, AUTPAC);
1609}
1610
1611bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1612 LoadSDNode *LD = cast<LoadSDNode>(N);
1613 if (LD->isUnindexed())
1614 return false;
1615 EVT VT = LD->getMemoryVT();
1616 EVT DstVT = N->getValueType(0);
1617 ISD::MemIndexedMode AM = LD->getAddressingMode();
1618 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1619 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1620 int OffsetVal = (int)OffsetOp->getZExtValue();
1621
1622 // We're not doing validity checking here. That was done when checking
1623 // if we should mark the load as indexed or not. We're just selecting
1624 // the right instruction.
1625 unsigned Opcode = 0;
1626
1627 ISD::LoadExtType ExtType = LD->getExtensionType();
1628 bool InsertTo64 = false;
1629 if (VT == MVT::i64)
1630 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1631 else if (VT == MVT::i32) {
1632 if (ExtType == ISD::NON_EXTLOAD)
1633 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1634 else if (ExtType == ISD::SEXTLOAD)
1635 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1636 else {
1637 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1638 InsertTo64 = true;
1639 // The result of the load is only i32. It's the subreg_to_reg that makes
1640 // it into an i64.
1641 DstVT = MVT::i32;
1642 }
1643 } else if (VT == MVT::i16) {
1644 if (ExtType == ISD::SEXTLOAD) {
1645 if (DstVT == MVT::i64)
1646 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1647 else
1648 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1649 } else {
1650 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1651 InsertTo64 = DstVT == MVT::i64;
1652 // The result of the load is only i32. It's the subreg_to_reg that makes
1653 // it into an i64.
1654 DstVT = MVT::i32;
1655 }
1656 } else if (VT == MVT::i8) {
1657 if (ExtType == ISD::SEXTLOAD) {
1658 if (DstVT == MVT::i64)
1659 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1660 else
1661 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1662 } else {
1663 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1664 InsertTo64 = DstVT == MVT::i64;
1665 // The result of the load is only i32. It's the subreg_to_reg that makes
1666 // it into an i64.
1667 DstVT = MVT::i32;
1668 }
1669 } else if (VT == MVT::f16) {
1670 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1671 } else if (VT == MVT::bf16) {
1672 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1673 } else if (VT == MVT::f32) {
1674 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1675 } else if (VT == MVT::f64 ||
1676 (VT.is64BitVector() && Subtarget->isLittleEndian())) {
1677 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1678 } else if (VT.is128BitVector() && Subtarget->isLittleEndian()) {
1679 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1680 } else if (VT.is64BitVector()) {
1681 if (IsPre || OffsetVal != 8)
1682 return false;
1683 switch (VT.getScalarSizeInBits()) {
1684 case 8:
1685 Opcode = AArch64::LD1Onev8b_POST;
1686 break;
1687 case 16:
1688 Opcode = AArch64::LD1Onev4h_POST;
1689 break;
1690 case 32:
1691 Opcode = AArch64::LD1Onev2s_POST;
1692 break;
1693 case 64:
1694 Opcode = AArch64::LD1Onev1d_POST;
1695 break;
1696 default:
1697 llvm_unreachable("Expected vector element to be a power of 2");
1698 }
1699 } else if (VT.is128BitVector()) {
1700 if (IsPre || OffsetVal != 16)
1701 return false;
1702 switch (VT.getScalarSizeInBits()) {
1703 case 8:
1704 Opcode = AArch64::LD1Onev16b_POST;
1705 break;
1706 case 16:
1707 Opcode = AArch64::LD1Onev8h_POST;
1708 break;
1709 case 32:
1710 Opcode = AArch64::LD1Onev4s_POST;
1711 break;
1712 case 64:
1713 Opcode = AArch64::LD1Onev2d_POST;
1714 break;
1715 default:
1716 llvm_unreachable("Expected vector element to be a power of 2");
1717 }
1718 } else
1719 return false;
1720 SDValue Chain = LD->getChain();
1721 SDValue Base = LD->getBasePtr();
1722 SDLoc dl(N);
1723 // LD1 encodes an immediate offset by using XZR as the offset register.
1724 SDValue Offset = (VT.isVector() && !Subtarget->isLittleEndian())
1725 ? CurDAG->getRegister(AArch64::XZR, MVT::i64)
1726 : CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1727 SDValue Ops[] = { Base, Offset, Chain };
1728 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1729 MVT::Other, Ops);
1730
1731 // Transfer memoperands.
1732 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1733 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1734
1735 // Either way, we're replacing the node, so tell the caller that.
1736 SDValue LoadedVal = SDValue(Res, 1);
1737 if (InsertTo64) {
1738 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1739 LoadedVal =
1740 SDValue(CurDAG->getMachineNode(
1741 AArch64::SUBREG_TO_REG, dl, MVT::i64,
1742 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1743 SubReg),
1744 0);
1745 }
1746
1747 ReplaceUses(SDValue(N, 0), LoadedVal);
1748 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1749 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1750 CurDAG->RemoveDeadNode(N);
1751 return true;
1752}
1753
1754void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1755 unsigned SubRegIdx) {
1756 SDLoc dl(N);
1757 EVT VT = N->getValueType(0);
1758 SDValue Chain = N->getOperand(0);
1759
1760 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1761 Chain};
1762
1763 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1764
1765 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1766 SDValue SuperReg = SDValue(Ld, 0);
1767 for (unsigned i = 0; i < NumVecs; ++i)
1768 ReplaceUses(SDValue(N, i),
1769 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1770
1771 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1772
1773 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1774 // because it's too simple to have needed special treatment during lowering.
1775 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1776 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1777 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1778 }
1779
1780 CurDAG->RemoveDeadNode(N);
1781}
1782
1783void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1784 unsigned Opc, unsigned SubRegIdx) {
1785 SDLoc dl(N);
1786 EVT VT = N->getValueType(0);
1787 SDValue Chain = N->getOperand(0);
1788
1789 SDValue Ops[] = {N->getOperand(1), // Mem operand
1790 N->getOperand(2), // Incremental
1791 Chain};
1792
1793 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1794 MVT::Untyped, MVT::Other};
1795
1796 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1797
1798 // Update uses of write back register
1799 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1800
1801 // Update uses of vector list
1802 SDValue SuperReg = SDValue(Ld, 1);
1803 if (NumVecs == 1)
1804 ReplaceUses(SDValue(N, 0), SuperReg);
1805 else
1806 for (unsigned i = 0; i < NumVecs; ++i)
1807 ReplaceUses(SDValue(N, i),
1808 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1809
1810 // Update the chain
1811 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1812 CurDAG->RemoveDeadNode(N);
1813}
1814
1815/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1816/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1817/// new Base and an SDValue representing the new offset.
1818std::tuple<unsigned, SDValue, SDValue>
1819AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1820 unsigned Opc_ri,
1821 const SDValue &OldBase,
1822 const SDValue &OldOffset,
1823 unsigned Scale) {
1824 SDValue NewBase = OldBase;
1825 SDValue NewOffset = OldOffset;
1826 // Detect a possible Reg+Imm addressing mode.
1827 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1828 N, OldBase, NewBase, NewOffset);
1829
1830 // Detect a possible reg+reg addressing mode, but only if we haven't already
1831 // detected a Reg+Imm one.
1832 const bool IsRegReg =
1833 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1834
1835 // Select the instruction.
1836 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1837}
1838
1839enum class SelectTypeKind {
1840 Int1 = 0,
1841 Int = 1,
1842 FP = 2,
1844};
1845
1846/// This function selects an opcode from a list of opcodes, which is
1847/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
1848/// element types, in this order.
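/// For example, with Kind == SelectTypeKind::Int and VT == nxv8i16 the
/// minimum element count is 8, so Opcodes[1] (the 16-bit variant) is
/// returned; a non-scalable or otherwise unsupported VT returns 0.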
1849template <SelectTypeKind Kind>
1850static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1851 // Only match scalable vector VTs
1852 if (!VT.isScalableVector())
1853 return 0;
1854
1855 EVT EltVT = VT.getVectorElementType();
1856 unsigned Key = VT.getVectorMinNumElements();
1857 switch (Kind) {
1858 case SelectTypeKind::AnyType:
1859 break;
1860 case SelectTypeKind::Int:
1861 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1862 EltVT != MVT::i64)
1863 return 0;
1864 break;
1865 case SelectTypeKind::Int1:
1866 if (EltVT != MVT::i1)
1867 return 0;
1868 break;
1869 case SelectTypeKind::FP:
1870 if (EltVT == MVT::bf16)
1871 Key = 16;
1872 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
1873 EltVT != MVT::f64)
1874 return 0;
1875 break;
1876 }
1877
1878 unsigned Offset;
1879 switch (Key) {
1880 case 16: // 8-bit or bf16
1881 Offset = 0;
1882 break;
1883 case 8: // 16-bit
1884 Offset = 1;
1885 break;
1886 case 4: // 32-bit
1887 Offset = 2;
1888 break;
1889 case 2: // 64-bit
1890 Offset = 3;
1891 break;
1892 default:
1893 return 0;
1894 }
1895
1896 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1897}
1898
1899// This function is almost identical to SelectWhilePair, but has an
1900// extra check on the range of the immediate operand.
1901// TODO: Merge these two functions together at some point?
1902void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
1903 // Immediate can be either 0 or 1.
1904 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
1905 if (Imm->getZExtValue() > 1)
1906 return;
1907
1908 SDLoc DL(N);
1909 EVT VT = N->getValueType(0);
1910 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1911 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1912 SDValue SuperReg = SDValue(WhilePair, 0);
1913
1914 for (unsigned I = 0; I < 2; ++I)
1915 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1916 AArch64::psub0 + I, DL, VT, SuperReg));
1917
1918 CurDAG->RemoveDeadNode(N);
1919}
1920
1921void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1922 SDLoc DL(N);
1923 EVT VT = N->getValueType(0);
1924
1925 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1926
1927 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1928 SDValue SuperReg = SDValue(WhilePair, 0);
1929
1930 for (unsigned I = 0; I < 2; ++I)
1931 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1932 AArch64::psub0 + I, DL, VT, SuperReg));
1933
1934 CurDAG->RemoveDeadNode(N);
1935}
1936
1937void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1938 unsigned Opcode) {
1939 EVT VT = N->getValueType(0);
1940 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
1941 SDValue Ops = createZTuple(Regs);
1942 SDLoc DL(N);
1943 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1944 SDValue SuperReg = SDValue(Intrinsic, 0);
1945 for (unsigned i = 0; i < NumVecs; ++i)
1946 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1947 AArch64::zsub0 + i, DL, VT, SuperReg));
1948
1949 CurDAG->RemoveDeadNode(N);
1950}
1951
1952void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
1953 unsigned Opcode) {
1954 SDLoc DL(N);
1955 EVT VT = N->getValueType(0);
1956 SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
1957 Ops.push_back(/*Chain*/ N->getOperand(0));
1958
1959 SDNode *Instruction =
1960 CurDAG->getMachineNode(Opcode, DL, {MVT::Untyped, MVT::Other}, Ops);
1961 SDValue SuperReg = SDValue(Instruction, 0);
1962
1963 for (unsigned i = 0; i < NumVecs; ++i)
1964 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1965 AArch64::zsub0 + i, DL, VT, SuperReg));
1966
1967 // Copy chain
1968 unsigned ChainIdx = NumVecs;
1969 ReplaceUses(SDValue(N, ChainIdx), SDValue(Instruction, 1));
1970 CurDAG->RemoveDeadNode(N);
1971}
1972
1973void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
1974 unsigned NumVecs,
1975 bool IsZmMulti,
1976 unsigned Opcode,
1977 bool HasPred) {
1978 assert(Opcode != 0 && "Unexpected opcode");
1979
1980 SDLoc DL(N);
1981 EVT VT = N->getValueType(0);
1982 SDUse *OpsIter = N->op_begin() + 1; // Skip intrinsic ID
1983 SmallVector<SDValue, 4> Ops;
1984
1985 auto GetMultiVecOperand = [&]() {
1986 SmallVector<SDValue, 4> Regs(OpsIter, OpsIter + NumVecs);
1987 OpsIter += NumVecs;
1988 return createZMulTuple(Regs);
1989 };
1990
1991 if (HasPred)
1992 Ops.push_back(*OpsIter++);
1993
1994 Ops.push_back(GetMultiVecOperand());
1995 if (IsZmMulti)
1996 Ops.push_back(GetMultiVecOperand());
1997 else
1998 Ops.push_back(*OpsIter++);
1999
2000 // Append any remaining operands.
2001 Ops.append(OpsIter, N->op_end());
2002 SDNode *Intrinsic;
2003 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
2004 SDValue SuperReg = SDValue(Intrinsic, 0);
2005 for (unsigned i = 0; i < NumVecs; ++i)
2006 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2007 AArch64::zsub0 + i, DL, VT, SuperReg));
2008
2009 CurDAG->RemoveDeadNode(N);
2010}
2011
2012void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
2013 unsigned Scale, unsigned Opc_ri,
2014 unsigned Opc_rr, bool IsIntr) {
2015 assert(Scale < 5 && "Invalid scaling value.");
2016 SDLoc DL(N);
2017 EVT VT = N->getValueType(0);
2018 SDValue Chain = N->getOperand(0);
2019
2020 // Optimize addressing mode.
2021 SDValue Base, Offset;
2022 unsigned Opc;
2023 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2024 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
2025 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
2026
2027 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
2028 Base, // Memory operand
2029 Offset, Chain};
2030
2031 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2032
2033 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2034 SDValue SuperReg = SDValue(Load, 0);
2035 for (unsigned i = 0; i < NumVecs; ++i)
2036 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2037 AArch64::zsub0 + i, DL, VT, SuperReg));
2038
2039 // Copy chain
2040 unsigned ChainIdx = NumVecs;
2041 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2042 CurDAG->RemoveDeadNode(N);
2043}
2044
2045void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
2046 unsigned NumVecs,
2047 unsigned Scale,
2048 unsigned Opc_ri,
2049 unsigned Opc_rr) {
2050 assert(Scale < 4 && "Invalid scaling value.");
2051 SDLoc DL(N);
2052 EVT VT = N->getValueType(0);
2053 SDValue Chain = N->getOperand(0);
2054
2055 SDValue PNg = N->getOperand(2);
2056 SDValue Base = N->getOperand(3);
2057 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
2058 unsigned Opc;
2059 std::tie(Opc, Base, Offset) =
2060 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
2061
2062 SDValue Ops[] = {PNg, // Predicate-as-counter
2063 Base, // Memory operand
2064 Offset, Chain};
2065
2066 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2067
2068 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2069 SDValue SuperReg = SDValue(Load, 0);
2070 for (unsigned i = 0; i < NumVecs; ++i)
2071 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2072 AArch64::zsub0 + i, DL, VT, SuperReg));
2073
2074 // Copy chain
2075 unsigned ChainIdx = NumVecs;
2076 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2077 CurDAG->RemoveDeadNode(N);
2078}
2079
2080void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
2081 unsigned Opcode) {
2082 if (N->getValueType(0) != MVT::nxv4f32)
2083 return;
2084 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
2085}
2086
2087void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
2088 unsigned NumOutVecs,
2089 unsigned Opc,
2090 uint32_t MaxImm) {
2091 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
2092 if (Imm->getZExtValue() > MaxImm)
2093 return;
2094
2095 SDValue ZtValue;
2096 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2097 return;
2098
2099 SDValue Chain = Node->getOperand(0);
2100 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4), Chain};
2101 SDLoc DL(Node);
2102 EVT VT = Node->getValueType(0);
2103
2104 SDNode *Instruction =
2105 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2106 SDValue SuperReg = SDValue(Instruction, 0);
2107
2108 for (unsigned I = 0; I < NumOutVecs; ++I)
2109 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2110 AArch64::zsub0 + I, DL, VT, SuperReg));
2111
2112 // Copy chain
2113 unsigned ChainIdx = NumOutVecs;
2114 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2115 CurDAG->RemoveDeadNode(Node);
2116}
2117
2118void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
2119 unsigned NumOutVecs,
2120 unsigned Opc) {
2121 SDValue ZtValue;
2122 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2123 return;
2124
2125 SDValue Chain = Node->getOperand(0);
2126 SDValue Ops[] = {ZtValue,
2127 createZMulTuple({Node->getOperand(3), Node->getOperand(4)}),
2128 Chain};
2129
2130 SDLoc DL(Node);
2131 EVT VT = Node->getValueType(0);
2132
2133 SDNode *Instruction =
2134 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2135 SDValue SuperReg = SDValue(Instruction, 0);
2136
2137 for (unsigned I = 0; I < NumOutVecs; ++I)
2138 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2139 AArch64::zsub0 + I, DL, VT, SuperReg));
2140
2141 // Copy chain
2142 unsigned ChainIdx = NumOutVecs;
2143 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2144 CurDAG->RemoveDeadNode(Node);
2145}
2146
2147void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2148 unsigned Op) {
2149 SDLoc DL(N);
2150 EVT VT = N->getValueType(0);
2151
2152 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2153 SDValue Zd = createZMulTuple(Regs);
2154 SDValue Zn = N->getOperand(1 + NumVecs);
2155 SDValue Zm = N->getOperand(2 + NumVecs);
2156
2157 SDValue Ops[] = {Zd, Zn, Zm};
2158
2159 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
2160 SDValue SuperReg = SDValue(Intrinsic, 0);
2161 for (unsigned i = 0; i < NumVecs; ++i)
2162 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2163 AArch64::zsub0 + i, DL, VT, SuperReg));
2164
2165 CurDAG->RemoveDeadNode(N);
2166}
2167
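// Checks that TileNum is valid for the SME tile group named by BaseReg and,
// if so, selects the tile by advancing BaseReg, e.g. (illustrative) ZAS0 with
// TileNum 2 becomes ZAS2; the arithmetic below relies on the ZA tile
// registers being enumerated consecutively.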
2168bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2169 switch (BaseReg) {
2170 default:
2171 return false;
2172 case AArch64::ZA:
2173 case AArch64::ZAB0:
2174 if (TileNum == 0)
2175 break;
2176 return false;
2177 case AArch64::ZAH0:
2178 if (TileNum <= 1)
2179 break;
2180 return false;
2181 case AArch64::ZAS0:
2182 if (TileNum <= 3)
2183 break;
2184 return false;
2185 case AArch64::ZAD0:
2186 if (TileNum <= 7)
2187 break;
2188 return false;
2189 }
2190
2191 BaseReg += TileNum;
2192 return true;
2193}
2194
2195template <unsigned MaxIdx, unsigned Scale>
2196void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
2197 unsigned BaseReg, unsigned Op) {
2198 unsigned TileNum = 0;
2199 if (BaseReg != AArch64::ZA)
2200 TileNum = N->getConstantOperandVal(2);
2201
2202 if (!SelectSMETile(BaseReg, TileNum))
2203 return;
2204
2205 SDValue SliceBase, Base, Offset;
2206 if (BaseReg == AArch64::ZA)
2207 SliceBase = N->getOperand(2);
2208 else
2209 SliceBase = N->getOperand(3);
2210
2211 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2212 return;
2213
2214 SDLoc DL(N);
2215 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
2216 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
2217 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2218
2219 EVT VT = N->getValueType(0);
2220 for (unsigned I = 0; I < NumVecs; ++I)
2221 ReplaceUses(SDValue(N, I),
2222 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2223 SDValue(Mov, 0)));
2224 // Copy chain
2225 unsigned ChainIdx = NumVecs;
2226 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2227 CurDAG->RemoveDeadNode(N);
2228}
2229
2230void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2231 unsigned Op, unsigned MaxIdx,
2232 unsigned Scale, unsigned BaseReg) {
2233 // Slice can be in different positions
2234 // The array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2235 // The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2236 SDValue SliceBase = N->getOperand(2);
2237 if (BaseReg != AArch64::ZA)
2238 SliceBase = N->getOperand(3);
2239
2240 SDValue Base, Offset;
2241 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2242 return;
2243 // The correct ZA tile number is computed when the machine instruction is
2244 // emitted (see EmitZAInstr); the DAG cannot select a ZA tile as an output
2245 // register with ZReg.
2246 SDLoc DL(N);
2247 SmallVector<SDValue, 4> Ops;
2248 if (BaseReg != AArch64::ZA)
2249 Ops.push_back(N->getOperand(2));
2250 Ops.push_back(Base);
2251 Ops.push_back(Offset);
2252 Ops.push_back(N->getOperand(0)); // Chain
2253 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2254
2255 EVT VT = N->getValueType(0);
2256 for (unsigned I = 0; I < NumVecs; ++I)
2257 ReplaceUses(SDValue(N, I),
2258 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2259 SDValue(Mov, 0)));
2260
2261 // Copy chain
2262 unsigned ChainIdx = NumVecs;
2263 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2264 CurDAG->RemoveDeadNode(N);
2265}
2266
2267void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2268 unsigned NumOutVecs,
2269 bool IsTupleInput,
2270 unsigned Opc) {
2271 SDLoc DL(N);
2272 EVT VT = N->getValueType(0);
2273 unsigned NumInVecs = N->getNumOperands() - 1;
2274
2276 if (IsTupleInput) {
2277 assert((NumInVecs == 2 || NumInVecs == 4) &&
2278 "Don't know how to handle multi-register input!");
2279 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumInVecs));
2280 Ops.push_back(createZMulTuple(Regs));
2281 } else {
2282 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2283 for (unsigned I = 0; I < NumInVecs; I++)
2284 Ops.push_back(N->getOperand(1 + I));
2285 }
2286
2287 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2288 SDValue SuperReg = SDValue(Res, 0);
2289
2290 for (unsigned I = 0; I < NumOutVecs; I++)
2291 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2292 AArch64::zsub0 + I, DL, VT, SuperReg));
2293 CurDAG->RemoveDeadNode(N);
2294}
2295
2296void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2297 unsigned Opc) {
2298 SDLoc dl(N);
2299 EVT VT = N->getOperand(2)->getValueType(0);
2300
2301 // Form a REG_SEQUENCE to force register allocation.
2302 bool Is128Bit = VT.getSizeInBits() == 128;
2303 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2304 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2305
2306 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2307 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2308
2309 // Transfer memoperands.
2310 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2311 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2312
2313 ReplaceNode(N, St);
2314}
2315
2316void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2317 unsigned Scale, unsigned Opc_rr,
2318 unsigned Opc_ri) {
2319 SDLoc dl(N);
2320
2321 // Form a REG_SEQUENCE to force register allocation.
2322 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2323 SDValue RegSeq = createZTuple(Regs);
2324
2325 // Optimize addressing mode.
2326 unsigned Opc;
2327 SDValue Base, Offset;
2328 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2329 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2330 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2331
2332 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2333 Base, // address
2334 Offset, // offset
2335 N->getOperand(0)}; // chain
2336 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2337
2338 ReplaceNode(N, St);
2339}
2340
2341bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2342 SDValue &OffImm) {
2343 SDLoc dl(N);
2344 const DataLayout &DL = CurDAG->getDataLayout();
2345 const TargetLowering *TLI = getTargetLowering();
2346
2347 // Try to match it for the frame address
2348 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2349 int FI = FINode->getIndex();
2350 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2351 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2352 return true;
2353 }
2354
2355 return false;
2356}
2357
2358void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2359 unsigned Opc) {
2360 SDLoc dl(N);
2361 EVT VT = N->getOperand(2)->getValueType(0);
2362 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2363 MVT::Other}; // Type for the Chain
2364
2365 // Form a REG_SEQUENCE to force register allocation.
2366 bool Is128Bit = VT.getSizeInBits() == 128;
2367 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2368 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2369
2370 SDValue Ops[] = {RegSeq,
2371 N->getOperand(NumVecs + 1), // base register
2372 N->getOperand(NumVecs + 2), // Incremental
2373 N->getOperand(0)}; // Chain
2374 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2375
2376 ReplaceNode(N, St);
2377}
2378
2379namespace {
2380/// WidenVector - Given a value in the V64 register class, produce the
2381/// equivalent value in the V128 register class.
2382class WidenVector {
2383 SelectionDAG &DAG;
2384
2385public:
2386 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2387
2388 SDValue operator()(SDValue V64Reg) {
2389 EVT VT = V64Reg.getValueType();
2390 unsigned NarrowSize = VT.getVectorNumElements();
2391 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2392 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2393 SDLoc DL(V64Reg);
2394
2395 SDValue Undef =
2396 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2397 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2398 }
2399};
2400} // namespace
2401
2402/// NarrowVector - Given a value in the V128 register class, produce the
2403/// equivalent value in the V64 register class.
2404 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2405 EVT VT = V128Reg.getValueType();
2406 unsigned WideSize = VT.getVectorNumElements();
2407 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2408 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2409
2410 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2411 V128Reg);
2412}
2413
2414void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2415 unsigned Opc) {
2416 SDLoc dl(N);
2417 EVT VT = N->getValueType(0);
2418 bool Narrow = VT.getSizeInBits() == 64;
2419
2420 // Form a REG_SEQUENCE to force register allocation.
2421 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2422
2423 if (Narrow)
2424 transform(Regs, Regs.begin(),
2425 WidenVector(*CurDAG));
2426
2427 SDValue RegSeq = createQTuple(Regs);
2428
2429 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2430
2431 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2432
2433 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2434 N->getOperand(NumVecs + 3), N->getOperand(0)};
2435 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2436 SDValue SuperReg = SDValue(Ld, 0);
2437
2438 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2439 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2440 AArch64::qsub2, AArch64::qsub3 };
2441 for (unsigned i = 0; i < NumVecs; ++i) {
2442 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2443 if (Narrow)
2444 NV = NarrowVector(NV, *CurDAG);
2445 ReplaceUses(SDValue(N, i), NV);
2446 }
2447
2448 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2449 CurDAG->RemoveDeadNode(N);
2450}
2451
2452void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2453 unsigned Opc) {
2454 SDLoc dl(N);
2455 EVT VT = N->getValueType(0);
2456 bool Narrow = VT.getSizeInBits() == 64;
2457
2458 // Form a REG_SEQUENCE to force register allocation.
2459 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2460
2461 if (Narrow)
2462 transform(Regs, Regs.begin(),
2463 WidenVector(*CurDAG));
2464
2465 SDValue RegSeq = createQTuple(Regs);
2466
2467 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2468 RegSeq->getValueType(0), MVT::Other};
2469
2470 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2471
2472 SDValue Ops[] = {RegSeq,
2473 CurDAG->getTargetConstant(LaneNo, dl,
2474 MVT::i64), // Lane Number
2475 N->getOperand(NumVecs + 2), // Base register
2476 N->getOperand(NumVecs + 3), // Incremental
2477 N->getOperand(0)};
2478 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2479
2480 // Update uses of the write back register
2481 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2482
2483 // Update uses of the vector list
2484 SDValue SuperReg = SDValue(Ld, 1);
2485 if (NumVecs == 1) {
2486 ReplaceUses(SDValue(N, 0),
2487 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2488 } else {
2489 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2490 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2491 AArch64::qsub2, AArch64::qsub3 };
2492 for (unsigned i = 0; i < NumVecs; ++i) {
2493 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2494 SuperReg);
2495 if (Narrow)
2496 NV = NarrowVector(NV, *CurDAG);
2497 ReplaceUses(SDValue(N, i), NV);
2498 }
2499 }
2500
2501 // Update the Chain
2502 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2503 CurDAG->RemoveDeadNode(N);
2504}
2505
2506void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2507 unsigned Opc) {
2508 SDLoc dl(N);
2509 EVT VT = N->getOperand(2)->getValueType(0);
2510 bool Narrow = VT.getSizeInBits() == 64;
2511
2512 // Form a REG_SEQUENCE to force register allocation.
2513 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2514
2515 if (Narrow)
2516 transform(Regs, Regs.begin(),
2517 WidenVector(*CurDAG));
2518
2519 SDValue RegSeq = createQTuple(Regs);
2520
2521 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2522
2523 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2524 N->getOperand(NumVecs + 3), N->getOperand(0)};
2525 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2526
2527 // Transfer memoperands.
2528 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2529 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2530
2531 ReplaceNode(N, St);
2532}
2533
2534void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2535 unsigned Opc) {
2536 SDLoc dl(N);
2537 EVT VT = N->getOperand(2)->getValueType(0);
2538 bool Narrow = VT.getSizeInBits() == 64;
2539
2540 // Form a REG_SEQUENCE to force register allocation.
2541 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2542
2543 if (Narrow)
2544 transform(Regs, Regs.begin(),
2545 WidenVector(*CurDAG));
2546
2547 SDValue RegSeq = createQTuple(Regs);
2548
2549 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2550 MVT::Other};
2551
2552 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2553
2554 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2555 N->getOperand(NumVecs + 2), // Base Register
2556 N->getOperand(NumVecs + 3), // Incremental
2557 N->getOperand(0)};
2558 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2559
2560 // Transfer memoperands.
2561 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2562 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2563
2564 ReplaceNode(N, St);
2565}
2566
2567 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2568 unsigned &Opc, SDValue &Opd0,
2569 unsigned &LSB, unsigned &MSB,
2570 unsigned NumberOfIgnoredLowBits,
2571 bool BiggerPattern) {
2572 assert(N->getOpcode() == ISD::AND &&
2573 "N must be a AND operation to call this function");
2574
2575 EVT VT = N->getValueType(0);
2576
2577 // Here we can test the type of VT and return false when the type does not
2578 // match, but since it is done prior to that call in the current context
2579 // we turned that into an assert to avoid redundant code.
2580 assert((VT == MVT::i32 || VT == MVT::i64) &&
2581 "Type checking must have been done before calling this function");
2582
2583 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2584 // changed the AND node to a 32-bit mask operation. We'll have to
2585 // undo that as part of the transform here if we want to catch all
2586 // the opportunities.
2587 // Currently the NumberOfIgnoredLowBits argument helps to recover
2588 // from these situations when matching bigger pattern (bitfield insert).
2589
2590 // For unsigned extracts, check for a shift right and mask
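 // For example (illustrative, i64): (and (srl x, 4), 0xff) matches below as
 // Opc = UBFMXri with LSB = 4 and MSB = 11, i.e. ubfx x, x, #4, #8.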
2591 uint64_t AndImm = 0;
2592 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2593 return false;
2594
2595 const SDNode *Op0 = N->getOperand(0).getNode();
2596
2597 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2598 // simplified. Try to undo that
2599 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2600
2601 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
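 // (e.g. 0xff qualifies since 0xff & 0x100 == 0, whereas 0xff0 does not)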
2602 if (AndImm & (AndImm + 1))
2603 return false;
2604
2605 bool ClampMSB = false;
2606 uint64_t SrlImm = 0;
2607 // Handle the SRL + ANY_EXTEND case.
2608 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2609 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2610 // Extend the incoming operand of the SRL to 64-bit.
2611 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2612 // Make sure to clamp the MSB so that we preserve the semantics of the
2613 // original operations.
2614 ClampMSB = true;
2615 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2616 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
2617 SrlImm)) {
2618 // If the shift result was truncated, we can still combine them.
2619 Opd0 = Op0->getOperand(0).getOperand(0);
2620
2621 // Use the type of SRL node.
2622 VT = Opd0->getValueType(0);
2623 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2624 Opd0 = Op0->getOperand(0);
2625 ClampMSB = (VT == MVT::i32);
2626 } else if (BiggerPattern) {
2627 // Let's pretend a 0 shift right has been performed.
2628 // The resulting code will be at least as good as the original one
2629 // plus it may expose more opportunities for bitfield insert pattern.
2630 // FIXME: Currently we limit this to the bigger pattern, because
2631 // some optimizations expect AND and not UBFM.
2632 Opd0 = N->getOperand(0);
2633 } else
2634 return false;
2635
2636 // Bail out on large immediates. This happens when no proper
2637 // combining/constant folding was performed.
2638 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2639 LLVM_DEBUG(
2640 (dbgs() << N
2641 << ": Found large shift immediate, this should not happen\n"));
2642 return false;
2643 }
2644
2645 LSB = SrlImm;
2646 MSB = SrlImm +
2647 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2648 : llvm::countr_one<uint64_t>(AndImm)) -
2649 1;
2650 if (ClampMSB)
2651 // Since we're moving the extend before the right shift operation, we need
2652 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2653 // the zeros which would get shifted in with the original right shift
2654 // operation.
2655 MSB = MSB > 31 ? 31 : MSB;
2656
2657 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2658 return true;
2659}
2660
2661 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2662 SDValue &Opd0, unsigned &Immr,
2663 unsigned &Imms) {
2664 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2665
2666 EVT VT = N->getValueType(0);
2667 unsigned BitWidth = VT.getSizeInBits();
2668 assert((VT == MVT::i32 || VT == MVT::i64) &&
2669 "Type checking must have been done before calling this function");
2670
2671 SDValue Op = N->getOperand(0);
2672 if (Op->getOpcode() == ISD::TRUNCATE) {
2673 Op = Op->getOperand(0);
2674 VT = Op->getValueType(0);
2675 BitWidth = VT.getSizeInBits();
2676 }
2677
2678 uint64_t ShiftImm;
2679 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2680 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2681 return false;
2682
2683 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2684 if (ShiftImm + Width > BitWidth)
2685 return false;
2686
2687 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2688 Opd0 = Op.getOperand(0);
2689 Immr = ShiftImm;
2690 Imms = ShiftImm + Width - 1;
2691 return true;
2692}
2693
2694 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2695 SDValue &Opd0, unsigned &LSB,
2696 unsigned &MSB) {
2697 // We are looking for the following pattern which basically extracts several
2698 // contiguous bits from the source value and places them at the LSB of the
2699 // destination value; all other bits of the destination value are set to zero:
2700 //
2701 // Value2 = AND Value, MaskImm
2702 // SRL Value2, ShiftImm
2703 //
2704 // with MaskImm >> ShiftImm to search for the bit width.
2705 //
2706 // This gets selected into a single UBFM:
2707 //
2708 // UBFM Value, ShiftImm, Log2_64(MaskImm)
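 // For example (illustrative): with MaskImm = 0xff0 and ShiftImm = 4,
 // isMask_64(0xff0 >> 4) holds, so this becomes UBFM Value, 4, 11,
 // i.e. ubfx #4, #8.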
2709 //
2710
2711 if (N->getOpcode() != ISD::SRL)
2712 return false;
2713
2714 uint64_t AndMask = 0;
2715 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2716 return false;
2717
2718 Opd0 = N->getOperand(0).getOperand(0);
2719
2720 uint64_t SrlImm = 0;
2721 if (!isIntImmediate(N->getOperand(1), SrlImm))
2722 return false;
2723
2724 // Check whether we really have several bits extract here.
2725 if (!isMask_64(AndMask >> SrlImm))
2726 return false;
2727
2728 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2729 LSB = SrlImm;
2730 MSB = llvm::Log2_64(AndMask);
2731 return true;
2732}
2733
2734static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2735 unsigned &Immr, unsigned &Imms,
2736 bool BiggerPattern) {
2737 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2738 "N must be a SHR/SRA operation to call this function");
2739
2740 EVT VT = N->getValueType(0);
2741
2742 // Here we can test the type of VT and return false when the type does not
2743 // match, but since it is done prior to that call in the current context
2744 // we turned that into an assert to avoid redundant code.
2745 assert((VT == MVT::i32 || VT == MVT::i64) &&
2746 "Type checking must have been done before calling this function");
2747
2748 // Check for AND + SRL doing several bits extract.
2749 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2750 return true;
2751
2752 // We're looking for a shift of a shift.
2753 uint64_t ShlImm = 0;
2754 uint64_t TruncBits = 0;
2755 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2756 Opd0 = N->getOperand(0).getOperand(0);
2757 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2758 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2759 // We are looking for a shift of truncate. Truncate from i64 to i32 could
2760 // be considered as setting high 32 bits as zero. Our strategy here is to
2761 // always generate 64bit UBFM. This consistency will help the CSE pass
2762 // later find more redundancy.
2763 Opd0 = N->getOperand(0).getOperand(0);
2764 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2765 VT = Opd0.getValueType();
2766 assert(VT == MVT::i64 && "the promoted type should be i64");
2767 } else if (BiggerPattern) {
2768 // Let's pretend a 0 shift left has been performed.
2769 // FIXME: Currently we limit this to the bigger pattern case,
2770 // because some optimizations expect AND and not UBFM
2771 Opd0 = N->getOperand(0);
2772 } else
2773 return false;
2774
2775 // Missing combines/constant folding may have left us with strange
2776 // constants.
2777 if (ShlImm >= VT.getSizeInBits()) {
2778 LLVM_DEBUG(
2779 (dbgs() << N
2780 << ": Found large shift immediate, this should not happen\n"));
2781 return false;
2782 }
2783
2784 uint64_t SrlImm = 0;
2785 if (!isIntImmediate(N->getOperand(1), SrlImm))
2786 return false;
2787
2788 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2789 "bad amount in shift node!");
2790 int immr = SrlImm - ShlImm;
2791 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2792 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
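 // For example (illustrative, i32): (srl (shl x, 8), 4) has ShlImm = 8 and
 // SrlImm = 4, giving Immr = 4 - 8 + 32 = 28 and Imms = 32 - 8 - 1 = 23,
 // i.e. UBFMWri with Immr = 28, Imms = 23 (the ubfiz #4, #24 form).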
2793 // SRA requires a signed extraction
2794 if (VT == MVT::i32)
2795 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2796 else
2797 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2798 return true;
2799}
2800
2801bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2802 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2803
2804 EVT VT = N->getValueType(0);
2805 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2806 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2807 return false;
2808
2809 uint64_t ShiftImm;
2810 SDValue Op = N->getOperand(0);
2811 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2812 return false;
2813
2814 SDLoc dl(N);
2815 // Extend the incoming operand of the shift to 64-bits.
2816 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2817 unsigned Immr = ShiftImm;
2818 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2819 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2820 CurDAG->getTargetConstant(Imms, dl, VT)};
2821 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2822 return true;
2823}
2824
2825static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2826 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2827 unsigned NumberOfIgnoredLowBits = 0,
2828 bool BiggerPattern = false) {
2829 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2830 return false;
2831
2832 switch (N->getOpcode()) {
2833 default:
2834 if (!N->isMachineOpcode())
2835 return false;
2836 break;
2837 case ISD::AND:
2838 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2839 NumberOfIgnoredLowBits, BiggerPattern);
2840 case ISD::SRL:
2841 case ISD::SRA:
2842 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2843
2844 case ISD::SIGN_EXTEND_INREG:
2845 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2846 }
2847
2848 unsigned NOpc = N->getMachineOpcode();
2849 switch (NOpc) {
2850 default:
2851 return false;
2852 case AArch64::SBFMWri:
2853 case AArch64::UBFMWri:
2854 case AArch64::SBFMXri:
2855 case AArch64::UBFMXri:
2856 Opc = NOpc;
2857 Opd0 = N->getOperand(0);
2858 Immr = N->getConstantOperandVal(1);
2859 Imms = N->getConstantOperandVal(2);
2860 return true;
2861 }
2862 // Unreachable
2863 return false;
2864}
2865
2866bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2867 unsigned Opc, Immr, Imms;
2868 SDValue Opd0;
2869 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2870 return false;
2871
2872 EVT VT = N->getValueType(0);
2873 SDLoc dl(N);
2874
2875 // If the bit extract operation is 64bit but the original type is 32bit, we
2876 // need to add one EXTRACT_SUBREG.
2877 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2878 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2879 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2880
2881 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2882 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
2883 MVT::i32, SDValue(BFM, 0));
2884 ReplaceNode(N, Inner.getNode());
2885 return true;
2886 }
2887
2888 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2889 CurDAG->getTargetConstant(Imms, dl, VT)};
2890 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2891 return true;
2892}
2893
2894/// Does DstMask form a complementary pair with the mask provided by
2895 /// BitsToBeInserted, suitable for use in a BFI instruction? Roughly speaking,
2896/// this asks whether DstMask zeroes precisely those bits that will be set by
2897/// the other half.
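/// For example, with VT == i32 and no ignored high bits, DstMask 0xffff0000
/// and BitsToBeInserted 0x0000ffff are complementary (their AND is zero and
/// their OR is all ones), so this returns true.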
2898static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2899 unsigned NumberOfIgnoredHighBits, EVT VT) {
2900 assert((VT == MVT::i32 || VT == MVT::i64) &&
2901 "i32 or i64 mask type expected!");
2902 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2903
2904 // Enable implicitTrunc as we're intentionally ignoring high bits.
2905 APInt SignificantDstMask =
2906 APInt(BitWidth, DstMask, /*isSigned=*/false, /*implicitTrunc=*/true);
2907 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2908
2909 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2910 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2911}
2912
2913// Look for bits that will be useful for later uses.
2914 // A bit is considered useless as soon as it is dropped and never used
2915 // before it has been dropped.
2916// E.g., looking for useful bit of x
2917// 1. y = x & 0x7
2918// 2. z = y >> 2
2919 // After #1, the useful bits of x are 0x7; these useful bits of x live
2920 // through y.
2921// After #2, the useful bits of x are 0x4.
2922// However, if x is used on an unpredictable instruction, then all its bits
2923// are useful.
2924// E.g.
2925// 1. y = x & 0x7
2926// 2. z = y >> 2
2927// 3. str x, [@x]
2928static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2929
2930 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2931 unsigned Depth) {
2932 uint64_t Imm =
2933 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2934 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2935 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2936 getUsefulBits(Op, UsefulBits, Depth + 1);
2937}
2938
2939 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2940 uint64_t Imm, uint64_t MSB,
2941 unsigned Depth) {
2942 // inherit the bitwidth value
2943 APInt OpUsefulBits(UsefulBits);
2944 OpUsefulBits = 1;
2945
2946 if (MSB >= Imm) {
2947 OpUsefulBits <<= MSB - Imm + 1;
2948 --OpUsefulBits;
2949 // The interesting part will be in the lower part of the result
2950 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2951 // The interesting part was starting at Imm in the argument
2952 OpUsefulBits <<= Imm;
2953 } else {
2954 OpUsefulBits <<= MSB + 1;
2955 --OpUsefulBits;
2956 // The interesting part will be shifted in the result
2957 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2958 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2959 // The interesting part was at zero in the argument
2960 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2961 }
2962
2963 UsefulBits &= OpUsefulBits;
2964}
2965
2966static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2967 unsigned Depth) {
2968 uint64_t Imm =
2969 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2970 uint64_t MSB =
2971 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2972
2973 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2974}
2975
2976 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2977 unsigned Depth) {
2978 uint64_t ShiftTypeAndValue =
2979 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2980 APInt Mask(UsefulBits);
2981 Mask.clearAllBits();
2982 Mask.flipAllBits();
2983
2984 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2985 // Shift Left
2986 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2987 Mask <<= ShiftAmt;
2988 getUsefulBits(Op, Mask, Depth + 1);
2989 Mask.lshrInPlace(ShiftAmt);
2990 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2991 // Shift Right
2992 // We do not handle AArch64_AM::ASR, because the sign will change the
2993 // number of useful bits
2994 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2995 Mask.lshrInPlace(ShiftAmt);
2996 getUsefulBits(Op, Mask, Depth + 1);
2997 Mask <<= ShiftAmt;
2998 } else
2999 return;
3000
3001 UsefulBits &= Mask;
3002}
3003
3004static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
3005 unsigned Depth) {
3006 uint64_t Imm =
3007 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3008 uint64_t MSB =
3009 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
3010
3011 APInt OpUsefulBits(UsefulBits);
3012 OpUsefulBits = 1;
3013
3014 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
3015 ResultUsefulBits.flipAllBits();
3016 APInt Mask(UsefulBits.getBitWidth(), 0);
3017
3018 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
3019
3020 if (MSB >= Imm) {
3021 // The instruction is a BFXIL.
3022 uint64_t Width = MSB - Imm + 1;
3023 uint64_t LSB = Imm;
3024
3025 OpUsefulBits <<= Width;
3026 --OpUsefulBits;
3027
3028 if (Op.getOperand(1) == Orig) {
3029 // Copy the low bits from the result to bits starting from LSB.
3030 Mask = ResultUsefulBits & OpUsefulBits;
3031 Mask <<= LSB;
3032 }
3033
3034 if (Op.getOperand(0) == Orig)
3035 // Bits starting from LSB in the input contribute to the result.
3036 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3037 } else {
3038 // The instruction is a BFI.
3039 uint64_t Width = MSB + 1;
3040 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
3041
3042 OpUsefulBits <<= Width;
3043 --OpUsefulBits;
3044 OpUsefulBits <<= LSB;
3045
3046 if (Op.getOperand(1) == Orig) {
3047 // Copy the bits from the result to the zero bits.
3048 Mask = ResultUsefulBits & OpUsefulBits;
3049 Mask.lshrInPlace(LSB);
3050 }
3051
3052 if (Op.getOperand(0) == Orig)
3053 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3054 }
3055
3056 UsefulBits &= Mask;
3057}
3058
3059static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
3060 SDValue Orig, unsigned Depth) {
3061
3062 // Users of this node should have already been instruction selected
3063 // FIXME: Can we turn that into an assert?
3064 if (!UserNode->isMachineOpcode())
3065 return;
3066
3067 switch (UserNode->getMachineOpcode()) {
3068 default:
3069 return;
3070 case AArch64::ANDSWri:
3071 case AArch64::ANDSXri:
3072 case AArch64::ANDWri:
3073 case AArch64::ANDXri:
3074 // We increment Depth only when we call the getUsefulBits
3075 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
3076 Depth);
3077 case AArch64::UBFMWri:
3078 case AArch64::UBFMXri:
3079 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
3080
3081 case AArch64::ORRWrs:
3082 case AArch64::ORRXrs:
3083 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
3084 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
3085 Depth);
3086 return;
3087 case AArch64::BFMWri:
3088 case AArch64::BFMXri:
3089 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
3090
3091 case AArch64::STRBBui:
3092 case AArch64::STURBBi:
3093 if (UserNode->getOperand(0) != Orig)
3094 return;
3095 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
3096 return;
3097
3098 case AArch64::STRHHui:
3099 case AArch64::STURHHi:
3100 if (UserNode->getOperand(0) != Orig)
3101 return;
3102 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
3103 return;
3104 }
3105}
3106
3107static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
3108 if (Depth >= SelectionDAG::MaxRecursionDepth)
3109 return;
3110 // Initialize UsefulBits
3111 if (!Depth) {
3112 unsigned Bitwidth = Op.getScalarValueSizeInBits();
3113 // At the beginning, assume every produced bits is useful
3114 UsefulBits = APInt(Bitwidth, 0);
3115 UsefulBits.flipAllBits();
3116 }
3117 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
3118
3119 for (SDNode *Node : Op.getNode()->users()) {
3120 // A use cannot produce useful bits
3121 APInt UsefulBitsForUse = APInt(UsefulBits);
3122 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
3123 UsersUsefulBits |= UsefulBitsForUse;
3124 }
3125 // UsefulBits contains the produced bits that are meaningful for the
3126 // current definition, thus a user cannot make a bit meaningful at
3127 // this point
3128 UsefulBits &= UsersUsefulBits;
3129}
3130
3131/// Create a machine node performing a notional SHL of Op by ShlAmount. If
3132/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3133/// 0, return Op unchanged.
3134static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
3135 if (ShlAmount == 0)
3136 return Op;
3137
3138 EVT VT = Op.getValueType();
3139 SDLoc dl(Op);
3140 unsigned BitWidth = VT.getSizeInBits();
3141 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
3142
3143 SDNode *ShiftNode;
3144 if (ShlAmount > 0) {
3145 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
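 // (e.g. LSL w0, w1, #8 == UBFM w0, w1, #24, #23)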
3146 ShiftNode = CurDAG->getMachineNode(
3147 UBFMOpc, dl, VT, Op,
3148 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
3149 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
3150 } else {
3151 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3152 assert(ShlAmount < 0 && "expected right shift");
3153 int ShrAmount = -ShlAmount;
3154 ShiftNode = CurDAG->getMachineNode(
3155 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
3156 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
3157 }
3158
3159 return SDValue(ShiftNode, 0);
3160}
3161
3162// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3163static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3164 bool BiggerPattern,
3165 const uint64_t NonZeroBits,
3166 SDValue &Src, int &DstLSB,
3167 int &Width);
3168
3169// For bit-field-positioning pattern "shl VAL, N)".
3170static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3171 bool BiggerPattern,
3172 const uint64_t NonZeroBits,
3173 SDValue &Src, int &DstLSB,
3174 int &Width);
3175
3176/// Does this tree qualify as an attempt to move a bitfield into position,
3177/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
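/// For example (illustrative): (and (shl x, 8), 0xff00) moves the low 8 bits
/// of x into position 8, so Src = x, DstLSB = 8 and Width = 8.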
3178 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
3179 bool BiggerPattern, SDValue &Src,
3180 int &DstLSB, int &Width) {
3181 EVT VT = Op.getValueType();
3182 unsigned BitWidth = VT.getSizeInBits();
3183 (void)BitWidth;
3184 assert(BitWidth == 32 || BitWidth == 64);
3185
3186 KnownBits Known = CurDAG->computeKnownBits(Op);
3187
3188 // Non-zero in the sense that they're not provably zero, which is the key
3189 // point if we want to use this value
3190 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
3191 if (!isShiftedMask_64(NonZeroBits))
3192 return false;
3193
3194 switch (Op.getOpcode()) {
3195 default:
3196 break;
3197 case ISD::AND:
3198 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3199 NonZeroBits, Src, DstLSB, Width);
3200 case ISD::SHL:
3201 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3202 NonZeroBits, Src, DstLSB, Width);
3203 }
3204
3205 return false;
3206}
3207
3208 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3209 bool BiggerPattern,
3210 const uint64_t NonZeroBits,
3211 SDValue &Src, int &DstLSB,
3212 int &Width) {
3213 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3214
3215 EVT VT = Op.getValueType();
3216 assert((VT == MVT::i32 || VT == MVT::i64) &&
3217 "Caller guarantees VT is one of i32 or i64");
3218 (void)VT;
3219
3220 uint64_t AndImm;
3221 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
3222 return false;
3223
3224 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3225 // 1) (AndImm & (1 << POS) == 0)
3226 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3227 //
3228 // 1) and 2) don't agree so something must be wrong (e.g., in
3229 // 'SelectionDAG::computeKnownBits')
3230 assert((~AndImm & NonZeroBits) == 0 &&
3231 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3232
3233 SDValue AndOp0 = Op.getOperand(0);
3234
3235 uint64_t ShlImm;
3236 SDValue ShlOp0;
3237 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3238 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3239 ShlOp0 = AndOp0.getOperand(0);
3240 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3241 isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
3242 ShlImm)) {
3243 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3244
3245 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3246 SDValue ShlVal = AndOp0.getOperand(0);
3247
3248 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3249 // expect VT to be MVT::i32.
3250 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3251
3252 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3253 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3254 } else
3255 return false;
3256
3257 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3258 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3259 // AndOp0+AND.
3260 if (!BiggerPattern && !AndOp0.hasOneUse())
3261 return false;
3262
3263 DstLSB = llvm::countr_zero(NonZeroBits);
3264 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3265
3266 // Bail out on large Width. This happens when no proper combining / constant
3267 // folding was performed.
3268 if (Width >= (int)VT.getSizeInBits()) {
3269    // If VT is i64, Width > 64 is not possible since NonZeroBits is uint64_t, and
3270    // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3271    // "val".
3272    // If VT is i32, Width >= 32 means:
3273    // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
3274    //   demands at least 'Width' bits (after dag-combiner). This together with
3275    //   the `any_extend` Op (undefined higher bits) indicates a missed combination
3276    //   when lowering the 'and' IR instruction to a machine IR instruction.
3277 LLVM_DEBUG(
3278 dbgs()
3279 << "Found large Width in bit-field-positioning -- this indicates no "
3280 "proper combining / constant folding was performed\n");
3281 return false;
3282 }
3283
3284 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3285 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3286 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3287 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3288 // which case it is not profitable to insert an extra shift.
3289 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3290 return false;
3291
3292 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3293 return true;
3294}
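
// Illustrative example (added annotation, not part of the upstream source):
// given (and (shl val, 4), 0x0ff0), the helper above reports Src = val,
// DstLSB = 4 and Width = 8, i.e. 'val' is being moved into bit positions
// [4, 11], which the callers can then turn into a UBFIZ/BFI-style insertion.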
3295
3296// For node (shl (and val, mask), N)), returns true if the node is equivalent to
3297// UBFIZ.
3298static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3299                                              SDValue &Src, int &DstLSB,
3300 int &Width) {
3301 // Caller should have verified that N is a left shift with constant shift
3302 // amount; asserts that.
3303 assert(Op.getOpcode() == ISD::SHL &&
3304 "Op.getNode() should be a SHL node to call this function");
3305 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3306 "Op.getNode() should shift ShlImm to call this function");
3307
3308 uint64_t AndImm = 0;
3309 SDValue Op0 = Op.getOperand(0);
3310 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3311 return false;
3312
3313 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3314 if (isMask_64(ShiftedAndImm)) {
3315 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3316 // should end with Mask, and could be prefixed with random bits if those
3317 // bits are shifted out.
3318 //
3319 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3320 // the AND result corresponding to those bits are shifted out, so it's fine
3321 // to not extract them.
3322 Width = llvm::countr_one(ShiftedAndImm);
3323 DstLSB = ShlImm;
3324 Src = Op0.getOperand(0);
3325 return true;
3326 }
3327 return false;
3328}
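
// Illustrative example (added annotation, not part of the upstream source):
// for (shl (and val, 0xff), 8), ShiftedAndImm == ((0xff << 8) >> 8) == 0xff
// is a mask, so the node is reported as equivalent to UBFIZ with Width = 8
// and DstLSB = 8; AND bits that would be shifted out of the result are
// simply ignored.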
3329
3330static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3331                                           bool BiggerPattern,
3332 const uint64_t NonZeroBits,
3333 SDValue &Src, int &DstLSB,
3334 int &Width) {
3335 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3336
3337 EVT VT = Op.getValueType();
3338 assert((VT == MVT::i32 || VT == MVT::i64) &&
3339 "Caller guarantees that type is i32 or i64");
3340 (void)VT;
3341
3342 uint64_t ShlImm;
3343 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3344 return false;
3345
3346 if (!BiggerPattern && !Op.hasOneUse())
3347 return false;
3348
3349 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3350 return true;
3351
3352 DstLSB = llvm::countr_zero(NonZeroBits);
3353 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3354
3355 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3356 return false;
3357
3358 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3359 return true;
3360}
3361
3362static bool isShiftedMask(uint64_t Mask, EVT VT) {
3363 assert(VT == MVT::i32 || VT == MVT::i64);
3364 if (VT == MVT::i32)
3365 return isShiftedMask_32(Mask);
3366 return isShiftedMask_64(Mask);
3367}
3368
3369// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3370// inserted only sets known zero bits.
3371static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
3372  assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3373
3374 EVT VT = N->getValueType(0);
3375 if (VT != MVT::i32 && VT != MVT::i64)
3376 return false;
3377
3378 unsigned BitWidth = VT.getSizeInBits();
3379
3380 uint64_t OrImm;
3381 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3382 return false;
3383
3384 // Skip this transformation if the ORR immediate can be encoded in the ORR.
3385 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3386 // performance neutral.
3387  if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
3388    return false;
3389
3390 uint64_t MaskImm;
3391 SDValue And = N->getOperand(0);
3392 // Must be a single use AND with an immediate operand.
3393 if (!And.hasOneUse() ||
3394 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3395 return false;
3396
3397 // Compute the Known Zero for the AND as this allows us to catch more general
3398 // cases than just looking for AND with imm.
3399 KnownBits Known = CurDAG->computeKnownBits(And);
3400
3401 // Non-zero in the sense that they're not provably zero, which is the key
3402 // point if we want to use this value.
3403 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3404
3405 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3406 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3407 return false;
3408
3409 // The bits being inserted must only set those bits that are known to be zero.
3410 if ((OrImm & NotKnownZero) != 0) {
3411 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3412 // currently handle this case.
3413 return false;
3414 }
3415
3416 // BFI/BFXIL dst, src, #lsb, #width.
3417 int LSB = llvm::countr_one(NotKnownZero);
3418 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3419
3420 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3421 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3422 unsigned ImmS = Width - 1;
3423
3424  // If we're creating a BFI instruction, avoid cases where we need more
3425 // instructions to materialize the BFI constant as compared to the original
3426 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3427 // should be no worse in this case.
3428 bool IsBFI = LSB != 0;
3429 uint64_t BFIImm = OrImm >> LSB;
3430 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3431 // We have a BFI instruction and we know the constant can't be materialized
3432 // with a ORR-immediate with the zero register.
3433 unsigned OrChunks = 0, BFIChunks = 0;
3434 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3435 if (((OrImm >> Shift) & 0xFFFF) != 0)
3436 ++OrChunks;
3437 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3438 ++BFIChunks;
3439 }
3440 if (BFIChunks > OrChunks)
3441 return false;
3442 }
3443
3444 // Materialize the constant to be inserted.
3445 SDLoc DL(N);
3446 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3447 SDNode *MOVI = CurDAG->getMachineNode(
3448 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3449
3450 // Create the BFI/BFXIL instruction.
3451 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3452 CurDAG->getTargetConstant(ImmR, DL, VT),
3453 CurDAG->getTargetConstant(ImmS, DL, VT)};
3454 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3455 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3456 return true;
3457}
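
// Illustrative worked example (added annotation, not part of the upstream
// source): with VT == i32 and N = (or (and X, 0xfffff00f), 0x0a0), the AND
// is known to clear bits [4, 11], so NotKnownZero == 0xfffff00f, LSB == 4,
// Width == 8 and BFIImm == 0xa, and the function above selects roughly
//   mov w8, #0xa
//   bfi w0, w8, #4, #8      ; BFMWri with ImmR = 28, ImmS = 7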
3458
3459static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
3460                                           SDValue &ShiftedOperand,
3461 uint64_t &EncodedShiftImm) {
3462 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3463 if (!Dst.hasOneUse())
3464 return false;
3465
3466 EVT VT = Dst.getValueType();
3467 assert((VT == MVT::i32 || VT == MVT::i64) &&
3468 "Caller should guarantee that VT is one of i32 or i64");
3469 const unsigned SizeInBits = VT.getSizeInBits();
3470
3471 SDLoc DL(Dst.getNode());
3472 uint64_t AndImm, ShlImm;
3473 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3474 isShiftedMask_64(AndImm)) {
3475 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3476 SDValue DstOp0 = Dst.getOperand(0);
3477 if (!DstOp0.hasOneUse())
3478 return false;
3479
3480 // An example to illustrate the transformation
3481 // From:
3482 // lsr x8, x1, #1
3483 // and x8, x8, #0x3f80
3484 // bfxil x8, x1, #0, #7
3485 // To:
3486 // and x8, x23, #0x7f
3487 // ubfx x9, x23, #8, #7
3488 // orr x23, x8, x9, lsl #7
3489 //
3490 // The number of instructions remains the same, but ORR is faster than BFXIL
3491 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3492 // the dependency chain is improved after the transformation.
3493 uint64_t SrlImm;
3494 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3495 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3496 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3497 unsigned MaskWidth =
3498 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3499 unsigned UBFMOpc =
3500 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3501 SDNode *UBFMNode = CurDAG->getMachineNode(
3502 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3503 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3504 VT),
3505 CurDAG->getTargetConstant(
3506 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3507 ShiftedOperand = SDValue(UBFMNode, 0);
3508 EncodedShiftImm = AArch64_AM::getShifterImm(
3509 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3510 return true;
3511 }
3512 }
3513 return false;
3514 }
3515
3516 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3517 ShiftedOperand = Dst.getOperand(0);
3518 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3519 return true;
3520 }
3521
3522 uint64_t SrlImm;
3523 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3524 ShiftedOperand = Dst.getOperand(0);
3525 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3526 return true;
3527 }
3528 return false;
3529}
3530
3531// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3532// the operands and select it to AArch64::ORR with shifted registers if
3533// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3534static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3535 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3536 const bool BiggerPattern) {
3537 EVT VT = N->getValueType(0);
3538 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3539 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3540 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3541 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3542 assert((VT == MVT::i32 || VT == MVT::i64) &&
3543 "Expect result type to be i32 or i64 since N is combinable to BFM");
3544 SDLoc DL(N);
3545
3546 // Bail out if BFM simplifies away one node in BFM Dst.
3547 if (OrOpd1 != Dst)
3548 return false;
3549
3550 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3551 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3552 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3553 if (BiggerPattern) {
3554 uint64_t SrcAndImm;
3555 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3556 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3557 // OrOpd0 = AND Src, #Mask
3558 // So BFM simplifies away one AND node from Src and doesn't simplify away
3559 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3560 // one node (from Rd), ORR is better since it has higher throughput and
3561 // smaller latency than BFM on many AArch64 processors (and for the rest
3562 // ORR is at least as good as BFM).
3563 SDValue ShiftedOperand;
3564 uint64_t EncodedShiftImm;
3565 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3566 EncodedShiftImm)) {
3567 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3568 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3569 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3570 return true;
3571 }
3572 }
3573 return false;
3574 }
3575
3576 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3577
3578 uint64_t ShlImm;
3579 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3580 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3581 SDValue Ops[] = {
3582 Dst, Src,
3583 CurDAG->getTargetConstant(
3584              AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3585      CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3586 return true;
3587 }
3588
3589 // Select the following pattern to left-shifted operand rather than BFI.
3590 // %val1 = op ..
3591 // %val2 = shl %val1, #imm
3592 // %res = or %val1, %val2
3593 //
3594 // If N is selected to be BFI, we know that
3595    // 1) OrOpd0 would be the operand from which to extract bits (i.e., folded
3596    //    into BFI), and 2) OrOpd1 would be the destination operand (i.e., preserved)
3597 //
3598 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3599 if (OrOpd0.getOperand(0) == OrOpd1) {
3600 SDValue Ops[] = {
3601 OrOpd1, OrOpd1,
3602 CurDAG->getTargetConstant(
3603              AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3604      CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3605 return true;
3606 }
3607 }
3608
3609 uint64_t SrlImm;
3610 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3611 // Select the following pattern to right-shifted operand rather than BFXIL.
3612 // %val1 = op ..
3613 // %val2 = lshr %val1, #imm
3614 // %res = or %val1, %val2
3615 //
3616 // If N is selected to be BFXIL, we know that
3617    // 1) OrOpd0 would be the operand from which to extract bits (i.e., folded
3618    //    into BFXIL), and 2) OrOpd1 would be the destination operand (i.e., preserved)
3619 //
3620 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3621 if (OrOpd0.getOperand(0) == OrOpd1) {
3622 SDValue Ops[] = {
3623 OrOpd1, OrOpd1,
3624 CurDAG->getTargetConstant(
3625              AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
3626      CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3627 return true;
3628 }
3629 }
3630
3631 return false;
3632}
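
// Illustrative example (added annotation, not part of the upstream source):
// the !BiggerPattern path above turns, e.g.,
//   %val2 = shl i32 %val1, 7
//   %res  = or  i32 %val1, %val2
// into a single "orr w0, w1, w1, lsl #7" (ORRWrs) instead of a BFI, reusing
// %val1 as both operands and avoiding the bitfield insert.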
3633
3634static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3635 SelectionDAG *CurDAG) {
3636 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3637
3638 EVT VT = N->getValueType(0);
3639 if (VT != MVT::i32 && VT != MVT::i64)
3640 return false;
3641
3642 unsigned BitWidth = VT.getSizeInBits();
3643
3644 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3645 // have the expected shape. Try to undo that.
3646
3647 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3648 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3649
3650  // Given an OR operation, check if we have the following pattern
3651  // ubfm c, b, imm, imm2 (or something that does the same job, see
3652  //                       isBitfieldExtractOp)
3653  // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
3654  //                 countTrailingZeros(mask2) == imm2 - imm + 1
3655 // f = d | c
3656 // if yes, replace the OR instruction with:
3657 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3658
3659 // OR is commutative, check all combinations of operand order and values of
3660 // BiggerPattern, i.e.
3661 // Opd0, Opd1, BiggerPattern=false
3662 // Opd1, Opd0, BiggerPattern=false
3663 // Opd0, Opd1, BiggerPattern=true
3664 // Opd1, Opd0, BiggerPattern=true
3665 // Several of these combinations may match, so check with BiggerPattern=false
3666 // first since that will produce better results by matching more instructions
3667 // and/or inserting fewer extra instructions.
3668 for (int I = 0; I < 4; ++I) {
3669
3670 SDValue Dst, Src;
3671 unsigned ImmR, ImmS;
3672 bool BiggerPattern = I / 2;
3673 SDValue OrOpd0Val = N->getOperand(I % 2);
3674 SDNode *OrOpd0 = OrOpd0Val.getNode();
3675 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3676 SDNode *OrOpd1 = OrOpd1Val.getNode();
3677
3678 unsigned BFXOpc;
3679 int DstLSB, Width;
3680 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3681 NumberOfIgnoredLowBits, BiggerPattern)) {
3682 // Check that the returned opcode is compatible with the pattern,
3683 // i.e., same type and zero extended (U and not S)
3684 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3685 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3686 continue;
3687
3688 // Compute the width of the bitfield insertion
3689 DstLSB = 0;
3690 Width = ImmS - ImmR + 1;
3691      // FIXME: This constraint is to catch bitfield insertion; we may
3692      // want to widen the pattern if we want to handle the general
3693      // bitfield move case.
3694 if (Width <= 0)
3695 continue;
3696
3697 // If the mask on the insertee is correct, we have a BFXIL operation. We
3698 // can share the ImmR and ImmS values from the already-computed UBFM.
3699 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3700 BiggerPattern,
3701 Src, DstLSB, Width)) {
3702 ImmR = (BitWidth - DstLSB) % BitWidth;
3703 ImmS = Width - 1;
3704 } else
3705 continue;
3706
3707 // Check the second part of the pattern
3708 EVT VT = OrOpd1Val.getValueType();
3709 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3710
3711 // Compute the Known Zero for the candidate of the first operand.
3712    // This allows us to catch more general cases than just looking for
3713    // AND with imm. Indeed, simplify-demanded-bits may have removed
3714    // the AND instruction because it proved it was useless.
3715 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3716
3717 // Check if there is enough room for the second operand to appear
3718 // in the first one
3719 APInt BitsToBeInserted =
3720 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3721
3722 if ((BitsToBeInserted & ~Known.Zero) != 0)
3723 continue;
3724
3725 // Set the first operand
3726 uint64_t Imm;
3727 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3728 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3729 // In that case, we can eliminate the AND
3730 Dst = OrOpd1->getOperand(0);
3731 else
3732 // Maybe the AND has been removed by simplify-demanded-bits
3733 // or is useful because it discards more bits
3734 Dst = OrOpd1Val;
3735
3736 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3737 // with shifted operand is more efficient.
3738 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3739 BiggerPattern))
3740 return true;
3741
3742 // both parts match
3743 SDLoc DL(N);
3744 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3745 CurDAG->getTargetConstant(ImmS, DL, VT)};
3746 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3747 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3748 return true;
3749 }
3750
3751 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3752 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3753 // mask (e.g., 0x000ffff0).
3754 uint64_t Mask0Imm, Mask1Imm;
3755 SDValue And0 = N->getOperand(0);
3756 SDValue And1 = N->getOperand(1);
3757 if (And0.hasOneUse() && And1.hasOneUse() &&
3758 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3759 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3760 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3761 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3762
3763 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3764 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3765 // bits to be inserted.
3766 if (isShiftedMask(Mask0Imm, VT)) {
3767 std::swap(And0, And1);
3768 std::swap(Mask0Imm, Mask1Imm);
3769 }
3770
3771 SDValue Src = And1->getOperand(0);
3772 SDValue Dst = And0->getOperand(0);
3773 unsigned LSB = llvm::countr_zero(Mask1Imm);
3774 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3775
3776 // The BFXIL inserts the low-order bits from a source register, so right
3777 // shift the needed bits into place.
3778 SDLoc DL(N);
3779 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3780 uint64_t LsrImm = LSB;
3781 if (Src->hasOneUse() &&
3782 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3783 (LsrImm + LSB) < BitWidth) {
3784 Src = Src->getOperand(0);
3785 LsrImm += LSB;
3786 }
3787
3788 SDNode *LSR = CurDAG->getMachineNode(
3789 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3790 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3791
3792 // BFXIL is an alias of BFM, so translate to BFM operands.
3793 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3794 unsigned ImmS = Width - 1;
3795
3796 // Create the BFXIL instruction.
3797 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3798 CurDAG->getTargetConstant(ImmR, DL, VT),
3799 CurDAG->getTargetConstant(ImmS, DL, VT)};
3800 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3801 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3802 return true;
3803 }
3804
3805 return false;
3806}
3807
3808bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3809 if (N->getOpcode() != ISD::OR)
3810 return false;
3811
3812 APInt NUsefulBits;
3813 getUsefulBits(SDValue(N, 0), NUsefulBits);
3814
3815  // If none of the bits are useful, just return UNDEF.
3816 if (!NUsefulBits) {
3817 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3818 return true;
3819 }
3820
3821 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3822 return true;
3823
3824 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3825}
3826
3827/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3828/// equivalent of a left shift by a constant amount followed by an and masking
3829/// out a contiguous set of bits.
3830bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3831 if (N->getOpcode() != ISD::AND)
3832 return false;
3833
3834 EVT VT = N->getValueType(0);
3835 if (VT != MVT::i32 && VT != MVT::i64)
3836 return false;
3837
3838 SDValue Op0;
3839 int DstLSB, Width;
3840 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3841 Op0, DstLSB, Width))
3842 return false;
3843
3844 // ImmR is the rotate right amount.
3845 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3846 // ImmS is the most significant bit of the source to be moved.
3847 unsigned ImmS = Width - 1;
3848
3849 SDLoc DL(N);
3850 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3851 CurDAG->getTargetConstant(ImmS, DL, VT)};
3852 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3853 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3854 return true;
3855}
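
// Illustrative example (added annotation, not part of the upstream source):
// for (and (shl w1, 3), 0xf8) the match above gives DstLSB = 3 and
// Width = 5, so the node is selected as UBFM w0, w1, #29, #4, which is the
// alias "ubfiz w0, w1, #3, #5".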
3856
3857/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3858/// variable shift/rotate instructions.
3859bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3860 EVT VT = N->getValueType(0);
3861
3862 unsigned Opc;
3863 switch (N->getOpcode()) {
3864 case ISD::ROTR:
3865 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3866 break;
3867 case ISD::SHL:
3868 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3869 break;
3870 case ISD::SRL:
3871 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3872 break;
3873 case ISD::SRA:
3874 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3875 break;
3876 default:
3877 return false;
3878 }
3879
3880 uint64_t Size;
3881 uint64_t Bits;
3882 if (VT == MVT::i32) {
3883 Bits = 5;
3884 Size = 32;
3885 } else if (VT == MVT::i64) {
3886 Bits = 6;
3887 Size = 64;
3888 } else
3889 return false;
3890
3891 SDValue ShiftAmt = N->getOperand(1);
3892 SDLoc DL(N);
3893 SDValue NewShiftAmt;
3894
3895 // Skip over an extend of the shift amount.
3896 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3897 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3898 ShiftAmt = ShiftAmt->getOperand(0);
3899
3900 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3901 SDValue Add0 = ShiftAmt->getOperand(0);
3902 SDValue Add1 = ShiftAmt->getOperand(1);
3903 uint64_t Add0Imm;
3904 uint64_t Add1Imm;
3905 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
3906 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3907 // to avoid the ADD/SUB.
3908 NewShiftAmt = Add0;
3909 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3910 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
3911 (Add0Imm % Size == 0)) {
3912 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3913 // to generate a NEG instead of a SUB from a constant.
3914 unsigned NegOpc;
3915 unsigned ZeroReg;
3916 EVT SubVT = ShiftAmt->getValueType(0);
3917 if (SubVT == MVT::i32) {
3918 NegOpc = AArch64::SUBWrr;
3919 ZeroReg = AArch64::WZR;
3920 } else {
3921 assert(SubVT == MVT::i64);
3922 NegOpc = AArch64::SUBXrr;
3923 ZeroReg = AArch64::XZR;
3924 }
3925 SDValue Zero =
3926 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3927 MachineSDNode *Neg =
3928 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
3929 NewShiftAmt = SDValue(Neg, 0);
3930 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3931 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
3932 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3933 // to generate a NOT instead of a SUB from a constant.
3934 unsigned NotOpc;
3935 unsigned ZeroReg;
3936 EVT SubVT = ShiftAmt->getValueType(0);
3937 if (SubVT == MVT::i32) {
3938 NotOpc = AArch64::ORNWrr;
3939 ZeroReg = AArch64::WZR;
3940 } else {
3941 assert(SubVT == MVT::i64);
3942 NotOpc = AArch64::ORNXrr;
3943 ZeroReg = AArch64::XZR;
3944 }
3945 SDValue Zero =
3946 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3947 MachineSDNode *Not =
3948 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
3949 NewShiftAmt = SDValue(Not, 0);
3950 } else
3951 return false;
3952 } else {
3953 // If the shift amount is masked with an AND, check that the mask covers the
3954 // bits that are implicitly ANDed off by the above opcodes and if so, skip
3955 // the AND.
3956 uint64_t MaskImm;
3957 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
3958 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
3959 return false;
3960
3961 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
3962 return false;
3963
3964 NewShiftAmt = ShiftAmt->getOperand(0);
3965 }
3966
3967 // Narrow/widen the shift amount to match the size of the shift operation.
3968 if (VT == MVT::i32)
3969 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
3970 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
3971 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
3972 MachineSDNode *Ext = CurDAG->getMachineNode(
3973 AArch64::SUBREG_TO_REG, DL, VT,
3974 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
3975 NewShiftAmt = SDValue(Ext, 0);
3976 }
3977
3978 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
3979 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3980 return true;
3981}
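
// Illustrative example (added annotation, not part of the upstream source):
// because LSLV/LSRV/ASRV/RORV only read the low 5 or 6 bits of the shift
// amount, the routine above can select, e.g.,
//   (shl x0, (and x2, 63))  ->  lslv x0, x0, x2          ; the AND is dropped
//   (shl x0, (sub 64, x2))  ->  neg x3, x2 ; lslv x0, x0, x3
// instead of materialising the masked or subtracted amount explicitly.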
3982
3983static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
3984                                               SDValue &FixedPos,
3985 unsigned RegWidth,
3986 bool isReciprocal) {
3987 APFloat FVal(0.0);
3988  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
3989    FVal = CN->getValueAPF();
3990 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
3991 // Some otherwise illegal constants are allowed in this case.
3992 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
3993 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
3994 return false;
3995
3996 ConstantPoolSDNode *CN =
3997 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
3998 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
3999 } else
4000 return false;
4001
4002 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
4003 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
4004 // x-register.
4005 //
4006 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
4007 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
4008 // integers.
4009 bool IsExact;
4010
4011 if (isReciprocal)
4012 if (!FVal.getExactInverse(&FVal))
4013 return false;
4014
4015 // fbits is between 1 and 64 in the worst-case, which means the fmul
4016 // could have 2^64 as an actual operand. Need 65 bits of precision.
4017 APSInt IntVal(65, true);
4018 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
4019
4020 // N.b. isPowerOf2 also checks for > 0.
4021 if (!IsExact || !IntVal.isPowerOf2())
4022 return false;
4023 unsigned FBits = IntVal.logBase2();
4024
4025 // Checks above should have guaranteed that we haven't lost information in
4026 // finding FBits, but it must still be in range.
4027 if (FBits == 0 || FBits > RegWidth) return false;
4028
4029 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
4030 return true;
4031}
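
// Illustrative example (added annotation, not part of the upstream source):
// this predicate is what lets (fp_to_sint (fmul float %x, 16.0)) select to
// "fcvtzs w0, s0, #4": the multiplier 16.0 converts exactly to the integer
// 16, a power of two, so FBits == 4. With isReciprocal the same logic
// matches the constant 1/2^fbits instead.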
4032
4033bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
4034 unsigned RegWidth) {
4035 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4036 false);
4037}
4038
4039bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
4040 SDValue &FixedPos,
4041 unsigned RegWidth) {
4042 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4043 true);
4044}
4045
4046// Inspects a register string of the form o0:op1:CRn:CRm:op2, gets the fields
4047// of the string, obtains the integer values from them, and combines these
4048// into a single value to be used in the MRS/MSR instruction.
4049static int getIntOperandFromRegisterString(StringRef RegString) {
4050  SmallVector<StringRef, 5> Fields;
4051  RegString.split(Fields, ':');
4052
4053 if (Fields.size() == 1)
4054 return -1;
4055
4056 assert(Fields.size() == 5
4057 && "Invalid number of fields in read register string");
4058
4059  SmallVector<int, 5> Ops;
4060  bool AllIntFields = true;
4061
4062 for (StringRef Field : Fields) {
4063 unsigned IntField;
4064 AllIntFields &= !Field.getAsInteger(10, IntField);
4065 Ops.push_back(IntField);
4066 }
4067
4068 assert(AllIntFields &&
4069 "Unexpected non-integer value in special register string.");
4070 (void)AllIntFields;
4071
4072 // Need to combine the integer fields of the string into a single value
4073 // based on the bit encoding of MRS/MSR instruction.
4074 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
4075 (Ops[3] << 3) | (Ops[4]);
4076}
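
// Illustrative example (added annotation, not part of the upstream source):
// a register string such as "1:2:7:4:5" (o0:op1:CRn:CRm:op2) packs to
//   (1 << 14) | (2 << 11) | (7 << 7) | (4 << 3) | 5 == 21413,
// the immediate encoding expected by the MRS/MSR selection code below.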
4077
4078// Lower the read_register intrinsic to an MRS instruction node if the special
4079// register string argument is either of the form detailed in the ALCE (the
4080// form described in getIntOperandFromRegisterString) or is a named register
4081// known by the MRS SysReg mapper.
4082bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
4083 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4084 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4085 SDLoc DL(N);
4086
4087 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
4088
4089 unsigned Opcode64Bit = AArch64::MRS;
4090 int Imm = getIntOperandFromRegisterString(RegString->getString());
4091 if (Imm == -1) {
4092 // No match, Use the sysreg mapper to map the remaining possible strings to
4093 // the value for the register to be used for the instruction operand.
4094 const auto *TheReg =
4095 AArch64SysReg::lookupSysRegByName(RegString->getString());
4096 if (TheReg && TheReg->Readable &&
4097 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4098 Imm = TheReg->Encoding;
4099 else
4100 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4101
4102 if (Imm == -1) {
4103 // Still no match, see if this is "pc" or give up.
4104 if (!ReadIs128Bit && RegString->getString() == "pc") {
4105 Opcode64Bit = AArch64::ADR;
4106 Imm = 0;
4107 } else {
4108 return false;
4109 }
4110 }
4111 }
4112
4113 SDValue InChain = N->getOperand(0);
4114 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
4115 if (!ReadIs128Bit) {
4116 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
4117 {SysRegImm, InChain});
4118 } else {
4119 SDNode *MRRS = CurDAG->getMachineNode(
4120 AArch64::MRRS, DL,
4121 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
4122 {SysRegImm, InChain});
4123
4124 // Sysregs are not endian. The even register always contains the low half
4125 // of the register.
4126 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
4127 SDValue(MRRS, 0));
4128 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
4129 SDValue(MRRS, 0));
4130 SDValue OutChain = SDValue(MRRS, 1);
4131
4132 ReplaceUses(SDValue(N, 0), Lo);
4133 ReplaceUses(SDValue(N, 1), Hi);
4134 ReplaceUses(SDValue(N, 2), OutChain);
4135 };
4136 return true;
4137}
4138
4139// Lower the write_register intrinsic to an MSR instruction node if the special
4140// register string argument is either of the form detailed in the ALCE (the
4141// form described in getIntOperandFromRegisterString) or is a named register
4142// known by the MSR SysReg mapper.
4143bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
4144 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4145 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4146 SDLoc DL(N);
4147
4148 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
4149
4150 if (!WriteIs128Bit) {
4151 // Check if the register was one of those allowed as the pstatefield value
4152 // in the MSR (immediate) instruction. To accept the values allowed in the
4153 // pstatefield for the MSR (immediate) instruction, we also require that an
4154    // immediate value has been provided as an argument; we know that this is
4155    // the case, as it has been ensured by semantic checking.
4156 auto trySelectPState = [&](auto PMapper, unsigned State) {
4157 if (PMapper) {
4158 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
4159 "Expected a constant integer expression.");
4160 unsigned Reg = PMapper->Encoding;
4161 uint64_t Immed = N->getConstantOperandVal(2);
4162 CurDAG->SelectNodeTo(
4163 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
4164 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
4165 return true;
4166 }
4167 return false;
4168 };
4169
4170 if (trySelectPState(
4171 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
4172 AArch64::MSRpstateImm4))
4173 return true;
4174 if (trySelectPState(
4175 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
4176 AArch64::MSRpstateImm1))
4177 return true;
4178 }
4179
4180 int Imm = getIntOperandFromRegisterString(RegString->getString());
4181 if (Imm == -1) {
4182 // Use the sysreg mapper to attempt to map the remaining possible strings
4183 // to the value for the register to be used for the MSR (register)
4184 // instruction operand.
4185 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
4186 if (TheReg && TheReg->Writeable &&
4187 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4188 Imm = TheReg->Encoding;
4189 else
4190 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4191
4192 if (Imm == -1)
4193 return false;
4194 }
4195
4196 SDValue InChain = N->getOperand(0);
4197 if (!WriteIs128Bit) {
4198 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
4199 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4200 N->getOperand(2), InChain);
4201 } else {
4202 // No endian swap. The lower half always goes into the even subreg, and the
4203    // higher half always into the odd subreg.
4204 SDNode *Pair = CurDAG->getMachineNode(
4205 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
4206 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
4207 MVT::i32),
4208 N->getOperand(2),
4209 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
4210 N->getOperand(3),
4211 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
4212
4213 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
4214 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4215 SDValue(Pair, 0), InChain);
4216 }
4217
4218 return true;
4219}
4220
4221/// We've got special pseudo-instructions for these
4222bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4223 unsigned Opcode;
4224 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4225
4226 // Leave IR for LSE if subtarget supports it.
4227 if (Subtarget->hasLSE()) return false;
4228
4229 if (MemTy == MVT::i8)
4230 Opcode = AArch64::CMP_SWAP_8;
4231 else if (MemTy == MVT::i16)
4232 Opcode = AArch64::CMP_SWAP_16;
4233 else if (MemTy == MVT::i32)
4234 Opcode = AArch64::CMP_SWAP_32;
4235 else if (MemTy == MVT::i64)
4236 Opcode = AArch64::CMP_SWAP_64;
4237 else
4238 llvm_unreachable("Unknown AtomicCmpSwap type");
4239
4240 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4241 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4242 N->getOperand(0)};
4243 SDNode *CmpSwap = CurDAG->getMachineNode(
4244 Opcode, SDLoc(N),
4245 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4246
4247 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4248 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4249
4250 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4251 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4252 CurDAG->RemoveDeadNode(N);
4253
4254 return true;
4255}
4256
4257bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4258 SDValue &Shift, bool Negate) {
4259 if (!isa<ConstantSDNode>(N))
4260 return false;
4261
4262 SDLoc DL(N);
4263 APInt Val =
4264 cast<ConstantSDNode>(N)->getAPIntValue().trunc(VT.getFixedSizeInBits());
4265
4266 if (Negate)
4267 Val = -Val;
4268
4269 switch (VT.SimpleTy) {
4270 case MVT::i8:
4271 // All immediates are supported.
4272 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4273 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4274 return true;
4275 case MVT::i16:
4276 case MVT::i32:
4277 case MVT::i64:
4278 // Support 8bit unsigned immediates.
4279 if ((Val & ~0xff) == 0) {
4280 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4281 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4282 return true;
4283 }
4284 // Support 16bit unsigned immediates that are a multiple of 256.
4285 if ((Val & ~0xff00) == 0) {
4286 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4287 Imm = CurDAG->getTargetConstant(Val.lshr(8).getZExtValue(), DL, MVT::i32);
4288 return true;
4289 }
4290 break;
4291 default:
4292 break;
4293 }
4294
4295 return false;
4296}
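
// Illustrative example (added annotation, not part of the upstream source):
// for i16/i32/i64 element types the routine above accepts, e.g., an
// immediate of 0x2000 as Imm = 0x20 with Shift = 8 (the "#imm, lsl #8" form
// of the SVE ADD/SUB immediate), while a value such as 0x1234 is rejected.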
4297
4298bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4299 SDValue &Imm, SDValue &Shift,
4300 bool Negate) {
4301 if (!isa<ConstantSDNode>(N))
4302 return false;
4303
4304 SDLoc DL(N);
4305 int64_t Val = cast<ConstantSDNode>(N)
4306 ->getAPIntValue()
4307                    .trunc(VT.getFixedSizeInBits())
4308                    .getSExtValue();
4309
4310 if (Negate)
4311 Val = -Val;
4312
4313 // Signed saturating instructions treat their immediate operand as unsigned,
4314 // whereas the related intrinsics define their operands to be signed. This
4315 // means we can only use the immediate form when the operand is non-negative.
4316 if (Val < 0)
4317 return false;
4318
4319 switch (VT.SimpleTy) {
4320 case MVT::i8:
4321 // All positive immediates are supported.
4322 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4323 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4324 return true;
4325 case MVT::i16:
4326 case MVT::i32:
4327 case MVT::i64:
4328 // Support 8bit positive immediates.
4329 if (Val <= 255) {
4330 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4331 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4332 return true;
4333 }
4334 // Support 16bit positive immediates that are a multiple of 256.
4335 if (Val <= 65280 && Val % 256 == 0) {
4336 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4337 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4338 return true;
4339 }
4340 break;
4341 default:
4342 break;
4343 }
4344
4345 return false;
4346}
4347
4348bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4349 SDValue &Shift) {
4350 if (!isa<ConstantSDNode>(N))
4351 return false;
4352
4353 SDLoc DL(N);
4354 int64_t Val = cast<ConstantSDNode>(N)
4355 ->getAPIntValue()
4356 .trunc(VT.getFixedSizeInBits())
4357 .getSExtValue();
4358 int32_t ImmVal, ShiftVal;
4359 if (!AArch64_AM::isSVECpyDupImm(VT.getScalarSizeInBits(), Val, ImmVal,
4360 ShiftVal))
4361 return false;
4362
4363 Shift = CurDAG->getTargetConstant(ShiftVal, DL, MVT::i32);
4364 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
4365 return true;
4366}
4367
4368bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4369 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4370 int64_t ImmVal = CNode->getSExtValue();
4371 SDLoc DL(N);
4372 if (ImmVal >= -128 && ImmVal < 128) {
4373 Imm = CurDAG->getSignedTargetConstant(ImmVal, DL, MVT::i32);
4374 return true;
4375 }
4376 }
4377 return false;
4378}
4379
4380bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4381 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4382 uint64_t ImmVal = CNode->getZExtValue();
4383
4384 switch (VT.SimpleTy) {
4385 case MVT::i8:
4386 ImmVal &= 0xFF;
4387 break;
4388 case MVT::i16:
4389 ImmVal &= 0xFFFF;
4390 break;
4391 case MVT::i32:
4392 ImmVal &= 0xFFFFFFFF;
4393 break;
4394 case MVT::i64:
4395 break;
4396 default:
4397 llvm_unreachable("Unexpected type");
4398 }
4399
4400 if (ImmVal < 256) {
4401 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4402 return true;
4403 }
4404 }
4405 return false;
4406}
4407
4408bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4409 bool Invert) {
4410 uint64_t ImmVal;
4411 if (auto CI = dyn_cast<ConstantSDNode>(N))
4412 ImmVal = CI->getZExtValue();
4413 else if (auto CFP = dyn_cast<ConstantFPSDNode>(N))
4414 ImmVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
4415 else
4416 return false;
4417
4418 if (Invert)
4419 ImmVal = ~ImmVal;
4420
4421 uint64_t encoding;
4422 if (!AArch64_AM::isSVELogicalImm(VT.getScalarSizeInBits(), ImmVal, encoding))
4423 return false;
4424
4425 Imm = CurDAG->getTargetConstant(encoding, SDLoc(N), MVT::i64);
4426 return true;
4427}
4428
4429// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4430// Rather than attempt to normalise everything we can sometimes saturate the
4431// shift amount during selection. This function also allows for consistent
4432// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4433// required by the instructions.
4434bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4435 uint64_t High, bool AllowSaturation,
4436 SDValue &Imm) {
4437 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4438 uint64_t ImmVal = CN->getZExtValue();
4439
4440 // Reject shift amounts that are too small.
4441 if (ImmVal < Low)
4442 return false;
4443
4444 // Reject or saturate shift amounts that are too big.
4445 if (ImmVal > High) {
4446 if (!AllowSaturation)
4447 return false;
4448 ImmVal = High;
4449 }
4450
4451 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4452 return true;
4453 }
4454
4455 return false;
4456}
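
// Illustrative example (added annotation, not part of the upstream source):
// a caller matching a shift on 16-bit elements might pass Low = 0 and
// High = 15; a constant shift amount of 20 would then be rejected, or
// clamped to 15 when AllowSaturation is set (useful where an over-wide
// shift gives the same result as shifting by the maximum encodable amount).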
4457
4458bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4459 // tagp(FrameIndex, IRGstack, tag_offset):
4460 // since the offset between FrameIndex and IRGstack is a compile-time
4461 // constant, this can be lowered to a single ADDG instruction.
4462 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4463 return false;
4464 }
4465
4466 SDValue IRG_SP = N->getOperand(2);
4467 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4468 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4469 return false;
4470 }
4471
4472 const TargetLowering *TLI = getTargetLowering();
4473 SDLoc DL(N);
4474 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4475 SDValue FiOp = CurDAG->getTargetFrameIndex(
4476 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4477 int TagOffset = N->getConstantOperandVal(3);
4478
4479 SDNode *Out = CurDAG->getMachineNode(
4480 AArch64::TAGPstack, DL, MVT::i64,
4481 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4482 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4483 ReplaceNode(N, Out);
4484 return true;
4485}
4486
4487void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4488 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4489 "llvm.aarch64.tagp third argument must be an immediate");
4490 if (trySelectStackSlotTagP(N))
4491 return;
4492 // FIXME: above applies in any case when offset between Op1 and Op2 is a
4493 // compile-time constant, not just for stack allocations.
4494
4495 // General case for unrelated pointers in Op1 and Op2.
4496 SDLoc DL(N);
4497 int TagOffset = N->getConstantOperandVal(3);
4498 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4499 {N->getOperand(1), N->getOperand(2)});
4500 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4501 {SDValue(N1, 0), N->getOperand(2)});
4502 SDNode *N3 = CurDAG->getMachineNode(
4503 AArch64::ADDG, DL, MVT::i64,
4504 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4505 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4506 ReplaceNode(N, N3);
4507}
4508
4509bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4510 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4511
4512 // Bail when not a "cast" like insert_subvector.
4513 if (N->getConstantOperandVal(2) != 0)
4514 return false;
4515 if (!N->getOperand(0).isUndef())
4516 return false;
4517
4518 // Bail when normal isel should do the job.
4519 EVT VT = N->getValueType(0);
4520 EVT InVT = N->getOperand(1).getValueType();
4521 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4522 return false;
4523 if (InVT.getSizeInBits() <= 128)
4524 return false;
4525
4526 // NOTE: We can only get here when doing fixed length SVE code generation.
4527 // We do manual selection because the types involved are not linked to real
4528 // registers (despite being legal) and must be coerced into SVE registers.
4529
4530  assert(VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
4531         "Expected to insert into a packed scalable vector!");
4532
4533 SDLoc DL(N);
4534 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4535 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4536 N->getOperand(1), RC));
4537 return true;
4538}
4539
4540bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4541 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4542
4543 // Bail when not a "cast" like extract_subvector.
4544 if (N->getConstantOperandVal(1) != 0)
4545 return false;
4546
4547 // Bail when normal isel can do the job.
4548 EVT VT = N->getValueType(0);
4549 EVT InVT = N->getOperand(0).getValueType();
4550 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4551 return false;
4552 if (VT.getSizeInBits() <= 128)
4553 return false;
4554
4555 // NOTE: We can only get here when doing fixed length SVE code generation.
4556 // We do manual selection because the types involved are not linked to real
4557 // registers (despite being legal) and must be coerced into SVE registers.
4558
4559  assert(InVT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
4560         "Expected to extract from a packed scalable vector!");
4561
4562 SDLoc DL(N);
4563 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4564 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4565 N->getOperand(0), RC));
4566 return true;
4567}
4568
4569bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4570 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4571
4572 SDValue N0 = N->getOperand(0);
4573 SDValue N1 = N->getOperand(1);
4574
4575 EVT VT = N->getValueType(0);
4576 SDLoc DL(N);
4577
4578 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4579  // Rotate by a constant is a funnel shift in IR which is expanded to
4580 // an OR with shifted operands.
4581 // We do the following transform:
4582 // OR N0, N1 -> xar (x, y, imm)
4583 // Where:
4584 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4585 // N0 = SHL_PRED true, V, splat(bits-imm)
4586 // V = (xor x, y)
4587 if (VT.isScalableVector() &&
4588 (Subtarget->hasSVE2() ||
4589 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4590 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4591 N1.getOpcode() != AArch64ISD::SRL_PRED)
4592 std::swap(N0, N1);
4593 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4594 N1.getOpcode() != AArch64ISD::SRL_PRED)
4595 return false;
4596
4597 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4598 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4599 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4600 return false;
4601
4602 if (N0.getOperand(1) != N1.getOperand(1))
4603 return false;
4604
4605 SDValue R1, R2;
4606 bool IsXOROperand = true;
4607 if (N0.getOperand(1).getOpcode() != ISD::XOR) {
4608 IsXOROperand = false;
4609 } else {
4610 R1 = N0.getOperand(1).getOperand(0);
4611 R2 = N1.getOperand(1).getOperand(1);
4612 }
4613
4614 APInt ShlAmt, ShrAmt;
4615 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4616        !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
4617      return false;
4618
4619 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4620 return false;
4621
4622 if (!IsXOROperand) {
4623 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4624 SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, VT, Zero);
4625 SDValue MOVIV = SDValue(MOV, 0);
4626
4627 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4628 SDNode *SubRegToReg = CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL,
4629 VT, Zero, MOVIV, ZSub);
4630
4631 R1 = N1->getOperand(1);
4632 R2 = SDValue(SubRegToReg, 0);
4633 }
4634
4635 SDValue Imm =
4636 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4637
4638 SDValue Ops[] = {R1, R2, Imm};
4639    if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4640            VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4641 AArch64::XAR_ZZZI_D})) {
4642 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4643 return true;
4644 }
4645 return false;
4646 }
4647
4648 // We have Neon SHA3 XAR operation for v2i64 but for types
4649 // v4i32, v8i16, v16i8 we can use SVE operations when SVE2-SHA3
4650 // is available.
4651 EVT SVT;
4652 switch (VT.getSimpleVT().SimpleTy) {
4653 case MVT::v4i32:
4654 case MVT::v2i32:
4655 SVT = MVT::nxv4i32;
4656 break;
4657 case MVT::v8i16:
4658 case MVT::v4i16:
4659 SVT = MVT::nxv8i16;
4660 break;
4661 case MVT::v16i8:
4662 case MVT::v8i8:
4663 SVT = MVT::nxv16i8;
4664 break;
4665 case MVT::v2i64:
4666 case MVT::v1i64:
4667 SVT = Subtarget->hasSHA3() ? MVT::v2i64 : MVT::nxv2i64;
4668 break;
4669 default:
4670 return false;
4671 }
4672
4673 if ((!SVT.isScalableVector() && !Subtarget->hasSHA3()) ||
4674 (SVT.isScalableVector() && !Subtarget->hasSVE2()))
4675 return false;
4676
4677 if (N0->getOpcode() != AArch64ISD::VSHL ||
4678 N1->getOpcode() != AArch64ISD::VLSHR)
4679 return false;
4680
4681 if (N0->getOperand(0) != N1->getOperand(0))
4682 return false;
4683
4684 SDValue R1, R2;
4685 bool IsXOROperand = true;
4686 if (N1->getOperand(0)->getOpcode() != ISD::XOR) {
4687 IsXOROperand = false;
4688 } else {
4689 SDValue XOR = N0.getOperand(0);
4690 R1 = XOR.getOperand(0);
4691 R2 = XOR.getOperand(1);
4692 }
4693
4694 unsigned HsAmt = N0.getConstantOperandVal(1);
4695 unsigned ShAmt = N1.getConstantOperandVal(1);
4696
4697 SDValue Imm = CurDAG->getTargetConstant(
4698 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4699
4700 unsigned VTSizeInBits = VT.getScalarSizeInBits();
4701 if (ShAmt + HsAmt != VTSizeInBits)
4702 return false;
4703
4704 if (!IsXOROperand) {
4705 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4706 SDNode *MOV =
4707 CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, MVT::v2i64, Zero);
4708 SDValue MOVIV = SDValue(MOV, 0);
4709
4710 R1 = N1->getOperand(0);
4711 R2 = MOVIV;
4712 }
4713
4714 if (SVT != VT) {
4715 SDValue Undef =
4716 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, SVT), 0);
4717
4718 if (SVT.isScalableVector() && VT.is64BitVector()) {
4719 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4720
4721 SDValue UndefQ = SDValue(
4722 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, QVT), 0);
4723 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4724
4725 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4726 UndefQ, R1, DSub),
4727 0);
4728 if (R2.getValueType() == VT)
4729 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4730 UndefQ, R2, DSub),
4731 0);
4732 }
4733
4734 SDValue SubReg = CurDAG->getTargetConstant(
4735 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL, MVT::i32);
4736
4737 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, Undef,
4738 R1, SubReg),
4739 0);
4740
4741 if (SVT.isScalableVector() || R2.getValueType() != SVT)
4742 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT,
4743 Undef, R2, SubReg),
4744 0);
4745 }
4746
4747 SDValue Ops[] = {R1, R2, Imm};
4748 SDNode *XAR = nullptr;
4749
4750 if (SVT.isScalableVector()) {
4751    if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4752            SVT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4753 AArch64::XAR_ZZZI_D}))
4754 XAR = CurDAG->getMachineNode(Opc, DL, SVT, Ops);
4755 } else {
4756 XAR = CurDAG->getMachineNode(AArch64::XAR, DL, SVT, Ops);
4757 }
4758
4759 assert(XAR && "Unexpected NULL value for XAR instruction in DAG");
4760
4761 if (SVT != VT) {
4762 if (VT.is64BitVector() && SVT.isScalableVector()) {
4763 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4764
4765 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4766 SDNode *Q = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, QVT,
4767 SDValue(XAR, 0), ZSub);
4768
4769 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4770 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4771 SDValue(Q, 0), DSub);
4772 } else {
4773 SDValue SubReg = CurDAG->getTargetConstant(
4774 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL,
4775 MVT::i32);
4776 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4777 SDValue(XAR, 0), SubReg);
4778 }
4779 }
4780 ReplaceNode(N, XAR);
4781 return true;
4782}
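
// Illustrative example (added annotation, not part of the upstream source):
// with +sha3, a rotate of an XOR such as rotr(xor(%x, %y), 11) on <2 x i64>
// reaches this point as (or (VSHL (xor x, y), 53), (VLSHR (xor x, y), 11))
// and is selected to a single "xar v0.2d, v1.2d, v2.2d, #11"; the SVE2
// XAR_ZZZI forms cover the narrower element types via scalable containers.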
4783
4784void AArch64DAGToDAGISel::Select(SDNode *Node) {
4785 // If we have a custom node, we already have selected!
4786 if (Node->isMachineOpcode()) {
4787 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4788 Node->setNodeId(-1);
4789 return;
4790 }
4791
4792 // Few custom selection stuff.
4793 EVT VT = Node->getValueType(0);
4794
4795 switch (Node->getOpcode()) {
4796 default:
4797 break;
4798
4799  case ISD::ATOMIC_CMP_SWAP:
4800    if (SelectCMP_SWAP(Node))
4801 return;
4802 break;
4803
4804 case ISD::READ_REGISTER:
4805 case AArch64ISD::MRRS:
4806 if (tryReadRegister(Node))
4807 return;
4808 break;
4809
4810  case ISD::WRITE_REGISTER:
4811  case AArch64ISD::MSRR:
4812 if (tryWriteRegister(Node))
4813 return;
4814 break;
4815
4816 case ISD::LOAD: {
4817 // Try to select as an indexed load. Fall through to normal processing
4818 // if we can't.
4819 if (tryIndexedLoad(Node))
4820 return;
4821 break;
4822 }
4823
4824 case ISD::SRL:
4825 case ISD::AND:
4826 case ISD::SRA:
4827  case ISD::SIGN_EXTEND_INREG:
4828    if (tryBitfieldExtractOp(Node))
4829 return;
4830 if (tryBitfieldInsertInZeroOp(Node))
4831 return;
4832 [[fallthrough]];
4833 case ISD::ROTR:
4834 case ISD::SHL:
4835 if (tryShiftAmountMod(Node))
4836 return;
4837 break;
4838
4839 case ISD::SIGN_EXTEND:
4840 if (tryBitfieldExtractOpFromSExt(Node))
4841 return;
4842 break;
4843
4844 case ISD::OR:
4845 if (tryBitfieldInsertOp(Node))
4846 return;
4847 if (trySelectXAR(Node))
4848 return;
4849 break;
4850
4851  case ISD::EXTRACT_SUBVECTOR: {
4852    if (trySelectCastScalableToFixedLengthVector(Node))
4853 return;
4854 break;
4855 }
4856
4857 case ISD::INSERT_SUBVECTOR: {
4858 if (trySelectCastFixedLengthToScalableVector(Node))
4859 return;
4860 break;
4861 }
4862
4863 case ISD::Constant: {
4864 // Materialize zero constants as copies from WZR/XZR. This allows
4865 // the coalescer to propagate these into other instructions.
4866 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4867 if (ConstNode->isZero()) {
4868 if (VT == MVT::i32) {
4869 SDValue New = CurDAG->getCopyFromReg(
4870 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4871 ReplaceNode(Node, New.getNode());
4872 return;
4873 } else if (VT == MVT::i64) {
4874 SDValue New = CurDAG->getCopyFromReg(
4875 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4876 ReplaceNode(Node, New.getNode());
4877 return;
4878 }
4879 }
4880 break;
4881 }
4882
4883 case ISD::FrameIndex: {
4884 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4885 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4886 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4887 const TargetLowering *TLI = getTargetLowering();
4888 SDValue TFI = CurDAG->getTargetFrameIndex(
4889 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4890 SDLoc DL(Node);
4891 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4892 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4893 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4894 return;
4895 }
4896 case ISD::INTRINSIC_W_CHAIN: {
4897 unsigned IntNo = Node->getConstantOperandVal(1);
4898 switch (IntNo) {
4899 default:
4900 break;
4901 case Intrinsic::aarch64_gcsss: {
4902 SDLoc DL(Node);
4903 SDValue Chain = Node->getOperand(0);
4904 SDValue Val = Node->getOperand(2);
4905 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
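// Expand the GCS stack-switch intrinsic into the two-step GCSSS1/GCSSS2
// sequence; the second instruction, seeded with XZR, produces the
// intrinsic's i64 result.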
4906 SDNode *SS1 =
4907 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
4908 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
4909 MVT::Other, Zero, SDValue(SS1, 0));
4910 ReplaceNode(Node, SS2);
4911 return;
4912 }
4913 case Intrinsic::aarch64_ldaxp:
4914 case Intrinsic::aarch64_ldxp: {
4915 unsigned Op =
4916 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4917 SDValue MemAddr = Node->getOperand(2);
4918 SDLoc DL(Node);
4919 SDValue Chain = Node->getOperand(0);
4920
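// The exclusive load-pair yields two i64 results plus a chain; the acquire
// variant selects LDAXP, otherwise LDXP.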
4921 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4922 MVT::Other, MemAddr, Chain);
4923
4924 // Transfer memoperands.
4925 MachineMemOperand *MemOp =
4926 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4927 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4928 ReplaceNode(Node, Ld);
4929 return;
4930 }
4931 case Intrinsic::aarch64_stlxp:
4932 case Intrinsic::aarch64_stxp: {
4933 unsigned Op =
4934 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4935 SDLoc DL(Node);
4936 SDValue Chain = Node->getOperand(0);
4937 SDValue ValLo = Node->getOperand(2);
4938 SDValue ValHi = Node->getOperand(3);
4939 SDValue MemAddr = Node->getOperand(4);
4940
4941 // Place arguments in the right order.
4942 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4943
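// The store-exclusive pair also produces an i32 status result (zero on
// success) alongside the chain.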
4944 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4945 // Transfer memoperands.
4946 MachineMemOperand *MemOp =
4947 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4948 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4949
4950 ReplaceNode(Node, St);
4951 return;
4952 }
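// NEON structured loads (ld1xN/ldN/ldNr): each case below dispatches on the
// concrete vector type to the D- or Q-form instruction and passes the first
// subregister of the result tuple (dsub0 or qsub0).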
4953 case Intrinsic::aarch64_neon_ld1x2:
4954 if (VT == MVT::v8i8) {
4955 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4956 return;
4957 } else if (VT == MVT::v16i8) {
4958 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4959 return;
4960 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4961 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4962 return;
4963 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4964 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
4965 return;
4966 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4967 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
4968 return;
4969 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4970 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
4971 return;
4972 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4973 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4974 return;
4975 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4976 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
4977 return;
4978 }
4979 break;
4980 case Intrinsic::aarch64_neon_ld1x3:
4981 if (VT == MVT::v8i8) {
4982 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
4983 return;
4984 } else if (VT == MVT::v16i8) {
4985 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
4986 return;
4987 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4988 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
4989 return;
4990 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4991 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
4992 return;
4993 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4994 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
4995 return;
4996 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4997 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
4998 return;
4999 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5000 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5001 return;
5002 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5003 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
5004 return;
5005 }
5006 break;
5007 case Intrinsic::aarch64_neon_ld1x4:
5008 if (VT == MVT::v8i8) {
5009 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
5010 return;
5011 } else if (VT == MVT::v16i8) {
5012 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
5013 return;
5014 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5015 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
5016 return;
5017 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5018 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
5019 return;
5020 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5021 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
5022 return;
5023 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5024 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
5025 return;
5026 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5027 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5028 return;
5029 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5030 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
5031 return;
5032 }
5033 break;
5034 case Intrinsic::aarch64_neon_ld2:
5035 if (VT == MVT::v8i8) {
5036 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
5037 return;
5038 } else if (VT == MVT::v16i8) {
5039 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
5040 return;
5041 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5042 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
5043 return;
5044 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5045 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
5046 return;
5047 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5048 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
5049 return;
5050 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5051 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
5052 return;
5053 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5054 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5055 return;
5056 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5057 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
5058 return;
5059 }
5060 break;
5061 case Intrinsic::aarch64_neon_ld3:
5062 if (VT == MVT::v8i8) {
5063 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
5064 return;
5065 } else if (VT == MVT::v16i8) {
5066 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
5067 return;
5068 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5069 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
5070 return;
5071 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5072 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
5073 return;
5074 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5075 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
5076 return;
5077 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5078 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
5079 return;
5080 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5081 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5082 return;
5083 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5084 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
5085 return;
5086 }
5087 break;
5088 case Intrinsic::aarch64_neon_ld4:
5089 if (VT == MVT::v8i8) {
5090 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
5091 return;
5092 } else if (VT == MVT::v16i8) {
5093 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
5094 return;
5095 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5096 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
5097 return;
5098 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5099 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
5100 return;
5101 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5102 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
5103 return;
5104 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5105 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
5106 return;
5107 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5108 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5109 return;
5110 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5111 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
5112 return;
5113 }
5114 break;
5115 case Intrinsic::aarch64_neon_ld2r:
5116 if (VT == MVT::v8i8) {
5117 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
5118 return;
5119 } else if (VT == MVT::v16i8) {
5120 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
5121 return;
5122 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5123 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
5124 return;
5125 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5126 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
5127 return;
5128 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5129 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
5130 return;
5131 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5132 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
5133 return;
5134 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5135 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
5136 return;
5137 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5138 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
5139 return;
5140 }
5141 break;
5142 case Intrinsic::aarch64_neon_ld3r:
5143 if (VT == MVT::v8i8) {
5144 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
5145 return;
5146 } else if (VT == MVT::v16i8) {
5147 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
5148 return;
5149 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5150 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
5151 return;
5152 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5153 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
5154 return;
5155 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5156 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
5157 return;
5158 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5159 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
5160 return;
5161 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5162 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
5163 return;
5164 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5165 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
5166 return;
5167 }
5168 break;
5169 case Intrinsic::aarch64_neon_ld4r:
5170 if (VT == MVT::v8i8) {
5171 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
5172 return;
5173 } else if (VT == MVT::v16i8) {
5174 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
5175 return;
5176 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5177 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
5178 return;
5179 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5180 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
5181 return;
5182 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5183 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
5184 return;
5185 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5186 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
5187 return;
5188 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5189 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
5190 return;
5191 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5192 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
5193 return;
5194 }
5195 break;
5196 case Intrinsic::aarch64_neon_ld2lane:
5197 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5198 SelectLoadLane(Node, 2, AArch64::LD2i8);
5199 return;
5200 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5201 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5202 SelectLoadLane(Node, 2, AArch64::LD2i16);
5203 return;
5204 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5205 VT == MVT::v2f32) {
5206 SelectLoadLane(Node, 2, AArch64::LD2i32);
5207 return;
5208 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5209 VT == MVT::v1f64) {
5210 SelectLoadLane(Node, 2, AArch64::LD2i64);
5211 return;
5212 }
5213 break;
5214 case Intrinsic::aarch64_neon_ld3lane:
5215 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5216 SelectLoadLane(Node, 3, AArch64::LD3i8);
5217 return;
5218 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5219 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5220 SelectLoadLane(Node, 3, AArch64::LD3i16);
5221 return;
5222 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5223 VT == MVT::v2f32) {
5224 SelectLoadLane(Node, 3, AArch64::LD3i32);
5225 return;
5226 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5227 VT == MVT::v1f64) {
5228 SelectLoadLane(Node, 3, AArch64::LD3i64);
5229 return;
5230 }
5231 break;
5232 case Intrinsic::aarch64_neon_ld4lane:
5233 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5234 SelectLoadLane(Node, 4, AArch64::LD4i8);
5235 return;
5236 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5237 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5238 SelectLoadLane(Node, 4, AArch64::LD4i16);
5239 return;
5240 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5241 VT == MVT::v2f32) {
5242 SelectLoadLane(Node, 4, AArch64::LD4i32);
5243 return;
5244 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5245 VT == MVT::v1f64) {
5246 SelectLoadLane(Node, 4, AArch64::LD4i64);
5247 return;
5248 }
5249 break;
5250 case Intrinsic::aarch64_ld64b:
5251 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
5252 return;
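// SVE structured loads with struct return: choose between the
// immediate-offset (_IMM) and register-offset forms of the instruction.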
5253 case Intrinsic::aarch64_sve_ld2q_sret: {
5254 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
5255 return;
5256 }
5257 case Intrinsic::aarch64_sve_ld3q_sret: {
5258 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
5259 return;
5260 }
5261 case Intrinsic::aarch64_sve_ld4q_sret: {
5262 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
5263 return;
5264 }
5265 case Intrinsic::aarch64_sve_ld2_sret: {
5266 if (VT == MVT::nxv16i8) {
5267 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
5268 true);
5269 return;
5270 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5271 VT == MVT::nxv8bf16) {
5272 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
5273 true);
5274 return;
5275 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5276 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
5277 true);
5278 return;
5279 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5280 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
5281 true);
5282 return;
5283 }
5284 break;
5285 }
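// Multi-vector predicate-as-counter loads: in streaming mode with SME2 the
// pseudo opcodes are selected; otherwise the contiguous forms require
// SVE2p1.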
5286 case Intrinsic::aarch64_sve_ld1_pn_x2: {
5287 if (VT == MVT::nxv16i8) {
5288 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5289 SelectContiguousMultiVectorLoad(
5290 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
5291 else if (Subtarget->hasSVE2p1())
5292 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
5293 AArch64::LD1B_2Z);
5294 else
5295 break;
5296 return;
5297 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5298 VT == MVT::nxv8bf16) {
5299 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5300 SelectContiguousMultiVectorLoad(
5301 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
5302 else if (Subtarget->hasSVE2p1())
5303 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
5304 AArch64::LD1H_2Z);
5305 else
5306 break;
5307 return;
5308 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5309 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5310 SelectContiguousMultiVectorLoad(
5311 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
5312 else if (Subtarget->hasSVE2p1())
5313 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
5314 AArch64::LD1W_2Z);
5315 else
5316 break;
5317 return;
5318 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5319 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5320 SelectContiguousMultiVectorLoad(
5321 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
5322 else if (Subtarget->hasSVE2p1())
5323 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
5324 AArch64::LD1D_2Z);
5325 else
5326 break;
5327 return;
5328 }
5329 break;
5330 }
5331 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5332 if (VT == MVT::nxv16i8) {
5333 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5334 SelectContiguousMultiVectorLoad(
5335 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5336 else if (Subtarget->hasSVE2p1())
5337 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5338 AArch64::LD1B_4Z);
5339 else
5340 break;
5341 return;
5342 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5343 VT == MVT::nxv8bf16) {
5344 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5345 SelectContiguousMultiVectorLoad(
5346 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5347 else if (Subtarget->hasSVE2p1())
5348 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5349 AArch64::LD1H_4Z);
5350 else
5351 break;
5352 return;
5353 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5354 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5355 SelectContiguousMultiVectorLoad(
5356 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5357 else if (Subtarget->hasSVE2p1())
5358 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5359 AArch64::LD1W_4Z);
5360 else
5361 break;
5362 return;
5363 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5364 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5365 SelectContiguousMultiVectorLoad(
5366 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5367 else if (Subtarget->hasSVE2p1())
5368 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5369 AArch64::LD1D_4Z);
5370 else
5371 break;
5372 return;
5373 }
5374 break;
5375 }
5376 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5377 if (VT == MVT::nxv16i8) {
5378 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5379 SelectContiguousMultiVectorLoad(Node, 2, 0,
5380 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5381 AArch64::LDNT1B_2Z_PSEUDO);
5382 else if (Subtarget->hasSVE2p1())
5383 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5384 AArch64::LDNT1B_2Z);
5385 else
5386 break;
5387 return;
5388 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5389 VT == MVT::nxv8bf16) {
5390 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5391 SelectContiguousMultiVectorLoad(Node, 2, 1,
5392 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5393 AArch64::LDNT1H_2Z_PSEUDO);
5394 else if (Subtarget->hasSVE2p1())
5395 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5396 AArch64::LDNT1H_2Z);
5397 else
5398 break;
5399 return;
5400 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5401 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5402 SelectContiguousMultiVectorLoad(Node, 2, 2,
5403 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5404 AArch64::LDNT1W_2Z_PSEUDO);
5405 else if (Subtarget->hasSVE2p1())
5406 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5407 AArch64::LDNT1W_2Z);
5408 else
5409 break;
5410 return;
5411 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5412 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5413 SelectContiguousMultiVectorLoad(Node, 2, 3,
5414 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5415 AArch64::LDNT1D_2Z_PSEUDO);
5416 else if (Subtarget->hasSVE2p1())
5417 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5418 AArch64::LDNT1D_2Z);
5419 else
5420 break;
5421 return;
5422 }
5423 break;
5424 }
5425 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5426 if (VT == MVT::nxv16i8) {
5427 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5428 SelectContiguousMultiVectorLoad(Node, 4, 0,
5429 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5430 AArch64::LDNT1B_4Z_PSEUDO);
5431 else if (Subtarget->hasSVE2p1())
5432 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5433 AArch64::LDNT1B_4Z);
5434 else
5435 break;
5436 return;
5437 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5438 VT == MVT::nxv8bf16) {
5439 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5440 SelectContiguousMultiVectorLoad(Node, 4, 1,
5441 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5442 AArch64::LDNT1H_4Z_PSEUDO);
5443 else if (Subtarget->hasSVE2p1())
5444 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5445 AArch64::LDNT1H_4Z);
5446 else
5447 break;
5448 return;
5449 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5450 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5451 SelectContiguousMultiVectorLoad(Node, 4, 2,
5452 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5453 AArch64::LDNT1W_4Z_PSEUDO);
5454 else if (Subtarget->hasSVE2p1())
5455 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5456 AArch64::LDNT1W_4Z);
5457 else
5458 break;
5459 return;
5460 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5461 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5462 SelectContiguousMultiVectorLoad(Node, 4, 3,
5463 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5464 AArch64::LDNT1D_4Z_PSEUDO);
5465 else if (Subtarget->hasSVE2p1())
5466 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5467 AArch64::LDNT1D_4Z);
5468 else
5469 break;
5470 return;
5471 }
5472 break;
5473 }
5474 case Intrinsic::aarch64_sve_ld3_sret: {
5475 if (VT == MVT::nxv16i8) {
5476 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5477 true);
5478 return;
5479 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5480 VT == MVT::nxv8bf16) {
5481 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5482 true);
5483 return;
5484 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5485 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5486 true);
5487 return;
5488 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5489 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5490 true);
5491 return;
5492 }
5493 break;
5494 }
5495 case Intrinsic::aarch64_sve_ld4_sret: {
5496 if (VT == MVT::nxv16i8) {
5497 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5498 true);
5499 return;
5500 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5501 VT == MVT::nxv8bf16) {
5502 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5503 true);
5504 return;
5505 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5506 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5507 true);
5508 return;
5509 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5510 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5511 true);
5512 return;
5513 }
5514 break;
5515 }
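// SME ZA reads: the MOVA variant is chosen by element size (ZAB0/ZAH0/
// ZAS0/ZAD0 base tile) and by horizontal (_H_) versus vertical (_V_)
// slices.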
5516 case Intrinsic::aarch64_sme_read_hor_vg2: {
5517 if (VT == MVT::nxv16i8) {
5518 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5519 AArch64::MOVA_2ZMXI_H_B);
5520 return;
5521 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5522 VT == MVT::nxv8bf16) {
5523 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5524 AArch64::MOVA_2ZMXI_H_H);
5525 return;
5526 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5527 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5528 AArch64::MOVA_2ZMXI_H_S);
5529 return;
5530 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5531 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5532 AArch64::MOVA_2ZMXI_H_D);
5533 return;
5534 }
5535 break;
5536 }
5537 case Intrinsic::aarch64_sme_read_ver_vg2: {
5538 if (VT == MVT::nxv16i8) {
5539 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5540 AArch64::MOVA_2ZMXI_V_B);
5541 return;
5542 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5543 VT == MVT::nxv8bf16) {
5544 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5545 AArch64::MOVA_2ZMXI_V_H);
5546 return;
5547 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5548 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5549 AArch64::MOVA_2ZMXI_V_S);
5550 return;
5551 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5552 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5553 AArch64::MOVA_2ZMXI_V_D);
5554 return;
5555 }
5556 break;
5557 }
5558 case Intrinsic::aarch64_sme_read_hor_vg4: {
5559 if (VT == MVT::nxv16i8) {
5560 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5561 AArch64::MOVA_4ZMXI_H_B);
5562 return;
5563 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5564 VT == MVT::nxv8bf16) {
5565 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5566 AArch64::MOVA_4ZMXI_H_H);
5567 return;
5568 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5569 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5570 AArch64::MOVA_4ZMXI_H_S);
5571 return;
5572 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5573 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5574 AArch64::MOVA_4ZMXI_H_D);
5575 return;
5576 }
5577 break;
5578 }
5579 case Intrinsic::aarch64_sme_read_ver_vg4: {
5580 if (VT == MVT::nxv16i8) {
5581 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5582 AArch64::MOVA_4ZMXI_V_B);
5583 return;
5584 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5585 VT == MVT::nxv8bf16) {
5586 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5587 AArch64::MOVA_4ZMXI_V_H);
5588 return;
5589 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5590 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5591 AArch64::MOVA_4ZMXI_V_S);
5592 return;
5593 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5594 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5595 AArch64::MOVA_4ZMXI_V_D);
5596 return;
5597 }
5598 break;
5599 }
5600 case Intrinsic::aarch64_sme_read_vg1x2: {
5601 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5602 AArch64::MOVA_VG2_2ZMXI);
5603 return;
5604 }
5605 case Intrinsic::aarch64_sme_read_vg1x4: {
5606 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5607 AArch64::MOVA_VG4_4ZMXI);
5608 return;
5609 }
5610 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5611 if (VT == MVT::nxv16i8) {
5612 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5613 return;
5614 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5615 VT == MVT::nxv8bf16) {
5616 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5617 return;
5618 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5619 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5620 return;
5621 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5622 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5623 return;
5624 }
5625 break;
5626 }
5627 case Intrinsic::aarch64_sme_readz_vert_x2: {
5628 if (VT == MVT::nxv16i8) {
5629 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5630 return;
5631 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5632 VT == MVT::nxv8bf16) {
5633 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5634 return;
5635 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5636 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5637 return;
5638 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5639 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5640 return;
5641 }
5642 break;
5643 }
5644 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5645 if (VT == MVT::nxv16i8) {
5646 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5647 return;
5648 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5649 VT == MVT::nxv8bf16) {
5650 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5651 return;
5652 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5653 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5654 return;
5655 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5656 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5657 return;
5658 }
5659 break;
5660 }
5661 case Intrinsic::aarch64_sme_readz_vert_x4: {
5662 if (VT == MVT::nxv16i8) {
5663 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5664 return;
5665 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5666 VT == MVT::nxv8bf16) {
5667 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5668 return;
5669 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5670 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5671 return;
5672 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5673 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5674 return;
5675 }
5676 break;
5677 }
5678 case Intrinsic::aarch64_sme_readz_x2: {
5679 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
5680 AArch64::ZA);
5681 return;
5682 }
5683 case Intrinsic::aarch64_sme_readz_x4: {
5684 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
5685 AArch64::ZA);
5686 return;
5687 }
5688 case Intrinsic::swift_async_context_addr: {
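// The Swift async context slot lives immediately below the frame pointer,
// so its address is materialized as FP - 8.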
5689 SDLoc DL(Node);
5690 SDValue Chain = Node->getOperand(0);
5691 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5692 SDValue Res = SDValue(
5693 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5694 CurDAG->getTargetConstant(8, DL, MVT::i32),
5695 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5696 0);
5697 ReplaceUses(SDValue(Node, 0), Res);
5698 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5699 CurDAG->RemoveDeadNode(Node);
5700
5701 auto &MF = CurDAG->getMachineFunction();
5702 MF.getFrameInfo().setFrameAddressIsTaken(true);
5703 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5704 return;
5705 }
5706 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5708 Node->getValueType(0),
5709 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5710 AArch64::LUTI2_4ZTZI_S}))
5711 // Second Immediate must be <= 3:
5712 SelectMultiVectorLutiLane(Node, 4, Opc, 3);
5713 return;
5714 }
5715 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5717 Node->getValueType(0),
5718 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5719 // Second Immediate must be <= 1:
5720 SelectMultiVectorLutiLane(Node, 4, Opc, 1);
5721 return;
5722 }
5723 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5725 Node->getValueType(0),
5726 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5727 AArch64::LUTI2_2ZTZI_S}))
5728 // Second Immediate must be <= 7:
5729 SelectMultiVectorLutiLane(Node, 2, Opc, 7);
5730 return;
5731 }
5732 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5734 Node->getValueType(0),
5735 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5736 AArch64::LUTI4_2ZTZI_S}))
5737 // Second Immediate must be <= 3:
5738 SelectMultiVectorLutiLane(Node, 2, Opc, 3);
5739 return;
5740 }
5741 case Intrinsic::aarch64_sme_luti4_zt_x4: {
5742 SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z);
5743 return;
5744 }
5745 case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
5747 Node->getValueType(0),
5748 {AArch64::BF1CVTL_2ZZ_BtoH, AArch64::F1CVTL_2ZZ_BtoH}))
5749 SelectCVTIntrinsicFP8(Node, 2, Opc);
5750 return;
5751 case Intrinsic::aarch64_sve_fp8_cvtl2_x2:
5753 Node->getValueType(0),
5754 {AArch64::BF2CVTL_2ZZ_BtoH, AArch64::F2CVTL_2ZZ_BtoH}))
5755 SelectCVTIntrinsicFP8(Node, 2, Opc);
5756 return;
5757 case Intrinsic::aarch64_sve_fp8_cvt1_x2:
5759 Node->getValueType(0),
5760 {AArch64::BF1CVT_2ZZ_BtoH, AArch64::F1CVT_2ZZ_BtoH}))
5761 SelectCVTIntrinsicFP8(Node, 2, Opc);
5762 return;
5763 case Intrinsic::aarch64_sve_fp8_cvt2_x2:
5765 Node->getValueType(0),
5766 {AArch64::BF2CVT_2ZZ_BtoH, AArch64::F2CVT_2ZZ_BtoH}))
5767 SelectCVTIntrinsicFP8(Node, 2, Opc);
5768 return;
5769 }
5770 } break;
5771 case ISD::INTRINSIC_WO_CHAIN: {
5772 unsigned IntNo = Node->getConstantOperandVal(0);
5773 switch (IntNo) {
5774 default:
5775 break;
5776 case Intrinsic::aarch64_tagp:
5777 SelectTagP(Node);
5778 return;
5779
5780 case Intrinsic::ptrauth_auth:
5781 SelectPtrauthAuth(Node);
5782 return;
5783
5784 case Intrinsic::ptrauth_resign:
5785 SelectPtrauthResign(Node);
5786 return;
5787
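// NEON table lookups: TBL zero-fills out-of-range lanes, while TBX leaves
// the destination lane unchanged, so the TBX cases pass true to supply the
// destination as an extra input.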
5788 case Intrinsic::aarch64_neon_tbl2:
5789 SelectTable(Node, 2,
5790 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5791 false);
5792 return;
5793 case Intrinsic::aarch64_neon_tbl3:
5794 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5795 : AArch64::TBLv16i8Three,
5796 false);
5797 return;
5798 case Intrinsic::aarch64_neon_tbl4:
5799 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5800 : AArch64::TBLv16i8Four,
5801 false);
5802 return;
5803 case Intrinsic::aarch64_neon_tbx2:
5804 SelectTable(Node, 2,
5805 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5806 true);
5807 return;
5808 case Intrinsic::aarch64_neon_tbx3:
5809 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5810 : AArch64::TBXv16i8Three,
5811 true);
5812 return;
5813 case Intrinsic::aarch64_neon_tbx4:
5814 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5815 : AArch64::TBXv16i8Four,
5816 true);
5817 return;
5818 case Intrinsic::aarch64_sve_srshl_single_x2:
5820 Node->getValueType(0),
5821 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5822 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
5823 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5824 return;
5825 case Intrinsic::aarch64_sve_srshl_single_x4:
5827 Node->getValueType(0),
5828 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
5829 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
5830 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5831 return;
5832 case Intrinsic::aarch64_sve_urshl_single_x2:
5834 Node->getValueType(0),
5835 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
5836 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
5837 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5838 return;
5839 case Intrinsic::aarch64_sve_urshl_single_x4:
5841 Node->getValueType(0),
5842 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
5843 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
5844 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5845 return;
5846 case Intrinsic::aarch64_sve_srshl_x2:
5848 Node->getValueType(0),
5849 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
5850 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
5851 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5852 return;
5853 case Intrinsic::aarch64_sve_srshl_x4:
5855 Node->getValueType(0),
5856 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
5857 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
5858 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5859 return;
5860 case Intrinsic::aarch64_sve_urshl_x2:
5862 Node->getValueType(0),
5863 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
5864 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
5865 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5866 return;
5867 case Intrinsic::aarch64_sve_urshl_x4:
5869 Node->getValueType(0),
5870 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
5871 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
5872 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5873 return;
5874 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
5876 Node->getValueType(0),
5877 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
5878 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
5879 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5880 return;
5881 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
5883 Node->getValueType(0),
5884 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
5885 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
5886 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5887 return;
5888 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
5890 Node->getValueType(0),
5891 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
5892 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
5893 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5894 return;
5895 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
5897 Node->getValueType(0),
5898 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
5899 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
5900 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5901 return;
5902 case Intrinsic::aarch64_sme_fp8_scale_single_x2:
5904 Node->getValueType(0),
5905 {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S,
5906 AArch64::FSCALE_2ZZ_D}))
5907 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5908 return;
5909 case Intrinsic::aarch64_sme_fp8_scale_single_x4:
5911 Node->getValueType(0),
5912 {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S,
5913 AArch64::FSCALE_4ZZ_D}))
5914 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5915 return;
5916 case Intrinsic::aarch64_sme_fp8_scale_x2:
5918 Node->getValueType(0),
5919 {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S,
5920 AArch64::FSCALE_2Z2Z_D}))
5921 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5922 return;
5923 case Intrinsic::aarch64_sme_fp8_scale_x4:
5925 Node->getValueType(0),
5926 {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S,
5927 AArch64::FSCALE_4Z4Z_D}))
5928 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5929 return;
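// While-pair intrinsics: each produces two predicate results, which
// SelectWhilePair splits out of the instruction's result tuple.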
5930 case Intrinsic::aarch64_sve_whilege_x2:
5932 Node->getValueType(0),
5933 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
5934 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
5935 SelectWhilePair(Node, Op);
5936 return;
5937 case Intrinsic::aarch64_sve_whilegt_x2:
5939 Node->getValueType(0),
5940 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
5941 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
5942 SelectWhilePair(Node, Op);
5943 return;
5944 case Intrinsic::aarch64_sve_whilehi_x2:
5946 Node->getValueType(0),
5947 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
5948 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
5949 SelectWhilePair(Node, Op);
5950 return;
5951 case Intrinsic::aarch64_sve_whilehs_x2:
5953 Node->getValueType(0),
5954 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
5955 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
5956 SelectWhilePair(Node, Op);
5957 return;
5958 case Intrinsic::aarch64_sve_whilele_x2:
5960 Node->getValueType(0),
5961 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
5962 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
5963 SelectWhilePair(Node, Op);
5964 return;
5965 case Intrinsic::aarch64_sve_whilelo_x2:
5967 Node->getValueType(0),
5968 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
5969 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
5970 SelectWhilePair(Node, Op);
5971 return;
5972 case Intrinsic::aarch64_sve_whilels_x2:
5974 Node->getValueType(0),
5975 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
5976 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
5977 SelectWhilePair(Node, Op);
5978 return;
5979 case Intrinsic::aarch64_sve_whilelt_x2:
5981 Node->getValueType(0),
5982 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
5983 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
5984 SelectWhilePair(Node, Op);
5985 return;
5986 case Intrinsic::aarch64_sve_smax_single_x2:
5988 Node->getValueType(0),
5989 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
5990 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
5991 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5992 return;
5993 case Intrinsic::aarch64_sve_umax_single_x2:
5995 Node->getValueType(0),
5996 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
5997 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
5998 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5999 return;
6000 case Intrinsic::aarch64_sve_fmax_single_x2:
6002 Node->getValueType(0),
6003 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
6004 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
6005 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6006 return;
6007 case Intrinsic::aarch64_sve_smax_single_x4:
6009 Node->getValueType(0),
6010 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
6011 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
6012 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6013 return;
6014 case Intrinsic::aarch64_sve_umax_single_x4:
6016 Node->getValueType(0),
6017 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
6018 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
6019 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6020 return;
6021 case Intrinsic::aarch64_sve_fmax_single_x4:
6023 Node->getValueType(0),
6024 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
6025 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
6026 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6027 return;
6028 case Intrinsic::aarch64_sve_smin_single_x2:
6030 Node->getValueType(0),
6031 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
6032 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
6033 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6034 return;
6035 case Intrinsic::aarch64_sve_umin_single_x2:
6037 Node->getValueType(0),
6038 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
6039 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
6040 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6041 return;
6042 case Intrinsic::aarch64_sve_fmin_single_x2:
6044 Node->getValueType(0),
6045 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
6046 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
6047 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6048 return;
6049 case Intrinsic::aarch64_sve_smin_single_x4:
6051 Node->getValueType(0),
6052 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
6053 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
6054 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6055 return;
6056 case Intrinsic::aarch64_sve_umin_single_x4:
6058 Node->getValueType(0),
6059 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
6060 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
6061 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6062 return;
6063 case Intrinsic::aarch64_sve_fmin_single_x4:
6065 Node->getValueType(0),
6066 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
6067 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
6068 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6069 return;
6070 case Intrinsic::aarch64_sve_smax_x2:
6072 Node->getValueType(0),
6073 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
6074 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
6075 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6076 return;
6077 case Intrinsic::aarch64_sve_umax_x2:
6079 Node->getValueType(0),
6080 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
6081 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
6082 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6083 return;
6084 case Intrinsic::aarch64_sve_fmax_x2:
6086 Node->getValueType(0),
6087 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
6088 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
6089 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6090 return;
6091 case Intrinsic::aarch64_sve_smax_x4:
6093 Node->getValueType(0),
6094 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
6095 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
6096 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6097 return;
6098 case Intrinsic::aarch64_sve_umax_x4:
6100 Node->getValueType(0),
6101 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
6102 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
6103 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6104 return;
6105 case Intrinsic::aarch64_sve_fmax_x4:
6107 Node->getValueType(0),
6108 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
6109 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
6110 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6111 return;
6112 case Intrinsic::aarch64_sme_famax_x2:
6114 Node->getValueType(0),
6115 {0, AArch64::FAMAX_2Z2Z_H, AArch64::FAMAX_2Z2Z_S,
6116 AArch64::FAMAX_2Z2Z_D}))
6117 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6118 return;
6119 case Intrinsic::aarch64_sme_famax_x4:
6121 Node->getValueType(0),
6122 {0, AArch64::FAMAX_4Z4Z_H, AArch64::FAMAX_4Z4Z_S,
6123 AArch64::FAMAX_4Z4Z_D}))
6124 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6125 return;
6126 case Intrinsic::aarch64_sme_famin_x2:
6128 Node->getValueType(0),
6129 {0, AArch64::FAMIN_2Z2Z_H, AArch64::FAMIN_2Z2Z_S,
6130 AArch64::FAMIN_2Z2Z_D}))
6131 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6132 return;
6133 case Intrinsic::aarch64_sme_famin_x4:
6135 Node->getValueType(0),
6136 {0, AArch64::FAMIN_4Z4Z_H, AArch64::FAMIN_4Z4Z_S,
6137 AArch64::FAMIN_4Z4Z_D}))
6138 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6139 return;
6140 case Intrinsic::aarch64_sve_smin_x2:
6142 Node->getValueType(0),
6143 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
6144 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
6145 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6146 return;
6147 case Intrinsic::aarch64_sve_umin_x2:
6149 Node->getValueType(0),
6150 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
6151 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
6152 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6153 return;
6154 case Intrinsic::aarch64_sve_fmin_x2:
6156 Node->getValueType(0),
6157 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
6158 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
6159 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6160 return;
6161 case Intrinsic::aarch64_sve_smin_x4:
6163 Node->getValueType(0),
6164 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
6165 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
6166 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6167 return;
6168 case Intrinsic::aarch64_sve_umin_x4:
6170 Node->getValueType(0),
6171 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
6172 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
6173 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6174 return;
6175 case Intrinsic::aarch64_sve_fmin_x4:
6177 Node->getValueType(0),
6178 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
6179 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
6180 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6181 return;
6182 case Intrinsic::aarch64_sve_fmaxnm_single_x2 :
6184 Node->getValueType(0),
6185 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
6186 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
6187 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6188 return;
6189 case Intrinsic::aarch64_sve_fmaxnm_single_x4 :
6191 Node->getValueType(0),
6192 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
6193 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
6194 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6195 return;
6196 case Intrinsic::aarch64_sve_fminnm_single_x2:
6198 Node->getValueType(0),
6199 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
6200 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
6201 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6202 return;
6203 case Intrinsic::aarch64_sve_fminnm_single_x4:
6205 Node->getValueType(0),
6206 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
6207 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
6208 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6209 return;
6210 case Intrinsic::aarch64_sve_fscale_single_x4:
6211 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::BFSCALE_4ZZ);
6212 return;
6213 case Intrinsic::aarch64_sve_fscale_single_x2:
6214 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::BFSCALE_2ZZ);
6215 return;
6216 case Intrinsic::aarch64_sve_fmul_single_x4:
6218 Node->getValueType(0),
6219 {AArch64::BFMUL_4ZZ, AArch64::FMUL_4ZZ_H, AArch64::FMUL_4ZZ_S,
6220 AArch64::FMUL_4ZZ_D}))
6221 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6222 return;
6223 case Intrinsic::aarch64_sve_fmul_single_x2:
6225 Node->getValueType(0),
6226 {AArch64::BFMUL_2ZZ, AArch64::FMUL_2ZZ_H, AArch64::FMUL_2ZZ_S,
6227 AArch64::FMUL_2ZZ_D}))
6228 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6229 return;
6230 case Intrinsic::aarch64_sve_fmaxnm_x2:
6232 Node->getValueType(0),
6233 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
6234 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
6235 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6236 return;
6237 case Intrinsic::aarch64_sve_fmaxnm_x4:
6239 Node->getValueType(0),
6240 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
6241 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
6242 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6243 return;
6244 case Intrinsic::aarch64_sve_fminnm_x2:
6246 Node->getValueType(0),
6247 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
6248 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
6249 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6250 return;
6251 case Intrinsic::aarch64_sve_fminnm_x4:
6253 Node->getValueType(0),
6254 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
6255 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
6256 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6257 return;
6258 case Intrinsic::aarch64_sve_aese_lane_x2:
6259 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESE_2ZZI_B);
6260 return;
6261 case Intrinsic::aarch64_sve_aesd_lane_x2:
6262 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESD_2ZZI_B);
6263 return;
6264 case Intrinsic::aarch64_sve_aesemc_lane_x2:
6265 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESEMC_2ZZI_B);
6266 return;
6267 case Intrinsic::aarch64_sve_aesdimc_lane_x2:
6268 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESDIMC_2ZZI_B);
6269 return;
6270 case Intrinsic::aarch64_sve_aese_lane_x4:
6271 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESE_4ZZI_B);
6272 return;
6273 case Intrinsic::aarch64_sve_aesd_lane_x4:
6274 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESD_4ZZI_B);
6275 return;
6276 case Intrinsic::aarch64_sve_aesemc_lane_x4:
6277 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESEMC_4ZZI_B);
6278 return;
6279 case Intrinsic::aarch64_sve_aesdimc_lane_x4:
6280 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESDIMC_4ZZI_B);
6281 return;
6282 case Intrinsic::aarch64_sve_pmlal_pair_x2:
6283 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::PMLAL_2ZZZ_Q);
6284 return;
6285 case Intrinsic::aarch64_sve_pmull_pair_x2: {
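// The polynomial-multiply pair returns a Z-register tuple; the two results
// are peeled off with zsub0/zsub1 subregister extracts.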
6286 SDLoc DL(Node);
6287 SmallVector<SDValue, 4> Regs(Node->ops().slice(1, 2));
6288 SDNode *Res =
6289 CurDAG->getMachineNode(AArch64::PMULL_2ZZZ_Q, DL, MVT::Untyped, Regs);
6290 SDValue SuperReg = SDValue(Res, 0);
6291 for (unsigned I = 0; I < 2; I++)
6292 ReplaceUses(SDValue(Node, I),
6293 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
6294 SuperReg));
6295 CurDAG->RemoveDeadNode(Node);
6296 return;
6297 }
6298 case Intrinsic::aarch64_sve_fscale_x4:
6299 SelectDestructiveMultiIntrinsic(Node, 4, true, AArch64::BFSCALE_4Z4Z);
6300 return;
6301 case Intrinsic::aarch64_sve_fscale_x2:
6302 SelectDestructiveMultiIntrinsic(Node, 2, true, AArch64::BFSCALE_2Z2Z);
6303 return;
6304 case Intrinsic::aarch64_sve_fmul_x4:
6306 Node->getValueType(0),
6307 {AArch64::BFMUL_4Z4Z, AArch64::FMUL_4Z4Z_H, AArch64::FMUL_4Z4Z_S,
6308 AArch64::FMUL_4Z4Z_D}))
6309 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6310 return;
6311 case Intrinsic::aarch64_sve_fmul_x2:
6313 Node->getValueType(0),
6314 {AArch64::BFMUL_2Z2Z, AArch64::FMUL_2Z2Z_H, AArch64::FMUL_2Z2Z_S,
6315 AArch64::FMUL_2Z2Z_D}))
6316 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6317 return;
6318 case Intrinsic::aarch64_sve_fcvtzs_x2:
6319 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
6320 return;
6321 case Intrinsic::aarch64_sve_scvtf_x2:
6322 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
6323 return;
6324 case Intrinsic::aarch64_sve_fcvtzu_x2:
6325 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
6326 return;
6327 case Intrinsic::aarch64_sve_ucvtf_x2:
6328 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
6329 return;
6330 case Intrinsic::aarch64_sve_fcvtzs_x4:
6331 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
6332 return;
6333 case Intrinsic::aarch64_sve_scvtf_x4:
6334 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
6335 return;
6336 case Intrinsic::aarch64_sve_fcvtzu_x4:
6337 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
6338 return;
6339 case Intrinsic::aarch64_sve_ucvtf_x4:
6340 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
6341 return;
6342 case Intrinsic::aarch64_sve_fcvt_widen_x2:
6343 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
6344 return;
6345 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
6346 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
6347 return;
6348 case Intrinsic::aarch64_sve_sclamp_single_x2:
6350 Node->getValueType(0),
6351 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
6352 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
6353 SelectClamp(Node, 2, Op);
6354 return;
6355 case Intrinsic::aarch64_sve_uclamp_single_x2:
6357 Node->getValueType(0),
6358 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
6359 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
6360 SelectClamp(Node, 2, Op);
6361 return;
6362 case Intrinsic::aarch64_sve_fclamp_single_x2:
6364 Node->getValueType(0),
6365 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
6366 AArch64::FCLAMP_VG2_2Z2Z_D}))
6367 SelectClamp(Node, 2, Op);
6368 return;
6369 case Intrinsic::aarch64_sve_bfclamp_single_x2:
6370 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
6371 return;
6372 case Intrinsic::aarch64_sve_sclamp_single_x4:
6374 Node->getValueType(0),
6375 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
6376 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
6377 SelectClamp(Node, 4, Op);
6378 return;
6379 case Intrinsic::aarch64_sve_uclamp_single_x4:
6381 Node->getValueType(0),
6382 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6383 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6384 SelectClamp(Node, 4, Op);
6385 return;
6386 case Intrinsic::aarch64_sve_fclamp_single_x4:
6388 Node->getValueType(0),
6389 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6390 AArch64::FCLAMP_VG4_4Z4Z_D}))
6391 SelectClamp(Node, 4, Op);
6392 return;
6393 case Intrinsic::aarch64_sve_bfclamp_single_x4:
6394 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
6395 return;
6396 case Intrinsic::aarch64_sve_add_single_x2:
6398 Node->getValueType(0),
6399 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6400 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6401 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6402 return;
6403 case Intrinsic::aarch64_sve_add_single_x4:
6405 Node->getValueType(0),
6406 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6407 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6408 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6409 return;
6410 case Intrinsic::aarch64_sve_zip_x2:
6412 Node->getValueType(0),
6413 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6414 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6415 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6416 return;
6417 case Intrinsic::aarch64_sve_zipq_x2:
6418 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6419 AArch64::ZIP_VG2_2ZZZ_Q);
6420 return;
6421 case Intrinsic::aarch64_sve_zip_x4:
6423 Node->getValueType(0),
6424 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6425 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6426 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6427 return;
6428 case Intrinsic::aarch64_sve_zipq_x4:
6429 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6430 AArch64::ZIP_VG4_4Z4Z_Q);
6431 return;
6432 case Intrinsic::aarch64_sve_uzp_x2:
6434 Node->getValueType(0),
6435 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6436 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6437 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6438 return;
6439 case Intrinsic::aarch64_sve_uzpq_x2:
6440 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6441 AArch64::UZP_VG2_2ZZZ_Q);
6442 return;
6443 case Intrinsic::aarch64_sve_uzp_x4:
6444 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6445 Node->getValueType(0),
6446 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6447 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6448 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6449 return;
6450 case Intrinsic::aarch64_sve_uzpq_x4:
6451 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6452 AArch64::UZP_VG4_4Z4Z_Q);
6453 return;
6454 case Intrinsic::aarch64_sve_sel_x2:
6455 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6456 Node->getValueType(0),
6457 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6458 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6459 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
6460 return;
6461 case Intrinsic::aarch64_sve_sel_x4:
6462 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6463 Node->getValueType(0),
6464 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6465 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6466 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
6467 return;
6468 case Intrinsic::aarch64_sve_frinta_x2:
6469 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
6470 return;
6471 case Intrinsic::aarch64_sve_frinta_x4:
6472 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
6473 return;
6474 case Intrinsic::aarch64_sve_frintm_x2:
6475 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
6476 return;
6477 case Intrinsic::aarch64_sve_frintm_x4:
6478 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
6479 return;
6480 case Intrinsic::aarch64_sve_frintn_x2:
6481 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
6482 return;
6483 case Intrinsic::aarch64_sve_frintn_x4:
6484 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
6485 return;
6486 case Intrinsic::aarch64_sve_frintp_x2:
6487 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
6488 return;
6489 case Intrinsic::aarch64_sve_frintp_x4:
6490 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
6491 return;
6492 case Intrinsic::aarch64_sve_sunpk_x2:
6493 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6494 Node->getValueType(0),
6495 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6496 AArch64::SUNPK_VG2_2ZZ_D}))
6497 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6498 return;
6499 case Intrinsic::aarch64_sve_uunpk_x2:
6500 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6501 Node->getValueType(0),
6502 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6503 AArch64::UUNPK_VG2_2ZZ_D}))
6504 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6505 return;
6506 case Intrinsic::aarch64_sve_sunpk_x4:
6507 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6508 Node->getValueType(0),
6509 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6510 AArch64::SUNPK_VG4_4Z2Z_D}))
6511 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6512 return;
6513 case Intrinsic::aarch64_sve_uunpk_x4:
6514 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6515 Node->getValueType(0),
6516 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6517 AArch64::UUNPK_VG4_4Z2Z_D}))
6518 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6519 return;
6520 case Intrinsic::aarch64_sve_pext_x2: {
6521 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6522 Node->getValueType(0),
6523 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6524 AArch64::PEXT_2PCI_D}))
6525 SelectPExtPair(Node, Op);
6526 return;
6527 }
6528 }
6529 break;
6530 }
6531 case ISD::INTRINSIC_VOID: {
6532 unsigned IntNo = Node->getConstantOperandVal(1);
6533 if (Node->getNumOperands() >= 3)
6534 VT = Node->getOperand(2)->getValueType(0);
6535 switch (IntNo) {
6536 default:
6537 break;
6538 case Intrinsic::aarch64_neon_st1x2: {
6539 if (VT == MVT::v8i8) {
6540 SelectStore(Node, 2, AArch64::ST1Twov8b);
6541 return;
6542 } else if (VT == MVT::v16i8) {
6543 SelectStore(Node, 2, AArch64::ST1Twov16b);
6544 return;
6545 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6546 VT == MVT::v4bf16) {
6547 SelectStore(Node, 2, AArch64::ST1Twov4h);
6548 return;
6549 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6550 VT == MVT::v8bf16) {
6551 SelectStore(Node, 2, AArch64::ST1Twov8h);
6552 return;
6553 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6554 SelectStore(Node, 2, AArch64::ST1Twov2s);
6555 return;
6556 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6557 SelectStore(Node, 2, AArch64::ST1Twov4s);
6558 return;
6559 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6560 SelectStore(Node, 2, AArch64::ST1Twov2d);
6561 return;
6562 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6563 SelectStore(Node, 2, AArch64::ST1Twov1d);
6564 return;
6565 }
6566 break;
6567 }
6568 case Intrinsic::aarch64_neon_st1x3: {
6569 if (VT == MVT::v8i8) {
6570 SelectStore(Node, 3, AArch64::ST1Threev8b);
6571 return;
6572 } else if (VT == MVT::v16i8) {
6573 SelectStore(Node, 3, AArch64::ST1Threev16b);
6574 return;
6575 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6576 VT == MVT::v4bf16) {
6577 SelectStore(Node, 3, AArch64::ST1Threev4h);
6578 return;
6579 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6580 VT == MVT::v8bf16) {
6581 SelectStore(Node, 3, AArch64::ST1Threev8h);
6582 return;
6583 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6584 SelectStore(Node, 3, AArch64::ST1Threev2s);
6585 return;
6586 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6587 SelectStore(Node, 3, AArch64::ST1Threev4s);
6588 return;
6589 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6590 SelectStore(Node, 3, AArch64::ST1Threev2d);
6591 return;
6592 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6593 SelectStore(Node, 3, AArch64::ST1Threev1d);
6594 return;
6595 }
6596 break;
6597 }
6598 case Intrinsic::aarch64_neon_st1x4: {
6599 if (VT == MVT::v8i8) {
6600 SelectStore(Node, 4, AArch64::ST1Fourv8b);
6601 return;
6602 } else if (VT == MVT::v16i8) {
6603 SelectStore(Node, 4, AArch64::ST1Fourv16b);
6604 return;
6605 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6606 VT == MVT::v4bf16) {
6607 SelectStore(Node, 4, AArch64::ST1Fourv4h);
6608 return;
6609 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6610 VT == MVT::v8bf16) {
6611 SelectStore(Node, 4, AArch64::ST1Fourv8h);
6612 return;
6613 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6614 SelectStore(Node, 4, AArch64::ST1Fourv2s);
6615 return;
6616 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6617 SelectStore(Node, 4, AArch64::ST1Fourv4s);
6618 return;
6619 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6620 SelectStore(Node, 4, AArch64::ST1Fourv2d);
6621 return;
6622 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6623 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6624 return;
6625 }
6626 break;
6627 }
6628 case Intrinsic::aarch64_neon_st2: {
6629 if (VT == MVT::v8i8) {
6630 SelectStore(Node, 2, AArch64::ST2Twov8b);
6631 return;
6632 } else if (VT == MVT::v16i8) {
6633 SelectStore(Node, 2, AArch64::ST2Twov16b);
6634 return;
6635 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6636 VT == MVT::v4bf16) {
6637 SelectStore(Node, 2, AArch64::ST2Twov4h);
6638 return;
6639 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6640 VT == MVT::v8bf16) {
6641 SelectStore(Node, 2, AArch64::ST2Twov8h);
6642 return;
6643 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6644 SelectStore(Node, 2, AArch64::ST2Twov2s);
6645 return;
6646 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6647 SelectStore(Node, 2, AArch64::ST2Twov4s);
6648 return;
6649 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6650 SelectStore(Node, 2, AArch64::ST2Twov2d);
6651 return;
6652 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6653 SelectStore(Node, 2, AArch64::ST1Twov1d);
6654 return;
6655 }
6656 break;
6657 }
6658 case Intrinsic::aarch64_neon_st3: {
6659 if (VT == MVT::v8i8) {
6660 SelectStore(Node, 3, AArch64::ST3Threev8b);
6661 return;
6662 } else if (VT == MVT::v16i8) {
6663 SelectStore(Node, 3, AArch64::ST3Threev16b);
6664 return;
6665 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6666 VT == MVT::v4bf16) {
6667 SelectStore(Node, 3, AArch64::ST3Threev4h);
6668 return;
6669 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6670 VT == MVT::v8bf16) {
6671 SelectStore(Node, 3, AArch64::ST3Threev8h);
6672 return;
6673 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6674 SelectStore(Node, 3, AArch64::ST3Threev2s);
6675 return;
6676 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6677 SelectStore(Node, 3, AArch64::ST3Threev4s);
6678 return;
6679 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6680 SelectStore(Node, 3, AArch64::ST3Threev2d);
6681 return;
6682 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6683 SelectStore(Node, 3, AArch64::ST1Threev1d);
6684 return;
6685 }
6686 break;
6687 }
6688 case Intrinsic::aarch64_neon_st4: {
6689 if (VT == MVT::v8i8) {
6690 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6691 return;
6692 } else if (VT == MVT::v16i8) {
6693 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6694 return;
6695 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6696 VT == MVT::v4bf16) {
6697 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6698 return;
6699 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6700 VT == MVT::v8bf16) {
6701 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6702 return;
6703 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6704 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6705 return;
6706 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6707 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6708 return;
6709 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6710 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6711 return;
6712 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6713 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6714 return;
6715 }
6716 break;
6717 }
6718 case Intrinsic::aarch64_neon_st2lane: {
6719 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6720 SelectStoreLane(Node, 2, AArch64::ST2i8);
6721 return;
6722 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6723 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6724 SelectStoreLane(Node, 2, AArch64::ST2i16);
6725 return;
6726 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6727 VT == MVT::v2f32) {
6728 SelectStoreLane(Node, 2, AArch64::ST2i32);
6729 return;
6730 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6731 VT == MVT::v1f64) {
6732 SelectStoreLane(Node, 2, AArch64::ST2i64);
6733 return;
6734 }
6735 break;
6736 }
6737 case Intrinsic::aarch64_neon_st3lane: {
6738 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6739 SelectStoreLane(Node, 3, AArch64::ST3i8);
6740 return;
6741 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6742 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6743 SelectStoreLane(Node, 3, AArch64::ST3i16);
6744 return;
6745 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6746 VT == MVT::v2f32) {
6747 SelectStoreLane(Node, 3, AArch64::ST3i32);
6748 return;
6749 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6750 VT == MVT::v1f64) {
6751 SelectStoreLane(Node, 3, AArch64::ST3i64);
6752 return;
6753 }
6754 break;
6755 }
6756 case Intrinsic::aarch64_neon_st4lane: {
6757 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6758 SelectStoreLane(Node, 4, AArch64::ST4i8);
6759 return;
6760 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6761 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6762 SelectStoreLane(Node, 4, AArch64::ST4i16);
6763 return;
6764 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6765 VT == MVT::v2f32) {
6766 SelectStoreLane(Node, 4, AArch64::ST4i32);
6767 return;
6768 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6769 VT == MVT::v1f64) {
6770 SelectStoreLane(Node, 4, AArch64::ST4i64);
6771 return;
6772 }
6773 break;
6774 }
6775 case Intrinsic::aarch64_sve_st2q: {
6776 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6777 return;
6778 }
6779 case Intrinsic::aarch64_sve_st3q: {
6780 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6781 return;
6782 }
6783 case Intrinsic::aarch64_sve_st4q: {
6784 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
6785 return;
6786 }
6787 case Intrinsic::aarch64_sve_st2: {
6788 if (VT == MVT::nxv16i8) {
6789 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
6790 return;
6791 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6792 VT == MVT::nxv8bf16) {
6793 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
6794 return;
6795 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6796 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
6797 return;
6798 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6799 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
6800 return;
6801 }
6802 break;
6803 }
6804 case Intrinsic::aarch64_sve_st3: {
6805 if (VT == MVT::nxv16i8) {
6806 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
6807 return;
6808 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6809 VT == MVT::nxv8bf16) {
6810 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
6811 return;
6812 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6813 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
6814 return;
6815 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6816 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
6817 return;
6818 }
6819 break;
6820 }
6821 case Intrinsic::aarch64_sve_st4: {
6822 if (VT == MVT::nxv16i8) {
6823 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
6824 return;
6825 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6826 VT == MVT::nxv8bf16) {
6827 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
6828 return;
6829 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6830 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
6831 return;
6832 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6833 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
6834 return;
6835 }
6836 break;
6837 }
6838 }
6839 break;
6840 }
6841 case AArch64ISD::LD2post: {
6842 if (VT == MVT::v8i8) {
6843 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
6844 return;
6845 } else if (VT == MVT::v16i8) {
6846 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
6847 return;
6848 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6849 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
6850 return;
6851 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6852 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
6853 return;
6854 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6855 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
6856 return;
6857 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6858 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
6859 return;
6860 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6861 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6862 return;
6863 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6864 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
6865 return;
6866 }
6867 break;
6868 }
6869 case AArch64ISD::LD3post: {
6870 if (VT == MVT::v8i8) {
6871 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
6872 return;
6873 } else if (VT == MVT::v16i8) {
6874 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
6875 return;
6876 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6877 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
6878 return;
6879 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6880 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
6881 return;
6882 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6883 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
6884 return;
6885 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6886 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
6887 return;
6888 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6889 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6890 return;
6891 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6892 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
6893 return;
6894 }
6895 break;
6896 }
6897 case AArch64ISD::LD4post: {
6898 if (VT == MVT::v8i8) {
6899 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
6900 return;
6901 } else if (VT == MVT::v16i8) {
6902 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
6903 return;
6904 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6905 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
6906 return;
6907 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6908 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
6909 return;
6910 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6911 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
6912 return;
6913 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6914 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
6915 return;
6916 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6917 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6918 return;
6919 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6920 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
6921 return;
6922 }
6923 break;
6924 }
6925 case AArch64ISD::LD1x2post: {
6926 if (VT == MVT::v8i8) {
6927 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
6928 return;
6929 } else if (VT == MVT::v16i8) {
6930 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
6931 return;
6932 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6933 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
6934 return;
6935 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6936 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
6937 return;
6938 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6939 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
6940 return;
6941 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6942 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
6943 return;
6944 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6945 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6946 return;
6947 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6948 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
6949 return;
6950 }
6951 break;
6952 }
6953 case AArch64ISD::LD1x3post: {
6954 if (VT == MVT::v8i8) {
6955 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
6956 return;
6957 } else if (VT == MVT::v16i8) {
6958 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
6959 return;
6960 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6961 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
6962 return;
6963 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6964 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
6965 return;
6966 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6967 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
6968 return;
6969 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6970 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
6971 return;
6972 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6973 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6974 return;
6975 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6976 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
6977 return;
6978 }
6979 break;
6980 }
6981 case AArch64ISD::LD1x4post: {
6982 if (VT == MVT::v8i8) {
6983 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
6984 return;
6985 } else if (VT == MVT::v16i8) {
6986 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
6987 return;
6988 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6989 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
6990 return;
6991 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6992 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
6993 return;
6994 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6995 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
6996 return;
6997 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6998 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
6999 return;
7000 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7001 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
7002 return;
7003 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7004 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
7005 return;
7006 }
7007 break;
7008 }
7009 case AArch64ISD::LD1DUPpost: {
7010 if (VT == MVT::v8i8) {
7011 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
7012 return;
7013 } else if (VT == MVT::v16i8) {
7014 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
7015 return;
7016 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7017 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
7018 return;
7019 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7020 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
7021 return;
7022 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7023 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
7024 return;
7025 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7026 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
7027 return;
7028 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7029 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
7030 return;
7031 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7032 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
7033 return;
7034 }
7035 break;
7036 }
7037 case AArch64ISD::LD2DUPpost: {
7038 if (VT == MVT::v8i8) {
7039 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
7040 return;
7041 } else if (VT == MVT::v16i8) {
7042 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
7043 return;
7044 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7045 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
7046 return;
7047 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7048 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
7049 return;
7050 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7051 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
7052 return;
7053 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7054 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
7055 return;
7056 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7057 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
7058 return;
7059 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7060 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
7061 return;
7062 }
7063 break;
7064 }
7065 case AArch64ISD::LD3DUPpost: {
7066 if (VT == MVT::v8i8) {
7067 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
7068 return;
7069 } else if (VT == MVT::v16i8) {
7070 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
7071 return;
7072 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7073 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
7074 return;
7075 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7076 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
7077 return;
7078 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7079 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
7080 return;
7081 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7082 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
7083 return;
7084 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7085 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
7086 return;
7087 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7088 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
7089 return;
7090 }
7091 break;
7092 }
7093 case AArch64ISD::LD4DUPpost: {
7094 if (VT == MVT::v8i8) {
7095 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
7096 return;
7097 } else if (VT == MVT::v16i8) {
7098 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
7099 return;
7100 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7101 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
7102 return;
7103 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7104 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
7105 return;
7106 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7107 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
7108 return;
7109 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7110 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
7111 return;
7112 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7113 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
7114 return;
7115 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7116 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
7117 return;
7118 }
7119 break;
7120 }
7121 case AArch64ISD::LD1LANEpost: {
7122 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7123 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
7124 return;
7125 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7126 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7127 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
7128 return;
7129 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7130 VT == MVT::v2f32) {
7131 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
7132 return;
7133 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7134 VT == MVT::v1f64) {
7135 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
7136 return;
7137 }
7138 break;
7139 }
7140 case AArch64ISD::LD2LANEpost: {
7141 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7142 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
7143 return;
7144 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7145 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7146 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
7147 return;
7148 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7149 VT == MVT::v2f32) {
7150 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
7151 return;
7152 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7153 VT == MVT::v1f64) {
7154 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
7155 return;
7156 }
7157 break;
7158 }
7159 case AArch64ISD::LD3LANEpost: {
7160 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7161 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
7162 return;
7163 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7164 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7165 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
7166 return;
7167 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7168 VT == MVT::v2f32) {
7169 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
7170 return;
7171 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7172 VT == MVT::v1f64) {
7173 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
7174 return;
7175 }
7176 break;
7177 }
7178 case AArch64ISD::LD4LANEpost: {
7179 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7180 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
7181 return;
7182 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7183 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7184 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
7185 return;
7186 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7187 VT == MVT::v2f32) {
7188 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
7189 return;
7190 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7191 VT == MVT::v1f64) {
7192 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
7193 return;
7194 }
7195 break;
7196 }
7197 case AArch64ISD::ST2post: {
7198 VT = Node->getOperand(1).getValueType();
7199 if (VT == MVT::v8i8) {
7200 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
7201 return;
7202 } else if (VT == MVT::v16i8) {
7203 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
7204 return;
7205 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7206 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
7207 return;
7208 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7209 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
7210 return;
7211 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7212 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
7213 return;
7214 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7215 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
7216 return;
7217 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7218 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
7219 return;
7220 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7221 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7222 return;
7223 }
7224 break;
7225 }
7226 case AArch64ISD::ST3post: {
7227 VT = Node->getOperand(1).getValueType();
7228 if (VT == MVT::v8i8) {
7229 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
7230 return;
7231 } else if (VT == MVT::v16i8) {
7232 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
7233 return;
7234 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7235 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
7236 return;
7237 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7238 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
7239 return;
7240 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7241 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
7242 return;
7243 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7244 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
7245 return;
7246 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7247 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
7248 return;
7249 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7250 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7251 return;
7252 }
7253 break;
7254 }
7255 case AArch64ISD::ST4post: {
7256 VT = Node->getOperand(1).getValueType();
7257 if (VT == MVT::v8i8) {
7258 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
7259 return;
7260 } else if (VT == MVT::v16i8) {
7261 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
7262 return;
7263 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7264 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
7265 return;
7266 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7267 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
7268 return;
7269 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7270 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
7271 return;
7272 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7273 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
7274 return;
7275 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7276 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
7277 return;
7278 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7279 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7280 return;
7281 }
7282 break;
7283 }
7284 case AArch64ISD::ST1x2post: {
7285 VT = Node->getOperand(1).getValueType();
7286 if (VT == MVT::v8i8) {
7287 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
7288 return;
7289 } else if (VT == MVT::v16i8) {
7290 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
7291 return;
7292 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7293 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
7294 return;
7295 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7296 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
7297 return;
7298 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7299 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
7300 return;
7301 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7302 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
7303 return;
7304 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7305 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7306 return;
7307 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7308 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
7309 return;
7310 }
7311 break;
7312 }
7313 case AArch64ISD::ST1x3post: {
7314 VT = Node->getOperand(1).getValueType();
7315 if (VT == MVT::v8i8) {
7316 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
7317 return;
7318 } else if (VT == MVT::v16i8) {
7319 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
7320 return;
7321 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7322 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
7323 return;
7324 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7325 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
7326 return;
7327 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7328 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
7329 return;
7330 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7331 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
7332 return;
7333 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7334 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7335 return;
7336 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7337 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
7338 return;
7339 }
7340 break;
7341 }
7342 case AArch64ISD::ST1x4post: {
7343 VT = Node->getOperand(1).getValueType();
7344 if (VT == MVT::v8i8) {
7345 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
7346 return;
7347 } else if (VT == MVT::v16i8) {
7348 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
7349 return;
7350 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7351 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
7352 return;
7353 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7354 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
7355 return;
7356 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7357 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
7358 return;
7359 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7360 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
7361 return;
7362 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7363 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7364 return;
7365 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7366 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
7367 return;
7368 }
7369 break;
7370 }
7371 case AArch64ISD::ST2LANEpost: {
7372 VT = Node->getOperand(1).getValueType();
7373 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7374 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
7375 return;
7376 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7377 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7378 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
7379 return;
7380 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7381 VT == MVT::v2f32) {
7382 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
7383 return;
7384 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7385 VT == MVT::v1f64) {
7386 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
7387 return;
7388 }
7389 break;
7390 }
7391 case AArch64ISD::ST3LANEpost: {
7392 VT = Node->getOperand(1).getValueType();
7393 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7394 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
7395 return;
7396 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7397 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7398 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
7399 return;
7400 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7401 VT == MVT::v2f32) {
7402 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
7403 return;
7404 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7405 VT == MVT::v1f64) {
7406 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
7407 return;
7408 }
7409 break;
7410 }
7411 case AArch64ISD::ST4LANEpost: {
7412 VT = Node->getOperand(1).getValueType();
7413 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7414 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
7415 return;
7416 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7417 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7418 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
7419 return;
7420 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7421 VT == MVT::v2f32) {
7422 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
7423 return;
7424 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7425 VT == MVT::v1f64) {
7426 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
7427 return;
7428 }
7429 break;
7430 }
7431 }
7432
7433 // Select the default instruction
7434 SelectCode(Node);
7435}
7436
7437/// createAArch64ISelDag - This pass converts a legalized DAG into a
7438/// AArch64-specific DAG, ready for instruction scheduling.
7439 FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
7440 CodeGenOptLevel OptLevel) {
7441 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7442}
7443
7444/// When \p PredVT is a scalable vector predicate in the form
7445/// MVT::nx<M>xi1, it builds the correspondent scalable vector of
7446/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7447/// structured vectors (NumVec >1), the output data type is
7448/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7449/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7450/// EVT.
7451 static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
7452 unsigned NumVec) {
7453 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
7454 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
7455 return EVT();
7456
7457 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7458 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7459 return EVT();
7460
7461 ElementCount EC = PredVT.getVectorElementCount();
7462 EVT ScalarVT =
7463 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7464 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
7465
7466 return MemVT;
7467}
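// Illustrative mappings, derived from the computation above (with
// AArch64::SVEBitsPerBlock == 128) rather than from additional upstream
// documentation:
//   PredVT = nxv16i1, NumVec = 1  -->  nxv16i8
//   PredVT = nxv4i1,  NumVec = 1  -->  nxv4i32
//   PredVT = nxv4i1,  NumVec = 2  -->  nxv8i32  (element count scaled by NumVec)
// Any other predicate type falls through to the invalid EVT() returns above.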
7468
7469/// Return the EVT of the data associated to a memory operation in \p
7470/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
7471 static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
7472 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Root))
7473 return MemIntr->getMemoryVT();
7474
7475 if (isa<MemSDNode>(Root)) {
7476 EVT MemVT = cast<MemSDNode>(Root)->getMemoryVT();
7477
7478 EVT DataVT;
7479 if (auto *Load = dyn_cast<LoadSDNode>(Root))
7480 DataVT = Load->getValueType(0);
7481 else if (auto *Load = dyn_cast<MaskedLoadSDNode>(Root))
7482 DataVT = Load->getValueType(0);
7483 else if (auto *Store = dyn_cast<StoreSDNode>(Root))
7484 DataVT = Store->getValue().getValueType();
7485 else if (auto *Store = dyn_cast<MaskedStoreSDNode>(Root))
7486 DataVT = Store->getValue().getValueType();
7487 else
7488 llvm_unreachable("Unexpected MemSDNode!");
7489
7490 return DataVT.changeVectorElementType(Ctx, MemVT.getVectorElementType());
7491 }
7492
7493 const unsigned Opcode = Root->getOpcode();
7494 // For custom ISD nodes, we have to look at them individually to extract the
7495 // type of the data moved to/from memory.
7496 switch (Opcode) {
7497 case AArch64ISD::LD1_MERGE_ZERO:
7498 case AArch64ISD::LD1S_MERGE_ZERO:
7499 case AArch64ISD::LDNF1_MERGE_ZERO:
7500 case AArch64ISD::LDNF1S_MERGE_ZERO:
7501 return cast<VTSDNode>(Root->getOperand(3))->getVT();
7502 case AArch64ISD::ST1_PRED:
7503 return cast<VTSDNode>(Root->getOperand(4))->getVT();
7504 default:
7505 break;
7506 }
7507
7508 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7509 return EVT();
7510
7511 switch (Root->getConstantOperandVal(1)) {
7512 default:
7513 return EVT();
7514 case Intrinsic::aarch64_sme_ldr:
7515 case Intrinsic::aarch64_sme_str:
7516 return MVT::nxv16i8;
7517 case Intrinsic::aarch64_sve_prf:
7518 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7519 // width of the predicate.
7520 return getPackedVectorTypeFromPredicateType(
7521 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7522 case Intrinsic::aarch64_sve_ld2_sret:
7523 case Intrinsic::aarch64_sve_ld2q_sret:
7524 return getPackedVectorTypeFromPredicateType(
7525 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7526 case Intrinsic::aarch64_sve_st2q:
7527 return getPackedVectorTypeFromPredicateType(
7528 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7529 case Intrinsic::aarch64_sve_ld3_sret:
7530 case Intrinsic::aarch64_sve_ld3q_sret:
7531 return getPackedVectorTypeFromPredicateType(
7532 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7533 case Intrinsic::aarch64_sve_st3q:
7534 return getPackedVectorTypeFromPredicateType(
7535 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7536 case Intrinsic::aarch64_sve_ld4_sret:
7537 case Intrinsic::aarch64_sve_ld4q_sret:
7538 return getPackedVectorTypeFromPredicateType(
7539 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7540 case Intrinsic::aarch64_sve_st4q:
7541 return getPackedVectorTypeFromPredicateType(
7542 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
7543 case Intrinsic::aarch64_sve_ld1udq:
7544 case Intrinsic::aarch64_sve_st1dq:
7545 return EVT(MVT::nxv1i64);
7546 case Intrinsic::aarch64_sve_ld1uwq:
7547 case Intrinsic::aarch64_sve_st1wq:
7548 return EVT(MVT::nxv1i32);
7549 }
7550}
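// For example, following the switch above: aarch64_sme_ldr/str always report
// nxv16i8, and an aarch64_sve_ld2_sret whose operand 2 (the governing
// predicate for this intrinsic) has type nxv8i1 resolves to
// getPackedVectorTypeFromPredicateType(Ctx, nxv8i1, /*NumVec=*/2) == nxv16i16.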
7551
7552/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7553/// Base + OffImm * sizeof(MemVT) for Min >= OffImm <= Max
7554/// where Root is the memory access using N for its address.
7555template <int64_t Min, int64_t Max>
7556bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7557 SDValue &Base,
7558 SDValue &OffImm) {
7559 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
7560 const DataLayout &DL = CurDAG->getDataLayout();
7561 const MachineFrameInfo &MFI = MF->getFrameInfo();
7562
7563 if (N.getOpcode() == ISD::FrameIndex) {
7564 int FI = cast<FrameIndexSDNode>(N)->getIndex();
7565 // We can only encode VL scaled offsets, so only fold in frame indexes
7566 // referencing SVE objects.
7567 if (MFI.hasScalableStackID(FI)) {
7568 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7569 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7570 return true;
7571 }
7572
7573 return false;
7574 }
7575
7576 if (MemVT == EVT())
7577 return false;
7578
7579 if (N.getOpcode() != ISD::ADD)
7580 return false;
7581
7582 SDValue VScale = N.getOperand(1);
7583 int64_t MulImm = std::numeric_limits<int64_t>::max();
7584 if (VScale.getOpcode() == ISD::VSCALE) {
7585 MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7586 } else if (auto C = dyn_cast<ConstantSDNode>(VScale)) {
7587 int64_t ByteOffset = C->getSExtValue();
7588 const auto KnownVScale =
7589 Subtarget->getSVEVectorSizeInBits() / AArch64::SVEBitsPerBlock;
7590
7591 if (!KnownVScale || ByteOffset % KnownVScale != 0)
7592 return false;
7593
7594 MulImm = ByteOffset / KnownVScale;
7595 } else
7596 return false;
7597
7598 TypeSize TS = MemVT.getSizeInBits();
7599 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7600
7601 if ((MulImm % MemWidthBytes) != 0)
7602 return false;
7603
7604 int64_t Offset = MulImm / MemWidthBytes;
7605 if ((Offset < Min) || (Offset > Max))
7606 return false;
7607
7608 Base = N.getOperand(0);
7609 if (Base.getOpcode() == ISD::FrameIndex) {
7610 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7611 // We can only encode VL scaled offsets, so only fold in frame indexes
7612 // referencing SVE objects.
7613 if (MFI.hasScalableStackID(FI))
7614 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7615 }
7616
7617 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7618 return true;
7619}
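// Worked example of the folding above (a sketch, not upstream documentation):
// with MemVT = nxv4i32 the memory width is 16 bytes per vscale, so an address
// of the form (add Base, (vscale 32)) gives MulImm = 32 and
// OffImm = 32 / 16 = 2, which is accepted whenever Min <= 2 <= Max. A
// MulImm of 24 is rejected because it is not a multiple of the 16-byte width.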
7620
7621/// Select register plus register addressing mode for SVE, with scaled
7622/// offset.
7623bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7624 SDValue &Base,
7625 SDValue &Offset) {
7626 if (N.getOpcode() != ISD::ADD)
7627 return false;
7628
7629 // Process an ADD node.
7630 const SDValue LHS = N.getOperand(0);
7631 const SDValue RHS = N.getOperand(1);
7632
7633 // 8 bit data does not come with the SHL node, so it is treated
7634 // separately.
7635 if (Scale == 0) {
7636 Base = LHS;
7637 Offset = RHS;
7638 return true;
7639 }
7640
7641 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7642 int64_t ImmOff = C->getSExtValue();
7643 unsigned Size = 1 << Scale;
7644
7645 // To use the reg+reg addressing mode, the immediate must be a multiple of
7646 // the vector element's byte size.
7647 if (ImmOff % Size)
7648 return false;
7649
7650 SDLoc DL(N);
7651 Base = LHS;
7652 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7653 SDValue Ops[] = {Offset};
7654 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7655 Offset = SDValue(MI, 0);
7656 return true;
7657 }
7658
7659 // Check if the RHS is a shift node with a constant.
7660 if (RHS.getOpcode() != ISD::SHL)
7661 return false;
7662
7663 const SDValue ShiftRHS = RHS.getOperand(1);
7664 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7665 if (C->getZExtValue() == Scale) {
7666 Base = LHS;
7667 Offset = RHS.getOperand(0);
7668 return true;
7669 }
7670
7671 return false;
7672}
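// For example, with Scale = 2 (32-bit elements): an address of the form
// (add Base, (shl Idx, 2)) selects Base and Offset = Idx, i.e. the base
// register plus scaled index register form, while (add Base, 32) keeps Base
// and materialises Offset as MOVi64imm (32 >> 2) = 8, since 32 is a multiple
// of the 4-byte element size; an immediate that is not such a multiple fails.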
7673
7674bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7675 const AArch64TargetLowering *TLI =
7676 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7677
7678 return TLI->isAllActivePredicate(*CurDAG, N);
7679}
7680
7681bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7682 EVT VT = N.getValueType();
7683 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7684}
7685
7686bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7687 SDValue &Base, SDValue &Offset,
7688 unsigned Scale) {
7689 auto MatchConstantOffset = [&](SDValue CN) -> SDValue {
7690 if (auto *C = dyn_cast<ConstantSDNode>(CN)) {
7691 int64_t ImmOff = C->getSExtValue();
7692 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)))
7693 return CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7694 }
7695 return SDValue();
7696 };
7697
7698 if (SDValue C = MatchConstantOffset(N)) {
7699 Base = CurDAG->getConstant(0, SDLoc(N), MVT::i32);
7700 Offset = C;
7701 return true;
7702 }
7703
7704 // Try to untangle an ADD node into a 'reg + offset'
7705 if (CurDAG->isBaseWithConstantOffset(N)) {
7706 if (SDValue C = MatchConstantOffset(N.getOperand(1))) {
7707 Base = N.getOperand(0);
7708 Offset = C;
7709 return true;
7710 }
7711 }
7712
7713 // By default, just match reg + 0.
7714 Base = N;
7715 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7716 return true;
7717}
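// For example, with MaxSize = 15 and Scale = 1: a bare constant 7 selects
// Base = 0 and Offset = 7; (add Base, 12) selects that Base with Offset = 12;
// any other operand (including out-of-range or unscaled offsets) falls back
// to Base = N, Offset = 0 via the default case above.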
7718
7719bool AArch64DAGToDAGISel::SelectCmpBranchUImm6Operand(SDNode *P, SDValue N,
7720 SDValue &Imm) {
7721 AArch64CC::CondCode CC =
7722 static_cast<AArch64CC::CondCode>(P->getConstantOperandVal(1));
7723 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
7724 // Check conservatively if the immediate fits the valid range [0, 64).
7725 // Immediate variants for GE and HS definitely need to be decremented
7726 // when lowering the pseudos later, so an immediate of 1 would become 0.
7727 // For the inverse conditions LT and LO we don't know for sure if they
7728 // will need a decrement but should the decision be made to reverse the
7729 // branch condition, we again end up with the need to decrement.
7730 // The same argument holds for LE, LS, GT and HI and possibly
7731 // incremented immediates. This can lead to slightly less optimal
7732 // codegen, e.g. we never codegen the legal case
7733 // cblt w0, #63, A
7734 // because we could end up with the illegal case
7735 // cbge w0, #64, B
7736 // should the decision to reverse the branch direction be made. For the
7737 // lower bound cases this is no problem since we can express comparisons
7738 // against 0 with either tbz/tbnz or using wzr/xzr.
7739 uint64_t LowerBound = 0, UpperBound = 64;
7740 switch (CC) {
7741 case AArch64CC::GE:
7742 case AArch64CC::HS:
7743 case AArch64CC::LT:
7744 case AArch64CC::LO:
7745 LowerBound = 1;
7746 break;
7747 case AArch64CC::LE:
7748 case AArch64CC::LS:
7749 case AArch64CC::GT:
7750 case AArch64CC::HI:
7751 UpperBound = 63;
7752 break;
7753 default:
7754 break;
7755 }
7756
7757 if (CN->getAPIntValue().uge(LowerBound) &&
7758 CN->getAPIntValue().ult(UpperBound)) {
7759 SDLoc DL(N);
7760 Imm = CurDAG->getTargetConstant(CN->getZExtValue(), DL, N.getValueType());
7761 return true;
7762 }
7763 }
7764
7765 return false;
7766}
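// Summary of the bounds chosen above: GE/HS/LT/LO accept immediates in
// [1, 64) so that a later decrement stays representable, LE/LS/GT/HI accept
// [0, 63) so that a later increment stays representable, and all remaining
// condition codes accept the full [0, 64) range.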
7767
7768template <bool MatchCBB>
7769bool AArch64DAGToDAGISel::SelectCmpBranchExtOperand(SDValue N, SDValue &Reg,
7770 SDValue &ExtType) {
7771
7772 // Use an invalid shift-extend value to indicate we don't need to extend later
7773 if (N.getOpcode() == ISD::AssertZext || N.getOpcode() == ISD::AssertSext) {
7774 EVT Ty = cast<VTSDNode>(N.getOperand(1))->getVT();
7775 if (Ty != (MatchCBB ? MVT::i8 : MVT::i16))
7776 return false;
7777 Reg = N.getOperand(0);
7778 ExtType = CurDAG->getSignedTargetConstant(AArch64_AM::InvalidShiftExtend,
7779 SDLoc(N), MVT::i32);
7780 return true;
7781 }
7782
7783 AArch64_AM::ShiftExtendType ET = getExtendTypeForNode(N);
7784
7785 if ((MatchCBB && (ET == AArch64_AM::UXTB || ET == AArch64_AM::SXTB)) ||
7786 (!MatchCBB && (ET == AArch64_AM::UXTH || ET == AArch64_AM::SXTH))) {
7787 Reg = N.getOperand(0);
7788 ExtType =
7789 CurDAG->getTargetConstant(getExtendEncoding(ET), SDLoc(N), MVT::i32);
7790 return true;
7791 }
7792
7793 return false;
7794}
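// For example, the MatchCBB = true instantiation accepts either
// (AssertZext i8 Reg) / (AssertSext i8 Reg), in which case no explicit extend
// is required, or a UXTB/SXTB extend pattern; the MatchCBB = false
// instantiation accepts the analogous i16 / UXTH / SXTH forms. Anything else
// is rejected.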
unsigned SubReg
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms)
static int getIntOperandFromRegisterString(StringRef RegString)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, unsigned NumberOfIgnoredHighBits, EVT VT)
Does DstMask form a complementary pair with the mask provided by BitsToBeInserted,...
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N)
Instructions that accept extend modifiers like UXTW expect the register being extended to be a GPR32,...
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op, SDValue &Src, int &DstLSB, int &Width)
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, SDValue &Src, int &DstLSB, int &Width)
Does this tree qualify as an attempt to move a bitfield into position, essentially "(and (shl VAL,...
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB, unsigned NumberOfIgnoredLowBits, bool BiggerPattern)
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits=0, bool BiggerPattern=false)
static bool isShiftedMask(uint64_t Mask, EVT VT)
bool SelectSMETile(unsigned &BaseReg, unsigned TileNum)
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root)
Return the EVT of the data associated to a memory operation in Root.
static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isWorthFoldingADDlow(SDValue N)
If there's a use of this ADDlow that's not itself a load/store then we'll need to create a real ADD i...
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N)
getShiftTypeForNode - Translate a shift node to the corresponding ShiftType value.
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB)
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef< unsigned > Opcodes)
This function selects an opcode from a list of opcodes, which is expected to be the opcode for { 8-bi...
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, unsigned NumVec)
When PredVT is a scalable vector predicate in the form MVT::nx<M>xi1, it builds the correspondent sca...
static bool isPreferredADD(int64_t ImmOff)
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, uint64_t Imm, uint64_t MSB, unsigned Depth)
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount)
Create a machine node performing a notional SHL of Op by ShlAmount.
static bool isWorthFoldingSHL(SDValue V)
Determine whether it is worth it to fold SHL into the addressing mode.
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, bool BiggerPattern)
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, SDValue Src, SDValue Dst, SelectionDAG *CurDAG, const bool BiggerPattern)
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, SDValue Orig, unsigned Depth)
static bool isMemOpOrPrefetch(SDNode *N)
static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG)
static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth)
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth=0)
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected)
static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore=false)
getExtendTypeForNode - Translate an extend node to the corresponding ExtendType value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, SDValue &ShiftedOperand, uint64_t &EncodedShiftImm)
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range, unsigned Size)
Check if the immediate offset is valid as a scaled immediate.
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
return SDValue()
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG)
WidenVector - Given a value in the V64 register class, produce the equivalent value in the V128 regis...
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define DEBUG_TYPE
IRTranslator LLVM IR MI
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t High
OptimizedStructLayoutField Field
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
#define LLVM_DEBUG(...)
Definition Debug.h:114
#define PASS_NAME
const AArch64RegisterInfo * getRegisterInfo() const override
bool isStreaming() const
Returns true if the function has a streaming body.
bool isX16X17Safer() const
Returns whether the operating system makes it safer to store sensitive values in x16 and x17 as oppos...
unsigned getSVEVectorSizeInBits() const
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
LLVM_ABI bool getExactInverse(APFloat *Inv) const
If this value is normal and has an exact, normal, multiplicative inverse, store it in inv and return ...
Definition APFloat.cpp:5995
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1314
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1671
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1489
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1640
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1599
void flipAllBits()
Toggle every bit to its opposite value.
Definition APInt.h:1453
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:511
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1563
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:859
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const GlobalValue * getGlobal() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
bool hasScalableStackID(int ObjectIdx) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:712
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
unsigned getID() const
Return the register class ID number.
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:963
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint32_t parseGenericRegister(StringRef Name)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
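An AArch64 logical immediate packs three fields, N:immr:imms, that describe a replicated, rotated run of ones. As a conceptual standalone check (not the in-tree decoder above): a 64-bit value is encodable exactly when it is neither 0 nor all-ones and is a whole-register replication of a 2/4/8/16/32/64-bit element whose set bits form one contiguous, possibly wrapped, run.

#include <cstdint>

// True if V is a non-empty run of ones with only zeros elsewhere (a "shifted mask").
static bool isShiftedMask(uint64_t V) {
  uint64_t M = (V - 1) | V;            // fill the zeros below the run
  return V != 0 && ((M + 1) & M) == 0; // the result must then be a low mask
}

static bool looksLikeLogicalImm64(uint64_t V) {
  if (V == 0 || V == ~0ULL)
    return false;
  for (unsigned E = 2; E <= 64; E *= 2) {
    uint64_t Mask = E == 64 ? ~0ULL : ((1ULL << E) - 1);
    uint64_t Elt = V & Mask;
    bool Replicates = true;
    for (unsigned I = E; I < 64 && Replicates; I += E)
      Replicates = ((V >> I) & Mask) == Elt;
    if (!Replicates)
      continue;
    // One circular run of ones: either the ones are contiguous, or the zeros
    // are (the run wraps around the element boundary).
    if (isShiftedMask(Elt) || isShiftedMask(~Elt & Mask))
      return true;
  }
  return false;
}
// Examples: 0x00ff00ff00ff00ff and 0x0000ffffffff0000 are encodable; 0x1234 is not.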
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
unsigned getExtendEncoding(AArch64_AM::ShiftExtendType ET)
Mapping from extend bits to required operation: shifter: 000 ==> uxtb 001 ==> uxth 010 ==> uxtw 011 =...
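The option field is 3 bits; the full mapping, of which the brief above shows the start, follows the architectural encoding sketched below (enumerator names mirror the AArch64 mnemonics; this is illustrative, not code from the header):

// 3-bit "option" field of extended-register operands.
enum ExtendOption : unsigned {
  UXTB = 0b000, UXTH = 0b001, UXTW = 0b010, UXTX = 0b011, // zero-extend; UXTX doubles as LSL
  SXTB = 0b100, SXTH = 0b101, SXTW = 0b110, SXTX = 0b111  // sign-extend
};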
static bool isSVELogicalImm(unsigned SizeInBits, uint64_t ImmVal, uint64_t &Encoding)
static bool isSVECpyDupImm(int SizeInBits, int64_t Val, int32_t &Imm, int32_t &Shift)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
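One consistent way to pack the pair the brief above describes, with the shift kind held in the bits above a 6-bit amount so that getShiftValue/getShiftType can recover both fields by masking and shifting (illustrative sketch; not necessarily the exact in-tree layout):

enum ShiftKind : unsigned { LSL = 0, LSR = 1, ASR = 2, ROR = 3 };

static unsigned packShifterImm(ShiftKind Kind, unsigned Amount) {
  return (static_cast<unsigned>(Kind) << 6) | (Amount & 0x3f); // Amount is 0..63
}
static unsigned unpackShiftAmount(unsigned Enc) { return Enc & 0x3f; }
static ShiftKind unpackShiftKind(unsigned Enc) { return static_cast<ShiftKind>(Enc >> 6); }
// packShifterImm(LSR, 12) encodes "LSR #12"; unpackShiftAmount recovers the 12.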
static constexpr unsigned SVEBitsPerBlock
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:595
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:847
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:987
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:838
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:666
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition ISDOpcodes.h:69
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:225
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:759
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:609
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:134
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:844
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:882
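A standalone illustration of the SHL/SRA pair that SIGN_EXTEND_INREG describes (hypothetical helper; it relies on arithmetic right shift of signed values, which C++20 guarantees):

#include <cstdint>

// Sign-extend the low FromBits bits of V into a full 64-bit signed value by
// shifting them to the top and arithmetically shifting back down.
static int64_t signExtendInReg(uint64_t V, unsigned FromBits) {
  const unsigned Shift = 64 - FromBits; // assumes 0 < FromBits <= 64
  return static_cast<int64_t>(V << Shift) >> Shift;
}
// signExtendInReg(0xff, 8) == -1, signExtendInReg(0x7f, 8) == 127.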
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:733
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:850
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition MathExtras.h:267
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
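When a mask passes this predicate (or the APInt::isShiftedMask counterpart above), its position and width fall out of two bit counts, which is how the bitfield-positioning helpers derive the immr/imms operands. A standalone sketch (hypothetical helper, not LLVM code):

#include <bit>      // std::countr_zero, std::popcount (C++20)
#include <cstdint>

// For a mask whose ones are contiguous, the run starts at countr_zero(Mask)
// and is popcount(Mask) bits wide.
static void maskToBitfield(uint64_t Mask, unsigned &Lsb, unsigned &Width) {
  Lsb = static_cast<unsigned>(std::countr_zero(Mask));
  Width = static_cast<unsigned>(std::popcount(Mask));
}
// Example: Mask 0x0000000000ff0000 gives Lsb == 16 and Width == 8,
// i.e. (x & Mask) >> Lsb is UBFX x, #16, #8.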
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:2016
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAArch64ISelDag(AArch64TargetMachine &TM, CodeGenOptLevel OptLevel)
createAArch64ISelDag - This pass converts a legalized DAG into a AArch64-specific DAG,...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
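A standalone equivalent for the 64-bit case (a sketch, not the LLVM implementation); the N >= 64 case has to be special-cased because shifting a 64-bit value by 64 is undefined in C++:

#include <cstdint>

static uint64_t trailingOnes64(unsigned N) {
  return N == 0 ? 0 : (N >= 64 ? ~0ULL : ((1ULL << N) - 1));
}
// trailingOnes64(8) == 0xff, trailingOnes64(64) == ~0ULL, trailingOnes64(0) == 0.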
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
Extended Value Type.
Definition ValueTypes.h:35
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:463
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isFixedLengthVector() const
Definition ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition ValueTypes.h:202
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
Matching combinators.