LLVM 23.0.0git
AArch64ISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
16#include "llvm/ADT/APSInt.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34// https://github.com/llvm/llvm-project/issues/114425
35#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
36#pragma inline_depth(0)
37#endif
38
39//===--------------------------------------------------------------------===//
40/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
41/// instructions for SelectionDAG operations.
42///
43namespace {
44
45class AArch64DAGToDAGISel : public SelectionDAGISel {
46
47 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
48 /// make the right decision when generating code for different targets.
49 const AArch64Subtarget *Subtarget;
50
51public:
52 AArch64DAGToDAGISel() = delete;
53
54 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
55 CodeGenOptLevel OptLevel)
56 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
57
// Caches the AArch64 subtarget of the function being selected in the
// Subtarget member.
// NOTE(review): the embedded listing numbering jumps from 59 to 61 and no
// return statement is visible for this bool function; one statement appears
// to be missing from this listing - confirm against the real source.
58 bool runOnMachineFunction(MachineFunction &MF) override {
59 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
61 }
62
63 void Select(SDNode *Node) override;
64 void PreprocessISelDAG() override;
65
66 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
67 /// inline asm expressions.
68 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
69 InlineAsm::ConstraintCode ConstraintID,
70 std::vector<SDValue> &OutOps) override;
71
72 template <signed Low, signed High, signed Scale>
73 bool SelectRDVLImm(SDValue N, SDValue &Imm);
74
75 template <signed Low, signed High>
76 bool SelectRDSVLShiftImm(SDValue N, SDValue &Imm);
77
78 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
79 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
80 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
81 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
// Shifted-register forwarders. The second argument of SelectShiftedRegister
// is its AllowROR flag: false for the arithmetic form, true for the logical
// form.
82 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
83 return SelectShiftedRegister(N, false, Reg, Shift);
84 }
85 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
86 return SelectShiftedRegister(N, true, Reg, Shift);
87 }
// SelectAddrModeIndexed7S<bits> forwarders. Each passes a Size of bits / 8
// (1, 2, 4, 8 or 16) to SelectAddrModeIndexed7S, which in turn requests a
// signed 7-bit immediate from SelectAddrModeIndexedBitWidth.
88 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
89 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
90 }
91 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
92 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
93 }
94 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
95 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
96 }
97 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
98 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
99 }
100 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
101 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
102 }
// Direct uses of SelectAddrModeIndexedBitWidth(N, IsSignedImm, BW, Size, ...):
// a signed 9-bit and an unsigned 6-bit immediate, both with Size 16.
103 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
104 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
105 }
106 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
107 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
108 }
// SelectAddrModeIndexed<bits> forwarders; Size argument is bits / 8.
109 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
110 return SelectAddrModeIndexed(N, 1, Base, OffImm);
111 }
112 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
113 return SelectAddrModeIndexed(N, 2, Base, OffImm);
114 }
115 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
116 return SelectAddrModeIndexed(N, 4, Base, OffImm);
117 }
118 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
119 return SelectAddrModeIndexed(N, 8, Base, OffImm);
120 }
121 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
122 return SelectAddrModeIndexed(N, 16, Base, OffImm);
123 }
// SelectAddrModeUnscaled<bits> forwarders; Size argument is bits / 8.
124 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
125 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
126 }
127 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
128 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
129 }
130 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
131 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
132 }
133 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
134 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
135 }
136 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
137 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
138 }
139 template <unsigned Size, unsigned Max>
140 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
141 // Test if there is an appropriate addressing mode and check if the
142 // immediate fits.
143 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
144 if (Found) {
145 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
146 int64_t C = CI->getSExtValue();
147 if (C <= Max)
148 return true;
149 }
150 }
151
152 // Otherwise, base only, materialize address in register.
153 Base = N;
154 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
155 return true;
156 }
157
158 template<int Width>
159 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
160 SDValue &SignExtend, SDValue &DoShift) {
161 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
162 }
163
164 template<int Width>
165 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
166 SDValue &SignExtend, SDValue &DoShift) {
167 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
168 }
169
170 bool SelectExtractHigh(SDValue N, SDValue &Res) {
171 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
172 N = N->getOperand(0);
173 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
174 !isa<ConstantSDNode>(N->getOperand(1)))
175 return false;
176 EVT VT = N->getValueType(0);
177 EVT LVT = N->getOperand(0).getValueType();
178 unsigned Index = N->getConstantOperandVal(1);
179 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
180 Index != VT.getVectorNumElements())
181 return false;
182 Res = N->getOperand(0);
183 return true;
184 }
185
186 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
187 if (N.getOpcode() != AArch64ISD::VLSHR)
188 return false;
189 SDValue Op = N->getOperand(0);
190 EVT VT = Op.getValueType();
191 unsigned ShtAmt = N->getConstantOperandVal(1);
192 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
193 return false;
194
195 APInt Imm;
196 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
197 Imm = APInt(VT.getScalarSizeInBits(),
198 Op.getOperand(1).getConstantOperandVal(0)
199 << Op.getOperand(1).getConstantOperandVal(1));
200 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
201 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
202 Imm = APInt(VT.getScalarSizeInBits(),
203 Op.getOperand(1).getConstantOperandVal(0));
204 else
205 return false;
206
207 if (Imm != 1ULL << (ShtAmt - 1))
208 return false;
209
210 Res1 = Op.getOperand(0);
211 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
212 return true;
213 }
214
215 bool SelectDupZeroOrUndef(SDValue N) {
216 switch(N->getOpcode()) {
217 case ISD::UNDEF:
218 return true;
219 case AArch64ISD::DUP:
220 case ISD::SPLAT_VECTOR: {
221 auto Opnd0 = N->getOperand(0);
222 if (isNullConstant(Opnd0))
223 return true;
224 if (isNullFPConstant(Opnd0))
225 return true;
226 break;
227 }
228 default:
229 break;
230 }
231
232 return false;
233 }
234
235 bool SelectAny(SDValue) { return true; }
236
237 bool SelectDupZero(SDValue N) {
238 switch(N->getOpcode()) {
239 case AArch64ISD::DUP:
240 case ISD::SPLAT_VECTOR: {
241 auto Opnd0 = N->getOperand(0);
242 if (isNullConstant(Opnd0))
243 return true;
244 if (isNullFPConstant(Opnd0))
245 return true;
246 break;
247 }
248 }
249
250 return false;
251 }
252
253 template <MVT::SimpleValueType VT, bool Negate>
254 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
255 return SelectSVEAddSubImm(N, VT, Imm, Shift, Negate);
256 }
257
258 template <MVT::SimpleValueType VT, bool Negate>
259 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
260 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
261 }
262
263 template <MVT::SimpleValueType VT>
264 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
265 return SelectSVECpyDupImm(N, VT, Imm, Shift);
266 }
267
268 template <MVT::SimpleValueType VT, bool Invert = false>
269 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
270 return SelectSVELogicalImm(N, VT, Imm, Invert);
271 }
272
273 template <MVT::SimpleValueType VT>
274 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
275 return SelectSVEArithImm(N, VT, Imm);
276 }
277
278 template <unsigned Low, unsigned High, bool AllowSaturation = false>
279 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
280 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
281 }
282
283 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
284 if (N->getOpcode() != ISD::SPLAT_VECTOR)
285 return false;
286
287 EVT EltVT = N->getValueType(0).getVectorElementType();
288 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
289 /* High */ EltVT.getFixedSizeInBits(),
290 /* AllowSaturation */ true, Imm);
291 }
292
293 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
// On success Imm is an i32 target constant holding the multiplier after
// division by Scale; it must lie in [Min, Max].
294 template<signed Min, signed Max, signed Scale, bool Shift>
295 bool SelectCntImm(SDValue N, SDValue &Imm) {
// NOTE(review): the embedded listing numbering jumps from 295 to 297, so the
// condition guarding this early return is not visible here. The cast<> below
// requires N to be a ConstantSDNode - confirm the guard against the real
// source.
297 return false;
298
299 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
// With Shift set, the constant is a shift amount and is replaced by
// 1 << amount.
300 if (Shift)
301 MulImm = 1LL << MulImm;
302
// Only exact multiples of |Scale| are representable.
303 if ((MulImm % std::abs(Scale)) != 0)
304 return false;
305
306 MulImm /= Scale;
307 if ((MulImm >= Min) && (MulImm <= Max)) {
308 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
309 return true;
310 }
311
312 return false;
313 }
314
// Accepts a constant in [0, Max] and returns it multiplied by Scale as an
// i32 target constant in Imm.
315 template <signed Max, signed Scale>
316 bool SelectEXTImm(SDValue N, SDValue &Imm) {
// NOTE(review): the embedded listing numbering jumps from 316 to 318, so the
// condition guarding this early return is not visible here. The cast<> below
// requires N to be a ConstantSDNode - confirm the guard against the real
// source.
318 return false;
319
320 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
321
// The range check is applied before scaling.
322 if (MulImm >= 0 && MulImm <= Max) {
323 MulImm *= Scale;
324 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
325 return true;
326 }
327
328 return false;
329 }
330
331 template <unsigned BaseReg, unsigned Max>
332 bool ImmToReg(SDValue N, SDValue &Imm) {
333 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
334 uint64_t C = CI->getZExtValue();
335
336 if (C > Max)
337 return false;
338
339 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
340 return true;
341 }
342 return false;
343 }
344
345 /// Form sequences of consecutive 64/128-bit registers for use in NEON
346 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
347 /// between 1 and 4 elements. If it contains a single element that is returned
348 /// unchanged; otherwise a REG_SEQUENCE value is returned.
351 // Form a sequence of SVE registers for instructions using list of vectors,
352 // e.g. structured loads and stores (ldN, stN).
353 SDValue createZTuple(ArrayRef<SDValue> Vecs);
354
355 // Similar to above, except the register must start at a multiple of the
356 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
357 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
358
359 /// Generic helper for the createDTuple/createQTuple
360 /// functions. Those should almost always be called instead.
361 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
362 const unsigned SubRegs[]);
363
364 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
365
366 bool tryIndexedLoad(SDNode *N);
367
368 void SelectPtrauthAuth(SDNode *N);
369 void SelectPtrauthResign(SDNode *N);
370
371 bool trySelectStackSlotTagP(SDNode *N);
372 void SelectTagP(SDNode *N);
373
374 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
375 unsigned SubRegIdx);
376 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
377 unsigned SubRegIdx);
378 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
379 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
380 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
381 unsigned Opc_rr, unsigned Opc_ri,
382 bool IsIntr = false);
383 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
384 unsigned Scale, unsigned Opc_ri,
385 unsigned Opc_rr);
386 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
387 bool IsZmMulti, unsigned Opcode,
388 bool HasPred = false);
389 void SelectPExtPair(SDNode *N, unsigned Opc);
390 void SelectWhilePair(SDNode *N, unsigned Opc);
391 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
392 void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
393 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
394 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
395 bool IsTupleInput, unsigned Opc);
396 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
397
398 template <unsigned MaxIdx, unsigned Scale>
399 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
400 unsigned Op);
401 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
402 unsigned Op, unsigned MaxIdx, unsigned Scale,
403 unsigned BaseReg = 0);
404 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
405 /// SVE Reg+Imm addressing mode.
406 template <int64_t Min, int64_t Max>
407 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
408 SDValue &OffImm);
409 /// SVE Reg+Reg address mode.
410 template <unsigned Scale>
411 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
412 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
413 }
414
415 void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
416 unsigned Opc, uint32_t MaxImm);
417
418 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc);
419
420 template <unsigned MaxIdx, unsigned Scale>
421 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
422 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
423 }
424
425 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
426 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
427 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
428 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
429 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
430 unsigned Opc_rr, unsigned Opc_ri);
431 std::tuple<unsigned, SDValue, SDValue>
432 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
433 const SDValue &OldBase, const SDValue &OldOffset,
434 unsigned Scale);
435
436 bool tryBitfieldExtractOp(SDNode *N);
437 bool tryBitfieldExtractOpFromSExt(SDNode *N);
438 bool tryBitfieldInsertOp(SDNode *N);
439 bool tryBitfieldInsertInZeroOp(SDNode *N);
440 bool tryShiftAmountMod(SDNode *N);
441
442 bool tryReadRegister(SDNode *N);
443 bool tryWriteRegister(SDNode *N);
444
445 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
446 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
447
448 bool trySelectXAR(SDNode *N);
449
450 SDValue tryFoldCselToFMaxMin(SDNode &N);
451
452// Include the pieces autogenerated from the target description.
453#include "AArch64GenDAGISel.inc"
454
455private:
456 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
457 SDValue &Shift);
458 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
459 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
460 SDValue &OffImm) {
461 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
462 }
463 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
464 unsigned Size, SDValue &Base,
465 SDValue &OffImm);
466 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
467 SDValue &OffImm);
468 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
469 SDValue &OffImm);
470 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
471 SDValue &Offset, SDValue &SignExtend,
472 SDValue &DoShift);
473 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
474 SDValue &Offset, SDValue &SignExtend,
475 SDValue &DoShift);
476 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
477 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
478 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
479 SDValue &Offset, SDValue &SignExtend);
480
481 template<unsigned RegWidth>
482 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
483 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
484 }
485 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
486
487 template <unsigned RegWidth>
488 bool SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos) {
489 return SelectCVTFixedPointVec(N, FixedPos, RegWidth);
490 }
491 bool SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos, unsigned Width);
492
493 template<unsigned RegWidth>
494 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
495 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
496 }
497
498 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
499 unsigned Width);
500
501 template <unsigned FloatWidth>
502 bool SelectCVTFixedPosRecipOperandVec(SDValue N, SDValue &FixedPos) {
503 return SelectCVTFixedPosRecipOperandVec(N, FixedPos, FloatWidth);
504 }
505
506 bool SelectCVTFixedPosRecipOperandVec(SDValue N, SDValue &FixedPos,
507 unsigned Width);
508
509 bool SelectCMP_SWAP(SDNode *N);
510
511 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
512 bool Negate);
513 bool SelectSVEAddSubImm(SDLoc DL, APInt Value, MVT VT, SDValue &Imm,
514 SDValue &Shift, bool Negate);
515 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
516 bool Negate);
517 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
518 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
519
520 // Match `<NEON Splat> SVEImm` (where <NEON Splat> could be fmov, movi, etc).
521 bool SelectNEONSplatOfSVELogicalImm(SDValue N, SDValue &Imm);
522 bool SelectNEONSplatOfSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift);
523 bool SelectNEONSplatOfSVEArithSImm(SDValue N, SDValue &Imm);
524
525 bool SelectSVESignedArithImm(SDLoc DL, APInt Value, SDValue &Imm);
526 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
527 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
528 bool AllowSaturation, SDValue &Imm);
529
530 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
531 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
532 SDValue &Offset);
533 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
534 SDValue &Offset, unsigned Scale = 1);
535
536 bool SelectAllActivePredicate(SDValue N);
537 bool SelectAnyPredicate(SDValue N);
538
539 bool SelectCmpBranchUImm6Operand(SDNode *P, SDValue N, SDValue &Imm);
540
541 template <bool MatchCBB>
542 bool SelectCmpBranchExtOperand(SDValue N, SDValue &Reg, SDValue &ExtType);
543};
544
// Legacy pass wrapper: owns an AArch64DAGToDAGISel created with
// std::make_unique and carries the static pass ID.
545class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
546public:
547 static char ID;
548 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
549 CodeGenOptLevel OptLevel)
// NOTE(review): the embedded listing numbering jumps from 549 to 551; the
// start of the member-initializer list (the call that the arguments below
// belong to) is not visible in this listing.
551 ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
552};
553} // end anonymous namespace
554
555char AArch64DAGToDAGISelLegacy::ID = 0;
556
557INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
558
559/// addBitcastHints - This method adds bitcast hints to the operands of a node
560/// to help instruction selector determine which operands are in Neon registers.
// NOTE(review): the embedded listing numbering jumps from 560 to 562 and from
// 568 to 570: the function signature and the declaration of NewOps are not
// visible in this listing. The body uses DAG and N, which presumably come
// from that missing signature.
562 SDLoc DL(&N);
// Maps an i32/i64 (scalar or vector element) type to the same type with
// f32/f64 elements; any other element type trips the assert.
563 auto getFloatVT = [&](EVT VT) {
564 EVT ScalarVT = VT.getScalarType();
565 assert((ScalarVT == MVT::i32 || ScalarVT == MVT::i64) && "Unexpected VT");
566 return VT.changeElementType(*(DAG.getContext()),
567 ScalarVT == MVT::i32 ? MVT::f32 : MVT::f64);
568 };
570 NewOps.reserve(N.getNumOperands());
571
// Bitcast every operand to its floating-point counterpart.
572 for (unsigned I = 0, E = N.getNumOperands(); I < E; ++I) {
573 auto bitcasted = DAG.getBitcast(getFloatVT(N.getOperand(I).getValueType()),
574 N.getOperand(I));
575 NewOps.push_back(bitcasted);
576 }
// Rebuild the node on the floating-point type, then cast the result back to
// the original value type.
577 EVT OrigVT = N.getValueType(0);
578 SDValue OpNode = DAG.getNode(N.getOpcode(), DL, getFloatVT(OrigVT), NewOps);
579 return DAG.getBitcast(OrigVT, OpNode);
580}
581
582/// isIntImmediate - This method tests to see if the node is a constant
583/// operand. If so Imm will receive the 64-bit value.
584static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
// NOTE(review): the embedded listing numbering jumps from 584 to 586; the
// statement that introduces `C` and opens the block closed on line 588 is
// not visible in this listing.
586 Imm = C->getZExtValue();
587 return true;
588 }
589 return false;
590}
591
592// isIntImmediate - This method tests to see if a constant operand.
593// If so Imm will receive the value.
594static bool isIntImmediate(SDValue N, uint64_t &Imm) {
595 return isIntImmediate(N.getNode(), Imm);
596}
597
598// isOpcWithIntImmediate - This method tests to see if the node is a specific
599// opcode and that it has a immediate integer right operand.
600// If so Imm will receive the 32 bit value.
601static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
602 uint64_t &Imm) {
603 return N->getOpcode() == Opc &&
604 isIntImmediate(N->getOperand(1).getNode(), Imm);
605}
606
607// isIntImmediateEq - This method tests to see if N is a constant operand that
608// is equivalent to 'ImmExpected'.
609#ifndef NDEBUG
610static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
611 uint64_t Imm;
612 if (!isIntImmediate(N.getNode(), Imm))
613 return false;
614 return Imm == ImmExpected;
615}
616#endif
617
// Decodes an FMOV immediate into an APInt of RegWidth bits; only widths of
// 32 and 64 are accepted (asserted). The 64-bit case decodes with
// AArch64_AM::decodeAdvSIMDModImmType12.
618static APInt DecodeFMOVImm(uint64_t Imm, unsigned RegWidth) {
619 assert(RegWidth == 32 || RegWidth == 64);
620 if (RegWidth == 32)
621 return APInt(RegWidth,
// NOTE(review): the embedded listing numbering jumps from 621 to 623; the
// value argument of the 32-bit APInt above is not visible in this listing.
623 return APInt(RegWidth, AArch64_AM::decodeAdvSIMDModImmType12(Imm));
624}
625
626// Decodes the raw integer splat value from a NEON splat operation.
// Returns std::nullopt when N is none of the recognized opcodes (or is a DUP
// of a non-constant). The value's bit width is N's scalar size, measured
// after an NVCAST has been looked through.
627static std::optional<APInt> DecodeNEONSplat(SDValue N) {
628 assert(N.getValueType().isInteger() && "Only integers are supported");
// Look through a single NVCAST; SplatWidth below is therefore taken from the
// inner node.
629 if (N->getOpcode() == AArch64ISD::NVCAST)
630 N = N->getOperand(0);
631 unsigned SplatWidth = N.getScalarValueSizeInBits();
632 if (N.getOpcode() == AArch64ISD::FMOV)
633 return DecodeFMOVImm(N.getConstantOperandVal(0), SplatWidth);
634 if (N->getOpcode() == AArch64ISD::MOVI)
635 return APInt(SplatWidth, N.getConstantOperandVal(0));
// MOVIshift: operand 0 shifted left by operand 1.
636 if (N->getOpcode() == AArch64ISD::MOVIshift)
637 return APInt(SplatWidth, N.getConstantOperandVal(0)
638 << N.getConstantOperandVal(1));
// MVNIshift: bitwise complement of the shifted value.
639 if (N->getOpcode() == AArch64ISD::MVNIshift)
640 return ~APInt(SplatWidth, N.getConstantOperandVal(0)
641 << N.getConstantOperandVal(1));
642 if (N->getOpcode() == AArch64ISD::MOVIedit)
// NOTE(review): the embedded listing numbering jumps from 642 to 644; the
// start of the MOVIedit return expression is not visible in this listing.
644 N.getConstantOperandVal(0)));
// DUP of a constant: the constant truncated to the element width.
645 if (N->getOpcode() == AArch64ISD::DUP)
646 if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(0)))
647 return Const->getAPIntValue().trunc(SplatWidth);
648 // TODO: Recognize more splat-like NEON operations. See ConstantBuildVector
649 // in AArch64ISelLowering.
650 return std::nullopt;
651}
652
653// If \p N is a NEON splat operation (movi, fmov, etc), return the splat value
654// matching the element size of N.
655static std::optional<APInt> GetNEONSplatValue(SDValue N) {
656 unsigned SplatWidth = N.getScalarValueSizeInBits();
657 if (std::optional<APInt> SplatVal = DecodeNEONSplat(N)) {
658 if (SplatVal->getBitWidth() <= SplatWidth)
659 return APInt::getSplat(SplatWidth, *SplatVal);
660 if (SplatVal->isSplat(SplatWidth))
661 return SplatVal->trunc(SplatWidth);
662 }
663 return std::nullopt;
664}
665
666bool AArch64DAGToDAGISel::SelectNEONSplatOfSVELogicalImm(SDValue N,
667 SDValue &Imm) {
668 std::optional<APInt> ImmVal = GetNEONSplatValue(N);
669 if (!ImmVal)
670 return false;
671 uint64_t Encoding;
672 if (!AArch64_AM::isSVELogicalImm(N.getScalarValueSizeInBits(),
673 ImmVal->getZExtValue(), Encoding))
674 return false;
675
676 Imm = CurDAG->getTargetConstant(Encoding, SDLoc(N), MVT::i64);
677 return true;
678}
679
680bool AArch64DAGToDAGISel::SelectNEONSplatOfSVEAddSubImm(SDValue N, SDValue &Imm,
681 SDValue &Shift) {
682 if (std::optional<APInt> ImmVal = GetNEONSplatValue(N))
683 return SelectSVEAddSubImm(SDLoc(N), *ImmVal,
684 N.getValueType().getScalarType().getSimpleVT(),
685 Imm, Shift,
686 /*Negate=*/false);
687 return false;
688}
689
690bool AArch64DAGToDAGISel::SelectNEONSplatOfSVEArithSImm(SDValue N,
691 SDValue &Imm) {
692 if (std::optional<APInt> ImmVal = GetNEONSplatValue(N))
693 return SelectSVESignedArithImm(SDLoc(N), *ImmVal, Imm);
694 return false;
695}
696
697bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
698 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
699 std::vector<SDValue> &OutOps) {
700 switch(ConstraintID) {
701 default:
702 llvm_unreachable("Unexpected asm memory constraint");
703 case InlineAsm::ConstraintCode::m:
704 case InlineAsm::ConstraintCode::o:
705 case InlineAsm::ConstraintCode::Q:
706 // We need to make sure that this one operand does not end up in XZR, thus
707 // require the address to be in a PointerRegClass register.
708 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
709 const TargetRegisterClass *TRC = TRI->getPointerRegClass();
710 SDLoc dl(Op);
711 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
712 SDValue NewOp =
713 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
714 dl, Op.getValueType(),
715 Op, RC), 0);
716 OutOps.push_back(NewOp);
717 return false;
718 }
719 return true;
720}
721
722/// SelectArithImmed - Select an immediate value that can be represented as
723/// a 12-bit value shifted left by either 0 or 12. If so, return true with
724/// Val set to the 12-bit value and Shift set to the shifter operand.
725bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
726 SDValue &Shift) {
727 // This function is called from the addsub_shifted_imm ComplexPattern,
728 // which lists [imm] as the list of opcode it's interested in, however
729 // we still need to check whether the operand is actually an immediate
730 // here because the ComplexPattern opcode list is only used in
731 // root-level opcode matching.
732 if (!isa<ConstantSDNode>(N.getNode()))
733 return false;
734
735 uint64_t Immed = N.getNode()->getAsZExtVal();
736 unsigned ShiftAmt;
737
738 if (Immed >> 12 == 0) {
739 ShiftAmt = 0;
740 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
741 ShiftAmt = 12;
742 Immed = Immed >> 12;
743 } else
744 return false;
745
746 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
747 SDLoc dl(N);
748 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
749 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
750 return true;
751}
752
753/// SelectNegArithImmed - As above, but negates the value before trying to
754/// select it.
755bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
756 SDValue &Shift) {
757 // This function is called from the addsub_shifted_imm ComplexPattern,
758 // which lists [imm] as the list of opcode it's interested in, however
759 // we still need to check whether the operand is actually an immediate
760 // here because the ComplexPattern opcode list is only used in
761 // root-level opcode matching.
762 if (!isa<ConstantSDNode>(N.getNode()))
763 return false;
764
765 // The immediate operand must be a 24-bit zero-extended immediate.
766 uint64_t Immed = N.getNode()->getAsZExtVal();
767
768 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
769 // have the opposite effect on the C flag, so this pattern mustn't match under
770 // those circumstances.
771 if (Immed == 0)
772 return false;
773
774 if (N.getValueType() == MVT::i32)
775 Immed = ~((uint32_t)Immed) + 1;
776 else
777 Immed = ~Immed + 1ULL;
778 if (Immed & 0xFFFFFFFFFF000000ULL)
779 return false;
780
781 Immed &= 0xFFFFFFULL;
782 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
783 Shift);
784}
785
786/// getShiftTypeForNode - Translate a shift node to the corresponding
787/// ShiftType value.
// NOTE(review): the embedded listing numbering jumps from 787 to 789 and from
// 790 to 792: the function signature and the body of the `default:` case are
// not visible in this listing.
789 switch (N.getOpcode()) {
790 default:
// Visible mapping: SHL -> LSL, SRL -> LSR, SRA -> ASR, ROTR -> ROR.
792 case ISD::SHL:
793 return AArch64_AM::LSL;
794 case ISD::SRL:
795 return AArch64_AM::LSR;
796 case ISD::SRA:
797 return AArch64_AM::ASR;
798 case ISD::ROTR:
799 return AArch64_AM::ROR;
800 }
801}
802
// NOTE(review): the signature line (embedded listing number 803) is not
// visible in this listing; callers below use the name isMemOpOrPrefetch.
// True when N is a MemSDNode or has opcode AArch64ISD::PREFETCH.
804 return isa<MemSDNode>(*N) || N->getOpcode() == AArch64ISD::PREFETCH;
805}
806
807/// Determine whether it is worth it to fold SHL into the addressing
808/// mode.
// NOTE(review): the signature line (embedded listing number 809) is not
// visible in this listing; callers use the name isWorthFoldingSHL and pass an
// SDValue named V here.
810 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
811 // It is worth folding logical shift of up to three places.
// A non-constant shift amount is never folded.
812 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
813 if (!CSD)
814 return false;
815 unsigned ShiftVal = CSD->getZExtValue();
816 if (ShiftVal > 3)
817 return false;
818
819 // Check if this particular node is reused in any non-memory related
820 // operation. If yes, do not try to fold this node into the address
821 // computation, since the computation will be kept.
// Concretely: reject as soon as a user of V that is not a memory
// op/prefetch itself has a user that is not a memory op/prefetch.
822 const SDNode *Node = V.getNode();
823 for (SDNode *UI : Node->users())
824 if (!isMemOpOrPrefetch(UI))
825 for (SDNode *UII : UI->users())
826 if (!isMemOpOrPrefetch(UII))
827 return false;
828 return true;
829}
830
831/// Determine whether it is worth to fold V into an extended register addressing
832/// mode.
833bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
834 // Trivial if we are optimizing for code size or if there is only
835 // one use of the value.
836 if (CurDAG->shouldOptForSize() || V.hasOneUse())
837 return true;
838
839 // If a subtarget has a slow shift, folding a shift into multiple loads
840 // costs additional micro-ops.
841 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
842 return false;
843
844 // Check whether we're going to emit the address arithmetic anyway because
845 // it's used by a non-address operation.
846 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
847 return true;
848 if (V.getOpcode() == ISD::ADD) {
849 const SDValue LHS = V.getOperand(0);
850 const SDValue RHS = V.getOperand(1);
851 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
852 return true;
853 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
854 return true;
855 }
856
857 // It hurts otherwise, since the value will be reused.
858 return false;
859}
860
861/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
862/// to select more shifted register
863bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
864 SDValue &Shift) {
865 EVT VT = N.getValueType();
866 if (VT != MVT::i32 && VT != MVT::i64)
867 return false;
868
869 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
870 return false;
871 SDValue LHS = N.getOperand(0);
872 if (!LHS->hasOneUse())
873 return false;
874
875 unsigned LHSOpcode = LHS->getOpcode();
876 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
877 return false;
878
879 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
880 if (!ShiftAmtNode)
881 return false;
882
883 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
884 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
885 if (!RHSC)
886 return false;
887
888 APInt AndMask = RHSC->getAPIntValue();
889 unsigned LowZBits, MaskLen;
890 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
891 return false;
892
893 unsigned BitWidth = N.getValueSizeInBits();
894 SDLoc DL(LHS);
895 uint64_t NewShiftC;
896 unsigned NewShiftOp;
897 if (LHSOpcode == ISD::SHL) {
898 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
899 // BitWidth != LowZBits + MaskLen doesn't match the pattern
900 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
901 return false;
902
903 NewShiftC = LowZBits - ShiftAmtC;
904 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
905 } else {
906 if (LowZBits == 0)
907 return false;
908
909 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
910 NewShiftC = LowZBits + ShiftAmtC;
911 if (NewShiftC >= BitWidth)
912 return false;
913
914 // SRA need all high bits
915 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
916 return false;
917
918 // SRL high bits can be 0 or 1
919 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
920 return false;
921
922 if (LHSOpcode == ISD::SRL)
923 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
924 else
925 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
926 }
927
928 assert(NewShiftC < BitWidth && "Invalid shift amount");
929 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
930 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
931 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
932 NewShiftAmt, BitWidthMinus1),
933 0);
934 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
935 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
936 return true;
937}
938
939/// getExtendTypeForNode - Translate an extend node to the corresponding
940/// ExtendType value.
///
/// Three node shapes are inspected: SIGN_EXTEND / SIGN_EXTEND_INREG,
/// ZERO_EXTEND / ANY_EXTEND, and AND with a constant mask. When
/// \p IsLoadStore is set, the byte and halfword forms are not returned by the
/// branches visible here; only the forms for a 32-bit source are.
// NOTE(review): several lines of this definition (the line carrying the return
// type and a few return statements) appear to be absent from this listing -
// confirm against the upstream file before editing the logic.
942getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
943 if (N.getOpcode() == ISD::SIGN_EXTEND ||
944 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
// SIGN_EXTEND_INREG carries its source type as a VTSDNode operand; a plain
// SIGN_EXTEND takes it from the extended operand itself.
945 EVT SrcVT;
946 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
947 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
948 else
949 SrcVT = N.getOperand(0).getValueType();
950
951 if (!IsLoadStore && SrcVT == MVT::i8)
952 return AArch64_AM::SXTB;
953 else if (!IsLoadStore && SrcVT == MVT::i16)
954 return AArch64_AM::SXTH;
955 else if (SrcVT == MVT::i32)
956 return AArch64_AM::SXTW;
957 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
958
960 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
961 N.getOpcode() == ISD::ANY_EXTEND) {
// ANY_EXTEND is mapped to the same unsigned extend kinds as ZERO_EXTEND.
962 EVT SrcVT = N.getOperand(0).getValueType();
963 if (!IsLoadStore && SrcVT == MVT::i8)
964 return AArch64_AM::UXTB;
965 else if (!IsLoadStore && SrcVT == MVT::i16)
966 return AArch64_AM::UXTH;
967 else if (SrcVT == MVT::i32)
968 return AArch64_AM::UXTW;
969 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
970
972 } else if (N.getOpcode() == ISD::AND) {
// An AND with 0xFF / 0xFFFF / 0xFFFFFFFF is treated as a zero-extension from
// 8 / 16 / 32 bits.
973 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
974 if (!CSD)
976 uint64_t AndMask = CSD->getZExtValue();
977
978 switch (AndMask) {
979 default:
981 case 0xFF:
982 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
983 case 0xFFFF:
984 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
985 case 0xFFFFFFFF:
986 return AArch64_AM::UXTW;
987 }
988 }
989
991}
992
993/// Determine whether it is worth to fold V into an extended register of an
994/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
995/// instruction, and the shift should be treated as worth folding even if it has
996/// multiple uses.
///
/// \returns true when optimizing for size, when \p V has a single use, or
/// when the fast-LSL condition below holds; false otherwise.
// NOTE(review): the last line of the fast-LSL condition appears to be absent
// from this listing - confirm against the upstream file.
997bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
998 // Trivial if we are optimizing for code size or if there is only
999 // one use of the value.
1000 if (CurDAG->shouldOptForSize() || V.hasOneUse())
1001 return true;
1002
1003 // If a subtarget has a fastpath LSL we can fold a logical shift into
1004 // the add/sub and save a cycle.
// Only constant shift amounts of at most 4 are considered here.
1005 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
1006 V.getConstantOperandVal(1) <= 4 &&
1008 return true;
1009
1010 // It hurts otherwise, since the value will be reused.
1011 return false;
1012}
1013
1014/// SelectShiftedRegister - Select a "shifted register" operand. If the value
1015/// is not shifted, set the Shift operand to default of "LSL 0". The logical
1016/// instructions allow the shifted register to be rotated, but the arithmetic
1017/// instructions do not. The AllowROR parameter specifies whether ROR is
1018/// supported.
///
/// On success \p Reg is the register operand and \p Shift the encoded shifter
/// immediate as an i32 target constant.
// NOTE(review): the line that declares and initializes ShType appears to be
// absent from this listing - confirm against the upstream file.
1019bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
1020 SDValue &Reg, SDValue &Shift) {
// First try the and-of-shift rewrite, which fills Reg and Shift itself.
1021 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
1022 return true;
1023
1025 if (ShType == AArch64_AM::InvalidShiftExtend)
1026 return false;
1027 if (!AllowROR && ShType == AArch64_AM::ROR)
1028 return false;
1029
// Only constant shift amounts are folded; the amount is masked with
// BitSize - 1 before being encoded.
1030 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1031 unsigned BitSize = N.getValueSizeInBits();
1032 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
1033 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
1034
1035 Reg = N.getOperand(0);
1036 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
// Reg and Shift are written even when the fold is then judged not worthwhile.
1037 return isWorthFoldingALU(N, true);
1038 }
1039
1040 return false;
1041}
1042
1043/// Instructions that accept extend modifiers like UXTW expect the register
1044/// being extended to be a GPR32, but the incoming DAG might be acting on a
1045/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
1046/// this is the case.
// NOTE(review): the signature line of this helper appears to be absent from
// this listing - confirm against the upstream file.
// An i32 value is returned unchanged.
1048 if (N.getValueType() == MVT::i32)
1049 return N;
1050
// Otherwise take the sub_32 sub-register of N as an i32 value.
1051 SDLoc dl(N);
1052 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
1053}
1054
1055// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
1056template<signed Low, signed High, signed Scale>
1057bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
1058 if (!isa<ConstantSDNode>(N))
1059 return false;
1060
1061 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
1062 if ((MulImm % std::abs(Scale)) == 0) {
1063 int64_t RDVLImm = MulImm / Scale;
1064 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
1065 Imm = CurDAG->getSignedTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
1066 return true;
1067 }
1068 }
1069
1070 return false;
1071}
1072
1073// Returns a suitable RDSVL multiplier from a left shift.
1074template <signed Low, signed High>
1075bool AArch64DAGToDAGISel::SelectRDSVLShiftImm(SDValue N, SDValue &Imm) {
1076 if (!isa<ConstantSDNode>(N))
1077 return false;
1078
1079 int64_t MulImm = 1LL << cast<ConstantSDNode>(N)->getSExtValue();
1080 if (MulImm >= Low && MulImm <= High) {
1081 Imm = CurDAG->getSignedTargetConstant(MulImm, SDLoc(N), MVT::i32);
1082 return true;
1083 }
1084
1085 return false;
1086}
1087
1088/// SelectArithExtendedRegister - Select a "extended register" operand. This
1089/// operand folds in an extend followed by an optional left shift.
///
/// On success \p Reg is the (possibly narrowed) register being extended and
/// \p Shift the encoded extend-plus-shift immediate as an i32 target constant.
// NOTE(review): several lines of this definition (the declaration of Ext, two
// conditions guarding `return false`, the opening of the block closed at
// listing line 1139, and part of the isDef32 expression) appear to be absent
// from this listing - confirm against the upstream file.
1090bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
1091 SDValue &Shift) {
1092 unsigned ShiftVal = 0;
1094
// Shifted form: a left shift by a constant of at most 4 applied to an extend.
1095 if (N.getOpcode() == ISD::SHL) {
1096 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1097 if (!CSD)
1098 return false;
1099 ShiftVal = CSD->getZExtValue();
1100 if (ShiftVal > 4)
1101 return false;
1102
1103 Ext = getExtendTypeForNode(N.getOperand(0));
1105 return false;
1106
// Look through both the shift and the extend to reach the source register.
1107 Reg = N.getOperand(0).getOperand(0);
1108 } else {
// Unshifted form: N itself is the extend.
1109 Ext = getExtendTypeForNode(N);
1111 return false;
1112
1113 // Don't match sext of vector extracts. These can use SMOV, but if we match
1114 // this as an extended register, we'll always fold the extend into an ALU op
1115 // user of the extend (which results in a UMOV).
1117 SDValue Op = N.getOperand(0);
1118 if (Op->getOpcode() == ISD::ANY_EXTEND)
1119 Op = Op->getOperand(0);
1120 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
1121 Op.getOperand(0).getValueType().isFixedLengthVector())
1122 return false;
1123 }
1124
1125 Reg = N.getOperand(0);
1126
1127 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
1128 // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
1129 auto isDef32 = [](SDValue N) {
1130 unsigned Opc = N.getOpcode();
1131 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
1134 Opc != ISD::FREEZE;
1135 };
1136 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
1137 isDef32(Reg))
1138 return false;
1139 }
1140
1141 // AArch64 mandates that the RHS of the operation must use the smallest
1142 // register class that could contain the size being extended from. Thus,
1143 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
1144 // there might not be an actual 32-bit value in the program. We can
1145 // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
1146 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
1147 Reg = narrowIfNeeded(CurDAG, Reg);
1148 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1149 MVT::i32);
// Reg and Shift are written even when the fold is then judged not worthwhile.
1150 return isWorthFoldingALU(N);
1151}
1152
1153/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
1154/// operand is referred to by the instructions that have an SP operand.
///
/// Only a left shift by a constant of at most 4 is matched. On success
/// \p Reg is the shifted value and \p Shift the encoded UXTX-plus-shift
/// immediate as an i32 target constant.
// NOTE(review): the line declaring Ext appears to be absent from this
// listing - confirm against the upstream file.
1155bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
1156 SDValue &Shift) {
1157 unsigned ShiftVal = 0;
1159
1160 if (N.getOpcode() != ISD::SHL)
1161 return false;
1162
1163 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1164 if (!CSD)
1165 return false;
1166 ShiftVal = CSD->getZExtValue();
1167 if (ShiftVal > 4)
1168 return false;
1169
1170 Ext = AArch64_AM::UXTX;
1171 Reg = N.getOperand(0);
1172 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1173 MVT::i32);
// Reg and Shift are written even when the fold is then judged not worthwhile.
1174 return isWorthFoldingALU(N);
1175}
1176
1177/// If there's a use of this ADDlow that's not itself a load/store then we'll
1178/// need to create a real ADD instruction from it anyway and there's no point in
1179/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1180/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1181/// leads to duplicated ADRP instructions.
// NOTE(review): the signature line of this helper appears to be absent from
// this listing - confirm against the upstream file.
1183 for (auto *User : N->users()) {
// Every user must be a plain or atomic load/store.
1184 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
1185 User->getOpcode() != ISD::ATOMIC_LOAD &&
1186 User->getOpcode() != ISD::ATOMIC_STORE)
1187 return false;
1188
1189 // ldar and stlr have much more restrictive addressing modes (just a
1190 // register).
1191 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
1192 return false;
1193 }
1194
1195 return true;
1196}
1197
1198/// Check if the immediate offset is valid as a scaled immediate.
1199static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1200 unsigned Size) {
1201 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1202 Offset < (Range << Log2_32(Size)))
1203 return true;
1204 return false;
1205}
1206
1207/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1208/// immediate" address. The "Size" argument is the size in bytes of the memory
1209/// reference, which determines the scale.
1210bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1211 unsigned BW, unsigned Size,
1212 SDValue &Base,
1213 SDValue &OffImm) {
1214 SDLoc dl(N);
1215 const DataLayout &DL = CurDAG->getDataLayout();
1216 const TargetLowering *TLI = getTargetLowering();
1217 if (N.getOpcode() == ISD::FrameIndex) {
1218 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1219 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1220 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1221 return true;
1222 }
1223
1224 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
1225 // selected here doesn't support labels/immediates, only base+offset.
1226 if (CurDAG->isBaseWithConstantOffset(N)) {
1227 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1228 if (IsSignedImm) {
1229 int64_t RHSC = RHS->getSExtValue();
1230 unsigned Scale = Log2_32(Size);
1231 int64_t Range = 0x1LL << (BW - 1);
1232
1233 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1234 RHSC < (Range << Scale)) {
1235 Base = N.getOperand(0);
1236 if (Base.getOpcode() == ISD::FrameIndex) {
1237 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1238 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1239 }
1240 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1241 return true;
1242 }
1243 } else {
1244 // unsigned Immediate
1245 uint64_t RHSC = RHS->getZExtValue();
1246 unsigned Scale = Log2_32(Size);
1247 uint64_t Range = 0x1ULL << BW;
1248
1249 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1250 Base = N.getOperand(0);
1251 if (Base.getOpcode() == ISD::FrameIndex) {
1252 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1253 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1254 }
1255 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1256 return true;
1257 }
1258 }
1259 }
1260 }
1261 // Base only. The address will be materialized into a register before
1262 // the memory is accessed.
1263 // add x0, Xbase, #offset
1264 // stp x1, x2, [x0]
1265 Base = N;
1266 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1267 return true;
1268}
1269
1270/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1271/// immediate" address. The "Size" argument is the size in bytes of the memory
1272/// reference, which determines the scale.
///
/// \returns false only when SelectAddrModeUnscaled accepts the address;
/// every other path visible here returns true.
// NOTE(review): the second half of the condition on the global address offset
// (after listing line 1293) appears to be absent from this listing - confirm
// against the upstream file.
1273bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1274 SDValue &Base, SDValue &OffImm) {
1275 SDLoc dl(N);
1276 const DataLayout &DL = CurDAG->getDataLayout();
1277 const TargetLowering *TLI = getTargetLowering();
// A bare frame index becomes a target frame index with a zero offset.
1278 if (N.getOpcode() == ISD::FrameIndex) {
1279 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1280 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1281 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1282 return true;
1283 }
1284
// For a foldable ADDlow, its two operands are used directly as base and
// offset.
1285 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1286 GlobalAddressSDNode *GAN =
1287 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1288 Base = N.getOperand(0);
1289 OffImm = N.getOperand(1);
1290 if (!GAN)
1291 return true;
1292
1293 if (GAN->getOffset() % Size == 0 &&
1295 return true;
1296 }
1297
// Base plus a constant that fits the scaled unsigned 12-bit field.
1298 if (CurDAG->isBaseWithConstantOffset(N)) {
1299 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1300 int64_t RHSC = (int64_t)RHS->getZExtValue();
1301 unsigned Scale = Log2_32(Size);
1302 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1303 Base = N.getOperand(0);
1304 if (Base.getOpcode() == ISD::FrameIndex) {
1305 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1306 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1307 }
1308 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1309 return true;
1310 }
1311 }
1312 }
1313
1314 // Before falling back to our general case, check if the unscaled
1315 // instructions can handle this. If so, that's preferable.
// Note that Base and OffImm have been written by that call when it succeeds.
1316 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1317 return false;
1318
1319 // Base only. The address will be materialized into a register before
1320 // the memory is accessed.
1321 // add x0, Xbase, #offset
1322 // ldr x0, [x0]
1323 Base = N;
1324 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1325 return true;
1326}
1327
1328/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1329/// immediate" address. This should only match when there is an offset that
1330/// is not valid for a scaled immediate addressing mode. The "Size" argument
1331/// is the size in bytes of the memory reference, which is needed here to know
1332/// what is valid for a scaled immediate.
1333bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1334 SDValue &Base,
1335 SDValue &OffImm) {
1336 if (!CurDAG->isBaseWithConstantOffset(N))
1337 return false;
1338 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1339 int64_t RHSC = RHS->getSExtValue();
1340 if (RHSC >= -256 && RHSC < 256) {
1341 Base = N.getOperand(0);
1342 if (Base.getOpcode() == ISD::FrameIndex) {
1343 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1344 const TargetLowering *TLI = getTargetLowering();
1345 Base = CurDAG->getTargetFrameIndex(
1346 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1347 }
1348 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1349 return true;
1350 }
1351 }
1352 return false;
1353}
1354
// NOTE(review): the signature line of this helper appears to be absent from
// this listing - confirm against the upstream file.
// Inserts N as the sub_32 sub-register of a fresh i64 IMPLICIT_DEF and
// returns the resulting i64 value.
1356 SDLoc dl(N);
1357 SDValue ImpDef = SDValue(
1358 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1359 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1360 N);
1361}
1362
1363/// Check if the given SHL node (\p N), can be used to form an
1364/// extended register for an addressing mode.
///
/// \p Size is the access size in bytes. With \p WantExtend the shifted
/// operand must itself be an extend; \p Offset then receives the narrowed
/// source of that extend and \p SignExtend tells whether it was SXTW.
/// Without it, \p Offset is the shifted operand and \p SignExtend is 0.
// NOTE(review): the declaration of Ext and the condition guarding the first
// `return false` in the WantExtend branch appear to be absent from this
// listing - confirm against the upstream file.
1365bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1366 bool WantExtend, SDValue &Offset,
1367 SDValue &SignExtend) {
1368 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
// The shift amount must be a constant in the range 0..7.
1369 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1370 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1371 return false;
1372
1373 SDLoc dl(N);
1374 if (WantExtend) {
1376 getExtendTypeForNode(N.getOperand(0), true);
1378 return false;
1379
1380 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1381 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1382 MVT::i32);
1383 } else {
1384 Offset = N.getOperand(0);
1385 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1386 }
1387
// Only a shift of zero or of log2(Size) is accepted.
1388 unsigned LegalShiftVal = Log2_32(Size);
1389 unsigned ShiftVal = CSD->getZExtValue();
1390
1391 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1392 return false;
1393
1394 return isWorthFoldingAddr(N, Size);
1395}
1396
/// Match an ADD as a base register plus an extended register offset.
/// \p Size is the access size in bytes. On success \p SignExtend and
/// \p DoShift are i32 target constants describing the extend kind and whether
/// the offset is shifted.
// NOTE(review): several lines of this definition (one parameter line, the
// condition rejecting immediate adds, the declaration of Ext, and the tails
// of the two unshifted-extend conditions) appear to be absent from this
// listing - confirm against the upstream file.
1397bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1399 SDValue &SignExtend,
1400 SDValue &DoShift) {
1401 if (N.getOpcode() != ISD::ADD)
1402 return false;
1403 SDValue LHS = N.getOperand(0);
1404 SDValue RHS = N.getOperand(1);
1405 SDLoc dl(N);
1406
1407 // We don't want to match immediate adds here, because they are better lowered
1408 // to the register-immediate addressing modes.
1410 return false;
1411
1412 // Check if this particular node is reused in any non-memory related
1413 // operation. If yes, do not try to fold this node into the address
1414 // computation, since the computation will be kept.
1415 const SDNode *Node = N.getNode();
1416 for (SDNode *UI : Node->users()) {
1417 if (!isMemOpOrPrefetch(UI))
1418 return false;
1419 }
1420
1421 // Remember if it is worth folding N when it produces extended register.
1422 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1423
1424 // Try to match a shifted extend on the RHS.
1425 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1426 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1427 Base = LHS;
1428 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1429 return true;
1430 }
1431
1432 // Try to match a shifted extend on the LHS.
1433 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1434 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1435 Base = RHS;
1436 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1437 return true;
1438 }
1439
1440 // There was no shift, whatever else we find.
1441 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1442
1444 // Try to match an unshifted extend on the LHS.
1445 if (IsExtendedRegisterWorthFolding &&
1446 (Ext = getExtendTypeForNode(LHS, true)) !=
1448 Base = RHS;
1449 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1450 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1451 MVT::i32);
// If this check fails, the outputs written above are left in place and the
// RHS is tried next.
1452 if (isWorthFoldingAddr(LHS, Size))
1453 return true;
1454 }
1455
1456 // Try to match an unshifted extend on the RHS.
1457 if (IsExtendedRegisterWorthFolding &&
1458 (Ext = getExtendTypeForNode(RHS, true)) !=
1460 Base = LHS;
1461 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1462 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1463 MVT::i32);
1464 if (isWorthFoldingAddr(RHS, Size))
1465 return true;
1466 }
1467
1468 return false;
1469}
1470
// Check if the given immediate is preferred by ADD. If an immediate can be
// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
// encoded by one MOVZ, return true.
static bool isPreferredADD(int64_t ImmOff) {
  const uint64_t Bits = static_cast<uint64_t>(ImmOff);
  constexpr uint64_t Low12Mask = 0xfffULL;         // bits 0..11
  constexpr uint64_t Shifted12Mask = 0xfff000ULL;  // bits 12..23

  // Constant in [0x0, 0xfff] can be encoded in ADD.
  if ((Bits & ~Low12Mask) == 0)
    return true;

  // Anything with bits outside 12..23 cannot be encoded in an "ADD LSL #12".
  if ((Bits & ~Shifted12Mask) != 0)
    return false;

  // As a single MOVZ is faster than an "ADD of LSL #12", ignore constants
  // that sit entirely in bits 12..15 or entirely in bits 16..23.
  const bool HasBits12To15 = (Bits & 0xf000ULL) != 0;
  const bool HasBits16To23 = (Bits & 0xff0000ULL) != 0;
  return HasBits12To15 && HasBits16To23;
}
1485
/// Match an ADD as a base register plus a 64-bit register offset.
/// \p Size is the access size in bytes. On success \p SignExtend and
/// \p DoShift are i32 target constants; SignExtend is only ever set to false
/// directly in this function.
// NOTE(review): one parameter line of the signature appears to be absent from
// this listing - confirm against the upstream file.
1486bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1488 SDValue &SignExtend,
1489 SDValue &DoShift) {
1490 if (N.getOpcode() != ISD::ADD)
1491 return false;
1492 SDValue LHS = N.getOperand(0);
1493 SDValue RHS = N.getOperand(1);
1494 SDLoc DL(N);
1495
1496 // Check if this particular node is reused in any non-memory related
1497 // operation. If yes, do not try to fold this node into the address
1498 // computation, since the computation will be kept.
1499 const SDNode *Node = N.getNode();
1500 for (SDNode *UI : Node->users()) {
1501 if (!isMemOpOrPrefetch(UI))
1502 return false;
1503 }
1504
1505 // Watch out if RHS is a wide immediate, it can not be selected into
1506 // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into
1507 // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate
1508 // instructions like:
1509 // MOV X0, WideImmediate
1510 // ADD X1, BaseReg, X0
1511 // LDR X2, [X1, 0]
1512 // For such situation, using [BaseReg, XReg] addressing mode can save one
1513 // ADD/SUB:
1514 // MOV X0, WideImmediate
1515 // LDR X2, [BaseReg, X0]
1516 if (isa<ConstantSDNode>(RHS)) {
1517 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1518 // Skip immediates that can be selected by the load/store addressing mode.
1519 // Also skip immediates that can be encoded by a single ADD (SUB is also
1520 // checked by using -ImmOff).
// NOTE(review): -ImmOff overflows if ImmOff is the minimum int64_t value -
// confirm whether such an offset can reach this point.
1521 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1522 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1523 return false;
1524
1525 SDValue Ops[] = { RHS };
1526 SDNode *MOVI =
1527 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1528 SDValue MOVIV = SDValue(MOVI, 0);
1529 // This ADD of two X register will be selected into [Reg+Reg] mode.
// Only N is replaced here; LHS and RHS keep referring to the original
// operands in the code below.
1530 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1531 }
1532
1533 // Remember if it is worth folding N when it produces extended register.
1534 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1535
1536 // Try to match a shifted extend on the RHS.
1537 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1538 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1539 Base = LHS;
1540 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1541 return true;
1542 }
1543
1544 // Try to match a shifted extend on the LHS.
1545 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1546 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1547 Base = RHS;
1548 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1549 return true;
1550 }
1551
1552 // Match any non-shifted, non-extend, non-immediate add expression.
1553 Base = LHS;
1554 Offset = RHS;
1555 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1556 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1557 // Reg1 + Reg2 is free: no check needed.
1558 return true;
1559}
1560
1561SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1562 static const unsigned RegClassIDs[] = {
1563 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1564 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1565 AArch64::dsub2, AArch64::dsub3};
1566
1567 return createTuple(Regs, RegClassIDs, SubRegs);
1568}
1569
1570SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1571 static const unsigned RegClassIDs[] = {
1572 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1573 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1574 AArch64::qsub2, AArch64::qsub3};
1575
1576 return createTuple(Regs, RegClassIDs, SubRegs);
1577}
1578
1579SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1580 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1581 AArch64::ZPR3RegClassID,
1582 AArch64::ZPR4RegClassID};
1583 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1584 AArch64::zsub2, AArch64::zsub3};
1585
1586 return createTuple(Regs, RegClassIDs, SubRegs);
1587}
1588
1589SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1590 assert(Regs.size() == 2 || Regs.size() == 4);
1591
1592 // The createTuple interface requires 3 RegClassIDs for each possible
1593 // tuple type even though we only have them for ZPR2 and ZPR4.
1594 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1595 AArch64::ZPR4Mul4RegClassID};
1596 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1597 AArch64::zsub2, AArch64::zsub3};
1598 return createTuple(Regs, RegClassIDs, SubRegs);
1599}
1600
/// Combine \p Regs into a single REG_SEQUENCE value of type Untyped.
/// \p RegClassIDs is indexed by the number of registers minus two, and
/// \p SubRegs by the position of each register in the sequence.
// NOTE(review): the declaration of Ops appears to be absent from this
// listing - confirm against the upstream file.
1601SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1602 const unsigned RegClassIDs[],
1603 const unsigned SubRegs[]) {
1604 // There's no special register-class for a vector-list of 1 element: it's just
1605 // a vector.
1606 if (Regs.size() == 1)
1607 return Regs[0];
1608
1609 assert(Regs.size() >= 2 && Regs.size() <= 4);
1610
1611 SDLoc DL(Regs[0]);
1612
1614
1615 // First operand of REG_SEQUENCE is the desired RegClass.
1616 Ops.push_back(
1617 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1618
1619 // Then we get pairs of source & subregister-position for the components.
1620 for (unsigned i = 0; i < Regs.size(); ++i) {
1621 Ops.push_back(Regs[i]);
1622 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1623 }
1624
1625 SDNode *N =
1626 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1627 return SDValue(N, 0);
1628}
1629
/// Replace \p N with machine node \p Opc whose table operand is a Q-register
/// tuple built from \p NumVecs consecutive operands of \p N. When \p isExt is
/// set, operand 1 of \p N is passed first and the vectors start one operand
/// later.
// NOTE(review): the declaration of Ops appears to be absent from this
// listing - confirm against the upstream file.
1630void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1631 bool isExt) {
1632 SDLoc dl(N);
1633 EVT VT = N->getValueType(0);
1634
// isExt converts to 0 or 1 and is used as an operand-index offset.
1635 unsigned ExtOff = isExt;
1636
1637 // Form a REG_SEQUENCE to force register allocation.
1638 unsigned Vec0Off = ExtOff + 1;
1639 SmallVector<SDValue, 4> Regs(N->ops().slice(Vec0Off, NumVecs));
1640 SDValue RegSeq = createQTuple(Regs);
1641
1643 if (isExt)
1644 Ops.push_back(N->getOperand(1));
1645 Ops.push_back(RegSeq);
// The operand that follows the vectors is passed last.
1646 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1647 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1648}
1649
/// Split a discriminator into a (constant, address) pair.
///
/// When the constant part is a constant that fits in 16 bits, the result is
/// that constant as an i64 target constant plus the address part (XZR if
/// there is none). Otherwise the result is a zero target constant plus the
/// whole of \p Disc.
// NOTE(review): the line carrying the function name and parameter list
// appears to be absent from this listing - confirm against the upstream file.
1650static std::tuple<SDValue, SDValue>
1652 SDLoc DL(Disc);
1653 SDValue AddrDisc;
1654 SDValue ConstDisc;
1655
1656 // If this is a blend, remember the constant and address discriminators.
1657 // Otherwise, it's either a constant discriminator, or a non-blended
1658 // address discriminator.
1659 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1660 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
1661 AddrDisc = Disc->getOperand(1);
1662 ConstDisc = Disc->getOperand(2);
1663 } else {
1664 ConstDisc = Disc;
1665 }
1666
1667 // If the constant discriminator (either the blend RHS, or the entire
1668 // discriminator value) isn't a 16-bit constant, bail out, and let the
1669 // discriminator be computed separately.
1670 auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
1671 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
1672 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
1673
1674 // If there's no address discriminator, use XZR directly.
1675 if (!AddrDisc)
1676 AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
1677
1678 return std::make_tuple(
1679 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
1680 AddrDisc);
1681}
1682
1683void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1684 SDLoc DL(N);
1685 // IntrinsicID is operand #0
1686 SDValue Val = N->getOperand(1);
1687 SDValue AUTKey = N->getOperand(2);
1688 SDValue AUTDisc = N->getOperand(3);
1689
1690 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1691 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1692
1693 SDValue AUTAddrDisc, AUTConstDisc;
1694 std::tie(AUTConstDisc, AUTAddrDisc) =
1695 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1696
1697 if (!Subtarget->isX16X17Safer()) {
1698 std::vector<SDValue> Ops = {Val, AUTKey, AUTConstDisc, AUTAddrDisc};
1699 // Copy deactivation symbol if present.
1700 if (N->getNumOperands() > 4)
1701 Ops.push_back(N->getOperand(4));
1702
1703 SDNode *AUT =
1704 CurDAG->getMachineNode(AArch64::AUTxMxN, DL, MVT::i64, MVT::i64, Ops);
1705 ReplaceNode(N, AUT);
1706 } else {
1707 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1708 AArch64::X16, Val, SDValue());
1709 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
1710
1711 SDNode *AUT = CurDAG->getMachineNode(AArch64::AUTx16x17, DL, MVT::i64, Ops);
1712 ReplaceNode(N, AUT);
1713 }
1714}
1715
1716void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
1717 SDLoc DL(N);
1718 // IntrinsicID is operand #0, if W_CHAIN it is #1
1719 int OffsetBase = N->getOpcode() == ISD::INTRINSIC_W_CHAIN ? 1 : 0;
1720 SDValue Val = N->getOperand(OffsetBase + 1);
1721 SDValue AUTKey = N->getOperand(OffsetBase + 2);
1722 SDValue AUTDisc = N->getOperand(OffsetBase + 3);
1723 SDValue PACKey = N->getOperand(OffsetBase + 4);
1724 SDValue PACDisc = N->getOperand(OffsetBase + 5);
1725 uint32_t IntNum = N->getConstantOperandVal(OffsetBase + 0);
1726 bool HasLoad = IntNum == Intrinsic::ptrauth_resign_load_relative;
1727
1728 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1729 unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();
1730
1731 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1732 PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);
1733
1734 SDValue AUTAddrDisc, AUTConstDisc;
1735 std::tie(AUTConstDisc, AUTAddrDisc) =
1736 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1737
1738 SDValue PACAddrDisc, PACConstDisc;
1739 std::tie(PACConstDisc, PACAddrDisc) =
1740 extractPtrauthBlendDiscriminators(PACDisc, CurDAG);
1741
1742 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1743 AArch64::X16, Val, SDValue());
1744
1745 if (HasLoad) {
1746 SDValue Addend = N->getOperand(OffsetBase + 6);
1747 SDValue IncomingChain = N->getOperand(0);
1748 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc,
1749 PACKey, PACConstDisc, PACAddrDisc,
1750 Addend, IncomingChain, X16Copy.getValue(1)};
1751
1752 SDNode *AUTRELLOADPAC = CurDAG->getMachineNode(AArch64::AUTRELLOADPAC, DL,
1753 MVT::i64, MVT::Other, Ops);
1754 ReplaceNode(N, AUTRELLOADPAC);
1755 } else {
1756 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
1757 PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};
1758
1759 SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
1760 ReplaceNode(N, AUTPAC);
1761 }
1762}
1763
/// Try to select the indexed (pre/post-increment) load \p N as a writeback
/// load machine instruction.
///
/// Returns false without modifying \p N when the load is unindexed or when no
/// opcode is chosen for its memory type. Otherwise a machine node with results
/// (i64 written-back address, loaded value, chain) is created, the three
/// results of \p N are redirected to it, \p N is removed and true is returned.
1764bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1765 LoadSDNode *LD = cast<LoadSDNode>(N);
1766 if (LD->isUnindexed())
1767 return false;
1768 EVT VT = LD->getMemoryVT();
1769 EVT DstVT = N->getValueType(0);
1770 ISD::MemIndexedMode AM = LD->getAddressingMode();
1771 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1772 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
  // NOTE(review): the offset is read zero-extended and then narrowed to int.
1773 int OffsetVal = (int)OffsetOp->getZExtValue();
1774
1775 // We're not doing validity checking here. That was done when checking
1776 // if we should mark the load as indexed or not. We're just selecting
1777 // the right instruction.
1778 unsigned Opcode = 0;
1779
1780 ISD::LoadExtType ExtType = LD->getExtensionType();
  // Set when a 32-bit load result must be wrapped in SUBREG_TO_REG to form the
  // i64 value that users of N expect.
1781 bool InsertTo64 = false;
1782 if (VT == MVT::i64)
1783 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1784 else if (VT == MVT::i32) {
1785 if (ExtType == ISD::NON_EXTLOAD)
1786 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1787 else if (ExtType == ISD::SEXTLOAD)
1788 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1789 else {
1790 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1791 InsertTo64 = true;
1792 // The result of the load is only i32. It's the subreg_to_reg that makes
1793 // it into an i64.
1794 DstVT = MVT::i32;
1795 }
1796 } else if (VT == MVT::i16) {
1797 if (ExtType == ISD::SEXTLOAD) {
1798 if (DstVT == MVT::i64)
1799 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1800 else
1801 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1802 } else {
1803 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1804 InsertTo64 = DstVT == MVT::i64;
1805 // The result of the load is only i32. It's the subreg_to_reg that makes
1806 // it into an i64.
1807 DstVT = MVT::i32;
1808 }
1809 } else if (VT == MVT::i8) {
1810 if (ExtType == ISD::SEXTLOAD) {
1811 if (DstVT == MVT::i64)
1812 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1813 else
1814 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1815 } else {
1816 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1817 InsertTo64 = DstVT == MVT::i64;
1818 // The result of the load is only i32. It's the subreg_to_reg that makes
1819 // it into an i64.
1820 DstVT = MVT::i32;
1821 }
1822 } else if (VT == MVT::f16) {
1823 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1824 } else if (VT == MVT::bf16) {
1825 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1826 } else if (VT == MVT::f32) {
1827 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1828 } else if (VT == MVT::f64 ||
1829 (VT.is64BitVector() && Subtarget->isLittleEndian())) {
1830 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1831 } else if (VT.is128BitVector() && Subtarget->isLittleEndian()) {
1832 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1833 } else if (VT.is64BitVector()) {
  // Reached only when the subtarget is not little-endian (the little-endian
  // 64-bit vector case was handled above). Only a post-increment by exactly 8
  // is selected, using LD1 keyed on the element size.
1834 if (IsPre || OffsetVal != 8)
1835 return false;
1836 switch (VT.getScalarSizeInBits()) {
1837 case 8:
1838 Opcode = AArch64::LD1Onev8b_POST;
1839 break;
1840 case 16:
1841 Opcode = AArch64::LD1Onev4h_POST;
1842 break;
1843 case 32:
1844 Opcode = AArch64::LD1Onev2s_POST;
1845 break;
1846 case 64:
1847 Opcode = AArch64::LD1Onev1d_POST;
1848 break;
1849 default:
1850 llvm_unreachable("Expected vector element to be a power of 2");
1851 }
1852 } else if (VT.is128BitVector()) {
  // Same as the 64-bit vector case above, for a non-little-endian subtarget:
  // only a post-increment by exactly 16 is selected.
1853 if (IsPre || OffsetVal != 16)
1854 return false;
1855 switch (VT.getScalarSizeInBits()) {
1856 case 8:
1857 Opcode = AArch64::LD1Onev16b_POST;
1858 break;
1859 case 16:
1860 Opcode = AArch64::LD1Onev8h_POST;
1861 break;
1862 case 32:
1863 Opcode = AArch64::LD1Onev4s_POST;
1864 break;
1865 case 64:
1866 Opcode = AArch64::LD1Onev2d_POST;
1867 break;
1868 default:
1869 llvm_unreachable("Expected vector element to be a power of 2");
1870 }
1871 } else
1872 return false;
1873 SDValue Chain = LD->getChain();
1874 SDValue Base = LD->getBasePtr();
1875 SDLoc dl(N);
1876 // LD1 encodes an immediate offset by using XZR as the offset register.
1877 SDValue Offset = (VT.isVector() && !Subtarget->isLittleEndian())
1878 ? CurDAG->getRegister(AArch64::XZR, MVT::i64)
1879 : CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1880 SDValue Ops[] = { Base, Offset, Chain };
  // Result 0 is the i64 write-back value, result 1 the loaded value (DstVT),
  // result 2 the chain.
1881 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1882 MVT::Other, Ops);
1883
1884 // Transfer memoperands.
1885 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1886 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1887
1888 // Either way, we're replacing the node, so tell the caller that.
1889 SDValue LoadedVal = SDValue(Res, 1);
1890 if (InsertTo64) {
1891 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1892 LoadedVal = SDValue(CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, dl,
1893 MVT::i64, LoadedVal, SubReg),
1894 0);
1895 }
1896
  // N's results are (loaded value, write-back address, chain); the machine
  // node orders the first two the other way round.
1897 ReplaceUses(SDValue(N, 0), LoadedVal);
1898 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1899 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1900 CurDAG->RemoveDeadNode(N);
1901 return true;
1902}
1903
/// Select the multi-vector load \p N as machine instruction \p Opc.
///
/// The machine node takes operand 2 of \p N (the address) and the chain, and
/// produces one Untyped super-register plus a chain. Each of the \p NumVecs
/// vector results of \p N is replaced with a sub-register extract at index
/// \p SubRegIdx + i; result \p NumVecs of \p N (its chain) is replaced with
/// the new chain. \p N is then removed.
1904void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1905 unsigned SubRegIdx) {
1906 SDLoc dl(N);
1907 EVT VT = N->getValueType(0);
1908 SDValue Chain = N->getOperand(0);
1909
1910 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1911 Chain};
1912
1913 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1914
1915 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1916 SDValue SuperReg = SDValue(Ld, 0);
1917 for (unsigned i = 0; i < NumVecs; ++i)
1918 ReplaceUses(SDValue(N, i),
1919 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1920
1921 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1922
1923 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1924 // because it's too simple to have needed special treatment during lowering.
1925 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1926 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1927 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1928 }
1929
1930 CurDAG->RemoveDeadNode(N);
1931}
1932
/// Select the post-incremented multi-vector load \p N as machine instruction
/// \p Opc.
///
/// Operands 1 and 2 of \p N supply the address and the increment. The machine
/// node produces (i64 write-back, Untyped vector list, chain). Result
/// \p NumVecs of \p N is the write-back value and result \p NumVecs + 1 its
/// chain. With a single vector the list result is used directly; otherwise
/// each vector is extracted at sub-register index \p SubRegIdx + i.
1933void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1934 unsigned Opc, unsigned SubRegIdx) {
1935 SDLoc dl(N);
1936 EVT VT = N->getValueType(0);
1937 SDValue Chain = N->getOperand(0);
1938
1939 SDValue Ops[] = {N->getOperand(1), // Mem operand
1940 N->getOperand(2), // Incremental
1941 Chain};
1942
1943 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1944 MVT::Untyped, MVT::Other};
1945
1946 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1947
1948 // Update uses of write back register
1949 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1950
1951 // Update uses of vector list
1952 SDValue SuperReg = SDValue(Ld, 1);
1953 if (NumVecs == 1)
1954 ReplaceUses(SDValue(N, 0), SuperReg);
1955 else
1956 for (unsigned i = 0; i < NumVecs; ++i)
1957 ReplaceUses(SDValue(N, i),
1958 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1959
1960 // Transfer memoperands.
1961 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1962 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1963
1964 // Update the chain
1965 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1966 CurDAG->RemoveDeadNode(N);
1967}
1968
1969/// Optimize \p OldBase and \p OldOffset by selecting the best addressing
1970/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1971/// new Base and an SDValue representing the new offset.
///
/// A reg+imm form (immediate in [-8, 7]) is tried first; the reg+reg form is
/// tried only if that fails. \p Opc_rr is returned only when the reg+reg form
/// matched; in every other case, including when neither form matched,
/// \p Opc_ri is returned. The returned base and offset start out as
/// \p OldBase and \p OldOffset and are passed by reference to the matchers.
1972std::tuple<unsigned, SDValue, SDValue>
1973AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1974 unsigned Opc_ri,
1975 const SDValue &OldBase,
1976 const SDValue &OldOffset,
1977 unsigned Scale) {
1978 SDValue NewBase = OldBase;
1979 SDValue NewOffset = OldOffset;
1980 // Detect a possible Reg+Imm addressing mode.
1981 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1982 N, OldBase, NewBase, NewOffset);
1983
1984 // Detect a possible reg+reg addressing mode, but only if we haven't already
1985 // detected a Reg+Imm one.
1986 const bool IsRegReg =
1987 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1988
1989 // Select the instruction.
1990 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1991}
1992
/// Element-type category used as the template argument of SelectOpcodeFromVT
/// to restrict which scalable vector element types are accepted.
// NOTE(review): listing line 1997 is absent from this capture; it presumably
// declares one more enumerator - confirm against the upstream source.
1993enum class SelectTypeKind {
1994 Int1 = 0,
1995 Int = 1,
1996 FP = 2,
1998};
1999
2000/// This function selects an opcode from a list of opcodes, which is
2001/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
2002/// element types, in this order.
///
/// Returns 0 when \p VT is not a scalable vector, when its element type is
/// rejected for \p Kind, when its minimum element count is not one of
/// 16/8/4/2, or when \p Opcodes has no entry at the selected position.
2003template <SelectTypeKind Kind>
2004static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
2005 // Only match scalable vector VTs
2006 if (!VT.isScalableVector())
2007 return 0;
2008
2009 EVT EltVT = VT.getVectorElementType();
  // The table index is derived from the minimum element count, not directly
  // from the element width.
2010 unsigned Key = VT.getVectorMinNumElements();
2011 switch (Kind) {
  // NOTE(review): listing lines 2012, 2014 and 2019 are absent from this
  // capture; they presumably hold the case labels for the three groups below
  // - confirm against the upstream source.
2013 break;
2015 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
2016 EltVT != MVT::i64)
2017 return 0;
2018 break;
2020 if (EltVT != MVT::i1)
2021 return 0;
2022 break;
2023 case SelectTypeKind::FP:
  // bf16 is forced onto the first table entry regardless of element count.
2024 if (EltVT == MVT::bf16)
2025 Key = 16;
2026 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
2027 EltVT != MVT::f64)
2028 return 0;
2029 break;
2030 }
2031
2032 unsigned Offset;
2033 switch (Key) {
2034 case 16: // 8-bit or bf16
2035 Offset = 0;
2036 break;
2037 case 8: // 16-bit
2038 Offset = 1;
2039 break;
2040 case 4: // 32-bit
2041 Offset = 2;
2042 break;
2043 case 2: // 64-bit
2044 Offset = 3;
2045 break;
2046 default:
2047 return 0;
2048 }
2049
  // A table shorter than the selected position yields 0 instead of reading
  // out of bounds.
2050 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
2051}
2052
2053// This function is almost identical to SelectWhilePair, but has an
2054// extra check on the range of the immediate operand.
2055// TODO: Merge these two functions together at some point?
//
// If operand 2 of N is a constant greater than 1, the function returns without
// selecting anything and N is left unchanged. Otherwise Opc is emitted on
// operands 1 and 2, and the two results of N are replaced with extracts of
// sub-registers psub0 and psub0 + 1 from its Untyped result.
2056void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
2057 // Immediate can be either 0 or 1.
2058 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
2059 if (Imm->getZExtValue() > 1)
2060 return;
2061
2062 SDLoc DL(N);
2063 EVT VT = N->getValueType(0);
2064 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
2065 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2066 SDValue SuperReg = SDValue(WhilePair, 0);
2067
2068 for (unsigned I = 0; I < 2; ++I)
2069 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2070 AArch64::psub0 + I, DL, VT, SuperReg));
2071
2072 CurDAG->RemoveDeadNode(N);
2073}
2074
/// Select \p N as machine instruction \p Opc producing a pair of results.
///
/// Operands 1 and 2 of \p N are passed through unchanged. The machine node
/// yields one Untyped value from which the two results of \p N are extracted
/// at sub-register indices psub0 and psub0 + 1. \p N is then removed.
2075void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
2076 SDLoc DL(N);
2077 EVT VT = N->getValueType(0);
2078
2079 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
2080
2081 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2082 SDValue SuperReg = SDValue(WhilePair, 0);
2083
2084 for (unsigned I = 0; I < 2; ++I)
2085 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2086 AArch64::psub0 + I, DL, VT, SuperReg));
2087
2088 CurDAG->RemoveDeadNode(N);
2089}
2090
/// Select the chainless multi-vector intrinsic \p N as \p Opcode.
///
/// Operands 1..\p NumVecs of \p N are combined by createZTuple into the single
/// operand of the machine node. Each of the \p NumVecs results of \p N is
/// replaced with an extract at sub-register index zsub0 + i of the node's
/// Untyped result, and \p N is removed.
2091void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
2092 unsigned Opcode) {
2093 EVT VT = N->getValueType(0);
2094 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2095 SDValue Ops = createZTuple(Regs);
2096 SDLoc DL(N);
2097 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
2098 SDValue SuperReg = SDValue(Intrinsic, 0);
2099 for (unsigned i = 0; i < NumVecs; ++i)
2100 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2101 AArch64::zsub0 + i, DL, VT, SuperReg));
2102
2103 CurDAG->RemoveDeadNode(N);
2104}
2105
/// Select the chained multi-vector intrinsic \p N as \p Opcode.
///
/// The machine node receives operands 2..end of \p N followed by the incoming
/// chain (operand 0), and produces an Untyped value plus a chain. The
/// \p NumVecs vector results of \p N are extracted at sub-register indices
/// zsub0 + i; result \p NumVecs of \p N (its chain) is replaced with the new
/// chain. \p N is then removed.
2106void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
2107 unsigned Opcode) {
2108 SDLoc DL(N);
2109 EVT VT = N->getValueType(0);
2110 SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
2111 Ops.push_back(/*Chain*/ N->getOperand(0));
2112
2113 SDNode *Instruction =
2114 CurDAG->getMachineNode(Opcode, DL, {MVT::Untyped, MVT::Other}, Ops);
2115 SDValue SuperReg = SDValue(Instruction, 0);
2116
2117 for (unsigned i = 0; i < NumVecs; ++i)
2118 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2119 AArch64::zsub0 + i, DL, VT, SuperReg));
2120
2121 // Copy chain
2122 unsigned ChainIdx = NumVecs;
2123 ReplaceUses(SDValue(N, ChainIdx), SDValue(Instruction, 1));
2124 CurDAG->RemoveDeadNode(N);
2125}
2126
/// Select the multi-vector intrinsic \p N as \p Opcode, packing its vector
/// operands into tuples.
///
/// Operands are consumed in order after the intrinsic ID: an optional leading
/// operand when \p HasPred is set, then \p NumVecs operands combined by
/// createZMulTuple, then either a second \p NumVecs-wide tuple (\p IsZmMulti)
/// or a single operand, then whatever remains. The \p NumVecs results of \p N
/// are extracted at sub-register indices zsub0 + i from the node's Untyped
/// result, and \p N is removed.
2127void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
2128 unsigned NumVecs,
2129 bool IsZmMulti,
2130 unsigned Opcode,
2131 bool HasPred) {
2132 assert(Opcode != 0 && "Unexpected opcode");
2133
2134 SDLoc DL(N);
2135 EVT VT = N->getValueType(0);
2136 SDUse *OpsIter = N->op_begin() + 1; // Skip intrinsic ID
  // NOTE(review): listing line 2137 is absent from this capture; it presumably
  // declares the `Ops` container used below - confirm against upstream.
2138
  // Consumes the next NumVecs operands and returns them as one tuple value.
2139 auto GetMultiVecOperand = [&]() {
2140 SmallVector<SDValue, 4> Regs(OpsIter, OpsIter + NumVecs);
2141 OpsIter += NumVecs;
2142 return createZMulTuple(Regs);
2143 };
2144
2145 if (HasPred)
2146 Ops.push_back(*OpsIter++);
2147
2148 Ops.push_back(GetMultiVecOperand());
2149 if (IsZmMulti)
2150 Ops.push_back(GetMultiVecOperand());
2151 else
2152 Ops.push_back(*OpsIter++);
2153
2154 // Append any remaining operands.
2155 Ops.append(OpsIter, N->op_end());
2156 SDNode *Intrinsic;
2157 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
2158 SDValue SuperReg = SDValue(Intrinsic, 0);
2159 for (unsigned i = 0; i < NumVecs; ++i)
2160 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2161 AArch64::zsub0 + i, DL, VT, SuperReg));
2162
2163 CurDAG->RemoveDeadNode(N);
2164}
2165
/// Select the predicated multi-vector load \p N, choosing between the
/// reg+imm opcode \p Opc_ri and the reg+reg opcode \p Opc_rr via
/// findAddrModeSVELoadStore.
///
/// When \p IsIntr is true the predicate and address are operands 2 and 3 of
/// \p N; otherwise they are operands 1 and 2. The \p NumVecs vector results of
/// \p N are extracted at sub-register indices zsub0 + i from the node's
/// Untyped result; result \p NumVecs of \p N (its chain) is replaced with the
/// new chain. \p N is then removed.
2166void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
2167 unsigned Scale, unsigned Opc_ri,
2168 unsigned Opc_rr, bool IsIntr) {
2169 assert(Scale < 5 && "Invalid scaling value.");
2170 SDLoc DL(N);
2171 EVT VT = N->getValueType(0);
2172 SDValue Chain = N->getOperand(0);
2173
2174 // Optimize addressing mode.
  // NOTE(review): listing line 2175 is absent from this capture; it presumably
  // declares `Base` and `Offset` - confirm against upstream.
2176 unsigned Opc;
  // The offset handed to the address-mode search starts as the constant 0.
2177 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2178 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
2179 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
2180
2181 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
2182 Base, // Memory operand
2183 Offset, Chain};
2184
2185 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2186
2187 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2188 SDValue SuperReg = SDValue(Load, 0);
2189 for (unsigned i = 0; i < NumVecs; ++i)
2190 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2191 AArch64::zsub0 + i, DL, VT, SuperReg));
2192
2193 // Copy chain
2194 unsigned ChainIdx = NumVecs;
2195 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2196 CurDAG->RemoveDeadNode(N);
2197}
2198
/// Select the predicate-as-counter multi-vector load \p N, choosing between
/// \p Opc_ri and \p Opc_rr via findAddrModeSVELoadStore.
///
/// Operand 2 of \p N is the predicate and operand 3 the base address; the
/// offset starts as the constant 0. The \p NumVecs vector results of \p N are
/// extracted at sub-register indices zsub0 + i from the node's Untyped result;
/// result \p NumVecs of \p N (its chain) is replaced with the new chain.
/// \p N is then removed.
2199void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
2200 unsigned NumVecs,
2201 unsigned Scale,
2202 unsigned Opc_ri,
2203 unsigned Opc_rr) {
2204 assert(Scale < 4 && "Invalid scaling value.");
2205 SDLoc DL(N);
2206 EVT VT = N->getValueType(0);
2207 SDValue Chain = N->getOperand(0);
2208
2209 SDValue PNg = N->getOperand(2);
2210 SDValue Base = N->getOperand(3);
2211 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
2212 unsigned Opc;
2213 std::tie(Opc, Base, Offset) =
2214 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
2215
2216 SDValue Ops[] = {PNg, // Predicate-as-counter
2217 Base, // Memory operand
2218 Offset, Chain};
2219
2220 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2221
2222 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2223 SDValue SuperReg = SDValue(Load, 0);
2224 for (unsigned i = 0; i < NumVecs; ++i)
2225 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2226 AArch64::zsub0 + i, DL, VT, SuperReg));
2227
2228 // Copy chain
2229 unsigned ChainIdx = NumVecs;
2230 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2231 CurDAG->RemoveDeadNode(N);
2232}
2233
/// Select \p N through SelectUnaryMultiIntrinsic (with tuple input) when its
/// first result type is nxv4f32. For any other type the function returns
/// without selecting anything and \p N is left unchanged.
2234void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
2235 unsigned Opcode) {
2236 if (N->getValueType(0) != MVT::nxv4f32)
2237 return;
2238 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
2239}
2240
/// Select the chained multi-vector intrinsic \p Node as \p Opc, with an
/// immediate operand bounded by \p MaxImm.
///
/// The function returns without selecting anything when operand 4 is a
/// constant greater than \p MaxImm, or when operand 2 cannot be converted to a
/// register by ImmToReg<AArch64::ZT0, 0>. Otherwise the machine node takes
/// (ZtValue, operand 3, operand 4, chain); the \p NumOutVecs results are
/// extracted at sub-register indices zsub0 + I, result \p NumOutVecs of
/// \p Node (its chain) is replaced with the new chain, and \p Node is removed.
2241void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
2242 unsigned NumOutVecs,
2243 unsigned Opc,
2244 uint32_t MaxImm) {
2245 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
2246 if (Imm->getZExtValue() > MaxImm)
2247 return;
2248
2249 SDValue ZtValue;
2250 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2251 return;
2252
2253 SDValue Chain = Node->getOperand(0);
2254 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4), Chain};
2255 SDLoc DL(Node);
2256 EVT VT = Node->getValueType(0);
2257
2258 SDNode *Instruction =
2259 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2260 SDValue SuperReg = SDValue(Instruction, 0);
2261
2262 for (unsigned I = 0; I < NumOutVecs; ++I)
2263 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2264 AArch64::zsub0 + I, DL, VT, SuperReg));
2265
2266 // Copy chain
2267 unsigned ChainIdx = NumOutVecs;
2268 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2269 CurDAG->RemoveDeadNode(Node);
2270}
2271
/// Select the chained multi-vector intrinsic \p Node as \p Opc, with a
/// two-vector tuple operand.
///
/// The function returns without selecting anything when operand 2 cannot be
/// converted to a register by ImmToReg<AArch64::ZT0, 0>. Otherwise operands 3
/// and 4 are combined by createZMulTuple and the machine node takes
/// (ZtValue, tuple, chain); the \p NumOutVecs results are extracted at
/// sub-register indices zsub0 + I, result \p NumOutVecs of \p Node (its
/// chain) is replaced with the new chain, and \p Node is removed.
2272void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
2273 unsigned NumOutVecs,
2274 unsigned Opc) {
2275 SDValue ZtValue;
2276 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2277 return;
2278
2279 SDValue Chain = Node->getOperand(0);
2280 SDValue Ops[] = {ZtValue,
2281 createZMulTuple({Node->getOperand(3), Node->getOperand(4)}),
2282 Chain};
2283
2284 SDLoc DL(Node);
2285 EVT VT = Node->getValueType(0);
2286
2287 SDNode *Instruction =
2288 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2289 SDValue SuperReg = SDValue(Instruction, 0);
2290
2291 for (unsigned I = 0; I < NumOutVecs; ++I)
2292 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2293 AArch64::zsub0 + I, DL, VT, SuperReg));
2294
2295 // Copy chain
2296 unsigned ChainIdx = NumOutVecs;
2297 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2298 CurDAG->RemoveDeadNode(Node);
2299}
2300
/// Select the chainless multi-vector intrinsic \p N as \p Op with operands
/// (Zd, Zn, Zm).
///
/// Zd is the tuple built by createZMulTuple from operands 1..\p NumVecs of
/// \p N; Zn and Zm are the two operands that follow. The \p NumVecs results
/// of \p N are extracted at sub-register indices zsub0 + i from the node's
/// Untyped result, and \p N is removed.
2301void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2302 unsigned Op) {
2303 SDLoc DL(N);
2304 EVT VT = N->getValueType(0);
2305
2306 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2307 SDValue Zd = createZMulTuple(Regs);
2308 SDValue Zn = N->getOperand(1 + NumVecs);
2309 SDValue Zm = N->getOperand(2 + NumVecs);
2310
2311 SDValue Ops[] = {Zd, Zn, Zm};
2312
2313 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
2314 SDValue SuperReg = SDValue(Intrinsic, 0);
2315 for (unsigned i = 0; i < NumVecs; ++i)
2316 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2317 AArch64::zsub0 + i, DL, VT, SuperReg));
2318
2319 CurDAG->RemoveDeadNode(N);
2320}
2321
/// Validate \p TileNum for the tile base register \p BaseReg and, on success,
/// advance \p BaseReg (in/out) by \p TileNum.
///
/// Accepted ranges: ZA and ZAB0 only tile 0, ZAH0 tiles 0-1, ZAS0 tiles 0-3,
/// ZAD0 tiles 0-7. Returns false, leaving \p BaseReg untouched, for any other
/// base register or an out-of-range tile number.
// NOTE(review): adding TileNum to the register number presumably relies on
// the tile registers of each size being numbered consecutively - confirm
// against the register definitions.
// NOTE(review): this free function is not declared static; confirm whether
// external linkage is intended.
2322bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2323 switch (BaseReg) {
2324 default:
2325 return false;
2326 case AArch64::ZA:
2327 case AArch64::ZAB0:
2328 if (TileNum == 0)
2329 break;
2330 return false;
2331 case AArch64::ZAH0:
2332 if (TileNum <= 1)
2333 break;
2334 return false;
2335 case AArch64::ZAS0:
2336 if (TileNum <= 3)
2337 break;
2338 return false;
2339 case AArch64::ZAD0:
2340 if (TileNum <= 7)
2341 break;
2342 return false;
2343 }
2344
2345 BaseReg += TileNum;
2346 return true;
2347}
2348
/// Select the chained multi-vector move \p N as \p Op, reading from the
/// register derived from \p BaseReg.
///
/// For a base other than ZA the tile number is the constant operand 2 of
/// \p N and the slice is operand 3; for ZA the tile number is 0 and the slice
/// is operand 2. The function returns without selecting anything when
/// SelectSMETile rejects the tile number or SelectSMETileSlice fails for the
/// slice. Otherwise the \p NumVecs results are extracted at sub-register
/// indices zsub0 + I, result \p NumVecs of \p N (its chain) is replaced with
/// the new chain, and \p N is removed.
2349template <unsigned MaxIdx, unsigned Scale>
2350void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
2351 unsigned BaseReg, unsigned Op) {
2352 unsigned TileNum = 0;
2353 if (BaseReg != AArch64::ZA)
2354 TileNum = N->getConstantOperandVal(2);
2355
  // On success BaseReg has been advanced by TileNum.
2356 if (!SelectSMETile(BaseReg, TileNum))
2357 return;
2358
2359 SDValue SliceBase, Base, Offset;
2360 if (BaseReg == AArch64::ZA)
2361 SliceBase = N->getOperand(2);
2362 else
2363 SliceBase = N->getOperand(3);
2364
2365 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2366 return;
2367
2368 SDLoc DL(N);
2369 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
2370 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
2371 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2372
2373 EVT VT = N->getValueType(0);
2374 for (unsigned I = 0; I < NumVecs; ++I)
2375 ReplaceUses(SDValue(N, I),
2376 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2377 SDValue(Mov, 0)));
2378 // Copy chain
2379 unsigned ChainIdx = NumVecs;
2380 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2381 CurDAG->RemoveDeadNode(N);
2382}
2383
/// Select the chained multi-vector move \p N as \p Op.
///
/// The slice is operand 2 of \p N when \p BaseReg is ZA and operand 3
/// otherwise; in the latter case operand 2 is forwarded as the first operand
/// of the machine node. The function returns without selecting anything when
/// SelectSMETileSlice fails. Otherwise the \p NumVecs results are extracted
/// at sub-register indices zsub0 + I, result \p NumVecs of \p N (its chain)
/// is replaced with the new chain, and \p N is removed.
2384void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2385 unsigned Op, unsigned MaxIdx,
2386 unsigned Scale, unsigned BaseReg) {
2387 // Slice can be in different positions
2388 // The array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2389 // The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2390 SDValue SliceBase = N->getOperand(2);
2391 if (BaseReg != AArch64::ZA)
2392 SliceBase = N->getOperand(3);
2393
  // NOTE(review): listing line 2394 is absent from this capture; it presumably
  // declares `Base` and `Offset` - confirm against upstream.
2395 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2396 return;
2397 // The correct Za tile number is computed in Machine Instruction
2398 // See EmitZAInstr
2399 // DAG cannot select Za tile as an output register with ZReg
2400 SDLoc DL(N);
  // NOTE(review): listing line 2401 is absent from this capture; it presumably
  // declares the `Ops` container used below - confirm against upstream.
2402 if (BaseReg != AArch64::ZA )
2403 Ops.push_back(N->getOperand(2));
2404 Ops.push_back(Base);
2405 Ops.push_back(Offset);
2406 Ops.push_back(N->getOperand(0)); //Chain
2407 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2408
2409 EVT VT = N->getValueType(0);
2410 for (unsigned I = 0; I < NumVecs; ++I)
2411 ReplaceUses(SDValue(N, I),
2412 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2413 SDValue(Mov, 0)));
2414
2415 // Copy chain
2416 unsigned ChainIdx = NumVecs;
2417 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2418 CurDAG->RemoveDeadNode(N);
2419}
2420
/// Select the chainless intrinsic \p N as \p Opc with \p NumOutVecs results.
///
/// Every operand after the intrinsic ID is an input vector. With
/// \p IsTupleInput the inputs (2 or 4 of them, enforced by an assert) are
/// combined by createZMulTuple into one operand; otherwise each is passed
/// separately. The results of \p N are extracted at sub-register indices
/// zsub0 + I from the node's Untyped result, and \p N is removed.
2421void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2422 unsigned NumOutVecs,
2423 bool IsTupleInput,
2424 unsigned Opc) {
2425 SDLoc DL(N);
2426 EVT VT = N->getValueType(0);
2427 unsigned NumInVecs = N->getNumOperands() - 1;
2428
  // NOTE(review): listing line 2429 is absent from this capture; it presumably
  // declares the `Ops` container used below - confirm against upstream.
2430 if (IsTupleInput) {
2431 assert((NumInVecs == 2 || NumInVecs == 4) &&
2432 "Don't know how to handle multi-register input!");
2433 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumInVecs));
2434 Ops.push_back(createZMulTuple(Regs));
2435 } else {
2436 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2437 for (unsigned I = 0; I < NumInVecs; I++)
2438 Ops.push_back(N->getOperand(1 + I));
2439 }
2440
2441 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2442 SDValue SuperReg = SDValue(Res, 0);
2443
2444 for (unsigned I = 0; I < NumOutVecs; I++)
2445 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2446 AArch64::zsub0 + I, DL, VT, SuperReg));
2447 CurDAG->RemoveDeadNode(N);
2448}
2449
/// Select the multi-vector store \p N as machine instruction \p Opc.
///
/// Operands 2..\p NumVecs + 1 of \p N are the vectors to store; they are
/// combined by createQTuple when operand 2 is 128 bits wide and by
/// createDTuple otherwise. Operand \p NumVecs + 2 is the address and operand 0
/// the chain. The memory operand of \p N is transferred to the new node,
/// which then replaces \p N.
2450void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2451 unsigned Opc) {
2452 SDLoc dl(N);
2453 EVT VT = N->getOperand(2)->getValueType(0);
2454
2455 // Form a REG_SEQUENCE to force register allocation.
2456 bool Is128Bit = VT.getSizeInBits() == 128;
2457 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2458 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2459
2460 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2461 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2462
2463 // Transfer memoperands.
2464 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2465 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2466
2467 ReplaceNode(N, St);
2468}
2469
/// Select the predicated multi-vector store \p N, choosing between the
/// reg+reg opcode \p Opc_rr and the reg+imm opcode \p Opc_ri via
/// findAddrModeSVELoadStore.
///
/// Operands 2..\p NumVecs + 1 of \p N are the vectors to store (combined by
/// createZTuple), operand \p NumVecs + 2 is the predicate and operand
/// \p NumVecs + 3 the address; the offset starts as the constant 0. The new
/// node replaces \p N.
///
/// Note the parameter order here is (Opc_rr, Opc_ri), the reverse of
/// SelectPredicatedLoad.
2470void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2471 unsigned Scale, unsigned Opc_rr,
2472 unsigned Opc_ri) {
2473 SDLoc dl(N);
2474
2475 // Form a REG_SEQUENCE to force register allocation.
2476 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2477 SDValue RegSeq = createZTuple(Regs);
2478
2479 // Optimize addressing mode.
2480 unsigned Opc;
  // NOTE(review): listing line 2481 is absent from this capture; it presumably
  // declares `Base` and `Offset` - confirm against upstream.
2482 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2483 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2484 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2485
2486 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2487 Base, // address
2488 Offset, // offset
2489 N->getOperand(0)}; // chain
2490 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2491
2492 ReplaceNode(N, St);
2493}
2494
/// Match \p N as a frame-index address.
///
/// When \p N is a FrameIndexSDNode, \p Base is set to the corresponding target
/// frame index (of the target's pointer type), \p OffImm to the i64 constant 0
/// and true is returned. Otherwise false is returned and the outputs are left
/// untouched.
2495bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2496 SDValue &OffImm) {
2497 SDLoc dl(N);
2498 const DataLayout &DL = CurDAG->getDataLayout();
2499 const TargetLowering *TLI = getTargetLowering();
2500
2501 // Try to match it for the frame address
2502 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2503 int FI = FINode->getIndex();
2504 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2505 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2506 return true;
2507 }
2508
2509 return false;
2510}
2511
/// Select the post-incremented multi-vector store \p N as machine instruction
/// \p Opc.
///
/// Operands 1..\p NumVecs of \p N are the vectors to store (combined by
/// createQTuple or createDTuple depending on width), operand \p NumVecs + 1 is
/// the base register, operand \p NumVecs + 2 the increment and operand 0 the
/// chain. The new node produces (i64 write-back, chain), receives the memory
/// operand of \p N and replaces \p N.
2512void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2513 unsigned Opc) {
2514 SDLoc dl(N);
  // NOTE(review): the width is read from operand 2 although the vector list
  // starts at operand 1; the two agree only when operand 2 is also one of the
  // stored vectors (NumVecs >= 2) - confirm all callers satisfy this.
2515 EVT VT = N->getOperand(2)->getValueType(0);
2516 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2517 MVT::Other}; // Type for the Chain
2518
2519 // Form a REG_SEQUENCE to force register allocation.
2520 bool Is128Bit = VT.getSizeInBits() == 128;
2521 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2522 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2523
2524 SDValue Ops[] = {RegSeq,
2525 N->getOperand(NumVecs + 1), // base register
2526 N->getOperand(NumVecs + 2), // Incremental
2527 N->getOperand(0)}; // Chain
2528 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2529
2530 // Transfer memoperands.
2531 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2532 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2533
2534 ReplaceNode(N, St);
2535}
2536
2537namespace {
2538/// WidenVector - Given a value in the V64 register class, produce the
2539/// equivalent value in the V128 register class.
///
/// This is a function object so it can be handed to transform() over a list
/// of registers. It keeps a reference to the SelectionDAG used to create
/// nodes.
2540class WidenVector {
2541 SelectionDAG &DAG;
2542
2543public:
2544 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2545
  /// Returns \p V64Reg inserted at sub-register dsub of an IMPLICIT_DEF whose
  /// type has the same element type and twice as many elements.
2546 SDValue operator()(SDValue V64Reg) {
2547 EVT VT = V64Reg.getValueType();
2548 unsigned NarrowSize = VT.getVectorNumElements();
2549 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2550 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2551 SDLoc DL(V64Reg);
2552
2553 SDValue Undef =
2554 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2555 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2556 }
2557};
2558} // namespace
2559
2560/// NarrowVector - Given a value in the V128 register class, produce the
2561/// equivalent value in the V64 register class.
///
/// The result is the dsub sub-register of \p V128Reg, typed as a vector with
/// the same element type and half as many elements.
// NOTE(review): listing line 2562 (the function signature) is absent from this
// capture; callers in this file invoke it as NarrowVector(value, *CurDAG) -
// confirm the exact declaration against upstream.
2563 EVT VT = V128Reg.getValueType();
2564 unsigned WideSize = VT.getVectorNumElements();
2565 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2566 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2567
2568 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2569 V128Reg);
2570}
2571
/// Select the multi-vector single-lane load \p N as machine instruction
/// \p Opc.
///
/// Operands 2..\p NumVecs + 1 of \p N are the input vectors, operand
/// \p NumVecs + 2 the constant lane number and operand \p NumVecs + 3 the
/// address. 64-bit inputs are first widened with WidenVector so that the
/// tuple is always built by createQTuple, and the results are narrowed again
/// with NarrowVector. Result \p NumVecs of \p N (its chain) is replaced with
/// the new chain, and \p N is removed.
2572void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2573 unsigned Opc) {
2574 SDLoc dl(N);
2575 EVT VT = N->getValueType(0);
2576 bool Narrow = VT.getSizeInBits() == 64;
2577
2578 // Form a REG_SEQUENCE to force register allocation.
2579 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2580
2581 if (Narrow)
2582 transform(Regs, Regs.begin(),
2583 WidenVector(*CurDAG));
2584
2585 SDValue RegSeq = createQTuple(Regs);
2586
2587 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2588
2589 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2590
2591 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2592 N->getOperand(NumVecs + 3), N->getOperand(0)};
2593 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2594 SDValue SuperReg = SDValue(Ld, 0);
2595
  // The extract type is taken from operand 1 of the tuple node.
2596 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2597 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2598 AArch64::qsub2, AArch64::qsub3 };
2599 for (unsigned i = 0; i < NumVecs; ++i) {
2600 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2601 if (Narrow)
2602 NV = NarrowVector(NV, *CurDAG);
2603 ReplaceUses(SDValue(N, i), NV);
2604 }
2605
2606 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2607 CurDAG->RemoveDeadNode(N);
2608}
2609
/// Select the post-incremented multi-vector single-lane load \p N as machine
/// instruction \p Opc.
///
/// Operands 1..\p NumVecs of \p N are the input vectors, operand
/// \p NumVecs + 1 the constant lane number, operand \p NumVecs + 2 the base
/// register and operand \p NumVecs + 3 the increment. 64-bit inputs are
/// widened with WidenVector before the tuple is built and the results are
/// narrowed again with NarrowVector. The machine node produces (i64
/// write-back, vector list, chain); result \p NumVecs of \p N is the
/// write-back value and result \p NumVecs + 1 its chain.
2610void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2611 unsigned Opc) {
2612 SDLoc dl(N);
2613 EVT VT = N->getValueType(0);
2614 bool Narrow = VT.getSizeInBits() == 64;
2615
2616 // Form a REG_SEQUENCE to force register allocation.
2617 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2618
2619 if (Narrow)
2620 transform(Regs, Regs.begin(),
2621 WidenVector(*CurDAG));
2622
2623 SDValue RegSeq = createQTuple(Regs);
2624
2625 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2626 RegSeq->getValueType(0), MVT::Other};
2627
2628 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2629
2630 SDValue Ops[] = {RegSeq,
2631 CurDAG->getTargetConstant(LaneNo, dl,
2632 MVT::i64), // Lane Number
2633 N->getOperand(NumVecs + 2), // Base register
2634 N->getOperand(NumVecs + 3), // Incremental
2635 N->getOperand(0)};
2636 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2637
2638 // Update uses of the write back register
2639 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2640
2641 // Update uses of the vector list
2642 SDValue SuperReg = SDValue(Ld, 1);
2643 if (NumVecs == 1) {
  // A single vector is used directly, with no sub-register extract.
2644 ReplaceUses(SDValue(N, 0),
2645 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2646 } else {
2647 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2648 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2649 AArch64::qsub2, AArch64::qsub3 };
2650 for (unsigned i = 0; i < NumVecs; ++i) {
2651 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2652 SuperReg);
2653 if (Narrow)
2654 NV = NarrowVector(NV, *CurDAG);
2655 ReplaceUses(SDValue(N, i), NV);
2656 }
2657 }
2658
2659 // Update the Chain
2660 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2661 CurDAG->RemoveDeadNode(N);
2662}
2663
/// Select the multi-vector single-lane store \p N as machine instruction
/// \p Opc.
///
/// Operands 2..\p NumVecs + 1 of \p N are the vectors to store, operand
/// \p NumVecs + 2 the constant lane number, operand \p NumVecs + 3 the address
/// and operand 0 the chain. 64-bit vectors are widened with WidenVector so
/// that the tuple is always built by createQTuple. The new node produces only
/// a chain, receives the memory operand of \p N and replaces \p N.
2664void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2665 unsigned Opc) {
2666 SDLoc dl(N);
2667 EVT VT = N->getOperand(2)->getValueType(0);
2668 bool Narrow = VT.getSizeInBits() == 64;
2669
2670 // Form a REG_SEQUENCE to force register allocation.
2671 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2672
2673 if (Narrow)
2674 transform(Regs, Regs.begin(),
2675 WidenVector(*CurDAG));
2676
2677 SDValue RegSeq = createQTuple(Regs);
2678
2679 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2680
2681 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2682 N->getOperand(NumVecs + 3), N->getOperand(0)};
2683 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2684
2685 // Transfer memoperands.
2686 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2687 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2688
2689 ReplaceNode(N, St);
2690}
2691
/// Select the post-incremented multi-vector single-lane store \p N as machine
/// instruction \p Opc.
///
/// Operands 1..\p NumVecs of \p N are the vectors to store, operand
/// \p NumVecs + 1 the constant lane number, operand \p NumVecs + 2 the base
/// register, operand \p NumVecs + 3 the increment and operand 0 the chain.
/// 64-bit vectors are widened with WidenVector so that the tuple is always
/// built by createQTuple. The new node produces (i64 write-back, chain),
/// receives the memory operand of \p N and replaces \p N.
2692void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2693 unsigned Opc) {
2694 SDLoc dl(N);
  // NOTE(review): the width is read from operand 2 although the vector list
  // starts at operand 1; with NumVecs == 1 operand 2 would be the lane number
  // rather than a vector - confirm all callers pass NumVecs >= 2.
2695 EVT VT = N->getOperand(2)->getValueType(0);
2696 bool Narrow = VT.getSizeInBits() == 64;
2697
2698 // Form a REG_SEQUENCE to force register allocation.
2699 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2700
2701 if (Narrow)
2702 transform(Regs, Regs.begin(),
2703 WidenVector(*CurDAG));
2704
2705 SDValue RegSeq = createQTuple(Regs);
2706
2707 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2708 MVT::Other};
2709
2710 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2711
2712 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2713 N->getOperand(NumVecs + 2), // Base Register
2714 N->getOperand(NumVecs + 3), // Incremental
2715 N->getOperand(0)};
2716 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2717
2718 // Transfer memoperands.
2719 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2720 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2721
2722 ReplaceNode(N, St);
2723}
2724
2726 unsigned &Opc, SDValue &Opd0,
2727 unsigned &LSB, unsigned &MSB,
2728 unsigned NumberOfIgnoredLowBits,
2729 bool BiggerPattern) {
2730 assert(N->getOpcode() == ISD::AND &&
2731 "N must be a AND operation to call this function");
2732
2733 EVT VT = N->getValueType(0);
2734
2735 // Here we can test the type of VT and return false when the type does not
2736 // match, but since it is done prior to that call in the current context
2737 // we turned that into an assert to avoid redundant code.
2738 assert((VT == MVT::i32 || VT == MVT::i64) &&
2739 "Type checking must have been done before calling this function");
2740
2741 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2742 // changed the AND node to a 32-bit mask operation. We'll have to
2743 // undo that as part of the transform here if we want to catch all
2744 // the opportunities.
2745 // Currently the NumberOfIgnoredLowBits argument helps to recover
2746 // from these situations when matching bigger pattern (bitfield insert).
2747
2748 // For unsigned extracts, check for a shift right and mask
2749 uint64_t AndImm = 0;
2750 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2751 return false;
2752
2753 const SDNode *Op0 = N->getOperand(0).getNode();
2754
2755 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2756 // simplified. Try to undo that
2757 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2758
2759 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2760 if (AndImm & (AndImm + 1))
2761 return false;
2762
2763 bool ClampMSB = false;
2764 uint64_t SrlImm = 0;
2765 // Handle the SRL + ANY_EXTEND case.
2766 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2767 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2768 // Extend the incoming operand of the SRL to 64-bit.
2769 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2770 // Make sure to clamp the MSB so that we preserve the semantics of the
2771 // original operations.
2772 ClampMSB = true;
2773 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2775 SrlImm)) {
2776 // If the shift result was truncated, we can still combine them.
2777 Opd0 = Op0->getOperand(0).getOperand(0);
2778
2779 // Use the type of SRL node.
2780 VT = Opd0->getValueType(0);
2781 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2782 Opd0 = Op0->getOperand(0);
2783 ClampMSB = (VT == MVT::i32);
2784 } else if (BiggerPattern) {
2785 // Let's pretend a 0 shift right has been performed.
2786 // The resulting code will be at least as good as the original one
2787 // plus it may expose more opportunities for bitfield insert pattern.
2788 // FIXME: Currently we limit this to the bigger pattern, because
2789 // some optimizations expect AND and not UBFM.
2790 Opd0 = N->getOperand(0);
2791 } else
2792 return false;
2793
2794 // Bail out on large immediates. This happens when no proper
2795 // combining/constant folding was performed.
2796 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2797 LLVM_DEBUG(
2798 (dbgs() << N
2799 << ": Found large shift immediate, this should not happen\n"));
2800 return false;
2801 }
2802
2803 LSB = SrlImm;
2804 MSB = SrlImm +
2805 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2806 : llvm::countr_one<uint64_t>(AndImm)) -
2807 1;
2808 if (ClampMSB)
2809 // Since we're moving the extend before the right shift operation, we need
2810 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2811 // the zeros which would get shifted in with the original right shift
2812 // operation.
2813 MSB = MSB > 31 ? 31 : MSB;
2814
2815 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2816 return true;
2817}
2818
2820 SDValue &Opd0, unsigned &Immr,
2821 unsigned &Imms) {
2822 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2823
2824 EVT VT = N->getValueType(0);
2825 unsigned BitWidth = VT.getSizeInBits();
2826 assert((VT == MVT::i32 || VT == MVT::i64) &&
2827 "Type checking must have been done before calling this function");
2828
2829 SDValue Op = N->getOperand(0);
2830 if (Op->getOpcode() == ISD::TRUNCATE) {
2831 Op = Op->getOperand(0);
2832 VT = Op->getValueType(0);
2833 BitWidth = VT.getSizeInBits();
2834 }
2835
2836 uint64_t ShiftImm;
2837 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2838 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2839 return false;
2840
2841 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2842 if (ShiftImm + Width > BitWidth)
2843 return false;
2844
2845 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2846 Opd0 = Op.getOperand(0);
2847 Immr = ShiftImm;
2848 Imms = ShiftImm + Width - 1;
2849 return true;
2850}
2851
2853 SDValue &Opd0, unsigned &LSB,
2854 unsigned &MSB) {
2855 // We are looking for the following pattern which basically extracts several
  // contiguous bits from the source value and places them at the LSB of the
  // destination value; all other bits of the destination value are set to
  // zero:
2858 //
2859 // Value2 = AND Value, MaskImm
2860 // SRL Value2, ShiftImm
2861 //
2862 // with MaskImm >> ShiftImm to search for the bit width.
2863 //
2864 // This gets selected into a single UBFM:
2865 //
2866 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2867 //
2868
2869 if (N->getOpcode() != ISD::SRL)
2870 return false;
2871
2872 uint64_t AndMask = 0;
2873 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2874 return false;
2875
2876 Opd0 = N->getOperand(0).getOperand(0);
2877
2878 uint64_t SrlImm = 0;
2879 if (!isIntImmediate(N->getOperand(1), SrlImm))
2880 return false;
2881
2882 // Check whether we really have several bits extract here.
2883 if (!isMask_64(AndMask >> SrlImm))
2884 return false;
2885
2886 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2887 LSB = SrlImm;
2888 MSB = llvm::Log2_64(AndMask);
2889 return true;
2890}
2891
2892static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2893 unsigned &Immr, unsigned &Imms,
2894 bool BiggerPattern) {
2895 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2896 "N must be a SHR/SRA operation to call this function");
2897
2898 EVT VT = N->getValueType(0);
2899
2900 // Here we can test the type of VT and return false when the type does not
2901 // match, but since it is done prior to that call in the current context
2902 // we turned that into an assert to avoid redundant code.
2903 assert((VT == MVT::i32 || VT == MVT::i64) &&
2904 "Type checking must have been done before calling this function");
2905
2906 // Check for AND + SRL doing several bits extract.
2907 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2908 return true;
2909
2910 // We're looking for a shift of a shift.
2911 uint64_t ShlImm = 0;
2912 uint64_t TruncBits = 0;
2913 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2914 Opd0 = N->getOperand(0).getOperand(0);
2915 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2916 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2917 // We are looking for a shift of truncate. Truncate from i64 to i32 could
2918 // be considered as setting high 32 bits as zero. Our strategy here is to
2919 // always generate 64bit UBFM. This consistency will help the CSE pass
2920 // later find more redundancy.
2921 Opd0 = N->getOperand(0).getOperand(0);
2922 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2923 VT = Opd0.getValueType();
2924 assert(VT == MVT::i64 && "the promoted type should be i64");
2925 } else if (BiggerPattern) {
2926 // Let's pretend a 0 shift left has been performed.
2927 // FIXME: Currently we limit this to the bigger pattern case,
2928 // because some optimizations expect AND and not UBFM
2929 Opd0 = N->getOperand(0);
2930 } else
2931 return false;
2932
2933 // Missing combines/constant folding may have left us with strange
2934 // constants.
2935 if (ShlImm >= VT.getSizeInBits()) {
2936 LLVM_DEBUG(
2937 (dbgs() << N
2938 << ": Found large shift immediate, this should not happen\n"));
2939 return false;
2940 }
2941
2942 uint64_t SrlImm = 0;
2943 if (!isIntImmediate(N->getOperand(1), SrlImm))
2944 return false;
2945
2946 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2947 "bad amount in shift node!");
2948 int immr = SrlImm - ShlImm;
2949 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2950 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2951 // SRA requires a signed extraction
2952 if (VT == MVT::i32)
2953 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2954 else
2955 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2956 return true;
2957}
2958
2959bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2960 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2961
2962 EVT VT = N->getValueType(0);
2963 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2964 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2965 return false;
2966
2967 uint64_t ShiftImm;
2968 SDValue Op = N->getOperand(0);
2969 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2970 return false;
2971
2972 SDLoc dl(N);
2973 // Extend the incoming operand of the shift to 64-bits.
2974 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2975 unsigned Immr = ShiftImm;
2976 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2977 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2978 CurDAG->getTargetConstant(Imms, dl, VT)};
2979 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2980 return true;
2981}
2982
2983static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2984 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2985 unsigned NumberOfIgnoredLowBits = 0,
2986 bool BiggerPattern = false) {
2987 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2988 return false;
2989
2990 switch (N->getOpcode()) {
2991 default:
2992 if (!N->isMachineOpcode())
2993 return false;
2994 break;
2995 case ISD::AND:
2996 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2997 NumberOfIgnoredLowBits, BiggerPattern);
2998 case ISD::SRL:
2999 case ISD::SRA:
3000 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
3001
3003 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
3004 }
3005
3006 unsigned NOpc = N->getMachineOpcode();
3007 switch (NOpc) {
3008 default:
3009 return false;
3010 case AArch64::SBFMWri:
3011 case AArch64::UBFMWri:
3012 case AArch64::SBFMXri:
3013 case AArch64::UBFMXri:
3014 Opc = NOpc;
3015 Opd0 = N->getOperand(0);
3016 Immr = N->getConstantOperandVal(1);
3017 Imms = N->getConstantOperandVal(2);
3018 return true;
3019 }
3020 // Unreachable
3021 return false;
3022}
3023
3024bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
3025 unsigned Opc, Immr, Imms;
3026 SDValue Opd0;
3027 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
3028 return false;
3029
3030 EVT VT = N->getValueType(0);
3031 SDLoc dl(N);
3032
3033 // If the bit extract operation is 64bit but the original type is 32bit, we
3034 // need to add one EXTRACT_SUBREG.
3035 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
3036 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
3037 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
3038
3039 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
3040 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
3041 MVT::i32, SDValue(BFM, 0));
3042 ReplaceNode(N, Inner.getNode());
3043 return true;
3044 }
3045
3046 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
3047 CurDAG->getTargetConstant(Imms, dl, VT)};
3048 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3049 return true;
3050}
3051
3052/// Does DstMask form a complementary pair with the mask provided by
3053/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
3054/// this asks whether DstMask zeroes precisely those bits that will be set by
3055/// the other half.
3056static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
3057 unsigned NumberOfIgnoredHighBits, EVT VT) {
3058 assert((VT == MVT::i32 || VT == MVT::i64) &&
3059 "i32 or i64 mask type expected!");
3060 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
3061
3062 // Enable implicitTrunc as we're intentionally ignoring high bits.
3063 APInt SignificantDstMask =
3064 APInt(BitWidth, DstMask, /*isSigned=*/false, /*implicitTrunc=*/true);
3065 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
3066
3067 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
3068 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
3069}
3070
3071// Look for bits that will be useful for later uses.
// A bit is considered useless as soon as it is dropped and never used
// before it has been dropped.
3074// E.g., looking for useful bit of x
3075// 1. y = x & 0x7
3076// 2. z = y >> 2
3077// After #1, x useful bits are 0x7, then the useful bits of x, live through
3078// y.
3079// After #2, the useful bits of x are 0x4.
3080// However, if x is used on an unpredictable instruction, then all its bits
3081// are useful.
3082// E.g.
3083// 1. y = x & 0x7
3084// 2. z = y >> 2
3085// 3. str x, [@x]
3086static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
3087
3089 unsigned Depth) {
3090 uint64_t Imm =
3091 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
3092 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
3093 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
3094 getUsefulBits(Op, UsefulBits, Depth + 1);
3095}
3096
3098 uint64_t Imm, uint64_t MSB,
3099 unsigned Depth) {
3100 // inherit the bitwidth value
3101 APInt OpUsefulBits(UsefulBits);
3102 OpUsefulBits = 1;
3103
3104 if (MSB >= Imm) {
3105 OpUsefulBits <<= MSB - Imm + 1;
3106 --OpUsefulBits;
3107 // The interesting part will be in the lower part of the result
3108 getUsefulBits(Op, OpUsefulBits, Depth + 1);
3109 // The interesting part was starting at Imm in the argument
3110 OpUsefulBits <<= Imm;
3111 } else {
3112 OpUsefulBits <<= MSB + 1;
3113 --OpUsefulBits;
3114 // The interesting part will be shifted in the result
3115 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
3116 getUsefulBits(Op, OpUsefulBits, Depth + 1);
3117 // The interesting part was at zero in the argument
3118 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
3119 }
3120
3121 UsefulBits &= OpUsefulBits;
3122}
3123
3124static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
3125 unsigned Depth) {
3126 uint64_t Imm =
3127 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
3128 uint64_t MSB =
3129 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3130
3131 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
3132}
3133
3135 unsigned Depth) {
3136 uint64_t ShiftTypeAndValue =
3137 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3138 APInt Mask(UsefulBits);
3139 Mask.clearAllBits();
3140 Mask.flipAllBits();
3141
3142 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
3143 // Shift Left
3144 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
3145 Mask <<= ShiftAmt;
3146 getUsefulBits(Op, Mask, Depth + 1);
3147 Mask.lshrInPlace(ShiftAmt);
3148 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
3149 // Shift Right
3150 // We do not handle AArch64_AM::ASR, because the sign will change the
3151 // number of useful bits
3152 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
3153 Mask.lshrInPlace(ShiftAmt);
3154 getUsefulBits(Op, Mask, Depth + 1);
3155 Mask <<= ShiftAmt;
3156 } else
3157 return;
3158
3159 UsefulBits &= Mask;
3160}
3161
3162static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
3163 unsigned Depth) {
3164 uint64_t Imm =
3165 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3166 uint64_t MSB =
3167 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
3168
3169 APInt OpUsefulBits(UsefulBits);
3170 OpUsefulBits = 1;
3171
3172 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
3173 ResultUsefulBits.flipAllBits();
3174 APInt Mask(UsefulBits.getBitWidth(), 0);
3175
3176 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
3177
3178 if (MSB >= Imm) {
3179 // The instruction is a BFXIL.
3180 uint64_t Width = MSB - Imm + 1;
3181 uint64_t LSB = Imm;
3182
3183 OpUsefulBits <<= Width;
3184 --OpUsefulBits;
3185
3186 if (Op.getOperand(1) == Orig) {
3187 // Copy the low bits from the result to bits starting from LSB.
3188 Mask = ResultUsefulBits & OpUsefulBits;
3189 Mask <<= LSB;
3190 }
3191
3192 if (Op.getOperand(0) == Orig)
3193 // Bits starting from LSB in the input contribute to the result.
3194 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3195 } else {
3196 // The instruction is a BFI.
3197 uint64_t Width = MSB + 1;
3198 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
3199
3200 OpUsefulBits <<= Width;
3201 --OpUsefulBits;
3202 OpUsefulBits <<= LSB;
3203
3204 if (Op.getOperand(1) == Orig) {
3205 // Copy the bits from the result to the zero bits.
3206 Mask = ResultUsefulBits & OpUsefulBits;
3207 Mask.lshrInPlace(LSB);
3208 }
3209
3210 if (Op.getOperand(0) == Orig)
3211 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3212 }
3213
3214 UsefulBits &= Mask;
3215}
3216
3217static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
3218 SDValue Orig, unsigned Depth) {
3219
3220 // Users of this node should have already been instruction selected
3221 // FIXME: Can we turn that into an assert?
3222 if (!UserNode->isMachineOpcode())
3223 return;
3224
3225 switch (UserNode->getMachineOpcode()) {
3226 default:
3227 return;
3228 case AArch64::ANDSWri:
3229 case AArch64::ANDSXri:
3230 case AArch64::ANDWri:
3231 case AArch64::ANDXri:
3232 // We increment Depth only when we call the getUsefulBits
3233 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
3234 Depth);
3235 case AArch64::UBFMWri:
3236 case AArch64::UBFMXri:
3237 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
3238
3239 case AArch64::ORRWrs:
3240 case AArch64::ORRXrs:
3241 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
3242 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
3243 Depth);
3244 return;
3245 case AArch64::BFMWri:
3246 case AArch64::BFMXri:
3247 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
3248
3249 case AArch64::STRBBui:
3250 case AArch64::STURBBi:
3251 if (UserNode->getOperand(0) != Orig)
3252 return;
3253 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
3254 return;
3255
3256 case AArch64::STRHHui:
3257 case AArch64::STURHHi:
3258 if (UserNode->getOperand(0) != Orig)
3259 return;
3260 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
3261 return;
3262 }
3263}
3264
3265static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
3267 return;
3268 // Initialize UsefulBits
3269 if (!Depth) {
3270 unsigned Bitwidth = Op.getScalarValueSizeInBits();
3271 // At the beginning, assume every produced bits is useful
3272 UsefulBits = APInt(Bitwidth, 0);
3273 UsefulBits.flipAllBits();
3274 }
3275 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
3276
3277 for (SDNode *Node : Op.getNode()->users()) {
3278 // A use cannot produce useful bits
3279 APInt UsefulBitsForUse = APInt(UsefulBits);
3280 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
3281 UsersUsefulBits |= UsefulBitsForUse;
3282 }
3283 // UsefulBits contains the produced bits that are meaningful for the
3284 // current definition, thus a user cannot make a bit meaningful at
3285 // this point
3286 UsefulBits &= UsersUsefulBits;
3287}
3288
3289/// Create a machine node performing a notional SHL of Op by ShlAmount. If
3290/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3291/// 0, return Op unchanged.
3292static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
3293 if (ShlAmount == 0)
3294 return Op;
3295
3296 EVT VT = Op.getValueType();
3297 SDLoc dl(Op);
3298 unsigned BitWidth = VT.getSizeInBits();
3299 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
3300
3301 SDNode *ShiftNode;
3302 if (ShlAmount > 0) {
3303 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
3304 ShiftNode = CurDAG->getMachineNode(
3305 UBFMOpc, dl, VT, Op,
3306 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
3307 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
3308 } else {
3309 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3310 assert(ShlAmount < 0 && "expected right shift");
3311 int ShrAmount = -ShlAmount;
3312 ShiftNode = CurDAG->getMachineNode(
3313 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
3314 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
3315 }
3316
3317 return SDValue(ShiftNode, 0);
3318}
3319
3320// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3321static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3322 bool BiggerPattern,
3323 const uint64_t NonZeroBits,
3324 SDValue &Src, int &DstLSB,
3325 int &Width);
3326
// For bit-field-positioning pattern "(shl VAL, N)".
3328static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3329 bool BiggerPattern,
3330 const uint64_t NonZeroBits,
3331 SDValue &Src, int &DstLSB,
3332 int &Width);
3333
3334/// Does this tree qualify as an attempt to move a bitfield into position,
3335/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
3337 bool BiggerPattern, SDValue &Src,
3338 int &DstLSB, int &Width) {
3339 EVT VT = Op.getValueType();
3340 unsigned BitWidth = VT.getSizeInBits();
3341 (void)BitWidth;
3342 assert(BitWidth == 32 || BitWidth == 64);
3343
3344 KnownBits Known = CurDAG->computeKnownBits(Op);
3345
3346 // Non-zero in the sense that they're not provably zero, which is the key
3347 // point if we want to use this value
3348 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
3349 if (!isShiftedMask_64(NonZeroBits))
3350 return false;
3351
3352 switch (Op.getOpcode()) {
3353 default:
3354 break;
3355 case ISD::AND:
3356 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3357 NonZeroBits, Src, DstLSB, Width);
3358 case ISD::SHL:
3359 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3360 NonZeroBits, Src, DstLSB, Width);
3361 }
3362
3363 return false;
3364}
3365
3367 bool BiggerPattern,
3368 const uint64_t NonZeroBits,
3369 SDValue &Src, int &DstLSB,
3370 int &Width) {
3371 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3372
3373 EVT VT = Op.getValueType();
3374 assert((VT == MVT::i32 || VT == MVT::i64) &&
3375 "Caller guarantees VT is one of i32 or i64");
3376 (void)VT;
3377
3378 uint64_t AndImm;
3379 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
3380 return false;
3381
3382 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3383 // 1) (AndImm & (1 << POS) == 0)
3384 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3385 //
3386 // 1) and 2) don't agree so something must be wrong (e.g., in
3387 // 'SelectionDAG::computeKnownBits')
3388 assert((~AndImm & NonZeroBits) == 0 &&
3389 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3390
3391 SDValue AndOp0 = Op.getOperand(0);
3392
3393 uint64_t ShlImm;
3394 SDValue ShlOp0;
3395 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3396 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3397 ShlOp0 = AndOp0.getOperand(0);
3398 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3400 ShlImm)) {
3401 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3402
3403 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3404 SDValue ShlVal = AndOp0.getOperand(0);
3405
3406 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3407 // expect VT to be MVT::i32.
3408 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3409
3410 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3411 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3412 } else
3413 return false;
3414
3415 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3416 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3417 // AndOp0+AND.
3418 if (!BiggerPattern && !AndOp0.hasOneUse())
3419 return false;
3420
3421 DstLSB = llvm::countr_zero(NonZeroBits);
3422 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3423
3424 // Bail out on large Width. This happens when no proper combining / constant
3425 // folding was performed.
3426 if (Width >= (int)VT.getSizeInBits()) {
3427 // If VT is i64, Width > 64 is insensible since NonZeroBits is uint64_t, and
3428 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3429 // "val".
3430 // If VT is i32, what Width >= 32 means:
3431 // - For "(and (any_extend(shl val, N)), shifted-mask)", the`and` Op
3432 // demands at least 'Width' bits (after dag-combiner). This together with
3433 // `any_extend` Op (undefined higher bits) indicates missed combination
3434 // when lowering the 'and' IR instruction to an machine IR instruction.
3435 LLVM_DEBUG(
3436 dbgs()
3437 << "Found large Width in bit-field-positioning -- this indicates no "
3438 "proper combining / constant folding was performed\n");
3439 return false;
3440 }
3441
3442 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3443 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3444 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3445 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3446 // which case it is not profitable to insert an extra shift.
3447 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3448 return false;
3449
3450 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3451 return true;
3452}
3453
// For node (shl (and val, mask), N), returns true if the node is equivalent to
3455// UBFIZ.
3457 SDValue &Src, int &DstLSB,
3458 int &Width) {
3459 // Caller should have verified that N is a left shift with constant shift
3460 // amount; asserts that.
3461 assert(Op.getOpcode() == ISD::SHL &&
3462 "Op.getNode() should be a SHL node to call this function");
3463 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3464 "Op.getNode() should shift ShlImm to call this function");
3465
3466 uint64_t AndImm = 0;
3467 SDValue Op0 = Op.getOperand(0);
3468 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3469 return false;
3470
3471 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3472 if (isMask_64(ShiftedAndImm)) {
3473 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3474 // should end with Mask, and could be prefixed with random bits if those
3475 // bits are shifted out.
3476 //
3477 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3478 // the AND result corresponding to those bits are shifted out, so it's fine
3479 // to not extract them.
3480 Width = llvm::countr_one(ShiftedAndImm);
3481 DstLSB = ShlImm;
3482 Src = Op0.getOperand(0);
3483 return true;
3484 }
3485 return false;
3486}
3487
3489 bool BiggerPattern,
3490 const uint64_t NonZeroBits,
3491 SDValue &Src, int &DstLSB,
3492 int &Width) {
3493 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3494
3495 EVT VT = Op.getValueType();
3496 assert((VT == MVT::i32 || VT == MVT::i64) &&
3497 "Caller guarantees that type is i32 or i64");
3498 (void)VT;
3499
3500 uint64_t ShlImm;
3501 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3502 return false;
3503
3504 if (!BiggerPattern && !Op.hasOneUse())
3505 return false;
3506
3507 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3508 return true;
3509
3510 DstLSB = llvm::countr_zero(NonZeroBits);
3511 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3512
3513 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3514 return false;
3515
3516 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3517 return true;
3518}
3519
3520static bool isShiftedMask(uint64_t Mask, EVT VT) {
3521 assert(VT == MVT::i32 || VT == MVT::i64);
3522 if (VT == MVT::i32)
3523 return isShiftedMask_32(Mask);
3524 return isShiftedMask_64(Mask);
3525}
3526
3527// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3528// inserted only sets known zero bits.
3530 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3531
3532 EVT VT = N->getValueType(0);
3533 if (VT != MVT::i32 && VT != MVT::i64)
3534 return false;
3535
3536 unsigned BitWidth = VT.getSizeInBits();
3537
3538 uint64_t OrImm;
3539 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3540 return false;
3541
3542 // Skip this transformation if the ORR immediate can be encoded in the ORR.
3543 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3544 // performance neutral.
3546 return false;
3547
3548 uint64_t MaskImm;
3549 SDValue And = N->getOperand(0);
3550 // Must be a single use AND with an immediate operand.
3551 if (!And.hasOneUse() ||
3552 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3553 return false;
3554
3555 // Compute the Known Zero for the AND as this allows us to catch more general
3556 // cases than just looking for AND with imm.
3557 KnownBits Known = CurDAG->computeKnownBits(And);
3558
3559 // Non-zero in the sense that they're not provably zero, which is the key
3560 // point if we want to use this value.
3561 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3562
3563 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3564 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3565 return false;
3566
3567 // The bits being inserted must only set those bits that are known to be zero.
3568 if ((OrImm & NotKnownZero) != 0) {
3569 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3570 // currently handle this case.
3571 return false;
3572 }
3573
3574 // BFI/BFXIL dst, src, #lsb, #width.
3575 int LSB = llvm::countr_one(NotKnownZero);
3576 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3577
3578 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3579 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3580 unsigned ImmS = Width - 1;
3581
3582 // If we're creating a BFI instruction avoid cases where we need more
3583 // instructions to materialize the BFI constant as compared to the original
3584 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3585 // should be no worse in this case.
3586 bool IsBFI = LSB != 0;
3587 uint64_t BFIImm = OrImm >> LSB;
3588 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3589 // We have a BFI instruction and we know the constant can't be materialized
3590 // with a ORR-immediate with the zero register.
3591 unsigned OrChunks = 0, BFIChunks = 0;
3592 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3593 if (((OrImm >> Shift) & 0xFFFF) != 0)
3594 ++OrChunks;
3595 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3596 ++BFIChunks;
3597 }
3598 if (BFIChunks > OrChunks)
3599 return false;
3600 }
3601
3602 // Materialize the constant to be inserted.
3603 SDLoc DL(N);
3604 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3605 SDNode *MOVI = CurDAG->getMachineNode(
3606 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3607
3608 // Create the BFI/BFXIL instruction.
3609 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3610 CurDAG->getTargetConstant(ImmR, DL, VT),
3611 CurDAG->getTargetConstant(ImmS, DL, VT)};
3612 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3613 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3614 return true;
3615}
3616
3618 SDValue &ShiftedOperand,
3619 uint64_t &EncodedShiftImm) {
3620 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3621 if (!Dst.hasOneUse())
3622 return false;
3623
3624 EVT VT = Dst.getValueType();
3625 assert((VT == MVT::i32 || VT == MVT::i64) &&
3626 "Caller should guarantee that VT is one of i32 or i64");
3627 const unsigned SizeInBits = VT.getSizeInBits();
3628
3629 SDLoc DL(Dst.getNode());
3630 uint64_t AndImm, ShlImm;
3631 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3632 isShiftedMask_64(AndImm)) {
3633 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3634 SDValue DstOp0 = Dst.getOperand(0);
3635 if (!DstOp0.hasOneUse())
3636 return false;
3637
3638 // An example to illustrate the transformation
3639 // From:
3640 // lsr x8, x1, #1
3641 // and x8, x8, #0x3f80
3642 // bfxil x8, x1, #0, #7
3643 // To:
3644 // and x8, x23, #0x7f
3645 // ubfx x9, x23, #8, #7
3646 // orr x23, x8, x9, lsl #7
3647 //
3648 // The number of instructions remains the same, but ORR is faster than BFXIL
3649 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3650 // the dependency chain is improved after the transformation.
3651 uint64_t SrlImm;
3652 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3653 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3654 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3655 unsigned MaskWidth =
3656 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3657 unsigned UBFMOpc =
3658 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3659 SDNode *UBFMNode = CurDAG->getMachineNode(
3660 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3661 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3662 VT),
3663 CurDAG->getTargetConstant(
3664 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3665 ShiftedOperand = SDValue(UBFMNode, 0);
3666 EncodedShiftImm = AArch64_AM::getShifterImm(
3667 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3668 return true;
3669 }
3670 }
3671 return false;
3672 }
3673
3674 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3675 ShiftedOperand = Dst.getOperand(0);
3676 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3677 return true;
3678 }
3679
3680 uint64_t SrlImm;
3681 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3682 ShiftedOperand = Dst.getOperand(0);
3683 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3684 return true;
3685 }
3686 return false;
3687}
3688
3689// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3690// the operands and select it to AArch64::ORR with shifted registers if
3691// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3692static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3693 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3694 const bool BiggerPattern) {
3695 EVT VT = N->getValueType(0);
3696 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3697 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3698 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3699 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3700 assert((VT == MVT::i32 || VT == MVT::i64) &&
3701 "Expect result type to be i32 or i64 since N is combinable to BFM");
3702 SDLoc DL(N);
3703
3704 // Bail out if BFM simplifies away one node in BFM Dst.
3705 if (OrOpd1 != Dst)
3706 return false;
3707
3708 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3709 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3710 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3711 if (BiggerPattern) {
3712 uint64_t SrcAndImm;
3713 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3714 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3715 // OrOpd0 = AND Src, #Mask
3716 // So BFM simplifies away one AND node from Src and doesn't simplify away
3717 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3718 // one node (from Rd), ORR is better since it has higher throughput and
3719 // smaller latency than BFM on many AArch64 processors (and for the rest
3720 // ORR is at least as good as BFM).
3721 SDValue ShiftedOperand;
3722 uint64_t EncodedShiftImm;
3723 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3724 EncodedShiftImm)) {
3725 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3726 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3727 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3728 return true;
3729 }
3730 }
3731 return false;
3732 }
3733
3734 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3735
3736 uint64_t ShlImm;
3737 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3738 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3739 SDValue Ops[] = {
3740 Dst, Src,
3741 CurDAG->getTargetConstant(
3743 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3744 return true;
3745 }
3746
3747 // Select the following pattern to left-shifted operand rather than BFI.
3748 // %val1 = op ..
3749 // %val2 = shl %val1, #imm
3750 // %res = or %val1, %val2
3751 //
3752 // If N is selected to be BFI, we know that
3753 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3754 // BFI) 2) OrOpd1 would be the destination operand (i.e., preserved)
3755 //
3756 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3757 if (OrOpd0.getOperand(0) == OrOpd1) {
3758 SDValue Ops[] = {
3759 OrOpd1, OrOpd1,
3760 CurDAG->getTargetConstant(
3762 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3763 return true;
3764 }
3765 }
3766
3767 uint64_t SrlImm;
3768 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3769 // Select the following pattern to right-shifted operand rather than BFXIL.
3770 // %val1 = op ..
3771 // %val2 = lshr %val1, #imm
3772 // %res = or %val1, %val2
3773 //
3774 // If N is selected to be BFXIL, we know that
3775 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3776 // BFXIL) 2) OrOpd1 would be the destination operand (i.e., preserved)
3777 //
3778 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3779 if (OrOpd0.getOperand(0) == OrOpd1) {
3780 SDValue Ops[] = {
3781 OrOpd1, OrOpd1,
3782 CurDAG->getTargetConstant(
3784 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3785 return true;
3786 }
3787 }
3788
3789 return false;
3790}
3791
3792static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3793 SelectionDAG *CurDAG) {
3794 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3795
3796 EVT VT = N->getValueType(0);
3797 if (VT != MVT::i32 && VT != MVT::i64)
3798 return false;
3799
3800 unsigned BitWidth = VT.getSizeInBits();
3801
3802 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3803 // have the expected shape. Try to undo that.
3804
3805 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3806 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3807
3808 // Given a OR operation, check if we have the following pattern
3809 // ubfm c, b, imm, imm2 (or something that does the same jobs, see
3810 // isBitfieldExtractOp)
3811 // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and
3812 // countTrailingZeros(mask2) == imm2 - imm + 1
3813 // f = d | c
3814 // if yes, replace the OR instruction with:
3815 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3816
3817 // OR is commutative, check all combinations of operand order and values of
3818 // BiggerPattern, i.e.
3819 // Opd0, Opd1, BiggerPattern=false
3820 // Opd1, Opd0, BiggerPattern=false
3821 // Opd0, Opd1, BiggerPattern=true
3822 // Opd1, Opd0, BiggerPattern=true
3823 // Several of these combinations may match, so check with BiggerPattern=false
3824 // first since that will produce better results by matching more instructions
3825 // and/or inserting fewer extra instructions.
3826 for (int I = 0; I < 4; ++I) {
3827
3828 SDValue Dst, Src;
3829 unsigned ImmR, ImmS;
3830 bool BiggerPattern = I / 2;
3831 SDValue OrOpd0Val = N->getOperand(I % 2);
3832 SDNode *OrOpd0 = OrOpd0Val.getNode();
3833 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3834 SDNode *OrOpd1 = OrOpd1Val.getNode();
3835
3836 unsigned BFXOpc;
3837 int DstLSB, Width;
3838 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3839 NumberOfIgnoredLowBits, BiggerPattern)) {
3840 // Check that the returned opcode is compatible with the pattern,
3841 // i.e., same type and zero extended (U and not S)
3842 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3843 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3844 continue;
3845
3846 // Compute the width of the bitfield insertion
3847 DstLSB = 0;
3848 Width = ImmS - ImmR + 1;
3849 // FIXME: This constraint is to catch bitfield insertion we may
3850 // want to widen the pattern if we want to grab general bitfield
3851 // move case
3852 if (Width <= 0)
3853 continue;
3854
3855 // If the mask on the insertee is correct, we have a BFXIL operation. We
3856 // can share the ImmR and ImmS values from the already-computed UBFM.
3857 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3858 BiggerPattern,
3859 Src, DstLSB, Width)) {
3860 ImmR = (BitWidth - DstLSB) % BitWidth;
3861 ImmS = Width - 1;
3862 } else
3863 continue;
3864
3865 // Check the second part of the pattern
3866 EVT VT = OrOpd1Val.getValueType();
3867 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3868
3869 // Compute the Known Zero for the candidate of the first operand.
3870 // This allows to catch more general case than just looking for
3871 // AND with imm. Indeed, simplify-demanded-bits may have removed
3872 // the AND instruction because it proves it was useless.
3873 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3874
3875 // Check if there is enough room for the second operand to appear
3876 // in the first one
3877 APInt BitsToBeInserted =
3878 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3879
3880 if ((BitsToBeInserted & ~Known.Zero) != 0)
3881 continue;
3882
3883 // Set the first operand
3884 uint64_t Imm;
3885 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3886 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3887 // In that case, we can eliminate the AND
3888 Dst = OrOpd1->getOperand(0);
3889 else
3890 // Maybe the AND has been removed by simplify-demanded-bits
3891 // or is useful because it discards more bits
3892 Dst = OrOpd1Val;
3893
3894 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3895 // with shifted operand is more efficient.
3896 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3897 BiggerPattern))
3898 return true;
3899
3900 // both parts match
3901 SDLoc DL(N);
3902 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3903 CurDAG->getTargetConstant(ImmS, DL, VT)};
3904 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3905 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3906 return true;
3907 }
3908
3909 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3910 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3911 // mask (e.g., 0x000ffff0).
3912 uint64_t Mask0Imm, Mask1Imm;
3913 SDValue And0 = N->getOperand(0);
3914 SDValue And1 = N->getOperand(1);
3915 if (And0.hasOneUse() && And1.hasOneUse() &&
3916 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3917 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3918 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3919 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3920
3921 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3922 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3923 // bits to be inserted.
3924 if (isShiftedMask(Mask0Imm, VT)) {
3925 std::swap(And0, And1);
3926 std::swap(Mask0Imm, Mask1Imm);
3927 }
3928
3929 SDValue Src = And1->getOperand(0);
3930 SDValue Dst = And0->getOperand(0);
3931 unsigned LSB = llvm::countr_zero(Mask1Imm);
3932 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3933
3934 // The BFXIL inserts the low-order bits from a source register, so right
3935 // shift the needed bits into place.
3936 SDLoc DL(N);
3937 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3938 uint64_t LsrImm = LSB;
3939 if (Src->hasOneUse() &&
3940 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3941 (LsrImm + LSB) < BitWidth) {
3942 Src = Src->getOperand(0);
3943 LsrImm += LSB;
3944 }
3945
3946 SDNode *LSR = CurDAG->getMachineNode(
3947 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3948 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3949
3950 // BFXIL is an alias of BFM, so translate to BFM operands.
3951 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3952 unsigned ImmS = Width - 1;
3953
3954 // Create the BFXIL instruction.
3955 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3956 CurDAG->getTargetConstant(ImmR, DL, VT),
3957 CurDAG->getTargetConstant(ImmS, DL, VT)};
3958 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3959 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3960 return true;
3961 }
3962
3963 return false;
3964}
3965
3966bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3967 if (N->getOpcode() != ISD::OR)
3968 return false;
3969
3970 APInt NUsefulBits;
3971 getUsefulBits(SDValue(N, 0), NUsefulBits);
3972
3973 // If all bits are not useful, just return UNDEF.
3974 if (!NUsefulBits) {
3975 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3976 return true;
3977 }
3978
3979 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3980 return true;
3981
3982 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3983}
3984
3985/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3986/// equivalent of a left shift by a constant amount followed by an and masking
3987/// out a contiguous set of bits.
3988bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3989 if (N->getOpcode() != ISD::AND)
3990 return false;
3991
3992 EVT VT = N->getValueType(0);
3993 if (VT != MVT::i32 && VT != MVT::i64)
3994 return false;
3995
3996 SDValue Op0;
3997 int DstLSB, Width;
3998 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3999 Op0, DstLSB, Width))
4000 return false;
4001
4002 // ImmR is the rotate right amount.
4003 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
4004 // ImmS is the most significant bit of the source to be moved.
4005 unsigned ImmS = Width - 1;
4006
4007 SDLoc DL(N);
4008 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
4009 CurDAG->getTargetConstant(ImmS, DL, VT)};
4010 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
4011 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4012 return true;
4013}
4014
4015/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
4016/// variable shift/rotate instructions.
4017bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
4018 EVT VT = N->getValueType(0);
4019
4020 unsigned Opc;
4021 switch (N->getOpcode()) {
4022 case ISD::ROTR:
4023 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
4024 break;
4025 case ISD::SHL:
4026 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
4027 break;
4028 case ISD::SRL:
4029 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
4030 break;
4031 case ISD::SRA:
4032 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
4033 break;
4034 default:
4035 return false;
4036 }
4037
4038 uint64_t Size;
4039 uint64_t Bits;
4040 if (VT == MVT::i32) {
4041 Bits = 5;
4042 Size = 32;
4043 } else if (VT == MVT::i64) {
4044 Bits = 6;
4045 Size = 64;
4046 } else
4047 return false;
4048
4049 SDValue ShiftAmt = N->getOperand(1);
4050 SDLoc DL(N);
4051 SDValue NewShiftAmt;
4052
4053 // Skip over an extend of the shift amount.
4054 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
4055 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
4056 ShiftAmt = ShiftAmt->getOperand(0);
4057
4058 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
4059 SDValue Add0 = ShiftAmt->getOperand(0);
4060 SDValue Add1 = ShiftAmt->getOperand(1);
4061 uint64_t Add0Imm;
4062 uint64_t Add1Imm;
4063 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
4064 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
4065 // to avoid the ADD/SUB.
4066 NewShiftAmt = Add0;
4067 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
4068 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
4069 (Add0Imm % Size == 0)) {
4070 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
4071 // to generate a NEG instead of a SUB from a constant.
4072 unsigned NegOpc;
4073 unsigned ZeroReg;
4074 EVT SubVT = ShiftAmt->getValueType(0);
4075 if (SubVT == MVT::i32) {
4076 NegOpc = AArch64::SUBWrr;
4077 ZeroReg = AArch64::WZR;
4078 } else {
4079 assert(SubVT == MVT::i64);
4080 NegOpc = AArch64::SUBXrr;
4081 ZeroReg = AArch64::XZR;
4082 }
4083 SDValue Zero =
4084 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
4085 MachineSDNode *Neg =
4086 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
4087 NewShiftAmt = SDValue(Neg, 0);
4088 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
4089 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
4090 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
4091 // to generate a NOT instead of a SUB from a constant.
4092 unsigned NotOpc;
4093 unsigned ZeroReg;
4094 EVT SubVT = ShiftAmt->getValueType(0);
4095 if (SubVT == MVT::i32) {
4096 NotOpc = AArch64::ORNWrr;
4097 ZeroReg = AArch64::WZR;
4098 } else {
4099 assert(SubVT == MVT::i64);
4100 NotOpc = AArch64::ORNXrr;
4101 ZeroReg = AArch64::XZR;
4102 }
4103 SDValue Zero =
4104 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
4105 MachineSDNode *Not =
4106 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
4107 NewShiftAmt = SDValue(Not, 0);
4108 } else
4109 return false;
4110 } else {
4111 // If the shift amount is masked with an AND, check that the mask covers the
4112 // bits that are implicitly ANDed off by the above opcodes and if so, skip
4113 // the AND.
4114 uint64_t MaskImm;
4115 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
4116 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
4117 return false;
4118
4119 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
4120 return false;
4121
4122 NewShiftAmt = ShiftAmt->getOperand(0);
4123 }
4124
4125 // Narrow/widen the shift amount to match the size of the shift operation.
4126 if (VT == MVT::i32)
4127 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
4128 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
4129 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
4130 MachineSDNode *Ext = CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL, VT,
4131 NewShiftAmt, SubReg);
4132 NewShiftAmt = SDValue(Ext, 0);
4133 }
4134
4135 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
4136 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4137 return true;
4138}
4139
4141 SDValue &FixedPos,
4142 unsigned RegWidth,
4143 bool isReciprocal) {
4144 APFloat FVal(0.0);
4146 FVal = CN->getValueAPF();
4147 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
4148 // Some otherwise illegal constants are allowed in this case.
4149 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
4150 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
4151 return false;
4152
4153 ConstantPoolSDNode *CN =
4154 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
4155 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
4156 } else
4157 return false;
4158
4159 if (unsigned FBits =
4160 CheckFixedPointOperandConstant(FVal, RegWidth, isReciprocal)) {
4161 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
4162 return true;
4163 }
4164
4165 return false;
4166}
4167
4169 SDValue N,
4170 SDValue &FixedPos,
4171 unsigned RegWidth,
4172 bool isReciprocal) {
4173 if ((N.getOpcode() == AArch64ISD::NVCAST || N.getOpcode() == ISD::BITCAST) &&
4174 N.getValueType().getScalarSizeInBits() ==
4175 N.getOperand(0).getValueType().getScalarSizeInBits())
4176 N = N.getOperand(0);
4177
4178 auto ImmToFloat = [RegWidth](APInt Imm) {
4179 switch (RegWidth) {
4180 case 16:
4181 return APFloat(APFloat::IEEEhalf(), Imm);
4182 case 32:
4183 return APFloat(APFloat::IEEEsingle(), Imm);
4184 case 64:
4185 return APFloat(APFloat::IEEEdouble(), Imm);
4186 default:
4187 llvm_unreachable("Unexpected RegWidth!");
4188 };
4189 };
4190
4191 APFloat FVal(0.0);
4192 switch (N->getOpcode()) {
4193 case AArch64ISD::MOVIshift:
4194 FVal = ImmToFloat(APInt(RegWidth, N.getConstantOperandVal(0)
4195 << N.getConstantOperandVal(1)));
4196 break;
4197 case AArch64ISD::FMOV:
4198 FVal = ImmToFloat(DecodeFMOVImm(N.getConstantOperandVal(0), RegWidth));
4199 break;
4200 case AArch64ISD::DUP:
4201 if (isa<ConstantSDNode>(N.getOperand(0)))
4202 FVal = ImmToFloat(N.getConstantOperandAPInt(0).trunc(RegWidth));
4203 else
4204 return false;
4205 break;
4206 default:
4207 return false;
4208 }
4209
4210 if (unsigned FBits =
4211 CheckFixedPointOperandConstant(FVal, RegWidth, isReciprocal)) {
4212 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
4213 return true;
4214 }
4215
4216 return false;
4217}
4218
4219bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
4220 unsigned RegWidth) {
4221 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4222 /*isReciprocal*/ false);
4223}
4224
4225bool AArch64DAGToDAGISel::SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos,
4226 unsigned RegWidth) {
4228 CurDAG, N, FixedPos, RegWidth, /*isReciprocal*/ false);
4229}
4230
4231bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperandVec(SDValue N,
4232 SDValue &FixedPos,
4233 unsigned RegWidth) {
4235 CurDAG, N, FixedPos, RegWidth, /*isReciprocal*/ true);
4236}
4237
4238bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
4239 SDValue &FixedPos,
4240 unsigned RegWidth) {
4241 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4242 /*isReciprocal*/ true);
4243}
4244
4245// Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields
4246// of the string and obtains the integer values from them and combines these
4247// into a single value to be used in the MRS/MSR instruction.
4250 RegString.split(Fields, ':');
4251
4252 if (Fields.size() == 1)
4253 return -1;
4254
4255 assert(Fields.size() == 5
4256 && "Invalid number of fields in read register string");
4257
4259 bool AllIntFields = true;
4260
4261 for (StringRef Field : Fields) {
4262 unsigned IntField;
4263 AllIntFields &= !Field.getAsInteger(10, IntField);
4264 Ops.push_back(IntField);
4265 }
4266
4267 assert(AllIntFields &&
4268 "Unexpected non-integer value in special register string.");
4269 (void)AllIntFields;
4270
4271 if (Ops[0] < 2 || Ops[1] > 7 || Ops[2] > 15 || Ops[3] > 15 || Ops[4] > 7)
4272 return -1;
4273
4274 // Need to combine the integer fields of the string into a single value
4275 // based on the bit encoding of MRS/MSR instruction. We also mask Ops[0], as
4276 // top bit as it is implicitly assumed to be 1 for MRS/MSR instruction and is
4277 // not part of the encoding.
4278 return ((Ops[0] & 0x1) << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
4279 (Ops[3] << 3) | (Ops[4]);
4280}
4281
4282// Lower the read_register intrinsic to an MRS instruction node if the special
4283// register string argument is either of the form detailed in the ALCE (the
4284// form described in getIntOperandsFromRegisterString) or is a named register
4285// known by the MRS SysReg mapper.
4286bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
4287 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4288 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4289 SDLoc DL(N);
4290
4291 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
4292
4293 unsigned Opcode64Bit = AArch64::MRS;
4294 int Imm = getIntOperandFromRegisterString(RegString->getString());
4295 if (Imm == -1) {
4296 // No match, Use the sysreg mapper to map the remaining possible strings to
4297 // the value for the register to be used for the instruction operand.
4298 const auto *TheReg =
4299 AArch64SysReg::lookupSysRegByName(RegString->getString());
4300 if (TheReg && TheReg->Readable &&
4301 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4302 Imm = TheReg->Encoding;
4303 else
4304 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4305
4306 if (Imm == -1) {
4307 // Still no match, see if this is "pc" or give up.
4308 if (!ReadIs128Bit && RegString->getString() == "pc") {
4309 Opcode64Bit = AArch64::ADR;
4310 Imm = 0;
4311 } else {
4312 return false;
4313 }
4314 }
4315 }
4316
4317 SDValue InChain = N->getOperand(0);
4318 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
4319 if (!ReadIs128Bit) {
4320 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
4321 {SysRegImm, InChain});
4322 } else {
4323 SDNode *MRRS = CurDAG->getMachineNode(
4324 AArch64::MRRS, DL,
4325 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
4326 {SysRegImm, InChain});
4327
4328 // Sysregs are not endian. The even register always contains the low half
4329 // of the register.
4330 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
4331 SDValue(MRRS, 0));
4332 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
4333 SDValue(MRRS, 0));
4334 SDValue OutChain = SDValue(MRRS, 1);
4335
4336 ReplaceUses(SDValue(N, 0), Lo);
4337 ReplaceUses(SDValue(N, 1), Hi);
4338 ReplaceUses(SDValue(N, 2), OutChain);
4339 };
4340 return true;
4341}
4342
4343// Lower the write_register intrinsic to an MSR instruction node if the special
4344// register string argument is either of the form detailed in the ALCE (the
4345// form described in getIntOperandsFromRegisterString) or is a named register
4346// known by the MSR SysReg mapper.
4347bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
4348 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4349 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4350 SDLoc DL(N);
4351
4352 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
4353
4354 if (!WriteIs128Bit) {
4355 // Check if the register was one of those allowed as the pstatefield value
4356 // in the MSR (immediate) instruction. To accept the values allowed in the
4357 // pstatefield for the MSR (immediate) instruction, we also require that an
4358 // immediate value has been provided as an argument, we know that this is
4359 // the case as it has been ensured by semantic checking.
4360 auto trySelectPState = [&](auto PMapper, unsigned State) {
4361 if (PMapper) {
4362 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
4363 "Expected a constant integer expression.");
4364 unsigned Reg = PMapper->Encoding;
4365 uint64_t Immed = N->getConstantOperandVal(2);
4366 CurDAG->SelectNodeTo(
4367 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
4368 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
4369 return true;
4370 }
4371 return false;
4372 };
4373
4374 if (trySelectPState(
4375 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
4376 AArch64::MSRpstateImm4))
4377 return true;
4378 if (trySelectPState(
4379 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
4380 AArch64::MSRpstateImm1))
4381 return true;
4382 }
4383
4384 int Imm = getIntOperandFromRegisterString(RegString->getString());
4385 if (Imm == -1) {
4386 // Use the sysreg mapper to attempt to map the remaining possible strings
4387 // to the value for the register to be used for the MSR (register)
4388 // instruction operand.
4389 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
4390 if (TheReg && TheReg->Writeable &&
4391 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4392 Imm = TheReg->Encoding;
4393 else
4394 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4395
4396 if (Imm == -1)
4397 return false;
4398 }
4399
4400 SDValue InChain = N->getOperand(0);
4401 if (!WriteIs128Bit) {
4402 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
4403 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4404 N->getOperand(2), InChain);
4405 } else {
4406 // No endian swap. The lower half always goes into the even subreg, and the
4407 // higher half always into the odd supreg.
4408 SDNode *Pair = CurDAG->getMachineNode(
4409 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
4410 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
4411 MVT::i32),
4412 N->getOperand(2),
4413 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
4414 N->getOperand(3),
4415 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
4416
4417 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
4418 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4419 SDValue(Pair, 0), InChain);
4420 }
4421
4422 return true;
4423}
4424
4425/// We've got special pseudo-instructions for these
4426bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4427 unsigned Opcode;
4428 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4429
4430 // Leave IR for LSE if subtarget supports it.
4431 if (Subtarget->hasLSE()) return false;
4432
4433 if (MemTy == MVT::i8)
4434 Opcode = AArch64::CMP_SWAP_8;
4435 else if (MemTy == MVT::i16)
4436 Opcode = AArch64::CMP_SWAP_16;
4437 else if (MemTy == MVT::i32)
4438 Opcode = AArch64::CMP_SWAP_32;
4439 else if (MemTy == MVT::i64)
4440 Opcode = AArch64::CMP_SWAP_64;
4441 else
4442 llvm_unreachable("Unknown AtomicCmpSwap type");
4443
4444 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4445 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4446 N->getOperand(0)};
4447 SDNode *CmpSwap = CurDAG->getMachineNode(
4448 Opcode, SDLoc(N),
4449 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4450
4451 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4452 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4453
4454 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4455 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4456 CurDAG->RemoveDeadNode(N);
4457
4458 return true;
4459}
4460
4461bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4462 SDValue &Shift, bool Negate) {
4463 if (!isa<ConstantSDNode>(N))
4464 return false;
4465
4466 APInt Val =
4467 cast<ConstantSDNode>(N)->getAPIntValue().trunc(VT.getFixedSizeInBits());
4468
4469 return SelectSVEAddSubImm(SDLoc(N), Val, VT, Imm, Shift, Negate);
4470}
4471
4472bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDLoc DL, APInt Val, MVT VT,
4473 SDValue &Imm, SDValue &Shift,
4474 bool Negate) {
4475 if (Negate)
4476 Val = -Val;
4477
4478 switch (VT.SimpleTy) {
4479 case MVT::i8:
4480 // All immediates are supported.
4481 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4482 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4483 return true;
4484 case MVT::i16:
4485 case MVT::i32:
4486 case MVT::i64:
4487 // Support 8bit unsigned immediates.
4488 if ((Val & ~0xff) == 0) {
4489 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4490 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4491 return true;
4492 }
4493 // Support 16bit unsigned immediates that are a multiple of 256.
4494 if ((Val & ~0xff00) == 0) {
4495 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4496 Imm = CurDAG->getTargetConstant(Val.lshr(8).getZExtValue(), DL, MVT::i32);
4497 return true;
4498 }
4499 break;
4500 default:
4501 break;
4502 }
4503
4504 return false;
4505}
4506
4507bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4508 SDValue &Imm, SDValue &Shift,
4509 bool Negate) {
4510 if (!isa<ConstantSDNode>(N))
4511 return false;
4512
4513 SDLoc DL(N);
4514 int64_t Val = cast<ConstantSDNode>(N)
4515 ->getAPIntValue()
4517 .getSExtValue();
4518
4519 if (Negate)
4520 Val = -Val;
4521
4522 // Signed saturating instructions treat their immediate operand as unsigned,
4523 // whereas the related intrinsics define their operands to be signed. This
4524 // means we can only use the immediate form when the operand is non-negative.
4525 if (Val < 0)
4526 return false;
4527
4528 switch (VT.SimpleTy) {
4529 case MVT::i8:
4530 // All positive immediates are supported.
4531 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4532 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4533 return true;
4534 case MVT::i16:
4535 case MVT::i32:
4536 case MVT::i64:
4537 // Support 8bit positive immediates.
4538 if (Val <= 255) {
4539 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4540 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4541 return true;
4542 }
4543 // Support 16bit positive immediates that are a multiple of 256.
4544 if (Val <= 65280 && Val % 256 == 0) {
4545 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4546 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4547 return true;
4548 }
4549 break;
4550 default:
4551 break;
4552 }
4553
4554 return false;
4555}
4556
4557bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4558 SDValue &Shift) {
4559 if (!isa<ConstantSDNode>(N))
4560 return false;
4561
4562 SDLoc DL(N);
4563 int64_t Val = cast<ConstantSDNode>(N)
4564 ->getAPIntValue()
4565 .trunc(VT.getFixedSizeInBits())
4566 .getSExtValue();
4567 int32_t ImmVal, ShiftVal;
4568 if (!AArch64_AM::isSVECpyDupImm(VT.getScalarSizeInBits(), Val, ImmVal,
4569 ShiftVal))
4570 return false;
4571
4572 Shift = CurDAG->getTargetConstant(ShiftVal, DL, MVT::i32);
4573 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
4574 return true;
4575}
4576
4577bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4578 if (auto CNode = dyn_cast<ConstantSDNode>(N))
4579 return SelectSVESignedArithImm(SDLoc(N), CNode->getAPIntValue(), Imm);
4580 return false;
4581}
4582
4583bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDLoc DL, APInt Val,
4584 SDValue &Imm) {
4585 int64_t ImmVal = Val.getSExtValue();
4586 if (ImmVal >= -128 && ImmVal < 128) {
4587 Imm = CurDAG->getSignedTargetConstant(ImmVal, DL, MVT::i32);
4588 return true;
4589 }
4590 return false;
4591}
4592
4593bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4594 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4595 uint64_t ImmVal = CNode->getZExtValue();
4596
4597 switch (VT.SimpleTy) {
4598 case MVT::i8:
4599 ImmVal &= 0xFF;
4600 break;
4601 case MVT::i16:
4602 ImmVal &= 0xFFFF;
4603 break;
4604 case MVT::i32:
4605 ImmVal &= 0xFFFFFFFF;
4606 break;
4607 case MVT::i64:
4608 break;
4609 default:
4610 llvm_unreachable("Unexpected type");
4611 }
4612
4613 if (ImmVal < 256) {
4614 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4615 return true;
4616 }
4617 }
4618 return false;
4619}
4620
4621bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4622 bool Invert) {
4623 uint64_t ImmVal;
4624 if (auto CI = dyn_cast<ConstantSDNode>(N))
4625 ImmVal = CI->getZExtValue();
4626 else if (auto CFP = dyn_cast<ConstantFPSDNode>(N))
4627 ImmVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
4628 else
4629 return false;
4630
4631 if (Invert)
4632 ImmVal = ~ImmVal;
4633
4634 uint64_t encoding;
4635 if (!AArch64_AM::isSVELogicalImm(VT.getScalarSizeInBits(), ImmVal, encoding))
4636 return false;
4637
4638 Imm = CurDAG->getTargetConstant(encoding, SDLoc(N), MVT::i64);
4639 return true;
4640}
4641
4642// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4643// Rather than attempt to normalise everything we can sometimes saturate the
4644// shift amount during selection. This function also allows for consistent
4645// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4646// required by the instructions.
4647bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4648 uint64_t High, bool AllowSaturation,
4649 SDValue &Imm) {
4650 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4651 uint64_t ImmVal = CN->getZExtValue();
4652
4653 // Reject shift amounts that are too small.
4654 if (ImmVal < Low)
4655 return false;
4656
4657 // Reject or saturate shift amounts that are too big.
4658 if (ImmVal > High) {
4659 if (!AllowSaturation)
4660 return false;
4661 ImmVal = High;
4662 }
4663
4664 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4665 return true;
4666 }
4667
4668 return false;
4669}
4670
4671bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4672 // tagp(FrameIndex, IRGstack, tag_offset):
4673 // since the offset between FrameIndex and IRGstack is a compile-time
4674 // constant, this can be lowered to a single ADDG instruction.
4675 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4676 return false;
4677 }
4678
4679 SDValue IRG_SP = N->getOperand(2);
4680 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4681 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4682 return false;
4683 }
4684
4685 const TargetLowering *TLI = getTargetLowering();
4686 SDLoc DL(N);
4687 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4688 SDValue FiOp = CurDAG->getTargetFrameIndex(
4689 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4690 int TagOffset = N->getConstantOperandVal(3);
4691
4692 SDNode *Out = CurDAG->getMachineNode(
4693 AArch64::TAGPstack, DL, MVT::i64,
4694 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4695 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4696 ReplaceNode(N, Out);
4697 return true;
4698}
4699
4700void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4701 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4702 "llvm.aarch64.tagp third argument must be an immediate");
4703 if (trySelectStackSlotTagP(N))
4704 return;
4705 // FIXME: above applies in any case when offset between Op1 and Op2 is a
4706 // compile-time constant, not just for stack allocations.
4707
4708 // General case for unrelated pointers in Op1 and Op2.
4709 SDLoc DL(N);
4710 int TagOffset = N->getConstantOperandVal(3);
4711 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4712 {N->getOperand(1), N->getOperand(2)});
4713 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4714 {SDValue(N1, 0), N->getOperand(2)});
4715 SDNode *N3 = CurDAG->getMachineNode(
4716 AArch64::ADDG, DL, MVT::i64,
4717 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4718 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4719 ReplaceNode(N, N3);
4720}
4721
4722bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4723 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4724
4725 // Bail when not a "cast" like insert_subvector.
4726 if (N->getConstantOperandVal(2) != 0)
4727 return false;
4728 if (!N->getOperand(0).isUndef())
4729 return false;
4730
4731 // Bail when normal isel should do the job.
4732 EVT VT = N->getValueType(0);
4733 EVT InVT = N->getOperand(1).getValueType();
4734 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4735 return false;
4736 if (InVT.getSizeInBits() <= 128)
4737 return false;
4738
4739 // NOTE: We can only get here when doing fixed length SVE code generation.
4740 // We do manual selection because the types involved are not linked to real
4741 // registers (despite being legal) and must be coerced into SVE registers.
4742
4744 "Expected to insert into a packed scalable vector!");
4745
4746 SDLoc DL(N);
4747 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4748 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4749 N->getOperand(1), RC));
4750 return true;
4751}
4752
4753bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4754 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4755
4756 // Bail when not a "cast" like extract_subvector.
4757 if (N->getConstantOperandVal(1) != 0)
4758 return false;
4759
4760 // Bail when normal isel can do the job.
4761 EVT VT = N->getValueType(0);
4762 EVT InVT = N->getOperand(0).getValueType();
4763 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4764 return false;
4765 if (VT.getSizeInBits() <= 128)
4766 return false;
4767
4768 // NOTE: We can only get here when doing fixed length SVE code generation.
4769 // We do manual selection because the types involved are not linked to real
4770 // registers (despite being legal) and must be coerced into SVE registers.
4771
4773 "Expected to extract from a packed scalable vector!");
4774
4775 SDLoc DL(N);
4776 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4777 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4778 N->getOperand(0), RC));
4779 return true;
4780}
4781
4782bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4783 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4784
4785 SDValue N0 = N->getOperand(0);
4786 SDValue N1 = N->getOperand(1);
4787
4788 EVT VT = N->getValueType(0);
4789 SDLoc DL(N);
4790
4791 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4792 // Rotate by a constant is a funnel shift in IR which is expanded to
4793 // an OR with shifted operands.
4794 // We do the following transform:
4795 // OR N0, N1 -> xar (x, y, imm)
4796 // Where:
4797 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4798 // N0 = SHL_PRED true, V, splat(bits-imm)
4799 // V = (xor x, y)
4800 if (VT.isScalableVector() &&
4801 (Subtarget->hasSVE2() ||
4802 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4803 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4804 N1.getOpcode() != AArch64ISD::SRL_PRED)
4805 std::swap(N0, N1);
4806 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4807 N1.getOpcode() != AArch64ISD::SRL_PRED)
4808 return false;
4809
4810 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4811 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4812 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4813 return false;
4814
4815 if (N0.getOperand(1) != N1.getOperand(1))
4816 return false;
4817
4818 SDValue R1, R2;
4819 bool IsXOROperand = true;
4820 if (N0.getOperand(1).getOpcode() != ISD::XOR) {
4821 IsXOROperand = false;
4822 } else {
4823 R1 = N0.getOperand(1).getOperand(0);
4824 R2 = N1.getOperand(1).getOperand(1);
4825 }
4826
4827 APInt ShlAmt, ShrAmt;
4828 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4830 return false;
4831
4832 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4833 return false;
4834
4835 if (!IsXOROperand) {
4836 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4837 SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, VT, Zero);
4838 SDValue MOVIV = SDValue(MOV, 0);
4839
4840 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4841 SDNode *SubRegToReg =
4842 CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL, VT, MOVIV, ZSub);
4843
4844 R1 = N1->getOperand(1);
4845 R2 = SDValue(SubRegToReg, 0);
4846 }
4847
4848 SDValue Imm =
4849 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4850
4851 SDValue Ops[] = {R1, R2, Imm};
4853 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4854 AArch64::XAR_ZZZI_D})) {
4855 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4856 return true;
4857 }
4858 return false;
4859 }
4860
4861 // We have Neon SHA3 XAR operation for v2i64 but for types
4862 // v4i32, v8i16, v16i8 we can use SVE operations when SVE2-SHA3
4863 // is available.
4864 EVT SVT;
4865 switch (VT.getSimpleVT().SimpleTy) {
4866 case MVT::v4i32:
4867 case MVT::v2i32:
4868 SVT = MVT::nxv4i32;
4869 break;
4870 case MVT::v8i16:
4871 case MVT::v4i16:
4872 SVT = MVT::nxv8i16;
4873 break;
4874 case MVT::v16i8:
4875 case MVT::v8i8:
4876 SVT = MVT::nxv16i8;
4877 break;
4878 case MVT::v2i64:
4879 case MVT::v1i64:
4880 SVT = Subtarget->hasSHA3() ? MVT::v2i64 : MVT::nxv2i64;
4881 break;
4882 default:
4883 return false;
4884 }
4885
4886 if ((!SVT.isScalableVector() && !Subtarget->hasSHA3()) ||
4887 (SVT.isScalableVector() && !Subtarget->hasSVE2()))
4888 return false;
4889
4890 if (N0->getOpcode() != AArch64ISD::VSHL ||
4891 N1->getOpcode() != AArch64ISD::VLSHR)
4892 return false;
4893
4894 if (N0->getOperand(0) != N1->getOperand(0))
4895 return false;
4896
4897 SDValue R1, R2;
4898 bool IsXOROperand = true;
4899 if (N1->getOperand(0)->getOpcode() != ISD::XOR) {
4900 IsXOROperand = false;
4901 } else {
4902 SDValue XOR = N0.getOperand(0);
4903 R1 = XOR.getOperand(0);
4904 R2 = XOR.getOperand(1);
4905 }
4906
4907 unsigned HsAmt = N0.getConstantOperandVal(1);
4908 unsigned ShAmt = N1.getConstantOperandVal(1);
4909
4910 SDValue Imm = CurDAG->getTargetConstant(
4911 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4912
4913 unsigned VTSizeInBits = VT.getScalarSizeInBits();
4914 if (ShAmt + HsAmt != VTSizeInBits)
4915 return false;
4916
4917 if (!IsXOROperand) {
4918 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4919 SDNode *MOV =
4920 CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, MVT::v2i64, Zero);
4921 SDValue MOVIV = SDValue(MOV, 0);
4922
4923 R1 = N1->getOperand(0);
4924 R2 = MOVIV;
4925 }
4926
4927 if (SVT != VT) {
4928 SDValue Undef =
4929 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, SVT), 0);
4930
4931 if (SVT.isScalableVector() && VT.is64BitVector()) {
4932 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4933
4934 SDValue UndefQ = SDValue(
4935 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, QVT), 0);
4936 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4937
4938 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4939 UndefQ, R1, DSub),
4940 0);
4941 if (R2.getValueType() == VT)
4942 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4943 UndefQ, R2, DSub),
4944 0);
4945 }
4946
4947 SDValue SubReg = CurDAG->getTargetConstant(
4948 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL, MVT::i32);
4949
4950 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, Undef,
4951 R1, SubReg),
4952 0);
4953
4954 if (SVT.isScalableVector() || R2.getValueType() != SVT)
4955 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT,
4956 Undef, R2, SubReg),
4957 0);
4958 }
4959
4960 SDValue Ops[] = {R1, R2, Imm};
4961 SDNode *XAR = nullptr;
4962
4963 if (SVT.isScalableVector()) {
4965 SVT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4966 AArch64::XAR_ZZZI_D}))
4967 XAR = CurDAG->getMachineNode(Opc, DL, SVT, Ops);
4968 } else {
4969 XAR = CurDAG->getMachineNode(AArch64::XAR, DL, SVT, Ops);
4970 }
4971
4972 assert(XAR && "Unexpected NULL value for XAR instruction in DAG");
4973
4974 if (SVT != VT) {
4975 if (VT.is64BitVector() && SVT.isScalableVector()) {
4976 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4977
4978 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4979 SDNode *Q = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, QVT,
4980 SDValue(XAR, 0), ZSub);
4981
4982 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4983 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4984 SDValue(Q, 0), DSub);
4985 } else {
4986 SDValue SubReg = CurDAG->getTargetConstant(
4987 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL,
4988 MVT::i32);
4989 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4990 SDValue(XAR, 0), SubReg);
4991 }
4992 }
4993 ReplaceNode(N, XAR);
4994 return true;
4995}
4996
4997void AArch64DAGToDAGISel::Select(SDNode *Node) {
4998 // If we have a custom node, we already have selected!
4999 if (Node->isMachineOpcode()) {
5000 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
5001 Node->setNodeId(-1);
5002 return;
5003 }
5004
5005 // Few custom selection stuff.
5006 EVT VT = Node->getValueType(0);
5007
5008 switch (Node->getOpcode()) {
5009 default:
5010 break;
5011
5013 if (SelectCMP_SWAP(Node))
5014 return;
5015 break;
5016
5017 case ISD::READ_REGISTER:
5018 case AArch64ISD::MRRS:
5019 if (tryReadRegister(Node))
5020 return;
5021 break;
5022
5024 case AArch64ISD::MSRR:
5025 if (tryWriteRegister(Node))
5026 return;
5027 break;
5028
5029 case ISD::LOAD: {
5030 // Try to select as an indexed load. Fall through to normal processing
5031 // if we can't.
5032 if (tryIndexedLoad(Node))
5033 return;
5034 break;
5035 }
5036
5037 case ISD::SRL:
5038 case ISD::AND:
5039 case ISD::SRA:
5041 if (tryBitfieldExtractOp(Node))
5042 return;
5043 if (tryBitfieldInsertInZeroOp(Node))
5044 return;
5045 [[fallthrough]];
5046 case ISD::ROTR:
5047 case ISD::SHL:
5048 if (tryShiftAmountMod(Node))
5049 return;
5050 break;
5051
5052 case ISD::SIGN_EXTEND:
5053 if (tryBitfieldExtractOpFromSExt(Node))
5054 return;
5055 break;
5056
5057 case ISD::OR:
5058 if (tryBitfieldInsertOp(Node))
5059 return;
5060 if (trySelectXAR(Node))
5061 return;
5062 break;
5063
5065 if (trySelectCastScalableToFixedLengthVector(Node))
5066 return;
5067 break;
5068 }
5069
5070 case ISD::INSERT_SUBVECTOR: {
5071 if (trySelectCastFixedLengthToScalableVector(Node))
5072 return;
5073 break;
5074 }
5075
5076 case ISD::Constant: {
5077 // Materialize zero constants as copies from WZR/XZR. This allows
5078 // the coalescer to propagate these into other instructions.
5079 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
5080 if (ConstNode->isZero()) {
5081 if (VT == MVT::i32) {
5082 SDValue New = CurDAG->getCopyFromReg(
5083 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
5084 ReplaceNode(Node, New.getNode());
5085 return;
5086 } else if (VT == MVT::i64) {
5087 SDValue New = CurDAG->getCopyFromReg(
5088 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
5089 ReplaceNode(Node, New.getNode());
5090 return;
5091 }
5092 }
5093 break;
5094 }
5095
5096 case ISD::FrameIndex: {
5097 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
5098 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
5099 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
5100 const TargetLowering *TLI = getTargetLowering();
5101 SDValue TFI = CurDAG->getTargetFrameIndex(
5102 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
5103 SDLoc DL(Node);
5104 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
5105 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
5106 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
5107 return;
5108 }
5110 unsigned IntNo = Node->getConstantOperandVal(1);
5111 switch (IntNo) {
5112 default:
5113 break;
5114 case Intrinsic::aarch64_gcsss: {
5115 SDLoc DL(Node);
5116 SDValue Chain = Node->getOperand(0);
5117 SDValue Val = Node->getOperand(2);
5118 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
5119 SDNode *SS1 =
5120 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
5121 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
5122 MVT::Other, Zero, SDValue(SS1, 0));
5123 ReplaceNode(Node, SS2);
5124 return;
5125 }
5126 case Intrinsic::aarch64_ldaxp:
5127 case Intrinsic::aarch64_ldxp: {
5128 unsigned Op =
5129 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
5130 SDValue MemAddr = Node->getOperand(2);
5131 SDLoc DL(Node);
5132 SDValue Chain = Node->getOperand(0);
5133
5134 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
5135 MVT::Other, MemAddr, Chain);
5136
5137 // Transfer memoperands.
5138 MachineMemOperand *MemOp =
5139 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
5140 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
5141 ReplaceNode(Node, Ld);
5142 return;
5143 }
5144 case Intrinsic::aarch64_stlxp:
5145 case Intrinsic::aarch64_stxp: {
5146 unsigned Op =
5147 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
5148 SDLoc DL(Node);
5149 SDValue Chain = Node->getOperand(0);
5150 SDValue ValLo = Node->getOperand(2);
5151 SDValue ValHi = Node->getOperand(3);
5152 SDValue MemAddr = Node->getOperand(4);
5153
5154 // Place arguments in the right order.
5155 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
5156
5157 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
5158 // Transfer memoperands.
5159 MachineMemOperand *MemOp =
5160 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
5161 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
5162
5163 ReplaceNode(Node, St);
5164 return;
5165 }
5166 case Intrinsic::aarch64_neon_ld1x2:
5167 if (VT == MVT::v8i8) {
5168 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
5169 return;
5170 } else if (VT == MVT::v16i8) {
5171 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
5172 return;
5173 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5174 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
5175 return;
5176 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5177 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
5178 return;
5179 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5180 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
5181 return;
5182 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5183 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
5184 return;
5185 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5186 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5187 return;
5188 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5189 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
5190 return;
5191 }
5192 break;
5193 case Intrinsic::aarch64_neon_ld1x3:
5194 if (VT == MVT::v8i8) {
5195 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
5196 return;
5197 } else if (VT == MVT::v16i8) {
5198 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
5199 return;
5200 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5201 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
5202 return;
5203 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5204 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
5205 return;
5206 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5207 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
5208 return;
5209 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5210 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
5211 return;
5212 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5213 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5214 return;
5215 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5216 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
5217 return;
5218 }
5219 break;
5220 case Intrinsic::aarch64_neon_ld1x4:
5221 if (VT == MVT::v8i8) {
5222 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
5223 return;
5224 } else if (VT == MVT::v16i8) {
5225 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
5226 return;
5227 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5228 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
5229 return;
5230 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5231 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
5232 return;
5233 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5234 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
5235 return;
5236 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5237 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
5238 return;
5239 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5240 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5241 return;
5242 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5243 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
5244 return;
5245 }
5246 break;
5247 case Intrinsic::aarch64_neon_ld2:
5248 if (VT == MVT::v8i8) {
5249 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
5250 return;
5251 } else if (VT == MVT::v16i8) {
5252 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
5253 return;
5254 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5255 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
5256 return;
5257 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5258 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
5259 return;
5260 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5261 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
5262 return;
5263 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5264 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
5265 return;
5266 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5267 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5268 return;
5269 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5270 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
5271 return;
5272 }
5273 break;
5274 case Intrinsic::aarch64_neon_ld3:
5275 if (VT == MVT::v8i8) {
5276 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
5277 return;
5278 } else if (VT == MVT::v16i8) {
5279 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
5280 return;
5281 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5282 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
5283 return;
5284 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5285 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
5286 return;
5287 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5288 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
5289 return;
5290 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5291 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
5292 return;
5293 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5294 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5295 return;
5296 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5297 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
5298 return;
5299 }
5300 break;
5301 case Intrinsic::aarch64_neon_ld4:
5302 if (VT == MVT::v8i8) {
5303 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
5304 return;
5305 } else if (VT == MVT::v16i8) {
5306 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
5307 return;
5308 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5309 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
5310 return;
5311 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5312 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
5313 return;
5314 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5315 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
5316 return;
5317 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5318 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
5319 return;
5320 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5321 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5322 return;
5323 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5324 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
5325 return;
5326 }
5327 break;
5328 case Intrinsic::aarch64_neon_ld2r:
5329 if (VT == MVT::v8i8) {
5330 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
5331 return;
5332 } else if (VT == MVT::v16i8) {
5333 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
5334 return;
5335 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5336 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
5337 return;
5338 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5339 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
5340 return;
5341 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5342 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
5343 return;
5344 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5345 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
5346 return;
5347 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5348 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
5349 return;
5350 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5351 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
5352 return;
5353 }
5354 break;
5355 case Intrinsic::aarch64_neon_ld3r:
5356 if (VT == MVT::v8i8) {
5357 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
5358 return;
5359 } else if (VT == MVT::v16i8) {
5360 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
5361 return;
5362 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5363 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
5364 return;
5365 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5366 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
5367 return;
5368 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5369 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
5370 return;
5371 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5372 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
5373 return;
5374 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5375 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
5376 return;
5377 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5378 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
5379 return;
5380 }
5381 break;
5382 case Intrinsic::aarch64_neon_ld4r:
5383 if (VT == MVT::v8i8) {
5384 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
5385 return;
5386 } else if (VT == MVT::v16i8) {
5387 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
5388 return;
5389 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5390 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
5391 return;
5392 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5393 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
5394 return;
5395 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5396 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
5397 return;
5398 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5399 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
5400 return;
5401 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5402 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
5403 return;
5404 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5405 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
5406 return;
5407 }
5408 break;
5409 case Intrinsic::aarch64_neon_ld2lane:
5410 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5411 SelectLoadLane(Node, 2, AArch64::LD2i8);
5412 return;
5413 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5414 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5415 SelectLoadLane(Node, 2, AArch64::LD2i16);
5416 return;
5417 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5418 VT == MVT::v2f32) {
5419 SelectLoadLane(Node, 2, AArch64::LD2i32);
5420 return;
5421 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5422 VT == MVT::v1f64) {
5423 SelectLoadLane(Node, 2, AArch64::LD2i64);
5424 return;
5425 }
5426 break;
5427 case Intrinsic::aarch64_neon_ld3lane:
5428 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5429 SelectLoadLane(Node, 3, AArch64::LD3i8);
5430 return;
5431 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5432 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5433 SelectLoadLane(Node, 3, AArch64::LD3i16);
5434 return;
5435 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5436 VT == MVT::v2f32) {
5437 SelectLoadLane(Node, 3, AArch64::LD3i32);
5438 return;
5439 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5440 VT == MVT::v1f64) {
5441 SelectLoadLane(Node, 3, AArch64::LD3i64);
5442 return;
5443 }
5444 break;
5445 case Intrinsic::aarch64_neon_ld4lane:
5446 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5447 SelectLoadLane(Node, 4, AArch64::LD4i8);
5448 return;
5449 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5450 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5451 SelectLoadLane(Node, 4, AArch64::LD4i16);
5452 return;
5453 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5454 VT == MVT::v2f32) {
5455 SelectLoadLane(Node, 4, AArch64::LD4i32);
5456 return;
5457 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5458 VT == MVT::v1f64) {
5459 SelectLoadLane(Node, 4, AArch64::LD4i64);
5460 return;
5461 }
5462 break;
5463 case Intrinsic::aarch64_ld64b:
5464 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
5465 return;
5466 case Intrinsic::aarch64_sve_ld2q_sret: {
5467 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
5468 return;
5469 }
5470 case Intrinsic::aarch64_sve_ld3q_sret: {
5471 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
5472 return;
5473 }
5474 case Intrinsic::aarch64_sve_ld4q_sret: {
5475 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
5476 return;
5477 }
5478 case Intrinsic::aarch64_sve_ld2_sret: {
5479 if (VT == MVT::nxv16i8) {
5480 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
5481 true);
5482 return;
5483 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5484 VT == MVT::nxv8bf16) {
5485 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
5486 true);
5487 return;
5488 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5489 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
5490 true);
5491 return;
5492 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5493 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
5494 true);
5495 return;
5496 }
5497 break;
5498 }
5499 case Intrinsic::aarch64_sve_ld1_pn_x2: {
5500 if (VT == MVT::nxv16i8) {
5501 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5502 SelectContiguousMultiVectorLoad(
5503 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
5504 else if (Subtarget->hasSVE2p1())
5505 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
5506 AArch64::LD1B_2Z);
5507 else
5508 break;
5509 return;
5510 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5511 VT == MVT::nxv8bf16) {
5512 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5513 SelectContiguousMultiVectorLoad(
5514 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
5515 else if (Subtarget->hasSVE2p1())
5516 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
5517 AArch64::LD1H_2Z);
5518 else
5519 break;
5520 return;
5521 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5522 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5523 SelectContiguousMultiVectorLoad(
5524 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
5525 else if (Subtarget->hasSVE2p1())
5526 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
5527 AArch64::LD1W_2Z);
5528 else
5529 break;
5530 return;
5531 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5532 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5533 SelectContiguousMultiVectorLoad(
5534 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
5535 else if (Subtarget->hasSVE2p1())
5536 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
5537 AArch64::LD1D_2Z);
5538 else
5539 break;
5540 return;
5541 }
5542 break;
5543 }
5544 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5545 if (VT == MVT::nxv16i8) {
5546 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5547 SelectContiguousMultiVectorLoad(
5548 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5549 else if (Subtarget->hasSVE2p1())
5550 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5551 AArch64::LD1B_4Z);
5552 else
5553 break;
5554 return;
5555 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5556 VT == MVT::nxv8bf16) {
5557 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5558 SelectContiguousMultiVectorLoad(
5559 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5560 else if (Subtarget->hasSVE2p1())
5561 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5562 AArch64::LD1H_4Z);
5563 else
5564 break;
5565 return;
5566 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5567 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5568 SelectContiguousMultiVectorLoad(
5569 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5570 else if (Subtarget->hasSVE2p1())
5571 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5572 AArch64::LD1W_4Z);
5573 else
5574 break;
5575 return;
5576 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5577 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5578 SelectContiguousMultiVectorLoad(
5579 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5580 else if (Subtarget->hasSVE2p1())
5581 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5582 AArch64::LD1D_4Z);
5583 else
5584 break;
5585 return;
5586 }
5587 break;
5588 }
5589 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5590 if (VT == MVT::nxv16i8) {
5591 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5592 SelectContiguousMultiVectorLoad(Node, 2, 0,
5593 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5594 AArch64::LDNT1B_2Z_PSEUDO);
5595 else if (Subtarget->hasSVE2p1())
5596 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5597 AArch64::LDNT1B_2Z);
5598 else
5599 break;
5600 return;
5601 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5602 VT == MVT::nxv8bf16) {
5603 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5604 SelectContiguousMultiVectorLoad(Node, 2, 1,
5605 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5606 AArch64::LDNT1H_2Z_PSEUDO);
5607 else if (Subtarget->hasSVE2p1())
5608 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5609 AArch64::LDNT1H_2Z);
5610 else
5611 break;
5612 return;
5613 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5614 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5615 SelectContiguousMultiVectorLoad(Node, 2, 2,
5616 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5617 AArch64::LDNT1W_2Z_PSEUDO);
5618 else if (Subtarget->hasSVE2p1())
5619 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5620 AArch64::LDNT1W_2Z);
5621 else
5622 break;
5623 return;
5624 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5625 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5626 SelectContiguousMultiVectorLoad(Node, 2, 3,
5627 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5628 AArch64::LDNT1D_2Z_PSEUDO);
5629 else if (Subtarget->hasSVE2p1())
5630 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5631 AArch64::LDNT1D_2Z);
5632 else
5633 break;
5634 return;
5635 }
5636 break;
5637 }
5638 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5639 if (VT == MVT::nxv16i8) {
5640 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5641 SelectContiguousMultiVectorLoad(Node, 4, 0,
5642 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5643 AArch64::LDNT1B_4Z_PSEUDO);
5644 else if (Subtarget->hasSVE2p1())
5645 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5646 AArch64::LDNT1B_4Z);
5647 else
5648 break;
5649 return;
5650 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5651 VT == MVT::nxv8bf16) {
5652 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5653 SelectContiguousMultiVectorLoad(Node, 4, 1,
5654 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5655 AArch64::LDNT1H_4Z_PSEUDO);
5656 else if (Subtarget->hasSVE2p1())
5657 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5658 AArch64::LDNT1H_4Z);
5659 else
5660 break;
5661 return;
5662 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5663 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5664 SelectContiguousMultiVectorLoad(Node, 4, 2,
5665 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5666 AArch64::LDNT1W_4Z_PSEUDO);
5667 else if (Subtarget->hasSVE2p1())
5668 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5669 AArch64::LDNT1W_4Z);
5670 else
5671 break;
5672 return;
5673 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5674 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5675 SelectContiguousMultiVectorLoad(Node, 4, 3,
5676 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5677 AArch64::LDNT1D_4Z_PSEUDO);
5678 else if (Subtarget->hasSVE2p1())
5679 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5680 AArch64::LDNT1D_4Z);
5681 else
5682 break;
5683 return;
5684 }
5685 break;
5686 }
5687 case Intrinsic::aarch64_sve_ld3_sret: {
5688 if (VT == MVT::nxv16i8) {
5689 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5690 true);
5691 return;
5692 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5693 VT == MVT::nxv8bf16) {
5694 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5695 true);
5696 return;
5697 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5698 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5699 true);
5700 return;
5701 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5702 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5703 true);
5704 return;
5705 }
5706 break;
5707 }
5708 case Intrinsic::aarch64_sve_ld4_sret: {
5709 if (VT == MVT::nxv16i8) {
5710 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5711 true);
5712 return;
5713 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5714 VT == MVT::nxv8bf16) {
5715 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5716 true);
5717 return;
5718 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5719 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5720 true);
5721 return;
5722 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5723 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5724 true);
5725 return;
5726 }
5727 break;
5728 }
5729 case Intrinsic::aarch64_sme_read_hor_vg2: {
5730 if (VT == MVT::nxv16i8) {
5731 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5732 AArch64::MOVA_2ZMXI_H_B);
5733 return;
5734 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5735 VT == MVT::nxv8bf16) {
5736 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5737 AArch64::MOVA_2ZMXI_H_H);
5738 return;
5739 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5740 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5741 AArch64::MOVA_2ZMXI_H_S);
5742 return;
5743 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5744 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5745 AArch64::MOVA_2ZMXI_H_D);
5746 return;
5747 }
5748 break;
5749 }
5750 case Intrinsic::aarch64_sme_read_ver_vg2: {
5751 if (VT == MVT::nxv16i8) {
5752 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5753 AArch64::MOVA_2ZMXI_V_B);
5754 return;
5755 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5756 VT == MVT::nxv8bf16) {
5757 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5758 AArch64::MOVA_2ZMXI_V_H);
5759 return;
5760 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5761 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5762 AArch64::MOVA_2ZMXI_V_S);
5763 return;
5764 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5765 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5766 AArch64::MOVA_2ZMXI_V_D);
5767 return;
5768 }
5769 break;
5770 }
5771 case Intrinsic::aarch64_sme_read_hor_vg4: {
5772 if (VT == MVT::nxv16i8) {
5773 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5774 AArch64::MOVA_4ZMXI_H_B);
5775 return;
5776 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5777 VT == MVT::nxv8bf16) {
5778 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5779 AArch64::MOVA_4ZMXI_H_H);
5780 return;
5781 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5782 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5783 AArch64::MOVA_4ZMXI_H_S);
5784 return;
5785 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5786 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5787 AArch64::MOVA_4ZMXI_H_D);
5788 return;
5789 }
5790 break;
5791 }
5792 case Intrinsic::aarch64_sme_read_ver_vg4: {
5793 if (VT == MVT::nxv16i8) {
5794 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5795 AArch64::MOVA_4ZMXI_V_B);
5796 return;
5797 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5798 VT == MVT::nxv8bf16) {
5799 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5800 AArch64::MOVA_4ZMXI_V_H);
5801 return;
5802 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5803 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5804 AArch64::MOVA_4ZMXI_V_S);
5805 return;
5806 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5807 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5808 AArch64::MOVA_4ZMXI_V_D);
5809 return;
5810 }
5811 break;
5812 }
5813 case Intrinsic::aarch64_sme_read_vg1x2: {
5814 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5815 AArch64::MOVA_VG2_2ZMXI);
5816 return;
5817 }
5818 case Intrinsic::aarch64_sme_read_vg1x4: {
5819 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5820 AArch64::MOVA_VG4_4ZMXI);
5821 return;
5822 }
5823 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5824 if (VT == MVT::nxv16i8) {
5825 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5826 return;
5827 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5828 VT == MVT::nxv8bf16) {
5829 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5830 return;
5831 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5832 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5833 return;
5834 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5835 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5836 return;
5837 }
5838 break;
5839 }
5840 case Intrinsic::aarch64_sme_readz_vert_x2: {
5841 if (VT == MVT::nxv16i8) {
5842 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5843 return;
5844 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5845 VT == MVT::nxv8bf16) {
5846 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5847 return;
5848 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5849 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5850 return;
5851 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5852 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5853 return;
5854 }
5855 break;
5856 }
5857 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5858 if (VT == MVT::nxv16i8) {
5859 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5860 return;
5861 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5862 VT == MVT::nxv8bf16) {
5863 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5864 return;
5865 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5866 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5867 return;
5868 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5869 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5870 return;
5871 }
5872 break;
5873 }
5874 case Intrinsic::aarch64_sme_readz_vert_x4: {
5875 if (VT == MVT::nxv16i8) {
5876 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5877 return;
5878 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5879 VT == MVT::nxv8bf16) {
5880 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5881 return;
5882 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5883 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5884 return;
5885 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5886 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5887 return;
5888 }
5889 break;
5890 }
5891 case Intrinsic::aarch64_sme_readz_x2: {
5892 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
5893 AArch64::ZA);
5894 return;
5895 }
5896 case Intrinsic::aarch64_sme_readz_x4: {
5897 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
5898 AArch64::ZA);
5899 return;
5900 }
5901 case Intrinsic::swift_async_context_addr: {
5902 SDLoc DL(Node);
5903 SDValue Chain = Node->getOperand(0);
5904 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5905 SDValue Res = SDValue(
5906 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5907 CurDAG->getTargetConstant(8, DL, MVT::i32),
5908 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5909 0);
5910 ReplaceUses(SDValue(Node, 0), Res);
5911 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5912 CurDAG->RemoveDeadNode(Node);
5913
5914 auto &MF = CurDAG->getMachineFunction();
5915 MF.getFrameInfo().setFrameAddressIsTaken(true);
5916 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5917 return;
5918 }
5919 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5921 Node->getValueType(0),
5922 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5923 AArch64::LUTI2_4ZTZI_S}))
5924 // Second Immediate must be <= 3:
5925 SelectMultiVectorLutiLane(Node, 4, Opc, 3);
5926 return;
5927 }
5928 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5930 Node->getValueType(0),
5931 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5932 // Second Immediate must be <= 1:
5933 SelectMultiVectorLutiLane(Node, 4, Opc, 1);
5934 return;
5935 }
5936 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5938 Node->getValueType(0),
5939 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5940 AArch64::LUTI2_2ZTZI_S}))
5941 // Second Immediate must be <= 7:
5942 SelectMultiVectorLutiLane(Node, 2, Opc, 7);
5943 return;
5944 }
5945 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5947 Node->getValueType(0),
5948 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5949 AArch64::LUTI4_2ZTZI_S}))
5950 // Second Immediate must be <= 3:
5951 SelectMultiVectorLutiLane(Node, 2, Opc, 3);
5952 return;
5953 }
5954 case Intrinsic::aarch64_sme_luti4_zt_x4: {
5955 SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z);
5956 return;
5957 }
5958 case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
5960 Node->getValueType(0),
5961 {AArch64::BF1CVTL_2ZZ_BtoH, AArch64::F1CVTL_2ZZ_BtoH}))
5962 SelectCVTIntrinsicFP8(Node, 2, Opc);
5963 return;
5964 case Intrinsic::aarch64_sve_fp8_cvtl2_x2:
5966 Node->getValueType(0),
5967 {AArch64::BF2CVTL_2ZZ_BtoH, AArch64::F2CVTL_2ZZ_BtoH}))
5968 SelectCVTIntrinsicFP8(Node, 2, Opc);
5969 return;
5970 case Intrinsic::aarch64_sve_fp8_cvt1_x2:
5972 Node->getValueType(0),
5973 {AArch64::BF1CVT_2ZZ_BtoH, AArch64::F1CVT_2ZZ_BtoH}))
5974 SelectCVTIntrinsicFP8(Node, 2, Opc);
5975 return;
5976 case Intrinsic::aarch64_sve_fp8_cvt2_x2:
5978 Node->getValueType(0),
5979 {AArch64::BF2CVT_2ZZ_BtoH, AArch64::F2CVT_2ZZ_BtoH}))
5980 SelectCVTIntrinsicFP8(Node, 2, Opc);
5981 return;
5982 case Intrinsic::ptrauth_resign_load_relative:
5983 SelectPtrauthResign(Node);
5984 return;
5985 }
5986 } break;
5988 unsigned IntNo = Node->getConstantOperandVal(0);
5989 switch (IntNo) {
5990 default:
5991 break;
5992 case Intrinsic::aarch64_tagp:
5993 SelectTagP(Node);
5994 return;
5995
5996 case Intrinsic::ptrauth_auth:
5997 SelectPtrauthAuth(Node);
5998 return;
5999
6000 case Intrinsic::ptrauth_resign:
6001 SelectPtrauthResign(Node);
6002 return;
6003
6004 case Intrinsic::aarch64_neon_tbl2:
6005 SelectTable(Node, 2,
6006 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
6007 false);
6008 return;
6009 case Intrinsic::aarch64_neon_tbl3:
6010 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
6011 : AArch64::TBLv16i8Three,
6012 false);
6013 return;
6014 case Intrinsic::aarch64_neon_tbl4:
6015 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
6016 : AArch64::TBLv16i8Four,
6017 false);
6018 return;
6019 case Intrinsic::aarch64_neon_tbx2:
6020 SelectTable(Node, 2,
6021 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
6022 true);
6023 return;
6024 case Intrinsic::aarch64_neon_tbx3:
6025 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
6026 : AArch64::TBXv16i8Three,
6027 true);
6028 return;
6029 case Intrinsic::aarch64_neon_tbx4:
6030 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
6031 : AArch64::TBXv16i8Four,
6032 true);
6033 return;
6034 case Intrinsic::aarch64_sve_srshl_single_x2:
6036 Node->getValueType(0),
6037 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
6038 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
6039 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6040 return;
6041 case Intrinsic::aarch64_sve_srshl_single_x4:
6043 Node->getValueType(0),
6044 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
6045 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
6046 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6047 return;
6048 case Intrinsic::aarch64_sve_urshl_single_x2:
6050 Node->getValueType(0),
6051 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
6052 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
6053 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6054 return;
6055 case Intrinsic::aarch64_sve_urshl_single_x4:
6057 Node->getValueType(0),
6058 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
6059 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
6060 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6061 return;
6062 case Intrinsic::aarch64_sve_srshl_x2:
6064 Node->getValueType(0),
6065 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
6066 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
6067 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6068 return;
6069 case Intrinsic::aarch64_sve_srshl_x4:
6071 Node->getValueType(0),
6072 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
6073 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
6074 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6075 return;
6076 case Intrinsic::aarch64_sve_urshl_x2:
6078 Node->getValueType(0),
6079 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
6080 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
6081 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6082 return;
6083 case Intrinsic::aarch64_sve_urshl_x4:
6085 Node->getValueType(0),
6086 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
6087 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
6088 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6089 return;
6090 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
6092 Node->getValueType(0),
6093 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
6094 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
6095 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6096 return;
6097 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
6099 Node->getValueType(0),
6100 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
6101 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
6102 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6103 return;
6104 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
6106 Node->getValueType(0),
6107 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
6108 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
6109 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6110 return;
6111 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
6113 Node->getValueType(0),
6114 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
6115 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
6116 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6117 return;
6118 case Intrinsic::aarch64_sme_fp8_scale_single_x2:
6120 Node->getValueType(0),
6121 {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S,
6122 AArch64::FSCALE_2ZZ_D}))
6123 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6124 return;
6125 case Intrinsic::aarch64_sme_fp8_scale_single_x4:
6127 Node->getValueType(0),
6128 {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S,
6129 AArch64::FSCALE_4ZZ_D}))
6130 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6131 return;
6132 case Intrinsic::aarch64_sme_fp8_scale_x2:
6134 Node->getValueType(0),
6135 {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S,
6136 AArch64::FSCALE_2Z2Z_D}))
6137 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6138 return;
6139 case Intrinsic::aarch64_sme_fp8_scale_x4:
6141 Node->getValueType(0),
6142 {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S,
6143 AArch64::FSCALE_4Z4Z_D}))
6144 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6145 return;
6146 case Intrinsic::aarch64_sve_whilege_x2:
6148 Node->getValueType(0),
6149 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
6150 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
6151 SelectWhilePair(Node, Op);
6152 return;
6153 case Intrinsic::aarch64_sve_whilegt_x2:
6155 Node->getValueType(0),
6156 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
6157 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
6158 SelectWhilePair(Node, Op);
6159 return;
6160 case Intrinsic::aarch64_sve_whilehi_x2:
6162 Node->getValueType(0),
6163 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
6164 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
6165 SelectWhilePair(Node, Op);
6166 return;
6167 case Intrinsic::aarch64_sve_whilehs_x2:
6169 Node->getValueType(0),
6170 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
6171 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
6172 SelectWhilePair(Node, Op);
6173 return;
6174 case Intrinsic::aarch64_sve_whilele_x2:
6176 Node->getValueType(0),
6177 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
6178 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
6179 SelectWhilePair(Node, Op);
6180 return;
6181 case Intrinsic::aarch64_sve_whilelo_x2:
6183 Node->getValueType(0),
6184 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
6185 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
6186 SelectWhilePair(Node, Op);
6187 return;
6188 case Intrinsic::aarch64_sve_whilels_x2:
6190 Node->getValueType(0),
6191 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
6192 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
6193 SelectWhilePair(Node, Op);
6194 return;
6195 case Intrinsic::aarch64_sve_whilelt_x2:
6197 Node->getValueType(0),
6198 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
6199 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
6200 SelectWhilePair(Node, Op);
6201 return;
6202 case Intrinsic::aarch64_sve_smax_single_x2:
6204 Node->getValueType(0),
6205 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
6206 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
6207 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6208 return;
6209 case Intrinsic::aarch64_sve_umax_single_x2:
6211 Node->getValueType(0),
6212 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
6213 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
6214 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6215 return;
6216 case Intrinsic::aarch64_sve_fmax_single_x2:
6218 Node->getValueType(0),
6219 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
6220 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
6221 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6222 return;
6223 case Intrinsic::aarch64_sve_smax_single_x4:
6225 Node->getValueType(0),
6226 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
6227 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
6228 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6229 return;
6230 case Intrinsic::aarch64_sve_umax_single_x4:
6232 Node->getValueType(0),
6233 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
6234 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
6235 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6236 return;
6237 case Intrinsic::aarch64_sve_fmax_single_x4:
6239 Node->getValueType(0),
6240 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
6241 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
6242 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6243 return;
6244 case Intrinsic::aarch64_sve_smin_single_x2:
6246 Node->getValueType(0),
6247 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
6248 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
6249 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6250 return;
6251 case Intrinsic::aarch64_sve_umin_single_x2:
6253 Node->getValueType(0),
6254 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
6255 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
6256 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6257 return;
6258 case Intrinsic::aarch64_sve_fmin_single_x2:
6260 Node->getValueType(0),
6261 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
6262 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
6263 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6264 return;
6265 case Intrinsic::aarch64_sve_smin_single_x4:
6267 Node->getValueType(0),
6268 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
6269 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
6270 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6271 return;
6272 case Intrinsic::aarch64_sve_umin_single_x4:
6274 Node->getValueType(0),
6275 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
6276 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
6277 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6278 return;
6279 case Intrinsic::aarch64_sve_fmin_single_x4:
6281 Node->getValueType(0),
6282 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
6283 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
6284 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6285 return;
6286 case Intrinsic::aarch64_sve_smax_x2:
6288 Node->getValueType(0),
6289 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
6290 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
6291 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6292 return;
6293 case Intrinsic::aarch64_sve_umax_x2:
6295 Node->getValueType(0),
6296 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
6297 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
6298 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6299 return;
6300 case Intrinsic::aarch64_sve_fmax_x2:
6302 Node->getValueType(0),
6303 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
6304 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
6305 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6306 return;
6307 case Intrinsic::aarch64_sve_smax_x4:
6309 Node->getValueType(0),
6310 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
6311 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
6312 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6313 return;
6314 case Intrinsic::aarch64_sve_umax_x4:
6316 Node->getValueType(0),
6317 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
6318 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
6319 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6320 return;
6321 case Intrinsic::aarch64_sve_fmax_x4:
6323 Node->getValueType(0),
6324 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
6325 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
6326 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6327 return;
6328 case Intrinsic::aarch64_sme_famax_x2:
6330 Node->getValueType(0),
6331 {0, AArch64::FAMAX_2Z2Z_H, AArch64::FAMAX_2Z2Z_S,
6332 AArch64::FAMAX_2Z2Z_D}))
6333 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6334 return;
6335 case Intrinsic::aarch64_sme_famax_x4:
6337 Node->getValueType(0),
6338 {0, AArch64::FAMAX_4Z4Z_H, AArch64::FAMAX_4Z4Z_S,
6339 AArch64::FAMAX_4Z4Z_D}))
6340 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6341 return;
6342 case Intrinsic::aarch64_sme_famin_x2:
6344 Node->getValueType(0),
6345 {0, AArch64::FAMIN_2Z2Z_H, AArch64::FAMIN_2Z2Z_S,
6346 AArch64::FAMIN_2Z2Z_D}))
6347 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6348 return;
6349 case Intrinsic::aarch64_sme_famin_x4:
6351 Node->getValueType(0),
6352 {0, AArch64::FAMIN_4Z4Z_H, AArch64::FAMIN_4Z4Z_S,
6353 AArch64::FAMIN_4Z4Z_D}))
6354 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6355 return;
6356 case Intrinsic::aarch64_sve_smin_x2:
6358 Node->getValueType(0),
6359 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
6360 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
6361 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6362 return;
6363 case Intrinsic::aarch64_sve_umin_x2:
6365 Node->getValueType(0),
6366 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
6367 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
6368 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6369 return;
6370 case Intrinsic::aarch64_sve_fmin_x2:
6372 Node->getValueType(0),
6373 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
6374 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
6375 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6376 return;
6377 case Intrinsic::aarch64_sve_smin_x4:
6379 Node->getValueType(0),
6380 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
6381 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
6382 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6383 return;
6384 case Intrinsic::aarch64_sve_umin_x4:
6386 Node->getValueType(0),
6387 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
6388 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
6389 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6390 return;
6391 case Intrinsic::aarch64_sve_fmin_x4:
6393 Node->getValueType(0),
6394 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
6395 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
6396 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6397 return;
6398 case Intrinsic::aarch64_sve_fmaxnm_single_x2 :
6400 Node->getValueType(0),
6401 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
6402 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
6403 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6404 return;
6405 case Intrinsic::aarch64_sve_fmaxnm_single_x4 :
6407 Node->getValueType(0),
6408 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
6409 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
6410 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6411 return;
6412 case Intrinsic::aarch64_sve_fminnm_single_x2:
6414 Node->getValueType(0),
6415 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
6416 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
6417 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6418 return;
6419 case Intrinsic::aarch64_sve_fminnm_single_x4:
6421 Node->getValueType(0),
6422 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
6423 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
6424 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6425 return;
6426 case Intrinsic::aarch64_sve_fscale_single_x4:
6427 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::BFSCALE_4ZZ);
6428 return;
6429 case Intrinsic::aarch64_sve_fscale_single_x2:
6430 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::BFSCALE_2ZZ);
6431 return;
6432 case Intrinsic::aarch64_sve_fmul_single_x4:
6434 Node->getValueType(0),
6435 {AArch64::BFMUL_4ZZ, AArch64::FMUL_4ZZ_H, AArch64::FMUL_4ZZ_S,
6436 AArch64::FMUL_4ZZ_D}))
6437 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6438 return;
6439 case Intrinsic::aarch64_sve_fmul_single_x2:
6441 Node->getValueType(0),
6442 {AArch64::BFMUL_2ZZ, AArch64::FMUL_2ZZ_H, AArch64::FMUL_2ZZ_S,
6443 AArch64::FMUL_2ZZ_D}))
6444 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6445 return;
6446 case Intrinsic::aarch64_sve_fmaxnm_x2:
6448 Node->getValueType(0),
6449 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
6450 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
6451 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6452 return;
6453 case Intrinsic::aarch64_sve_fmaxnm_x4:
6455 Node->getValueType(0),
6456 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
6457 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
6458 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6459 return;
6460 case Intrinsic::aarch64_sve_fminnm_x2:
6462 Node->getValueType(0),
6463 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
6464 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
6465 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6466 return;
6467 case Intrinsic::aarch64_sve_fminnm_x4:
6469 Node->getValueType(0),
6470 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
6471 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
6472 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6473 return;
6474 case Intrinsic::aarch64_sve_aese_lane_x2:
6475 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESE_2ZZI_B);
6476 return;
6477 case Intrinsic::aarch64_sve_aesd_lane_x2:
6478 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESD_2ZZI_B);
6479 return;
6480 case Intrinsic::aarch64_sve_aesemc_lane_x2:
6481 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESEMC_2ZZI_B);
6482 return;
6483 case Intrinsic::aarch64_sve_aesdimc_lane_x2:
6484 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESDIMC_2ZZI_B);
6485 return;
6486 case Intrinsic::aarch64_sve_aese_lane_x4:
6487 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESE_4ZZI_B);
6488 return;
6489 case Intrinsic::aarch64_sve_aesd_lane_x4:
6490 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESD_4ZZI_B);
6491 return;
6492 case Intrinsic::aarch64_sve_aesemc_lane_x4:
6493 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESEMC_4ZZI_B);
6494 return;
6495 case Intrinsic::aarch64_sve_aesdimc_lane_x4:
6496 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESDIMC_4ZZI_B);
6497 return;
6498 case Intrinsic::aarch64_sve_pmlal_pair_x2:
6499 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::PMLAL_2ZZZ_Q);
6500 return;
6501 case Intrinsic::aarch64_sve_pmull_pair_x2: {
6502 SDLoc DL(Node);
6503 SmallVector<SDValue, 4> Regs(Node->ops().slice(1, 2));
6504 SDNode *Res =
6505 CurDAG->getMachineNode(AArch64::PMULL_2ZZZ_Q, DL, MVT::Untyped, Regs);
6506 SDValue SuperReg = SDValue(Res, 0);
6507 for (unsigned I = 0; I < 2; I++)
6508 ReplaceUses(SDValue(Node, I),
6509 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
6510 SuperReg));
6511 CurDAG->RemoveDeadNode(Node);
6512 return;
6513 }
6514 case Intrinsic::aarch64_sve_fscale_x4:
6515 SelectDestructiveMultiIntrinsic(Node, 4, true, AArch64::BFSCALE_4Z4Z);
6516 return;
6517 case Intrinsic::aarch64_sve_fscale_x2:
6518 SelectDestructiveMultiIntrinsic(Node, 2, true, AArch64::BFSCALE_2Z2Z);
6519 return;
6520 case Intrinsic::aarch64_sve_fmul_x4:
6522 Node->getValueType(0),
6523 {AArch64::BFMUL_4Z4Z, AArch64::FMUL_4Z4Z_H, AArch64::FMUL_4Z4Z_S,
6524 AArch64::FMUL_4Z4Z_D}))
6525 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6526 return;
6527 case Intrinsic::aarch64_sve_fmul_x2:
6529 Node->getValueType(0),
6530 {AArch64::BFMUL_2Z2Z, AArch64::FMUL_2Z2Z_H, AArch64::FMUL_2Z2Z_S,
6531 AArch64::FMUL_2Z2Z_D}))
6532 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6533 return;
6534 case Intrinsic::aarch64_sve_fcvtzs_x2:
6535 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
6536 return;
6537 case Intrinsic::aarch64_sve_scvtf_x2:
6538 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
6539 return;
6540 case Intrinsic::aarch64_sve_fcvtzu_x2:
6541 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
6542 return;
6543 case Intrinsic::aarch64_sve_ucvtf_x2:
6544 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
6545 return;
6546 case Intrinsic::aarch64_sve_fcvtzs_x4:
6547 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
6548 return;
6549 case Intrinsic::aarch64_sve_scvtf_x4:
6550 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
6551 return;
6552 case Intrinsic::aarch64_sve_fcvtzu_x4:
6553 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
6554 return;
6555 case Intrinsic::aarch64_sve_ucvtf_x4:
6556 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
6557 return;
6558 case Intrinsic::aarch64_sve_fcvt_widen_x2:
6559 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
6560 return;
6561 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
6562 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
6563 return;
6564 case Intrinsic::aarch64_sve_sclamp_single_x2:
6566 Node->getValueType(0),
6567 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
6568 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
6569 SelectClamp(Node, 2, Op);
6570 return;
6571 case Intrinsic::aarch64_sve_uclamp_single_x2:
6573 Node->getValueType(0),
6574 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
6575 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
6576 SelectClamp(Node, 2, Op);
6577 return;
6578 case Intrinsic::aarch64_sve_fclamp_single_x2:
6580 Node->getValueType(0),
6581 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
6582 AArch64::FCLAMP_VG2_2Z2Z_D}))
6583 SelectClamp(Node, 2, Op);
6584 return;
6585 case Intrinsic::aarch64_sve_bfclamp_single_x2:
6586 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
6587 return;
6588 case Intrinsic::aarch64_sve_sclamp_single_x4:
6590 Node->getValueType(0),
6591 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
6592 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
6593 SelectClamp(Node, 4, Op);
6594 return;
6595 case Intrinsic::aarch64_sve_uclamp_single_x4:
6597 Node->getValueType(0),
6598 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6599 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6600 SelectClamp(Node, 4, Op);
6601 return;
6602 case Intrinsic::aarch64_sve_fclamp_single_x4:
6604 Node->getValueType(0),
6605 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6606 AArch64::FCLAMP_VG4_4Z4Z_D}))
6607 SelectClamp(Node, 4, Op);
6608 return;
6609 case Intrinsic::aarch64_sve_bfclamp_single_x4:
6610 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
6611 return;
6612 case Intrinsic::aarch64_sve_add_single_x2:
6614 Node->getValueType(0),
6615 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6616 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6617 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6618 return;
6619 case Intrinsic::aarch64_sve_add_single_x4:
6621 Node->getValueType(0),
6622 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6623 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6624 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6625 return;
6626 case Intrinsic::aarch64_sve_zip_x2:
6628 Node->getValueType(0),
6629 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6630 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6631 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6632 return;
6633 case Intrinsic::aarch64_sve_zipq_x2:
6634 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6635 AArch64::ZIP_VG2_2ZZZ_Q);
6636 return;
6637 case Intrinsic::aarch64_sve_zip_x4:
6639 Node->getValueType(0),
6640 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6641 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6642 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6643 return;
6644 case Intrinsic::aarch64_sve_zipq_x4:
6645 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6646 AArch64::ZIP_VG4_4Z4Z_Q);
6647 return;
6648 case Intrinsic::aarch64_sve_uzp_x2:
6650 Node->getValueType(0),
6651 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6652 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6653 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6654 return;
6655 case Intrinsic::aarch64_sve_uzpq_x2:
6656 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6657 AArch64::UZP_VG2_2ZZZ_Q);
6658 return;
6659 case Intrinsic::aarch64_sve_uzp_x4:
6661 Node->getValueType(0),
6662 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6663 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6664 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6665 return;
6666 case Intrinsic::aarch64_sve_uzpq_x4:
6667 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6668 AArch64::UZP_VG4_4Z4Z_Q);
6669 return;
6670 case Intrinsic::aarch64_sve_sel_x2:
6672 Node->getValueType(0),
6673 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6674 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6675 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
6676 return;
6677 case Intrinsic::aarch64_sve_sel_x4:
6679 Node->getValueType(0),
6680 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6681 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6682 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
6683 return;
6684 case Intrinsic::aarch64_sve_frinta_x2:
6685 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
6686 return;
6687 case Intrinsic::aarch64_sve_frinta_x4:
6688 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
6689 return;
6690 case Intrinsic::aarch64_sve_frintm_x2:
6691 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
6692 return;
6693 case Intrinsic::aarch64_sve_frintm_x4:
6694 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
6695 return;
6696 case Intrinsic::aarch64_sve_frintn_x2:
6697 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
6698 return;
6699 case Intrinsic::aarch64_sve_frintn_x4:
6700 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
6701 return;
6702 case Intrinsic::aarch64_sve_frintp_x2:
6703 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
6704 return;
6705 case Intrinsic::aarch64_sve_frintp_x4:
6706 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
6707 return;
6708 case Intrinsic::aarch64_sve_sunpk_x2:
6710 Node->getValueType(0),
6711 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6712 AArch64::SUNPK_VG2_2ZZ_D}))
6713 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6714 return;
6715 case Intrinsic::aarch64_sve_uunpk_x2:
6717 Node->getValueType(0),
6718 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6719 AArch64::UUNPK_VG2_2ZZ_D}))
6720 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6721 return;
6722 case Intrinsic::aarch64_sve_sunpk_x4:
6724 Node->getValueType(0),
6725 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6726 AArch64::SUNPK_VG4_4Z2Z_D}))
6727 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6728 return;
6729 case Intrinsic::aarch64_sve_uunpk_x4:
6731 Node->getValueType(0),
6732 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6733 AArch64::UUNPK_VG4_4Z2Z_D}))
6734 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6735 return;
6736 case Intrinsic::aarch64_sve_pext_x2: {
6738 Node->getValueType(0),
6739 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6740 AArch64::PEXT_2PCI_D}))
6741 SelectPExtPair(Node, Op);
6742 return;
6743 }
6744 }
6745 break;
6746 }
6747 case ISD::INTRINSIC_VOID: {
6748 unsigned IntNo = Node->getConstantOperandVal(1);
6749 if (Node->getNumOperands() >= 3)
6750 VT = Node->getOperand(2)->getValueType(0);
6751 switch (IntNo) {
6752 default:
6753 break;
6754 case Intrinsic::aarch64_neon_st1x2: {
6755 if (VT == MVT::v8i8) {
6756 SelectStore(Node, 2, AArch64::ST1Twov8b);
6757 return;
6758 } else if (VT == MVT::v16i8) {
6759 SelectStore(Node, 2, AArch64::ST1Twov16b);
6760 return;
6761 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6762 VT == MVT::v4bf16) {
6763 SelectStore(Node, 2, AArch64::ST1Twov4h);
6764 return;
6765 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6766 VT == MVT::v8bf16) {
6767 SelectStore(Node, 2, AArch64::ST1Twov8h);
6768 return;
6769 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6770 SelectStore(Node, 2, AArch64::ST1Twov2s);
6771 return;
6772 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6773 SelectStore(Node, 2, AArch64::ST1Twov4s);
6774 return;
6775 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6776 SelectStore(Node, 2, AArch64::ST1Twov2d);
6777 return;
6778 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6779 SelectStore(Node, 2, AArch64::ST1Twov1d);
6780 return;
6781 }
6782 break;
6783 }
6784 case Intrinsic::aarch64_neon_st1x3: {
6785 if (VT == MVT::v8i8) {
6786 SelectStore(Node, 3, AArch64::ST1Threev8b);
6787 return;
6788 } else if (VT == MVT::v16i8) {
6789 SelectStore(Node, 3, AArch64::ST1Threev16b);
6790 return;
6791 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6792 VT == MVT::v4bf16) {
6793 SelectStore(Node, 3, AArch64::ST1Threev4h);
6794 return;
6795 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6796 VT == MVT::v8bf16) {
6797 SelectStore(Node, 3, AArch64::ST1Threev8h);
6798 return;
6799 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6800 SelectStore(Node, 3, AArch64::ST1Threev2s);
6801 return;
6802 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6803 SelectStore(Node, 3, AArch64::ST1Threev4s);
6804 return;
6805 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6806 SelectStore(Node, 3, AArch64::ST1Threev2d);
6807 return;
6808 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6809 SelectStore(Node, 3, AArch64::ST1Threev1d);
6810 return;
6811 }
6812 break;
6813 }
6814 case Intrinsic::aarch64_neon_st1x4: {
6815 if (VT == MVT::v8i8) {
6816 SelectStore(Node, 4, AArch64::ST1Fourv8b);
6817 return;
6818 } else if (VT == MVT::v16i8) {
6819 SelectStore(Node, 4, AArch64::ST1Fourv16b);
6820 return;
6821 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6822 VT == MVT::v4bf16) {
6823 SelectStore(Node, 4, AArch64::ST1Fourv4h);
6824 return;
6825 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6826 VT == MVT::v8bf16) {
6827 SelectStore(Node, 4, AArch64::ST1Fourv8h);
6828 return;
6829 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6830 SelectStore(Node, 4, AArch64::ST1Fourv2s);
6831 return;
6832 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6833 SelectStore(Node, 4, AArch64::ST1Fourv4s);
6834 return;
6835 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6836 SelectStore(Node, 4, AArch64::ST1Fourv2d);
6837 return;
6838 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6839 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6840 return;
6841 }
6842 break;
6843 }
6844 case Intrinsic::aarch64_neon_st2: {
6845 if (VT == MVT::v8i8) {
6846 SelectStore(Node, 2, AArch64::ST2Twov8b);
6847 return;
6848 } else if (VT == MVT::v16i8) {
6849 SelectStore(Node, 2, AArch64::ST2Twov16b);
6850 return;
6851 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6852 VT == MVT::v4bf16) {
6853 SelectStore(Node, 2, AArch64::ST2Twov4h);
6854 return;
6855 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6856 VT == MVT::v8bf16) {
6857 SelectStore(Node, 2, AArch64::ST2Twov8h);
6858 return;
6859 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6860 SelectStore(Node, 2, AArch64::ST2Twov2s);
6861 return;
6862 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6863 SelectStore(Node, 2, AArch64::ST2Twov4s);
6864 return;
6865 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6866 SelectStore(Node, 2, AArch64::ST2Twov2d);
6867 return;
6868 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6869 SelectStore(Node, 2, AArch64::ST1Twov1d);
6870 return;
6871 }
6872 break;
6873 }
6874 case Intrinsic::aarch64_neon_st3: {
6875 if (VT == MVT::v8i8) {
6876 SelectStore(Node, 3, AArch64::ST3Threev8b);
6877 return;
6878 } else if (VT == MVT::v16i8) {
6879 SelectStore(Node, 3, AArch64::ST3Threev16b);
6880 return;
6881 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6882 VT == MVT::v4bf16) {
6883 SelectStore(Node, 3, AArch64::ST3Threev4h);
6884 return;
6885 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6886 VT == MVT::v8bf16) {
6887 SelectStore(Node, 3, AArch64::ST3Threev8h);
6888 return;
6889 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6890 SelectStore(Node, 3, AArch64::ST3Threev2s);
6891 return;
6892 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6893 SelectStore(Node, 3, AArch64::ST3Threev4s);
6894 return;
6895 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6896 SelectStore(Node, 3, AArch64::ST3Threev2d);
6897 return;
6898 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6899 SelectStore(Node, 3, AArch64::ST1Threev1d);
6900 return;
6901 }
6902 break;
6903 }
6904 case Intrinsic::aarch64_neon_st4: {
6905 if (VT == MVT::v8i8) {
6906 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6907 return;
6908 } else if (VT == MVT::v16i8) {
6909 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6910 return;
6911 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6912 VT == MVT::v4bf16) {
6913 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6914 return;
6915 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6916 VT == MVT::v8bf16) {
6917 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6918 return;
6919 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6920 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6921 return;
6922 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6923 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6924 return;
6925 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6926 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6927 return;
6928 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6929 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6930 return;
6931 }
6932 break;
6933 }
6934 case Intrinsic::aarch64_neon_st2lane: {
6935 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6936 SelectStoreLane(Node, 2, AArch64::ST2i8);
6937 return;
6938 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6939 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6940 SelectStoreLane(Node, 2, AArch64::ST2i16);
6941 return;
6942 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6943 VT == MVT::v2f32) {
6944 SelectStoreLane(Node, 2, AArch64::ST2i32);
6945 return;
6946 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6947 VT == MVT::v1f64) {
6948 SelectStoreLane(Node, 2, AArch64::ST2i64);
6949 return;
6950 }
6951 break;
6952 }
6953 case Intrinsic::aarch64_neon_st3lane: {
6954 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6955 SelectStoreLane(Node, 3, AArch64::ST3i8);
6956 return;
6957 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6958 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6959 SelectStoreLane(Node, 3, AArch64::ST3i16);
6960 return;
6961 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6962 VT == MVT::v2f32) {
6963 SelectStoreLane(Node, 3, AArch64::ST3i32);
6964 return;
6965 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6966 VT == MVT::v1f64) {
6967 SelectStoreLane(Node, 3, AArch64::ST3i64);
6968 return;
6969 }
6970 break;
6971 }
6972 case Intrinsic::aarch64_neon_st4lane: {
6973 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6974 SelectStoreLane(Node, 4, AArch64::ST4i8);
6975 return;
6976 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6977 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6978 SelectStoreLane(Node, 4, AArch64::ST4i16);
6979 return;
6980 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6981 VT == MVT::v2f32) {
6982 SelectStoreLane(Node, 4, AArch64::ST4i32);
6983 return;
6984 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6985 VT == MVT::v1f64) {
6986 SelectStoreLane(Node, 4, AArch64::ST4i64);
6987 return;
6988 }
6989 break;
6990 }
6991 case Intrinsic::aarch64_sve_st2q: {
6992 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6993 return;
6994 }
6995 case Intrinsic::aarch64_sve_st3q: {
6996 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6997 return;
6998 }
6999 case Intrinsic::aarch64_sve_st4q: {
7000 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
7001 return;
7002 }
7003 case Intrinsic::aarch64_sve_st2: {
7004 if (VT == MVT::nxv16i8) {
7005 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
7006 return;
7007 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7008 VT == MVT::nxv8bf16) {
7009 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
7010 return;
7011 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7012 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
7013 return;
7014 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7015 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
7016 return;
7017 }
7018 break;
7019 }
7020 case Intrinsic::aarch64_sve_st3: {
7021 if (VT == MVT::nxv16i8) {
7022 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
7023 return;
7024 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7025 VT == MVT::nxv8bf16) {
7026 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
7027 return;
7028 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7029 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
7030 return;
7031 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7032 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
7033 return;
7034 }
7035 break;
7036 }
7037 case Intrinsic::aarch64_sve_st4: {
7038 if (VT == MVT::nxv16i8) {
7039 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
7040 return;
7041 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7042 VT == MVT::nxv8bf16) {
7043 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
7044 return;
7045 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7046 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
7047 return;
7048 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7049 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
7050 return;
7051 }
7052 break;
7053 }
7054 }
7055 break;
7056 }
7057 case AArch64ISD::LD2post: {
7058 if (VT == MVT::v8i8) {
7059 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
7060 return;
7061 } else if (VT == MVT::v16i8) {
7062 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
7063 return;
7064 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7065 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
7066 return;
7067 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7068 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
7069 return;
7070 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7071 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
7072 return;
7073 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7074 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
7075 return;
7076 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7077 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
7078 return;
7079 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7080 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
7081 return;
7082 }
7083 break;
7084 }
7085 case AArch64ISD::LD3post: {
7086 if (VT == MVT::v8i8) {
7087 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
7088 return;
7089 } else if (VT == MVT::v16i8) {
7090 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
7091 return;
7092 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7093 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
7094 return;
7095 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7096 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
7097 return;
7098 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7099 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
7100 return;
7101 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7102 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
7103 return;
7104 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7105 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
7106 return;
7107 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7108 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
7109 return;
7110 }
7111 break;
7112 }
7113 case AArch64ISD::LD4post: {
7114 if (VT == MVT::v8i8) {
7115 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
7116 return;
7117 } else if (VT == MVT::v16i8) {
7118 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
7119 return;
7120 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7121 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
7122 return;
7123 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7124 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
7125 return;
7126 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7127 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
7128 return;
7129 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7130 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
7131 return;
7132 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7133 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
7134 return;
7135 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7136 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
7137 return;
7138 }
7139 break;
7140 }
7141 case AArch64ISD::LD1x2post: {
7142 if (VT == MVT::v8i8) {
7143 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
7144 return;
7145 } else if (VT == MVT::v16i8) {
7146 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
7147 return;
7148 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7149 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
7150 return;
7151 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7152 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
7153 return;
7154 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7155 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
7156 return;
7157 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7158 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
7159 return;
7160 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7161 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
7162 return;
7163 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7164 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
7165 return;
7166 }
7167 break;
7168 }
7169 case AArch64ISD::LD1x3post: {
7170 if (VT == MVT::v8i8) {
7171 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
7172 return;
7173 } else if (VT == MVT::v16i8) {
7174 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
7175 return;
7176 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7177 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
7178 return;
7179 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7180 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
7181 return;
7182 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7183 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
7184 return;
7185 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7186 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
7187 return;
7188 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7189 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
7190 return;
7191 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7192 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
7193 return;
7194 }
7195 break;
7196 }
7197 case AArch64ISD::LD1x4post: {
7198 if (VT == MVT::v8i8) {
7199 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
7200 return;
7201 } else if (VT == MVT::v16i8) {
7202 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
7203 return;
7204 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7205 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
7206 return;
7207 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7208 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
7209 return;
7210 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7211 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
7212 return;
7213 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7214 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
7215 return;
7216 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7217 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
7218 return;
7219 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7220 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
7221 return;
7222 }
7223 break;
7224 }
7225 case AArch64ISD::LD1DUPpost: {
7226 if (VT == MVT::v8i8) {
7227 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
7228 return;
7229 } else if (VT == MVT::v16i8) {
7230 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
7231 return;
7232 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7233 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
7234 return;
7235 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7236 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
7237 return;
7238 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7239 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
7240 return;
7241 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7242 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
7243 return;
7244 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7245 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
7246 return;
7247 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7248 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
7249 return;
7250 }
7251 break;
7252 }
7253 case AArch64ISD::LD2DUPpost: {
7254 if (VT == MVT::v8i8) {
7255 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
7256 return;
7257 } else if (VT == MVT::v16i8) {
7258 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
7259 return;
7260 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7261 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
7262 return;
7263 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7264 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
7265 return;
7266 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7267 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
7268 return;
7269 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7270 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
7271 return;
7272 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7273 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
7274 return;
7275 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7276 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
7277 return;
7278 }
7279 break;
7280 }
7281 case AArch64ISD::LD3DUPpost: {
7282 if (VT == MVT::v8i8) {
7283 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
7284 return;
7285 } else if (VT == MVT::v16i8) {
7286 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
7287 return;
7288 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7289 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
7290 return;
7291 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7292 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
7293 return;
7294 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7295 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
7296 return;
7297 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7298 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
7299 return;
7300 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7301 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
7302 return;
7303 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7304 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
7305 return;
7306 }
7307 break;
7308 }
7309 case AArch64ISD::LD4DUPpost: {
7310 if (VT == MVT::v8i8) {
7311 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
7312 return;
7313 } else if (VT == MVT::v16i8) {
7314 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
7315 return;
7316 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7317 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
7318 return;
7319 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7320 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
7321 return;
7322 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7323 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
7324 return;
7325 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7326 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
7327 return;
7328 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7329 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
7330 return;
7331 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7332 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
7333 return;
7334 }
7335 break;
7336 }
7337 case AArch64ISD::LD1LANEpost: {
7338 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7339 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
7340 return;
7341 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7342 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7343 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
7344 return;
7345 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7346 VT == MVT::v2f32) {
7347 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
7348 return;
7349 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7350 VT == MVT::v1f64) {
7351 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
7352 return;
7353 }
7354 break;
7355 }
7356 case AArch64ISD::LD2LANEpost: {
7357 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7358 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
7359 return;
7360 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7361 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7362 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
7363 return;
7364 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7365 VT == MVT::v2f32) {
7366 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
7367 return;
7368 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7369 VT == MVT::v1f64) {
7370 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
7371 return;
7372 }
7373 break;
7374 }
7375 case AArch64ISD::LD3LANEpost: {
7376 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7377 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
7378 return;
7379 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7380 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7381 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
7382 return;
7383 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7384 VT == MVT::v2f32) {
7385 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
7386 return;
7387 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7388 VT == MVT::v1f64) {
7389 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
7390 return;
7391 }
7392 break;
7393 }
7394 case AArch64ISD::LD4LANEpost: {
7395 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7396 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
7397 return;
7398 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7399 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7400 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
7401 return;
7402 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7403 VT == MVT::v2f32) {
7404 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
7405 return;
7406 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7407 VT == MVT::v1f64) {
7408 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
7409 return;
7410 }
7411 break;
7412 }
7413 case AArch64ISD::ST2post: {
7414 VT = Node->getOperand(1).getValueType();
7415 if (VT == MVT::v8i8) {
7416 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
7417 return;
7418 } else if (VT == MVT::v16i8) {
7419 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
7420 return;
7421 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7422 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
7423 return;
7424 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7425 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
7426 return;
7427 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7428 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
7429 return;
7430 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7431 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
7432 return;
7433 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7434 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
7435 return;
7436 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7437 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7438 return;
7439 }
7440 break;
7441 }
7442 case AArch64ISD::ST3post: {
7443 VT = Node->getOperand(1).getValueType();
7444 if (VT == MVT::v8i8) {
7445 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
7446 return;
7447 } else if (VT == MVT::v16i8) {
7448 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
7449 return;
7450 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7451 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
7452 return;
7453 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7454 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
7455 return;
7456 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7457 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
7458 return;
7459 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7460 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
7461 return;
7462 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7463 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
7464 return;
7465 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7466 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7467 return;
7468 }
7469 break;
7470 }
7471 case AArch64ISD::ST4post: {
7472 VT = Node->getOperand(1).getValueType();
7473 if (VT == MVT::v8i8) {
7474 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
7475 return;
7476 } else if (VT == MVT::v16i8) {
7477 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
7478 return;
7479 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7480 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
7481 return;
7482 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7483 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
7484 return;
7485 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7486 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
7487 return;
7488 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7489 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
7490 return;
7491 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7492 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
7493 return;
7494 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7495 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7496 return;
7497 }
7498 break;
7499 }
7500 case AArch64ISD::ST1x2post: {
7501 VT = Node->getOperand(1).getValueType();
7502 if (VT == MVT::v8i8) {
7503 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
7504 return;
7505 } else if (VT == MVT::v16i8) {
7506 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
7507 return;
7508 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7509 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
7510 return;
7511 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7512 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
7513 return;
7514 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7515 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
7516 return;
7517 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7518 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
7519 return;
7520 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7521 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7522 return;
7523 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7524 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
7525 return;
7526 }
7527 break;
7528 }
7529 case AArch64ISD::ST1x3post: {
7530 VT = Node->getOperand(1).getValueType();
7531 if (VT == MVT::v8i8) {
7532 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
7533 return;
7534 } else if (VT == MVT::v16i8) {
7535 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
7536 return;
7537 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7538 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
7539 return;
7540 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16 ) {
7541 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
7542 return;
7543 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7544 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
7545 return;
7546 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7547 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
7548 return;
7549 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7550 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7551 return;
7552 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7553 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
7554 return;
7555 }
7556 break;
7557 }
7558 case AArch64ISD::ST1x4post: {
7559 VT = Node->getOperand(1).getValueType();
7560 if (VT == MVT::v8i8) {
7561 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
7562 return;
7563 } else if (VT == MVT::v16i8) {
7564 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
7565 return;
7566 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7567 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
7568 return;
7569 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7570 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
7571 return;
7572 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7573 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
7574 return;
7575 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7576 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
7577 return;
7578 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7579 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7580 return;
7581 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7582 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
7583 return;
7584 }
7585 break;
7586 }
7587 case AArch64ISD::ST2LANEpost: {
7588 VT = Node->getOperand(1).getValueType();
7589 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7590 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
7591 return;
7592 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7593 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7594 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
7595 return;
7596 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7597 VT == MVT::v2f32) {
7598 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
7599 return;
7600 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7601 VT == MVT::v1f64) {
7602 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
7603 return;
7604 }
7605 break;
7606 }
7607 case AArch64ISD::ST3LANEpost: {
7608 VT = Node->getOperand(1).getValueType();
7609 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7610 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
7611 return;
7612 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7613 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7614 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
7615 return;
7616 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7617 VT == MVT::v2f32) {
7618 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
7619 return;
7620 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7621 VT == MVT::v1f64) {
7622 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
7623 return;
7624 }
7625 break;
7626 }
7627 case AArch64ISD::ST4LANEpost: {
7628 VT = Node->getOperand(1).getValueType();
7629 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7630 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
7631 return;
7632 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7633 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7634 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
7635 return;
7636 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7637 VT == MVT::v2f32) {
7638 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
7639 return;
7640 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7641 VT == MVT::v1f64) {
7642 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
7643 return;
7644 }
7645 break;
7646 }
7647 }
7648
7649 // Select the default instruction
7650 SelectCode(Node);
7651}
7652
7653/// createAArch64ISelDag - This pass converts a legalized DAG into an
7654/// AArch64-specific DAG, ready for instruction scheduling.
// NOTE(review): the first line of this function's signature (listing line
// 7655) is absent from this extract; only the trailing OptLevel parameter is
// visible. The body allocates an AArch64DAGToDAGISelLegacy with `new`,
// constructed from TM and OptLevel, and returns that pointer to the caller.
7656 CodeGenOptLevel OptLevel) {
7657 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7658}
7659
7660/// When \p PredVT is a scalable vector predicate in the form
7661/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
7662/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7663/// structured vectors (NumVec >1), the output data type is
7664/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7665/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7666/// EVT.
// NOTE(review): the first line of the signature (listing line 7667) is absent
// from this extract; the body uses parameters named Ctx, PredVT and NumVec.
7668 unsigned NumVec) {
// NumVec must be in the range [1, 4].
7669 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
7670 if (!PredVT.isScalableVectorOf(MVT::i1))
7671 return EVT();
7672
// Only the four predicate types listed below are accepted; any other
// scalable i1 vector yields an invalid EVT.
7673 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7674 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7675 return EVT();
7676
// The integer element width is SVEBitsPerBlock divided by the predicate's
// known-minimum element count; the element count is then multiplied by NumVec.
7677 ElementCount EC = PredVT.getVectorElementCount();
7678 EVT ScalarVT =
7679 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7680 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
7681
7682 return MemVT;
7683}
7684
7685/// Return the EVT of the data associated to a memory operation in \p
7686/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
// NOTE(review): the signature line (listing line 7687) is absent from this
// extract; the body uses parameters named Ctx and Root.
// Memory intrinsic nodes report their memory type directly.
7688 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Root))
7689 return MemIntr->getMemoryVT();
7690
// For plain and masked loads/stores, take the vector shape from the value
// type of the loaded/stored data and the element type from the memory type.
7691 if (isa<MemSDNode>(Root)) {
7692 EVT MemVT = cast<MemSDNode>(Root)->getMemoryVT();
7693
7694 EVT DataVT;
7695 if (auto *Load = dyn_cast<LoadSDNode>(Root))
7696 DataVT = Load->getValueType(0);
7697 else if (auto *Load = dyn_cast<MaskedLoadSDNode>(Root))
7698 DataVT = Load->getValueType(0);
7699 else if (auto *Store = dyn_cast<StoreSDNode>(Root))
7700 DataVT = Store->getValue().getValueType();
7701 else if (auto *Store = dyn_cast<MaskedStoreSDNode>(Root))
7702 DataVT = Store->getValue().getValueType();
7703 else
7704 llvm_unreachable("Unexpected MemSDNode!");
7705
7706 return DataVT.changeVectorElementType(Ctx, MemVT.getVectorElementType());
7707 }
7708
7709 const unsigned Opcode = Root->getOpcode();
7710 // For custom ISD nodes, we have to look at them individually to extract the
7711 // type of the data moved to/from memory.
7712 switch (Opcode) {
7713 case AArch64ISD::LD1_MERGE_ZERO:
7714 case AArch64ISD::LD1S_MERGE_ZERO:
7715 case AArch64ISD::LDNF1_MERGE_ZERO:
7716 case AArch64ISD::LDNF1S_MERGE_ZERO:
// These nodes carry the memory type as a VTSDNode in operand 3.
7717 return cast<VTSDNode>(Root->getOperand(3))->getVT();
7718 case AArch64ISD::ST1_PRED:
// The predicated store carries the memory type as a VTSDNode in operand 4.
7719 return cast<VTSDNode>(Root->getOperand(4))->getVT();
7720 default:
7721 break;
7722 }
7723
// Anything that is not an intrinsic node past this point has no known type.
7724 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7725 return EVT();
7726
// Operand 1 holds the intrinsic ID.
// NOTE(review): in the cases below that end in "/*NumVec=*/k);", the line
// that opens the call (listing lines 7736, 7740, 7743, 7747, 7750, 7754,
// 7757) is absent from this extract. Each visible tail passes Ctx, the value
// type of one operand of Root, and a vector count.
7727 switch (Root->getConstantOperandVal(1)) {
7728 default:
7729 return EVT();
7730 case Intrinsic::aarch64_sme_ldr:
7731 case Intrinsic::aarch64_sme_str:
7732 return MVT::nxv16i8;
7733 case Intrinsic::aarch64_sve_prf:
7734 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7735 // width of the predicate.
7737 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7738 case Intrinsic::aarch64_sve_ld2_sret:
7739 case Intrinsic::aarch64_sve_ld2q_sret:
7741 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7742 case Intrinsic::aarch64_sve_st2q:
7744 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7745 case Intrinsic::aarch64_sve_ld3_sret:
7746 case Intrinsic::aarch64_sve_ld3q_sret:
7748 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7749 case Intrinsic::aarch64_sve_st3q:
7751 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7752 case Intrinsic::aarch64_sve_ld4_sret:
7753 case Intrinsic::aarch64_sve_ld4q_sret:
7755 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7756 case Intrinsic::aarch64_sve_st4q:
7758 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
7759 case Intrinsic::aarch64_sve_ld1udq:
7760 case Intrinsic::aarch64_sve_st1dq:
7761 return EVT(MVT::nxv1i64);
7762 case Intrinsic::aarch64_sve_ld1uwq:
7763 case Intrinsic::aarch64_sve_st1wq:
7764 return EVT(MVT::nxv1i32);
7765 }
7766}
7767
7768/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7769/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
7770/// where Root is the memory access using N for its address.
///
/// On success \p Base and \p OffImm are filled in and true is returned;
/// otherwise false is returned.
7771template <int64_t Min, int64_t Max>
7772bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7773 SDValue &Base,
7774 SDValue &OffImm) {
7775 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
7776 const DataLayout &DL = CurDAG->getDataLayout();
7777 const MachineFrameInfo &MFI = MF->getFrameInfo();
7778
// A bare frame index is matched as "frame index + 0", but only for objects
// with a scalable stack ID. This path runs before the MemVT validity check.
7779 if (N.getOpcode() == ISD::FrameIndex) {
7780 int FI = cast<FrameIndexSDNode>(N)->getIndex();
7781 // We can only encode VL scaled offsets, so only fold in frame indexes
7782 // referencing SVE objects.
7783 if (MFI.hasScalableStackID(FI)) {
7784 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7785 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7786 return true;
7787 }
7788
7789 return false;
7790 }
7791
// Without a known memory type the offset cannot be scaled.
7792 if (MemVT == EVT())
7793 return false;
7794
7795 if (N.getOpcode() != ISD::ADD)
7796 return false;
7797
// The ADD's second operand must be either a VSCALE node (whose constant
// operand is used as the multiplier) or a plain constant byte offset that
// divides evenly by KnownVScale.
7798 SDValue VScale = N.getOperand(1);
7799 int64_t MulImm = std::numeric_limits<int64_t>::max();
7800 if (VScale.getOpcode() == ISD::VSCALE) {
7801 MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7802 } else if (auto C = dyn_cast<ConstantSDNode>(VScale)) {
7803 int64_t ByteOffset = C->getSExtValue();
// NOTE(review): the initializer of KnownVScale (listing line 7805) is absent
// from this extract, so its source cannot be confirmed here.
7804 const auto KnownVScale =
7806
7807 if (!KnownVScale || ByteOffset % KnownVScale != 0)
7808 return false;
7809
7810 MulImm = ByteOffset / KnownVScale;
7811 } else
7812 return false;
7813
// MemWidthBytes is the known-minimum size of MemVT in bytes; the multiplier
// must be a whole number of such units.
7814 TypeSize TS = MemVT.getSizeInBits();
7815 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7816
7817 if ((MulImm % MemWidthBytes) != 0)
7818 return false;
7819
// NOTE(review): the condition guarding the `return false` below (listing line
// 7821) is absent from this extract; presumably it is the Min/Max range check
// on Offset described in the header comment - confirm against the full file.
7820 int64_t Offset = MulImm / MemWidthBytes;
7822 return false;
7823
7824 Base = N.getOperand(0);
7825 if (Base.getOpcode() == ISD::FrameIndex) {
7826 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7827 // We can only encode VL scaled offsets, so only fold in frame indexes
7828 // referencing SVE objects.
7829 if (MFI.hasScalableStackID(FI))
7830 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7831 }
7832
7833 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7834 return true;
7835}
7836
7837/// Select register plus register addressing mode for SVE, with scaled
7838/// offset.
7839bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7840 SDValue &Base,
7841 SDValue &Offset) {
7842 if (N.getOpcode() != ISD::ADD)
7843 return false;
7844
7845 // Process an ADD node.
7846 const SDValue LHS = N.getOperand(0);
7847 const SDValue RHS = N.getOperand(1);
7848
7849 // 8 bit data does not come with the SHL node, so it is treated
7850 // separately.
7851 if (Scale == 0) {
7852 Base = LHS;
7853 Offset = RHS;
7854 return true;
7855 }
7856
7857 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7858 int64_t ImmOff = C->getSExtValue();
7859 unsigned Size = 1 << Scale;
7860
7861 // To use the reg+reg addressing mode, the immediate must be a multiple of
7862 // the vector element's byte size.
7863 if (ImmOff % Size)
7864 return false;
7865
7866 SDLoc DL(N);
7867 Base = LHS;
7868 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7869 SDValue Ops[] = {Offset};
7870 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7871 Offset = SDValue(MI, 0);
7872 return true;
7873 }
7874
7875 // Check if the RHS is a shift node with a constant.
7876 if (RHS.getOpcode() != ISD::SHL)
7877 return false;
7878
7879 const SDValue ShiftRHS = RHS.getOperand(1);
7880 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7881 if (C->getZExtValue() == Scale) {
7882 Base = LHS;
7883 Offset = RHS.getOperand(0);
7884 return true;
7885 }
7886
7887 return false;
7888}
7889
7890bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7891 const AArch64TargetLowering *TLI =
7892 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7893
7894 return TLI->isAllActivePredicate(*CurDAG, N);
7895}
7896
7897bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7898 return N.getValueType().isScalableVectorOf(MVT::i1);
7899}
7900
// Splits N into a base value and a scaled constant offset. This function
// always returns true: when no suitable constant offset is found it falls
// back to "N + 0".
// NOTE(review): the middle line of the signature (listing line 7902) is
// absent from this extract; the body writes to outputs named Base and Offset.
7901bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7903 unsigned Scale) {
// Accepts a constant that is strictly positive, at most MaxSize and a
// multiple of Scale, and returns it divided by Scale as a target constant;
// returns an empty SDValue otherwise.
7904 auto MatchConstantOffset = [&](SDValue CN) -> SDValue {
7905 if (auto *C = dyn_cast<ConstantSDNode>(CN)) {
7906 int64_t ImmOff = C->getSExtValue();
7907 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)))
7908 return CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7909 }
7910 return SDValue();
7911 };
7912
// N itself is a suitable constant: use a zero i32 constant as the base.
7913 if (SDValue C = MatchConstantOffset(N)) {
7914 Base = CurDAG->getConstant(0, SDLoc(N), MVT::i32);
7915 Offset = C;
7916 return true;
7917 }
7918
7919 // Try to untangle an ADD node into a 'reg + offset'
7920 if (CurDAG->isBaseWithConstantOffset(N)) {
7921 if (SDValue C = MatchConstantOffset(N.getOperand(1))) {
7922 Base = N.getOperand(0);
7923 Offset = C;
7924 return true;
7925 }
7926 }
7927
7928 // By default, just match reg + 0.
7929 Base = N;
7930 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7931 return true;
7932}
7933
// Matches a constant N as the immediate operand of a compare-and-branch whose
// condition code is read from operand 1 of P. On success Imm receives the
// constant as a target constant of N's value type.
7934bool AArch64DAGToDAGISel::SelectCmpBranchUImm6Operand(SDNode *P, SDValue N,
7935 SDValue &Imm) {
// NOTE(review): the declaration that this cast initialises (listing line
// 7936) is absent from this extract; the result is used below as CC.
7937 static_cast<AArch64CC::CondCode>(P->getConstantOperandVal(1));
7938 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
7939 // Check conservatively if the immediate fits the valid range [0, 64).
7940 // Immediate variants for GE and HS definitely need to be decremented
7941 // when lowering the pseudos later, so an immediate of 1 would become 0.
7942 // For the inverse conditions LT and LO we don't know for sure if they
7943 // will need a decrement but should the decision be made to reverse the
7944 // branch condition, we again end up with the need to decrement.
7945 // The same argument holds for LE, LS, GT and HI and possibly
7946 // incremented immediates. This can lead to slightly less optimal
7947 // codegen, e.g. we never codegen the legal case
7948 // cblt w0, #63, A
7949 // because we could end up with the illegal case
7950 // cbge w0, #64, B
7951 // should the decision to reverse the branch direction be made. For the
7952 // lower bound cases this is no problem since we can express comparisons
7953 // against 0 with either tbz/tbnz or using wzr/xzr.
7954 uint64_t LowerBound = 0, UpperBound = 64;
7955 switch (CC) {
7956 case AArch64CC::GE:
7957 case AArch64CC::HS:
7958 case AArch64CC::LT:
7959 case AArch64CC::LO:
7960 LowerBound = 1;
7961 break;
7962 case AArch64CC::LE:
7963 case AArch64CC::LS:
7964 case AArch64CC::GT:
7965 case AArch64CC::HI:
7966 UpperBound = 63;
7967 break;
7968 default:
7969 break;
7970 }
7971
// Accept the constant only when LowerBound <= value < UpperBound (unsigned).
7972 if (CN->getAPIntValue().uge(LowerBound) &&
7973 CN->getAPIntValue().ult(UpperBound)) {
7974 SDLoc DL(N);
7975 Imm = CurDAG->getTargetConstant(CN->getZExtValue(), DL, N.getValueType());
7976 return true;
7977 }
7978 }
7979
7980 return false;
7981}
7982
// Matches an operand that is narrower than the register holding it: i8 when
// MatchCBB is true, i16 otherwise. On success Reg receives operand 0 of N and
// ExtType receives an i32 target constant describing the extension to apply.
7983template <bool MatchCBB>
7984bool AArch64DAGToDAGISel::SelectCmpBranchExtOperand(SDValue N, SDValue &Reg,
7985 SDValue &ExtType) {
7986
7987 // Use an invalid shift-extend value to indicate we don't need to extend later
7988 if (N.getOpcode() == ISD::AssertZext || N.getOpcode() == ISD::AssertSext) {
7989 EVT Ty = cast<VTSDNode>(N.getOperand(1))->getVT();
7990 if (Ty != (MatchCBB ? MVT::i8 : MVT::i16))
7991 return false;
7992 Reg = N.getOperand(0);
7993 ExtType = CurDAG->getSignedTargetConstant(AArch64_AM::InvalidShiftExtend,
7994 SDLoc(N), MVT::i32);
7995 return true;
7996 }
7997
// NOTE(review): the statement that defines ET (listing line 7998) is absent
// from this extract; ET is compared against UXTB/SXTB/UXTH/SXTH below.
7999
// Byte extends are accepted when MatchCBB is true, halfword extends when it
// is false.
8000 if ((MatchCBB && (ET == AArch64_AM::UXTB || ET == AArch64_AM::SXTB)) ||
8001 (!MatchCBB && (ET == AArch64_AM::UXTH || ET == AArch64_AM::SXTH))) {
8002 Reg = N.getOperand(0);
8003 ExtType =
8004 CurDAG->getTargetConstant(getExtendEncoding(ET), SDLoc(N), MVT::i32);
8005 return true;
8006 }
8007
8008 return false;
8009}
8010
8011/// Try to fold AArch64 CSEL/FCMP patterns to FMAXNM/FMINNM.
8012///
8013/// This is intentionally done in PreprocessISelDAG rather than DAGCombine:
8014/// doing this earlier based on the defining operation of X can be invalidated
8015/// by later DAG combines. At this point the DAG is being prepared for
8016/// instruction selection, so the use of isKnownNeverSNaN(X) applies to the
8017/// final SDValue being selected.
8018/// Only handles FCMP(X, C) with scalar FP types, where C is a non-NaN constant.
8019/// The nsz requirement is needed only when C is zero, to avoid signed-zero
8020/// mismatches. The never-sNaN check is required because AArch64 FMAXNM/FMINNM
8021/// differ from fcmp+fcsel for signaling NaN inputs.
8022SDValue AArch64DAGToDAGISel::tryFoldCselToFMaxMin(SDNode &N) {
8023 EVT VT = N.getValueType(0);
8024
8025 // Scalar FP only.
8026 if (!VT.isFloatingPoint() || VT.isVector())
8027 return SDValue();
8028
8029 SDValue TVal = N.getOperand(0);
8030 SDValue FVal = N.getOperand(1);
8031 SDValue CCVal = N.getOperand(2);
8032 SDValue Cmp = N.getOperand(3);
8033
8034 if (Cmp.getOpcode() != AArch64ISD::FCMP)
8035 return SDValue();
8036
8037 auto *CC = dyn_cast<ConstantSDNode>(CCVal);
8038 if (!CC)
8039 return SDValue();
8040
8041 SDValue CmpLHS = Cmp.getOperand(0);
8042 SDValue CmpRHS = Cmp.getOperand(1);
8043 unsigned CondCode = CC->getZExtValue();
8044
8045 // Map VT and operation (max/min) to machine opcode.
8046 auto getOpc = [](EVT VT, bool isMax) -> unsigned {
8047 if (VT == MVT::f16)
8048 return isMax ? AArch64::FMAXNMHrr : AArch64::FMINNMHrr;
8049 else if (VT == MVT::f32)
8050 return isMax ? AArch64::FMAXNMSrr : AArch64::FMINNMSrr;
8051 else if (VT == MVT::f64)
8052 return isMax ? AArch64::FMAXNMDrr : AArch64::FMINNMDrr;
8053 else
8054 return 0; // unsupported
8055 };
8056
8057 // Determine whether to use max or min based on condition code and operands.
8058 bool isMax;
8059 if (CondCode == AArch64CC::GT || CondCode == AArch64CC::GE) {
8060 if (TVal == CmpLHS && FVal == CmpRHS)
8061 isMax = true;
8062 else if (TVal == CmpRHS && FVal == CmpLHS)
8063 isMax = false;
8064 else
8065 return SDValue();
8066 } else if (CondCode == AArch64CC::MI || CondCode == AArch64CC::LS) {
8067 if (TVal == CmpLHS && FVal == CmpRHS)
8068 isMax = false;
8069 else if (TVal == CmpRHS && FVal == CmpLHS)
8070 isMax = true;
8071 else
8072 return SDValue();
8073 } else {
8074 return SDValue();
8075 }
8076
8077 // Get the machine opcode for this VT and operation.
8078 unsigned Opc = getOpc(VT, isMax);
8079 if (!Opc)
8080 return SDValue();
8081
8082 // Constant must be non-NaN.
8083 auto *CFP = dyn_cast<ConstantFPSDNode>(CmpRHS);
8084 if (!CFP || CFP->getValueAPF().isNaN())
8085 return SDValue();
8086
8087 // nsz flag required only when constant is zero: fmaxnm(+0,-0)=+0 differs from
8088 // fcmp+select's -0. For non-zero constants, semantics are identical.
8089 if (CFP->isZero() && !N.getFlags().hasNoSignedZeros())
8090 return SDValue();
8091
8092 // Only fold if variable operand is never sNaN.
8093 // This runs after DAG combines, so later combines cannot remove a defining
8094 // operation used by isKnownNeverSNaN().
8095 if (!CurDAG->isKnownNeverSNaN(CmpLHS))
8096 return SDValue();
8097
8098 SDLoc DL(&N);
8099
8100 // Directly emit the machine node
8101 return SDValue(CurDAG->getMachineNode(Opc, DL, VT, CmpLHS, CmpRHS), 0);
8102}
8103
// Walks every node of the current DAG before instruction selection and, for
// selected opcodes, replaces value 0 of the node with a rewritten value.
// Dead nodes are removed afterwards if anything was replaced.
8104void AArch64DAGToDAGISel::PreprocessISelDAG() {
8105 bool MadeChange = false;
// An early-increment range is used for the walk over all nodes.
8106 for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
// Nodes without users are skipped.
8107 if (N.use_empty())
8108 continue;
8109
// NOTE(review): listing line 8110 is absent from this extract; presumably it
// declares the Result value that the switch below assigns - confirm against
// the full file.
8111 switch (N.getOpcode()) {
8112 case ISD::SCALAR_TO_VECTOR: {
// Only i32/i64 element types whose scalar operand has the same type get
// bitcast hints.
8113 EVT ScalarTy = N.getValueType(0).getVectorElementType();
8114 if ((ScalarTy == MVT::i32 || ScalarTy == MVT::i64) &&
8115 ScalarTy == N.getOperand(0).getValueType())
8116 Result = addBitcastHints(*CurDAG, N);
8117
8118 break;
8119 }
8120 case AArch64ISD::CSEL:
8121 Result = tryFoldCselToFMaxMin(N);
8122 break;
8123 default:
8124 break;
8125 }
8126
// A non-empty Result replaces all uses of value 0 of N.
8127 if (Result) {
8128 LLVM_DEBUG(dbgs() << "AArch64 DAG preprocessing replacing:\nOld: ");
8129 LLVM_DEBUG(N.dump(CurDAG));
8130 LLVM_DEBUG(dbgs() << "\nNew: ");
8131 LLVM_DEBUG(Result.dump(CurDAG));
8132 LLVM_DEBUG(dbgs() << "\n");
8133
8134 CurDAG->ReplaceAllUsesOfValueWith(SDValue(&N, 0), Result);
8135 MadeChange = true;
8136 }
8137 }
8138
8139 if (MadeChange)
8140 CurDAG->RemoveDeadNodes();
8141
// NOTE(review): listing line 8142 is absent from this extract, so the final
// statement of this function is not visible here.
8143}
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms)
static int getIntOperandFromRegisterString(StringRef RegString)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
static std::optional< APInt > GetNEONSplatValue(SDValue N)
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, unsigned NumberOfIgnoredHighBits, EVT VT)
Does DstMask form a complementary pair with the mask provided by BitsToBeInserted,...
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N)
Instructions that accept extend modifiers like UXTW expect the register being extended to be a GPR32,...
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op, SDValue &Src, int &DstLSB, int &Width)
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, SDValue &Src, int &DstLSB, int &Width)
Does this tree qualify as an attempt to move a bitfield into position, essentially "(and (shl VAL,...
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static SDValue addBitcastHints(SelectionDAG &DAG, SDNode &N)
addBitcastHints - This method adds bitcast hints to the operands of a node to help instruction select...
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB, unsigned NumberOfIgnoredLowBits, bool BiggerPattern)
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits=0, bool BiggerPattern=false)
static bool isShiftedMask(uint64_t Mask, EVT VT)
bool SelectSMETile(unsigned &BaseReg, unsigned TileNum)
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root)
Return the EVT of the data associated to a memory operation in Root.
static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isWorthFoldingADDlow(SDValue N)
If there's a use of this ADDlow that's not itself a load/store then we'll need to create a real ADD i...
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N)
getShiftTypeForNode - Translate a shift node to the corresponding ShiftType value.
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB)
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef< unsigned > Opcodes)
This function selects an opcode from a list of opcodes, which is expected to be the opcode for { 8-bi...
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, unsigned NumVec)
When PredVT is a scalable vector predicate in the form MVT::nx<M>xi1, it builds the correspondent sca...
static std::optional< APInt > DecodeNEONSplat(SDValue N)
static bool checkCVTFixedPointOperandWithFBitsForVectors(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isPreferredADD(int64_t ImmOff)
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, uint64_t Imm, uint64_t MSB, unsigned Depth)
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount)
Create a machine node performing a notional SHL of Op by ShlAmount.
static bool isWorthFoldingSHL(SDValue V)
Determine whether it is worth it to fold SHL into the addressing mode.
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, bool BiggerPattern)
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, SDValue Src, SDValue Dst, SelectionDAG *CurDAG, const bool BiggerPattern)
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, SDValue Orig, unsigned Depth)
static bool isMemOpOrPrefetch(SDNode *N)
static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG)
static APInt DecodeFMOVImm(uint64_t Imm, unsigned RegWidth)
static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth)
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth=0)
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected)
static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore=false)
getExtendTypeForNode - Translate an extend node to the corresponding ExtendType value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, SDValue &ShiftedOperand, uint64_t &EncodedShiftImm)
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range, unsigned Size)
Check if the immediate offset is valid as a scaled immediate.
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
return SDValue()
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG)
WidenVector - Given a value in the V64 register class, produce the equivalent value in the V128 regis...
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define DEBUG_TYPE
IRTranslator LLVM IR MI
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t High
OptimizedStructLayoutField Field
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
#define LLVM_DEBUG(...)
Definition Debug.h:119
#define PASS_NAME
Value * RHS
Value * LHS
const AArch64RegisterInfo * getRegisterInfo() const override
bool isStreaming() const
Returns true if the function has a streaming body.
bool isX16X17Safer() const
Returns whether the operating system makes it safer to store sensitive values in x16 and x17 as oppos...
unsigned getSVEVectorSizeInBits() const
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1693
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1076
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1621
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:652
void flipAllBits()
Toggle every bit to its opposite value.
Definition APInt.h:1475
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:511
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
iterator begin() const
Definition ArrayRef.h:129
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const GlobalValue * getGlobal() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
bool hasScalableStackID(int ObjectIdx) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual void PreprocessISelDAG()
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
unsigned getID() const
Return the register class ID number.
LLVM Value Representation.
Definition Value.h:75
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:972
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint32_t parseGenericRegister(StringRef Name)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint64_t decodeAdvSIMDModImmType12(uint8_t Imm)
static uint64_t decodeAdvSIMDModImmType11(uint8_t Imm)
unsigned getExtendEncoding(AArch64_AM::ShiftExtendType ET)
Mapping from extend bits to required operation: shifter: 000 ==> uxtb 001 ==> uxth 010 ==> uxtw 011 =...
static uint64_t decodeAdvSIMDModImmType10(uint8_t Imm)
static bool isSVELogicalImm(unsigned SizeInBits, uint64_t ImmVal, uint64_t &Encoding)
static bool isSVECpyDupImm(int SizeInBits, int64_t Val, int32_t &Imm, int32_t &Shift)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type)
isSignExtendShiftType - Returns true if Type is sign extending.
static constexpr unsigned SVEBitsPerBlock
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:857
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:997
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:848
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition ISDOpcodes.h:69
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:230
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:139
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:854
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:892
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:860
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Not(const Pred &P) -> Not< Pred >
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:558
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
unsigned CheckFixedPointOperandConstant(APFloat &FVal, unsigned RegWidth, bool isReciprocal)
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:315
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition MathExtras.h:267
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:204
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:2025
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAArch64ISelDag(AArch64TargetMachine &TM, CodeGenOptLevel OptLevel)
createAArch64ISelDag - This pass converts a legalized DAG into a AArch64-specific DAG,...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
#define N
Extended Value Type.
Definition ValueTypes.h:35
bool isScalableVectorOf(EVT EltVT) const
Return true if this is a scalable vector with matching element type.
Definition ValueTypes.h:192
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
ElementCount getVectorElementCount() const
Definition ValueTypes.h:373
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:494
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:396
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:382
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:408
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:339
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:230
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:404
bool isFixedLengthVector() const
Definition ValueTypes.h:199
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:346
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:187
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:351
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:359
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition ValueTypes.h:225
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
Matching combinators.