LLVM 22.0.0git
AArch64ISelDAGToDAG.cpp
1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
16#include "llvm/ADT/APSInt.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34// https://github.com/llvm/llvm-project/issues/114425
35#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
36#pragma inline_depth(0)
37#endif
38
39//===--------------------------------------------------------------------===//
40/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
41/// instructions for SelectionDAG operations.
42///
43namespace {
44
45class AArch64DAGToDAGISel : public SelectionDAGISel {
46
47 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
48 /// make the right decision when generating code for different targets.
49 const AArch64Subtarget *Subtarget;
50
51public:
52 AArch64DAGToDAGISel() = delete;
53
54 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
55 CodeGenOptLevel OptLevel)
56 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
57
58 bool runOnMachineFunction(MachineFunction &MF) override {
59 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
60 return SelectionDAGISel::runOnMachineFunction(MF);
61 }
62
63 void Select(SDNode *Node) override;
64
65 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
66 /// inline asm expressions.
67 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
68 InlineAsm::ConstraintCode ConstraintID,
69 std::vector<SDValue> &OutOps) override;
70
71 template <signed Low, signed High, signed Scale>
72 bool SelectRDVLImm(SDValue N, SDValue &Imm);
73
74 template <signed Low, signed High>
75 bool SelectRDSVLShiftImm(SDValue N, SDValue &Imm);
76
77 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
78 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
79 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
80 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
81 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
82 return SelectShiftedRegister(N, false, Reg, Shift);
83 }
84 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
85 return SelectShiftedRegister(N, true, Reg, Shift);
86 }
87 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
88 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
89 }
90 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
91 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
92 }
93 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
94 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
95 }
96 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
97 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
98 }
99 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
100 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
101 }
102 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
103 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
104 }
105 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
106 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
107 }
108 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
109 return SelectAddrModeIndexed(N, 1, Base, OffImm);
110 }
111 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
112 return SelectAddrModeIndexed(N, 2, Base, OffImm);
113 }
114 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
115 return SelectAddrModeIndexed(N, 4, Base, OffImm);
116 }
117 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
118 return SelectAddrModeIndexed(N, 8, Base, OffImm);
119 }
120 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
121 return SelectAddrModeIndexed(N, 16, Base, OffImm);
122 }
123 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
124 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
125 }
126 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
127 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
128 }
129 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
130 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
131 }
132 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
133 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
134 }
135 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
136 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
137 }
138 template <unsigned Size, unsigned Max>
139 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
140 // Test if there is an appropriate addressing mode and check if the
141 // immediate fits.
142 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
143 if (Found) {
144 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
145 int64_t C = CI->getSExtValue();
146 if (C <= Max)
147 return true;
148 }
149 }
150
151 // Otherwise, base only, materialize address in register.
152 Base = N;
153 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
154 return true;
155 }
156
157 template<int Width>
158 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
159 SDValue &SignExtend, SDValue &DoShift) {
160 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
161 }
162
163 template<int Width>
164 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
165 SDValue &SignExtend, SDValue &DoShift) {
166 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
167 }
168
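 // For example, (extract_subvector (v8i16 X), 4) producing a v4i16 selects the
 // high half of X (Res = X); extracting the low half (index 0) does not match.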
169 bool SelectExtractHigh(SDValue N, SDValue &Res) {
170 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
171 N = N->getOperand(0);
172 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
173 !isa<ConstantSDNode>(N->getOperand(1)))
174 return false;
175 EVT VT = N->getValueType(0);
176 EVT LVT = N->getOperand(0).getValueType();
177 unsigned Index = N->getConstantOperandVal(1);
178 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
179 Index != VT.getVectorNumElements())
180 return false;
181 Res = N->getOperand(0);
182 return true;
183 }
184
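 // Matches the rounding pattern (AArch64ISD::VLSHR (add X, Splat(1 << (Sh-1))), Sh),
 // e.g. for a v8i16 value shifted right by 3 the addend must splat the value 4;
 // on a match Res1 = X and Res2 = the shift amount.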
185 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
186 if (N.getOpcode() != AArch64ISD::VLSHR)
187 return false;
188 SDValue Op = N->getOperand(0);
189 EVT VT = Op.getValueType();
190 unsigned ShtAmt = N->getConstantOperandVal(1);
191 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
192 return false;
193
194 APInt Imm;
195 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
196 Imm = APInt(VT.getScalarSizeInBits(),
197 Op.getOperand(1).getConstantOperandVal(0)
198 << Op.getOperand(1).getConstantOperandVal(1));
199 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
200 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
201 Imm = APInt(VT.getScalarSizeInBits(),
202 Op.getOperand(1).getConstantOperandVal(0));
203 else
204 return false;
205
206 if (Imm != 1ULL << (ShtAmt - 1))
207 return false;
208
209 Res1 = Op.getOperand(0);
210 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
211 return true;
212 }
213
214 bool SelectDupZeroOrUndef(SDValue N) {
215 switch(N->getOpcode()) {
216 case ISD::UNDEF:
217 return true;
218 case AArch64ISD::DUP:
219 case ISD::SPLAT_VECTOR: {
220 auto Opnd0 = N->getOperand(0);
221 if (isNullConstant(Opnd0))
222 return true;
223 if (isNullFPConstant(Opnd0))
224 return true;
225 break;
226 }
227 default:
228 break;
229 }
230
231 return false;
232 }
233
234 bool SelectAny(SDValue) { return true; }
235
236 bool SelectDupZero(SDValue N) {
237 switch(N->getOpcode()) {
238 case AArch64ISD::DUP:
239 case ISD::SPLAT_VECTOR: {
240 auto Opnd0 = N->getOperand(0);
241 if (isNullConstant(Opnd0))
242 return true;
243 if (isNullFPConstant(Opnd0))
244 return true;
245 break;
246 }
247 }
248
249 return false;
250 }
251
252 template <MVT::SimpleValueType VT, bool Negate>
253 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
254 return SelectSVEAddSubImm(N, VT, Imm, Shift, Negate);
255 }
256
257 template <MVT::SimpleValueType VT, bool Negate>
258 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
259 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
260 }
261
262 template <MVT::SimpleValueType VT>
263 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
264 return SelectSVECpyDupImm(N, VT, Imm, Shift);
265 }
266
267 template <MVT::SimpleValueType VT, bool Invert = false>
268 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
269 return SelectSVELogicalImm(N, VT, Imm, Invert);
270 }
271
272 template <MVT::SimpleValueType VT>
273 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
274 return SelectSVEArithImm(N, VT, Imm);
275 }
276
277 template <unsigned Low, unsigned High, bool AllowSaturation = false>
278 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
279 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
280 }
281
282 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
283 if (N->getOpcode() != ISD::SPLAT_VECTOR)
284 return false;
285
286 EVT EltVT = N->getValueType(0).getVectorElementType();
287 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
288 /* High */ EltVT.getFixedSizeInBits(),
289 /* AllowSaturation */ true, Imm);
290 }
291
292 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
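 // For example, with Min = 1, Max = 16, Scale = 16 and Shift = false, a
 // VSCALE multiplier of 48 yields Imm = 3; with Shift = true the constant is
 // first interpreted as a log2 value (MulImm = 1 << C) before scaling.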
293 template<signed Min, signed Max, signed Scale, bool Shift>
294 bool SelectCntImm(SDValue N, SDValue &Imm) {
295 if (!isa<ConstantSDNode>(N))
296 return false;
297
298 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
299 if (Shift)
300 MulImm = 1LL << MulImm;
301
302 if ((MulImm % std::abs(Scale)) != 0)
303 return false;
304
305 MulImm /= Scale;
306 if ((MulImm >= Min) && (MulImm <= Max)) {
307 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
308 return true;
309 }
310
311 return false;
312 }
313
314 template <signed Max, signed Scale>
315 bool SelectEXTImm(SDValue N, SDValue &Imm) {
316 if (!isa<ConstantSDNode>(N))
317 return false;
318
319 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
320
321 if (MulImm >= 0 && MulImm <= Max) {
322 MulImm *= Scale;
323 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
324 return true;
325 }
326
327 return false;
328 }
329
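 // Rewrites a small constant into a register relative to BaseReg, assuming an
 // instantiation such as ImmToReg<AArch64::ZAD0, 7> (illustrative): a constant
 // operand of 3 would then become the register ZAD0 + 3, i.e. ZAD3.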
330 template <unsigned BaseReg, unsigned Max>
331 bool ImmToReg(SDValue N, SDValue &Imm) {
332 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
333 uint64_t C = CI->getZExtValue();
334
335 if (C > Max)
336 return false;
337
338 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
339 return true;
340 }
341 return false;
342 }
343
344 /// Form sequences of consecutive 64/128-bit registers for use in NEON
345 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
346 /// between 1 and 4 elements. If it contains a single element, that element is
347 /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
348 SDValue createDTuple(ArrayRef<SDValue> Vecs);
349 SDValue createQTuple(ArrayRef<SDValue> Vecs);
350 // Form a sequence of SVE registers for instructions using a list of vectors,
351 // e.g. structured loads and stores (ldN, stN).
352 SDValue createZTuple(ArrayRef<SDValue> Vecs);
353
354 // Similar to above, except the register must start at a multiple of the
355 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
356 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
357
358 /// Generic helper for the createDTuple/createQTuple
359 /// functions. Those should almost always be called instead.
360 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
361 const unsigned SubRegs[]);
362
363 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
364
365 bool tryIndexedLoad(SDNode *N);
366
367 void SelectPtrauthAuth(SDNode *N);
368 void SelectPtrauthResign(SDNode *N);
369
370 bool trySelectStackSlotTagP(SDNode *N);
371 void SelectTagP(SDNode *N);
372
373 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
374 unsigned SubRegIdx);
375 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
376 unsigned SubRegIdx);
377 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
378 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
379 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
380 unsigned Opc_rr, unsigned Opc_ri,
381 bool IsIntr = false);
382 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
383 unsigned Scale, unsigned Opc_ri,
384 unsigned Opc_rr);
385 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
386 bool IsZmMulti, unsigned Opcode,
387 bool HasPred = false);
388 void SelectPExtPair(SDNode *N, unsigned Opc);
389 void SelectWhilePair(SDNode *N, unsigned Opc);
390 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
391 void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
392 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
393 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
394 bool IsTupleInput, unsigned Opc);
395 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
396
397 template <unsigned MaxIdx, unsigned Scale>
398 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
399 unsigned Op);
400 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
401 unsigned Op, unsigned MaxIdx, unsigned Scale,
402 unsigned BaseReg = 0);
403 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
404 /// SVE Reg+Imm addressing mode.
405 template <int64_t Min, int64_t Max>
406 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
407 SDValue &OffImm);
408 /// SVE Reg+Reg address mode.
409 template <unsigned Scale>
410 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
411 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
412 }
413
414 void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
415 unsigned Opc, uint32_t MaxImm);
416
417 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc);
418
419 template <unsigned MaxIdx, unsigned Scale>
420 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
421 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
422 }
423
424 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
425 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
426 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
427 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
428 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
429 unsigned Opc_rr, unsigned Opc_ri);
430 std::tuple<unsigned, SDValue, SDValue>
431 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
432 const SDValue &OldBase, const SDValue &OldOffset,
433 unsigned Scale);
434
435 bool tryBitfieldExtractOp(SDNode *N);
436 bool tryBitfieldExtractOpFromSExt(SDNode *N);
437 bool tryBitfieldInsertOp(SDNode *N);
438 bool tryBitfieldInsertInZeroOp(SDNode *N);
439 bool tryShiftAmountMod(SDNode *N);
440
441 bool tryReadRegister(SDNode *N);
442 bool tryWriteRegister(SDNode *N);
443
444 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
445 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
446
447 bool trySelectXAR(SDNode *N);
448
449// Include the pieces autogenerated from the target description.
450#include "AArch64GenDAGISel.inc"
451
452private:
453 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
454 SDValue &Shift);
455 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
456 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
457 SDValue &OffImm) {
458 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
459 }
460 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
461 unsigned Size, SDValue &Base,
462 SDValue &OffImm);
463 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
464 SDValue &OffImm);
465 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
466 SDValue &OffImm);
467 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
468 SDValue &Offset, SDValue &SignExtend,
469 SDValue &DoShift);
470 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
471 SDValue &Offset, SDValue &SignExtend,
472 SDValue &DoShift);
473 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
474 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
475 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
476 SDValue &Offset, SDValue &SignExtend);
477
478 template<unsigned RegWidth>
479 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
480 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
481 }
482
483 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
484
485 template<unsigned RegWidth>
486 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
487 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
488 }
489
490 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
491 unsigned Width);
492
493 bool SelectCMP_SWAP(SDNode *N);
494
495 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
496 bool Negate);
497 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
498 bool Negate);
499 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
500 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
501
502 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
503 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
504 bool AllowSaturation, SDValue &Imm);
505
506 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
507 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
508 SDValue &Offset);
509 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
510 SDValue &Offset, unsigned Scale = 1);
511
512 bool SelectAllActivePredicate(SDValue N);
513 bool SelectAnyPredicate(SDValue N);
514
515 bool SelectCmpBranchUImm6Operand(SDNode *P, SDValue N, SDValue &Imm);
516
517 template <bool MatchCBB>
518 bool SelectCmpBranchExtOperand(SDValue N, SDValue &Reg, SDValue &ExtType);
519};
520
521class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
522public:
523 static char ID;
524 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
525 CodeGenOptLevel OptLevel)
526 : SelectionDAGISelLegacy(
527 ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
528};
529} // end anonymous namespace
530
531char AArch64DAGToDAGISelLegacy::ID = 0;
532
533INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
534
535/// isIntImmediate - This method tests to see if the node is a constant
536 /// operand. If so, Imm will receive the zero-extended value.
537 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
538 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
539 Imm = C->getZExtValue();
540 return true;
541 }
542 return false;
543}
544
545 // isIntImmediate - This method tests to see if N is a constant operand.
546 // If so, Imm will receive the value.
547static bool isIntImmediate(SDValue N, uint64_t &Imm) {
548 return isIntImmediate(N.getNode(), Imm);
549}
550
551// isOpcWithIntImmediate - This method tests to see if the node is a specific
552 // opcode and that it has an immediate integer right operand.
553 // If so, Imm will receive the value.
554static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
555 uint64_t &Imm) {
556 return N->getOpcode() == Opc &&
557 isIntImmediate(N->getOperand(1).getNode(), Imm);
558}
559
560// isIntImmediateEq - This method tests to see if N is a constant operand that
561// is equivalent to 'ImmExpected'.
562#ifndef NDEBUG
563static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
564 uint64_t Imm;
565 if (!isIntImmediate(N.getNode(), Imm))
566 return false;
567 return Imm == ImmExpected;
568}
569#endif
570
571bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
572 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
573 std::vector<SDValue> &OutOps) {
574 switch(ConstraintID) {
575 default:
576 llvm_unreachable("Unexpected asm memory constraint");
577 case InlineAsm::ConstraintCode::m:
578 case InlineAsm::ConstraintCode::o:
579 case InlineAsm::ConstraintCode::Q:
580 // We need to make sure that this one operand does not end up in XZR, thus
581 // require the address to be in a PointerRegClass register.
582 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
583 const TargetRegisterClass *TRC = TRI->getPointerRegClass();
584 SDLoc dl(Op);
585 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
586 SDValue NewOp =
587 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
588 dl, Op.getValueType(),
589 Op, RC), 0);
590 OutOps.push_back(NewOp);
591 return false;
592 }
593 return true;
594}
595
596/// SelectArithImmed - Select an immediate value that can be represented as
597/// a 12-bit value shifted left by either 0 or 12. If so, return true with
598/// Val set to the 12-bit value and Shift set to the shifter operand.
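/// For example, 0xabc is selected as {Val = 0xabc, LSL #0}, 0xabc000 as
/// {Val = 0xabc, LSL #12}, while 0xabcd (or anything wider than 24 bits) is
/// rejected.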
599bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
600 SDValue &Shift) {
601 // This function is called from the addsub_shifted_imm ComplexPattern,
602 // which lists [imm] as the list of opcodes it's interested in; however,
603 // we still need to check whether the operand is actually an immediate
604 // here because the ComplexPattern opcode list is only used in
605 // root-level opcode matching.
606 if (!isa<ConstantSDNode>(N.getNode()))
607 return false;
608
609 uint64_t Immed = N.getNode()->getAsZExtVal();
610 unsigned ShiftAmt;
611
612 if (Immed >> 12 == 0) {
613 ShiftAmt = 0;
614 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
615 ShiftAmt = 12;
616 Immed = Immed >> 12;
617 } else
618 return false;
619
620 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
621 SDLoc dl(N);
622 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
623 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
624 return true;
625}
626
627/// SelectNegArithImmed - As above, but negates the value before trying to
628/// select it.
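/// For example, an i32 immediate of -16 is negated to 16 and selected as
/// {Val = 16, LSL #0}, letting a compare against a negative constant be emitted
/// as a CMN; 0 is rejected because negating it would invert the meaning of the
/// C flag.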
629bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
630 SDValue &Shift) {
631 // This function is called from the addsub_shifted_imm ComplexPattern,
632 // which lists [imm] as the list of opcodes it's interested in; however,
633 // we still need to check whether the operand is actually an immediate
634 // here because the ComplexPattern opcode list is only used in
635 // root-level opcode matching.
636 if (!isa<ConstantSDNode>(N.getNode()))
637 return false;
638
639 // The immediate operand must be a 24-bit zero-extended immediate.
640 uint64_t Immed = N.getNode()->getAsZExtVal();
641
642 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
643 // have the opposite effect on the C flag, so this pattern mustn't match under
644 // those circumstances.
645 if (Immed == 0)
646 return false;
647
648 if (N.getValueType() == MVT::i32)
649 Immed = ~((uint32_t)Immed) + 1;
650 else
651 Immed = ~Immed + 1ULL;
652 if (Immed & 0xFFFFFFFFFF000000ULL)
653 return false;
654
655 Immed &= 0xFFFFFFULL;
656 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
657 Shift);
658}
659
660/// getShiftTypeForNode - Translate a shift node to the corresponding
661/// ShiftType value.
662 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
663 switch (N.getOpcode()) {
664 default:
665 return AArch64_AM::InvalidShiftExtend;
666 case ISD::SHL:
667 return AArch64_AM::LSL;
668 case ISD::SRL:
669 return AArch64_AM::LSR;
670 case ISD::SRA:
671 return AArch64_AM::ASR;
672 case ISD::ROTR:
673 return AArch64_AM::ROR;
674 }
675}
676
677 static bool isMemOpOrPrefetch(SDNode *N) {
678 return isa<MemSDNode>(*N) || N->getOpcode() == AArch64ISD::PREFETCH;
679}
680
681/// Determine whether it is worth it to fold SHL into the addressing
682/// mode.
683 static bool isWorthFoldingSHL(SDValue V) {
684 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
685 // It is worth folding logical shift of up to three places.
686 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
687 if (!CSD)
688 return false;
689 unsigned ShiftVal = CSD->getZExtValue();
690 if (ShiftVal > 3)
691 return false;
692
693 // Check if this particular node is reused in any non-memory related
694 // operation. If yes, do not try to fold this node into the address
695 // computation, since the computation will be kept.
696 const SDNode *Node = V.getNode();
697 for (SDNode *UI : Node->users())
698 if (!isMemOpOrPrefetch(UI))
699 for (SDNode *UII : UI->users())
700 if (!isMemOpOrPrefetch(UII))
701 return false;
702 return true;
703}
704
705/// Determine whether it is worth folding V into an extended register
706/// addressing mode.
707bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
708 // Trivial if we are optimizing for code size or if there is only
709 // one use of the value.
710 if (CurDAG->shouldOptForSize() || V.hasOneUse())
711 return true;
712
713 // If a subtarget has a slow shift, folding a shift into multiple loads
714 // costs additional micro-ops.
715 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
716 return false;
717
718 // Check whether we're going to emit the address arithmetic anyway because
719 // it's used by a non-address operation.
720 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
721 return true;
722 if (V.getOpcode() == ISD::ADD) {
723 const SDValue LHS = V.getOperand(0);
724 const SDValue RHS = V.getOperand(1);
725 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
726 return true;
727 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
728 return true;
729 }
730
731 // It hurts otherwise, since the value will be reused.
732 return false;
733}
734
735/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
736/// to select more shifted register patterns.
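/// For example, on i64 the pattern (and (srl X, 8), 0xFFFFFFFFFFFFFF00) is
/// rewritten so that Reg becomes (UBFMXri X, 16, 63), i.e. X lsr 16, and Shift
/// encodes LSL #8, which computes the same value as the original mask-and-shift.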
737bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
738 SDValue &Shift) {
739 EVT VT = N.getValueType();
740 if (VT != MVT::i32 && VT != MVT::i64)
741 return false;
742
743 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
744 return false;
745 SDValue LHS = N.getOperand(0);
746 if (!LHS->hasOneUse())
747 return false;
748
749 unsigned LHSOpcode = LHS->getOpcode();
750 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
751 return false;
752
753 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
754 if (!ShiftAmtNode)
755 return false;
756
757 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
758 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
759 if (!RHSC)
760 return false;
761
762 APInt AndMask = RHSC->getAPIntValue();
763 unsigned LowZBits, MaskLen;
764 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
765 return false;
766
767 unsigned BitWidth = N.getValueSizeInBits();
768 SDLoc DL(LHS);
769 uint64_t NewShiftC;
770 unsigned NewShiftOp;
771 if (LHSOpcode == ISD::SHL) {
772 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
773 // BitWidth != LowZBits + MaskLen doesn't match the pattern
774 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
775 return false;
776
777 NewShiftC = LowZBits - ShiftAmtC;
778 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
779 } else {
780 if (LowZBits == 0)
781 return false;
782
783 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
784 NewShiftC = LowZBits + ShiftAmtC;
785 if (NewShiftC >= BitWidth)
786 return false;
787
788 // SRA needs all high bits
789 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
790 return false;
791
792 // SRL high bits can be 0 or 1
793 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
794 return false;
795
796 if (LHSOpcode == ISD::SRL)
797 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
798 else
799 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
800 }
801
802 assert(NewShiftC < BitWidth && "Invalid shift amount");
803 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
804 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
805 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
806 NewShiftAmt, BitWidthMinus1),
807 0);
808 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
809 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
810 return true;
811}
812
813/// getExtendTypeForNode - Translate an extend node to the corresponding
814/// ExtendType value.
815 static AArch64_AM::ShiftExtendType
816 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
817 if (N.getOpcode() == ISD::SIGN_EXTEND ||
818 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
819 EVT SrcVT;
820 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
821 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
822 else
823 SrcVT = N.getOperand(0).getValueType();
824
825 if (!IsLoadStore && SrcVT == MVT::i8)
826 return AArch64_AM::SXTB;
827 else if (!IsLoadStore && SrcVT == MVT::i16)
828 return AArch64_AM::SXTH;
829 else if (SrcVT == MVT::i32)
830 return AArch64_AM::SXTW;
831 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
832
833 return AArch64_AM::InvalidShiftExtend;
834 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
835 N.getOpcode() == ISD::ANY_EXTEND) {
836 EVT SrcVT = N.getOperand(0).getValueType();
837 if (!IsLoadStore && SrcVT == MVT::i8)
838 return AArch64_AM::UXTB;
839 else if (!IsLoadStore && SrcVT == MVT::i16)
840 return AArch64_AM::UXTH;
841 else if (SrcVT == MVT::i32)
842 return AArch64_AM::UXTW;
843 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
844
845 return AArch64_AM::InvalidShiftExtend;
846 } else if (N.getOpcode() == ISD::AND) {
847 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
848 if (!CSD)
849 return AArch64_AM::InvalidShiftExtend;
850 uint64_t AndMask = CSD->getZExtValue();
851
852 switch (AndMask) {
853 default:
854 return AArch64_AM::InvalidShiftExtend;
855 case 0xFF:
856 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
857 case 0xFFFF:
858 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
859 case 0xFFFFFFFF:
860 return AArch64_AM::UXTW;
861 }
862 }
863
864 return AArch64_AM::InvalidShiftExtend;
865}
866
867/// Determine whether it is worth folding V into an extended register of an
868/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
869/// instruction, and the shift should be treated as worth folding even if it
870/// has multiple uses.
871bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
872 // Trivial if we are optimizing for code size or if there is only
873 // one use of the value.
874 if (CurDAG->shouldOptForSize() || V.hasOneUse())
875 return true;
876
877 // If a subtarget has a fastpath LSL we can fold a logical shift into
878 // the add/sub and save a cycle.
879 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
880 V.getConstantOperandVal(1) <= 4 &&
882 return true;
883
884 // It hurts otherwise, since the value will be reused.
885 return false;
886}
887
888/// SelectShiftedRegister - Select a "shifted register" operand. If the value
889/// is not shifted, set the Shift operand to default of "LSL 0". The logical
890/// instructions allow the shifted register to be rotated, but the arithmetic
891/// instructions do not. The AllowROR parameter specifies whether ROR is
892/// supported.
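/// For example, (or X, (srl Y, 7)) can fold the shift and be selected as
/// ORR Xd, X, Y, LSR #7; a ROR shift amount is only accepted for the logical
/// form (AllowROR == true).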
893bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
894 SDValue &Reg, SDValue &Shift) {
895 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
896 return true;
897
898 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
899 if (ShType == AArch64_AM::InvalidShiftExtend)
900 return false;
901 if (!AllowROR && ShType == AArch64_AM::ROR)
902 return false;
903
904 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
905 unsigned BitSize = N.getValueSizeInBits();
906 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
907 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
908
909 Reg = N.getOperand(0);
910 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
911 return isWorthFoldingALU(N, true);
912 }
913
914 return false;
915}
916
917/// Instructions that accept extend modifiers like UXTW expect the register
918/// being extended to be a GPR32, but the incoming DAG might be acting on a
919/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
920/// this is the case.
921 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
922 if (N.getValueType() == MVT::i32)
923 return N;
924
925 SDLoc dl(N);
926 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
927}
928
929// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
930template<signed Low, signed High, signed Scale>
931bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
932 if (!isa<ConstantSDNode>(N))
933 return false;
934
935 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
936 if ((MulImm % std::abs(Scale)) == 0) {
937 int64_t RDVLImm = MulImm / Scale;
938 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
939 Imm = CurDAG->getSignedTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
940 return true;
941 }
942 }
943
944 return false;
945}
946
947// Returns a suitable RDSVL multiplier from a left shift.
948template <signed Low, signed High>
949bool AArch64DAGToDAGISel::SelectRDSVLShiftImm(SDValue N, SDValue &Imm) {
950 if (!isa<ConstantSDNode>(N))
951 return false;
952
953 int64_t MulImm = 1LL << cast<ConstantSDNode>(N)->getSExtValue();
954 if (MulImm >= Low && MulImm <= High) {
955 Imm = CurDAG->getSignedTargetConstant(MulImm, SDLoc(N), MVT::i32);
956 return true;
957 }
958
959 return false;
960}
961
962/// SelectArithExtendedRegister - Select an "extended register" operand. This
963/// operand folds in an extend followed by an optional left shift.
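/// For example, (add X, (shl (and Y, 0xff), 2)) can be selected as
/// ADD Xd, X, Wy, UXTB #2 (using the W form of Y); left shifts greater than 4
/// and extends from 64 bits are rejected.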
964bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
965 SDValue &Shift) {
966 unsigned ShiftVal = 0;
967 AArch64_AM::ShiftExtendType Ext;
968
969 if (N.getOpcode() == ISD::SHL) {
970 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
971 if (!CSD)
972 return false;
973 ShiftVal = CSD->getZExtValue();
974 if (ShiftVal > 4)
975 return false;
976
977 Ext = getExtendTypeForNode(N.getOperand(0));
978 if (Ext == AArch64_AM::InvalidShiftExtend)
979 return false;
980
981 Reg = N.getOperand(0).getOperand(0);
982 } else {
983 Ext = getExtendTypeForNode(N);
984 if (Ext == AArch64_AM::InvalidShiftExtend)
985 return false;
986
987 Reg = N.getOperand(0);
988
989 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
990 // isDef32 as a heuristic for when the operand is likely to be a 32-bit def.
991 auto isDef32 = [](SDValue N) {
992 unsigned Opc = N.getOpcode();
993 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
994 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
995 Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
996 Opc != ISD::FREEZE;
997 };
998 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
999 isDef32(Reg))
1000 return false;
1001 }
1002
1003 // AArch64 mandates that the RHS of the operation must use the smallest
1004 // register class that could contain the size being extended from. Thus,
1005 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
1006 // there might not be an actual 32-bit value in the program. We can
1007 // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
1008 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
1009 Reg = narrowIfNeeded(CurDAG, Reg);
1010 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1011 MVT::i32);
1012 return isWorthFoldingALU(N);
1013}
1014
1015/// SelectArithUXTXRegister - Select a "UXTX register" operand. This operand
1016/// is used by instructions that have an SP operand.
1017bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
1018 SDValue &Shift) {
1019 unsigned ShiftVal = 0;
1020 AArch64_AM::ShiftExtendType Ext;
1021
1022 if (N.getOpcode() != ISD::SHL)
1023 return false;
1024
1025 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1026 if (!CSD)
1027 return false;
1028 ShiftVal = CSD->getZExtValue();
1029 if (ShiftVal > 4)
1030 return false;
1031
1032 Ext = AArch64_AM::UXTX;
1033 Reg = N.getOperand(0);
1034 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1035 MVT::i32);
1036 return isWorthFoldingALU(N);
1037}
1038
1039/// If there's a use of this ADDlow that's not itself a load/store then we'll
1040/// need to create a real ADD instruction from it anyway and there's no point in
1041/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1042/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1043/// leads to duplicated ADRP instructions.
1044 static bool isWorthFoldingADDlow(SDValue N) {
1045 for (auto *User : N->users()) {
1046 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
1047 User->getOpcode() != ISD::ATOMIC_LOAD &&
1048 User->getOpcode() != ISD::ATOMIC_STORE)
1049 return false;
1050
1051 // ldar and stlr have much more restrictive addressing modes (just a
1052 // register).
1053 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
1054 return false;
1055 }
1056
1057 return true;
1058}
1059
1060/// Check if the immediate offset is valid as a scaled immediate.
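/// For example, with Range = 0x1000 and Size = 8, offsets 0, 8, ..., 32760 are
/// valid; negative, unaligned, or larger offsets are not.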
1061static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1062 unsigned Size) {
1063 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1064 Offset < (Range << Log2_32(Size)))
1065 return true;
1066 return false;
1067}
1068
1069/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1070/// immediate" address. The "Size" argument is the size in bytes of the memory
1071/// reference, which determines the scale.
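/// For example, with IsSignedImm = true, BW = 7 and Size = 8 (the LDP/STP-style
/// 7-bit signed scaled form), byte offsets that are multiples of 8 in
/// [-512, 504] are accepted and OffImm is the byte offset divided by 8.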
1072bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1073 unsigned BW, unsigned Size,
1074 SDValue &Base,
1075 SDValue &OffImm) {
1076 SDLoc dl(N);
1077 const DataLayout &DL = CurDAG->getDataLayout();
1078 const TargetLowering *TLI = getTargetLowering();
1079 if (N.getOpcode() == ISD::FrameIndex) {
1080 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1081 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1082 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1083 return true;
1084 }
1085
1086 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
1087 // mode selected here doesn't support labels/immediates, only base+offset.
1088 if (CurDAG->isBaseWithConstantOffset(N)) {
1089 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1090 if (IsSignedImm) {
1091 int64_t RHSC = RHS->getSExtValue();
1092 unsigned Scale = Log2_32(Size);
1093 int64_t Range = 0x1LL << (BW - 1);
1094
1095 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1096 RHSC < (Range << Scale)) {
1097 Base = N.getOperand(0);
1098 if (Base.getOpcode() == ISD::FrameIndex) {
1099 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1100 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1101 }
1102 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1103 return true;
1104 }
1105 } else {
1106 // unsigned Immediate
1107 uint64_t RHSC = RHS->getZExtValue();
1108 unsigned Scale = Log2_32(Size);
1109 uint64_t Range = 0x1ULL << BW;
1110
1111 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1112 Base = N.getOperand(0);
1113 if (Base.getOpcode() == ISD::FrameIndex) {
1114 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1115 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1116 }
1117 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1118 return true;
1119 }
1120 }
1121 }
1122 }
1123 // Base only. The address will be materialized into a register before
1124 // the memory is accessed.
1125 // add x0, Xbase, #offset
1126 // stp x1, x2, [x0]
1127 Base = N;
1128 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1129 return true;
1130}
1131
1132/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1133/// immediate" address. The "Size" argument is the size in bytes of the memory
1134/// reference, which determines the scale.
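/// For example, with Size = 8, (add X, #16) gives Base = X and OffImm = 2
/// (i.e. LDR Xt, [X, #16]); an offset such as #17 instead falls through to the
/// unscaled (LDUR) form handled below.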
1135bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1136 SDValue &Base, SDValue &OffImm) {
1137 SDLoc dl(N);
1138 const DataLayout &DL = CurDAG->getDataLayout();
1139 const TargetLowering *TLI = getTargetLowering();
1140 if (N.getOpcode() == ISD::FrameIndex) {
1141 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1142 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1143 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1144 return true;
1145 }
1146
1147 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1148 GlobalAddressSDNode *GAN =
1149 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1150 Base = N.getOperand(0);
1151 OffImm = N.getOperand(1);
1152 if (!GAN)
1153 return true;
1154
1155 if (GAN->getOffset() % Size == 0 &&
1156 GAN->getGlobal()->getPointerAlignment(DL) >= Size)
1157 return true;
1158 }
1159
1160 if (CurDAG->isBaseWithConstantOffset(N)) {
1161 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1162 int64_t RHSC = (int64_t)RHS->getZExtValue();
1163 unsigned Scale = Log2_32(Size);
1164 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1165 Base = N.getOperand(0);
1166 if (Base.getOpcode() == ISD::FrameIndex) {
1167 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1168 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1169 }
1170 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1171 return true;
1172 }
1173 }
1174 }
1175
1176 // Before falling back to our general case, check if the unscaled
1177 // instructions can handle this. If so, that's preferable.
1178 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1179 return false;
1180
1181 // Base only. The address will be materialized into a register before
1182 // the memory is accessed.
1183 // add x0, Xbase, #offset
1184 // ldr x0, [x0]
1185 Base = N;
1186 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1187 return true;
1188}
1189
1190/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1191/// immediate" address. This should only match when there is an offset that
1192/// is not valid for a scaled immediate addressing mode. The "Size" argument
1193/// is the size in bytes of the memory reference, which is needed here to know
1194/// what is valid for a scaled immediate.
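/// For example, with Size = 8 an offset of -8, which the scaled form cannot
/// encode, yields the base register plus a raw byte OffImm of -8 for
/// LDUR/STUR; offsets outside [-256, 255] are rejected.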
1195bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1196 SDValue &Base,
1197 SDValue &OffImm) {
1198 if (!CurDAG->isBaseWithConstantOffset(N))
1199 return false;
1200 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1201 int64_t RHSC = RHS->getSExtValue();
1202 if (RHSC >= -256 && RHSC < 256) {
1203 Base = N.getOperand(0);
1204 if (Base.getOpcode() == ISD::FrameIndex) {
1205 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1206 const TargetLowering *TLI = getTargetLowering();
1207 Base = CurDAG->getTargetFrameIndex(
1208 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1209 }
1210 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1211 return true;
1212 }
1213 }
1214 return false;
1215}
1216
1217 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
1218 SDLoc dl(N);
1219 SDValue ImpDef = SDValue(
1220 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1221 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1222 N);
1223}
1224
1225/// Check if the given SHL node (\p N) can be used to form an
1226/// extended register for an addressing mode.
1227bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1228 bool WantExtend, SDValue &Offset,
1229 SDValue &SignExtend) {
1230 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1231 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1232 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1233 return false;
1234
1235 SDLoc dl(N);
1236 if (WantExtend) {
1237 AArch64_AM::ShiftExtendType Ext =
1238 getExtendTypeForNode(N.getOperand(0), true);
1239 if (Ext == AArch64_AM::InvalidShiftExtend)
1240 return false;
1241
1242 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1243 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1244 MVT::i32);
1245 } else {
1246 Offset = N.getOperand(0);
1247 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1248 }
1249
1250 unsigned LegalShiftVal = Log2_32(Size);
1251 unsigned ShiftVal = CSD->getZExtValue();
1252
1253 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1254 return false;
1255
1256 return isWorthFoldingAddr(N, Size);
1257}
1258
1259bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1260 SDValue &Base, SDValue &Offset,
1261 SDValue &SignExtend,
1262 SDValue &DoShift) {
1263 if (N.getOpcode() != ISD::ADD)
1264 return false;
1265 SDValue LHS = N.getOperand(0);
1266 SDValue RHS = N.getOperand(1);
1267 SDLoc dl(N);
1268
1269 // We don't want to match immediate adds here, because they are better lowered
1270 // to the register-immediate addressing modes.
1271 if (isa<ConstantSDNode>(RHS))
1272 return false;
1273
1274 // Check if this particular node is reused in any non-memory related
1275 // operation. If yes, do not try to fold this node into the address
1276 // computation, since the computation will be kept.
1277 const SDNode *Node = N.getNode();
1278 for (SDNode *UI : Node->users()) {
1279 if (!isMemOpOrPrefetch(UI))
1280 return false;
1281 }
1282
1283 // Remember if it is worth folding N when it produces extended register.
1284 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1285
1286 // Try to match a shifted extend on the RHS.
1287 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1288 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1289 Base = LHS;
1290 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1291 return true;
1292 }
1293
1294 // Try to match a shifted extend on the LHS.
1295 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1296 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1297 Base = RHS;
1298 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1299 return true;
1300 }
1301
1302 // There was no shift, whatever else we find.
1303 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1304
1306 // Try to match an unshifted extend on the LHS.
1307 if (IsExtendedRegisterWorthFolding &&
1308 (Ext = getExtendTypeForNode(LHS, true)) !=
1310 Base = RHS;
1311 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1312 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1313 MVT::i32);
1314 if (isWorthFoldingAddr(LHS, Size))
1315 return true;
1316 }
1317
1318 // Try to match an unshifted extend on the RHS.
1319 if (IsExtendedRegisterWorthFolding &&
1320 (Ext = getExtendTypeForNode(RHS, true)) !=
1321 AArch64_AM::InvalidShiftExtend) {
1322 Base = LHS;
1323 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1324 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1325 MVT::i32);
1326 if (isWorthFoldingAddr(RHS, Size))
1327 return true;
1328 }
1329
1330 return false;
1331}
1332
1333// Check if the given immediate is preferred by ADD. If an immediate can be
1334 // encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
1335 // encoded by one MOVZ, return true.
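// For example, 0xfff (a single ADD) and 0x123000 (ADD ..., LSL #12) are
// preferred, while 0xff0000 is not, since a single MOVZ #0xff, LSL #16
// materializes it just as cheaply.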
1336static bool isPreferredADD(int64_t ImmOff) {
1337 // Constant in [0x0, 0xfff] can be encoded in ADD.
1338 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1339 return true;
1340 // Check if it can be encoded in an "ADD LSL #12".
1341 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1342 // As a single MOVZ is faster than an "ADD with LSL #12", ignore such constants.
1343 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1344 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1345 return false;
1346}
1347
1348bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1349 SDValue &Base, SDValue &Offset,
1350 SDValue &SignExtend,
1351 SDValue &DoShift) {
1352 if (N.getOpcode() != ISD::ADD)
1353 return false;
1354 SDValue LHS = N.getOperand(0);
1355 SDValue RHS = N.getOperand(1);
1356 SDLoc DL(N);
1357
1358 // Check if this particular node is reused in any non-memory related
1359 // operation. If yes, do not try to fold this node into the address
1360 // computation, since the computation will be kept.
1361 const SDNode *Node = N.getNode();
1362 for (SDNode *UI : Node->users()) {
1363 if (!isMemOpOrPrefetch(UI))
1364 return false;
1365 }
1366
1367 // Watch out if RHS is a wide immediate: it cannot be selected into the
1368 // [BaseReg+Imm] addressing mode, and it may not be encodable in an
1369 // ADD/SUB. Instead it will use the [BaseReg + 0] address mode and generate
1370 // instructions like:
1371 // MOV X0, WideImmediate
1372 // ADD X1, BaseReg, X0
1373 // LDR X2, [X1, 0]
1374 // For such situation, using [BaseReg, XReg] addressing mode can save one
1375 // ADD/SUB:
1376 // MOV X0, WideImmediate
1377 // LDR X2, [BaseReg, X0]
1378 if (isa<ConstantSDNode>(RHS)) {
1379 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1380 // Skip immediates that can be selected by a load/store addressing mode.
1381 // Also skip immediates that can be encoded by a single ADD (SUB is also
1382 // checked by using -ImmOff).
1383 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1384 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1385 return false;
1386
1387 SDValue Ops[] = { RHS };
1388 SDNode *MOVI =
1389 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1390 SDValue MOVIV = SDValue(MOVI, 0);
1391 // This ADD of two X registers will be selected into [Reg+Reg] mode.
1392 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1393 }
1394
1395 // Remember if it is worth folding N when it produces extended register.
1396 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1397
1398 // Try to match a shifted extend on the RHS.
1399 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1400 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1401 Base = LHS;
1402 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1403 return true;
1404 }
1405
1406 // Try to match a shifted extend on the LHS.
1407 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1408 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1409 Base = RHS;
1410 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1411 return true;
1412 }
1413
1414 // Match any non-shifted, non-extend, non-immediate add expression.
1415 Base = LHS;
1416 Offset = RHS;
1417 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1418 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1419 // Reg1 + Reg2 is free: no check needed.
1420 return true;
1421}
1422
1423SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1424 static const unsigned RegClassIDs[] = {
1425 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1426 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1427 AArch64::dsub2, AArch64::dsub3};
1428
1429 return createTuple(Regs, RegClassIDs, SubRegs);
1430}
1431
1432SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1433 static const unsigned RegClassIDs[] = {
1434 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1435 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1436 AArch64::qsub2, AArch64::qsub3};
1437
1438 return createTuple(Regs, RegClassIDs, SubRegs);
1439}
1440
1441SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1442 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1443 AArch64::ZPR3RegClassID,
1444 AArch64::ZPR4RegClassID};
1445 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1446 AArch64::zsub2, AArch64::zsub3};
1447
1448 return createTuple(Regs, RegClassIDs, SubRegs);
1449}
1450
1451SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1452 assert(Regs.size() == 2 || Regs.size() == 4);
1453
1454 // The createTuple interface requires 3 RegClassIDs for each possible
1455 // tuple type even though we only have them for ZPR2 and ZPR4.
1456 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1457 AArch64::ZPR4Mul4RegClassID};
1458 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1459 AArch64::zsub2, AArch64::zsub3};
1460 return createTuple(Regs, RegClassIDs, SubRegs);
1461}
1462
1463SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1464 const unsigned RegClassIDs[],
1465 const unsigned SubRegs[]) {
1466 // There's no special register-class for a vector-list of 1 element: it's just
1467 // a vector.
1468 if (Regs.size() == 1)
1469 return Regs[0];
1470
1471 assert(Regs.size() >= 2 && Regs.size() <= 4);
1472
1473 SDLoc DL(Regs[0]);
1474
1475 SmallVector<SDValue, 4> Ops;
1476
1477 // First operand of REG_SEQUENCE is the desired RegClass.
1478 Ops.push_back(
1479 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1480
1481 // Then we get pairs of source & subregister-position for the components.
1482 for (unsigned i = 0; i < Regs.size(); ++i) {
1483 Ops.push_back(Regs[i]);
1484 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1485 }
1486
1487 SDNode *N =
1488 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1489 return SDValue(N, 0);
1490}
1491
1492void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1493 bool isExt) {
1494 SDLoc dl(N);
1495 EVT VT = N->getValueType(0);
1496
1497 unsigned ExtOff = isExt;
1498
1499 // Form a REG_SEQUENCE to force register allocation.
1500 unsigned Vec0Off = ExtOff + 1;
1501 SmallVector<SDValue, 4> Regs(N->ops().slice(Vec0Off, NumVecs));
1502 SDValue RegSeq = createQTuple(Regs);
1503
1504 SmallVector<SDValue, 6> Ops;
1505 if (isExt)
1506 Ops.push_back(N->getOperand(1));
1507 Ops.push_back(RegSeq);
1508 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1509 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1510}
1511
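// Split a ptrauth discriminator into the (constant, address) operand pair used
// by the AUT/PAC pseudos. For example, ptrauth.blend(Addr, 1234) yields
// (1234, Addr), a plain constant 42 yields (42, XZR), and any other value
// yields (0, Disc) so the discriminator is computed separately.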
1512static std::tuple<SDValue, SDValue>
1513 extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
1514 SDLoc DL(Disc);
1515 SDValue AddrDisc;
1516 SDValue ConstDisc;
1517
1518 // If this is a blend, remember the constant and address discriminators.
1519 // Otherwise, it's either a constant discriminator, or a non-blended
1520 // address discriminator.
1521 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1522 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
1523 AddrDisc = Disc->getOperand(1);
1524 ConstDisc = Disc->getOperand(2);
1525 } else {
1526 ConstDisc = Disc;
1527 }
1528
1529 // If the constant discriminator (either the blend RHS, or the entire
1530 // discriminator value) isn't a 16-bit constant, bail out, and let the
1531 // discriminator be computed separately.
1532 auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
1533 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
1534 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
1535
1536 // If there's no address discriminator, use XZR directly.
1537 if (!AddrDisc)
1538 AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
1539
1540 return std::make_tuple(
1541 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
1542 AddrDisc);
1543}
1544
1545void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1546 SDLoc DL(N);
1547 // IntrinsicID is operand #0
1548 SDValue Val = N->getOperand(1);
1549 SDValue AUTKey = N->getOperand(2);
1550 SDValue AUTDisc = N->getOperand(3);
1551
1552 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1553 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1554
1555 SDValue AUTAddrDisc, AUTConstDisc;
1556 std::tie(AUTConstDisc, AUTAddrDisc) =
1557 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1558
1559 if (!Subtarget->isX16X17Safer()) {
1560 std::vector<SDValue> Ops = {Val, AUTKey, AUTConstDisc, AUTAddrDisc};
1561 // Copy deactivation symbol if present.
1562 if (N->getNumOperands() > 4)
1563 Ops.push_back(N->getOperand(4));
1564
1565 SDNode *AUT =
1566 CurDAG->getMachineNode(AArch64::AUTxMxN, DL, MVT::i64, MVT::i64, Ops);
1567 ReplaceNode(N, AUT);
1568 } else {
1569 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1570 AArch64::X16, Val, SDValue());
1571 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
1572
1573 SDNode *AUT = CurDAG->getMachineNode(AArch64::AUTx16x17, DL, MVT::i64, Ops);
1574 ReplaceNode(N, AUT);
1575 }
1576}
1577
1578void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
1579 SDLoc DL(N);
1580 // IntrinsicID is operand #0
1581 SDValue Val = N->getOperand(1);
1582 SDValue AUTKey = N->getOperand(2);
1583 SDValue AUTDisc = N->getOperand(3);
1584 SDValue PACKey = N->getOperand(4);
1585 SDValue PACDisc = N->getOperand(5);
1586
1587 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1588 unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();
1589
1590 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1591 PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);
1592
1593 SDValue AUTAddrDisc, AUTConstDisc;
1594 std::tie(AUTConstDisc, AUTAddrDisc) =
1595 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1596
1597 SDValue PACAddrDisc, PACConstDisc;
1598 std::tie(PACConstDisc, PACAddrDisc) =
1599 extractPtrauthBlendDiscriminators(PACDisc, CurDAG);
1600
1601 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1602 AArch64::X16, Val, SDValue());
1603
1604 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
1605 PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};
1606
1607 SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
1608 ReplaceNode(N, AUTPAC);
1609}
1610
1611bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1612 LoadSDNode *LD = cast<LoadSDNode>(N);
1613 if (LD->isUnindexed())
1614 return false;
1615 EVT VT = LD->getMemoryVT();
1616 EVT DstVT = N->getValueType(0);
1617 ISD::MemIndexedMode AM = LD->getAddressingMode();
1618 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1619 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1620 int OffsetVal = (int)OffsetOp->getZExtValue();
1621
1622 // We're not doing validity checking here. That was done when checking
1623 // if we should mark the load as indexed or not. We're just selecting
1624 // the right instruction.
1625 unsigned Opcode = 0;
1626
1627 ISD::LoadExtType ExtType = LD->getExtensionType();
1628 bool InsertTo64 = false;
1629 if (VT == MVT::i64)
1630 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1631 else if (VT == MVT::i32) {
1632 if (ExtType == ISD::NON_EXTLOAD)
1633 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1634 else if (ExtType == ISD::SEXTLOAD)
1635 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1636 else {
1637 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1638 InsertTo64 = true;
1639 // The result of the load is only i32. It's the subreg_to_reg that makes
1640 // it into an i64.
1641 DstVT = MVT::i32;
1642 }
1643 } else if (VT == MVT::i16) {
1644 if (ExtType == ISD::SEXTLOAD) {
1645 if (DstVT == MVT::i64)
1646 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1647 else
1648 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1649 } else {
1650 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1651 InsertTo64 = DstVT == MVT::i64;
1652 // The result of the load is only i32. It's the subreg_to_reg that makes
1653 // it into an i64.
1654 DstVT = MVT::i32;
1655 }
1656 } else if (VT == MVT::i8) {
1657 if (ExtType == ISD::SEXTLOAD) {
1658 if (DstVT == MVT::i64)
1659 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1660 else
1661 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1662 } else {
1663 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1664 InsertTo64 = DstVT == MVT::i64;
1665 // The result of the load is only i32. It's the subreg_to_reg that makes
1666 // it into an i64.
1667 DstVT = MVT::i32;
1668 }
1669 } else if (VT == MVT::f16) {
1670 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1671 } else if (VT == MVT::bf16) {
1672 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1673 } else if (VT == MVT::f32) {
1674 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1675 } else if (VT == MVT::f64 ||
1676 (VT.is64BitVector() && Subtarget->isLittleEndian())) {
1677 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1678 } else if (VT.is128BitVector() && Subtarget->isLittleEndian()) {
1679 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1680 } else if (VT.is64BitVector()) {
1681 if (IsPre || OffsetVal != 8)
1682 return false;
1683 switch (VT.getScalarSizeInBits()) {
1684 case 8:
1685 Opcode = AArch64::LD1Onev8b_POST;
1686 break;
1687 case 16:
1688 Opcode = AArch64::LD1Onev4h_POST;
1689 break;
1690 case 32:
1691 Opcode = AArch64::LD1Onev2s_POST;
1692 break;
1693 case 64:
1694 Opcode = AArch64::LD1Onev1d_POST;
1695 break;
1696 default:
1697 llvm_unreachable("Expected vector element to be a power of 2");
1698 }
1699 } else if (VT.is128BitVector()) {
1700 if (IsPre || OffsetVal != 16)
1701 return false;
1702 switch (VT.getScalarSizeInBits()) {
1703 case 8:
1704 Opcode = AArch64::LD1Onev16b_POST;
1705 break;
1706 case 16:
1707 Opcode = AArch64::LD1Onev8h_POST;
1708 break;
1709 case 32:
1710 Opcode = AArch64::LD1Onev4s_POST;
1711 break;
1712 case 64:
1713 Opcode = AArch64::LD1Onev2d_POST;
1714 break;
1715 default:
1716 llvm_unreachable("Expected vector element to be a power of 2");
1717 }
1718 } else
1719 return false;
1720 SDValue Chain = LD->getChain();
1721 SDValue Base = LD->getBasePtr();
1722 SDLoc dl(N);
1723 // LD1 encodes an immediate offset by using XZR as the offset register.
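 // For the LD1 post-index instructions selected above for big-endian vectors,
 // passing XZR selects the post-increment-by-transfer-size encoding, which
 // matches the OffsetVal of 8 or 16 checked earlier.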
1724 SDValue Offset = (VT.isVector() && !Subtarget->isLittleEndian())
1725 ? CurDAG->getRegister(AArch64::XZR, MVT::i64)
1726 : CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1727 SDValue Ops[] = { Base, Offset, Chain };
1728 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1729 MVT::Other, Ops);
1730
1731 // Transfer memoperands.
1732 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1733 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1734
1735 // Either way, we're replacing the node, so tell the caller that.
1736 SDValue LoadedVal = SDValue(Res, 1);
1737 if (InsertTo64) {
1738 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1739 LoadedVal =
1740 SDValue(CurDAG->getMachineNode(
1741 AArch64::SUBREG_TO_REG, dl, MVT::i64,
1742 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1743 SubReg),
1744 0);
1745 }
1746
1747 ReplaceUses(SDValue(N, 0), LoadedVal);
1748 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1749 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1750 CurDAG->RemoveDeadNode(N);
1751 return true;
1752}
1753
1754void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1755 unsigned SubRegIdx) {
1756 SDLoc dl(N);
1757 EVT VT = N->getValueType(0);
1758 SDValue Chain = N->getOperand(0);
1759
1760 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1761 Chain};
1762
1763 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1764
1765 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1766 SDValue SuperReg = SDValue(Ld, 0);
1767 for (unsigned i = 0; i < NumVecs; ++i)
1768 ReplaceUses(SDValue(N, i),
1769 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1770
1771 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1772
1773 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1774 // because it's too simple to have needed special treatment during lowering.
1775 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1776 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1777 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1778 }
1779
1780 CurDAG->RemoveDeadNode(N);
1781}
1782
1783void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1784 unsigned Opc, unsigned SubRegIdx) {
1785 SDLoc dl(N);
1786 EVT VT = N->getValueType(0);
1787 SDValue Chain = N->getOperand(0);
1788
1789 SDValue Ops[] = {N->getOperand(1), // Mem operand
1790 N->getOperand(2), // Incremental
1791 Chain};
1792
1793 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1794 MVT::Untyped, MVT::Other};
1795
1796 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1797
1798 // Update uses of write back register
1799 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1800
1801 // Update uses of vector list
1802 SDValue SuperReg = SDValue(Ld, 1);
1803 if (NumVecs == 1)
1804 ReplaceUses(SDValue(N, 0), SuperReg);
1805 else
1806 for (unsigned i = 0; i < NumVecs; ++i)
1807 ReplaceUses(SDValue(N, i),
1808 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1809
1810 // Update the chain
1811 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1812 CurDAG->RemoveDeadNode(N);
1813}
1814
1815/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1816/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1817/// new Base and an SDValue representing the new offset.
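/// For example, if the offset is a multiple of the vector length in the range
/// [-8, 7], the reg+imm (Opc_ri) form is chosen; otherwise, if a suitable
/// register offset is found, the reg+reg (Opc_rr) form is used; failing both,
/// the original base is kept with a zero immediate and the Opc_ri opcode.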
1818std::tuple<unsigned, SDValue, SDValue>
1819AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1820 unsigned Opc_ri,
1821 const SDValue &OldBase,
1822 const SDValue &OldOffset,
1823 unsigned Scale) {
1824 SDValue NewBase = OldBase;
1825 SDValue NewOffset = OldOffset;
1826 // Detect a possible Reg+Imm addressing mode.
1827 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1828 N, OldBase, NewBase, NewOffset);
1829
1830 // Detect a possible reg+reg addressing mode, but only if we haven't already
1831 // detected a Reg+Imm one.
1832 const bool IsRegReg =
1833 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1834
1835 // Select the instruction.
1836 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1837}
1838
1839enum class SelectTypeKind {
1840 Int1 = 0,
1841 Int = 1,
1842 FP = 2,
1843 AnyType = 3,
1844};
1845
1846/// This function selects an opcode from a list of opcodes, which is
1847/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
1848/// element types, in this order.
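/// For example, with Kind == Int an nxv8i16 input (MinNumElements == 8) maps
/// to Opcodes[1], the 16-bit element opcode, and nxv2i64 maps to Opcodes[3].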
1849template <SelectTypeKind Kind>
1850static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1851 // Only match scalable vector VTs
1852 if (!VT.isScalableVector())
1853 return 0;
1854
1855 EVT EltVT = VT.getVectorElementType();
1856 unsigned Key = VT.getVectorMinNumElements();
1857 switch (Kind) {
1858 case SelectTypeKind::AnyType:
1859 break;
1860 case SelectTypeKind::Int:
1861 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1862 EltVT != MVT::i64)
1863 return 0;
1864 break;
1865 case SelectTypeKind::Int1:
1866 if (EltVT != MVT::i1)
1867 return 0;
1868 break;
1869 case SelectTypeKind::FP:
1870 if (EltVT == MVT::bf16)
1871 Key = 16;
1872 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
1873 EltVT != MVT::f64)
1874 return 0;
1875 break;
1876 }
1877
1878 unsigned Offset;
1879 switch (Key) {
1880 case 16: // 8-bit or bf16
1881 Offset = 0;
1882 break;
1883 case 8: // 16-bit
1884 Offset = 1;
1885 break;
1886 case 4: // 32-bit
1887 Offset = 2;
1888 break;
1889 case 2: // 64-bit
1890 Offset = 3;
1891 break;
1892 default:
1893 return 0;
1894 }
1895
1896 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1897}
1898
1899// This function is almost identical to SelectWhilePair, but has an
1900// extra check on the range of the immediate operand.
1901// TODO: Merge these two functions together at some point?
1902void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
1903 // Immediate can be either 0 or 1.
1904 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
1905 if (Imm->getZExtValue() > 1)
1906 return;
1907
1908 SDLoc DL(N);
1909 EVT VT = N->getValueType(0);
1910 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1911 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1912 SDValue SuperReg = SDValue(WhilePair, 0);
1913
1914 for (unsigned I = 0; I < 2; ++I)
1915 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1916 AArch64::psub0 + I, DL, VT, SuperReg));
1917
1918 CurDAG->RemoveDeadNode(N);
1919}
1920
1921void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1922 SDLoc DL(N);
1923 EVT VT = N->getValueType(0);
1924
1925 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1926
1927 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1928 SDValue SuperReg = SDValue(WhilePair, 0);
1929
1930 for (unsigned I = 0; I < 2; ++I)
1931 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1932 AArch64::psub0 + I, DL, VT, SuperReg));
1933
1934 CurDAG->RemoveDeadNode(N);
1935}
1936
1937void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1938 unsigned Opcode) {
1939 EVT VT = N->getValueType(0);
1940 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
1941 SDValue Ops = createZTuple(Regs);
1942 SDLoc DL(N);
1943 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1944 SDValue SuperReg = SDValue(Intrinsic, 0);
1945 for (unsigned i = 0; i < NumVecs; ++i)
1946 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1947 AArch64::zsub0 + i, DL, VT, SuperReg));
1948
1949 CurDAG->RemoveDeadNode(N);
1950}
1951
1952void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
1953 unsigned Opcode) {
1954 SDLoc DL(N);
1955 EVT VT = N->getValueType(0);
1956 SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
1957 Ops.push_back(/*Chain*/ N->getOperand(0));
1958
1959 SDNode *Instruction =
1960 CurDAG->getMachineNode(Opcode, DL, {MVT::Untyped, MVT::Other}, Ops);
1961 SDValue SuperReg = SDValue(Instruction, 0);
1962
1963 for (unsigned i = 0; i < NumVecs; ++i)
1964 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1965 AArch64::zsub0 + i, DL, VT, SuperReg));
1966
1967 // Copy chain
1968 unsigned ChainIdx = NumVecs;
1969 ReplaceUses(SDValue(N, ChainIdx), SDValue(Instruction, 1));
1970 CurDAG->RemoveDeadNode(N);
1971}
1972
1973void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
1974 unsigned NumVecs,
1975 bool IsZmMulti,
1976 unsigned Opcode,
1977 bool HasPred) {
1978 assert(Opcode != 0 && "Unexpected opcode");
1979
1980 SDLoc DL(N);
1981 EVT VT = N->getValueType(0);
1982 unsigned FirstVecIdx = HasPred ? 2 : 1;
1983
1984 auto GetMultiVecOperand = [=](unsigned StartIdx) {
1985 SmallVector<SDValue, 4> Regs(N->ops().slice(StartIdx, NumVecs));
1986 return createZMulTuple(Regs);
1987 };
1988
1989 SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
1990
1991 SDValue Zm;
1992 if (IsZmMulti)
1993 Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
1994 else
1995 Zm = N->getOperand(NumVecs + FirstVecIdx);
1996
1997 SDNode *Intrinsic;
1998 if (HasPred)
1999 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
2000 N->getOperand(1), Zdn, Zm);
2001 else
2002 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
2003 SDValue SuperReg = SDValue(Intrinsic, 0);
2004 for (unsigned i = 0; i < NumVecs; ++i)
2005 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2006 AArch64::zsub0 + i, DL, VT, SuperReg));
2007
2008 CurDAG->RemoveDeadNode(N);
2009}
2010
2011void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
2012 unsigned Scale, unsigned Opc_ri,
2013 unsigned Opc_rr, bool IsIntr) {
2014 assert(Scale < 5 && "Invalid scaling value.");
2015 SDLoc DL(N);
2016 EVT VT = N->getValueType(0);
2017 SDValue Chain = N->getOperand(0);
2018
2019 // Optimize addressing mode.
2020 SDValue Base, Offset;
2021 unsigned Opc;
2022 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2023 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
2024 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
2025
2026 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
2027 Base, // Memory operand
2028 Offset, Chain};
2029
2030 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2031
2032 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2033 SDValue SuperReg = SDValue(Load, 0);
2034 for (unsigned i = 0; i < NumVecs; ++i)
2035 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2036 AArch64::zsub0 + i, DL, VT, SuperReg));
2037
2038 // Copy chain
2039 unsigned ChainIdx = NumVecs;
2040 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2041 CurDAG->RemoveDeadNode(N);
2042}
2043
2044void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
2045 unsigned NumVecs,
2046 unsigned Scale,
2047 unsigned Opc_ri,
2048 unsigned Opc_rr) {
2049 assert(Scale < 4 && "Invalid scaling value.");
2050 SDLoc DL(N);
2051 EVT VT = N->getValueType(0);
2052 SDValue Chain = N->getOperand(0);
2053
2054 SDValue PNg = N->getOperand(2);
2055 SDValue Base = N->getOperand(3);
2056 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
2057 unsigned Opc;
2058 std::tie(Opc, Base, Offset) =
2059 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
2060
2061 SDValue Ops[] = {PNg, // Predicate-as-counter
2062 Base, // Memory operand
2063 Offset, Chain};
2064
2065 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2066
2067 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2068 SDValue SuperReg = SDValue(Load, 0);
2069 for (unsigned i = 0; i < NumVecs; ++i)
2070 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2071 AArch64::zsub0 + i, DL, VT, SuperReg));
2072
2073 // Copy chain
2074 unsigned ChainIdx = NumVecs;
2075 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2076 CurDAG->RemoveDeadNode(N);
2077}
2078
2079void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
2080 unsigned Opcode) {
2081 if (N->getValueType(0) != MVT::nxv4f32)
2082 return;
2083 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
2084}
2085
2086void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
2087 unsigned NumOutVecs,
2088 unsigned Opc,
2089 uint32_t MaxImm) {
2090 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
2091 if (Imm->getZExtValue() > MaxImm)
2092 return;
2093
2094 SDValue ZtValue;
2095 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2096 return;
2097
2098 SDValue Chain = Node->getOperand(0);
2099 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4), Chain};
2100 SDLoc DL(Node);
2101 EVT VT = Node->getValueType(0);
2102
2103 SDNode *Instruction =
2104 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2105 SDValue SuperReg = SDValue(Instruction, 0);
2106
2107 for (unsigned I = 0; I < NumOutVecs; ++I)
2108 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2109 AArch64::zsub0 + I, DL, VT, SuperReg));
2110
2111 // Copy chain
2112 unsigned ChainIdx = NumOutVecs;
2113 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2114 CurDAG->RemoveDeadNode(Node);
2115}
2116
2117void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
2118 unsigned NumOutVecs,
2119 unsigned Opc) {
2120 SDValue ZtValue;
2121 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2122 return;
2123
2124 SDValue Chain = Node->getOperand(0);
2125 SDValue Ops[] = {ZtValue,
2126 createZMulTuple({Node->getOperand(3), Node->getOperand(4)}),
2127 Chain};
2128
2129 SDLoc DL(Node);
2130 EVT VT = Node->getValueType(0);
2131
2132 SDNode *Instruction =
2133 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2134 SDValue SuperReg = SDValue(Instruction, 0);
2135
2136 for (unsigned I = 0; I < NumOutVecs; ++I)
2137 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2138 AArch64::zsub0 + I, DL, VT, SuperReg));
2139
2140 // Copy chain
2141 unsigned ChainIdx = NumOutVecs;
2142 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2143 CurDAG->RemoveDeadNode(Node);
2144}
2145
2146void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2147 unsigned Op) {
2148 SDLoc DL(N);
2149 EVT VT = N->getValueType(0);
2150
2151 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2152 SDValue Zd = createZMulTuple(Regs);
2153 SDValue Zn = N->getOperand(1 + NumVecs);
2154 SDValue Zm = N->getOperand(2 + NumVecs);
2155
2156 SDValue Ops[] = {Zd, Zn, Zm};
2157
2158 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
2159 SDValue SuperReg = SDValue(Intrinsic, 0);
2160 for (unsigned i = 0; i < NumVecs; ++i)
2161 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2162 AArch64::zsub0 + i, DL, VT, SuperReg));
2163
2164 CurDAG->RemoveDeadNode(N);
2165}
2166
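// Fold an SME tile index into the tile base register, e.g. ZAS0 with a
// TileNum of 2 becomes ZAS2; fails if the index is out of range for the
// given tile size.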
2167bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2168 switch (BaseReg) {
2169 default:
2170 return false;
2171 case AArch64::ZA:
2172 case AArch64::ZAB0:
2173 if (TileNum == 0)
2174 break;
2175 return false;
2176 case AArch64::ZAH0:
2177 if (TileNum <= 1)
2178 break;
2179 return false;
2180 case AArch64::ZAS0:
2181 if (TileNum <= 3)
2182 break;
2183 return false;
2184 case AArch64::ZAD0:
2185 if (TileNum <= 7)
2186 break;
2187 return false;
2188 }
2189
2190 BaseReg += TileNum;
2191 return true;
2192}
2193
2194template <unsigned MaxIdx, unsigned Scale>
2195void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
2196 unsigned BaseReg, unsigned Op) {
2197 unsigned TileNum = 0;
2198 if (BaseReg != AArch64::ZA)
2199 TileNum = N->getConstantOperandVal(2);
2200
2201 if (!SelectSMETile(BaseReg, TileNum))
2202 return;
2203
2204 SDValue SliceBase, Base, Offset;
2205 if (BaseReg == AArch64::ZA)
2206 SliceBase = N->getOperand(2);
2207 else
2208 SliceBase = N->getOperand(3);
2209
2210 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2211 return;
2212
2213 SDLoc DL(N);
2214 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
2215 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
2216 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2217
2218 EVT VT = N->getValueType(0);
2219 for (unsigned I = 0; I < NumVecs; ++I)
2220 ReplaceUses(SDValue(N, I),
2221 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2222 SDValue(Mov, 0)));
2223 // Copy chain
2224 unsigned ChainIdx = NumVecs;
2225 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2226 CurDAG->RemoveDeadNode(N);
2227}
2228
2229void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2230 unsigned Op, unsigned MaxIdx,
2231 unsigned Scale, unsigned BaseReg) {
2232 // Slice can be in different positions
2233 // The array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2234 // The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2235 SDValue SliceBase = N->getOperand(2);
2236 if (BaseReg != AArch64::ZA)
2237 SliceBase = N->getOperand(3);
2238
2239 SDValue Base, Offset;
2240 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2241 return;
2242 // The correct ZA tile number is computed when the machine instruction is
2243 // emitted (see EmitZAInstr); the DAG cannot select a ZA tile as an output
2244 // register with ZReg.
2245 SDLoc DL(N);
2246 SmallVector<SDValue, 6> Ops;
2247 if (BaseReg != AArch64::ZA)
2248 Ops.push_back(N->getOperand(2));
2249 Ops.push_back(Base);
2250 Ops.push_back(Offset);
2251 Ops.push_back(N->getOperand(0)); //Chain
2252 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2253
2254 EVT VT = N->getValueType(0);
2255 for (unsigned I = 0; I < NumVecs; ++I)
2256 ReplaceUses(SDValue(N, I),
2257 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2258 SDValue(Mov, 0)));
2259
2260 // Copy chain
2261 unsigned ChainIdx = NumVecs;
2262 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2263 CurDAG->RemoveDeadNode(N);
2264}
2265
2266void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2267 unsigned NumOutVecs,
2268 bool IsTupleInput,
2269 unsigned Opc) {
2270 SDLoc DL(N);
2271 EVT VT = N->getValueType(0);
2272 unsigned NumInVecs = N->getNumOperands() - 1;
2273
2274 SmallVector<SDValue, 4> Ops;
2275 if (IsTupleInput) {
2276 assert((NumInVecs == 2 || NumInVecs == 4) &&
2277 "Don't know how to handle multi-register input!");
2278 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumInVecs));
2279 Ops.push_back(createZMulTuple(Regs));
2280 } else {
2281 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2282 for (unsigned I = 0; I < NumInVecs; I++)
2283 Ops.push_back(N->getOperand(1 + I));
2284 }
2285
2286 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2287 SDValue SuperReg = SDValue(Res, 0);
2288
2289 for (unsigned I = 0; I < NumOutVecs; I++)
2290 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2291 AArch64::zsub0 + I, DL, VT, SuperReg));
2292 CurDAG->RemoveDeadNode(N);
2293}
2294
2295void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2296 unsigned Opc) {
2297 SDLoc dl(N);
2298 EVT VT = N->getOperand(2)->getValueType(0);
2299
2300 // Form a REG_SEQUENCE to force register allocation.
2301 bool Is128Bit = VT.getSizeInBits() == 128;
2302 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2303 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2304
2305 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2306 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2307
2308 // Transfer memoperands.
2309 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2310 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2311
2312 ReplaceNode(N, St);
2313}
2314
2315void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2316 unsigned Scale, unsigned Opc_rr,
2317 unsigned Opc_ri) {
2318 SDLoc dl(N);
2319
2320 // Form a REG_SEQUENCE to force register allocation.
2321 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2322 SDValue RegSeq = createZTuple(Regs);
2323
2324 // Optimize addressing mode.
2325 unsigned Opc;
2326 SDValue Base, Offset;
2327 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2328 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2329 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2330
2331 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2332 Base, // address
2333 Offset, // offset
2334 N->getOperand(0)}; // chain
2335 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2336
2337 ReplaceNode(N, St);
2338}
2339
2340bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2341 SDValue &OffImm) {
2342 SDLoc dl(N);
2343 const DataLayout &DL = CurDAG->getDataLayout();
2344 const TargetLowering *TLI = getTargetLowering();
2345
2346 // Try to match it for the frame address
2347 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2348 int FI = FINode->getIndex();
2349 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2350 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2351 return true;
2352 }
2353
2354 return false;
2355}
2356
2357void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2358 unsigned Opc) {
2359 SDLoc dl(N);
2360 EVT VT = N->getOperand(2)->getValueType(0);
2361 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2362 MVT::Other}; // Type for the Chain
2363
2364 // Form a REG_SEQUENCE to force register allocation.
2365 bool Is128Bit = VT.getSizeInBits() == 128;
2366 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2367 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2368
2369 SDValue Ops[] = {RegSeq,
2370 N->getOperand(NumVecs + 1), // base register
2371 N->getOperand(NumVecs + 2), // Incremental
2372 N->getOperand(0)}; // Chain
2373 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2374
2375 ReplaceNode(N, St);
2376}
2377
2378namespace {
2379/// WidenVector - Given a value in the V64 register class, produce the
2380/// equivalent value in the V128 register class.
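/// For example, a v2i32 value is placed in the dsub subregister of an undef
/// (IMPLICIT_DEF) v4i32 value, leaving the upper half undefined.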
2381class WidenVector {
2382 SelectionDAG &DAG;
2383
2384public:
2385 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2386
2387 SDValue operator()(SDValue V64Reg) {
2388 EVT VT = V64Reg.getValueType();
2389 unsigned NarrowSize = VT.getVectorNumElements();
2390 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2391 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2392 SDLoc DL(V64Reg);
2393
2394 SDValue Undef =
2395 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2396 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2397 }
2398};
2399} // namespace
2400
2401/// NarrowVector - Given a value in the V128 register class, produce the
2402/// equivalent value in the V64 register class.
2403 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2404 EVT VT = V128Reg.getValueType();
2405 unsigned WideSize = VT.getVectorNumElements();
2406 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2407 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2408
2409 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2410 V128Reg);
2411}
2412
2413void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2414 unsigned Opc) {
2415 SDLoc dl(N);
2416 EVT VT = N->getValueType(0);
2417 bool Narrow = VT.getSizeInBits() == 64;
2418
2419 // Form a REG_SEQUENCE to force register allocation.
2420 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2421
2422 if (Narrow)
2423 transform(Regs, Regs.begin(),
2424 WidenVector(*CurDAG));
2425
2426 SDValue RegSeq = createQTuple(Regs);
2427
2428 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2429
2430 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2431
2432 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2433 N->getOperand(NumVecs + 3), N->getOperand(0)};
2434 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2435 SDValue SuperReg = SDValue(Ld, 0);
2436
2437 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2438 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2439 AArch64::qsub2, AArch64::qsub3 };
2440 for (unsigned i = 0; i < NumVecs; ++i) {
2441 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2442 if (Narrow)
2443 NV = NarrowVector(NV, *CurDAG);
2444 ReplaceUses(SDValue(N, i), NV);
2445 }
2446
2447 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2448 CurDAG->RemoveDeadNode(N);
2449}
2450
2451void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2452 unsigned Opc) {
2453 SDLoc dl(N);
2454 EVT VT = N->getValueType(0);
2455 bool Narrow = VT.getSizeInBits() == 64;
2456
2457 // Form a REG_SEQUENCE to force register allocation.
2458 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2459
2460 if (Narrow)
2461 transform(Regs, Regs.begin(),
2462 WidenVector(*CurDAG));
2463
2464 SDValue RegSeq = createQTuple(Regs);
2465
2466 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2467 RegSeq->getValueType(0), MVT::Other};
2468
2469 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2470
2471 SDValue Ops[] = {RegSeq,
2472 CurDAG->getTargetConstant(LaneNo, dl,
2473 MVT::i64), // Lane Number
2474 N->getOperand(NumVecs + 2), // Base register
2475 N->getOperand(NumVecs + 3), // Incremental
2476 N->getOperand(0)};
2477 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2478
2479 // Update uses of the write back register
2480 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2481
2482 // Update uses of the vector list
2483 SDValue SuperReg = SDValue(Ld, 1);
2484 if (NumVecs == 1) {
2485 ReplaceUses(SDValue(N, 0),
2486 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2487 } else {
2488 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2489 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2490 AArch64::qsub2, AArch64::qsub3 };
2491 for (unsigned i = 0; i < NumVecs; ++i) {
2492 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2493 SuperReg);
2494 if (Narrow)
2495 NV = NarrowVector(NV, *CurDAG);
2496 ReplaceUses(SDValue(N, i), NV);
2497 }
2498 }
2499
2500 // Update the Chain
2501 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2502 CurDAG->RemoveDeadNode(N);
2503}
2504
2505void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2506 unsigned Opc) {
2507 SDLoc dl(N);
2508 EVT VT = N->getOperand(2)->getValueType(0);
2509 bool Narrow = VT.getSizeInBits() == 64;
2510
2511 // Form a REG_SEQUENCE to force register allocation.
2512 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2513
2514 if (Narrow)
2515 transform(Regs, Regs.begin(),
2516 WidenVector(*CurDAG));
2517
2518 SDValue RegSeq = createQTuple(Regs);
2519
2520 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2521
2522 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2523 N->getOperand(NumVecs + 3), N->getOperand(0)};
2524 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2525
2526 // Transfer memoperands.
2527 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2528 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2529
2530 ReplaceNode(N, St);
2531}
2532
2533void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2534 unsigned Opc) {
2535 SDLoc dl(N);
2536 EVT VT = N->getOperand(2)->getValueType(0);
2537 bool Narrow = VT.getSizeInBits() == 64;
2538
2539 // Form a REG_SEQUENCE to force register allocation.
2540 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2541
2542 if (Narrow)
2543 transform(Regs, Regs.begin(),
2544 WidenVector(*CurDAG));
2545
2546 SDValue RegSeq = createQTuple(Regs);
2547
2548 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2549 MVT::Other};
2550
2551 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2552
2553 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2554 N->getOperand(NumVecs + 2), // Base Register
2555 N->getOperand(NumVecs + 3), // Incremental
2556 N->getOperand(0)};
2557 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2558
2559 // Transfer memoperands.
2560 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2561 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2562
2563 ReplaceNode(N, St);
2564}
2565
2566 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2567 unsigned &Opc, SDValue &Opd0,
2568 unsigned &LSB, unsigned &MSB,
2569 unsigned NumberOfIgnoredLowBits,
2570 bool BiggerPattern) {
2571 assert(N->getOpcode() == ISD::AND &&
2572 "N must be a AND operation to call this function");
2573
2574 EVT VT = N->getValueType(0);
2575
2576 // Here we can test the type of VT and return false when the type does not
2577 // match, but since it is done prior to that call in the current context
2578 // we turned that into an assert to avoid redundant code.
2579 assert((VT == MVT::i32 || VT == MVT::i64) &&
2580 "Type checking must have been done before calling this function");
2581
2582 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2583 // changed the AND node to a 32-bit mask operation. We'll have to
2584 // undo that as part of the transform here if we want to catch all
2585 // the opportunities.
2586 // Currently the NumberOfIgnoredLowBits argument helps to recover
2587 // from these situations when matching the bigger pattern (bitfield insert).
2588
2589 // For unsigned extracts, check for a shift right and mask
2590 uint64_t AndImm = 0;
2591 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2592 return false;
2593
2594 const SDNode *Op0 = N->getOperand(0).getNode();
2595
2596 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2597 // simplified. Try to undo that
2598 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2599
2600 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
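 // (e.g. 0x00ff passes since 0x00ff & 0x0100 == 0, but 0x0ff0 does not).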
2601 if (AndImm & (AndImm + 1))
2602 return false;
2603
2604 bool ClampMSB = false;
2605 uint64_t SrlImm = 0;
2606 // Handle the SRL + ANY_EXTEND case.
2607 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2608 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2609 // Extend the incoming operand of the SRL to 64-bit.
2610 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2611 // Make sure to clamp the MSB so that we preserve the semantics of the
2612 // original operations.
2613 ClampMSB = true;
2614 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2615 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
2616 SrlImm)) {
2617 // If the shift result was truncated, we can still combine them.
2618 Opd0 = Op0->getOperand(0).getOperand(0);
2619
2620 // Use the type of SRL node.
2621 VT = Opd0->getValueType(0);
2622 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2623 Opd0 = Op0->getOperand(0);
2624 ClampMSB = (VT == MVT::i32);
2625 } else if (BiggerPattern) {
2626 // Let's pretend a 0 shift right has been performed.
2627 // The resulting code will be at least as good as the original one
2628 // plus it may expose more opportunities for bitfield insert pattern.
2629 // FIXME: Currently we limit this to the bigger pattern, because
2630 // some optimizations expect AND and not UBFM.
2631 Opd0 = N->getOperand(0);
2632 } else
2633 return false;
2634
2635 // Bail out on large immediates. This happens when no proper
2636 // combining/constant folding was performed.
2637 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2638 LLVM_DEBUG(
2639 (dbgs() << N
2640 << ": Found large shift immediate, this should not happen\n"));
2641 return false;
2642 }
2643
2644 LSB = SrlImm;
2645 MSB = SrlImm +
2646 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2647 : llvm::countr_one<uint64_t>(AndImm)) -
2648 1;
2649 if (ClampMSB)
2650 // Since we're moving the extend before the right shift operation, we need
2651 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2652 // the zeros which would get shifted in with the original right shift
2653 // operation.
2654 MSB = MSB > 31 ? 31 : MSB;
2655
2656 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2657 return true;
2658}
2659
2660 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2661 SDValue &Opd0, unsigned &Immr,
2662 unsigned &Imms) {
2663 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2664
2665 EVT VT = N->getValueType(0);
2666 unsigned BitWidth = VT.getSizeInBits();
2667 assert((VT == MVT::i32 || VT == MVT::i64) &&
2668 "Type checking must have been done before calling this function");
2669
2670 SDValue Op = N->getOperand(0);
2671 if (Op->getOpcode() == ISD::TRUNCATE) {
2672 Op = Op->getOperand(0);
2673 VT = Op->getValueType(0);
2674 BitWidth = VT.getSizeInBits();
2675 }
2676
2677 uint64_t ShiftImm;
2678 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2679 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2680 return false;
2681
2682 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2683 if (ShiftImm + Width > BitWidth)
2684 return false;
2685
2686 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2687 Opd0 = Op.getOperand(0);
2688 Immr = ShiftImm;
2689 Imms = ShiftImm + Width - 1;
2690 return true;
2691}
2692
2693 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2694 SDValue &Opd0, unsigned &LSB,
2695 unsigned &MSB) {
2696 // We are looking for the following pattern, which extracts several
2697 // contiguous bits from the source value and places them at the LSB of the
2698 // destination value; all other bits of the destination value are set to zero:
2699 //
2700 // Value2 = AND Value, MaskImm
2701 // SRL Value2, ShiftImm
2702 //
2703 // where MaskImm >> ShiftImm is a mask of the low bits whose width gives the
2704 // number of bits to extract.
2704 //
2705 // This gets selected into a single UBFM:
2706 //
2707 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2708 //
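 // For example, for i32 ((x & 0xff0) >> 4) this selects UBFMWri x, 4, 11,
 // i.e. UBFX x, #4, #8.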
2709
2710 if (N->getOpcode() != ISD::SRL)
2711 return false;
2712
2713 uint64_t AndMask = 0;
2714 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2715 return false;
2716
2717 Opd0 = N->getOperand(0).getOperand(0);
2718
2719 uint64_t SrlImm = 0;
2720 if (!isIntImmediate(N->getOperand(1), SrlImm))
2721 return false;
2722
2723 // Check whether we really have several bits extract here.
2724 if (!isMask_64(AndMask >> SrlImm))
2725 return false;
2726
2727 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2728 LSB = SrlImm;
2729 MSB = llvm::Log2_64(AndMask);
2730 return true;
2731}
2732
2733static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2734 unsigned &Immr, unsigned &Imms,
2735 bool BiggerPattern) {
2736 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2737 "N must be a SHR/SRA operation to call this function");
2738
2739 EVT VT = N->getValueType(0);
2740
2741 // Here we can test the type of VT and return false when the type does not
2742 // match, but since it is done prior to that call in the current context
2743 // we turned that into an assert to avoid redundant code.
2744 assert((VT == MVT::i32 || VT == MVT::i64) &&
2745 "Type checking must have been done before calling this function");
2746
2747 // Check for AND + SRL doing several bits extract.
2748 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2749 return true;
2750
2751 // We're looking for a shift of a shift.
2752 uint64_t ShlImm = 0;
2753 uint64_t TruncBits = 0;
2754 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2755 Opd0 = N->getOperand(0).getOperand(0);
2756 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2757 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2758 // We are looking for a shift of a truncate. A truncate from i64 to i32 could
2759 // be considered as setting the high 32 bits to zero. Our strategy here is to
2760 // always generate a 64-bit UBFM. This consistency will help the CSE pass
2761 // later find more redundancy.
2762 Opd0 = N->getOperand(0).getOperand(0);
2763 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2764 VT = Opd0.getValueType();
2765 assert(VT == MVT::i64 && "the promoted type should be i64");
2766 } else if (BiggerPattern) {
2767 // Let's pretend a 0 shift left has been performed.
2768 // FIXME: Currently we limit this to the bigger pattern case,
2769 // because some optimizations expect AND and not UBFM
2770 Opd0 = N->getOperand(0);
2771 } else
2772 return false;
2773
2774 // Missing combines/constant folding may have left us with strange
2775 // constants.
2776 if (ShlImm >= VT.getSizeInBits()) {
2777 LLVM_DEBUG(
2778 (dbgs() << N
2779 << ": Found large shift immediate, this should not happen\n"));
2780 return false;
2781 }
2782
2783 uint64_t SrlImm = 0;
2784 if (!isIntImmediate(N->getOperand(1), SrlImm))
2785 return false;
2786
2787 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2788 "bad amount in shift node!");
2789 int immr = SrlImm - ShlImm;
2790 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2791 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
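 // For example, for i32 ((x << 4) >> 8) with ISD::SRL: Immr = 8 - 4 = 4 and
 // Imms = 32 - 4 - 1 = 27, i.e. UBFX x, #4, #24.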
2792 // SRA requires a signed extraction
2793 if (VT == MVT::i32)
2794 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2795 else
2796 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2797 return true;
2798}
2799
2800bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2801 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2802
2803 EVT VT = N->getValueType(0);
2804 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2805 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2806 return false;
2807
2808 uint64_t ShiftImm;
2809 SDValue Op = N->getOperand(0);
2810 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2811 return false;
2812
2813 SDLoc dl(N);
2814 // Extend the incoming operand of the shift to 64-bits.
2815 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2816 unsigned Immr = ShiftImm;
2817 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2818 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2819 CurDAG->getTargetConstant(Imms, dl, VT)};
2820 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2821 return true;
2822}
2823
2824static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2825 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2826 unsigned NumberOfIgnoredLowBits = 0,
2827 bool BiggerPattern = false) {
2828 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2829 return false;
2830
2831 switch (N->getOpcode()) {
2832 default:
2833 if (!N->isMachineOpcode())
2834 return false;
2835 break;
2836 case ISD::AND:
2837 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2838 NumberOfIgnoredLowBits, BiggerPattern);
2839 case ISD::SRL:
2840 case ISD::SRA:
2841 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2842
2843 case ISD::SIGN_EXTEND_INREG:
2844 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2845 }
2846
2847 unsigned NOpc = N->getMachineOpcode();
2848 switch (NOpc) {
2849 default:
2850 return false;
2851 case AArch64::SBFMWri:
2852 case AArch64::UBFMWri:
2853 case AArch64::SBFMXri:
2854 case AArch64::UBFMXri:
2855 Opc = NOpc;
2856 Opd0 = N->getOperand(0);
2857 Immr = N->getConstantOperandVal(1);
2858 Imms = N->getConstantOperandVal(2);
2859 return true;
2860 }
2861 // Unreachable
2862 return false;
2863}
2864
2865bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2866 unsigned Opc, Immr, Imms;
2867 SDValue Opd0;
2868 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2869 return false;
2870
2871 EVT VT = N->getValueType(0);
2872 SDLoc dl(N);
2873
2874 // If the bit extract operation is 64-bit but the original type is 32-bit, we
2875 // need to add one EXTRACT_SUBREG.
2876 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2877 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2878 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2879
2880 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2881 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
2882 MVT::i32, SDValue(BFM, 0));
2883 ReplaceNode(N, Inner.getNode());
2884 return true;
2885 }
2886
2887 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2888 CurDAG->getTargetConstant(Imms, dl, VT)};
2889 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2890 return true;
2891}
2892
2893/// Does DstMask form a complementary pair with the mask provided by
2894/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2895/// this asks whether DstMask zeroes precisely those bits that will be set by
2896/// the other half.
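/// For example, on i32 a DstMask of 0xffff0000 pairs with BitsToBeInserted of
/// 0x0000ffff: the two do not overlap and together cover all 32 bits.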
2897static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2898 unsigned NumberOfIgnoredHighBits, EVT VT) {
2899 assert((VT == MVT::i32 || VT == MVT::i64) &&
2900 "i32 or i64 mask type expected!");
2901 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2902
2903 // Enable implicitTrunc as we're intentionally ignoring high bits.
2904 APInt SignificantDstMask =
2905 APInt(BitWidth, DstMask, /*isSigned=*/false, /*implicitTrunc=*/true);
2906 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2907
2908 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2909 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2910}
2911
2912// Look for bits that will be useful for later uses.
2913 // A bit is considered useless as soon as it is dropped and never used
2914 // before it has been dropped.
2915// E.g., looking for useful bit of x
2916// 1. y = x & 0x7
2917// 2. z = y >> 2
2918 // After #1, the useful bits of x are 0x7; those useful bits then live through
2919 // y.
2920 // After #2, the useful bits of x are 0x4.
2921 // However, if x is used by an unpredictable instruction, then all its bits
2922 // are useful.
2923// E.g.
2924// 1. y = x & 0x7
2925// 2. z = y >> 2
2926// 3. str x, [@x]
2927static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2928
2929 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2930 unsigned Depth) {
2931 uint64_t Imm =
2932 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2933 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2934 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2935 getUsefulBits(Op, UsefulBits, Depth + 1);
2936}
2937
2938 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2939 uint64_t Imm, uint64_t MSB,
2940 unsigned Depth) {
2941 // inherit the bitwidth value
2942 APInt OpUsefulBits(UsefulBits);
2943 OpUsefulBits = 1;
2944
2945 if (MSB >= Imm) {
2946 OpUsefulBits <<= MSB - Imm + 1;
2947 --OpUsefulBits;
2948 // The interesting part will be in the lower part of the result
2949 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2950 // The interesting part was starting at Imm in the argument
2951 OpUsefulBits <<= Imm;
2952 } else {
2953 OpUsefulBits <<= MSB + 1;
2954 --OpUsefulBits;
2955 // The interesting part will be shifted in the result
2956 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2957 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2958 // The interesting part was at zero in the argument
2959 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2960 }
2961
2962 UsefulBits &= OpUsefulBits;
2963}
2964
2965static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2966 unsigned Depth) {
2967 uint64_t Imm =
2968 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2969 uint64_t MSB =
2970 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2971
2972 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2973}
2974
2975 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2976 unsigned Depth) {
2977 uint64_t ShiftTypeAndValue =
2978 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2979 APInt Mask(UsefulBits);
2980 Mask.clearAllBits();
2981 Mask.flipAllBits();
2982
2983 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2984 // Shift Left
2985 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2986 Mask <<= ShiftAmt;
2987 getUsefulBits(Op, Mask, Depth + 1);
2988 Mask.lshrInPlace(ShiftAmt);
2989 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2990 // Shift Right
2991 // We do not handle AArch64_AM::ASR, because the sign will change the
2992 // number of useful bits
2993 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2994 Mask.lshrInPlace(ShiftAmt);
2995 getUsefulBits(Op, Mask, Depth + 1);
2996 Mask <<= ShiftAmt;
2997 } else
2998 return;
2999
3000 UsefulBits &= Mask;
3001}
3002
3003static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
3004 unsigned Depth) {
3005 uint64_t Imm =
3006 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3007 uint64_t MSB =
3008 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
3009
3010 APInt OpUsefulBits(UsefulBits);
3011 OpUsefulBits = 1;
3012
3013 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
3014 ResultUsefulBits.flipAllBits();
3015 APInt Mask(UsefulBits.getBitWidth(), 0);
3016
3017 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
3018
3019 if (MSB >= Imm) {
3020 // The instruction is a BFXIL.
3021 uint64_t Width = MSB - Imm + 1;
3022 uint64_t LSB = Imm;
3023
3024 OpUsefulBits <<= Width;
3025 --OpUsefulBits;
3026
3027 if (Op.getOperand(1) == Orig) {
3028 // Copy the low bits from the result to bits starting from LSB.
3029 Mask = ResultUsefulBits & OpUsefulBits;
3030 Mask <<= LSB;
3031 }
3032
3033 if (Op.getOperand(0) == Orig)
3034 // Bits starting from LSB in the input contribute to the result.
3035 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3036 } else {
3037 // The instruction is a BFI.
3038 uint64_t Width = MSB + 1;
3039 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
3040
3041 OpUsefulBits <<= Width;
3042 --OpUsefulBits;
3043 OpUsefulBits <<= LSB;
3044
3045 if (Op.getOperand(1) == Orig) {
3046 // Copy the bits from the result to the zero bits.
3047 Mask = ResultUsefulBits & OpUsefulBits;
3048 Mask.lshrInPlace(LSB);
3049 }
3050
3051 if (Op.getOperand(0) == Orig)
3052 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3053 }
3054
3055 UsefulBits &= Mask;
3056}
3057
3058static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
3059 SDValue Orig, unsigned Depth) {
3060
3061 // Users of this node should have already been instruction selected
3062 // FIXME: Can we turn that into an assert?
3063 if (!UserNode->isMachineOpcode())
3064 return;
3065
3066 switch (UserNode->getMachineOpcode()) {
3067 default:
3068 return;
3069 case AArch64::ANDSWri:
3070 case AArch64::ANDSXri:
3071 case AArch64::ANDWri:
3072 case AArch64::ANDXri:
3073 // We increment Depth only when we call the getUsefulBits
3074 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
3075 Depth);
3076 case AArch64::UBFMWri:
3077 case AArch64::UBFMXri:
3078 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
3079
3080 case AArch64::ORRWrs:
3081 case AArch64::ORRXrs:
3082 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
3083 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
3084 Depth);
3085 return;
3086 case AArch64::BFMWri:
3087 case AArch64::BFMXri:
3088 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
3089
3090 case AArch64::STRBBui:
3091 case AArch64::STURBBi:
3092 if (UserNode->getOperand(0) != Orig)
3093 return;
3094 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
3095 return;
3096
3097 case AArch64::STRHHui:
3098 case AArch64::STURHHi:
3099 if (UserNode->getOperand(0) != Orig)
3100 return;
3101 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
3102 return;
3103 }
3104}
3105
3106static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
3107 if (Depth >= SelectionDAG::MaxRecursionDepth)
3108 return;
3109 // Initialize UsefulBits
3110 if (!Depth) {
3111 unsigned Bitwidth = Op.getScalarValueSizeInBits();
3112 // At the beginning, assume every produced bit is useful
3113 UsefulBits = APInt(Bitwidth, 0);
3114 UsefulBits.flipAllBits();
3115 }
3116 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
3117
3118 for (SDNode *Node : Op.getNode()->users()) {
3119 // A use cannot produce useful bits
3120 APInt UsefulBitsForUse = APInt(UsefulBits);
3121 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
3122 UsersUsefulBits |= UsefulBitsForUse;
3123 }
3124 // UsefulBits contains the produced bits that are meaningful for the
3125 // current definition, thus a user cannot make a bit meaningful at
3126 // this point
3127 UsefulBits &= UsersUsefulBits;
3128}
3129
3130/// Create a machine node performing a notional SHL of Op by ShlAmount. If
3131/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3132/// 0, return Op unchanged.
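/// For example, on i32 a ShlAmount of 8 emits UBFMWri Op, 24, 23 (LSL #8) and a
/// ShlAmount of -8 emits UBFMWri Op, 8, 31 (LSR #8).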
3133static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
3134 if (ShlAmount == 0)
3135 return Op;
3136
3137 EVT VT = Op.getValueType();
3138 SDLoc dl(Op);
3139 unsigned BitWidth = VT.getSizeInBits();
3140 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
3141
3142 SDNode *ShiftNode;
3143 if (ShlAmount > 0) {
3144 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
3145 ShiftNode = CurDAG->getMachineNode(
3146 UBFMOpc, dl, VT, Op,
3147 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
3148 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
3149 } else {
3150 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3151 assert(ShlAmount < 0 && "expected right shift");
3152 int ShrAmount = -ShlAmount;
3153 ShiftNode = CurDAG->getMachineNode(
3154 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
3155 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
3156 }
3157
3158 return SDValue(ShiftNode, 0);
3159}
3160
3161// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3162static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3163 bool BiggerPattern,
3164 const uint64_t NonZeroBits,
3165 SDValue &Src, int &DstLSB,
3166 int &Width);
3167
3168// For bit-field-positioning pattern "shl VAL, N)".
3169static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3170 bool BiggerPattern,
3171 const uint64_t NonZeroBits,
3172 SDValue &Src, int &DstLSB,
3173 int &Width);
3174
3175/// Does this tree qualify as an attempt to move a bitfield into position,
3176/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
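/// For example, on i32 ((x << 8) & 0x00ffff00) positions a 16-bit field of x at
/// DstLSB 8.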
3177 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
3178 bool BiggerPattern, SDValue &Src,
3179 int &DstLSB, int &Width) {
3180 EVT VT = Op.getValueType();
3181 unsigned BitWidth = VT.getSizeInBits();
3182 (void)BitWidth;
3183 assert(BitWidth == 32 || BitWidth == 64);
3184
3185 KnownBits Known = CurDAG->computeKnownBits(Op);
3186
3187 // Non-zero in the sense that they're not provably zero, which is the key
3188 // point if we want to use this value
3189 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
3190 if (!isShiftedMask_64(NonZeroBits))
3191 return false;
3192
3193 switch (Op.getOpcode()) {
3194 default:
3195 break;
3196 case ISD::AND:
3197 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3198 NonZeroBits, Src, DstLSB, Width);
3199 case ISD::SHL:
3200 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3201 NonZeroBits, Src, DstLSB, Width);
3202 }
3203
3204 return false;
3205}
3206
3207 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3208 bool BiggerPattern,
3209 const uint64_t NonZeroBits,
3210 SDValue &Src, int &DstLSB,
3211 int &Width) {
3212 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3213
3214 EVT VT = Op.getValueType();
3215 assert((VT == MVT::i32 || VT == MVT::i64) &&
3216 "Caller guarantees VT is one of i32 or i64");
3217 (void)VT;
3218
3219 uint64_t AndImm;
3220 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
3221 return false;
3222
3223 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3224 // 1) (AndImm & (1 << POS) == 0)
3225 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3226 //
3227 // 1) and 2) don't agree so something must be wrong (e.g., in
3228 // 'SelectionDAG::computeKnownBits')
3229 assert((~AndImm & NonZeroBits) == 0 &&
3230 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3231
3232 SDValue AndOp0 = Op.getOperand(0);
3233
3234 uint64_t ShlImm;
3235 SDValue ShlOp0;
3236 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3237 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3238 ShlOp0 = AndOp0.getOperand(0);
3239 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3240 isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
3241 ShlImm)) {
3242 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3243
3244 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3245 SDValue ShlVal = AndOp0.getOperand(0);
3246
3247 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3248 // expect VT to be MVT::i32.
3249 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3250
3251 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3252 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3253 } else
3254 return false;
3255
3256 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3257 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3258 // AndOp0+AND.
3259 if (!BiggerPattern && !AndOp0.hasOneUse())
3260 return false;
3261
3262 DstLSB = llvm::countr_zero(NonZeroBits);
3263 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3264
3265 // Bail out on large Width. This happens when no proper combining / constant
3266 // folding was performed.
3267 if (Width >= (int)VT.getSizeInBits()) {
3268 // If VT is i64, Width > 64 is insensible since NonZeroBits is uint64_t, and
3269 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3270 // "val".
3271 // If VT is i32, what Width >= 32 means:
3272 // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
3273 // demands at least 'Width' bits (after dag-combiner). This together with
3274 // the `any_extend` Op (undefined higher bits) indicates a missed combination
3275 // when lowering the 'and' IR instruction to a machine IR instruction.
3276 LLVM_DEBUG(
3277 dbgs()
3278 << "Found large Width in bit-field-positioning -- this indicates no "
3279 "proper combining / constant folding was performed\n");
3280 return false;
3281 }
3282
3283 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3284 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3285 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3286 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3287 // which case it is not profitable to insert an extra shift.
3288 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3289 return false;
3290
3291 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3292 return true;
3293}
3294
3295// For node (shl (and val, mask), N), returns true if the node is equivalent to
3296// UBFIZ.
3297static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3298 SDValue &Src, int &DstLSB,
3299 int &Width) {
3300 // Caller should have verified that N is a left shift with constant shift
3301 // amount; asserts that.
3302 assert(Op.getOpcode() == ISD::SHL &&
3303 "Op.getNode() should be a SHL node to call this function");
3304 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3305 "Op.getNode() should shift ShlImm to call this function");
3306
3307 uint64_t AndImm = 0;
3308 SDValue Op0 = Op.getOperand(0);
3309 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3310 return false;
3311
3312 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3313 if (isMask_64(ShiftedAndImm)) {
3314 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3315 // should end with Mask, and could be prefixed with random bits if those
3316 // bits are shifted out.
3317 //
3318 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3319 // the bits of the AND result corresponding to them are shifted out, so it's fine
3320 // to not extract them.
3321 Width = llvm::countr_one(ShiftedAndImm);
3322 DstLSB = ShlImm;
3323 Src = Op0.getOperand(0);
3324 return true;
3325 }
3326 return false;
3327}
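
// A minimal worked example (illustrative), assuming an i64 node and
// placeholder registers:
//   Op = (shl (and x1, 0xff), 8)
//   -> Width = 8, DstLSB = 8, Src = x1, so the caller can select the node as
//      ubfiz x0, x1, #8, #8.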
3328
3329static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3330 bool BiggerPattern,
3331 const uint64_t NonZeroBits,
3332 SDValue &Src, int &DstLSB,
3333 int &Width) {
3334 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3335
3336 EVT VT = Op.getValueType();
3337 assert((VT == MVT::i32 || VT == MVT::i64) &&
3338 "Caller guarantees that type is i32 or i64");
3339 (void)VT;
3340
3341 uint64_t ShlImm;
3342 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3343 return false;
3344
3345 if (!BiggerPattern && !Op.hasOneUse())
3346 return false;
3347
3348 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3349 return true;
3350
3351 DstLSB = llvm::countr_zero(NonZeroBits);
3352 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3353
3354 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3355 return false;
3356
3357 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3358 return true;
3359}
3360
3361static bool isShiftedMask(uint64_t Mask, EVT VT) {
3362 assert(VT == MVT::i32 || VT == MVT::i64);
3363 if (VT == MVT::i32)
3364 return isShiftedMask_32(Mask);
3365 return isShiftedMask_64(Mask);
3366}
3367
3368// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3369// inserted only sets known zero bits.
3370static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
3371 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3372
3373 EVT VT = N->getValueType(0);
3374 if (VT != MVT::i32 && VT != MVT::i64)
3375 return false;
3376
3377 unsigned BitWidth = VT.getSizeInBits();
3378
3379 uint64_t OrImm;
3380 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3381 return false;
3382
3383 // Skip this transformation if the ORR immediate can be encoded in the ORR.
3384 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3385 // performance neutral.
3386 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
3387 return false;
3388
3389 uint64_t MaskImm;
3390 SDValue And = N->getOperand(0);
3391 // Must be a single use AND with an immediate operand.
3392 if (!And.hasOneUse() ||
3393 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3394 return false;
3395
3396 // Compute the Known Zero for the AND as this allows us to catch more general
3397 // cases than just looking for AND with imm.
3398 KnownBits Known = CurDAG->computeKnownBits(And);
3399
3400 // Non-zero in the sense that they're not provably zero, which is the key
3401 // point if we want to use this value.
3402 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3403
3404 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3405 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3406 return false;
3407
3408 // The bits being inserted must only set those bits that are known to be zero.
3409 if ((OrImm & NotKnownZero) != 0) {
3410 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3411 // currently handle this case.
3412 return false;
3413 }
3414
3415 // BFI/BFXIL dst, src, #lsb, #width.
3416 int LSB = llvm::countr_one(NotKnownZero);
3417 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3418
3419 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3420 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3421 unsigned ImmS = Width - 1;
3422
3423 // If we're creating a BFI instruction avoid cases where we need more
3424 // instructions to materialize the BFI constant as compared to the original
3425 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3426 // should be no worse in this case.
3427 bool IsBFI = LSB != 0;
3428 uint64_t BFIImm = OrImm >> LSB;
3429 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3430 // We have a BFI instruction and we know the constant can't be materialized
3431 // with a ORR-immediate with the zero register.
3432 unsigned OrChunks = 0, BFIChunks = 0;
3433 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3434 if (((OrImm >> Shift) & 0xFFFF) != 0)
3435 ++OrChunks;
3436 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3437 ++BFIChunks;
3438 }
3439 if (BFIChunks > OrChunks)
3440 return false;
3441 }
3442
3443 // Materialize the constant to be inserted.
3444 SDLoc DL(N);
3445 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3446 SDNode *MOVI = CurDAG->getMachineNode(
3447 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3448
3449 // Create the BFI/BFXIL instruction.
3450 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3451 CurDAG->getTargetConstant(ImmR, DL, VT),
3452 CurDAG->getTargetConstant(ImmS, DL, VT)};
3453 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3454 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3455 return true;
3456}
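
// A minimal worked example (illustrative), assuming i32 and placeholder
// registers:
//   (or (and w1, 0xfffff000), 0x555)
//   The AND makes the low 12 bits known zero (0xfff, a shifted mask), so
//   LSB = 0, Width = 12, ImmR = 0, ImmS = 11, and the node is selected as:
//     mov   w8, #0x555
//     bfxil w1, w8, #0, #12          ; BFMWri w1, w8, 0, 11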
3457
3458static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
3459 SDValue &ShiftedOperand,
3460 uint64_t &EncodedShiftImm) {
3461 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3462 if (!Dst.hasOneUse())
3463 return false;
3464
3465 EVT VT = Dst.getValueType();
3466 assert((VT == MVT::i32 || VT == MVT::i64) &&
3467 "Caller should guarantee that VT is one of i32 or i64");
3468 const unsigned SizeInBits = VT.getSizeInBits();
3469
3470 SDLoc DL(Dst.getNode());
3471 uint64_t AndImm, ShlImm;
3472 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3473 isShiftedMask_64(AndImm)) {
3474 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3475 SDValue DstOp0 = Dst.getOperand(0);
3476 if (!DstOp0.hasOneUse())
3477 return false;
3478
3479 // An example to illustrate the transformation
3480 // From:
3481 // lsr x8, x1, #1
3482 // and x8, x8, #0x3f80
3483 // bfxil x8, x1, #0, #7
3484 // To:
3485 // and x8, x1, #0x7f
3486 // ubfx x9, x1, #8, #7
3487 // orr x8, x8, x9, lsl #7
3488 //
3489 // The number of instructions remains the same, but ORR is faster than BFXIL
3490 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3491 // the dependency chain is improved after the transformation.
3492 uint64_t SrlImm;
3493 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3494 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3495 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3496 unsigned MaskWidth =
3497 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3498 unsigned UBFMOpc =
3499 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3500 SDNode *UBFMNode = CurDAG->getMachineNode(
3501 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3502 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3503 VT),
3504 CurDAG->getTargetConstant(
3505 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3506 ShiftedOperand = SDValue(UBFMNode, 0);
3507 EncodedShiftImm = AArch64_AM::getShifterImm(
3508 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3509 return true;
3510 }
3511 }
3512 return false;
3513 }
3514
3515 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3516 ShiftedOperand = Dst.getOperand(0);
3517 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3518 return true;
3519 }
3520
3521 uint64_t SrlImm;
3522 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3523 ShiftedOperand = Dst.getOperand(0);
3524 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3525 return true;
3526 }
3527 return false;
3528}
3529
3530// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3531// the operands and select it to AArch64::ORR with shifted registers if
3532// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3533static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3534 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3535 const bool BiggerPattern) {
3536 EVT VT = N->getValueType(0);
3537 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3538 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3539 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3540 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3541 assert((VT == MVT::i32 || VT == MVT::i64) &&
3542 "Expect result type to be i32 or i64 since N is combinable to BFM");
3543 SDLoc DL(N);
3544
3545 // Bail out if BFM simplifies away one node in BFM Dst.
3546 if (OrOpd1 != Dst)
3547 return false;
3548
3549 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3550 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3551 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3552 if (BiggerPattern) {
3553 uint64_t SrcAndImm;
3554 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3555 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3556 // OrOpd0 = AND Src, #Mask
3557 // So BFM simplifies away one AND node from Src and doesn't simplify away
3558 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3559 // one node (from Rd), ORR is better since it has higher throughput and
3560 // smaller latency than BFM on many AArch64 processors (and for the rest
3561 // ORR is at least as good as BFM).
3562 SDValue ShiftedOperand;
3563 uint64_t EncodedShiftImm;
3564 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3565 EncodedShiftImm)) {
3566 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3567 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3568 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3569 return true;
3570 }
3571 }
3572 return false;
3573 }
3574
3575 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3576
3577 uint64_t ShlImm;
3578 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3579 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3580 SDValue Ops[] = {
3581 Dst, Src,
3582 CurDAG->getTargetConstant(
3583 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3584 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3585 return true;
3586 }
3587
3588 // Select the following pattern to left-shifted operand rather than BFI.
3589 // %val1 = op ..
3590 // %val2 = shl %val1, #imm
3591 // %res = or %val1, %val2
3592 //
3593 // If N is selected to be BFI, we know that
3594 // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded into
3595 // BFI) 2) OrOpd1 would be the destination operand (i.e., preserved)
3596 //
3597 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3598 if (OrOpd0.getOperand(0) == OrOpd1) {
3599 SDValue Ops[] = {
3600 OrOpd1, OrOpd1,
3601 CurDAG->getTargetConstant(
3602 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3603 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3604 return true;
3605 }
3606 }
3607
3608 uint64_t SrlImm;
3609 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3610 // Select the following pattern to right-shifted operand rather than BFXIL.
3611 // %val1 = op ..
3612 // %val2 = lshr %val1, #imm
3613 // %res = or %val1, %val2
3614 //
3615 // If N is selected to be BFXIL, we know that
3616 // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded into
3617 // BFXIL) 2) OrOpd1 would be the destination operand (i.e., preserved)
3618 //
3619 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3620 if (OrOpd0.getOperand(0) == OrOpd1) {
3621 SDValue Ops[] = {
3622 OrOpd1, OrOpd1,
3623 CurDAG->getTargetConstant(
3624 AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
3625 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3626 return true;
3627 }
3628 }
3629
3630 return false;
3631}
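
// A minimal worked example (illustrative), assuming i32 and placeholder
// registers, for the !BiggerPattern left-shift case handled above:
//   %val2 = shl %val1, #7
//   %res  = or  %val1, %val2
//   selects to a single shifted ORR instead of a BFI:
//     orr w0, w1, w1, lsl #7         ; %val1 in w1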
3632
3633static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3634 SelectionDAG *CurDAG) {
3635 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3636
3637 EVT VT = N->getValueType(0);
3638 if (VT != MVT::i32 && VT != MVT::i64)
3639 return false;
3640
3641 unsigned BitWidth = VT.getSizeInBits();
3642
3643 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3644 // have the expected shape. Try to undo that.
3645
3646 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3647 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3648
3649 // Given a OR operation, check if we have the following pattern
3650 // ubfm c, b, imm, imm2 (or something that does the same jobs, see
3651 // isBitfieldExtractOp)
3652 // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
3653 // countTrailingZeros(mask2) == imm2 - imm + 1
3654 // f = d | c
3655 // if yes, replace the OR instruction with:
3656 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3657
3658 // OR is commutative, check all combinations of operand order and values of
3659 // BiggerPattern, i.e.
3660 // Opd0, Opd1, BiggerPattern=false
3661 // Opd1, Opd0, BiggerPattern=false
3662 // Opd0, Opd1, BiggerPattern=true
3663 // Opd1, Opd0, BiggerPattern=true
3664 // Several of these combinations may match, so check with BiggerPattern=false
3665 // first since that will produce better results by matching more instructions
3666 // and/or inserting fewer extra instructions.
3667 for (int I = 0; I < 4; ++I) {
3668
3669 SDValue Dst, Src;
3670 unsigned ImmR, ImmS;
3671 bool BiggerPattern = I / 2;
3672 SDValue OrOpd0Val = N->getOperand(I % 2);
3673 SDNode *OrOpd0 = OrOpd0Val.getNode();
3674 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3675 SDNode *OrOpd1 = OrOpd1Val.getNode();
3676
3677 unsigned BFXOpc;
3678 int DstLSB, Width;
3679 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3680 NumberOfIgnoredLowBits, BiggerPattern)) {
3681 // Check that the returned opcode is compatible with the pattern,
3682 // i.e., same type and zero extended (U and not S)
3683 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3684 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3685 continue;
3686
3687 // Compute the width of the bitfield insertion
3688 DstLSB = 0;
3689 Width = ImmS - ImmR + 1;
3690 // FIXME: This constraint is to catch bitfield insertion; we may
3691 // want to widen the pattern if we want to grab the general bitfield
3692 // move case
3693 if (Width <= 0)
3694 continue;
3695
3696 // If the mask on the insertee is correct, we have a BFXIL operation. We
3697 // can share the ImmR and ImmS values from the already-computed UBFM.
3698 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3699 BiggerPattern,
3700 Src, DstLSB, Width)) {
3701 ImmR = (BitWidth - DstLSB) % BitWidth;
3702 ImmS = Width - 1;
3703 } else
3704 continue;
3705
3706 // Check the second part of the pattern
3707 EVT VT = OrOpd1Val.getValueType();
3708 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3709
3710 // Compute the Known Zero for the candidate of the first operand.
3711 // This allows us to catch more general cases than just looking for
3712 // AND with imm. Indeed, simplify-demanded-bits may have removed
3713 // the AND instruction because it proves it was useless.
3714 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3715
3716 // Check if there is enough room for the second operand to appear
3717 // in the first one
3718 APInt BitsToBeInserted =
3719 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3720
3721 if ((BitsToBeInserted & ~Known.Zero) != 0)
3722 continue;
3723
3724 // Set the first operand
3725 uint64_t Imm;
3726 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3727 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3728 // In that case, we can eliminate the AND
3729 Dst = OrOpd1->getOperand(0);
3730 else
3731 // Maybe the AND has been removed by simplify-demanded-bits
3732 // or is useful because it discards more bits
3733 Dst = OrOpd1Val;
3734
3735 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3736 // with shifted operand is more efficient.
3737 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3738 BiggerPattern))
3739 return true;
3740
3741 // both parts match
3742 SDLoc DL(N);
3743 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3744 CurDAG->getTargetConstant(ImmS, DL, VT)};
3745 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3746 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3747 return true;
3748 }
3749
3750 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3751 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3752 // mask (e.g., 0x000ffff0).
3753 uint64_t Mask0Imm, Mask1Imm;
3754 SDValue And0 = N->getOperand(0);
3755 SDValue And1 = N->getOperand(1);
3756 if (And0.hasOneUse() && And1.hasOneUse() &&
3757 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3758 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3759 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3760 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3761
3762 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3763 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3764 // bits to be inserted.
3765 if (isShiftedMask(Mask0Imm, VT)) {
3766 std::swap(And0, And1);
3767 std::swap(Mask0Imm, Mask1Imm);
3768 }
3769
3770 SDValue Src = And1->getOperand(0);
3771 SDValue Dst = And0->getOperand(0);
3772 unsigned LSB = llvm::countr_zero(Mask1Imm);
3773 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3774
3775 // The BFXIL inserts the low-order bits from a source register, so right
3776 // shift the needed bits into place.
3777 SDLoc DL(N);
3778 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3779 uint64_t LsrImm = LSB;
3780 if (Src->hasOneUse() &&
3781 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3782 (LsrImm + LSB) < BitWidth) {
3783 Src = Src->getOperand(0);
3784 LsrImm += LSB;
3785 }
3786
3787 SDNode *LSR = CurDAG->getMachineNode(
3788 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3789 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3790
3791 // BFXIL is an alias of BFM, so translate to BFM operands.
3792 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3793 unsigned ImmS = Width - 1;
3794
3795 // Create the BFXIL instruction.
3796 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3797 CurDAG->getTargetConstant(ImmR, DL, VT),
3798 CurDAG->getTargetConstant(ImmS, DL, VT)};
3799 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3800 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3801 return true;
3802 }
3803
3804 return false;
3805}
3806
3807bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3808 if (N->getOpcode() != ISD::OR)
3809 return false;
3810
3811 APInt NUsefulBits;
3812 getUsefulBits(SDValue(N, 0), NUsefulBits);
3813
3814 // If all bits are not useful, just return UNDEF.
3815 if (!NUsefulBits) {
3816 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3817 return true;
3818 }
3819
3820 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3821 return true;
3822
3823 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3824}
3825
3826/// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3827/// equivalent of a left shift by a constant amount followed by an and masking
3828/// out a contiguous set of bits.
3829bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3830 if (N->getOpcode() != ISD::AND)
3831 return false;
3832
3833 EVT VT = N->getValueType(0);
3834 if (VT != MVT::i32 && VT != MVT::i64)
3835 return false;
3836
3837 SDValue Op0;
3838 int DstLSB, Width;
3839 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3840 Op0, DstLSB, Width))
3841 return false;
3842
3843 // ImmR is the rotate right amount.
3844 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3845 // ImmS is the most significant bit of the source to be moved.
3846 unsigned ImmS = Width - 1;
3847
3848 SDLoc DL(N);
3849 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3850 CurDAG->getTargetConstant(ImmS, DL, VT)};
3851 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3852 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3853 return true;
3854}
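
// A minimal worked example (illustrative), assuming i32 and placeholder
// registers:
//   (and (shl x, 3), 0x78)          ; mask covers bits 3..6
//   -> DstLSB = 3, Width = 4, ImmR = 29, ImmS = 3, i.e.
//      ubfiz w0, w1, #3, #4         ; UBFMWri w0, w1, 29, 3, with x in w1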
3855
3856/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3857/// variable shift/rotate instructions.
3858bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3859 EVT VT = N->getValueType(0);
3860
3861 unsigned Opc;
3862 switch (N->getOpcode()) {
3863 case ISD::ROTR:
3864 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3865 break;
3866 case ISD::SHL:
3867 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3868 break;
3869 case ISD::SRL:
3870 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3871 break;
3872 case ISD::SRA:
3873 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3874 break;
3875 default:
3876 return false;
3877 }
3878
3879 uint64_t Size;
3880 uint64_t Bits;
3881 if (VT == MVT::i32) {
3882 Bits = 5;
3883 Size = 32;
3884 } else if (VT == MVT::i64) {
3885 Bits = 6;
3886 Size = 64;
3887 } else
3888 return false;
3889
3890 SDValue ShiftAmt = N->getOperand(1);
3891 SDLoc DL(N);
3892 SDValue NewShiftAmt;
3893
3894 // Skip over an extend of the shift amount.
3895 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3896 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3897 ShiftAmt = ShiftAmt->getOperand(0);
3898
3899 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3900 SDValue Add0 = ShiftAmt->getOperand(0);
3901 SDValue Add1 = ShiftAmt->getOperand(1);
3902 uint64_t Add0Imm;
3903 uint64_t Add1Imm;
3904 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
3905 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3906 // to avoid the ADD/SUB.
3907 NewShiftAmt = Add0;
3908 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3909 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
3910 (Add0Imm % Size == 0)) {
3911 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3912 // to generate a NEG instead of a SUB from a constant.
3913 unsigned NegOpc;
3914 unsigned ZeroReg;
3915 EVT SubVT = ShiftAmt->getValueType(0);
3916 if (SubVT == MVT::i32) {
3917 NegOpc = AArch64::SUBWrr;
3918 ZeroReg = AArch64::WZR;
3919 } else {
3920 assert(SubVT == MVT::i64);
3921 NegOpc = AArch64::SUBXrr;
3922 ZeroReg = AArch64::XZR;
3923 }
3924 SDValue Zero =
3925 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3926 MachineSDNode *Neg =
3927 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
3928 NewShiftAmt = SDValue(Neg, 0);
3929 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3930 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
3931 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3932 // to generate a NOT instead of a SUB from a constant.
3933 unsigned NotOpc;
3934 unsigned ZeroReg;
3935 EVT SubVT = ShiftAmt->getValueType(0);
3936 if (SubVT == MVT::i32) {
3937 NotOpc = AArch64::ORNWrr;
3938 ZeroReg = AArch64::WZR;
3939 } else {
3940 assert(SubVT == MVT::i64);
3941 NotOpc = AArch64::ORNXrr;
3942 ZeroReg = AArch64::XZR;
3943 }
3944 SDValue Zero =
3945 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3946 MachineSDNode *Not =
3947 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
3948 NewShiftAmt = SDValue(Not, 0);
3949 } else
3950 return false;
3951 } else {
3952 // If the shift amount is masked with an AND, check that the mask covers the
3953 // bits that are implicitly ANDed off by the above opcodes and if so, skip
3954 // the AND.
3955 uint64_t MaskImm;
3956 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
3957 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
3958 return false;
3959
3960 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
3961 return false;
3962
3963 NewShiftAmt = ShiftAmt->getOperand(0);
3964 }
3965
3966 // Narrow/widen the shift amount to match the size of the shift operation.
3967 if (VT == MVT::i32)
3968 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
3969 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
3970 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
3971 MachineSDNode *Ext = CurDAG->getMachineNode(
3972 AArch64::SUBREG_TO_REG, DL, VT,
3973 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
3974 NewShiftAmt = SDValue(Ext, 0);
3975 }
3976
3977 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
3978 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3979 return true;
3980}
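
// Minimal worked examples (illustrative), assuming i64 and placeholder
// registers (x in x1, amt in x2):
//   (srl x, (and amt, 63))  ->  lsr x0, x1, x2    ; the AND is dropped since
//                                                 ; LSRV already uses amt % 64
//   (shl x, (sub 64, amt))  ->  neg x8, x2        ; 64 == 0 (mod 64)
//                               lsl x0, x1, x8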
3981
3982static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
3983 SDValue &FixedPos,
3984 unsigned RegWidth,
3985 bool isReciprocal) {
3986 APFloat FVal(0.0);
3987 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
3988 FVal = CN->getValueAPF();
3989 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
3990 // Some otherwise illegal constants are allowed in this case.
3991 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
3992 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
3993 return false;
3994
3995 ConstantPoolSDNode *CN =
3996 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
3997 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
3998 } else
3999 return false;
4000
4001 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
4002 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
4003 // x-register.
4004 //
4005 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
4006 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
4007 // integers.
4008 bool IsExact;
4009
4010 if (isReciprocal)
4011 if (!FVal.getExactInverse(&FVal))
4012 return false;
4013
4014 // fbits is between 1 and 64 in the worst-case, which means the fmul
4015 // could have 2^64 as an actual operand. Need 65 bits of precision.
4016 APSInt IntVal(65, true);
4017 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
4018
4019 // N.b. isPowerOf2 also checks for > 0.
4020 if (!IsExact || !IntVal.isPowerOf2())
4021 return false;
4022 unsigned FBits = IntVal.logBase2();
4023
4024 // Checks above should have guaranteed that we haven't lost information in
4025 // finding FBits, but it must still be in range.
4026 if (FBits == 0 || FBits > RegWidth) return false;
4027
4028 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
4029 return true;
4030}
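
// A worked example (illustrative): for (fp_to_sint (fmul x, 65536.0)) with a
// 32-bit destination, the multiplier is 2^16, so FBits = 16 and the whole
// sequence can be selected as a single fixed-point convert such as
// fcvtzs w0, s0, #16 (register names are placeholders).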
4031
4032bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
4033 unsigned RegWidth) {
4034 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4035 false);
4036}
4037
4038bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
4039 SDValue &FixedPos,
4040 unsigned RegWidth) {
4041 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4042 true);
4043}
4044
4045// Inspects a register string of the form o0:op1:CRn:CRm:op2, gets the fields
4046// of the string, obtains the integer values from them, and combines these
4047// into a single value to be used in the MRS/MSR instruction.
4048static int getIntOperandFromRegisterString(StringRef RegString) {
4049 SmallVector<StringRef, 5> Fields;
4050 RegString.split(Fields, ':');
4051
4052 if (Fields.size() == 1)
4053 return -1;
4054
4055 assert(Fields.size() == 5
4056 && "Invalid number of fields in read register string");
4057
4058 SmallVector<int, 5> Ops;
4059 bool AllIntFields = true;
4060
4061 for (StringRef Field : Fields) {
4062 unsigned IntField;
4063 AllIntFields &= !Field.getAsInteger(10, IntField);
4064 Ops.push_back(IntField);
4065 }
4066
4067 assert(AllIntFields &&
4068 "Unexpected non-integer value in special register string.");
4069 (void)AllIntFields;
4070
4071 // Need to combine the integer fields of the string into a single value
4072 // based on the bit encoding of MRS/MSR instruction.
4073 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
4074 (Ops[3] << 3) | (Ops[4]);
4075}
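
// A worked example (illustrative): the register string "1:2:7:4:5" packs to
//   (1 << 14) | (2 << 11) | (7 << 7) | (4 << 3) | 5 = 21413
// which becomes the immediate operand of the MRS/MSR instruction.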
4076
4077// Lower the read_register intrinsic to an MRS instruction node if the special
4078// register string argument is either of the form detailed in the ALCE (the
4079// form described in getIntOperandFromRegisterString) or is a named register
4080// known by the MRS SysReg mapper.
4081bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
4082 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4083 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4084 SDLoc DL(N);
4085
4086 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
4087
4088 unsigned Opcode64Bit = AArch64::MRS;
4089 int Imm = getIntOperandFromRegisterString(RegString->getString());
4090 if (Imm == -1) {
4091 // No match, so use the sysreg mapper to map the remaining possible strings to
4092 // the value for the register to be used for the instruction operand.
4093 const auto *TheReg =
4094 AArch64SysReg::lookupSysRegByName(RegString->getString());
4095 if (TheReg && TheReg->Readable &&
4096 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4097 Imm = TheReg->Encoding;
4098 else
4099 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4100
4101 if (Imm == -1) {
4102 // Still no match, see if this is "pc" or give up.
4103 if (!ReadIs128Bit && RegString->getString() == "pc") {
4104 Opcode64Bit = AArch64::ADR;
4105 Imm = 0;
4106 } else {
4107 return false;
4108 }
4109 }
4110 }
4111
4112 SDValue InChain = N->getOperand(0);
4113 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
4114 if (!ReadIs128Bit) {
4115 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
4116 {SysRegImm, InChain});
4117 } else {
4118 SDNode *MRRS = CurDAG->getMachineNode(
4119 AArch64::MRRS, DL,
4120 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
4121 {SysRegImm, InChain});
4122
4123 // Sysregs are not endian. The even register always contains the low half
4124 // of the register.
4125 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
4126 SDValue(MRRS, 0));
4127 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
4128 SDValue(MRRS, 0));
4129 SDValue OutChain = SDValue(MRRS, 1);
4130
4131 ReplaceUses(SDValue(N, 0), Lo);
4132 ReplaceUses(SDValue(N, 1), Hi);
4133 ReplaceUses(SDValue(N, 2), OutChain);
4134 };
4135 return true;
4136}
4137
4138// Lower the write_register intrinsic to an MSR instruction node if the special
4139// register string argument is either of the form detailed in the ALCE (the
4140// form described in getIntOperandFromRegisterString) or is a named register
4141// known by the MSR SysReg mapper.
4142bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
4143 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4144 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4145 SDLoc DL(N);
4146
4147 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
4148
4149 if (!WriteIs128Bit) {
4150 // Check if the register was one of those allowed as the pstatefield value
4151 // in the MSR (immediate) instruction. To accept the values allowed in the
4152 // pstatefield for the MSR (immediate) instruction, we also require that an
4153 // immediate value has been provided as an argument; we know that this is
4154 // the case as it has been ensured by semantic checking.
4155 auto trySelectPState = [&](auto PMapper, unsigned State) {
4156 if (PMapper) {
4157 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
4158 "Expected a constant integer expression.");
4159 unsigned Reg = PMapper->Encoding;
4160 uint64_t Immed = N->getConstantOperandVal(2);
4161 CurDAG->SelectNodeTo(
4162 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
4163 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
4164 return true;
4165 }
4166 return false;
4167 };
4168
4169 if (trySelectPState(
4170 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
4171 AArch64::MSRpstateImm4))
4172 return true;
4173 if (trySelectPState(
4174 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
4175 AArch64::MSRpstateImm1))
4176 return true;
4177 }
4178
4179 int Imm = getIntOperandFromRegisterString(RegString->getString());
4180 if (Imm == -1) {
4181 // Use the sysreg mapper to attempt to map the remaining possible strings
4182 // to the value for the register to be used for the MSR (register)
4183 // instruction operand.
4184 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
4185 if (TheReg && TheReg->Writeable &&
4186 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4187 Imm = TheReg->Encoding;
4188 else
4189 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4190
4191 if (Imm == -1)
4192 return false;
4193 }
4194
4195 SDValue InChain = N->getOperand(0);
4196 if (!WriteIs128Bit) {
4197 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
4198 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4199 N->getOperand(2), InChain);
4200 } else {
4201 // No endian swap. The lower half always goes into the even subreg, and the
4202 // higher half always into the odd subreg.
4203 SDNode *Pair = CurDAG->getMachineNode(
4204 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
4205 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
4206 MVT::i32),
4207 N->getOperand(2),
4208 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
4209 N->getOperand(3),
4210 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
4211
4212 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
4213 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4214 SDValue(Pair, 0), InChain);
4215 }
4216
4217 return true;
4218}
4219
4220/// We've got special pseudo-instructions for these
4221bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4222 unsigned Opcode;
4223 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4224
4225 // Leave IR for LSE if subtarget supports it.
4226 if (Subtarget->hasLSE()) return false;
4227
4228 if (MemTy == MVT::i8)
4229 Opcode = AArch64::CMP_SWAP_8;
4230 else if (MemTy == MVT::i16)
4231 Opcode = AArch64::CMP_SWAP_16;
4232 else if (MemTy == MVT::i32)
4233 Opcode = AArch64::CMP_SWAP_32;
4234 else if (MemTy == MVT::i64)
4235 Opcode = AArch64::CMP_SWAP_64;
4236 else
4237 llvm_unreachable("Unknown AtomicCmpSwap type");
4238
4239 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4240 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4241 N->getOperand(0)};
4242 SDNode *CmpSwap = CurDAG->getMachineNode(
4243 Opcode, SDLoc(N),
4244 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4245
4246 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4247 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4248
4249 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4250 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4251 CurDAG->RemoveDeadNode(N);
4252
4253 return true;
4254}
4255
4256bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4257 SDValue &Shift, bool Negate) {
4258 if (!isa<ConstantSDNode>(N))
4259 return false;
4260
4261 SDLoc DL(N);
4262 APInt Val =
4263 cast<ConstantSDNode>(N)->getAPIntValue().trunc(VT.getFixedSizeInBits());
4264
4265 if (Negate)
4266 Val = -Val;
4267
4268 switch (VT.SimpleTy) {
4269 case MVT::i8:
4270 // All immediates are supported.
4271 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4272 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4273 return true;
4274 case MVT::i16:
4275 case MVT::i32:
4276 case MVT::i64:
4277 // Support 8bit unsigned immediates.
4278 if ((Val & ~0xff) == 0) {
4279 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4280 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4281 return true;
4282 }
4283 // Support 16bit unsigned immediates that are a multiple of 256.
4284 if ((Val & ~0xff00) == 0) {
4285 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4286 Imm = CurDAG->getTargetConstant(Val.lshr(8).getZExtValue(), DL, MVT::i32);
4287 return true;
4288 }
4289 break;
4290 default:
4291 break;
4292 }
4293
4294 return false;
4295}
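
// Worked examples (illustrative), for a .h/.s/.d element type:
//   immediate 0x12   -> Imm = 0x12, Shift = 0
//   immediate 0x1200 -> Imm = 0x12, Shift = 8   (16-bit multiple of 256)
//   immediate 0x1234 -> no match (not encodable as imm8 or imm8, lsl #8)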
4296
4297bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4298 SDValue &Imm, SDValue &Shift,
4299 bool Negate) {
4300 if (!isa<ConstantSDNode>(N))
4301 return false;
4302
4303 SDLoc DL(N);
4304 int64_t Val = cast<ConstantSDNode>(N)
4305 ->getAPIntValue()
4306 .trunc(VT.getFixedSizeInBits())
4307 .getSExtValue();
4308
4309 if (Negate)
4310 Val = -Val;
4311
4312 // Signed saturating instructions treat their immediate operand as unsigned,
4313 // whereas the related intrinsics define their operands to be signed. This
4314 // means we can only use the immediate form when the operand is non-negative.
4315 if (Val < 0)
4316 return false;
4317
4318 switch (VT.SimpleTy) {
4319 case MVT::i8:
4320 // All positive immediates are supported.
4321 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4322 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4323 return true;
4324 case MVT::i16:
4325 case MVT::i32:
4326 case MVT::i64:
4327 // Support 8bit positive immediates.
4328 if (Val <= 255) {
4329 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4330 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4331 return true;
4332 }
4333 // Support 16bit positive immediates that are a multiple of 256.
4334 if (Val <= 65280 && Val % 256 == 0) {
4335 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4336 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4337 return true;
4338 }
4339 break;
4340 default:
4341 break;
4342 }
4343
4344 return false;
4345}
4346
4347bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4348 SDValue &Shift) {
4349 if (!isa<ConstantSDNode>(N))
4350 return false;
4351
4352 SDLoc DL(N);
4353 int64_t Val = cast<ConstantSDNode>(N)
4354 ->getAPIntValue()
4355 .trunc(VT.getFixedSizeInBits())
4356 .getSExtValue();
4357 int32_t ImmVal, ShiftVal;
4358 if (!AArch64_AM::isSVECpyDupImm(VT.getScalarSizeInBits(), Val, ImmVal,
4359 ShiftVal))
4360 return false;
4361
4362 Shift = CurDAG->getTargetConstant(ShiftVal, DL, MVT::i32);
4363 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
4364 return true;
4365}
4366
4367bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4368 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4369 int64_t ImmVal = CNode->getSExtValue();
4370 SDLoc DL(N);
4371 if (ImmVal >= -128 && ImmVal < 128) {
4372 Imm = CurDAG->getSignedTargetConstant(ImmVal, DL, MVT::i32);
4373 return true;
4374 }
4375 }
4376 return false;
4377}
4378
4379bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4380 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4381 uint64_t ImmVal = CNode->getZExtValue();
4382
4383 switch (VT.SimpleTy) {
4384 case MVT::i8:
4385 ImmVal &= 0xFF;
4386 break;
4387 case MVT::i16:
4388 ImmVal &= 0xFFFF;
4389 break;
4390 case MVT::i32:
4391 ImmVal &= 0xFFFFFFFF;
4392 break;
4393 case MVT::i64:
4394 break;
4395 default:
4396 llvm_unreachable("Unexpected type");
4397 }
4398
4399 if (ImmVal < 256) {
4400 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4401 return true;
4402 }
4403 }
4404 return false;
4405}
4406
4407bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4408 bool Invert) {
4409 uint64_t ImmVal;
4410 if (auto CI = dyn_cast<ConstantSDNode>(N))
4411 ImmVal = CI->getZExtValue();
4412 else if (auto CFP = dyn_cast<ConstantFPSDNode>(N))
4413 ImmVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
4414 else
4415 return false;
4416
4417 if (Invert)
4418 ImmVal = ~ImmVal;
4419
4420 // Shift mask depending on type size.
4421 switch (VT.SimpleTy) {
4422 case MVT::i8:
4423 ImmVal &= 0xFF;
4424 ImmVal |= ImmVal << 8;
4425 ImmVal |= ImmVal << 16;
4426 ImmVal |= ImmVal << 32;
4427 break;
4428 case MVT::i16:
4429 ImmVal &= 0xFFFF;
4430 ImmVal |= ImmVal << 16;
4431 ImmVal |= ImmVal << 32;
4432 break;
4433 case MVT::i32:
4434 ImmVal &= 0xFFFFFFFF;
4435 ImmVal |= ImmVal << 32;
4436 break;
4437 case MVT::i64:
4438 break;
4439 default:
4440 llvm_unreachable("Unexpected type");
4441 }
4442
4443 uint64_t encoding;
4444 if (!AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding))
4445 return false;
4446
4447 Imm = CurDAG->getTargetConstant(encoding, SDLoc(N), MVT::i64);
4448 return true;
4449}
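
// A worked example (illustrative): for MVT::i16 the immediate 0x00ff is first
// replicated to 0x00ff00ff00ff00ff and then encoded as a 64-bit logical
// immediate for the instruction.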
4450
4451// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4452// Rather than attempt to normalise everything we can sometimes saturate the
4453// shift amount during selection. This function also allows for consistent
4454// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4455// required by the instructions.
4456bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4457 uint64_t High, bool AllowSaturation,
4458 SDValue &Imm) {
4459 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4460 uint64_t ImmVal = CN->getZExtValue();
4461
4462 // Reject shift amounts that are too small.
4463 if (ImmVal < Low)
4464 return false;
4465
4466 // Reject or saturate shift amounts that are too big.
4467 if (ImmVal > High) {
4468 if (!AllowSaturation)
4469 return false;
4470 ImmVal = High;
4471 }
4472
4473 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4474 return true;
4475 }
4476
4477 return false;
4478}
4479
4480bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4481 // tagp(FrameIndex, IRGstack, tag_offset):
4482 // since the offset between FrameIndex and IRGstack is a compile-time
4483 // constant, this can be lowered to a single ADDG instruction.
4484 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4485 return false;
4486 }
4487
4488 SDValue IRG_SP = N->getOperand(2);
4489 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4490 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4491 return false;
4492 }
4493
4494 const TargetLowering *TLI = getTargetLowering();
4495 SDLoc DL(N);
4496 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4497 SDValue FiOp = CurDAG->getTargetFrameIndex(
4498 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4499 int TagOffset = N->getConstantOperandVal(3);
4500
4501 SDNode *Out = CurDAG->getMachineNode(
4502 AArch64::TAGPstack, DL, MVT::i64,
4503 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4504 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4505 ReplaceNode(N, Out);
4506 return true;
4507}
4508
4509void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4510 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4511 "llvm.aarch64.tagp third argument must be an immediate");
4512 if (trySelectStackSlotTagP(N))
4513 return;
4514 // FIXME: above applies in any case when offset between Op1 and Op2 is a
4515 // compile-time constant, not just for stack allocations.
4516
4517 // General case for unrelated pointers in Op1 and Op2.
4518 SDLoc DL(N);
4519 int TagOffset = N->getConstantOperandVal(3);
4520 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4521 {N->getOperand(1), N->getOperand(2)});
4522 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4523 {SDValue(N1, 0), N->getOperand(2)});
4524 SDNode *N3 = CurDAG->getMachineNode(
4525 AArch64::ADDG, DL, MVT::i64,
4526 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4527 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4528 ReplaceNode(N, N3);
4529}
4530
4531bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4532 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4533
4534 // Bail when not a "cast" like insert_subvector.
4535 if (N->getConstantOperandVal(2) != 0)
4536 return false;
4537 if (!N->getOperand(0).isUndef())
4538 return false;
4539
4540 // Bail when normal isel should do the job.
4541 EVT VT = N->getValueType(0);
4542 EVT InVT = N->getOperand(1).getValueType();
4543 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4544 return false;
4545 if (InVT.getSizeInBits() <= 128)
4546 return false;
4547
4548 // NOTE: We can only get here when doing fixed length SVE code generation.
4549 // We do manual selection because the types involved are not linked to real
4550 // registers (despite being legal) and must be coerced into SVE registers.
4551
4553 "Expected to insert into a packed scalable vector!");
4554
4555 SDLoc DL(N);
4556 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4557 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4558 N->getOperand(1), RC));
4559 return true;
4560}
4561
4562bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4563 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4564
4565 // Bail when not a "cast" like extract_subvector.
4566 if (N->getConstantOperandVal(1) != 0)
4567 return false;
4568
4569 // Bail when normal isel can do the job.
4570 EVT VT = N->getValueType(0);
4571 EVT InVT = N->getOperand(0).getValueType();
4572 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4573 return false;
4574 if (VT.getSizeInBits() <= 128)
4575 return false;
4576
4577 // NOTE: We can only get here when doing fixed length SVE code generation.
4578 // We do manual selection because the types involved are not linked to real
4579 // registers (despite being legal) and must be coerced into SVE registers.
4580
4582 "Expected to extract from a packed scalable vector!");
4583
4584 SDLoc DL(N);
4585 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4586 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4587 N->getOperand(0), RC));
4588 return true;
4589}
4590
4591bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4592 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4593
4594 SDValue N0 = N->getOperand(0);
4595 SDValue N1 = N->getOperand(1);
4596
4597 EVT VT = N->getValueType(0);
4598 SDLoc DL(N);
4599
4600 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4601 // Rotate by a constant is a funnel shift in IR which is expanded to
4602 // an OR with shifted operands.
4603 // We do the following transform:
4604 // OR N0, N1 -> xar (x, y, imm)
4605 // Where:
4606 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4607 // N0 = SHL_PRED true, V, splat(bits-imm)
4608 // V = (xor x, y)
4609 if (VT.isScalableVector() &&
4610 (Subtarget->hasSVE2() ||
4611 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4612 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4613 N1.getOpcode() != AArch64ISD::SRL_PRED)
4614 std::swap(N0, N1);
4615 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4616 N1.getOpcode() != AArch64ISD::SRL_PRED)
4617 return false;
4618
4619 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4620 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4621 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4622 return false;
4623
4624 if (N0.getOperand(1) != N1.getOperand(1))
4625 return false;
4626
4627 SDValue R1, R2;
4628 bool IsXOROperand = true;
4629 if (N0.getOperand(1).getOpcode() != ISD::XOR) {
4630 IsXOROperand = false;
4631 } else {
4632 R1 = N0.getOperand(1).getOperand(0);
4633 R2 = N1.getOperand(1).getOperand(1);
4634 }
4635
4636 APInt ShlAmt, ShrAmt;
4637 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4638 !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
4639 return false;
4640
4641 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4642 return false;
4643
4644 if (!IsXOROperand) {
4645 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4646 SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, VT, Zero);
4647 SDValue MOVIV = SDValue(MOV, 0);
4648
4649 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4650 SDNode *SubRegToReg = CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL,
4651 VT, Zero, MOVIV, ZSub);
4652
4653 R1 = N1->getOperand(1);
4654 R2 = SDValue(SubRegToReg, 0);
4655 }
4656
4657 SDValue Imm =
4658 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4659
4660 SDValue Ops[] = {R1, R2, Imm};
4661 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4662 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4663 AArch64::XAR_ZZZI_D})) {
4664 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4665 return true;
4666 }
4667 return false;
4668 }
4669
4670 // We have Neon SHA3 XAR operation for v2i64 but for types
4671 // v4i32, v8i16, v16i8 we can use SVE operations when SVE2-SHA3
4672 // is available.
4673 EVT SVT;
4674 switch (VT.getSimpleVT().SimpleTy) {
4675 case MVT::v4i32:
4676 case MVT::v2i32:
4677 SVT = MVT::nxv4i32;
4678 break;
4679 case MVT::v8i16:
4680 case MVT::v4i16:
4681 SVT = MVT::nxv8i16;
4682 break;
4683 case MVT::v16i8:
4684 case MVT::v8i8:
4685 SVT = MVT::nxv16i8;
4686 break;
4687 case MVT::v2i64:
4688 case MVT::v1i64:
4689 SVT = Subtarget->hasSHA3() ? MVT::v2i64 : MVT::nxv2i64;
4690 break;
4691 default:
4692 return false;
4693 }
4694
4695 if ((!SVT.isScalableVector() && !Subtarget->hasSHA3()) ||
4696 (SVT.isScalableVector() && !Subtarget->hasSVE2()))
4697 return false;
4698
4699 if (N0->getOpcode() != AArch64ISD::VSHL ||
4700 N1->getOpcode() != AArch64ISD::VLSHR)
4701 return false;
4702
4703 if (N0->getOperand(0) != N1->getOperand(0))
4704 return false;
4705
4706 SDValue R1, R2;
4707 bool IsXOROperand = true;
4708 if (N1->getOperand(0)->getOpcode() != ISD::XOR) {
4709 IsXOROperand = false;
4710 } else {
4711 SDValue XOR = N0.getOperand(0);
4712 R1 = XOR.getOperand(0);
4713 R2 = XOR.getOperand(1);
4714 }
4715
4716 unsigned HsAmt = N0.getConstantOperandVal(1);
4717 unsigned ShAmt = N1.getConstantOperandVal(1);
4718
4719 SDValue Imm = CurDAG->getTargetConstant(
4720 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4721
4722 unsigned VTSizeInBits = VT.getScalarSizeInBits();
4723 if (ShAmt + HsAmt != VTSizeInBits)
4724 return false;
4725
4726 if (!IsXOROperand) {
4727 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4728 SDNode *MOV =
4729 CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, MVT::v2i64, Zero);
4730 SDValue MOVIV = SDValue(MOV, 0);
4731
4732 R1 = N1->getOperand(0);
4733 R2 = MOVIV;
4734 }
4735
4736 if (SVT != VT) {
4737 SDValue Undef =
4738 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, SVT), 0);
4739
4740 if (SVT.isScalableVector() && VT.is64BitVector()) {
4741 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4742
4743 SDValue UndefQ = SDValue(
4744 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, QVT), 0);
4745 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4746
4747 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4748 UndefQ, R1, DSub),
4749 0);
4750 if (R2.getValueType() == VT)
4751 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4752 UndefQ, R2, DSub),
4753 0);
4754 }
4755
4756 SDValue SubReg = CurDAG->getTargetConstant(
4757 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL, MVT::i32);
4758
4759 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, Undef,
4760 R1, SubReg),
4761 0);
4762
4763 if (SVT.isScalableVector() || R2.getValueType() != SVT)
4764 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT,
4765 Undef, R2, SubReg),
4766 0);
4767 }
4768
4769 SDValue Ops[] = {R1, R2, Imm};
4770 SDNode *XAR = nullptr;
4771
4772 if (SVT.isScalableVector()) {
4773 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4774 SVT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4775 AArch64::XAR_ZZZI_D}))
4776 XAR = CurDAG->getMachineNode(Opc, DL, SVT, Ops);
4777 } else {
4778 XAR = CurDAG->getMachineNode(AArch64::XAR, DL, SVT, Ops);
4779 }
4780
4781 assert(XAR && "Unexpected NULL value for XAR instruction in DAG");
4782
4783 if (SVT != VT) {
4784 if (VT.is64BitVector() && SVT.isScalableVector()) {
4785 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4786
4787 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4788 SDNode *Q = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, QVT,
4789 SDValue(XAR, 0), ZSub);
4790
4791 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4792 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4793 SDValue(Q, 0), DSub);
4794 } else {
4795 SDValue SubReg = CurDAG->getTargetConstant(
4796 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL,
4797 MVT::i32);
4798 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4799 SDValue(XAR, 0), SubReg);
4800 }
4801 }
4802 ReplaceNode(N, XAR);
4803 return true;
4804}
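
// A minimal worked example (illustrative), assuming Neon with SHA3 and
// placeholder registers (a in v1, b in v2):
//   %x = xor <2 x i64> %a, %b
//   %r = or (AArch64ISD::VSHL %x, 56), (AArch64ISD::VLSHR %x, 8)
//   selects to a single rotate-and-xor:
//     xar v0.2d, v1.2d, v2.2d, #8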
4805
4806void AArch64DAGToDAGISel::Select(SDNode *Node) {
4807 // If we have a custom node, we already have selected!
4808 if (Node->isMachineOpcode()) {
4809 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4810 Node->setNodeId(-1);
4811 return;
4812 }
4813
4814 // A few custom selection cases.
4815 EVT VT = Node->getValueType(0);
4816
4817 switch (Node->getOpcode()) {
4818 default:
4819 break;
4820
4821 case ISD::ATOMIC_CMP_SWAP:
4822 if (SelectCMP_SWAP(Node))
4823 return;
4824 break;
4825
4826 case ISD::READ_REGISTER:
4827 case AArch64ISD::MRRS:
4828 if (tryReadRegister(Node))
4829 return;
4830 break;
4831
4832 case ISD::WRITE_REGISTER:
4833 case AArch64ISD::MSRR:
4834 if (tryWriteRegister(Node))
4835 return;
4836 break;
4837
4838 case ISD::LOAD: {
4839 // Try to select as an indexed load. Fall through to normal processing
4840 // if we can't.
4841 if (tryIndexedLoad(Node))
4842 return;
4843 break;
4844 }
4845
4846 case ISD::SRL:
4847 case ISD::AND:
4848 case ISD::SRA:
4849 case ISD::SIGN_EXTEND_INREG:
4850 if (tryBitfieldExtractOp(Node))
4851 return;
4852 if (tryBitfieldInsertInZeroOp(Node))
4853 return;
4854 [[fallthrough]];
4855 case ISD::ROTR:
4856 case ISD::SHL:
4857 if (tryShiftAmountMod(Node))
4858 return;
4859 break;
4860
4861 case ISD::SIGN_EXTEND:
4862 if (tryBitfieldExtractOpFromSExt(Node))
4863 return;
4864 break;
4865
4866 case ISD::OR:
4867 if (tryBitfieldInsertOp(Node))
4868 return;
4869 if (trySelectXAR(Node))
4870 return;
4871 break;
4872
4873 case ISD::EXTRACT_SUBVECTOR: {
4874 if (trySelectCastScalableToFixedLengthVector(Node))
4875 return;
4876 break;
4877 }
4878
4879 case ISD::INSERT_SUBVECTOR: {
4880 if (trySelectCastFixedLengthToScalableVector(Node))
4881 return;
4882 break;
4883 }
4884
4885 case ISD::Constant: {
4886 // Materialize zero constants as copies from WZR/XZR. This allows
4887 // the coalescer to propagate these into other instructions.
4888 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4889 if (ConstNode->isZero()) {
4890 if (VT == MVT::i32) {
4891 SDValue New = CurDAG->getCopyFromReg(
4892 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4893 ReplaceNode(Node, New.getNode());
4894 return;
4895 } else if (VT == MVT::i64) {
4896 SDValue New = CurDAG->getCopyFromReg(
4897 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4898 ReplaceNode(Node, New.getNode());
4899 return;
4900 }
4901 }
4902 break;
4903 }
4904
4905 case ISD::FrameIndex: {
4906 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4907 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4908 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4909 const TargetLowering *TLI = getTargetLowering();
4910 SDValue TFI = CurDAG->getTargetFrameIndex(
4911 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4912 SDLoc DL(Node);
4913 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4914 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4915 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4916 return;
4917 }
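// Chained intrinsics carry the intrinsic ID in operand 1. The structured
// load/store intrinsics below pick a machine opcode from the vector type,
// and memory intrinsics transfer their MachineMemOperand to the new node.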
4918 case ISD::INTRINSIC_W_CHAIN: {
4919 unsigned IntNo = Node->getConstantOperandVal(1);
4920 switch (IntNo) {
4921 default:
4922 break;
4923 case Intrinsic::aarch64_gcsss: {
4924 SDLoc DL(Node);
4925 SDValue Chain = Node->getOperand(0);
4926 SDValue Val = Node->getOperand(2);
4927 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
4928 SDNode *SS1 =
4929 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
4930 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
4931 MVT::Other, Zero, SDValue(SS1, 0));
4932 ReplaceNode(Node, SS2);
4933 return;
4934 }
4935 case Intrinsic::aarch64_ldaxp:
4936 case Intrinsic::aarch64_ldxp: {
4937 unsigned Op =
4938 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4939 SDValue MemAddr = Node->getOperand(2);
4940 SDLoc DL(Node);
4941 SDValue Chain = Node->getOperand(0);
4942
4943 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4944 MVT::Other, MemAddr, Chain);
4945
4946 // Transfer memoperands.
4947 MachineMemOperand *MemOp =
4948 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4949 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4950 ReplaceNode(Node, Ld);
4951 return;
4952 }
4953 case Intrinsic::aarch64_stlxp:
4954 case Intrinsic::aarch64_stxp: {
4955 unsigned Op =
4956 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4957 SDLoc DL(Node);
4958 SDValue Chain = Node->getOperand(0);
4959 SDValue ValLo = Node->getOperand(2);
4960 SDValue ValHi = Node->getOperand(3);
4961 SDValue MemAddr = Node->getOperand(4);
4962
4963 // Place arguments in the right order.
4964 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4965
4966 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4967 // Transfer memoperands.
4968 MachineMemOperand *MemOp =
4969 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4970 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4971
4972 ReplaceNode(Node, St);
4973 return;
4974 }
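// NEON structured loads: the instruction is chosen by element type and
// register count, with results extracted starting at dsub0/qsub0.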
4975 case Intrinsic::aarch64_neon_ld1x2:
4976 if (VT == MVT::v8i8) {
4977 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4978 return;
4979 } else if (VT == MVT::v16i8) {
4980 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4981 return;
4982 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4983 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4984 return;
4985 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4986 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
4987 return;
4988 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4989 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
4990 return;
4991 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4992 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
4993 return;
4994 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4995 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4996 return;
4997 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4998 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
4999 return;
5000 }
5001 break;
5002 case Intrinsic::aarch64_neon_ld1x3:
5003 if (VT == MVT::v8i8) {
5004 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
5005 return;
5006 } else if (VT == MVT::v16i8) {
5007 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
5008 return;
5009 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5010 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
5011 return;
5012 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5013 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
5014 return;
5015 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5016 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
5017 return;
5018 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5019 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
5020 return;
5021 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5022 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5023 return;
5024 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5025 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
5026 return;
5027 }
5028 break;
5029 case Intrinsic::aarch64_neon_ld1x4:
5030 if (VT == MVT::v8i8) {
5031 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
5032 return;
5033 } else if (VT == MVT::v16i8) {
5034 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
5035 return;
5036 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5037 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
5038 return;
5039 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5040 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
5041 return;
5042 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5043 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
5044 return;
5045 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5046 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
5047 return;
5048 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5049 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5050 return;
5051 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5052 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
5053 return;
5054 }
5055 break;
5056 case Intrinsic::aarch64_neon_ld2:
5057 if (VT == MVT::v8i8) {
5058 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
5059 return;
5060 } else if (VT == MVT::v16i8) {
5061 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
5062 return;
5063 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5064 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
5065 return;
5066 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5067 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
5068 return;
5069 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5070 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
5071 return;
5072 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5073 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
5074 return;
5075 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5076 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5077 return;
5078 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5079 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
5080 return;
5081 }
5082 break;
5083 case Intrinsic::aarch64_neon_ld3:
5084 if (VT == MVT::v8i8) {
5085 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
5086 return;
5087 } else if (VT == MVT::v16i8) {
5088 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
5089 return;
5090 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5091 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
5092 return;
5093 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5094 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
5095 return;
5096 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5097 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
5098 return;
5099 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5100 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
5101 return;
5102 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5103 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5104 return;
5105 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5106 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
5107 return;
5108 }
5109 break;
5110 case Intrinsic::aarch64_neon_ld4:
5111 if (VT == MVT::v8i8) {
5112 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
5113 return;
5114 } else if (VT == MVT::v16i8) {
5115 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
5116 return;
5117 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5118 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
5119 return;
5120 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5121 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
5122 return;
5123 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5124 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
5125 return;
5126 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5127 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
5128 return;
5129 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5130 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5131 return;
5132 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5133 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
5134 return;
5135 }
5136 break;
5137 case Intrinsic::aarch64_neon_ld2r:
5138 if (VT == MVT::v8i8) {
5139 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
5140 return;
5141 } else if (VT == MVT::v16i8) {
5142 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
5143 return;
5144 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5145 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
5146 return;
5147 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5148 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
5149 return;
5150 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5151 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
5152 return;
5153 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5154 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
5155 return;
5156 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5157 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
5158 return;
5159 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5160 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
5161 return;
5162 }
5163 break;
5164 case Intrinsic::aarch64_neon_ld3r:
5165 if (VT == MVT::v8i8) {
5166 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
5167 return;
5168 } else if (VT == MVT::v16i8) {
5169 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
5170 return;
5171 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5172 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
5173 return;
5174 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5175 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
5176 return;
5177 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5178 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
5179 return;
5180 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5181 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
5182 return;
5183 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5184 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
5185 return;
5186 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5187 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
5188 return;
5189 }
5190 break;
5191 case Intrinsic::aarch64_neon_ld4r:
5192 if (VT == MVT::v8i8) {
5193 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
5194 return;
5195 } else if (VT == MVT::v16i8) {
5196 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
5197 return;
5198 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5199 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
5200 return;
5201 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5202 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
5203 return;
5204 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5205 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
5206 return;
5207 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5208 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
5209 return;
5210 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5211 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
5212 return;
5213 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5214 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
5215 return;
5216 }
5217 break;
5218 case Intrinsic::aarch64_neon_ld2lane:
5219 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5220 SelectLoadLane(Node, 2, AArch64::LD2i8);
5221 return;
5222 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5223 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5224 SelectLoadLane(Node, 2, AArch64::LD2i16);
5225 return;
5226 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5227 VT == MVT::v2f32) {
5228 SelectLoadLane(Node, 2, AArch64::LD2i32);
5229 return;
5230 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5231 VT == MVT::v1f64) {
5232 SelectLoadLane(Node, 2, AArch64::LD2i64);
5233 return;
5234 }
5235 break;
5236 case Intrinsic::aarch64_neon_ld3lane:
5237 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5238 SelectLoadLane(Node, 3, AArch64::LD3i8);
5239 return;
5240 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5241 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5242 SelectLoadLane(Node, 3, AArch64::LD3i16);
5243 return;
5244 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5245 VT == MVT::v2f32) {
5246 SelectLoadLane(Node, 3, AArch64::LD3i32);
5247 return;
5248 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5249 VT == MVT::v1f64) {
5250 SelectLoadLane(Node, 3, AArch64::LD3i64);
5251 return;
5252 }
5253 break;
5254 case Intrinsic::aarch64_neon_ld4lane:
5255 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5256 SelectLoadLane(Node, 4, AArch64::LD4i8);
5257 return;
5258 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5259 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5260 SelectLoadLane(Node, 4, AArch64::LD4i16);
5261 return;
5262 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5263 VT == MVT::v2f32) {
5264 SelectLoadLane(Node, 4, AArch64::LD4i32);
5265 return;
5266 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5267 VT == MVT::v1f64) {
5268 SelectLoadLane(Node, 4, AArch64::LD4i64);
5269 return;
5270 }
5271 break;
5272 case Intrinsic::aarch64_ld64b:
5273 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
5274 return;
5275 case Intrinsic::aarch64_sve_ld2q_sret: {
5276 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
5277 return;
5278 }
5279 case Intrinsic::aarch64_sve_ld3q_sret: {
5280 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
5281 return;
5282 }
5283 case Intrinsic::aarch64_sve_ld4q_sret: {
5284 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
5285 return;
5286 }
5287 case Intrinsic::aarch64_sve_ld2_sret: {
5288 if (VT == MVT::nxv16i8) {
5289 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
5290 true);
5291 return;
5292 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5293 VT == MVT::nxv8bf16) {
5294 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
5295 true);
5296 return;
5297 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5298 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
5299 true);
5300 return;
5301 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5302 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
5303 true);
5304 return;
5305 }
5306 break;
5307 }
5308 case Intrinsic::aarch64_sve_ld1_pn_x2: {
5309 if (VT == MVT::nxv16i8) {
5310 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5311 SelectContiguousMultiVectorLoad(
5312 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
5313 else if (Subtarget->hasSVE2p1())
5314 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
5315 AArch64::LD1B_2Z);
5316 else
5317 break;
5318 return;
5319 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5320 VT == MVT::nxv8bf16) {
5321 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5322 SelectContiguousMultiVectorLoad(
5323 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
5324 else if (Subtarget->hasSVE2p1())
5325 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
5326 AArch64::LD1H_2Z);
5327 else
5328 break;
5329 return;
5330 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5331 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5332 SelectContiguousMultiVectorLoad(
5333 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
5334 else if (Subtarget->hasSVE2p1())
5335 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
5336 AArch64::LD1W_2Z);
5337 else
5338 break;
5339 return;
5340 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5341 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5342 SelectContiguousMultiVectorLoad(
5343 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
5344 else if (Subtarget->hasSVE2p1())
5345 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
5346 AArch64::LD1D_2Z);
5347 else
5348 break;
5349 return;
5350 }
5351 break;
5352 }
5353 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5354 if (VT == MVT::nxv16i8) {
5355 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5356 SelectContiguousMultiVectorLoad(
5357 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5358 else if (Subtarget->hasSVE2p1())
5359 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5360 AArch64::LD1B_4Z);
5361 else
5362 break;
5363 return;
5364 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5365 VT == MVT::nxv8bf16) {
5366 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5367 SelectContiguousMultiVectorLoad(
5368 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5369 else if (Subtarget->hasSVE2p1())
5370 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5371 AArch64::LD1H_4Z);
5372 else
5373 break;
5374 return;
5375 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5376 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5377 SelectContiguousMultiVectorLoad(
5378 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5379 else if (Subtarget->hasSVE2p1())
5380 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5381 AArch64::LD1W_4Z);
5382 else
5383 break;
5384 return;
5385 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5386 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5387 SelectContiguousMultiVectorLoad(
5388 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5389 else if (Subtarget->hasSVE2p1())
5390 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5391 AArch64::LD1D_4Z);
5392 else
5393 break;
5394 return;
5395 }
5396 break;
5397 }
5398 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5399 if (VT == MVT::nxv16i8) {
5400 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5401 SelectContiguousMultiVectorLoad(Node, 2, 0,
5402 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5403 AArch64::LDNT1B_2Z_PSEUDO);
5404 else if (Subtarget->hasSVE2p1())
5405 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5406 AArch64::LDNT1B_2Z);
5407 else
5408 break;
5409 return;
5410 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5411 VT == MVT::nxv8bf16) {
5412 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5413 SelectContiguousMultiVectorLoad(Node, 2, 1,
5414 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5415 AArch64::LDNT1H_2Z_PSEUDO);
5416 else if (Subtarget->hasSVE2p1())
5417 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5418 AArch64::LDNT1H_2Z);
5419 else
5420 break;
5421 return;
5422 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5423 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5424 SelectContiguousMultiVectorLoad(Node, 2, 2,
5425 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5426 AArch64::LDNT1W_2Z_PSEUDO);
5427 else if (Subtarget->hasSVE2p1())
5428 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5429 AArch64::LDNT1W_2Z);
5430 else
5431 break;
5432 return;
5433 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5434 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5435 SelectContiguousMultiVectorLoad(Node, 2, 3,
5436 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5437 AArch64::LDNT1D_2Z_PSEUDO);
5438 else if (Subtarget->hasSVE2p1())
5439 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5440 AArch64::LDNT1D_2Z);
5441 else
5442 break;
5443 return;
5444 }
5445 break;
5446 }
5447 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5448 if (VT == MVT::nxv16i8) {
5449 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5450 SelectContiguousMultiVectorLoad(Node, 4, 0,
5451 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5452 AArch64::LDNT1B_4Z_PSEUDO);
5453 else if (Subtarget->hasSVE2p1())
5454 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5455 AArch64::LDNT1B_4Z);
5456 else
5457 break;
5458 return;
5459 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5460 VT == MVT::nxv8bf16) {
5461 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5462 SelectContiguousMultiVectorLoad(Node, 4, 1,
5463 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5464 AArch64::LDNT1H_4Z_PSEUDO);
5465 else if (Subtarget->hasSVE2p1())
5466 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5467 AArch64::LDNT1H_4Z);
5468 else
5469 break;
5470 return;
5471 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5472 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5473 SelectContiguousMultiVectorLoad(Node, 4, 2,
5474 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5475 AArch64::LDNT1W_4Z_PSEUDO);
5476 else if (Subtarget->hasSVE2p1())
5477 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5478 AArch64::LDNT1W_4Z);
5479 else
5480 break;
5481 return;
5482 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5483 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5484 SelectContiguousMultiVectorLoad(Node, 4, 3,
5485 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5486 AArch64::LDNT1D_4Z_PSEUDO);
5487 else if (Subtarget->hasSVE2p1())
5488 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5489 AArch64::LDNT1D_4Z);
5490 else
5491 break;
5492 return;
5493 }
5494 break;
5495 }
5496 case Intrinsic::aarch64_sve_ld3_sret: {
5497 if (VT == MVT::nxv16i8) {
5498 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5499 true);
5500 return;
5501 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5502 VT == MVT::nxv8bf16) {
5503 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5504 true);
5505 return;
5506 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5507 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5508 true);
5509 return;
5510 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5511 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5512 true);
5513 return;
5514 }
5515 break;
5516 }
5517 case Intrinsic::aarch64_sve_ld4_sret: {
5518 if (VT == MVT::nxv16i8) {
5519 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5520 true);
5521 return;
5522 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5523 VT == MVT::nxv8bf16) {
5524 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5525 true);
5526 return;
5527 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5528 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5529 true);
5530 return;
5531 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5532 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5533 true);
5534 return;
5535 }
5536 break;
5537 }
5538 case Intrinsic::aarch64_sme_read_hor_vg2: {
5539 if (VT == MVT::nxv16i8) {
5540 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5541 AArch64::MOVA_2ZMXI_H_B);
5542 return;
5543 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5544 VT == MVT::nxv8bf16) {
5545 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5546 AArch64::MOVA_2ZMXI_H_H);
5547 return;
5548 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5549 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5550 AArch64::MOVA_2ZMXI_H_S);
5551 return;
5552 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5553 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5554 AArch64::MOVA_2ZMXI_H_D);
5555 return;
5556 }
5557 break;
5558 }
5559 case Intrinsic::aarch64_sme_read_ver_vg2: {
5560 if (VT == MVT::nxv16i8) {
5561 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5562 AArch64::MOVA_2ZMXI_V_B);
5563 return;
5564 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5565 VT == MVT::nxv8bf16) {
5566 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5567 AArch64::MOVA_2ZMXI_V_H);
5568 return;
5569 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5570 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5571 AArch64::MOVA_2ZMXI_V_S);
5572 return;
5573 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5574 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5575 AArch64::MOVA_2ZMXI_V_D);
5576 return;
5577 }
5578 break;
5579 }
5580 case Intrinsic::aarch64_sme_read_hor_vg4: {
5581 if (VT == MVT::nxv16i8) {
5582 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5583 AArch64::MOVA_4ZMXI_H_B);
5584 return;
5585 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5586 VT == MVT::nxv8bf16) {
5587 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5588 AArch64::MOVA_4ZMXI_H_H);
5589 return;
5590 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5591 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5592 AArch64::MOVA_4ZMXI_H_S);
5593 return;
5594 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5595 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5596 AArch64::MOVA_4ZMXI_H_D);
5597 return;
5598 }
5599 break;
5600 }
5601 case Intrinsic::aarch64_sme_read_ver_vg4: {
5602 if (VT == MVT::nxv16i8) {
5603 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5604 AArch64::MOVA_4ZMXI_V_B);
5605 return;
5606 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5607 VT == MVT::nxv8bf16) {
5608 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5609 AArch64::MOVA_4ZMXI_V_H);
5610 return;
5611 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5612 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5613 AArch64::MOVA_4ZMXI_V_S);
5614 return;
5615 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5616 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5617 AArch64::MOVA_4ZMXI_V_D);
5618 return;
5619 }
5620 break;
5621 }
5622 case Intrinsic::aarch64_sme_read_vg1x2: {
5623 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5624 AArch64::MOVA_VG2_2ZMXI);
5625 return;
5626 }
5627 case Intrinsic::aarch64_sme_read_vg1x4: {
5628 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5629 AArch64::MOVA_VG4_4ZMXI);
5630 return;
5631 }
5632 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5633 if (VT == MVT::nxv16i8) {
5634 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5635 return;
5636 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5637 VT == MVT::nxv8bf16) {
5638 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5639 return;
5640 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5641 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5642 return;
5643 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5644 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5645 return;
5646 }
5647 break;
5648 }
5649 case Intrinsic::aarch64_sme_readz_vert_x2: {
5650 if (VT == MVT::nxv16i8) {
5651 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5652 return;
5653 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5654 VT == MVT::nxv8bf16) {
5655 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5656 return;
5657 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5658 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5659 return;
5660 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5661 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5662 return;
5663 }
5664 break;
5665 }
5666 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5667 if (VT == MVT::nxv16i8) {
5668 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5669 return;
5670 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5671 VT == MVT::nxv8bf16) {
5672 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5673 return;
5674 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5675 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5676 return;
5677 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5678 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5679 return;
5680 }
5681 break;
5682 }
5683 case Intrinsic::aarch64_sme_readz_vert_x4: {
5684 if (VT == MVT::nxv16i8) {
5685 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5686 return;
5687 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5688 VT == MVT::nxv8bf16) {
5689 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5690 return;
5691 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5692 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5693 return;
5694 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5695 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5696 return;
5697 }
5698 break;
5699 }
5700 case Intrinsic::aarch64_sme_readz_x2: {
5701 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
5702 AArch64::ZA);
5703 return;
5704 }
5705 case Intrinsic::aarch64_sme_readz_x4: {
5706 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
5707 AArch64::ZA);
5708 return;
5709 }
5710 case Intrinsic::swift_async_context_addr: {
5711 SDLoc DL(Node);
5712 SDValue Chain = Node->getOperand(0);
5713 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5714 SDValue Res = SDValue(
5715 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5716 CurDAG->getTargetConstant(8, DL, MVT::i32),
5717 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5718 0);
5719 ReplaceUses(SDValue(Node, 0), Res);
5720 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5721 CurDAG->RemoveDeadNode(Node);
5722
5723 auto &MF = CurDAG->getMachineFunction();
5724 MF.getFrameInfo().setFrameAddressIsTaken(true);
5725 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5726 return;
5727 }
5728 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5729 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5730 Node->getValueType(0),
5731 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5732 AArch64::LUTI2_4ZTZI_S}))
5733 // Second Immediate must be <= 3:
5734 SelectMultiVectorLutiLane(Node, 4, Opc, 3);
5735 return;
5736 }
5737 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5738 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5739 Node->getValueType(0),
5740 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5741 // Second Immediate must be <= 1:
5742 SelectMultiVectorLutiLane(Node, 4, Opc, 1);
5743 return;
5744 }
5745 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5746 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5747 Node->getValueType(0),
5748 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5749 AArch64::LUTI2_2ZTZI_S}))
5750 // Second Immediate must be <= 7:
5751 SelectMultiVectorLutiLane(Node, 2, Opc, 7);
5752 return;
5753 }
5754 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5755 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5756 Node->getValueType(0),
5757 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5758 AArch64::LUTI4_2ZTZI_S}))
5759 // Second Immediate must be <= 3:
5760 SelectMultiVectorLutiLane(Node, 2, Opc, 3);
5761 return;
5762 }
5763 case Intrinsic::aarch64_sme_luti4_zt_x4: {
5764 SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z);
5765 return;
5766 }
5767 case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
5768 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5769 Node->getValueType(0),
5770 {AArch64::BF1CVTL_2ZZ_BtoH, AArch64::F1CVTL_2ZZ_BtoH}))
5771 SelectCVTIntrinsicFP8(Node, 2, Opc);
5772 return;
5773 case Intrinsic::aarch64_sve_fp8_cvtl2_x2:
5774 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5775 Node->getValueType(0),
5776 {AArch64::BF2CVTL_2ZZ_BtoH, AArch64::F2CVTL_2ZZ_BtoH}))
5777 SelectCVTIntrinsicFP8(Node, 2, Opc);
5778 return;
5779 case Intrinsic::aarch64_sve_fp8_cvt1_x2:
5780 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5781 Node->getValueType(0),
5782 {AArch64::BF1CVT_2ZZ_BtoH, AArch64::F1CVT_2ZZ_BtoH}))
5783 SelectCVTIntrinsicFP8(Node, 2, Opc);
5784 return;
5785 case Intrinsic::aarch64_sve_fp8_cvt2_x2:
5786 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5787 Node->getValueType(0),
5788 {AArch64::BF2CVT_2ZZ_BtoH, AArch64::F2CVT_2ZZ_BtoH}))
5789 SelectCVTIntrinsicFP8(Node, 2, Opc);
5790 return;
5791 }
5792 } break;
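// Intrinsics without a chain carry the intrinsic ID in operand 0.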
5793 case ISD::INTRINSIC_WO_CHAIN: {
5794 unsigned IntNo = Node->getConstantOperandVal(0);
5795 switch (IntNo) {
5796 default:
5797 break;
5798 case Intrinsic::aarch64_tagp:
5799 SelectTagP(Node);
5800 return;
5801
5802 case Intrinsic::ptrauth_auth:
5803 SelectPtrauthAuth(Node);
5804 return;
5805
5806 case Intrinsic::ptrauth_resign:
5807 SelectPtrauthResign(Node);
5808 return;
5809
5810 case Intrinsic::aarch64_neon_tbl2:
5811 SelectTable(Node, 2,
5812 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5813 false);
5814 return;
5815 case Intrinsic::aarch64_neon_tbl3:
5816 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5817 : AArch64::TBLv16i8Three,
5818 false);
5819 return;
5820 case Intrinsic::aarch64_neon_tbl4:
5821 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5822 : AArch64::TBLv16i8Four,
5823 false);
5824 return;
5825 case Intrinsic::aarch64_neon_tbx2:
5826 SelectTable(Node, 2,
5827 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5828 true);
5829 return;
5830 case Intrinsic::aarch64_neon_tbx3:
5831 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5832 : AArch64::TBXv16i8Three,
5833 true);
5834 return;
5835 case Intrinsic::aarch64_neon_tbx4:
5836 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5837 : AArch64::TBXv16i8Four,
5838 true);
5839 return;
5840 case Intrinsic::aarch64_sve_srshl_single_x2:
5841 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5842 Node->getValueType(0),
5843 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5844 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
5845 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5846 return;
5847 case Intrinsic::aarch64_sve_srshl_single_x4:
5848 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5849 Node->getValueType(0),
5850 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
5851 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
5852 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5853 return;
5854 case Intrinsic::aarch64_sve_urshl_single_x2:
5855 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5856 Node->getValueType(0),
5857 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
5858 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
5859 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5860 return;
5861 case Intrinsic::aarch64_sve_urshl_single_x4:
5862 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5863 Node->getValueType(0),
5864 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
5865 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
5866 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5867 return;
5868 case Intrinsic::aarch64_sve_srshl_x2:
5869 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5870 Node->getValueType(0),
5871 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
5872 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
5873 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5874 return;
5875 case Intrinsic::aarch64_sve_srshl_x4:
5876 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5877 Node->getValueType(0),
5878 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
5879 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
5880 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5881 return;
5882 case Intrinsic::aarch64_sve_urshl_x2:
5883 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5884 Node->getValueType(0),
5885 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
5886 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
5887 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5888 return;
5889 case Intrinsic::aarch64_sve_urshl_x4:
5890 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5891 Node->getValueType(0),
5892 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
5893 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
5894 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5895 return;
5896 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
5897 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5898 Node->getValueType(0),
5899 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
5900 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
5901 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5902 return;
5903 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
5904 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5905 Node->getValueType(0),
5906 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
5907 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
5908 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5909 return;
5910 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
5911 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5912 Node->getValueType(0),
5913 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
5914 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
5915 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5916 return;
5917 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
5918 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5919 Node->getValueType(0),
5920 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
5921 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
5922 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5923 return;
5924 case Intrinsic::aarch64_sme_fp8_scale_single_x2:
5925 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5926 Node->getValueType(0),
5927 {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S,
5928 AArch64::FSCALE_2ZZ_D}))
5929 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5930 return;
5931 case Intrinsic::aarch64_sme_fp8_scale_single_x4:
5932 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5933 Node->getValueType(0),
5934 {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S,
5935 AArch64::FSCALE_4ZZ_D}))
5936 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5937 return;
5938 case Intrinsic::aarch64_sme_fp8_scale_x2:
5939 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5940 Node->getValueType(0),
5941 {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S,
5942 AArch64::FSCALE_2Z2Z_D}))
5943 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5944 return;
5945 case Intrinsic::aarch64_sme_fp8_scale_x4:
5946 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5947 Node->getValueType(0),
5948 {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S,
5949 AArch64::FSCALE_4Z4Z_D}))
5950 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5951 return;
5952 case Intrinsic::aarch64_sve_whilege_x2:
5953 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5954 Node->getValueType(0),
5955 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
5956 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
5957 SelectWhilePair(Node, Op);
5958 return;
5959 case Intrinsic::aarch64_sve_whilegt_x2:
5960 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5961 Node->getValueType(0),
5962 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
5963 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
5964 SelectWhilePair(Node, Op);
5965 return;
5966 case Intrinsic::aarch64_sve_whilehi_x2:
5967 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5968 Node->getValueType(0),
5969 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
5970 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
5971 SelectWhilePair(Node, Op);
5972 return;
5973 case Intrinsic::aarch64_sve_whilehs_x2:
5974 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5975 Node->getValueType(0),
5976 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
5977 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
5978 SelectWhilePair(Node, Op);
5979 return;
5980 case Intrinsic::aarch64_sve_whilele_x2:
5981 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5982 Node->getValueType(0),
5983 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
5984 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
5985 SelectWhilePair(Node, Op);
5986 return;
5987 case Intrinsic::aarch64_sve_whilelo_x2:
5988 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5989 Node->getValueType(0),
5990 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
5991 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
5992 SelectWhilePair(Node, Op);
5993 return;
5994 case Intrinsic::aarch64_sve_whilels_x2:
5995 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5996 Node->getValueType(0),
5997 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
5998 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
5999 SelectWhilePair(Node, Op);
6000 return;
6001 case Intrinsic::aarch64_sve_whilelt_x2:
6002 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6003 Node->getValueType(0),
6004 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
6005 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
6006 SelectWhilePair(Node, Op);
6007 return;
6008 case Intrinsic::aarch64_sve_smax_single_x2:
6009 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6010 Node->getValueType(0),
6011 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
6012 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
6013 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6014 return;
6015 case Intrinsic::aarch64_sve_umax_single_x2:
6016 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6017 Node->getValueType(0),
6018 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
6019 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
6020 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6021 return;
6022 case Intrinsic::aarch64_sve_fmax_single_x2:
6023 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6024 Node->getValueType(0),
6025 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
6026 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
6027 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6028 return;
6029 case Intrinsic::aarch64_sve_smax_single_x4:
6030 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6031 Node->getValueType(0),
6032 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
6033 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
6034 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6035 return;
6036 case Intrinsic::aarch64_sve_umax_single_x4:
6037 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6038 Node->getValueType(0),
6039 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
6040 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
6041 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6042 return;
6043 case Intrinsic::aarch64_sve_fmax_single_x4:
6044 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6045 Node->getValueType(0),
6046 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
6047 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
6048 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6049 return;
6050 case Intrinsic::aarch64_sve_smin_single_x2:
6051 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6052 Node->getValueType(0),
6053 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
6054 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
6055 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6056 return;
6057 case Intrinsic::aarch64_sve_umin_single_x2:
6058 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6059 Node->getValueType(0),
6060 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
6061 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
6062 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6063 return;
6064 case Intrinsic::aarch64_sve_fmin_single_x2:
6065 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6066 Node->getValueType(0),
6067 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
6068 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
6069 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6070 return;
6071 case Intrinsic::aarch64_sve_smin_single_x4:
6072 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6073 Node->getValueType(0),
6074 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
6075 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
6076 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6077 return;
6078 case Intrinsic::aarch64_sve_umin_single_x4:
6079 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6080 Node->getValueType(0),
6081 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
6082 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
6083 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6084 return;
6085 case Intrinsic::aarch64_sve_fmin_single_x4:
6086 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6087 Node->getValueType(0),
6088 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
6089 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
6090 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6091 return;
6092 case Intrinsic::aarch64_sve_smax_x2:
6093 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6094 Node->getValueType(0),
6095 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
6096 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
6097 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6098 return;
6099 case Intrinsic::aarch64_sve_umax_x2:
6100 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6101 Node->getValueType(0),
6102 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
6103 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
6104 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6105 return;
6106 case Intrinsic::aarch64_sve_fmax_x2:
6107 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6108 Node->getValueType(0),
6109 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
6110 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
6111 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6112 return;
6113 case Intrinsic::aarch64_sve_smax_x4:
6114 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6115 Node->getValueType(0),
6116 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
6117 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
6118 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6119 return;
6120 case Intrinsic::aarch64_sve_umax_x4:
6121 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6122 Node->getValueType(0),
6123 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
6124 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
6125 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6126 return;
6127 case Intrinsic::aarch64_sve_fmax_x4:
6128 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6129 Node->getValueType(0),
6130 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
6131 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
6132 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6133 return;
6134 case Intrinsic::aarch64_sme_famax_x2:
6135 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6136 Node->getValueType(0),
6137 {0, AArch64::FAMAX_2Z2Z_H, AArch64::FAMAX_2Z2Z_S,
6138 AArch64::FAMAX_2Z2Z_D}))
6139 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6140 return;
6141 case Intrinsic::aarch64_sme_famax_x4:
6142 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6143 Node->getValueType(0),
6144 {0, AArch64::FAMAX_4Z4Z_H, AArch64::FAMAX_4Z4Z_S,
6145 AArch64::FAMAX_4Z4Z_D}))
6146 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6147 return;
6148 case Intrinsic::aarch64_sme_famin_x2:
6149 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6150 Node->getValueType(0),
6151 {0, AArch64::FAMIN_2Z2Z_H, AArch64::FAMIN_2Z2Z_S,
6152 AArch64::FAMIN_2Z2Z_D}))
6153 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6154 return;
6155 case Intrinsic::aarch64_sme_famin_x4:
6156 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6157 Node->getValueType(0),
6158 {0, AArch64::FAMIN_4Z4Z_H, AArch64::FAMIN_4Z4Z_S,
6159 AArch64::FAMIN_4Z4Z_D}))
6160 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6161 return;
6162 case Intrinsic::aarch64_sve_smin_x2:
6163 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6164 Node->getValueType(0),
6165 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
6166 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
6167 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6168 return;
6169 case Intrinsic::aarch64_sve_umin_x2:
6170 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6171 Node->getValueType(0),
6172 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
6173 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
6174 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6175 return;
6176 case Intrinsic::aarch64_sve_fmin_x2:
6177 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6178 Node->getValueType(0),
6179 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
6180 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
6181 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6182 return;
6183 case Intrinsic::aarch64_sve_smin_x4:
6184 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6185 Node->getValueType(0),
6186 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
6187 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
6188 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6189 return;
6190 case Intrinsic::aarch64_sve_umin_x4:
6191 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6192 Node->getValueType(0),
6193 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
6194 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
6195 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6196 return;
6197 case Intrinsic::aarch64_sve_fmin_x4:
6198 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6199 Node->getValueType(0),
6200 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
6201 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
6202 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6203 return;
6204 case Intrinsic::aarch64_sve_fmaxnm_single_x2 :
6205 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6206 Node->getValueType(0),
6207 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
6208 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
6209 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6210 return;
6211 case Intrinsic::aarch64_sve_fmaxnm_single_x4 :
6212 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6213 Node->getValueType(0),
6214 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
6215 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
6216 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6217 return;
6218 case Intrinsic::aarch64_sve_fminnm_single_x2:
6219 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6220 Node->getValueType(0),
6221 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
6222 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
6223 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6224 return;
6225 case Intrinsic::aarch64_sve_fminnm_single_x4:
6226 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6227 Node->getValueType(0),
6228 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
6229 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
6230 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6231 return;
6232 case Intrinsic::aarch64_sve_fmaxnm_x2:
6233 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6234 Node->getValueType(0),
6235 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
6236 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
6237 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6238 return;
6239 case Intrinsic::aarch64_sve_fmaxnm_x4:
6240 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6241 Node->getValueType(0),
6242 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
6243 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
6244 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6245 return;
6246 case Intrinsic::aarch64_sve_fminnm_x2:
6247 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6248 Node->getValueType(0),
6249 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
6250 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
6251 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6252 return;
6253 case Intrinsic::aarch64_sve_fminnm_x4:
6254 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6255 Node->getValueType(0),
6256 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
6257 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
6258 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6259 return;
6260 case Intrinsic::aarch64_sve_fcvtzs_x2:
6261 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
6262 return;
6263 case Intrinsic::aarch64_sve_scvtf_x2:
6264 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
6265 return;
6266 case Intrinsic::aarch64_sve_fcvtzu_x2:
6267 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
6268 return;
6269 case Intrinsic::aarch64_sve_ucvtf_x2:
6270 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
6271 return;
6272 case Intrinsic::aarch64_sve_fcvtzs_x4:
6273 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
6274 return;
6275 case Intrinsic::aarch64_sve_scvtf_x4:
6276 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
6277 return;
6278 case Intrinsic::aarch64_sve_fcvtzu_x4:
6279 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
6280 return;
6281 case Intrinsic::aarch64_sve_ucvtf_x4:
6282 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
6283 return;
6284 case Intrinsic::aarch64_sve_fcvt_widen_x2:
6285 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
6286 return;
6287 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
6288 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
6289 return;
6290 case Intrinsic::aarch64_sve_sclamp_single_x2:
6291 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6292 Node->getValueType(0),
6293 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
6294 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
6295 SelectClamp(Node, 2, Op);
6296 return;
6297 case Intrinsic::aarch64_sve_uclamp_single_x2:
6298 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6299 Node->getValueType(0),
6300 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
6301 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
6302 SelectClamp(Node, 2, Op);
6303 return;
6304 case Intrinsic::aarch64_sve_fclamp_single_x2:
6305 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6306 Node->getValueType(0),
6307 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
6308 AArch64::FCLAMP_VG2_2Z2Z_D}))
6309 SelectClamp(Node, 2, Op);
6310 return;
6311 case Intrinsic::aarch64_sve_bfclamp_single_x2:
6312 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
6313 return;
6314 case Intrinsic::aarch64_sve_sclamp_single_x4:
6315 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6316 Node->getValueType(0),
6317 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
6318 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
6319 SelectClamp(Node, 4, Op);
6320 return;
6321 case Intrinsic::aarch64_sve_uclamp_single_x4:
6322 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6323 Node->getValueType(0),
6324 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6325 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6326 SelectClamp(Node, 4, Op);
6327 return;
6328 case Intrinsic::aarch64_sve_fclamp_single_x4:
6329 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6330 Node->getValueType(0),
6331 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6332 AArch64::FCLAMP_VG4_4Z4Z_D}))
6333 SelectClamp(Node, 4, Op);
6334 return;
6335 case Intrinsic::aarch64_sve_bfclamp_single_x4:
6336 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
6337 return;
6338 case Intrinsic::aarch64_sve_add_single_x2:
6339 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6340 Node->getValueType(0),
6341 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6342 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6343 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6344 return;
6345 case Intrinsic::aarch64_sve_add_single_x4:
6346 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6347 Node->getValueType(0),
6348 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6349 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6350 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6351 return;
6352 case Intrinsic::aarch64_sve_zip_x2:
6353 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6354 Node->getValueType(0),
6355 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6356 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6357 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6358 return;
6359 case Intrinsic::aarch64_sve_zipq_x2:
6360 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6361 AArch64::ZIP_VG2_2ZZZ_Q);
6362 return;
6363 case Intrinsic::aarch64_sve_zip_x4:
6364 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6365 Node->getValueType(0),
6366 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6367 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6368 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6369 return;
6370 case Intrinsic::aarch64_sve_zipq_x4:
6371 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6372 AArch64::ZIP_VG4_4Z4Z_Q);
6373 return;
6374 case Intrinsic::aarch64_sve_uzp_x2:
6375 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6376 Node->getValueType(0),
6377 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6378 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6379 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6380 return;
6381 case Intrinsic::aarch64_sve_uzpq_x2:
6382 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6383 AArch64::UZP_VG2_2ZZZ_Q);
6384 return;
6385 case Intrinsic::aarch64_sve_uzp_x4:
6386 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6387 Node->getValueType(0),
6388 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6389 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6390 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6391 return;
6392 case Intrinsic::aarch64_sve_uzpq_x4:
6393 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6394 AArch64::UZP_VG4_4Z4Z_Q);
6395 return;
6396 case Intrinsic::aarch64_sve_sel_x2:
6397 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6398 Node->getValueType(0),
6399 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6400 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6401 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
6402 return;
6403 case Intrinsic::aarch64_sve_sel_x4:
6404 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6405 Node->getValueType(0),
6406 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6407 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6408 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
6409 return;
6410 case Intrinsic::aarch64_sve_frinta_x2:
6411 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
6412 return;
6413 case Intrinsic::aarch64_sve_frinta_x4:
6414 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
6415 return;
6416 case Intrinsic::aarch64_sve_frintm_x2:
6417 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
6418 return;
6419 case Intrinsic::aarch64_sve_frintm_x4:
6420 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
6421 return;
6422 case Intrinsic::aarch64_sve_frintn_x2:
6423 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
6424 return;
6425 case Intrinsic::aarch64_sve_frintn_x4:
6426 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
6427 return;
6428 case Intrinsic::aarch64_sve_frintp_x2:
6429 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
6430 return;
6431 case Intrinsic::aarch64_sve_frintp_x4:
6432 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
6433 return;
6434 case Intrinsic::aarch64_sve_sunpk_x2:
6435 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6436 Node->getValueType(0),
6437 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6438 AArch64::SUNPK_VG2_2ZZ_D}))
6439 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6440 return;
6441 case Intrinsic::aarch64_sve_uunpk_x2:
6442 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6443 Node->getValueType(0),
6444 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6445 AArch64::UUNPK_VG2_2ZZ_D}))
6446 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6447 return;
6448 case Intrinsic::aarch64_sve_sunpk_x4:
6449 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6450 Node->getValueType(0),
6451 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6452 AArch64::SUNPK_VG4_4Z2Z_D}))
6453 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6454 return;
6455 case Intrinsic::aarch64_sve_uunpk_x4:
6457 Node->getValueType(0),
6458 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6459 AArch64::UUNPK_VG4_4Z2Z_D}))
6460 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6461 return;
6462 case Intrinsic::aarch64_sve_pext_x2: {
6464 Node->getValueType(0),
6465 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6466 AArch64::PEXT_2PCI_D}))
6467 SelectPExtPair(Node, Op);
6468 return;
6469 }
6470 }
6471 break;
6472 }
6473 case ISD::INTRINSIC_VOID: {
6474 unsigned IntNo = Node->getConstantOperandVal(1);
6475 if (Node->getNumOperands() >= 3)
6476 VT = Node->getOperand(2)->getValueType(0);
6477 switch (IntNo) {
6478 default:
6479 break;
6480 case Intrinsic::aarch64_neon_st1x2: {
6481 if (VT == MVT::v8i8) {
6482 SelectStore(Node, 2, AArch64::ST1Twov8b);
6483 return;
6484 } else if (VT == MVT::v16i8) {
6485 SelectStore(Node, 2, AArch64::ST1Twov16b);
6486 return;
6487 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6488 VT == MVT::v4bf16) {
6489 SelectStore(Node, 2, AArch64::ST1Twov4h);
6490 return;
6491 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6492 VT == MVT::v8bf16) {
6493 SelectStore(Node, 2, AArch64::ST1Twov8h);
6494 return;
6495 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6496 SelectStore(Node, 2, AArch64::ST1Twov2s);
6497 return;
6498 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6499 SelectStore(Node, 2, AArch64::ST1Twov4s);
6500 return;
6501 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6502 SelectStore(Node, 2, AArch64::ST1Twov2d);
6503 return;
6504 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6505 SelectStore(Node, 2, AArch64::ST1Twov1d);
6506 return;
6507 }
6508 break;
6509 }
6510 case Intrinsic::aarch64_neon_st1x3: {
6511 if (VT == MVT::v8i8) {
6512 SelectStore(Node, 3, AArch64::ST1Threev8b);
6513 return;
6514 } else if (VT == MVT::v16i8) {
6515 SelectStore(Node, 3, AArch64::ST1Threev16b);
6516 return;
6517 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6518 VT == MVT::v4bf16) {
6519 SelectStore(Node, 3, AArch64::ST1Threev4h);
6520 return;
6521 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6522 VT == MVT::v8bf16) {
6523 SelectStore(Node, 3, AArch64::ST1Threev8h);
6524 return;
6525 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6526 SelectStore(Node, 3, AArch64::ST1Threev2s);
6527 return;
6528 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6529 SelectStore(Node, 3, AArch64::ST1Threev4s);
6530 return;
6531 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6532 SelectStore(Node, 3, AArch64::ST1Threev2d);
6533 return;
6534 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6535 SelectStore(Node, 3, AArch64::ST1Threev1d);
6536 return;
6537 }
6538 break;
6539 }
6540 case Intrinsic::aarch64_neon_st1x4: {
6541 if (VT == MVT::v8i8) {
6542 SelectStore(Node, 4, AArch64::ST1Fourv8b);
6543 return;
6544 } else if (VT == MVT::v16i8) {
6545 SelectStore(Node, 4, AArch64::ST1Fourv16b);
6546 return;
6547 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6548 VT == MVT::v4bf16) {
6549 SelectStore(Node, 4, AArch64::ST1Fourv4h);
6550 return;
6551 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6552 VT == MVT::v8bf16) {
6553 SelectStore(Node, 4, AArch64::ST1Fourv8h);
6554 return;
6555 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6556 SelectStore(Node, 4, AArch64::ST1Fourv2s);
6557 return;
6558 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6559 SelectStore(Node, 4, AArch64::ST1Fourv4s);
6560 return;
6561 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6562 SelectStore(Node, 4, AArch64::ST1Fourv2d);
6563 return;
6564 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6565 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6566 return;
6567 }
6568 break;
6569 }
6570 case Intrinsic::aarch64_neon_st2: {
6571 if (VT == MVT::v8i8) {
6572 SelectStore(Node, 2, AArch64::ST2Twov8b);
6573 return;
6574 } else if (VT == MVT::v16i8) {
6575 SelectStore(Node, 2, AArch64::ST2Twov16b);
6576 return;
6577 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6578 VT == MVT::v4bf16) {
6579 SelectStore(Node, 2, AArch64::ST2Twov4h);
6580 return;
6581 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6582 VT == MVT::v8bf16) {
6583 SelectStore(Node, 2, AArch64::ST2Twov8h);
6584 return;
6585 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6586 SelectStore(Node, 2, AArch64::ST2Twov2s);
6587 return;
6588 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6589 SelectStore(Node, 2, AArch64::ST2Twov4s);
6590 return;
6591 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6592 SelectStore(Node, 2, AArch64::ST2Twov2d);
6593 return;
6594 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6595 SelectStore(Node, 2, AArch64::ST1Twov1d);
6596 return;
6597 }
6598 break;
6599 }
6600 case Intrinsic::aarch64_neon_st3: {
6601 if (VT == MVT::v8i8) {
6602 SelectStore(Node, 3, AArch64::ST3Threev8b);
6603 return;
6604 } else if (VT == MVT::v16i8) {
6605 SelectStore(Node, 3, AArch64::ST3Threev16b);
6606 return;
6607 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6608 VT == MVT::v4bf16) {
6609 SelectStore(Node, 3, AArch64::ST3Threev4h);
6610 return;
6611 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6612 VT == MVT::v8bf16) {
6613 SelectStore(Node, 3, AArch64::ST3Threev8h);
6614 return;
6615 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6616 SelectStore(Node, 3, AArch64::ST3Threev2s);
6617 return;
6618 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6619 SelectStore(Node, 3, AArch64::ST3Threev4s);
6620 return;
6621 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6622 SelectStore(Node, 3, AArch64::ST3Threev2d);
6623 return;
6624 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6625 SelectStore(Node, 3, AArch64::ST1Threev1d);
6626 return;
6627 }
6628 break;
6629 }
6630 case Intrinsic::aarch64_neon_st4: {
6631 if (VT == MVT::v8i8) {
6632 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6633 return;
6634 } else if (VT == MVT::v16i8) {
6635 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6636 return;
6637 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6638 VT == MVT::v4bf16) {
6639 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6640 return;
6641 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6642 VT == MVT::v8bf16) {
6643 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6644 return;
6645 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6646 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6647 return;
6648 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6649 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6650 return;
6651 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6652 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6653 return;
6654 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6655 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6656 return;
6657 }
6658 break;
6659 }
6660 case Intrinsic::aarch64_neon_st2lane: {
6661 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6662 SelectStoreLane(Node, 2, AArch64::ST2i8);
6663 return;
6664 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6665 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6666 SelectStoreLane(Node, 2, AArch64::ST2i16);
6667 return;
6668 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6669 VT == MVT::v2f32) {
6670 SelectStoreLane(Node, 2, AArch64::ST2i32);
6671 return;
6672 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6673 VT == MVT::v1f64) {
6674 SelectStoreLane(Node, 2, AArch64::ST2i64);
6675 return;
6676 }
6677 break;
6678 }
6679 case Intrinsic::aarch64_neon_st3lane: {
6680 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6681 SelectStoreLane(Node, 3, AArch64::ST3i8);
6682 return;
6683 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6684 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6685 SelectStoreLane(Node, 3, AArch64::ST3i16);
6686 return;
6687 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6688 VT == MVT::v2f32) {
6689 SelectStoreLane(Node, 3, AArch64::ST3i32);
6690 return;
6691 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6692 VT == MVT::v1f64) {
6693 SelectStoreLane(Node, 3, AArch64::ST3i64);
6694 return;
6695 }
6696 break;
6697 }
6698 case Intrinsic::aarch64_neon_st4lane: {
6699 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6700 SelectStoreLane(Node, 4, AArch64::ST4i8);
6701 return;
6702 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6703 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6704 SelectStoreLane(Node, 4, AArch64::ST4i16);
6705 return;
6706 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6707 VT == MVT::v2f32) {
6708 SelectStoreLane(Node, 4, AArch64::ST4i32);
6709 return;
6710 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6711 VT == MVT::v1f64) {
6712 SelectStoreLane(Node, 4, AArch64::ST4i64);
6713 return;
6714 }
6715 break;
6716 }
6717 case Intrinsic::aarch64_sve_st2q: {
6718 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6719 return;
6720 }
6721 case Intrinsic::aarch64_sve_st3q: {
6722 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6723 return;
6724 }
6725 case Intrinsic::aarch64_sve_st4q: {
6726 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
6727 return;
6728 }
6729 case Intrinsic::aarch64_sve_st2: {
6730 if (VT == MVT::nxv16i8) {
6731 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
6732 return;
6733 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6734 VT == MVT::nxv8bf16) {
6735 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
6736 return;
6737 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6738 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
6739 return;
6740 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6741 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
6742 return;
6743 }
6744 break;
6745 }
6746 case Intrinsic::aarch64_sve_st3: {
6747 if (VT == MVT::nxv16i8) {
6748 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
6749 return;
6750 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6751 VT == MVT::nxv8bf16) {
6752 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
6753 return;
6754 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6755 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
6756 return;
6757 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6758 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
6759 return;
6760 }
6761 break;
6762 }
6763 case Intrinsic::aarch64_sve_st4: {
6764 if (VT == MVT::nxv16i8) {
6765 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
6766 return;
6767 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6768 VT == MVT::nxv8bf16) {
6769 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
6770 return;
6771 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6772 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
6773 return;
6774 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6775 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
6776 return;
6777 }
6778 break;
6779 }
6780 }
6781 break;
6782 }
6783 case AArch64ISD::LD2post: {
6784 if (VT == MVT::v8i8) {
6785 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
6786 return;
6787 } else if (VT == MVT::v16i8) {
6788 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
6789 return;
6790 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6791 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
6792 return;
6793 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6794 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
6795 return;
6796 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6797 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
6798 return;
6799 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6800 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
6801 return;
6802 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6803 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6804 return;
6805 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6806 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
6807 return;
6808 }
6809 break;
6810 }
6811 case AArch64ISD::LD3post: {
6812 if (VT == MVT::v8i8) {
6813 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
6814 return;
6815 } else if (VT == MVT::v16i8) {
6816 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
6817 return;
6818 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6819 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
6820 return;
6821 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6822 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
6823 return;
6824 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6825 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
6826 return;
6827 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6828 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
6829 return;
6830 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6831 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6832 return;
6833 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6834 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
6835 return;
6836 }
6837 break;
6838 }
6839 case AArch64ISD::LD4post: {
6840 if (VT == MVT::v8i8) {
6841 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
6842 return;
6843 } else if (VT == MVT::v16i8) {
6844 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
6845 return;
6846 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6847 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
6848 return;
6849 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6850 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
6851 return;
6852 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6853 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
6854 return;
6855 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6856 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
6857 return;
6858 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6859 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6860 return;
6861 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6862 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
6863 return;
6864 }
6865 break;
6866 }
6867 case AArch64ISD::LD1x2post: {
6868 if (VT == MVT::v8i8) {
6869 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
6870 return;
6871 } else if (VT == MVT::v16i8) {
6872 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
6873 return;
6874 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6875 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
6876 return;
6877 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6878 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
6879 return;
6880 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6881 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
6882 return;
6883 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6884 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
6885 return;
6886 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6887 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6888 return;
6889 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6890 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
6891 return;
6892 }
6893 break;
6894 }
6895 case AArch64ISD::LD1x3post: {
6896 if (VT == MVT::v8i8) {
6897 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
6898 return;
6899 } else if (VT == MVT::v16i8) {
6900 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
6901 return;
6902 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6903 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
6904 return;
6905 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6906 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
6907 return;
6908 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6909 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
6910 return;
6911 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6912 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
6913 return;
6914 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6915 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6916 return;
6917 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6918 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
6919 return;
6920 }
6921 break;
6922 }
6923 case AArch64ISD::LD1x4post: {
6924 if (VT == MVT::v8i8) {
6925 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
6926 return;
6927 } else if (VT == MVT::v16i8) {
6928 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
6929 return;
6930 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6931 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
6932 return;
6933 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6934 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
6935 return;
6936 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6937 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
6938 return;
6939 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6940 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
6941 return;
6942 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6943 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6944 return;
6945 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6946 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
6947 return;
6948 }
6949 break;
6950 }
6951 case AArch64ISD::LD1DUPpost: {
6952 if (VT == MVT::v8i8) {
6953 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
6954 return;
6955 } else if (VT == MVT::v16i8) {
6956 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
6957 return;
6958 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6959 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
6960 return;
6961 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6962 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
6963 return;
6964 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6965 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
6966 return;
6967 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6968 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
6969 return;
6970 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6971 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
6972 return;
6973 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6974 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
6975 return;
6976 }
6977 break;
6978 }
6979 case AArch64ISD::LD2DUPpost: {
6980 if (VT == MVT::v8i8) {
6981 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
6982 return;
6983 } else if (VT == MVT::v16i8) {
6984 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
6985 return;
6986 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6987 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
6988 return;
6989 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6990 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
6991 return;
6992 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6993 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
6994 return;
6995 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6996 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
6997 return;
6998 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6999 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
7000 return;
7001 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7002 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
7003 return;
7004 }
7005 break;
7006 }
7007 case AArch64ISD::LD3DUPpost: {
7008 if (VT == MVT::v8i8) {
7009 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
7010 return;
7011 } else if (VT == MVT::v16i8) {
7012 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
7013 return;
7014 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7015 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
7016 return;
7017 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7018 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
7019 return;
7020 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7021 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
7022 return;
7023 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7024 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
7025 return;
7026 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7027 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
7028 return;
7029 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7030 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
7031 return;
7032 }
7033 break;
7034 }
7035 case AArch64ISD::LD4DUPpost: {
7036 if (VT == MVT::v8i8) {
7037 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
7038 return;
7039 } else if (VT == MVT::v16i8) {
7040 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
7041 return;
7042 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7043 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
7044 return;
7045 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7046 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
7047 return;
7048 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7049 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
7050 return;
7051 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7052 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
7053 return;
7054 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7055 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
7056 return;
7057 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7058 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
7059 return;
7060 }
7061 break;
7062 }
7063 case AArch64ISD::LD1LANEpost: {
7064 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7065 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
7066 return;
7067 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7068 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7069 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
7070 return;
7071 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7072 VT == MVT::v2f32) {
7073 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
7074 return;
7075 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7076 VT == MVT::v1f64) {
7077 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
7078 return;
7079 }
7080 break;
7081 }
7082 case AArch64ISD::LD2LANEpost: {
7083 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7084 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
7085 return;
7086 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7087 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7088 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
7089 return;
7090 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7091 VT == MVT::v2f32) {
7092 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
7093 return;
7094 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7095 VT == MVT::v1f64) {
7096 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
7097 return;
7098 }
7099 break;
7100 }
7101 case AArch64ISD::LD3LANEpost: {
7102 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7103 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
7104 return;
7105 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7106 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7107 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
7108 return;
7109 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7110 VT == MVT::v2f32) {
7111 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
7112 return;
7113 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7114 VT == MVT::v1f64) {
7115 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
7116 return;
7117 }
7118 break;
7119 }
7120 case AArch64ISD::LD4LANEpost: {
7121 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7122 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
7123 return;
7124 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7125 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7126 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
7127 return;
7128 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7129 VT == MVT::v2f32) {
7130 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
7131 return;
7132 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7133 VT == MVT::v1f64) {
7134 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
7135 return;
7136 }
7137 break;
7138 }
7139 case AArch64ISD::ST2post: {
7140 VT = Node->getOperand(1).getValueType();
7141 if (VT == MVT::v8i8) {
7142 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
7143 return;
7144 } else if (VT == MVT::v16i8) {
7145 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
7146 return;
7147 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7148 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
7149 return;
7150 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7151 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
7152 return;
7153 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7154 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
7155 return;
7156 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7157 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
7158 return;
7159 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7160 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
7161 return;
7162 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7163 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7164 return;
7165 }
7166 break;
7167 }
7168 case AArch64ISD::ST3post: {
7169 VT = Node->getOperand(1).getValueType();
7170 if (VT == MVT::v8i8) {
7171 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
7172 return;
7173 } else if (VT == MVT::v16i8) {
7174 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
7175 return;
7176 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7177 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
7178 return;
7179 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7180 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
7181 return;
7182 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7183 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
7184 return;
7185 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7186 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
7187 return;
7188 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7189 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
7190 return;
7191 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7192 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7193 return;
7194 }
7195 break;
7196 }
7197 case AArch64ISD::ST4post: {
7198 VT = Node->getOperand(1).getValueType();
7199 if (VT == MVT::v8i8) {
7200 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
7201 return;
7202 } else if (VT == MVT::v16i8) {
7203 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
7204 return;
7205 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7206 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
7207 return;
7208 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7209 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
7210 return;
7211 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7212 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
7213 return;
7214 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7215 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
7216 return;
7217 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7218 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
7219 return;
7220 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7221 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7222 return;
7223 }
7224 break;
7225 }
7226 case AArch64ISD::ST1x2post: {
7227 VT = Node->getOperand(1).getValueType();
7228 if (VT == MVT::v8i8) {
7229 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
7230 return;
7231 } else if (VT == MVT::v16i8) {
7232 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
7233 return;
7234 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7235 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
7236 return;
7237 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7238 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
7239 return;
7240 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7241 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
7242 return;
7243 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7244 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
7245 return;
7246 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7247 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7248 return;
7249 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7250 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
7251 return;
7252 }
7253 break;
7254 }
7255 case AArch64ISD::ST1x3post: {
7256 VT = Node->getOperand(1).getValueType();
7257 if (VT == MVT::v8i8) {
7258 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
7259 return;
7260 } else if (VT == MVT::v16i8) {
7261 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
7262 return;
7263 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7264 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
7265 return;
 7266 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7267 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
7268 return;
7269 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7270 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
7271 return;
7272 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7273 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
7274 return;
7275 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7276 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7277 return;
7278 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7279 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
7280 return;
7281 }
7282 break;
7283 }
7284 case AArch64ISD::ST1x4post: {
7285 VT = Node->getOperand(1).getValueType();
7286 if (VT == MVT::v8i8) {
7287 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
7288 return;
7289 } else if (VT == MVT::v16i8) {
7290 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
7291 return;
7292 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7293 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
7294 return;
7295 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7296 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
7297 return;
7298 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7299 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
7300 return;
7301 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7302 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
7303 return;
7304 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7305 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7306 return;
7307 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7308 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
7309 return;
7310 }
7311 break;
7312 }
7313 case AArch64ISD::ST2LANEpost: {
7314 VT = Node->getOperand(1).getValueType();
7315 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7316 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
7317 return;
7318 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7319 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7320 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
7321 return;
7322 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7323 VT == MVT::v2f32) {
7324 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
7325 return;
7326 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7327 VT == MVT::v1f64) {
7328 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
7329 return;
7330 }
7331 break;
7332 }
7333 case AArch64ISD::ST3LANEpost: {
7334 VT = Node->getOperand(1).getValueType();
7335 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7336 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
7337 return;
7338 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7339 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7340 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
7341 return;
7342 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7343 VT == MVT::v2f32) {
7344 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
7345 return;
7346 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7347 VT == MVT::v1f64) {
7348 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
7349 return;
7350 }
7351 break;
7352 }
7353 case AArch64ISD::ST4LANEpost: {
7354 VT = Node->getOperand(1).getValueType();
7355 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7356 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
7357 return;
7358 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7359 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7360 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
7361 return;
7362 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7363 VT == MVT::v2f32) {
7364 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
7365 return;
7366 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7367 VT == MVT::v1f64) {
7368 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
7369 return;
7370 }
7371 break;
7372 }
7373 }
7374
7375 // Select the default instruction
7376 SelectCode(Node);
7377}
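// Editorial note (illustrative, not part of the upstream source): every case in
// the switch above follows the same shape -- match the node's value type, pick a
// concrete machine opcode, and forward to a shared Select* helper. Whatever is
// not handled explicitly falls through to SelectCode(Node), the matcher table
// generated from the target's .td patterns.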
7378
7379/// createAArch64ISelDag - This pass converts a legalized DAG into a
7380/// AArch64-specific DAG, ready for instruction scheduling.
7381FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
 7382 CodeGenOptLevel OptLevel) {
7383 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7384}
7385
7386/// When \p PredVT is a scalable vector predicate in the form
7387/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
7388/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7389/// structured vectors (NumVec > 1), the output data type is
7390/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7391/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7392/// EVT.
7393static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
 7394 unsigned NumVec) {
7395 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
7396 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
7397 return EVT();
7398
7399 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7400 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7401 return EVT();
7402
7403 ElementCount EC = PredVT.getVectorElementCount();
7404 EVT ScalarVT =
7405 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7406 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
7407
7408 return MemVT;
7409}
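// Worked example (illustrative comment, not part of the upstream source):
//   getPackedVectorTypeFromPredicateType(Ctx, MVT::nxv4i1, /*NumVec=*/1)
//     -> nxv4i32   (128 / 4 = 32-bit elements, element count preserved)
//   getPackedVectorTypeFromPredicateType(Ctx, MVT::nxv8i1, /*NumVec=*/2)
//     -> nxv16i16  (16-bit elements, element count doubled for the tuple)
//   A non-predicate input such as nxv16i8 yields an invalid EVT.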
7410
7411/// Return the EVT of the data associated to a memory operation in \p
7412/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
7413static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
 7414 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Root))
7415 return MemIntr->getMemoryVT();
7416
7417 if (isa<MemSDNode>(Root)) {
7418 EVT MemVT = cast<MemSDNode>(Root)->getMemoryVT();
7419
7420 EVT DataVT;
7421 if (auto *Load = dyn_cast<LoadSDNode>(Root))
7422 DataVT = Load->getValueType(0);
7423 else if (auto *Load = dyn_cast<MaskedLoadSDNode>(Root))
7424 DataVT = Load->getValueType(0);
7425 else if (auto *Store = dyn_cast<StoreSDNode>(Root))
7426 DataVT = Store->getValue().getValueType();
7427 else if (auto *Store = dyn_cast<MaskedStoreSDNode>(Root))
7428 DataVT = Store->getValue().getValueType();
7429 else
7430 llvm_unreachable("Unexpected MemSDNode!");
7431
7432 return DataVT.changeVectorElementType(MemVT.getVectorElementType());
7433 }
7434
7435 const unsigned Opcode = Root->getOpcode();
7436 // For custom ISD nodes, we have to look at them individually to extract the
7437 // type of the data moved to/from memory.
7438 switch (Opcode) {
7439 case AArch64ISD::LD1_MERGE_ZERO:
7440 case AArch64ISD::LD1S_MERGE_ZERO:
7441 case AArch64ISD::LDNF1_MERGE_ZERO:
7442 case AArch64ISD::LDNF1S_MERGE_ZERO:
7443 return cast<VTSDNode>(Root->getOperand(3))->getVT();
7444 case AArch64ISD::ST1_PRED:
7445 return cast<VTSDNode>(Root->getOperand(4))->getVT();
7446 default:
7447 break;
7448 }
7449
7450 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7451 return EVT();
7452
7453 switch (Root->getConstantOperandVal(1)) {
7454 default:
7455 return EVT();
7456 case Intrinsic::aarch64_sme_ldr:
7457 case Intrinsic::aarch64_sme_str:
7458 return MVT::nxv16i8;
7459 case Intrinsic::aarch64_sve_prf:
7460 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7461 // width of the predicate.
 7462 return getPackedVectorTypeFromPredicateType(
 7463 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7464 case Intrinsic::aarch64_sve_ld2_sret:
7465 case Intrinsic::aarch64_sve_ld2q_sret:
 7466 return getPackedVectorTypeFromPredicateType(
 7467 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7468 case Intrinsic::aarch64_sve_st2q:
 7469 return getPackedVectorTypeFromPredicateType(
 7470 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7471 case Intrinsic::aarch64_sve_ld3_sret:
7472 case Intrinsic::aarch64_sve_ld3q_sret:
 7473 return getPackedVectorTypeFromPredicateType(
 7474 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7475 case Intrinsic::aarch64_sve_st3q:
 7476 return getPackedVectorTypeFromPredicateType(
 7477 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7478 case Intrinsic::aarch64_sve_ld4_sret:
7479 case Intrinsic::aarch64_sve_ld4q_sret:
 7480 return getPackedVectorTypeFromPredicateType(
 7481 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7482 case Intrinsic::aarch64_sve_st4q:
 7483 return getPackedVectorTypeFromPredicateType(
 7484 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
7485 case Intrinsic::aarch64_sve_ld1udq:
7486 case Intrinsic::aarch64_sve_st1dq:
7487 return EVT(MVT::nxv1i64);
7488 case Intrinsic::aarch64_sve_ld1uwq:
7489 case Intrinsic::aarch64_sve_st1wq:
7490 return EVT(MVT::nxv1i32);
7491 }
7492}
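// Illustrative example (not part of the upstream source): for an
// @llvm.aarch64.sve.ld2.sret call whose governing predicate is nxv8i1, the
// switch above returns getPackedVectorTypeFromPredicateType(Ctx, nxv8i1, 2),
// i.e. nxv16i16 -- the total data moved by the two-register structured load.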
7493
7494/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7495/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
7496/// where Root is the memory access using N for its address.
7497template <int64_t Min, int64_t Max>
7498bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7499 SDValue &Base,
7500 SDValue &OffImm) {
7501 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
7502 const DataLayout &DL = CurDAG->getDataLayout();
7503 const MachineFrameInfo &MFI = MF->getFrameInfo();
7504
7505 if (N.getOpcode() == ISD::FrameIndex) {
7506 int FI = cast<FrameIndexSDNode>(N)->getIndex();
7507 // We can only encode VL scaled offsets, so only fold in frame indexes
7508 // referencing SVE objects.
7509 if (MFI.hasScalableStackID(FI)) {
7510 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7511 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7512 return true;
7513 }
7514
7515 return false;
7516 }
7517
7518 if (MemVT == EVT())
7519 return false;
7520
7521 if (N.getOpcode() != ISD::ADD)
7522 return false;
7523
7524 SDValue VScale = N.getOperand(1);
7525 int64_t MulImm = std::numeric_limits<int64_t>::max();
7526 if (VScale.getOpcode() == ISD::VSCALE) {
7527 MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7528 } else if (auto C = dyn_cast<ConstantSDNode>(VScale)) {
7529 int64_t ByteOffset = C->getSExtValue();
 7530 const auto KnownVScale =
 7531 Subtarget->getSVEVectorSizeInBits() / AArch64::SVEBitsPerBlock;
 7532
7533 if (!KnownVScale || ByteOffset % KnownVScale != 0)
7534 return false;
7535
7536 MulImm = ByteOffset / KnownVScale;
7537 } else
7538 return false;
7539
7540 TypeSize TS = MemVT.getSizeInBits();
7541 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7542
7543 if ((MulImm % MemWidthBytes) != 0)
7544 return false;
7545
7546 int64_t Offset = MulImm / MemWidthBytes;
 7547 if (Offset < Min || Offset > Max)
 7548 return false;
7549
7550 Base = N.getOperand(0);
7551 if (Base.getOpcode() == ISD::FrameIndex) {
7552 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7553 // We can only encode VL scaled offsets, so only fold in frame indexes
7554 // referencing SVE objects.
7555 if (MFI.hasScalableStackID(FI))
7556 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7557 }
7558
7559 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7560 return true;
7561}
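// Worked example (illustrative, not part of the upstream source): for a
// predicated nxv2i64 access whose address is (add %base, (vscale * 32)),
// MemVT = nxv2i64 gives MemWidthBytes = 16, so Offset = 32 / 16 = 2. With the
// usual Min = -8, Max = 7 instantiation this folds into the immediate form,
// e.g. "ld1d { z0.d }, p0/z, [x0, #2, mul vl]", whereas (vscale * 40) is
// rejected because 40 is not a multiple of the 16-byte memory width.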
7562
7563/// Select register plus register addressing mode for SVE, with scaled
7564/// offset.
7565bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7566 SDValue &Base,
7567 SDValue &Offset) {
7568 if (N.getOpcode() != ISD::ADD)
7569 return false;
7570
7571 // Process an ADD node.
7572 const SDValue LHS = N.getOperand(0);
7573 const SDValue RHS = N.getOperand(1);
7574
7575 // 8 bit data does not come with the SHL node, so it is treated
7576 // separately.
7577 if (Scale == 0) {
7578 Base = LHS;
7579 Offset = RHS;
7580 return true;
7581 }
7582
7583 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7584 int64_t ImmOff = C->getSExtValue();
7585 unsigned Size = 1 << Scale;
7586
7587 // To use the reg+reg addressing mode, the immediate must be a multiple of
7588 // the vector element's byte size.
7589 if (ImmOff % Size)
7590 return false;
7591
7592 SDLoc DL(N);
7593 Base = LHS;
7594 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7595 SDValue Ops[] = {Offset};
7596 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7597 Offset = SDValue(MI, 0);
7598 return true;
7599 }
7600
7601 // Check if the RHS is a shift node with a constant.
7602 if (RHS.getOpcode() != ISD::SHL)
7603 return false;
7604
7605 const SDValue ShiftRHS = RHS.getOperand(1);
7606 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7607 if (C->getZExtValue() == Scale) {
7608 Base = LHS;
7609 Offset = RHS.getOperand(0);
7610 return true;
7611 }
7612
7613 return false;
7614}
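// Illustrative example (not part of the upstream source): with Scale = 3
// (64-bit elements), an address (add %base, (shl %idx, 3)) matches the SHL path
// above, giving Base = %base and Offset = %idx, i.e. the scaled form
// [x0, x1, lsl #3]. A constant RHS such as 48 must be a multiple of 8 and is
// materialised as 48 >> 3 = 6 through a MOVi64imm node instead.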
7615
7616bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7617 const AArch64TargetLowering *TLI =
7618 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7619
7620 return TLI->isAllActivePredicate(*CurDAG, N);
7621}
7622
7623bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7624 EVT VT = N.getValueType();
7625 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7626}
7627
7628bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7630 unsigned Scale) {
7631 auto MatchConstantOffset = [&](SDValue CN) -> SDValue {
7632 if (auto *C = dyn_cast<ConstantSDNode>(CN)) {
7633 int64_t ImmOff = C->getSExtValue();
7634 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)))
7635 return CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7636 }
7637 return SDValue();
7638 };
7639
7640 if (SDValue C = MatchConstantOffset(N)) {
7641 Base = CurDAG->getConstant(0, SDLoc(N), MVT::i32);
7642 Offset = C;
7643 return true;
7644 }
7645
7646 // Try to untangle an ADD node into a 'reg + offset'
7647 if (CurDAG->isBaseWithConstantOffset(N)) {
7648 if (SDValue C = MatchConstantOffset(N.getOperand(1))) {
7649 Base = N.getOperand(0);
7650 Offset = C;
7651 return true;
7652 }
7653 }
7654
7655 // By default, just match reg + 0.
7656 Base = N;
7657 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7658 return true;
7659}
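// Illustrative example (not part of the upstream source): with MaxSize = 7 and
// Scale = 1, a slice index of (add %idx, 3) splits into Base = %idx and
// Offset = 3, a bare constant 5 becomes Base = 0 with Offset = 5, and anything
// else falls back to Base = N with Offset = 0.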
7660
7661bool AArch64DAGToDAGISel::SelectCmpBranchUImm6Operand(SDNode *P, SDValue N,
7662 SDValue &Imm) {
 7663 AArch64CC::CondCode CC =
 7664 static_cast<AArch64CC::CondCode>(P->getConstantOperandVal(1));
7665 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
7666 // Check conservatively if the immediate fits the valid range [0, 64).
7667 // Immediate variants for GE and HS definitely need to be decremented
7668 // when lowering the pseudos later, so an immediate of 1 would become 0.
7669 // For the inverse conditions LT and LO we don't know for sure if they
7670 // will need a decrement but should the decision be made to reverse the
7671 // branch condition, we again end up with the need to decrement.
7672 // The same argument holds for LE, LS, GT and HI and possibly
7673 // incremented immediates. This can lead to slightly less optimal
7674 // codegen, e.g. we never codegen the legal case
7675 // cblt w0, #63, A
7676 // because we could end up with the illegal case
7677 // cbge w0, #64, B
7678 // should the decision to reverse the branch direction be made. For the
7679 // lower bound cases this is no problem since we can express comparisons
 7680 // against 0 with either tbz/tbnz or using wzr/xzr.
7681 uint64_t LowerBound = 0, UpperBound = 64;
7682 switch (CC) {
7683 case AArch64CC::GE:
7684 case AArch64CC::HS:
7685 case AArch64CC::LT:
7686 case AArch64CC::LO:
7687 LowerBound = 1;
7688 break;
7689 case AArch64CC::LE:
7690 case AArch64CC::LS:
7691 case AArch64CC::GT:
7692 case AArch64CC::HI:
7693 UpperBound = 63;
7694 break;
7695 default:
7696 break;
7697 }
7698
7699 if (CN->getAPIntValue().uge(LowerBound) &&
7700 CN->getAPIntValue().ult(UpperBound)) {
7701 SDLoc DL(N);
7702 Imm = CurDAG->getTargetConstant(CN->getZExtValue(), DL, N.getValueType());
7703 return true;
7704 }
7705 }
7706
7707 return false;
7708}
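// Illustrative example (not part of the upstream source): for a GE condition
// the accepted immediates shrink to [1, 64), so a compare against #0 is left to
// other patterns (a later decrement could not be encoded), while for LE the
// range becomes [0, 63) so a potential increment of the immediate still fits
// the 6-bit field.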
7709
7710template <bool MatchCBB>
7711bool AArch64DAGToDAGISel::SelectCmpBranchExtOperand(SDValue N, SDValue &Reg,
7712 SDValue &ExtType) {
7713
7714 // Use an invalid shift-extend value to indicate we don't need to extend later
7715 if (N.getOpcode() == ISD::AssertZext || N.getOpcode() == ISD::AssertSext) {
7716 EVT Ty = cast<VTSDNode>(N.getOperand(1))->getVT();
7717 if (Ty != (MatchCBB ? MVT::i8 : MVT::i16))
7718 return false;
7719 Reg = N.getOperand(0);
7720 ExtType = CurDAG->getSignedTargetConstant(AArch64_AM::InvalidShiftExtend,
7721 SDLoc(N), MVT::i32);
7722 return true;
7723 }
7724
 7725 AArch64_AM::ShiftExtendType ET = getExtendTypeForNode(N);
 7726
7727 if ((MatchCBB && (ET == AArch64_AM::UXTB || ET == AArch64_AM::SXTB)) ||
7728 (!MatchCBB && (ET == AArch64_AM::UXTH || ET == AArch64_AM::SXTH))) {
7729 Reg = N.getOperand(0);
7730 ExtType =
7731 CurDAG->getTargetConstant(getExtendEncoding(ET), SDLoc(N), MVT::i32);
7732 return true;
7733 }
7734
7735 return false;
7736}
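// Illustrative example (not part of the upstream source): when matching the
// byte form (MatchCBB = true), an operand (AssertZext i8 %x) returns %x with
// InvalidShiftExtend, meaning no further extension is needed, whereas a node
// recognised as a UXTB/SXTB extension returns its input register together with
// the corresponding extend encoding so the compare-and-branch can apply it.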
unsigned SubReg
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms)
static int getIntOperandFromRegisterString(StringRef RegString)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, unsigned NumberOfIgnoredHighBits, EVT VT)
Does DstMask form a complementary pair with the mask provided by BitsToBeInserted,...
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N)
Instructions that accept extend modifiers like UXTW expect the register being extended to be a GPR32,...
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op, SDValue &Src, int &DstLSB, int &Width)
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, SDValue &Src, int &DstLSB, int &Width)
Does this tree qualify as an attempt to move a bitfield into position, essentially "(and (shl VAL,...
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB, unsigned NumberOfIgnoredLowBits, bool BiggerPattern)
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits=0, bool BiggerPattern=false)
static bool isShiftedMask(uint64_t Mask, EVT VT)
bool SelectSMETile(unsigned &BaseReg, unsigned TileNum)
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root)
Return the EVT of the data associated to a memory operation in Root.
static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isWorthFoldingADDlow(SDValue N)
If there's a use of this ADDlow that's not itself a load/store then we'll need to create a real ADD i...
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N)
getShiftTypeForNode - Translate a shift node to the corresponding ShiftType value.
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB)
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef< unsigned > Opcodes)
This function selects an opcode from a list of opcodes, which is expected to be the opcode for { 8-bi...
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, unsigned NumVec)
When PredVT is a scalable vector predicate in the form MVT::nx<M>xi1, it builds the correspondent sca...
static bool isPreferredADD(int64_t ImmOff)
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, uint64_t Imm, uint64_t MSB, unsigned Depth)
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount)
Create a machine node performing a notional SHL of Op by ShlAmount.
static bool isWorthFoldingSHL(SDValue V)
Determine whether it is worth it to fold SHL into the addressing mode.
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, bool BiggerPattern)
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, SDValue Src, SDValue Dst, SelectionDAG *CurDAG, const bool BiggerPattern)
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, SDValue Orig, unsigned Depth)
static bool isMemOpOrPrefetch(SDNode *N)
static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG)
static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth)
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth=0)
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected)
static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore=false)
getExtendTypeForNode - Translate an extend node to the corresponding ExtendType value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, SDValue &ShiftedOperand, uint64_t &EncodedShiftImm)
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range, unsigned Size)
Check if the immediate offset is valid as a scaled immediate.
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
return SDValue()
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG)
WidenVector - Given a value in the V64 register class, produce the equivalent value in the V128 regis...
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define DEBUG_TYPE
IRTranslator LLVM IR MI
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t High
OptimizedStructLayoutField Field
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
#define LLVM_DEBUG(...)
Definition Debug.h:114
#define PASS_NAME
Value * RHS
Value * LHS
const AArch64RegisterInfo * getRegisterInfo() const override
bool isStreaming() const
Returns true if the function has a streaming body.
bool isX16X17Safer() const
Returns whether the operating system makes it safer to store sensitive values in x16 and x17 as oppos...
unsigned getSVEVectorSizeInBits() const
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
LLVM_ABI bool getExactInverse(APFloat *Inv) const
If this value is normal and has an exact, normal, multiplicative inverse, store it in inv and return ...
Definition APFloat.cpp:5995
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1314
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1671
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1489
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1640
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1599
void flipAllBits()
Toggle every bit to its opposite value.
Definition APInt.h:1453
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:511
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1563
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:859
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const GlobalValue * getGlobal() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
bool hasScalableStackID(int ObjectIdx) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
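The SDNode/SDValue accessors above are the vocabulary of every matcher in this file. A minimal sketch of the usual shape, assuming the public SelectionDAG headers; the helper name is illustrative:

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"

// Sketch only: match (shl X, C) where the shift result has a single use,
// returning X and the constant shift amount C.
static bool matchOneUseShl(llvm::SDValue V, llvm::SDValue &X, uint64_t &Amt) {
  if (V.getOpcode() != llvm::ISD::SHL || !V.hasOneUse())
    return false;
  // getConstantOperandVal is only valid when the operand is a constant node.
  if (!llvm::isa<llvm::ConstantSDNode>(V.getOperand(1)))
    return false;
  X = V.getOperand(0);
  Amt = V.getConstantOperandVal(1);
  return true;
}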
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
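The SelectionDAG helpers above (getTargetConstant, SelectNodeTo, getMachineNode, and friends) are how Select routines materialize machine nodes. A minimal sketch, assuming the AArch64 backend's generated opcode enum for AArch64::UBFMXri; everything else here is illustrative:

#include "llvm/CodeGen/SelectionDAG.h"

// Sketch only: replace N with a UBFM (bitfield extract) machine node that
// extracts Width bits of Src starting at bit Lsb.
static llvm::SDNode *emitUBFX(llvm::SelectionDAG *CurDAG, llvm::SDNode *N,
                              llvm::SDValue Src, uint64_t Lsb, uint64_t Width) {
  llvm::SDLoc DL(N);
  llvm::EVT VT = N->getValueType(0);
  llvm::SDValue Ops[] = {
      Src,
      CurDAG->getTargetConstant(Lsb, DL, VT),               // immr
      CurDAG->getTargetConstant(Lsb + Width - 1, DL, VT)};  // imms
  // SelectNodeTo mutates N in place into the selected machine instruction.
  return CurDAG->SelectNodeTo(N, llvm::AArch64::UBFMXri, VT, Ops);
}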
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:702
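StringRef::split above is used when parsing register-name strings (see parseGenericRegister further down). A minimal sketch, with an illustrative "prefix:name" format that is not from the file:

#include "llvm/ADT/StringRef.h"

// Sketch only: strip an optional "prefix:" from a register name.
// split() returns {Name, ""} when the separator is absent.
static llvm::StringRef stripRegPrefix(llvm::StringRef Name) {
  auto [Prefix, Rest] = Name.split(':');
  return Rest.empty() ? Prefix : Rest;
}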
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
unsigned getID() const
Return the register class ID number.
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:956
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint32_t parseGenericRegister(StringRef Name)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static bool processLogicalImmediate(uint64_t Imm, unsigned RegSize, uint64_t &Encoding)
processLogicalImmediate - Determine if an immediate value can be encoded as the immediate operand of ...
unsigned getExtendEncoding(AArch64_AM::ShiftExtendType ET)
Mapping from extend bits to required operation: shifter: 000 ==> uxtb 001 ==> uxth 010 ==> uxtw 011 =...
static bool isSVECpyDupImm(int SizeInBits, int64_t Val, int32_t &Imm, int32_t &Shift)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static constexpr unsigned SVEBitsPerBlock
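The AArch64_AM helpers above encode and decode the backend's immediate forms. A minimal sketch of the logical-immediate round trip, assuming the backend-internal MCTargetDesc/AArch64AddressingModes.h header; the helper name and the ~0ULL sentinel are illustrative:

#include "MCTargetDesc/AArch64AddressingModes.h"
#include <cassert>
#include <cstdint>

// Sketch only: return the N:immr:imms encoding of Imm if it is a valid
// AND/ORR/EOR immediate for the given register size, ~0ULL otherwise.
static uint64_t encodeIfLogicalImm(uint64_t Imm, unsigned RegSize) {
  uint64_t Encoding = 0;
  if (!llvm::AArch64_AM::processLogicalImmediate(Imm, RegSize, Encoding))
    return ~0ULL;
  // decodeLogicalImmediate inverts processLogicalImmediate.
  assert(llvm::AArch64_AM::decodeLogicalImmediate(Encoding, RegSize) == Imm);
  return Encoding;
}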
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:593
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:669
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition ISDOpcodes.h:69
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:225
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:607
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:134
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
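The ISD opcodes above are what the selector dispatches on. A minimal sketch of the common extension-peeling idiom; the helper name is illustrative:

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"

// Sketch only: strip ZERO_EXTEND/SIGN_EXTEND/ANY_EXTEND wrappers to reach
// the value that is being extended.
static llvm::SDValue peelExtensions(llvm::SDValue V) {
  while (V.getOpcode() == llvm::ISD::ZERO_EXTEND ||
         V.getOpcode() == llvm::ISD::SIGN_EXTEND ||
         V.getOpcode() == llvm::ISD::ANY_EXTEND)
    V = V.getOperand(0);
  return V;
}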
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
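ISD::isConstantSplatVector above is the usual way to pull an immediate out of a vector operand before checking it against an instruction's immediate range. A minimal sketch, with an illustrative signed-byte range check:

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"

// Sketch only: true if V splats a constant that fits in a signed byte;
// Imm receives the sign-extended value.
static bool isSplatOfInt8(llvm::SDValue V, int64_t &Imm) {
  llvm::APInt Splat;
  if (!llvm::ISD::isConstantSplatVector(V.getNode(), Splat))
    return false;
  if (Splat.getSignificantBits() > 64)
    return false;
  Imm = Splat.getSExtValue();
  return Imm >= -128 && Imm <= 127;
}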
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition MathExtras.h:267
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
int countr_zero(T Val)
Count the number of zero bits from the least significant bit upward, stopping at the first one bit.
Definition bit.h:202
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:1968
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAArch64ISelDag(AArch64TargetMachine &TM, CodeGenOptLevel OptLevel)
createAArch64ISelDag - This pass converts a legalized DAG into a AArch64-specific DAG,...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
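The MathExtras/bit helpers above are the scalar (uint64_t) counterparts of the APInt queries and appear in the same mask checks. A minimal sketch, assuming only public LLVM support headers; the helper name is illustrative:

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cstdint>

// Sketch only: if Imm is a contiguous run of ones shifted left, report the
// shift and the run length so that
// maskTrailingOnes<uint64_t>(Width) << Shift == Imm.
static bool splitShiftedMask(uint64_t Imm, unsigned &Shift, unsigned &Width) {
  if (!llvm::isShiftedMask_64(Imm))
    return false;
  Shift = llvm::countr_zero(Imm);          // zeros below the run
  Width = llvm::countr_one(Imm >> Shift);  // length of the run
  return (llvm::maskTrailingOnes<uint64_t>(Width) << Shift) == Imm;
}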
Extended Value Type.
Definition ValueTypes.h:35
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:463
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isFixedLengthVector() const
Definition ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition ValueTypes.h:202
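The EVT accessors above drive the type arithmetic in this file, for example when choosing scalable container types for fixed-length vectors. A minimal sketch, assuming the public ValueTypes.h header; the 128-bit block size and the helper name are illustrative of the SVE case:

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>

// Sketch only: wrap FixedVT's element type in a scalable vector whose
// 128-bit block is fully packed.
static llvm::EVT getPackedScalableContainer(llvm::LLVMContext &Ctx,
                                            llvm::EVT FixedVT) {
  assert(FixedVT.isFixedLengthVector() && "expected a fixed-length vector");
  llvm::EVT EltVT = FixedVT.getVectorElementType();
  unsigned EltsPerBlock = 128 / EltVT.getFixedSizeInBits();
  return llvm::EVT::getVectorVT(Ctx, EltVT, EltsPerBlock, /*IsScalable=*/true);
}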
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44