1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64MachineFunctionInfo.h"
14#include "AArch64TargetMachine.h"
15#include "MCTargetDesc/AArch64AddressingModes.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/CodeGen/ISDOpcodes.h"
18#include "llvm/CodeGen/SelectionDAGISel.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/ErrorHandling.h"
25#include "llvm/Support/KnownBits.h"
26#include "llvm/Support/MathExtras.h"
27#include "llvm/Support/raw_ostream.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34//===--------------------------------------------------------------------===//
35/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
36/// instructions for SelectionDAG operations.
37///
38namespace {
39
40class AArch64DAGToDAGISel : public SelectionDAGISel {
41
42 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
43 /// make the right decision when generating code for different targets.
44 const AArch64Subtarget *Subtarget;
45
46public:
47 AArch64DAGToDAGISel() = delete;
48
49 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
50 CodeGenOptLevel OptLevel)
51 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
52
53 bool runOnMachineFunction(MachineFunction &MF) override {
54 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
55 return SelectionDAGISel::runOnMachineFunction(MF);
56 }
57
58 void Select(SDNode *Node) override;
59
60 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
61 /// inline asm expressions.
62 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
63 InlineAsm::ConstraintCode ConstraintID,
64 std::vector<SDValue> &OutOps) override;
65
66 template <signed Low, signed High, signed Scale>
67 bool SelectRDVLImm(SDValue N, SDValue &Imm);
68
69 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
70 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
71 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
72 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
73 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
74 return SelectShiftedRegister(N, false, Reg, Shift);
75 }
76 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
77 return SelectShiftedRegister(N, true, Reg, Shift);
78 }
79 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
80 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
81 }
82 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
83 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
84 }
85 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
86 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
87 }
88 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
89 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
90 }
91 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
92 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
93 }
94 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
95 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
96 }
97 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
98 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
99 }
100 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
101 return SelectAddrModeIndexed(N, 1, Base, OffImm);
102 }
103 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
104 return SelectAddrModeIndexed(N, 2, Base, OffImm);
105 }
106 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
107 return SelectAddrModeIndexed(N, 4, Base, OffImm);
108 }
109 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
110 return SelectAddrModeIndexed(N, 8, Base, OffImm);
111 }
112 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
113 return SelectAddrModeIndexed(N, 16, Base, OffImm);
114 }
115 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
116 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
117 }
118 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
119 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
120 }
121 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
122 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
123 }
124 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
125 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
126 }
127 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
128 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
129 }
130 template <unsigned Size, unsigned Max>
131 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
132 // Test if there is an appropriate addressing mode and check if the
133 // immediate fits.
134 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
135 if (Found) {
136 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
137 int64_t C = CI->getSExtValue();
138 if (C <= Max)
139 return true;
140 }
141 }
142
143 // Otherwise, base only, materialize address in register.
144 Base = N;
145 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
146 return true;
147 }
148
149 template<int Width>
150 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
151 SDValue &SignExtend, SDValue &DoShift) {
152 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
153 }
154
155 template<int Width>
156 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
157 SDValue &SignExtend, SDValue &DoShift) {
158 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
159 }
160
161 bool SelectExtractHigh(SDValue N, SDValue &Res) {
162 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
163 N = N->getOperand(0);
164 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
165 !isa<ConstantSDNode>(N->getOperand(1)))
166 return false;
167 EVT VT = N->getValueType(0);
168 EVT LVT = N->getOperand(0).getValueType();
169 unsigned Index = N->getConstantOperandVal(1);
170 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
171 Index != VT.getVectorNumElements())
172 return false;
173 Res = N->getOperand(0);
174 return true;
175 }
176
177 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
178 if (N.getOpcode() != AArch64ISD::VLSHR)
179 return false;
180 SDValue Op = N->getOperand(0);
181 EVT VT = Op.getValueType();
182 unsigned ShtAmt = N->getConstantOperandVal(1);
183 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
184 return false;
185
186 APInt Imm;
187 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
188 Imm = APInt(VT.getScalarSizeInBits(),
189 Op.getOperand(1).getConstantOperandVal(0)
190 << Op.getOperand(1).getConstantOperandVal(1));
191 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
192 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
193 Imm = APInt(VT.getScalarSizeInBits(),
194 Op.getOperand(1).getConstantOperandVal(0));
195 else
196 return false;
197
198 if (Imm != 1ULL << (ShtAmt - 1))
199 return false;
200
201 Res1 = Op.getOperand(0);
202 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
203 return true;
204 }
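  // Worked example (illustrative): for a v8i16 operand, (X + splat(8)) >>u 4
  // satisfies this check because 8 == 1 << (4 - 1), so Res1 becomes X and
  // Res2 the shift amount 4, which is the shape expected by rounding-shift
  // patterns (URSHR/RSHRN-style instructions).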
205
206 bool SelectDupZeroOrUndef(SDValue N) {
207 switch(N->getOpcode()) {
208 case ISD::UNDEF:
209 return true;
210 case AArch64ISD::DUP:
211 case ISD::SPLAT_VECTOR: {
212 auto Opnd0 = N->getOperand(0);
213 if (isNullConstant(Opnd0))
214 return true;
215 if (isNullFPConstant(Opnd0))
216 return true;
217 break;
218 }
219 default:
220 break;
221 }
222
223 return false;
224 }
225
226 bool SelectDupZero(SDValue N) {
227 switch(N->getOpcode()) {
228 case AArch64ISD::DUP:
229 case ISD::SPLAT_VECTOR: {
230 auto Opnd0 = N->getOperand(0);
231 if (isNullConstant(Opnd0))
232 return true;
233 if (isNullFPConstant(Opnd0))
234 return true;
235 break;
236 }
237 }
238
239 return false;
240 }
241
242 bool SelectDupNegativeZero(SDValue N) {
243 switch(N->getOpcode()) {
244 case AArch64ISD::DUP:
245 case ISD::SPLAT_VECTOR: {
246 ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
247 return Const && Const->isZero() && Const->isNegative();
248 }
249 }
250
251 return false;
252 }
253
254 template<MVT::SimpleValueType VT>
255 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
256 return SelectSVEAddSubImm(N, VT, Imm, Shift);
257 }
258
259 template <MVT::SimpleValueType VT, bool Negate>
260 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
261 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
262 }
263
264 template <MVT::SimpleValueType VT>
265 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
266 return SelectSVECpyDupImm(N, VT, Imm, Shift);
267 }
268
269 template <MVT::SimpleValueType VT, bool Invert = false>
270 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
271 return SelectSVELogicalImm(N, VT, Imm, Invert);
272 }
273
274 template <MVT::SimpleValueType VT>
275 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
276 return SelectSVEArithImm(N, VT, Imm);
277 }
278
279 template <unsigned Low, unsigned High, bool AllowSaturation = false>
280 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
281 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
282 }
283
284 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
285 if (N->getOpcode() != ISD::SPLAT_VECTOR)
286 return false;
287
288 EVT EltVT = N->getValueType(0).getVectorElementType();
289 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
290 /* High */ EltVT.getFixedSizeInBits(),
291 /* AllowSaturation */ true, Imm);
292 }
293
294 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
295 template<signed Min, signed Max, signed Scale, bool Shift>
296 bool SelectCntImm(SDValue N, SDValue &Imm) {
297 if (!isa<ConstantSDNode>(N))
298 return false;
299
300 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
301 if (Shift)
302 MulImm = 1LL << MulImm;
303
304 if ((MulImm % std::abs(Scale)) != 0)
305 return false;
306
307 MulImm /= Scale;
308 if ((MulImm >= Min) && (MulImm <= Max)) {
309 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
310 return true;
311 }
312
313 return false;
314 }
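  // Worked example (illustrative): with <Min=1, Max=16, Scale=16, Shift=false>,
  // a constant 48 gives MulImm = 48 / 16 = 3, which lies in [1, 16], so Imm is
  // set to 3 and VSCALE * 48 can be formed with a CNTB-style instruction and
  // multiplier #3.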
315
316 template <signed Max, signed Scale>
317 bool SelectEXTImm(SDValue N, SDValue &Imm) {
318 if (!isa<ConstantSDNode>(N))
319 return false;
320
321 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
322
323 if (MulImm >= 0 && MulImm <= Max) {
324 MulImm *= Scale;
325 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
326 return true;
327 }
328
329 return false;
330 }
331
332 template <unsigned BaseReg, unsigned Max>
333 bool ImmToReg(SDValue N, SDValue &Imm) {
334 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
335 uint64_t C = CI->getZExtValue();
336
337 if (C > Max)
338 return false;
339
340 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
341 return true;
342 }
343 return false;
344 }
345
346 /// Form sequences of consecutive 64/128-bit registers for use in NEON
347 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
348 /// between 1 and 4 elements. If it contains a single element, that element
349 /// is returned unchanged; otherwise a REG_SEQUENCE value is returned.
350 SDValue createDTuple(ArrayRef<SDValue> Vecs);
351 SDValue createQTuple(ArrayRef<SDValue> Vecs);
352 // Form a sequence of SVE registers for instructions using a list of vectors,
353 // e.g. structured loads and stores (ldN, stN).
354 SDValue createZTuple(ArrayRef<SDValue> Vecs);
355
356 // Similar to above, except the register must start at a multiple of the
357 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
358 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
359
360 /// Generic helper for the createDTuple/createQTuple
361 /// functions. Those should almost always be called instead.
362 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
363 const unsigned SubRegs[]);
364
365 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
366
367 bool tryIndexedLoad(SDNode *N);
368
369 void SelectPtrauthAuth(SDNode *N);
370 void SelectPtrauthResign(SDNode *N);
371
372 bool trySelectStackSlotTagP(SDNode *N);
373 void SelectTagP(SDNode *N);
374
375 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
376 unsigned SubRegIdx);
377 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
378 unsigned SubRegIdx);
379 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
380 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
381 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
382 unsigned Opc_rr, unsigned Opc_ri,
383 bool IsIntr = false);
384 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
385 unsigned Scale, unsigned Opc_ri,
386 unsigned Opc_rr);
387 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
388 bool IsZmMulti, unsigned Opcode,
389 bool HasPred = false);
390 void SelectPExtPair(SDNode *N, unsigned Opc);
391 void SelectWhilePair(SDNode *N, unsigned Opc);
392 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
393 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
394 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
395 bool IsTupleInput, unsigned Opc);
396 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
397
398 template <unsigned MaxIdx, unsigned Scale>
399 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
400 unsigned Op);
401 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
402 unsigned Op, unsigned MaxIdx, unsigned Scale,
403 unsigned BaseReg = 0);
404 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
405 /// SVE Reg+Imm addressing mode.
406 template <int64_t Min, int64_t Max>
407 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
408 SDValue &OffImm);
409 /// SVE Reg+Reg address mode.
410 template <unsigned Scale>
411 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
412 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
413 }
414
415 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc,
416 uint32_t MaxImm);
417
418 template <unsigned MaxIdx, unsigned Scale>
419 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
420 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
421 }
422
423 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
424 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
425 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
426 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
427 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
428 unsigned Opc_rr, unsigned Opc_ri);
429 std::tuple<unsigned, SDValue, SDValue>
430 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
431 const SDValue &OldBase, const SDValue &OldOffset,
432 unsigned Scale);
433
434 bool tryBitfieldExtractOp(SDNode *N);
435 bool tryBitfieldExtractOpFromSExt(SDNode *N);
436 bool tryBitfieldInsertOp(SDNode *N);
437 bool tryBitfieldInsertInZeroOp(SDNode *N);
438 bool tryShiftAmountMod(SDNode *N);
439
440 bool tryReadRegister(SDNode *N);
441 bool tryWriteRegister(SDNode *N);
442
443 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
444 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
445
446 bool trySelectXAR(SDNode *N);
447
448// Include the pieces autogenerated from the target description.
449#include "AArch64GenDAGISel.inc"
450
451private:
452 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
453 SDValue &Shift);
454 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
455 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
456 SDValue &OffImm) {
457 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
458 }
459 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
460 unsigned Size, SDValue &Base,
461 SDValue &OffImm);
462 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
463 SDValue &OffImm);
464 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
465 SDValue &OffImm);
466 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
467 SDValue &Offset, SDValue &SignExtend,
468 SDValue &DoShift);
469 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
470 SDValue &Offset, SDValue &SignExtend,
471 SDValue &DoShift);
472 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
473 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
474 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
475 SDValue &Offset, SDValue &SignExtend);
476
477 template<unsigned RegWidth>
478 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
479 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
480 }
481
482 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
483
484 template<unsigned RegWidth>
485 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
486 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
487 }
488
489 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
490 unsigned Width);
491
492 bool SelectCMP_SWAP(SDNode *N);
493
494 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
495 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
496 bool Negate);
497 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
498 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
499
500 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
501 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
502 bool AllowSaturation, SDValue &Imm);
503
504 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
505 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
506 SDValue &Offset);
507 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
508 SDValue &Offset, unsigned Scale = 1);
509
510 bool SelectAllActivePredicate(SDValue N);
511 bool SelectAnyPredicate(SDValue N);
512};
513
514class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
515public:
516 static char ID;
517 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
518 CodeGenOptLevel OptLevel)
519 : SelectionDAGISelLegacy(
520 ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
521};
522} // end anonymous namespace
523
524char AArch64DAGToDAGISelLegacy::ID = 0;
525
526INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
527
528/// isIntImmediate - This method tests to see if the node is a constant
529/// operand. If so, Imm will receive the immediate value.
530static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
531 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
532 Imm = C->getZExtValue();
533 return true;
534 }
535 return false;
536}
537
538// isIntImmediate - This method tests to see if N is a constant operand.
539// If so, Imm will receive the value.
540static bool isIntImmediate(SDValue N, uint64_t &Imm) {
541 return isIntImmediate(N.getNode(), Imm);
542}
543
544// isOpcWithIntImmediate - This method tests to see if the node is a specific
545// opcode and that it has an immediate integer right operand.
546// If so, Imm will receive the immediate value.
547static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
548 uint64_t &Imm) {
549 return N->getOpcode() == Opc &&
550 isIntImmediate(N->getOperand(1).getNode(), Imm);
551}
552
553// isIntImmediateEq - This method tests to see if N is a constant operand that
554// is equivalent to 'ImmExpected'.
555#ifndef NDEBUG
556static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
557 uint64_t Imm;
558 if (!isIntImmediate(N.getNode(), Imm))
559 return false;
560 return Imm == ImmExpected;
561}
562#endif
563
564bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
565 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
566 std::vector<SDValue> &OutOps) {
567 switch(ConstraintID) {
568 default:
569 llvm_unreachable("Unexpected asm memory constraint");
570 case InlineAsm::ConstraintCode::m:
571 case InlineAsm::ConstraintCode::o:
572 case InlineAsm::ConstraintCode::Q:
573 // We need to make sure that this one operand does not end up in XZR, thus
574 // require the address to be in a PointerRegClass register.
575 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
576 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
577 SDLoc dl(Op);
578 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
579 SDValue NewOp =
580 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
581 dl, Op.getValueType(),
582 Op, RC), 0);
583 OutOps.push_back(NewOp);
584 return false;
585 }
586 return true;
587}
588
589/// SelectArithImmed - Select an immediate value that can be represented as
590/// a 12-bit value shifted left by either 0 or 12. If so, return true with
591/// Val set to the 12-bit value and Shift set to the shifter operand.
592bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
593 SDValue &Shift) {
594 // This function is called from the addsub_shifted_imm ComplexPattern,
595 // which lists [imm] as the list of opcodes it's interested in; however,
596 // we still need to check whether the operand is actually an immediate
597 // here because the ComplexPattern opcode list is only used in
598 // root-level opcode matching.
599 if (!isa<ConstantSDNode>(N.getNode()))
600 return false;
601
602 uint64_t Immed = N.getNode()->getAsZExtVal();
603 unsigned ShiftAmt;
604
605 if (Immed >> 12 == 0) {
606 ShiftAmt = 0;
607 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
608 ShiftAmt = 12;
609 Immed = Immed >> 12;
610 } else
611 return false;
612
613 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
614 SDLoc dl(N);
615 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
616 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
617 return true;
618}
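// Worked example (illustrative): 0xabc is selected directly as #0xabc with
// LSL #0; 0x123000 is selected as #0x123 with LSL #12, since its low 12 bits
// are clear and 0x123000 >> 12 fits in 12 bits; 0x123456 fails both checks
// and is left to be materialized separately.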
619
620/// SelectNegArithImmed - As above, but negates the value before trying to
621/// select it.
622bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
623 SDValue &Shift) {
624 // This function is called from the addsub_shifted_imm ComplexPattern,
625 // which lists [imm] as the list of opcodes it's interested in; however,
626 // we still need to check whether the operand is actually an immediate
627 // here because the ComplexPattern opcode list is only used in
628 // root-level opcode matching.
629 if (!isa<ConstantSDNode>(N.getNode()))
630 return false;
631
632 // The immediate operand must be a 24-bit zero-extended immediate.
633 uint64_t Immed = N.getNode()->getAsZExtVal();
634
635 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
636 // have the opposite effect on the C flag, so this pattern mustn't match under
637 // those circumstances.
638 if (Immed == 0)
639 return false;
640
641 if (N.getValueType() == MVT::i32)
642 Immed = ~((uint32_t)Immed) + 1;
643 else
644 Immed = ~Immed + 1ULL;
645 if (Immed & 0xFFFFFFFFFF000000ULL)
646 return false;
647
648 Immed &= 0xFFFFFFULL;
649 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
650 Shift);
651}
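// Worked example (illustrative): for a 32-bit operand with immediate -16, the
// negated value 16 is a valid 12-bit immediate, so the operand is accepted and
// an "add w0, w1, #-16" can be selected as a "sub w0, w1, #16"-style
// instruction; an immediate of 0 is rejected because of the cmp/cmn C-flag
// issue noted above.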
652
653/// getShiftTypeForNode - Translate a shift node to the corresponding
654/// ShiftType value.
655static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
656 switch (N.getOpcode()) {
657 default:
658 return AArch64_AM::InvalidShiftExtend;
659 case ISD::SHL:
660 return AArch64_AM::LSL;
661 case ISD::SRL:
662 return AArch64_AM::LSR;
663 case ISD::SRA:
664 return AArch64_AM::ASR;
665 case ISD::ROTR:
666 return AArch64_AM::ROR;
667 }
668}
669
670/// Determine whether it is worth it to fold SHL into the addressing
671/// mode.
672static bool isWorthFoldingSHL(SDValue V) {
673 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
674 // It is worth folding logical shift of up to three places.
675 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
676 if (!CSD)
677 return false;
678 unsigned ShiftVal = CSD->getZExtValue();
679 if (ShiftVal > 3)
680 return false;
681
682 // Check if this particular node is reused in any non-memory related
683 // operation. If yes, do not try to fold this node into the address
684 // computation, since the computation will be kept.
685 const SDNode *Node = V.getNode();
686 for (SDNode *UI : Node->uses())
687 if (!isa<MemSDNode>(*UI))
688 for (SDNode *UII : UI->uses())
689 if (!isa<MemSDNode>(*UII))
690 return false;
691 return true;
692}
693
694/// Determine whether it is worth folding V into an extended register
695/// addressing mode.
696bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
697 // Trivial if we are optimizing for code size or if there is only
698 // one use of the value.
699 if (CurDAG->shouldOptForSize() || V.hasOneUse())
700 return true;
701
702 // If a subtarget has a slow shift, folding a shift into multiple loads
703 // costs additional micro-ops.
704 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
705 return false;
706
707 // Check whether we're going to emit the address arithmetic anyway because
708 // it's used by a non-address operation.
709 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
710 return true;
711 if (V.getOpcode() == ISD::ADD) {
712 const SDValue LHS = V.getOperand(0);
713 const SDValue RHS = V.getOperand(1);
714 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
715 return true;
716 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
717 return true;
718 }
719
720 // It hurts otherwise, since the value will be reused.
721 return false;
722}
723
724/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
725/// so that more shifted-register operands can be selected.
726bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
727 SDValue &Shift) {
728 EVT VT = N.getValueType();
729 if (VT != MVT::i32 && VT != MVT::i64)
730 return false;
731
732 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
733 return false;
734 SDValue LHS = N.getOperand(0);
735 if (!LHS->hasOneUse())
736 return false;
737
738 unsigned LHSOpcode = LHS->getOpcode();
739 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
740 return false;
741
742 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
743 if (!ShiftAmtNode)
744 return false;
745
746 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
747 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
748 if (!RHSC)
749 return false;
750
751 APInt AndMask = RHSC->getAPIntValue();
752 unsigned LowZBits, MaskLen;
753 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
754 return false;
755
756 unsigned BitWidth = N.getValueSizeInBits();
757 SDLoc DL(LHS);
758 uint64_t NewShiftC;
759 unsigned NewShiftOp;
760 if (LHSOpcode == ISD::SHL) {
761 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
762 // BitWidth != LowZBits + MaskLen doesn't match the pattern
763 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
764 return false;
765
766 NewShiftC = LowZBits - ShiftAmtC;
767 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
768 } else {
769 if (LowZBits == 0)
770 return false;
771
772 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
773 NewShiftC = LowZBits + ShiftAmtC;
774 if (NewShiftC >= BitWidth)
775 return false;
776
777 // SRA needs all high bits
778 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
779 return false;
780
781 // SRL high bits can be 0 or 1
782 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
783 return false;
784
785 if (LHSOpcode == ISD::SRL)
786 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
787 else
788 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
789 }
790
791 assert(NewShiftC < BitWidth && "Invalid shift amount");
792 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
793 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
794 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
795 NewShiftAmt, BitWidthMinus1),
796 0);
797 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
798 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
799 return true;
800}
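// Worked example (illustrative): for i32 "(x << 2) & 0xfffffff0", the mask has
// LowZBits == 4 and MaskLen == 28, so Reg becomes UBFMWri(x, 2, 31) (a logical
// shift right by 2) and Shift encodes LSL #4; ((x >> 2) << 4) computes the
// same value as the original and-of-shift.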
801
802/// getExtendTypeForNode - Translate an extend node to the corresponding
803/// ExtendType value.
804static AArch64_AM::ShiftExtendType
805getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
806 if (N.getOpcode() == ISD::SIGN_EXTEND ||
807 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
808 EVT SrcVT;
809 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
810 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
811 else
812 SrcVT = N.getOperand(0).getValueType();
813
814 if (!IsLoadStore && SrcVT == MVT::i8)
815 return AArch64_AM::SXTB;
816 else if (!IsLoadStore && SrcVT == MVT::i16)
817 return AArch64_AM::SXTH;
818 else if (SrcVT == MVT::i32)
819 return AArch64_AM::SXTW;
820 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
821
822 return AArch64_AM::InvalidShiftExtend;
823 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
824 N.getOpcode() == ISD::ANY_EXTEND) {
825 EVT SrcVT = N.getOperand(0).getValueType();
826 if (!IsLoadStore && SrcVT == MVT::i8)
827 return AArch64_AM::UXTB;
828 else if (!IsLoadStore && SrcVT == MVT::i16)
829 return AArch64_AM::UXTH;
830 else if (SrcVT == MVT::i32)
831 return AArch64_AM::UXTW;
832 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
833
834 return AArch64_AM::InvalidShiftExtend;
835 } else if (N.getOpcode() == ISD::AND) {
836 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
837 if (!CSD)
838 return AArch64_AM::InvalidShiftExtend;
839 uint64_t AndMask = CSD->getZExtValue();
840
841 switch (AndMask) {
842 default:
843 return AArch64_AM::InvalidShiftExtend;
844 case 0xFF:
845 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
846 case 0xFFFF:
847 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
848 case 0xFFFFFFFF:
849 return AArch64_AM::UXTW;
850 }
851 }
852
853 return AArch64_AM::InvalidShiftExtend;
854}
855
856/// Determine whether it is worth folding V into an extended register of an
857/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
858/// instruction, and the shift should be treated as worth folding even if it
859/// has multiple uses.
860bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
861 // Trivial if we are optimizing for code size or if there is only
862 // one use of the value.
863 if (CurDAG->shouldOptForSize() || V.hasOneUse())
864 return true;
865
866 // If a subtarget has a fastpath LSL we can fold a logical shift into
867 // the add/sub and save a cycle.
868 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
869 V.getConstantOperandVal(1) <= 4 &&
871 return true;
872
873 // It hurts otherwise, since the value will be reused.
874 return false;
875}
876
877/// SelectShiftedRegister - Select a "shifted register" operand. If the value
878/// is not shifted, set the Shift operand to default of "LSL 0". The logical
879/// instructions allow the shifted register to be rotated, but the arithmetic
880/// instructions do not. The AllowROR parameter specifies whether ROR is
881/// supported.
882bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
883 SDValue &Reg, SDValue &Shift) {
884 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
885 return true;
886
887 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
888 if (ShType == AArch64_AM::InvalidShiftExtend)
889 return false;
890 if (!AllowROR && ShType == AArch64_AM::ROR)
891 return false;
892
893 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
894 unsigned BitSize = N.getValueSizeInBits();
895 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
896 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
897
898 Reg = N.getOperand(0);
899 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
900 return isWorthFoldingALU(N, true);
901 }
902
903 return false;
904}
905
906/// Instructions that accept extend modifiers like UXTW expect the register
907/// being extended to be a GPR32, but the incoming DAG might be acting on a
908/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
909/// this is the case.
910static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
911 if (N.getValueType() == MVT::i32)
912 return N;
913
914 SDLoc dl(N);
915 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
916}
917
918// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
919template<signed Low, signed High, signed Scale>
920bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
921 if (!isa<ConstantSDNode>(N))
922 return false;
923
924 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
925 if ((MulImm % std::abs(Scale)) == 0) {
926 int64_t RDVLImm = MulImm / Scale;
927 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
928 Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
929 return true;
930 }
931 }
932
933 return false;
934}
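// Worked example (illustrative): with <Low=-32, High=31, Scale=16>, a
// multiplier of 64 yields 64 / 16 = 4, which is in range, so Imm is set to 4
// and VSCALE * 64 can be formed with an RDVL/ADDVL-style instruction taking
// immediate #4.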
935
936/// SelectArithExtendedRegister - Select an "extended register" operand. This
937/// operand folds in an extend followed by an optional left shift.
938bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
939 SDValue &Shift) {
940 unsigned ShiftVal = 0;
941 AArch64_AM::ShiftExtendType Ext;
942
943 if (N.getOpcode() == ISD::SHL) {
944 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
945 if (!CSD)
946 return false;
947 ShiftVal = CSD->getZExtValue();
948 if (ShiftVal > 4)
949 return false;
950
951 Ext = getExtendTypeForNode(N.getOperand(0));
952 if (Ext == AArch64_AM::InvalidShiftExtend)
953 return false;
954
955 Reg = N.getOperand(0).getOperand(0);
956 } else {
957 Ext = getExtendTypeForNode(N);
958 if (Ext == AArch64_AM::InvalidShiftExtend)
959 return false;
960
961 Reg = N.getOperand(0);
962
963 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
964 // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
965 auto isDef32 = [](SDValue N) {
966 unsigned Opc = N.getOpcode();
967 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
968 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
969 Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
970 Opc != ISD::FREEZE;
971 };
972 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
973 isDef32(Reg))
974 return false;
975 }
976
977 // AArch64 mandates that the RHS of the operation must use the smallest
978 // register class that could contain the size being extended from. Thus,
979 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
980 // there might not be an actual 32-bit value in the program. We can
981 // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
982 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
983 Reg = narrowIfNeeded(CurDAG, Reg);
984 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
985 MVT::i32);
986 return isWorthFoldingALU(N);
987}
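// Illustrative example: for (shl (sext_inreg w1, i16), 2) this returns
// Reg = w1 and a Shift encoding SXTH with a left shift of 2, allowing an
// "add x0, x2, w1, sxth #2"-style extended-register form; a 32-bit value that
// is already a free zero-extend (isDef32) is deliberately not matched.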
988
989/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
990/// operand is used by instructions that have an SP operand.
991bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
992 SDValue &Shift) {
993 unsigned ShiftVal = 0;
994 AArch64_AM::ShiftExtendType Ext;
995
996 if (N.getOpcode() != ISD::SHL)
997 return false;
998
999 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1000 if (!CSD)
1001 return false;
1002 ShiftVal = CSD->getZExtValue();
1003 if (ShiftVal > 4)
1004 return false;
1005
1006 Ext = AArch64_AM::UXTX;
1007 Reg = N.getOperand(0);
1008 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1009 MVT::i32);
1010 return isWorthFoldingALU(N);
1011}
1012
1013/// If there's a use of this ADDlow that's not itself a load/store then we'll
1014/// need to create a real ADD instruction from it anyway and there's no point in
1015/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1016/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1017/// leads to duplicated ADRP instructions.
1018static bool isWorthFoldingADDlow(SDValue N) {
1019 for (auto *Use : N->uses()) {
1020 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
1021 Use->getOpcode() != ISD::ATOMIC_LOAD &&
1022 Use->getOpcode() != ISD::ATOMIC_STORE)
1023 return false;
1024
1025 // ldar and stlr have much more restrictive addressing modes (just a
1026 // register).
1027 if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getSuccessOrdering()))
1028 return false;
1029 }
1030
1031 return true;
1032}
1033
1034/// Check if the immediate offset is valid as a scaled immediate.
1035static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1036 unsigned Size) {
1037 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1038 Offset < (Range << Log2_32(Size)))
1039 return true;
1040 return false;
1041}
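// Worked example (illustrative): for an 8-byte access (Size == 8) with
// Range == 0x1000, offsets 0, 8, ..., 32760 are valid scaled immediates;
// 32768 and any offset that is not a multiple of 8 are rejected.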
1042
1043/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1044/// immediate" address. The "Size" argument is the size in bytes of the memory
1045/// reference, which determines the scale.
1046bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1047 unsigned BW, unsigned Size,
1048 SDValue &Base,
1049 SDValue &OffImm) {
1050 SDLoc dl(N);
1051 const DataLayout &DL = CurDAG->getDataLayout();
1052 const TargetLowering *TLI = getTargetLowering();
1053 if (N.getOpcode() == ISD::FrameIndex) {
1054 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1055 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1056 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1057 return true;
1058 }
1059
1060 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit
1061 // signed mode selected here doesn't support labels/immediates, only base+offset.
1062 if (CurDAG->isBaseWithConstantOffset(N)) {
1063 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1064 if (IsSignedImm) {
1065 int64_t RHSC = RHS->getSExtValue();
1066 unsigned Scale = Log2_32(Size);
1067 int64_t Range = 0x1LL << (BW - 1);
1068
1069 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1070 RHSC < (Range << Scale)) {
1071 Base = N.getOperand(0);
1072 if (Base.getOpcode() == ISD::FrameIndex) {
1073 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1074 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1075 }
1076 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1077 return true;
1078 }
1079 } else {
1080 // unsigned Immediate
1081 uint64_t RHSC = RHS->getZExtValue();
1082 unsigned Scale = Log2_32(Size);
1083 uint64_t Range = 0x1ULL << BW;
1084
1085 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1086 Base = N.getOperand(0);
1087 if (Base.getOpcode() == ISD::FrameIndex) {
1088 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1089 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1090 }
1091 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1092 return true;
1093 }
1094 }
1095 }
1096 }
1097 // Base only. The address will be materialized into a register before
1098 // the memory is accessed.
1099 // add x0, Xbase, #offset
1100 // stp x1, x2, [x0]
1101 Base = N;
1102 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1103 return true;
1104}
1105
1106/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1107/// immediate" address. The "Size" argument is the size in bytes of the memory
1108/// reference, which determines the scale.
1109bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1110 SDValue &Base, SDValue &OffImm) {
1111 SDLoc dl(N);
1112 const DataLayout &DL = CurDAG->getDataLayout();
1113 const TargetLowering *TLI = getTargetLowering();
1114 if (N.getOpcode() == ISD::FrameIndex) {
1115 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1116 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1117 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1118 return true;
1119 }
1120
1121 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1122 GlobalAddressSDNode *GAN =
1123 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1124 Base = N.getOperand(0);
1125 OffImm = N.getOperand(1);
1126 if (!GAN)
1127 return true;
1128
1129 if (GAN->getOffset() % Size == 0 &&
1130 GAN->getGlobal()->getPointerAlignment(DL) >= Size)
1131 return true;
1132 }
1133
1134 if (CurDAG->isBaseWithConstantOffset(N)) {
1135 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1136 int64_t RHSC = (int64_t)RHS->getZExtValue();
1137 unsigned Scale = Log2_32(Size);
1138 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1139 Base = N.getOperand(0);
1140 if (Base.getOpcode() == ISD::FrameIndex) {
1141 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1142 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1143 }
1144 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1145 return true;
1146 }
1147 }
1148 }
1149
1150 // Before falling back to our general case, check if the unscaled
1151 // instructions can handle this. If so, that's preferable.
1152 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1153 return false;
1154
1155 // Base only. The address will be materialized into a register before
1156 // the memory is accessed.
1157 // add x0, Xbase, #offset
1158 // ldr x0, [x0]
1159 Base = N;
1160 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1161 return true;
1162}
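// Illustrative example: for an 8-byte load from (add x1, #32), the offset is a
// valid scaled immediate, so Base = x1 and OffImm = 4 (32 / 8), matching an
// "ldr x0, [x1, #32]"-style form; (add x1, #-8) is instead left to the
// unscaled (LDUR-style) addressing mode, which is why this function returns
// false in that case.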
1163
1164/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1165/// immediate" address. This should only match when there is an offset that
1166/// is not valid for a scaled immediate addressing mode. The "Size" argument
1167/// is the size in bytes of the memory reference, which is needed here to know
1168/// what is valid for a scaled immediate.
1169bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1170 SDValue &Base,
1171 SDValue &OffImm) {
1172 if (!CurDAG->isBaseWithConstantOffset(N))
1173 return false;
1174 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1175 int64_t RHSC = RHS->getSExtValue();
1176 if (RHSC >= -256 && RHSC < 256) {
1177 Base = N.getOperand(0);
1178 if (Base.getOpcode() == ISD::FrameIndex) {
1179 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1180 const TargetLowering *TLI = getTargetLowering();
1181 Base = CurDAG->getTargetFrameIndex(
1182 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1183 }
1184 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1185 return true;
1186 }
1187 }
1188 return false;
1189}
1190
1191static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
1192 SDLoc dl(N);
1193 SDValue ImpDef = SDValue(
1194 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1195 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1196 N);
1197}
1198
1199/// Check if the given SHL node (\p N) can be used to form an
1200/// extended register for an addressing mode.
1201bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1202 bool WantExtend, SDValue &Offset,
1203 SDValue &SignExtend) {
1204 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1205 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1206 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1207 return false;
1208
1209 SDLoc dl(N);
1210 if (WantExtend) {
1211 AArch64_AM::ShiftExtendType Ext =
1212 getExtendTypeForNode(N.getOperand(0), true);
1213 if (Ext == AArch64_AM::InvalidShiftExtend)
1214 return false;
1215
1216 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1217 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1218 MVT::i32);
1219 } else {
1220 Offset = N.getOperand(0);
1221 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1222 }
1223
1224 unsigned LegalShiftVal = Log2_32(Size);
1225 unsigned ShiftVal = CSD->getZExtValue();
1226
1227 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1228 return false;
1229
1230 return isWorthFoldingAddr(N, Size);
1231}
1232
1233bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1234 SDValue &Base, SDValue &Offset,
1235 SDValue &SignExtend,
1236 SDValue &DoShift) {
1237 if (N.getOpcode() != ISD::ADD)
1238 return false;
1239 SDValue LHS = N.getOperand(0);
1240 SDValue RHS = N.getOperand(1);
1241 SDLoc dl(N);
1242
1243 // We don't want to match immediate adds here, because they are better lowered
1244 // to the register-immediate addressing modes.
1245 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
1246 return false;
1247
1248 // Check if this particular node is reused in any non-memory related
1249 // operation. If yes, do not try to fold this node into the address
1250 // computation, since the computation will be kept.
1251 const SDNode *Node = N.getNode();
1252 for (SDNode *UI : Node->uses()) {
1253 if (!isa<MemSDNode>(*UI))
1254 return false;
1255 }
1256
1257 // Remember if it is worth folding N when it produces extended register.
1258 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1259
1260 // Try to match a shifted extend on the RHS.
1261 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1262 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1263 Base = LHS;
1264 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1265 return true;
1266 }
1267
1268 // Try to match a shifted extend on the LHS.
1269 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1270 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1271 Base = RHS;
1272 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1273 return true;
1274 }
1275
1276 // There was no shift, whatever else we find.
1277 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1278
1279 AArch64_AM::ShiftExtendType Ext;
1280 // Try to match an unshifted extend on the LHS.
1281 if (IsExtendedRegisterWorthFolding &&
1282 (Ext = getExtendTypeForNode(LHS, true)) !=
1283 AArch64_AM::InvalidShiftExtend) {
1284 Base = RHS;
1285 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1286 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1287 MVT::i32);
1288 if (isWorthFoldingAddr(LHS, Size))
1289 return true;
1290 }
1291
1292 // Try to match an unshifted extend on the RHS.
1293 if (IsExtendedRegisterWorthFolding &&
1294 (Ext = getExtendTypeForNode(RHS, true)) !=
1295 AArch64_AM::InvalidShiftExtend) {
1296 Base = LHS;
1297 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1298 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1299 MVT::i32);
1300 if (isWorthFoldingAddr(RHS, Size))
1301 return true;
1302 }
1303
1304 return false;
1305}
1306
1307// Check if the given immediate is preferred by ADD. If an immediate can be
1308 // encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
1309// encoded by one MOVZ, return true.
1310static bool isPreferredADD(int64_t ImmOff) {
1311 // Constant in [0x0, 0xfff] can be encoded in ADD.
1312 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1313 return true;
1314 // Check if it can be encoded in an "ADD LSL #12".
1315 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1316 // As a single MOVZ is faster than an "ADD of LSL #12", ignore such constants.
1317 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1318 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1319 return false;
1320}
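// Worked examples (illustrative): 0xfff is preferred (a plain ADD immediate);
// 0x123000 is preferred (an "ADD ..., LSL #12" that no single MOVZ covers);
// 0xff0000 is not preferred, since a single "movz w0, #0xff, lsl #16" already
// materializes it.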
1321
1322bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1323 SDValue &Base, SDValue &Offset,
1324 SDValue &SignExtend,
1325 SDValue &DoShift) {
1326 if (N.getOpcode() != ISD::ADD)
1327 return false;
1328 SDValue LHS = N.getOperand(0);
1329 SDValue RHS = N.getOperand(1);
1330 SDLoc DL(N);
1331
1332 // Check if this particular node is reused in any non-memory related
1333 // operation. If yes, do not try to fold this node into the address
1334 // computation, since the computation will be kept.
1335 const SDNode *Node = N.getNode();
1336 for (SDNode *UI : Node->uses()) {
1337 if (!isa<MemSDNode>(*UI))
1338 return false;
1339 }
1340
1341 // Watch out if RHS is a wide immediate: it cannot be selected into the
1342 // [BaseReg+Imm] addressing mode, and it may not be encodable in an
1343 // ADD/SUB. Instead it will use the [BaseReg + 0] address mode and generate
1344 // instructions like:
1345 // MOV X0, WideImmediate
1346 // ADD X1, BaseReg, X0
1347 // LDR X2, [X1, 0]
1348 // For such situation, using [BaseReg, XReg] addressing mode can save one
1349 // ADD/SUB:
1350 // MOV X0, WideImmediate
1351 // LDR X2, [BaseReg, X0]
1352 if (isa<ConstantSDNode>(RHS)) {
1353 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1354 // Skip immediates that can be selected by a load/store addressing mode,
1355 // and immediates that can be encoded by a single ADD (SUB is also
1356 // checked by using -ImmOff).
1357 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1358 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1359 return false;
1360
1361 SDValue Ops[] = { RHS };
1362 SDNode *MOVI =
1363 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1364 SDValue MOVIV = SDValue(MOVI, 0);
1365 // This ADD of two X registers will be selected into [Reg+Reg] mode.
1366 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1367 }
1368
1369 // Remember if it is worth folding N when it produces extended register.
1370 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1371
1372 // Try to match a shifted extend on the RHS.
1373 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1374 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1375 Base = LHS;
1376 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1377 return true;
1378 }
1379
1380 // Try to match a shifted extend on the LHS.
1381 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1382 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1383 Base = RHS;
1384 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1385 return true;
1386 }
1387
1388 // Match any non-shifted, non-extend, non-immediate add expression.
1389 Base = LHS;
1390 Offset = RHS;
1391 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1392 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1393 // Reg1 + Reg2 is free: no check needed.
1394 return true;
1395}
1396
1397SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1398 static const unsigned RegClassIDs[] = {
1399 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1400 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1401 AArch64::dsub2, AArch64::dsub3};
1402
1403 return createTuple(Regs, RegClassIDs, SubRegs);
1404}
1405
1406SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1407 static const unsigned RegClassIDs[] = {
1408 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1409 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1410 AArch64::qsub2, AArch64::qsub3};
1411
1412 return createTuple(Regs, RegClassIDs, SubRegs);
1413}
1414
1415SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1416 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1417 AArch64::ZPR3RegClassID,
1418 AArch64::ZPR4RegClassID};
1419 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1420 AArch64::zsub2, AArch64::zsub3};
1421
1422 return createTuple(Regs, RegClassIDs, SubRegs);
1423}
1424
1425SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1426 assert(Regs.size() == 2 || Regs.size() == 4);
1427
1428 // The createTuple interface requires 3 RegClassIDs for each possible
1429 // tuple type even though we only have them for ZPR2 and ZPR4.
1430 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1431 AArch64::ZPR4Mul4RegClassID};
1432 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1433 AArch64::zsub2, AArch64::zsub3};
1434 return createTuple(Regs, RegClassIDs, SubRegs);
1435}
1436
1437SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1438 const unsigned RegClassIDs[],
1439 const unsigned SubRegs[]) {
1440 // There's no special register-class for a vector-list of 1 element: it's just
1441 // a vector.
1442 if (Regs.size() == 1)
1443 return Regs[0];
1444
1445 assert(Regs.size() >= 2 && Regs.size() <= 4);
1446
1447 SDLoc DL(Regs[0]);
1448
1449 SmallVector<SDValue, 4> Ops;
1450
1451 // First operand of REG_SEQUENCE is the desired RegClass.
1452 Ops.push_back(
1453 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1454
1455 // Then we get pairs of source & subregister-position for the components.
1456 for (unsigned i = 0; i < Regs.size(); ++i) {
1457 Ops.push_back(Regs[i]);
1458 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1459 }
1460
1461 SDNode *N =
1462 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1463 return SDValue(N, 0);
1464}
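// Illustrative example: createQTuple({q0, q1, q2}) produces
// REG_SEQUENCE(QQQRegClassID, q0, qsub0, q1, qsub1, q2, qsub2), an untyped
// value representing the consecutive register triple expected by tbl/ld3-style
// instructions.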
1465
1466void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1467 bool isExt) {
1468 SDLoc dl(N);
1469 EVT VT = N->getValueType(0);
1470
1471 unsigned ExtOff = isExt;
1472
1473 // Form a REG_SEQUENCE to force register allocation.
1474 unsigned Vec0Off = ExtOff + 1;
1475 SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1476 N->op_begin() + Vec0Off + NumVecs);
1477 SDValue RegSeq = createQTuple(Regs);
1478
1479 SmallVector<SDValue, 6> Ops;
1480 if (isExt)
1481 Ops.push_back(N->getOperand(1));
1482 Ops.push_back(RegSeq);
1483 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1484 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1485}
1486
1487static std::tuple<SDValue, SDValue>
1488extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
1489 SDLoc DL(Disc);
1490 SDValue AddrDisc;
1491 SDValue ConstDisc;
1492
1493 // If this is a blend, remember the constant and address discriminators.
1494 // Otherwise, it's either a constant discriminator, or a non-blended
1495 // address discriminator.
1496 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1497 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
1498 AddrDisc = Disc->getOperand(1);
1499 ConstDisc = Disc->getOperand(2);
1500 } else {
1501 ConstDisc = Disc;
1502 }
1503
1504 // If the constant discriminator (either the blend RHS, or the entire
1505 // discriminator value) isn't a 16-bit constant, bail out, and let the
1506 // discriminator be computed separately.
1507 auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
1508 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
1509 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
1510
1511 // If there's no address discriminator, use XZR directly.
1512 if (!AddrDisc)
1513 AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
1514
1515 return std::make_tuple(
1516 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
1517 AddrDisc);
1518}
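// Illustrative example: a discriminator of the form
// llvm.ptrauth.blend(%addr, 1234) is split into (constant 1234, %addr); a
// plain constant 42 becomes (42, XZR); anything else is returned as
// (0, original discriminator) so the blend is computed explicitly.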
1519
1520void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1521 SDLoc DL(N);
1522 // IntrinsicID is operand #0
1523 SDValue Val = N->getOperand(1);
1524 SDValue AUTKey = N->getOperand(2);
1525 SDValue AUTDisc = N->getOperand(3);
1526
1527 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1528 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1529
1530 SDValue AUTAddrDisc, AUTConstDisc;
1531 std::tie(AUTConstDisc, AUTAddrDisc) =
1532 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1533
1534 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1535 AArch64::X16, Val, SDValue());
1536 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
1537
1538 SDNode *AUT = CurDAG->getMachineNode(AArch64::AUT, DL, MVT::i64, Ops);
1539 ReplaceNode(N, AUT);
1540 return;
1541}
1542
1543void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
1544 SDLoc DL(N);
1545 // IntrinsicID is operand #0
1546 SDValue Val = N->getOperand(1);
1547 SDValue AUTKey = N->getOperand(2);
1548 SDValue AUTDisc = N->getOperand(3);
1549 SDValue PACKey = N->getOperand(4);
1550 SDValue PACDisc = N->getOperand(5);
1551
1552 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1553 unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();
1554
1555 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1556 PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);
1557
1558 SDValue AUTAddrDisc, AUTConstDisc;
1559 std::tie(AUTConstDisc, AUTAddrDisc) =
1560 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1561
1562 SDValue PACAddrDisc, PACConstDisc;
1563 std::tie(PACConstDisc, PACAddrDisc) =
1564 extractPtrauthBlendDiscriminators(PACDisc, CurDAG);
1565
1566 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1567 AArch64::X16, Val, SDValue());
1568
1569 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
1570 PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};
1571
1572 SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
1573 ReplaceNode(N, AUTPAC);
1574 return;
1575}
1576
1577bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1578 LoadSDNode *LD = cast<LoadSDNode>(N);
1579 if (LD->isUnindexed())
1580 return false;
1581 EVT VT = LD->getMemoryVT();
1582 EVT DstVT = N->getValueType(0);
1583 ISD::MemIndexedMode AM = LD->getAddressingMode();
1584 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1585
1586 // We're not doing validity checking here. That was done when checking
1587 // if we should mark the load as indexed or not. We're just selecting
1588 // the right instruction.
1589 unsigned Opcode = 0;
1590
1591 ISD::LoadExtType ExtType = LD->getExtensionType();
1592 bool InsertTo64 = false;
1593 if (VT == MVT::i64)
1594 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1595 else if (VT == MVT::i32) {
1596 if (ExtType == ISD::NON_EXTLOAD)
1597 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1598 else if (ExtType == ISD::SEXTLOAD)
1599 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1600 else {
1601 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1602 InsertTo64 = true;
1603 // The result of the load is only i32. It's the subreg_to_reg that makes
1604 // it into an i64.
1605 DstVT = MVT::i32;
1606 }
1607 } else if (VT == MVT::i16) {
1608 if (ExtType == ISD::SEXTLOAD) {
1609 if (DstVT == MVT::i64)
1610 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1611 else
1612 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1613 } else {
1614 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1615 InsertTo64 = DstVT == MVT::i64;
1616 // The result of the load is only i32. It's the subreg_to_reg that makes
1617 // it into an i64.
1618 DstVT = MVT::i32;
1619 }
1620 } else if (VT == MVT::i8) {
1621 if (ExtType == ISD::SEXTLOAD) {
1622 if (DstVT == MVT::i64)
1623 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1624 else
1625 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1626 } else {
1627 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1628 InsertTo64 = DstVT == MVT::i64;
1629 // The result of the load is only i32. It's the subreg_to_reg that makes
1630 // it into an i64.
1631 DstVT = MVT::i32;
1632 }
1633 } else if (VT == MVT::f16) {
1634 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1635 } else if (VT == MVT::bf16) {
1636 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1637 } else if (VT == MVT::f32) {
1638 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1639 } else if (VT == MVT::f64 || VT.is64BitVector()) {
1640 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1641 } else if (VT.is128BitVector()) {
1642 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1643 } else
1644 return false;
1645 SDValue Chain = LD->getChain();
1646 SDValue Base = LD->getBasePtr();
1647 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1648 int OffsetVal = (int)OffsetOp->getZExtValue();
1649 SDLoc dl(N);
1650 SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1651 SDValue Ops[] = { Base, Offset, Chain };
1652 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1653 MVT::Other, Ops);
1654
1655 // Transfer memoperands.
1656 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1657 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1658
1659 // Either way, we're replacing the node, so tell the caller that.
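// The machine node's results are (writeback, loaded value, chain), while the
// indexed load node N produces (loaded value, writeback, chain); hence the
// index swap in the ReplaceUses calls below.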
1660 SDValue LoadedVal = SDValue(Res, 1);
1661 if (InsertTo64) {
1662 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1663 LoadedVal =
1664 SDValue(CurDAG->getMachineNode(
1665 AArch64::SUBREG_TO_REG, dl, MVT::i64,
1666 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1667 SubReg),
1668 0);
1669 }
1670
1671 ReplaceUses(SDValue(N, 0), LoadedVal);
1672 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1673 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1674 CurDAG->RemoveDeadNode(N);
1675 return true;
1676}
1677
1678void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1679 unsigned SubRegIdx) {
1680 SDLoc dl(N);
1681 EVT VT = N->getValueType(0);
1682 SDValue Chain = N->getOperand(0);
1683
1684 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1685 Chain};
1686
1687 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1688
1689 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1690 SDValue SuperReg = SDValue(Ld, 0);
1691 for (unsigned i = 0; i < NumVecs; ++i)
1692 ReplaceUses(SDValue(N, i),
1693 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1694
1695 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1696
1697 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1698 // because it's too simple to have needed special treatment during lowering.
1699 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1700 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1701 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1702 }
1703
1704 CurDAG->RemoveDeadNode(N);
1705}
1706
1707void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1708 unsigned Opc, unsigned SubRegIdx) {
1709 SDLoc dl(N);
1710 EVT VT = N->getValueType(0);
1711 SDValue Chain = N->getOperand(0);
1712
1713 SDValue Ops[] = {N->getOperand(1), // Mem operand
1714 N->getOperand(2), // Incremental
1715 Chain};
1716
1717 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1718 MVT::Untyped, MVT::Other};
1719
1720 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1721
1722 // Update uses of write back register
1723 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1724
1725 // Update uses of vector list
1726 SDValue SuperReg = SDValue(Ld, 1);
1727 if (NumVecs == 1)
1728 ReplaceUses(SDValue(N, 0), SuperReg);
1729 else
1730 for (unsigned i = 0; i < NumVecs; ++i)
1731 ReplaceUses(SDValue(N, i),
1732 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1733
1734 // Update the chain
1735 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1736 CurDAG->RemoveDeadNode(N);
1737}
1738
1739/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1740/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1741/// new Base and an SDValue representing the new offset.
1742std::tuple<unsigned, SDValue, SDValue>
1743AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1744 unsigned Opc_ri,
1745 const SDValue &OldBase,
1746 const SDValue &OldOffset,
1747 unsigned Scale) {
1748 SDValue NewBase = OldBase;
1749 SDValue NewOffset = OldOffset;
1750 // Detect a possible Reg+Imm addressing mode.
1751 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1752 N, OldBase, NewBase, NewOffset);
1753
1754 // Detect a possible reg+reg addressing mode, but only if we haven't already
1755 // detected a Reg+Imm one.
1756 const bool IsRegReg =
1757 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1758
1759 // Select the instruction.
1760 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1761}
1762
1763enum class SelectTypeKind {
1764 Int1 = 0,
1765 Int = 1,
1766 FP = 2,
1767 AnyType = 3,
1768};
1769
1770/// This function selects an opcode from a list of opcodes, which is
1771/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
1772/// element types, in this order.
1773template <SelectTypeKind Kind>
1774static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1775 // Only match scalable vector VTs
1776 if (!VT.isScalableVector())
1777 return 0;
1778
1779 EVT EltVT = VT.getVectorElementType();
1780 unsigned Key = VT.getVectorMinNumElements();
1781 switch (Kind) {
1782 case SelectTypeKind::AnyType:
1783 break;
1784 case SelectTypeKind::Int:
1785 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1786 EltVT != MVT::i64)
1787 return 0;
1788 break;
1789 case SelectTypeKind::Int1:
1790 if (EltVT != MVT::i1)
1791 return 0;
1792 break;
1793 case SelectTypeKind::FP:
1794 if (EltVT == MVT::bf16)
1795 Key = 16;
1796 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
1797 EltVT != MVT::f64)
1798 return 0;
1799 break;
1800 }
1801
1802 unsigned Offset;
1803 switch (Key) {
1804 case 16: // 8-bit or bf16
1805 Offset = 0;
1806 break;
1807 case 8: // 16-bit
1808 Offset = 1;
1809 break;
1810 case 4: // 32-bit
1811 Offset = 2;
1812 break;
1813 case 2: // 64-bit
1814 Offset = 3;
1815 break;
1816 default:
1817 return 0;
1818 }
1819
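// For example, nxv8i16 has a minimum of 8 elements, so Offset == 1 selects the
// 16-bit-element opcode Opcodes[1]; bf16 vectors are keyed as 16 and therefore
// map to Opcodes[0].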
1820 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1821}
1822
1823// This function is almost identical to SelectWhilePair, but has an
1824// extra check on the range of the immediate operand.
1825// TODO: Merge these two functions together at some point?
1826void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
1827 // Immediate can be either 0 or 1.
1828 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
1829 if (Imm->getZExtValue() > 1)
1830 return;
1831
1832 SDLoc DL(N);
1833 EVT VT = N->getValueType(0);
1834 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1835 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1836 SDValue SuperReg = SDValue(WhilePair, 0);
1837
1838 for (unsigned I = 0; I < 2; ++I)
1839 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1840 AArch64::psub0 + I, DL, VT, SuperReg));
1841
1842 CurDAG->RemoveDeadNode(N);
1843}
1844
1845void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1846 SDLoc DL(N);
1847 EVT VT = N->getValueType(0);
1848
1849 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1850
1851 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1852 SDValue SuperReg = SDValue(WhilePair, 0);
1853
1854 for (unsigned I = 0; I < 2; ++I)
1855 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1856 AArch64::psub0 + I, DL, VT, SuperReg));
1857
1858 CurDAG->RemoveDeadNode(N);
1859}
1860
1861void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1862 unsigned Opcode) {
1863 EVT VT = N->getValueType(0);
1864 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1865 SDValue Ops = createZTuple(Regs);
1866 SDLoc DL(N);
1867 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1868 SDValue SuperReg = SDValue(Intrinsic, 0);
1869 for (unsigned i = 0; i < NumVecs; ++i)
1870 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1871 AArch64::zsub0 + i, DL, VT, SuperReg));
1872
1873 CurDAG->RemoveDeadNode(N);
1874}
1875
1876void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
1877 unsigned NumVecs,
1878 bool IsZmMulti,
1879 unsigned Opcode,
1880 bool HasPred) {
1881 assert(Opcode != 0 && "Unexpected opcode");
1882
1883 SDLoc DL(N);
1884 EVT VT = N->getValueType(0);
1885 unsigned FirstVecIdx = HasPred ? 2 : 1;
1886
1887 auto GetMultiVecOperand = [=](unsigned StartIdx) {
1888 SmallVector<SDValue, 4> Regs(N->op_begin() + StartIdx,
1889 N->op_begin() + StartIdx + NumVecs);
1890 return createZMulTuple(Regs);
1891 };
1892
1893 SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
1894
1895 SDValue Zm;
1896 if (IsZmMulti)
1897 Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
1898 else
1899 Zm = N->getOperand(NumVecs + FirstVecIdx);
1900
1902 if (HasPred)
1903 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
1904 N->getOperand(1), Zdn, Zm);
1905 else
1906 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
1907 SDValue SuperReg = SDValue(Intrinsic, 0);
1908 for (unsigned i = 0; i < NumVecs; ++i)
1909 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1910 AArch64::zsub0 + i, DL, VT, SuperReg));
1911
1912 CurDAG->RemoveDeadNode(N);
1913}
1914
1915void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
1916 unsigned Scale, unsigned Opc_ri,
1917 unsigned Opc_rr, bool IsIntr) {
1918 assert(Scale < 5 && "Invalid scaling value.");
1919 SDLoc DL(N);
1920 EVT VT = N->getValueType(0);
1921 SDValue Chain = N->getOperand(0);
1922
1923 // Optimize addressing mode.
1924 SDValue Base, Offset;
1925 unsigned Opc;
1926 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1927 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
1928 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
1929
1930 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
1931 Base, // Memory operand
1932 Offset, Chain};
1933
1934 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1935
1936 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1937 SDValue SuperReg = SDValue(Load, 0);
1938 for (unsigned i = 0; i < NumVecs; ++i)
1939 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1940 AArch64::zsub0 + i, DL, VT, SuperReg));
1941
1942 // Copy chain
1943 unsigned ChainIdx = NumVecs;
1944 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1945 CurDAG->RemoveDeadNode(N);
1946}
1947
1948void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
1949 unsigned NumVecs,
1950 unsigned Scale,
1951 unsigned Opc_ri,
1952 unsigned Opc_rr) {
1953 assert(Scale < 4 && "Invalid scaling value.");
1954 SDLoc DL(N);
1955 EVT VT = N->getValueType(0);
1956 SDValue Chain = N->getOperand(0);
1957
1958 SDValue PNg = N->getOperand(2);
1959 SDValue Base = N->getOperand(3);
1960 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
1961 unsigned Opc;
1962 std::tie(Opc, Base, Offset) =
1963 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
1964
1965 SDValue Ops[] = {PNg, // Predicate-as-counter
1966 Base, // Memory operand
1967 Offset, Chain};
1968
1969 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1970
1971 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1972 SDValue SuperReg = SDValue(Load, 0);
1973 for (unsigned i = 0; i < NumVecs; ++i)
1974 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1975 AArch64::zsub0 + i, DL, VT, SuperReg));
1976
1977 // Copy chain
1978 unsigned ChainIdx = NumVecs;
1979 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1980 CurDAG->RemoveDeadNode(N);
1981}
1982
1983void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
1984 unsigned Opcode) {
1985 if (N->getValueType(0) != MVT::nxv4f32)
1986 return;
1987 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
1988}
1989
1990void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
1991 unsigned NumOutVecs,
1992 unsigned Opc, uint32_t MaxImm) {
1993 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
1994 if (Imm->getZExtValue() > MaxImm)
1995 return;
1996
1997 SDValue ZtValue;
1998 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
1999 return;
2000 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)};
2001 SDLoc DL(Node);
2002 EVT VT = Node->getValueType(0);
2003
2005 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2006 SDValue SuperReg = SDValue(Instruction, 0);
2007
2008 for (unsigned I = 0; I < NumOutVecs; ++I)
2009 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2010 AArch64::zsub0 + I, DL, VT, SuperReg));
2011
2012 // Copy chain
2013 unsigned ChainIdx = NumOutVecs;
2014 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2015 CurDAG->RemoveDeadNode(Node);
2016}
2017
2018void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2019 unsigned Op) {
2020 SDLoc DL(N);
2021 EVT VT = N->getValueType(0);
2022
2023 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2024 SDValue Zd = createZMulTuple(Regs);
2025 SDValue Zn = N->getOperand(1 + NumVecs);
2026 SDValue Zm = N->getOperand(2 + NumVecs);
2027
2028 SDValue Ops[] = {Zd, Zn, Zm};
2029
2030 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
2031 SDValue SuperReg = SDValue(Intrinsic, 0);
2032 for (unsigned i = 0; i < NumVecs; ++i)
2033 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2034 AArch64::zsub0 + i, DL, VT, SuperReg));
2035
2036 CurDAG->RemoveDeadNode(N);
2037}
2038
2039bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2040 switch (BaseReg) {
2041 default:
2042 return false;
2043 case AArch64::ZA:
2044 case AArch64::ZAB0:
2045 if (TileNum == 0)
2046 break;
2047 return false;
2048 case AArch64::ZAH0:
2049 if (TileNum <= 1)
2050 break;
2051 return false;
2052 case AArch64::ZAS0:
2053 if (TileNum <= 3)
2054 break;
2055 return false;
2056 case AArch64::ZAD0:
2057 if (TileNum <= 7)
2058 break;
2059 return false;
2060 }
2061
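// The ZA tile registers of each element size are numbered contiguously, so the
// selected tile is simply BaseReg + TileNum (e.g. ZAS0 + 2 selects ZAS2).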
2062 BaseReg += TileNum;
2063 return true;
2064}
2065
2066template <unsigned MaxIdx, unsigned Scale>
2067void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
2068 unsigned BaseReg, unsigned Op) {
2069 unsigned TileNum = 0;
2070 if (BaseReg != AArch64::ZA)
2071 TileNum = N->getConstantOperandVal(2);
2072
2073 if (!SelectSMETile(BaseReg, TileNum))
2074 return;
2075
2076 SDValue SliceBase, Base, Offset;
2077 if (BaseReg == AArch64::ZA)
2078 SliceBase = N->getOperand(2);
2079 else
2080 SliceBase = N->getOperand(3);
2081
2082 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2083 return;
2084
2085 SDLoc DL(N);
2086 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
2087 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
2088 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2089
2090 EVT VT = N->getValueType(0);
2091 for (unsigned I = 0; I < NumVecs; ++I)
2092 ReplaceUses(SDValue(N, I),
2093 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2094 SDValue(Mov, 0)));
2095 // Copy chain
2096 unsigned ChainIdx = NumVecs;
2097 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2098 CurDAG->RemoveDeadNode(N);
2099}
2100
2101void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2102 unsigned Op, unsigned MaxIdx,
2103 unsigned Scale, unsigned BaseReg) {
2104 // Slice can be in different positions
2105 // The array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2106 // The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2107 SDValue SliceBase = N->getOperand(2);
2108 if (BaseReg != AArch64::ZA)
2109 SliceBase = N->getOperand(3);
2110
2111 SDValue Base, Offset;
2112 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2113 return;
2114 // The correct ZA tile number is computed during machine-instruction
2115 // lowering (see EmitZAInstr); the DAG cannot select a ZA tile as an
2116 // output register with ZReg.
2117 SDLoc DL(N);
2118 SmallVector<SDValue, 6> Ops;
2119 if (BaseReg != AArch64::ZA)
2120 Ops.push_back(N->getOperand(2));
2121 Ops.push_back(Base);
2122 Ops.push_back(Offset);
2123 Ops.push_back(N->getOperand(0)); // Chain
2124 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2125
2126 EVT VT = N->getValueType(0);
2127 for (unsigned I = 0; I < NumVecs; ++I)
2128 ReplaceUses(SDValue(N, I),
2129 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2130 SDValue(Mov, 0)));
2131
2132 // Copy chain
2133 unsigned ChainIdx = NumVecs;
2134 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2135 CurDAG->RemoveDeadNode(N);
2136}
2137
2138void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2139 unsigned NumOutVecs,
2140 bool IsTupleInput,
2141 unsigned Opc) {
2142 SDLoc DL(N);
2143 EVT VT = N->getValueType(0);
2144 unsigned NumInVecs = N->getNumOperands() - 1;
2145
2147 if (IsTupleInput) {
2148 assert((NumInVecs == 2 || NumInVecs == 4) &&
2149 "Don't know how to handle multi-register input!");
2150 SmallVector<SDValue, 4> Regs(N->op_begin() + 1,
2151 N->op_begin() + 1 + NumInVecs);
2152 Ops.push_back(createZMulTuple(Regs));
2153 } else {
2154 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2155 for (unsigned I = 0; I < NumInVecs; I++)
2156 Ops.push_back(N->getOperand(1 + I));
2157 }
2158
2159 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2160 SDValue SuperReg = SDValue(Res, 0);
2161
2162 for (unsigned I = 0; I < NumOutVecs; I++)
2163 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2164 AArch64::zsub0 + I, DL, VT, SuperReg));
2165 CurDAG->RemoveDeadNode(N);
2166}
2167
2168void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2169 unsigned Opc) {
2170 SDLoc dl(N);
2171 EVT VT = N->getOperand(2)->getValueType(0);
2172
2173 // Form a REG_SEQUENCE to force register allocation.
2174 bool Is128Bit = VT.getSizeInBits() == 128;
2175 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2176 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2177
2178 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2179 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2180
2181 // Transfer memoperands.
2182 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2183 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2184
2185 ReplaceNode(N, St);
2186}
2187
2188void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2189 unsigned Scale, unsigned Opc_rr,
2190 unsigned Opc_ri) {
2191 SDLoc dl(N);
2192
2193 // Form a REG_SEQUENCE to force register allocation.
2194 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2195 SDValue RegSeq = createZTuple(Regs);
2196
2197 // Optimize addressing mode.
2198 unsigned Opc;
2199 SDValue Base, Offset;
2200 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2201 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2202 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2203
2204 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2205 Base, // address
2206 Offset, // offset
2207 N->getOperand(0)}; // chain
2208 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2209
2210 ReplaceNode(N, St);
2211}
2212
2213bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2214 SDValue &OffImm) {
2215 SDLoc dl(N);
2216 const DataLayout &DL = CurDAG->getDataLayout();
2217 const TargetLowering *TLI = getTargetLowering();
2218
2219 // Try to match it for the frame address
2220 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2221 int FI = FINode->getIndex();
2222 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2223 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2224 return true;
2225 }
2226
2227 return false;
2228}
2229
2230void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2231 unsigned Opc) {
2232 SDLoc dl(N);
2233 EVT VT = N->getOperand(2)->getValueType(0);
2234 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2235 MVT::Other}; // Type for the Chain
2236
2237 // Form a REG_SEQUENCE to force register allocation.
2238 bool Is128Bit = VT.getSizeInBits() == 128;
2239 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2240 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2241
2242 SDValue Ops[] = {RegSeq,
2243 N->getOperand(NumVecs + 1), // base register
2244 N->getOperand(NumVecs + 2), // Incremental
2245 N->getOperand(0)}; // Chain
2246 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2247
2248 ReplaceNode(N, St);
2249}
2250
2251namespace {
2252/// WidenVector - Given a value in the V64 register class, produce the
2253/// equivalent value in the V128 register class.
2254class WidenVector {
2255 SelectionDAG &DAG;
2256
2257public:
2258 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2259
2260 SDValue operator()(SDValue V64Reg) {
2261 EVT VT = V64Reg.getValueType();
2262 unsigned NarrowSize = VT.getVectorNumElements();
2263 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2264 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2265 SDLoc DL(V64Reg);
2266
2267 SDValue Undef =
2268 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2269 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2270 }
2271};
2272} // namespace
2273
2274/// NarrowVector - Given a value in the V128 register class, produce the
2275 /// equivalent value in the V64 register class.
2276 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2277 EVT VT = V128Reg.getValueType();
2278 unsigned WideSize = VT.getVectorNumElements();
2279 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2280 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2281
2282 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2283 V128Reg);
2284}
2285
2286void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2287 unsigned Opc) {
2288 SDLoc dl(N);
2289 EVT VT = N->getValueType(0);
2290 bool Narrow = VT.getSizeInBits() == 64;
2291
2292 // Form a REG_SEQUENCE to force register allocation.
2293 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2294
2295 if (Narrow)
2296 transform(Regs, Regs.begin(),
2297 WidenVector(*CurDAG));
2298
2299 SDValue RegSeq = createQTuple(Regs);
2300
2301 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2302
2303 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2304
2305 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2306 N->getOperand(NumVecs + 3), N->getOperand(0)};
2307 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2308 SDValue SuperReg = SDValue(Ld, 0);
2309
2310 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2311 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2312 AArch64::qsub2, AArch64::qsub3 };
2313 for (unsigned i = 0; i < NumVecs; ++i) {
2314 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2315 if (Narrow)
2316 NV = NarrowVector(NV, *CurDAG);
2317 ReplaceUses(SDValue(N, i), NV);
2318 }
2319
2320 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2321 CurDAG->RemoveDeadNode(N);
2322}
2323
2324void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2325 unsigned Opc) {
2326 SDLoc dl(N);
2327 EVT VT = N->getValueType(0);
2328 bool Narrow = VT.getSizeInBits() == 64;
2329
2330 // Form a REG_SEQUENCE to force register allocation.
2331 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2332
2333 if (Narrow)
2334 transform(Regs, Regs.begin(),
2335 WidenVector(*CurDAG));
2336
2337 SDValue RegSeq = createQTuple(Regs);
2338
2339 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2340 RegSeq->getValueType(0), MVT::Other};
2341
2342 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2343
2344 SDValue Ops[] = {RegSeq,
2345 CurDAG->getTargetConstant(LaneNo, dl,
2346 MVT::i64), // Lane Number
2347 N->getOperand(NumVecs + 2), // Base register
2348 N->getOperand(NumVecs + 3), // Incremental
2349 N->getOperand(0)};
2350 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2351
2352 // Update uses of the write back register
2353 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2354
2355 // Update uses of the vector list
2356 SDValue SuperReg = SDValue(Ld, 1);
2357 if (NumVecs == 1) {
2358 ReplaceUses(SDValue(N, 0),
2359 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2360 } else {
2361 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2362 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2363 AArch64::qsub2, AArch64::qsub3 };
2364 for (unsigned i = 0; i < NumVecs; ++i) {
2365 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2366 SuperReg);
2367 if (Narrow)
2368 NV = NarrowVector(NV, *CurDAG);
2369 ReplaceUses(SDValue(N, i), NV);
2370 }
2371 }
2372
2373 // Update the Chain
2374 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2375 CurDAG->RemoveDeadNode(N);
2376}
2377
2378void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2379 unsigned Opc) {
2380 SDLoc dl(N);
2381 EVT VT = N->getOperand(2)->getValueType(0);
2382 bool Narrow = VT.getSizeInBits() == 64;
2383
2384 // Form a REG_SEQUENCE to force register allocation.
2385 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2386
2387 if (Narrow)
2388 transform(Regs, Regs.begin(),
2389 WidenVector(*CurDAG));
2390
2391 SDValue RegSeq = createQTuple(Regs);
2392
2393 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2394
2395 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2396 N->getOperand(NumVecs + 3), N->getOperand(0)};
2397 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2398
2399 // Transfer memoperands.
2400 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2401 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2402
2403 ReplaceNode(N, St);
2404}
2405
2406void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2407 unsigned Opc) {
2408 SDLoc dl(N);
2409 EVT VT = N->getOperand(2)->getValueType(0);
2410 bool Narrow = VT.getSizeInBits() == 64;
2411
2412 // Form a REG_SEQUENCE to force register allocation.
2413 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2414
2415 if (Narrow)
2416 transform(Regs, Regs.begin(),
2417 WidenVector(*CurDAG));
2418
2419 SDValue RegSeq = createQTuple(Regs);
2420
2421 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2422 MVT::Other};
2423
2424 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2425
2426 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2427 N->getOperand(NumVecs + 2), // Base Register
2428 N->getOperand(NumVecs + 3), // Incremental
2429 N->getOperand(0)};
2430 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2431
2432 // Transfer memoperands.
2433 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2434 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2435
2436 ReplaceNode(N, St);
2437}
2438
2439 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2440 unsigned &Opc, SDValue &Opd0,
2441 unsigned &LSB, unsigned &MSB,
2442 unsigned NumberOfIgnoredLowBits,
2443 bool BiggerPattern) {
2444 assert(N->getOpcode() == ISD::AND &&
2445 "N must be a AND operation to call this function");
2446
2447 EVT VT = N->getValueType(0);
2448
2449 // We could test the type of VT here and return false when it does not
2450 // match, but since that check has already been done before reaching this
2451 // point, we turn it into an assert to avoid redundant code.
2452 assert((VT == MVT::i32 || VT == MVT::i64) &&
2453 "Type checking must have been done before calling this function");
2454
2455 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2456 // changed the AND node to a 32-bit mask operation. We'll have to
2457 // undo that as part of the transform here if we want to catch all
2458 // the opportunities.
2459 // Currently the NumberOfIgnoredLowBits argument helps to recover
2460 // from these situations when matching bigger pattern (bitfield insert).
2461
2462 // For unsigned extracts, check for a shift right and mask
2463 uint64_t AndImm = 0;
2464 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2465 return false;
2466
2467 const SDNode *Op0 = N->getOperand(0).getNode();
2468
2469 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2470 // simplified. Try to undo that
2471 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2472
2473 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
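// e.g. 0xff is such a mask (0xff & 0x100 == 0), while 0xff0 is not
// (0xff0 & 0xff1 != 0).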
2474 if (AndImm & (AndImm + 1))
2475 return false;
2476
2477 bool ClampMSB = false;
2478 uint64_t SrlImm = 0;
2479 // Handle the SRL + ANY_EXTEND case.
2480 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2481 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2482 // Extend the incoming operand of the SRL to 64-bit.
2483 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2484 // Make sure to clamp the MSB so that we preserve the semantics of the
2485 // original operations.
2486 ClampMSB = true;
2487 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2488 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
2489 SrlImm)) {
2490 // If the shift result was truncated, we can still combine them.
2491 Opd0 = Op0->getOperand(0).getOperand(0);
2492
2493 // Use the type of SRL node.
2494 VT = Opd0->getValueType(0);
2495 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2496 Opd0 = Op0->getOperand(0);
2497 ClampMSB = (VT == MVT::i32);
2498 } else if (BiggerPattern) {
2499 // Let's pretend a 0 shift right has been performed.
2500 // The resulting code will be at least as good as the original one
2501 // plus it may expose more opportunities for bitfield insert pattern.
2502 // FIXME: Currently we limit this to the bigger pattern, because
2503 // some optimizations expect AND and not UBFM.
2504 Opd0 = N->getOperand(0);
2505 } else
2506 return false;
2507
2508 // Bail out on large immediates. This happens when no proper
2509 // combining/constant folding was performed.
2510 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2511 LLVM_DEBUG(
2512 (dbgs() << N
2513 << ": Found large shift immediate, this should not happen\n"));
2514 return false;
2515 }
2516
2517 LSB = SrlImm;
2518 MSB = SrlImm +
2519 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2520 : llvm::countr_one<uint64_t>(AndImm)) -
2521 1;
2522 if (ClampMSB)
2523 // Since we're moving the extend before the right shift operation, we need
2524 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2525 // the zeros which would get shifted in with the original right shift
2526 // operation.
2527 MSB = MSB > 31 ? 31 : MSB;
2528
2529 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2530 return true;
2531}
2532
2533static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2534 SDValue &Opd0, unsigned &Immr,
2535 unsigned &Imms) {
2536 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2537
2538 EVT VT = N->getValueType(0);
2539 unsigned BitWidth = VT.getSizeInBits();
2540 assert((VT == MVT::i32 || VT == MVT::i64) &&
2541 "Type checking must have been done before calling this function");
2542
2543 SDValue Op = N->getOperand(0);
2544 if (Op->getOpcode() == ISD::TRUNCATE) {
2545 Op = Op->getOperand(0);
2546 VT = Op->getValueType(0);
2547 BitWidth = VT.getSizeInBits();
2548 }
2549
2550 uint64_t ShiftImm;
2551 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2552 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2553 return false;
2554
2555 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2556 if (ShiftImm + Width > BitWidth)
2557 return false;
2558
2559 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2560 Opd0 = Op.getOperand(0);
2561 Immr = ShiftImm;
2562 Imms = ShiftImm + Width - 1;
2563 return true;
2564}
2565
2566static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2567 SDValue &Opd0, unsigned &LSB,
2568 unsigned &MSB) {
2569 // We are looking for the following pattern, which extracts several
2570 // contiguous bits from the source value and places them starting at the
2571 // LSB of the destination value; all other destination bits are set to zero:
2572 //
2573 // Value2 = AND Value, MaskImm
2574 // SRL Value2, ShiftImm
2575 //
2576 // where MaskImm >> ShiftImm is a low-bits mask whose width gives the extracted field width.
2577 //
2578 // This gets selected into a single UBFM:
2579 //
2580 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2581 //
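// For example, (srl (and x, 0xff0), 4) qualifies, since 0xff0 >> 4 == 0xff is
// a mask; it is selected to UBFM x, #4, #11 because Log2_64(0xff0) == 11.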
2582
2583 if (N->getOpcode() != ISD::SRL)
2584 return false;
2585
2586 uint64_t AndMask = 0;
2587 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2588 return false;
2589
2590 Opd0 = N->getOperand(0).getOperand(0);
2591
2592 uint64_t SrlImm = 0;
2593 if (!isIntImmediate(N->getOperand(1), SrlImm))
2594 return false;
2595
2596 // Check whether we really have several bits extract here.
2597 if (!isMask_64(AndMask >> SrlImm))
2598 return false;
2599
2600 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2601 LSB = SrlImm;
2602 MSB = llvm::Log2_64(AndMask);
2603 return true;
2604}
2605
2606static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2607 unsigned &Immr, unsigned &Imms,
2608 bool BiggerPattern) {
2609 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2610 "N must be a SHR/SRA operation to call this function");
2611
2612 EVT VT = N->getValueType(0);
2613
2614 // We could test the type of VT here and return false when it does not
2615 // match, but since that check has already been done before reaching this
2616 // point, we turn it into an assert to avoid redundant code.
2617 assert((VT == MVT::i32 || VT == MVT::i64) &&
2618 "Type checking must have been done before calling this function");
2619
2620 // Check for AND + SRL doing several bits extract.
2621 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2622 return true;
2623
2624 // We're looking for a shift of a shift.
2625 uint64_t ShlImm = 0;
2626 uint64_t TruncBits = 0;
2627 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2628 Opd0 = N->getOperand(0).getOperand(0);
2629 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2630 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2631 // We are looking for a shift of truncate. Truncate from i64 to i32 could
2632 // be considered as setting high 32 bits as zero. Our strategy here is to
2633 // always generate 64bit UBFM. This consistency will help the CSE pass
2634 // later find more redundancy.
2635 Opd0 = N->getOperand(0).getOperand(0);
2636 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2637 VT = Opd0.getValueType();
2638 assert(VT == MVT::i64 && "the promoted type should be i64");
2639 } else if (BiggerPattern) {
2640 // Let's pretend a 0 shift left has been performed.
2641 // FIXME: Currently we limit this to the bigger pattern case,
2642 // because some optimizations expect AND and not UBFM
2643 Opd0 = N->getOperand(0);
2644 } else
2645 return false;
2646
2647 // Missing combines/constant folding may have left us with strange
2648 // constants.
2649 if (ShlImm >= VT.getSizeInBits()) {
2650 LLVM_DEBUG(
2651 (dbgs() << N
2652 << ": Found large shift immediate, this should not happen\n"));
2653 return false;
2654 }
2655
2656 uint64_t SrlImm = 0;
2657 if (!isIntImmediate(N->getOperand(1), SrlImm))
2658 return false;
2659
2660 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2661 "bad amount in shift node!");
2662 int immr = SrlImm - ShlImm;
2663 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2664 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
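// For example, (sra (shl x, 8), 12) on i32 gives Immr = 12 - 8 = 4 and
// Imms = 32 - 8 - 1 = 23, i.e. SBFMWri x, #4, #23, a signed extract of
// bits [23:4] of x.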
2665 // SRA requires a signed extraction
2666 if (VT == MVT::i32)
2667 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2668 else
2669 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2670 return true;
2671}
2672
2673bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2674 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2675
2676 EVT VT = N->getValueType(0);
2677 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2678 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2679 return false;
2680
2681 uint64_t ShiftImm;
2682 SDValue Op = N->getOperand(0);
2683 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2684 return false;
2685
2686 SDLoc dl(N);
2687 // Extend the incoming operand of the shift to 64-bits.
2688 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2689 unsigned Immr = ShiftImm;
2690 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2691 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2692 CurDAG->getTargetConstant(Imms, dl, VT)};
2693 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2694 return true;
2695}
2696
2697static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2698 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2699 unsigned NumberOfIgnoredLowBits = 0,
2700 bool BiggerPattern = false) {
2701 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2702 return false;
2703
2704 switch (N->getOpcode()) {
2705 default:
2706 if (!N->isMachineOpcode())
2707 return false;
2708 break;
2709 case ISD::AND:
2710 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2711 NumberOfIgnoredLowBits, BiggerPattern);
2712 case ISD::SRL:
2713 case ISD::SRA:
2714 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2715
2716 case ISD::SIGN_EXTEND_INREG:
2717 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2718 }
2719
2720 unsigned NOpc = N->getMachineOpcode();
2721 switch (NOpc) {
2722 default:
2723 return false;
2724 case AArch64::SBFMWri:
2725 case AArch64::UBFMWri:
2726 case AArch64::SBFMXri:
2727 case AArch64::UBFMXri:
2728 Opc = NOpc;
2729 Opd0 = N->getOperand(0);
2730 Immr = N->getConstantOperandVal(1);
2731 Imms = N->getConstantOperandVal(2);
2732 return true;
2733 }
2734 // Unreachable
2735 return false;
2736}
2737
2738bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2739 unsigned Opc, Immr, Imms;
2740 SDValue Opd0;
2741 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2742 return false;
2743
2744 EVT VT = N->getValueType(0);
2745 SDLoc dl(N);
2746
2747 // If the bit extract operation is 64bit but the original type is 32bit, we
2748 // need to add one EXTRACT_SUBREG.
2749 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2750 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2751 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2752
2753 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2754 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
2755 MVT::i32, SDValue(BFM, 0));
2756 ReplaceNode(N, Inner.getNode());
2757 return true;
2758 }
2759
2760 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2761 CurDAG->getTargetConstant(Imms, dl, VT)};
2762 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2763 return true;
2764}
2765
2766/// Does DstMask form a complementary pair with the mask provided by
2767/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2768/// this asks whether DstMask zeroes precisely those bits that will be set by
2769/// the other half.
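/// For example, on i32 a DstMask of 0xffff0000 pairs with inserted bits of
/// 0x0000ffff: the two are disjoint and together cover every bit.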
2770static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2771 unsigned NumberOfIgnoredHighBits, EVT VT) {
2772 assert((VT == MVT::i32 || VT == MVT::i64) &&
2773 "i32 or i64 mask type expected!");
2774 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2775
2776 APInt SignificantDstMask = APInt(BitWidth, DstMask);
2777 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2778
2779 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2780 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2781}
2782
2783// Look for bits that will be useful for later uses.
2784 // A bit is considered useless as soon as it is dropped and never used
2785 // before it has been dropped.
2786// E.g., looking for useful bit of x
2787// 1. y = x & 0x7
2788// 2. z = y >> 2
2789// After #1, x useful bits are 0x7, then the useful bits of x, live through
2790// y.
2791// After #2, the useful bits of x are 0x4.
2792 // However, if x is used by an unpredictable instruction, then all its bits
2793// are useful.
2794// E.g.
2795// 1. y = x & 0x7
2796// 2. z = y >> 2
2797// 3. str x, [@x]
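// Here the full-width store in #3 is not modeled by this analysis, so every
// bit of x is conservatively treated as useful.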
2798static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2799
2800 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2801 unsigned Depth) {
2802 uint64_t Imm =
2803 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2804 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2805 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2806 getUsefulBits(Op, UsefulBits, Depth + 1);
2807}
2808
2809 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2810 uint64_t Imm, uint64_t MSB,
2811 unsigned Depth) {
2812 // inherit the bitwidth value
2813 APInt OpUsefulBits(UsefulBits);
2814 OpUsefulBits = 1;
2815
2816 if (MSB >= Imm) {
2817 OpUsefulBits <<= MSB - Imm + 1;
2818 --OpUsefulBits;
2819 // The interesting part will be in the lower part of the result
2820 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2821 // The interesting part was starting at Imm in the argument
2822 OpUsefulBits <<= Imm;
2823 } else {
2824 OpUsefulBits <<= MSB + 1;
2825 --OpUsefulBits;
2826 // The interesting part will be shifted in the result
2827 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2828 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2829 // The interesting part was at zero in the argument
2830 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2831 }
2832
2833 UsefulBits &= OpUsefulBits;
2834}
2835
2836static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2837 unsigned Depth) {
2838 uint64_t Imm =
2839 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2840 uint64_t MSB =
2841 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2842
2843 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2844}
2845
2846 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2847 unsigned Depth) {
2848 uint64_t ShiftTypeAndValue =
2849 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2850 APInt Mask(UsefulBits);
2851 Mask.clearAllBits();
2852 Mask.flipAllBits();
2853
2854 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2855 // Shift Left
2856 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2857 Mask <<= ShiftAmt;
2858 getUsefulBits(Op, Mask, Depth + 1);
2859 Mask.lshrInPlace(ShiftAmt);
2860 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2861 // Shift Right
2862 // We do not handle AArch64_AM::ASR, because the sign will change the
2863 // number of useful bits
2864 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2865 Mask.lshrInPlace(ShiftAmt);
2866 getUsefulBits(Op, Mask, Depth + 1);
2867 Mask <<= ShiftAmt;
2868 } else
2869 return;
2870
2871 UsefulBits &= Mask;
2872}
2873
2874static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
2875 unsigned Depth) {
2876 uint64_t Imm =
2877 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2878 uint64_t MSB =
2879 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
2880
2881 APInt OpUsefulBits(UsefulBits);
2882 OpUsefulBits = 1;
2883
2884 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
2885 ResultUsefulBits.flipAllBits();
2886 APInt Mask(UsefulBits.getBitWidth(), 0);
2887
2888 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
2889
2890 if (MSB >= Imm) {
2891 // The instruction is a BFXIL.
2892 uint64_t Width = MSB - Imm + 1;
2893 uint64_t LSB = Imm;
2894
2895 OpUsefulBits <<= Width;
2896 --OpUsefulBits;
2897
2898 if (Op.getOperand(1) == Orig) {
2899 // Copy the low bits from the result to bits starting from LSB.
2900 Mask = ResultUsefulBits & OpUsefulBits;
2901 Mask <<= LSB;
2902 }
2903
2904 if (Op.getOperand(0) == Orig)
2905 // Bits starting from LSB in the input contribute to the result.
2906 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2907 } else {
2908 // The instruction is a BFI.
2909 uint64_t Width = MSB + 1;
2910 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
2911
2912 OpUsefulBits <<= Width;
2913 --OpUsefulBits;
2914 OpUsefulBits <<= LSB;
2915
2916 if (Op.getOperand(1) == Orig) {
2917 // Copy the bits from the result to the zero bits.
2918 Mask = ResultUsefulBits & OpUsefulBits;
2919 Mask.lshrInPlace(LSB);
2920 }
2921
2922 if (Op.getOperand(0) == Orig)
2923 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2924 }
2925
2926 UsefulBits &= Mask;
2927}
2928
2929static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
2930 SDValue Orig, unsigned Depth) {
2931
2932 // Users of this node should have already been instruction selected
2933 // FIXME: Can we turn that into an assert?
2934 if (!UserNode->isMachineOpcode())
2935 return;
2936
2937 switch (UserNode->getMachineOpcode()) {
2938 default:
2939 return;
2940 case AArch64::ANDSWri:
2941 case AArch64::ANDSXri:
2942 case AArch64::ANDWri:
2943 case AArch64::ANDXri:
2944 // We increment Depth only when we call getUsefulBits
2945 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
2946 Depth);
2947 case AArch64::UBFMWri:
2948 case AArch64::UBFMXri:
2949 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
2950
2951 case AArch64::ORRWrs:
2952 case AArch64::ORRXrs:
2953 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
2954 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
2955 Depth);
2956 return;
2957 case AArch64::BFMWri:
2958 case AArch64::BFMXri:
2959 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
2960
2961 case AArch64::STRBBui:
2962 case AArch64::STURBBi:
2963 if (UserNode->getOperand(0) != Orig)
2964 return;
2965 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
2966 return;
2967
2968 case AArch64::STRHHui:
2969 case AArch64::STURHHi:
2970 if (UserNode->getOperand(0) != Orig)
2971 return;
2972 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
2973 return;
2974 }
2975}
2976
2977 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
2978 if (Depth >= SelectionDAG::MaxRecursionDepth)
2979 return;
2980 // Initialize UsefulBits
2981 if (!Depth) {
2982 unsigned Bitwidth = Op.getScalarValueSizeInBits();
2983 // At the beginning, assume every produced bits is useful
2984 UsefulBits = APInt(Bitwidth, 0);
2985 UsefulBits.flipAllBits();
2986 }
2987 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
2988
2989 for (SDNode *Node : Op.getNode()->uses()) {
2990 // A use cannot produce useful bits
2991 APInt UsefulBitsForUse = APInt(UsefulBits);
2992 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
2993 UsersUsefulBits |= UsefulBitsForUse;
2994 }
2995 // UsefulBits contains the produced bits that are meaningful for the
2996 // current definition, thus a user cannot make a bit meaningful at
2997 // this point
2998 UsefulBits &= UsersUsefulBits;
2999}
3000
3001/// Create a machine node performing a notional SHL of Op by ShlAmount. If
3002/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3003/// 0, return Op unchanged.
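/// For example, on an i32 value, ShlAmount == 3 produces UBFMWri Op, 29, 28
/// (an LSL #3) and ShlAmount == -2 produces UBFMWri Op, 2, 31 (an LSR #2).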
3004static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
3005 if (ShlAmount == 0)
3006 return Op;
3007
3008 EVT VT = Op.getValueType();
3009 SDLoc dl(Op);
3010 unsigned BitWidth = VT.getSizeInBits();
3011 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
3012
3013 SDNode *ShiftNode;
3014 if (ShlAmount > 0) {
3015 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
3016 ShiftNode = CurDAG->getMachineNode(
3017 UBFMOpc, dl, VT, Op,
3018 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
3019 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
3020 } else {
3021 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3022 assert(ShlAmount < 0 && "expected right shift");
3023 int ShrAmount = -ShlAmount;
3024 ShiftNode = CurDAG->getMachineNode(
3025 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
3026 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
3027 }
3028
3029 return SDValue(ShiftNode, 0);
3030}
3031
3032 // For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3033 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3034 bool BiggerPattern,
3035 const uint64_t NonZeroBits,
3036 SDValue &Src, int &DstLSB,
3037 int &Width);
3038
3039 // For bit-field-positioning pattern "(shl VAL, N)".
3040 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3041 bool BiggerPattern,
3042 const uint64_t NonZeroBits,
3043 SDValue &Src, int &DstLSB,
3044 int &Width);
3045
3046/// Does this tree qualify as an attempt to move a bitfield into position,
3047 /// essentially "(and (shl VAL, N), Mask)" or "(shl VAL, N)".
3048 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
3049 bool BiggerPattern, SDValue &Src,
3050 int &DstLSB, int &Width) {
3051 EVT VT = Op.getValueType();
3052 unsigned BitWidth = VT.getSizeInBits();
3053 (void)BitWidth;
3054 assert(BitWidth == 32 || BitWidth == 64);
3055
3056 KnownBits Known = CurDAG->computeKnownBits(Op);
3057
3058 // Non-zero in the sense that they're not provably zero, which is the key
3059 // point if we want to use this value
3060 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
3061 if (!isShiftedMask_64(NonZeroBits))
3062 return false;
3063
3064 switch (Op.getOpcode()) {
3065 default:
3066 break;
3067 case ISD::AND:
3068 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3069 NonZeroBits, Src, DstLSB, Width);
3070 case ISD::SHL:
3071 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3072 NonZeroBits, Src, DstLSB, Width);
3073 }
3074
3075 return false;
3076}
3077
3078 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3079 bool BiggerPattern,
3080 const uint64_t NonZeroBits,
3081 SDValue &Src, int &DstLSB,
3082 int &Width) {
3083 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3084
3085 EVT VT = Op.getValueType();
3086 assert((VT == MVT::i32 || VT == MVT::i64) &&
3087 "Caller guarantees VT is one of i32 or i64");
3088 (void)VT;
3089
3090 uint64_t AndImm;
3091 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
3092 return false;
3093
3094 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3095 // 1) (AndImm & (1 << POS) == 0)
3096 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3097 //
3098 // 1) and 2) don't agree so something must be wrong (e.g., in
3099 // 'SelectionDAG::computeKnownBits')
3100 assert((~AndImm & NonZeroBits) == 0 &&
3101 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3102
3103 SDValue AndOp0 = Op.getOperand(0);
3104
3105 uint64_t ShlImm;
3106 SDValue ShlOp0;
3107 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3108 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3109 ShlOp0 = AndOp0.getOperand(0);
3110 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3111 isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
3112 ShlImm)) {
3113 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3114
3115 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3116 SDValue ShlVal = AndOp0.getOperand(0);
3117
3118 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3119 // expect VT to be MVT::i32.
3120 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3121
3122 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3123 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3124 } else
3125 return false;
3126
3127 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3128 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3129 // AndOp0+AND.
3130 if (!BiggerPattern && !AndOp0.hasOneUse())
3131 return false;
3132
3133 DstLSB = llvm::countr_zero(NonZeroBits);
3134 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3135
3136 // Bail out on large Width. This happens when no proper combining / constant
3137 // folding was performed.
3138 if (Width >= (int)VT.getSizeInBits()) {
3139 // If VT is i64, Width > 64 is not possible, since NonZeroBits is uint64_t, and
3140 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3141 // "val".
3142 // If VT is i32, what Width >= 32 means:
3143 // - For "(and (any_extend(shl val, N)), shifted-mask)", the`and` Op
3144 // demands at least 'Width' bits (after dag-combiner). This together with
3145 // `any_extend` Op (undefined higher bits) indicates missed combination
3146 // when lowering the 'and' IR instruction to a machine IR instruction.
3147 LLVM_DEBUG(
3148 dbgs()
3149 << "Found large Width in bit-field-positioning -- this indicates no "
3150 "proper combining / constant folding was performed\n");
3151 return false;
3152 }
3153
3154 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3155 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3156 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3157 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3158 // which case it is not profitable to insert an extra shift.
3159 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3160 return false;
3161
3162 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3163 return true;
3164}
3165
3166 // For node (shl (and val, mask), N), returns true if the node is equivalent to
3167 // UBFIZ.
3168 static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3169 SDValue &Src, int &DstLSB,
3170 int &Width) {
3171 // The caller should have verified that Op is a left shift by the constant
3172 // amount ShlImm; the asserts below check this.
3173 assert(Op.getOpcode() == ISD::SHL &&
3174 "Op.getNode() should be a SHL node to call this function");
3175 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3176 "Op.getNode() should shift ShlImm to call this function");
3177
3178 uint64_t AndImm = 0;
3179 SDValue Op0 = Op.getOperand(0);
3180 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3181 return false;
3182
3183 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3184 if (isMask_64(ShiftedAndImm)) {
3185 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3186 // should end with Mask, and could be prefixed with random bits if those
3187 // bits are shifted out.
3188 //
3189 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3190 // the AND result corresponding to those bits are shifted out, so it's fine
3191 // to not extract them.
3192 Width = llvm::countr_one(ShiftedAndImm);
3193 DstLSB = ShlImm;
3194 Src = Op0.getOperand(0);
3195 return true;
3196 }
3197 return false;
3198}
3199
3200 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3201 bool BiggerPattern,
3202 const uint64_t NonZeroBits,
3203 SDValue &Src, int &DstLSB,
3204 int &Width) {
3205 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3206
3207 EVT VT = Op.getValueType();
3208 assert((VT == MVT::i32 || VT == MVT::i64) &&
3209 "Caller guarantees that type is i32 or i64");
3210 (void)VT;
3211
3212 uint64_t ShlImm;
3213 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3214 return false;
3215
3216 if (!BiggerPattern && !Op.hasOneUse())
3217 return false;
3218
3219 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3220 return true;
3221
3222 DstLSB = llvm::countr_zero(NonZeroBits);
3223 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3224
3225 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3226 return false;
3227
3228 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3229 return true;
3230}
3231
3232static bool isShiftedMask(uint64_t Mask, EVT VT) {
3233 assert(VT == MVT::i32 || VT == MVT::i64);
3234 if (VT == MVT::i32)
3235 return isShiftedMask_32(Mask);
3236 return isShiftedMask_64(Mask);
3237}
3238
3239// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3240// inserted only sets known zero bits.
3241static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
3242 assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
3243
3244 EVT VT = N->getValueType(0);
3245 if (VT != MVT::i32 && VT != MVT::i64)
3246 return false;
3247
3248 unsigned BitWidth = VT.getSizeInBits();
3249
3250 uint64_t OrImm;
3251 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3252 return false;
3253
3254 // Skip this transformation if the ORR immediate can be encoded in the ORR.
3255 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3256 // performance neutral.
3257 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
3258 return false;
3259
3260 uint64_t MaskImm;
3261 SDValue And = N->getOperand(0);
3262 // Must be a single use AND with an immediate operand.
3263 if (!And.hasOneUse() ||
3264 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3265 return false;
3266
3267 // Compute the Known Zero for the AND as this allows us to catch more general
3268 // cases than just looking for AND with imm.
3269 KnownBits Known = CurDAG->computeKnownBits(And);
3270
3271 // Non-zero in the sense that they're not provably zero, which is the key
3272 // point if we want to use this value.
3273 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3274
3275 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3276 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3277 return false;
3278
3279 // The bits being inserted must only set those bits that are known to be zero.
3280 if ((OrImm & NotKnownZero) != 0) {
3281 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3282 // currently handle this case.
3283 return false;
3284 }
3285
3286 // BFI/BFXIL dst, src, #lsb, #width.
3287 int LSB = llvm::countr_one(NotKnownZero);
3288 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3289
3290 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3291 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3292 unsigned ImmS = Width - 1;
3293
3294 // If we're creating a BFI instruction, avoid cases where we need more
3295 // instructions to materialize the BFI constant as compared to the original
3296 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3297 // should be no worse in this case.
3298 bool IsBFI = LSB != 0;
3299 uint64_t BFIImm = OrImm >> LSB;
3300 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3301 // We have a BFI instruction and we know the constant can't be materialized
3302 // with an ORR-immediate with the zero register.
3303 unsigned OrChunks = 0, BFIChunks = 0;
3304 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3305 if (((OrImm >> Shift) & 0xFFFF) != 0)
3306 ++OrChunks;
3307 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3308 ++BFIChunks;
3309 }
3310 if (BFIChunks > OrChunks)
3311 return false;
3312 }
3313
3314 // Materialize the constant to be inserted.
3315 SDLoc DL(N);
3316 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3317 SDNode *MOVI = CurDAG->getMachineNode(
3318 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3319
3320 // Create the BFI/BFXIL instruction.
3321 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3322 CurDAG->getTargetConstant(ImmR, DL, VT),
3323 CurDAG->getTargetConstant(ImmS, DL, VT)};
3324 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3325 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3326 return true;
3327}
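// Illustrative trace (hypothetical i32 values): for
// (or (and X, 0xFF00FFFF), 0x00AB0000), OrImm = 0x00AB0000 is not a logical
// immediate, Known.Zero = 0x00FF0000 is a shifted mask and NotKnownZero =
// 0xFF00FFFF, so LSB = 16 and Width = 8. BFIImm = 0xAB is materialized with
// MOVi32imm and inserted with BFMWri (ImmR = 16, ImmS = 7), i.e. a
// "bfi ..., #16, #8" whose destination register is the AND's first operand X.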
3328
3329static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
3330 SDValue &ShiftedOperand,
3331 uint64_t &EncodedShiftImm) {
3332 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3333 if (!Dst.hasOneUse())
3334 return false;
3335
3336 EVT VT = Dst.getValueType();
3337 assert((VT == MVT::i32 || VT == MVT::i64) &&
3338 "Caller should guarantee that VT is one of i32 or i64");
3339 const unsigned SizeInBits = VT.getSizeInBits();
3340
3341 SDLoc DL(Dst.getNode());
3342 uint64_t AndImm, ShlImm;
3343 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3344 isShiftedMask_64(AndImm)) {
3345 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3346 SDValue DstOp0 = Dst.getOperand(0);
3347 if (!DstOp0.hasOneUse())
3348 return false;
3349
3350 // An example to illustrate the transformation
3351 // From:
3352 // lsr x8, x1, #1
3353 // and x8, x8, #0x3f80
3354 // bfxil x8, x1, #0, #7
3355 // To:
3356 // and x8, x1, #0x7f
3357 // ubfx x9, x1, #8, #7
3358 // orr x8, x8, x9, lsl #7
3359 //
3360 // The number of instructions remains the same, but ORR is faster than BFXIL
3361 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3362 // the dependency chain is improved after the transformation.
3363 uint64_t SrlImm;
3364 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3365 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3366 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3367 unsigned MaskWidth =
3368 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3369 unsigned UBFMOpc =
3370 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3371 SDNode *UBFMNode = CurDAG->getMachineNode(
3372 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3373 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3374 VT),
3375 CurDAG->getTargetConstant(
3376 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3377 ShiftedOperand = SDValue(UBFMNode, 0);
3378 EncodedShiftImm = AArch64_AM::getShifterImm(
3379 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3380 return true;
3381 }
3382 }
3383 return false;
3384 }
3385
3386 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3387 ShiftedOperand = Dst.getOperand(0);
3388 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3389 return true;
3390 }
3391
3392 uint64_t SrlImm;
3393 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3394 ShiftedOperand = Dst.getOperand(0);
3395 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3396 return true;
3397 }
3398 return false;
3399}
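// Illustrative trace for the AND-of-SRL case above (using the x1-based example
// from the comment): Dst = (and (srl x1, #1), 0x3f80) gives SrlImm = 1,
// AndImm = 0x3f80, NumTrailingZeroInShiftedMask = 7 and MaskWidth = 7, so the
// UBFM is built with immr = 8 and imms = 14 (i.e. ubfx x9, x1, #8, #7) and
// EncodedShiftImm encodes "lsl #7" for the enclosing ORR.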
3400
3401// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3402// the operands and select it to AArch64::ORR with shifted registers if
3403// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3404static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3405 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3406 const bool BiggerPattern) {
3407 EVT VT = N->getValueType(0);
3408 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3409 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3410 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3411 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3412 assert((VT == MVT::i32 || VT == MVT::i64) &&
3413 "Expect result type to be i32 or i64 since N is combinable to BFM");
3414 SDLoc DL(N);
3415
3416 // Bail out if BFM simplifies away one node in BFM Dst.
3417 if (OrOpd1 != Dst)
3418 return false;
3419
3420 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3421 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3422 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3423 if (BiggerPattern) {
3424 uint64_t SrcAndImm;
3425 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3426 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3427 // OrOpd0 = AND Src, #Mask
3428 // So BFM simplifies away one AND node from Src and doesn't simplify away
3429 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3430 // one node (from Rd), ORR is better since it has higher throughput and
3431 // smaller latency than BFM on many AArch64 processors (and for the rest
3432 // ORR is at least as good as BFM).
3433 SDValue ShiftedOperand;
3434 uint64_t EncodedShiftImm;
3435 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3436 EncodedShiftImm)) {
3437 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3438 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3439 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3440 return true;
3441 }
3442 }
3443 return false;
3444 }
3445
3446 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3447
3448 uint64_t ShlImm;
3449 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3450 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3451 SDValue Ops[] = {
3452 Dst, Src,
3453 CurDAG->getTargetConstant(
3454 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3455 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3456 return true;
3457 }
3458
3459 // Select the following pattern to left-shifted operand rather than BFI.
3460 // %val1 = op ..
3461 // %val2 = shl %val1, #imm
3462 // %res = or %val1, %val2
3463 //
3464 // If N is selected to be BFI, we know that
3465 // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3466 // into BFI) 2) OrOpd1 would be the destination operand (i.e., preserved)
3467 //
3468 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3469 if (OrOpd0.getOperand(0) == OrOpd1) {
3470 SDValue Ops[] = {
3471 OrOpd1, OrOpd1,
3472 CurDAG->getTargetConstant(
3473 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3474 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3475 return true;
3476 }
3477 }
3478
3479 uint64_t SrlImm;
3480 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3481 // Select the following pattern to right-shifted operand rather than BFXIL.
3482 // %val1 = op ..
3483 // %val2 = lshr %val1, #imm
3484 // %res = or %val1, %val2
3485 //
3486 // If N is selected to be BFXIL, we know that
3487 // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3488 // into BFXIL) 2) OrOpd1 would be the destination operand (i.e., preserved)
3489 //
3490 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3491 if (OrOpd0.getOperand(0) == OrOpd1) {
3492 SDValue Ops[] = {
3493 OrOpd1, OrOpd1,
3494 CurDAG->getTargetConstant(
3495 AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
3496 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3497 return true;
3498 }
3499 }
3500
3501 return false;
3502}
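// Illustrative example (hypothetical i64 IR): for
//   %val2 = shl i64 %val1, 3
//   %res  = or i64 %val1, %val2
// the !BiggerPattern path above selects ORRXrs with %val1 as both register
// operands and an "lsl #3" shifter, e.g. "orr x0, x1, x1, lsl #3", instead of
// selecting the OR to a BFI.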
3503
3504static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3505 SelectionDAG *CurDAG) {
3506 assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
3507
3508 EVT VT = N->getValueType(0);
3509 if (VT != MVT::i32 && VT != MVT::i64)
3510 return false;
3511
3512 unsigned BitWidth = VT.getSizeInBits();
3513
3514 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3515 // have the expected shape. Try to undo that.
3516
3517 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3518 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3519
3520 // Given an OR operation, check if we have the following pattern
3521 // ubfm c, b, imm, imm2 (or something that does the same job, see
3522 // isBitfieldExtractOp)
3523 // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
3524 // countTrailingZeros(mask2) == imm2 - imm + 1
3525 // f = d | c
3526 // if yes, replace the OR instruction with:
3527 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3528
3529 // OR is commutative, check all combinations of operand order and values of
3530 // BiggerPattern, i.e.
3531 // Opd0, Opd1, BiggerPattern=false
3532 // Opd1, Opd0, BiggerPattern=false
3533 // Opd0, Opd1, BiggerPattern=true
3534 // Opd1, Opd0, BiggerPattern=true
3535 // Several of these combinations may match, so check with BiggerPattern=false
3536 // first since that will produce better results by matching more instructions
3537 // and/or inserting fewer extra instructions.
3538 for (int I = 0; I < 4; ++I) {
3539
3540 SDValue Dst, Src;
3541 unsigned ImmR, ImmS;
3542 bool BiggerPattern = I / 2;
3543 SDValue OrOpd0Val = N->getOperand(I % 2);
3544 SDNode *OrOpd0 = OrOpd0Val.getNode();
3545 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3546 SDNode *OrOpd1 = OrOpd1Val.getNode();
3547
3548 unsigned BFXOpc;
3549 int DstLSB, Width;
3550 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3551 NumberOfIgnoredLowBits, BiggerPattern)) {
3552 // Check that the returned opcode is compatible with the pattern,
3553 // i.e., same type and zero extended (U and not S)
3554 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3555 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3556 continue;
3557
3558 // Compute the width of the bitfield insertion
3559 DstLSB = 0;
3560 Width = ImmS - ImmR + 1;
3561 // FIXME: This constraint is to catch bitfield insertion; we may
3562 // want to widen the pattern if we want to grab the general bitfield
3563 // move case.
3564 if (Width <= 0)
3565 continue;
3566
3567 // If the mask on the insertee is correct, we have a BFXIL operation. We
3568 // can share the ImmR and ImmS values from the already-computed UBFM.
3569 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3570 BiggerPattern,
3571 Src, DstLSB, Width)) {
3572 ImmR = (BitWidth - DstLSB) % BitWidth;
3573 ImmS = Width - 1;
3574 } else
3575 continue;
3576
3577 // Check the second part of the pattern
3578 EVT VT = OrOpd1Val.getValueType();
3579 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3580
3581 // Compute the Known Zero for the candidate of the first operand.
3582 // This allows us to catch more general cases than just looking for
3583 // AND with imm. Indeed, simplify-demanded-bits may have removed
3584 // the AND instruction because it proved it was useless.
3585 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3586
3587 // Check if there is enough room for the second operand to appear
3588 // in the first one
3589 APInt BitsToBeInserted =
3590 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3591
3592 if ((BitsToBeInserted & ~Known.Zero) != 0)
3593 continue;
3594
3595 // Set the first operand
3596 uint64_t Imm;
3597 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3598 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3599 // In that case, we can eliminate the AND
3600 Dst = OrOpd1->getOperand(0);
3601 else
3602 // Maybe the AND has been removed by simplify-demanded-bits
3603 // or is useful because it discards more bits
3604 Dst = OrOpd1Val;
3605
3606 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3607 // with shifted operand is more efficient.
3608 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3609 BiggerPattern))
3610 return true;
3611
3612 // both parts match
3613 SDLoc DL(N);
3614 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3615 CurDAG->getTargetConstant(ImmS, DL, VT)};
3616 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3617 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3618 return true;
3619 }
3620
3621 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3622 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3623 // mask (e.g., 0x000ffff0).
3624 uint64_t Mask0Imm, Mask1Imm;
3625 SDValue And0 = N->getOperand(0);
3626 SDValue And1 = N->getOperand(1);
3627 if (And0.hasOneUse() && And1.hasOneUse() &&
3628 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3629 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3630 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3631 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3632
3633 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3634 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3635 // bits to be inserted.
3636 if (isShiftedMask(Mask0Imm, VT)) {
3637 std::swap(And0, And1);
3638 std::swap(Mask0Imm, Mask1Imm);
3639 }
3640
3641 SDValue Src = And1->getOperand(0);
3642 SDValue Dst = And0->getOperand(0);
3643 unsigned LSB = llvm::countr_zero(Mask1Imm);
3644 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3645
3646 // The BFXIL inserts the low-order bits from a source register, so right
3647 // shift the needed bits into place.
3648 SDLoc DL(N);
3649 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3650 uint64_t LsrImm = LSB;
3651 if (Src->hasOneUse() &&
3652 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3653 (LsrImm + LSB) < BitWidth) {
3654 Src = Src->getOperand(0);
3655 LsrImm += LSB;
3656 }
3657
3658 SDNode *LSR = CurDAG->getMachineNode(
3659 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3660 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3661
3662 // BFXIL is an alias of BFM, so translate to BFM operands.
3663 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3664 unsigned ImmS = Width - 1;
3665
3666 // Create the BFXIL instruction.
3667 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3668 CurDAG->getTargetConstant(ImmR, DL, VT),
3669 CurDAG->getTargetConstant(ImmS, DL, VT)};
3670 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3671 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3672 return true;
3673 }
3674
3675 return false;
3676}
3677
3678bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3679 if (N->getOpcode() != ISD::OR)
3680 return false;
3681
3682 APInt NUsefulBits;
3683 getUsefulBits(SDValue(N, 0), NUsefulBits);
3684
3685 // If none of the bits are useful, just return UNDEF.
3686 if (!NUsefulBits) {
3687 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3688 return true;
3689 }
3690
3691 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3692 return true;
3693
3694 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3695}
3696
3697/// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3698/// equivalent of a left shift by a constant amount followed by an and masking
3699/// out a contiguous set of bits.
3700bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3701 if (N->getOpcode() != ISD::AND)
3702 return false;
3703
3704 EVT VT = N->getValueType(0);
3705 if (VT != MVT::i32 && VT != MVT::i64)
3706 return false;
3707
3708 SDValue Op0;
3709 int DstLSB, Width;
3710 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3711 Op0, DstLSB, Width))
3712 return false;
3713
3714 // ImmR is the rotate right amount.
3715 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3716 // ImmS is the most significant bit of the source to be moved.
3717 unsigned ImmS = Width - 1;
3718
3719 SDLoc DL(N);
3720 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3721 CurDAG->getTargetConstant(ImmS, DL, VT)};
3722 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3723 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3724 return true;
3725}
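// Illustrative trace (hypothetical i32 node): for (and (shl x, 5), 0x3E0),
// isBitfieldPositioningOp reports DstLSB = 5 and Width = 5, so ImmR =
// (32 - 5) % 32 = 27 and ImmS = 4, and the node is selected to UBFMWri,
// i.e. "ubfiz w0, w1, #5, #5".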
3726
3727/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3728/// variable shift/rotate instructions.
3729bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3730 EVT VT = N->getValueType(0);
3731
3732 unsigned Opc;
3733 switch (N->getOpcode()) {
3734 case ISD::ROTR:
3735 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3736 break;
3737 case ISD::SHL:
3738 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3739 break;
3740 case ISD::SRL:
3741 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3742 break;
3743 case ISD::SRA:
3744 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3745 break;
3746 default:
3747 return false;
3748 }
3749
3750 uint64_t Size;
3751 uint64_t Bits;
3752 if (VT == MVT::i32) {
3753 Bits = 5;
3754 Size = 32;
3755 } else if (VT == MVT::i64) {
3756 Bits = 6;
3757 Size = 64;
3758 } else
3759 return false;
3760
3761 SDValue ShiftAmt = N->getOperand(1);
3762 SDLoc DL(N);
3763 SDValue NewShiftAmt;
3764
3765 // Skip over an extend of the shift amount.
3766 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3767 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3768 ShiftAmt = ShiftAmt->getOperand(0);
3769
3770 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3771 SDValue Add0 = ShiftAmt->getOperand(0);
3772 SDValue Add1 = ShiftAmt->getOperand(1);
3773 uint64_t Add0Imm;
3774 uint64_t Add1Imm;
3775 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
3776 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3777 // to avoid the ADD/SUB.
3778 NewShiftAmt = Add0;
3779 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3780 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
3781 (Add0Imm % Size == 0)) {
3782 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3783 // to generate a NEG instead of a SUB from a constant.
3784 unsigned NegOpc;
3785 unsigned ZeroReg;
3786 EVT SubVT = ShiftAmt->getValueType(0);
3787 if (SubVT == MVT::i32) {
3788 NegOpc = AArch64::SUBWrr;
3789 ZeroReg = AArch64::WZR;
3790 } else {
3791 assert(SubVT == MVT::i64);
3792 NegOpc = AArch64::SUBXrr;
3793 ZeroReg = AArch64::XZR;
3794 }
3795 SDValue Zero =
3796 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3797 MachineSDNode *Neg =
3798 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
3799 NewShiftAmt = SDValue(Neg, 0);
3800 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3801 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
3802 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3803 // to generate a NOT instead of a SUB from a constant.
3804 unsigned NotOpc;
3805 unsigned ZeroReg;
3806 EVT SubVT = ShiftAmt->getValueType(0);
3807 if (SubVT == MVT::i32) {
3808 NotOpc = AArch64::ORNWrr;
3809 ZeroReg = AArch64::WZR;
3810 } else {
3811 assert(SubVT == MVT::i64);
3812 NotOpc = AArch64::ORNXrr;
3813 ZeroReg = AArch64::XZR;
3814 }
3815 SDValue Zero =
3816 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3817 MachineSDNode *Not =
3818 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
3819 NewShiftAmt = SDValue(Not, 0);
3820 } else
3821 return false;
3822 } else {
3823 // If the shift amount is masked with an AND, check that the mask covers the
3824 // bits that are implicitly ANDed off by the above opcodes and if so, skip
3825 // the AND.
3826 uint64_t MaskImm;
3827 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
3828 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
3829 return false;
3830
3831 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
3832 return false;
3833
3834 NewShiftAmt = ShiftAmt->getOperand(0);
3835 }
3836
3837 // Narrow/widen the shift amount to match the size of the shift operation.
3838 if (VT == MVT::i32)
3839 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
3840 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
3841 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
3842 MachineSDNode *Ext = CurDAG->getMachineNode(
3843 AArch64::SUBREG_TO_REG, DL, VT,
3844 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
3845 NewShiftAmt = SDValue(Ext, 0);
3846 }
3847
3848 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
3849 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3850 return true;
3851}
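// Illustrative examples (hypothetical i64 shifts): for (srl x, (add y, 64)) the
// ADD is dropped because LSRVXr only uses the shift amount modulo 64; for
// (srl x, (sub 64, y)) a NEG of y is used instead of materializing the
// constant; and for (srl x, (sub 63, y)) an ORN (bitwise NOT of y) is used,
// since (63 - y) and ~y agree modulo 64.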
3852
3853static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
3854 SDValue &FixedPos,
3855 unsigned RegWidth,
3856 bool isReciprocal) {
3857 APFloat FVal(0.0);
3858 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
3859 FVal = CN->getValueAPF();
3860 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
3861 // Some otherwise illegal constants are allowed in this case.
3862 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
3863 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
3864 return false;
3865
3866 ConstantPoolSDNode *CN =
3867 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
3868 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
3869 } else
3870 return false;
3871
3872 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
3873 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
3874 // x-register.
3875 //
3876 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
3877 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
3878 // integers.
3879 bool IsExact;
3880
3881 if (isReciprocal)
3882 if (!FVal.getExactInverse(&FVal))
3883 return false;
3884
3885 // fbits is between 1 and 64 in the worst-case, which means the fmul
3886 // could have 2^64 as an actual operand. Need 65 bits of precision.
3887 APSInt IntVal(65, true);
3888 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
3889
3890 // N.b. isPowerOf2 also checks for > 0.
3891 if (!IsExact || !IntVal.isPowerOf2())
3892 return false;
3893 unsigned FBits = IntVal.logBase2();
3894
3895 // Checks above should have guaranteed that we haven't lost information in
3896 // finding FBits, but it must still be in range.
3897 if (FBits == 0 || FBits > RegWidth) return false;
3898
3899 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
3900 return true;
3901}
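// Illustrative example (hypothetical operands): for
// (fp_to_sint (fmul x, 256.0)) with a w-register destination, FVal = 256.0 is
// exactly 2^8, so FBits = 8 (within 1..32) and FixedPos becomes the constant 8
// used by the fixed-point conversion pattern. With isReciprocal set, a
// multiplier of 0.00390625 (= 1/256) is first inverted to 256.0 and handled the
// same way.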
3902
3903bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
3904 unsigned RegWidth) {
3905 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3906 false);
3907}
3908
3909bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
3910 SDValue &FixedPos,
3911 unsigned RegWidth) {
3912 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3913 true);
3914}
3915
3916// Inspects a register string of the form o0:op1:CRn:CRm:op2, gets the fields
3917// of the string, obtains the integer values from them, and combines these
3918// into a single value to be used in the MRS/MSR instruction.
3919static int getIntOperandFromRegisterString(StringRef RegString) {
3920 SmallVector<StringRef, 5> Fields;
3921 RegString.split(Fields, ':');
3922
3923 if (Fields.size() == 1)
3924 return -1;
3925
3926 assert(Fields.size() == 5
3927 && "Invalid number of fields in read register string");
3928
3929 SmallVector<int, 5> Ops;
3930 bool AllIntFields = true;
3931
3932 for (StringRef Field : Fields) {
3933 unsigned IntField;
3934 AllIntFields &= !Field.getAsInteger(10, IntField);
3935 Ops.push_back(IntField);
3936 }
3937
3938 assert(AllIntFields &&
3939 "Unexpected non-integer value in special register string.");
3940 (void)AllIntFields;
3941
3942 // Need to combine the integer fields of the string into a single value
3943 // based on the bit encoding of MRS/MSR instruction.
3944 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
3945 (Ops[3] << 3) | (Ops[4]);
3946}
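// Illustrative example: the generic string "3:3:13:0:2" (op0 = 3, op1 = 3,
// CRn = 13, CRm = 0, op2 = 2, which is TPIDR_EL0) packs to
// (3 << 14) | (3 << 11) | (13 << 7) | (0 << 3) | 2 = 0xDE82. A string without
// any ':' separators returns -1 so the caller can fall back to the named
// sysreg mappers.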
3947
3948// Lower the read_register intrinsic to an MRS instruction node if the special
3949// register string argument is either of the form detailed in the ACLE (the
3950// form described in getIntOperandFromRegisterString) or is a named register
3951// known by the MRS SysReg mapper.
3952bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
3953 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
3954 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3955 SDLoc DL(N);
3956
3957 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
3958
3959 unsigned Opcode64Bit = AArch64::MRS;
3960 int Imm = getIntOperandFromRegisterString(RegString->getString());
3961 if (Imm == -1) {
3962 // No match, so use the sysreg mapper to map the remaining possible strings to
3963 // the value for the register to be used for the instruction operand.
3964 const auto *TheReg =
3965 AArch64SysReg::lookupSysRegByName(RegString->getString());
3966 if (TheReg && TheReg->Readable &&
3967 TheReg->haveFeatures(Subtarget->getFeatureBits()))
3968 Imm = TheReg->Encoding;
3969 else
3970 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
3971
3972 if (Imm == -1) {
3973 // Still no match, see if this is "pc" or give up.
3974 if (!ReadIs128Bit && RegString->getString() == "pc") {
3975 Opcode64Bit = AArch64::ADR;
3976 Imm = 0;
3977 } else {
3978 return false;
3979 }
3980 }
3981 }
3982
3983 SDValue InChain = N->getOperand(0);
3984 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
3985 if (!ReadIs128Bit) {
3986 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
3987 {SysRegImm, InChain});
3988 } else {
3989 SDNode *MRRS = CurDAG->getMachineNode(
3990 AArch64::MRRS, DL,
3991 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
3992 {SysRegImm, InChain});
3993
3994 // Sysregs are not endian. The even register always contains the low half
3995 // of the register.
3996 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
3997 SDValue(MRRS, 0));
3998 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
3999 SDValue(MRRS, 0));
4000 SDValue OutChain = SDValue(MRRS, 1);
4001
4002 ReplaceUses(SDValue(N, 0), Lo);
4003 ReplaceUses(SDValue(N, 1), Hi);
4004 ReplaceUses(SDValue(N, 2), OutChain);
4005 }
4006 return true;
4007}
4008
4009// Lower the write_register intrinsic to an MSR instruction node if the special
4010// register string argument is either of the form detailed in the ACLE (the
4011// form described in getIntOperandFromRegisterString) or is a named register
4012// known by the MSR SysReg mapper.
4013bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
4014 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4015 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4016 SDLoc DL(N);
4017
4018 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
4019
4020 if (!WriteIs128Bit) {
4021 // Check if the register was one of those allowed as the pstatefield value
4022 // in the MSR (immediate) instruction. To accept the values allowed in the
4023 // pstatefield for the MSR (immediate) instruction, we also require that an
4024 // immediate value has been provided as an argument; we know that this is
4025 // the case as it has been ensured by semantic checking.
4026 auto trySelectPState = [&](auto PMapper, unsigned State) {
4027 if (PMapper) {
4028 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
4029 "Expected a constant integer expression.");
4030 unsigned Reg = PMapper->Encoding;
4031 uint64_t Immed = N->getConstantOperandVal(2);
4032 CurDAG->SelectNodeTo(
4033 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
4034 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
4035 return true;
4036 }
4037 return false;
4038 };
4039
4040 if (trySelectPState(
4041 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
4042 AArch64::MSRpstateImm4))
4043 return true;
4044 if (trySelectPState(
4045 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
4046 AArch64::MSRpstateImm1))
4047 return true;
4048 }
4049
4050 int Imm = getIntOperandFromRegisterString(RegString->getString());
4051 if (Imm == -1) {
4052 // Use the sysreg mapper to attempt to map the remaining possible strings
4053 // to the value for the register to be used for the MSR (register)
4054 // instruction operand.
4055 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
4056 if (TheReg && TheReg->Writeable &&
4057 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4058 Imm = TheReg->Encoding;
4059 else
4060 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4061
4062 if (Imm == -1)
4063 return false;
4064 }
4065
4066 SDValue InChain = N->getOperand(0);
4067 if (!WriteIs128Bit) {
4068 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
4069 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4070 N->getOperand(2), InChain);
4071 } else {
4072 // No endian swap. The lower half always goes into the even subreg, and the
4073 // higher half always into the odd subreg.
4074 SDNode *Pair = CurDAG->getMachineNode(
4075 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
4076 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
4077 MVT::i32),
4078 N->getOperand(2),
4079 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
4080 N->getOperand(3),
4081 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
4082
4083 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
4084 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4085 SDValue(Pair, 0), InChain);
4086 }
4087
4088 return true;
4089}
4090
4091/// We've got special pseudo-instructions for these
4092bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4093 unsigned Opcode;
4094 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4095
4096 // Leave IR for LSE if subtarget supports it.
4097 if (Subtarget->hasLSE()) return false;
4098
4099 if (MemTy == MVT::i8)
4100 Opcode = AArch64::CMP_SWAP_8;
4101 else if (MemTy == MVT::i16)
4102 Opcode = AArch64::CMP_SWAP_16;
4103 else if (MemTy == MVT::i32)
4104 Opcode = AArch64::CMP_SWAP_32;
4105 else if (MemTy == MVT::i64)
4106 Opcode = AArch64::CMP_SWAP_64;
4107 else
4108 llvm_unreachable("Unknown AtomicCmpSwap type");
4109
4110 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4111 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4112 N->getOperand(0)};
4113 SDNode *CmpSwap = CurDAG->getMachineNode(
4114 Opcode, SDLoc(N),
4115 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4116
4117 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4118 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4119
4120 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4121 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4122 CurDAG->RemoveDeadNode(N);
4123
4124 return true;
4125}
4126
4127bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4128 SDValue &Shift) {
4129 if (!isa<ConstantSDNode>(N))
4130 return false;
4131
4132 SDLoc DL(N);
4133 uint64_t Val = cast<ConstantSDNode>(N)
4134 ->getAPIntValue()
4135 .trunc(VT.getFixedSizeInBits())
4136 .getZExtValue();
4137
4138 switch (VT.SimpleTy) {
4139 case MVT::i8:
4140 // All immediates are supported.
4141 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4142 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4143 return true;
4144 case MVT::i16:
4145 case MVT::i32:
4146 case MVT::i64:
4147 // Support 8bit unsigned immediates.
4148 if (Val <= 255) {
4149 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4150 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4151 return true;
4152 }
4153 // Support 16bit unsigned immediates that are a multiple of 256.
4154 if (Val <= 65280 && Val % 256 == 0) {
4155 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4156 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4157 return true;
4158 }
4159 break;
4160 default:
4161 break;
4162 }
4163
4164 return false;
4165}
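// Illustrative examples (hypothetical constants): for .h/.s/.d element types,
// 200 is accepted as-is (Shift = 0, Imm = 200), 4608 = 18 * 256 is accepted as
// Imm = 18 with Shift = 8 (the "#18, lsl #8" form), and 257 is rejected because
// it is neither <= 255 nor a multiple of 256.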
4166
4167bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4168 SDValue &Imm, SDValue &Shift,
4169 bool Negate) {
4170 if (!isa<ConstantSDNode>(N))
4171 return false;
4172
4173 SDLoc DL(N);
4174 int64_t Val = cast<ConstantSDNode>(N)
4175 ->getAPIntValue()
4176 .trunc(VT.getFixedSizeInBits())
4177 .getSExtValue();
4178
4179 if (Negate)
4180 Val = -Val;
4181
4182 // Signed saturating instructions treat their immediate operand as unsigned,
4183 // whereas the related intrinsics define their operands to be signed. This
4184 // means we can only use the immediate form when the operand is non-negative.
4185 if (Val < 0)
4186 return false;
4187
4188 switch (VT.SimpleTy) {
4189 case MVT::i8:
4190 // All positive immediates are supported.
4191 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4192 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4193 return true;
4194 case MVT::i16:
4195 case MVT::i32:
4196 case MVT::i64:
4197 // Support 8bit positive immediates.
4198 if (Val <= 255) {
4199 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4200 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4201 return true;
4202 }
4203 // Support 16bit positive immediates that are a multiple of 256.
4204 if (Val <= 65280 && Val % 256 == 0) {
4205 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4206 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4207 return true;
4208 }
4209 break;
4210 default:
4211 break;
4212 }
4213
4214 return false;
4215}
4216
4217bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4218 SDValue &Shift) {
4219 if (!isa<ConstantSDNode>(N))
4220 return false;
4221
4222 SDLoc DL(N);
4223 int64_t Val = cast<ConstantSDNode>(N)
4224 ->getAPIntValue()
4225 .trunc(VT.getFixedSizeInBits())
4226 .getSExtValue();
4227
4228 switch (VT.SimpleTy) {
4229 case MVT::i8:
4230 // All immediates are supported.
4231 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4232 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4233 return true;
4234 case MVT::i16:
4235 case MVT::i32:
4236 case MVT::i64:
4237 // Support 8bit signed immediates.
4238 if (Val >= -128 && Val <= 127) {
4239 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4240 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4241 return true;
4242 }
4243 // Support 16bit signed immediates that are a multiple of 256.
4244 if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
4245 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4246 Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32);
4247 return true;
4248 }
4249 break;
4250 default:
4251 break;
4252 }
4253
4254 return false;
4255}
4256
4257bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4258 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4259 int64_t ImmVal = CNode->getSExtValue();
4260 SDLoc DL(N);
4261 if (ImmVal >= -128 && ImmVal < 128) {
4262 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
4263 return true;
4264 }
4265 }
4266 return false;
4267}
4268
4269bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4270 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4271 uint64_t ImmVal = CNode->getZExtValue();
4272
4273 switch (VT.SimpleTy) {
4274 case MVT::i8:
4275 ImmVal &= 0xFF;
4276 break;
4277 case MVT::i16:
4278 ImmVal &= 0xFFFF;
4279 break;
4280 case MVT::i32:
4281 ImmVal &= 0xFFFFFFFF;
4282 break;
4283 case MVT::i64:
4284 break;
4285 default:
4286 llvm_unreachable("Unexpected type");
4287 }
4288
4289 if (ImmVal < 256) {
4290 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4291 return true;
4292 }
4293 }
4294 return false;
4295}
4296
4297bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4298 bool Invert) {
4299 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4300 uint64_t ImmVal = CNode->getZExtValue();
4301 SDLoc DL(N);
4302
4303 if (Invert)
4304 ImmVal = ~ImmVal;
4305
4306 // Shift mask depending on type size.
4307 switch (VT.SimpleTy) {
4308 case MVT::i8:
4309 ImmVal &= 0xFF;
4310 ImmVal |= ImmVal << 8;
4311 ImmVal |= ImmVal << 16;
4312 ImmVal |= ImmVal << 32;
4313 break;
4314 case MVT::i16:
4315 ImmVal &= 0xFFFF;
4316 ImmVal |= ImmVal << 16;
4317 ImmVal |= ImmVal << 32;
4318 break;
4319 case MVT::i32:
4320 ImmVal &= 0xFFFFFFFF;
4321 ImmVal |= ImmVal << 32;
4322 break;
4323 case MVT::i64:
4324 break;
4325 default:
4326 llvm_unreachable("Unexpected type");
4327 }
4328
4329 uint64_t encoding;
4330 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
4331 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
4332 return true;
4333 }
4334 }
4335 return false;
4336}
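// Illustrative example (hypothetical i16 element immediate): ImmVal = 0x00FF is
// replicated to 0x00FF00FF00FF00FF before calling processLogicalImmediate,
// which succeeds for this pattern, so the logical operation can use the
// immediate form. When Invert is set, the value is complemented first,
// e.g. 0xFF00 becomes 0x00FF.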
4337
4338// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4339// Rather than attempt to normalise everything we can sometimes saturate the
4340// shift amount during selection. This function also allows for consistent
4341// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4342// required by the instructions.
4343bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4344 uint64_t High, bool AllowSaturation,
4345 SDValue &Imm) {
4346 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4347 uint64_t ImmVal = CN->getZExtValue();
4348
4349 // Reject shift amounts that are too small.
4350 if (ImmVal < Low)
4351 return false;
4352
4353 // Reject or saturate shift amounts that are too big.
4354 if (ImmVal > High) {
4355 if (!AllowSaturation)
4356 return false;
4357 ImmVal = High;
4358 }
4359
4360 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4361 return true;
4362 }
4363
4364 return false;
4365}
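// Illustrative example (assuming a caller that passes Low = 1 and High = 8, as
// for a right shift on byte elements): a constant shift amount of 8 is used
// directly, 200 is clamped to 8 when AllowSaturation is true (and rejected
// otherwise), and 0 is always rejected because it is below Low.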
4366
4367bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4368 // tagp(FrameIndex, IRGstack, tag_offset):
4369 // since the offset between FrameIndex and IRGstack is a compile-time
4370 // constant, this can be lowered to a single ADDG instruction.
4371 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4372 return false;
4373 }
4374
4375 SDValue IRG_SP = N->getOperand(2);
4376 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4377 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4378 return false;
4379 }
4380
4381 const TargetLowering *TLI = getTargetLowering();
4382 SDLoc DL(N);
4383 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4384 SDValue FiOp = CurDAG->getTargetFrameIndex(
4385 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4386 int TagOffset = N->getConstantOperandVal(3);
4387
4388 SDNode *Out = CurDAG->getMachineNode(
4389 AArch64::TAGPstack, DL, MVT::i64,
4390 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4391 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4392 ReplaceNode(N, Out);
4393 return true;
4394}
4395
4396void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4397 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4398 "llvm.aarch64.tagp third argument must be an immediate");
4399 if (trySelectStackSlotTagP(N))
4400 return;
4401 // FIXME: above applies in any case when offset between Op1 and Op2 is a
4402 // compile-time constant, not just for stack allocations.
4403
4404 // General case for unrelated pointers in Op1 and Op2.
4405 SDLoc DL(N);
4406 int TagOffset = N->getConstantOperandVal(3);
4407 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4408 {N->getOperand(1), N->getOperand(2)});
4409 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4410 {SDValue(N1, 0), N->getOperand(2)});
4411 SDNode *N3 = CurDAG->getMachineNode(
4412 AArch64::ADDG, DL, MVT::i64,
4413 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4414 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4415 ReplaceNode(N, N3);
4416}
4417
4418bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4419 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4420
4421 // Bail when not a "cast" like insert_subvector.
4422 if (N->getConstantOperandVal(2) != 0)
4423 return false;
4424 if (!N->getOperand(0).isUndef())
4425 return false;
4426
4427 // Bail when normal isel should do the job.
4428 EVT VT = N->getValueType(0);
4429 EVT InVT = N->getOperand(1).getValueType();
4430 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4431 return false;
4432 if (InVT.getSizeInBits() <= 128)
4433 return false;
4434
4435 // NOTE: We can only get here when doing fixed length SVE code generation.
4436 // We do manual selection because the types involved are not linked to real
4437 // registers (despite being legal) and must be coerced into SVE registers.
4438
4439 assert(VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
4440 "Expected to insert into a packed scalable vector!");
4441
4442 SDLoc DL(N);
4443 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4444 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4445 N->getOperand(1), RC));
4446 return true;
4447}
4448
4449bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4450 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4451
4452 // Bail when not a "cast" like extract_subvector.
4453 if (N->getConstantOperandVal(1) != 0)
4454 return false;
4455
4456 // Bail when normal isel can do the job.
4457 EVT VT = N->getValueType(0);
4458 EVT InVT = N->getOperand(0).getValueType();
4459 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4460 return false;
4461 if (VT.getSizeInBits() <= 128)
4462 return false;
4463
4464 // NOTE: We can only get here when doing fixed length SVE code generation.
4465 // We do manual selection because the types involved are not linked to real
4466 // registers (despite being legal) and must be coerced into SVE registers.
4467
4468 assert(InVT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
4469 "Expected to extract from a packed scalable vector!");
4470
4471 SDLoc DL(N);
4472 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4473 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4474 N->getOperand(0), RC));
4475 return true;
4476}
4477
4478bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4479 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4480
4481 SDValue N0 = N->getOperand(0);
4482 SDValue N1 = N->getOperand(1);
4483 EVT VT = N->getValueType(0);
4484
4485 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4486 // Rotating by a constant is a funnel shift in IR, which is expanded to
4487 // an OR with shifted operands.
4488 // We do the following transform:
4489 // OR N0, N1 -> xar (x, y, imm)
4490 // Where:
4491 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4492 // N0 = SHL_PRED true, V, splat(bits-imm)
4493 // V = (xor x, y)
4494 if (VT.isScalableVector() &&
4495 (Subtarget->hasSVE2() ||
4496 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4497 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4498 N1.getOpcode() != AArch64ISD::SRL_PRED)
4499 std::swap(N0, N1);
4500 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4501 N1.getOpcode() != AArch64ISD::SRL_PRED)
4502 return false;
4503
4504 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4505 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4506 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4507 return false;
4508
4509 SDValue XOR = N0.getOperand(1);
4510 if (XOR.getOpcode() != ISD::XOR || XOR != N1.getOperand(1))
4511 return false;
4512
4513 APInt ShlAmt, ShrAmt;
4514 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4515 !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
4516 return false;
4517
4518 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4519 return false;
4520
4521 SDLoc DL(N);
4522 SDValue Imm =
4523 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4524
4525 SDValue Ops[] = {XOR.getOperand(0), XOR.getOperand(1), Imm};
4526 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4527 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4528 AArch64::XAR_ZZZI_D})) {
4529 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4530 return true;
4531 }
4532 return false;
4533 }
4534
4535 if (!Subtarget->hasSHA3())
4536 return false;
4537
4538 if (N0->getOpcode() != AArch64ISD::VSHL ||
4539 N1->getOpcode() != AArch64ISD::VLSHR)
4540 return false;
4541
4542 if (N0->getOperand(0) != N1->getOperand(0) ||
4543 N1->getOperand(0)->getOpcode() != ISD::XOR)
4544 return false;
4545
4546 SDValue XOR = N0.getOperand(0);
4547 SDValue R1 = XOR.getOperand(0);
4548 SDValue R2 = XOR.getOperand(1);
4549
4550 unsigned HsAmt = N0.getConstantOperandVal(1);
4551 unsigned ShAmt = N1.getConstantOperandVal(1);
4552
4553 SDLoc DL = SDLoc(N0.getOperand(1));
4554 SDValue Imm = CurDAG->getTargetConstant(
4555 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4556
4557 if (ShAmt + HsAmt != 64)
4558 return false;
4559
4560 SDValue Ops[] = {R1, R2, Imm};
4561 CurDAG->SelectNodeTo(N, AArch64::XAR, N0.getValueType(), Ops);
4562
4563 return true;
4564}
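// Illustrative example (schematic, hypothetical shift amounts): with
// V = (xor x, y), N0 = (AArch64ISD::VSHL V, 40) and N1 = (AArch64ISD::VLSHR V, 24)
// satisfy ShAmt + HsAmt = 64, so the whole OR is selected to
// "xar v0.2d, x, y, #24", i.e. an XOR followed by a rotate right by 24.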
4565
4566void AArch64DAGToDAGISel::Select(SDNode *Node) {
4567 // If we have a custom node, we already have selected!
4568 if (Node->isMachineOpcode()) {
4569 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4570 Node->setNodeId(-1);
4571 return;
4572 }
4573
4574 // A few cases need custom selection.
4575 EVT VT = Node->getValueType(0);
4576
4577 switch (Node->getOpcode()) {
4578 default:
4579 break;
4580
4581 case ISD::ATOMIC_CMP_SWAP:
4582 if (SelectCMP_SWAP(Node))
4583 return;
4584 break;
4585
4586 case ISD::READ_REGISTER:
4587 case AArch64ISD::MRRS:
4588 if (tryReadRegister(Node))
4589 return;
4590 break;
4591
4592 case ISD::WRITE_REGISTER:
4593 case AArch64ISD::MSRR:
4594 if (tryWriteRegister(Node))
4595 return;
4596 break;
4597
4598 case ISD::LOAD: {
4599 // Try to select as an indexed load. Fall through to normal processing
4600 // if we can't.
4601 if (tryIndexedLoad(Node))
4602 return;
4603 break;
4604 }
4605
4606 case ISD::SRL:
4607 case ISD::AND:
4608 case ISD::SRA:
4609 case ISD::SIGN_EXTEND_INREG:
4610 if (tryBitfieldExtractOp(Node))
4611 return;
4612 if (tryBitfieldInsertInZeroOp(Node))
4613 return;
4614 [[fallthrough]];
4615 case ISD::ROTR:
4616 case ISD::SHL:
4617 if (tryShiftAmountMod(Node))
4618 return;
4619 break;
4620
4621 case ISD::SIGN_EXTEND:
4622 if (tryBitfieldExtractOpFromSExt(Node))
4623 return;
4624 break;
4625
4626 case ISD::OR:
4627 if (tryBitfieldInsertOp(Node))
4628 return;
4629 if (trySelectXAR(Node))
4630 return;
4631 break;
4632
4633 case ISD::EXTRACT_SUBVECTOR: {
4634 if (trySelectCastScalableToFixedLengthVector(Node))
4635 return;
4636 break;
4637 }
4638
4639 case ISD::INSERT_SUBVECTOR: {
4640 if (trySelectCastFixedLengthToScalableVector(Node))
4641 return;
4642 break;
4643 }
4644
4645 case ISD::Constant: {
4646 // Materialize zero constants as copies from WZR/XZR. This allows
4647 // the coalescer to propagate these into other instructions.
4648 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4649 if (ConstNode->isZero()) {
4650 if (VT == MVT::i32) {
4651 SDValue New = CurDAG->getCopyFromReg(
4652 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4653 ReplaceNode(Node, New.getNode());
4654 return;
4655 } else if (VT == MVT::i64) {
4656 SDValue New = CurDAG->getCopyFromReg(
4657 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4658 ReplaceNode(Node, New.getNode());
4659 return;
4660 }
4661 }
4662 break;
4663 }
4664
4665 case ISD::FrameIndex: {
4666 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4667 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4668 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4669 const TargetLowering *TLI = getTargetLowering();
4670 SDValue TFI = CurDAG->getTargetFrameIndex(
4671 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4672 SDLoc DL(Node);
4673 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4674 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4675 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4676 return;
4677 }
4678 case ISD::INTRINSIC_W_CHAIN: {
4679 unsigned IntNo = Node->getConstantOperandVal(1);
4680 switch (IntNo) {
4681 default:
4682 break;
4683 case Intrinsic::aarch64_gcsss: {
4684 SDLoc DL(Node);
4685 SDValue Chain = Node->getOperand(0);
4686 SDValue Val = Node->getOperand(2);
4687 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
4688 SDNode *SS1 =
4689 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
4690 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
4691 MVT::Other, Zero, SDValue(SS1, 0));
4692 ReplaceNode(Node, SS2);
4693 return;
4694 }
4695 case Intrinsic::aarch64_ldaxp:
4696 case Intrinsic::aarch64_ldxp: {
4697 unsigned Op =
4698 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4699 SDValue MemAddr = Node->getOperand(2);
4700 SDLoc DL(Node);
4701 SDValue Chain = Node->getOperand(0);
4702
4703 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4704 MVT::Other, MemAddr, Chain);
4705
4706 // Transfer memoperands.
4707 MachineMemOperand *MemOp =
4708 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4709 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4710 ReplaceNode(Node, Ld);
4711 return;
4712 }
4713 case Intrinsic::aarch64_stlxp:
4714 case Intrinsic::aarch64_stxp: {
4715 unsigned Op =
4716 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4717 SDLoc DL(Node);
4718 SDValue Chain = Node->getOperand(0);
4719 SDValue ValLo = Node->getOperand(2);
4720 SDValue ValHi = Node->getOperand(3);
4721 SDValue MemAddr = Node->getOperand(4);
4722
4723 // Place arguments in the right order.
4724 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4725
4726 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4727 // Transfer memoperands.
4728 MachineMemOperand *MemOp =
4729 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4730 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4731
4732 ReplaceNode(Node, St);
4733 return;
4734 }
4735 case Intrinsic::aarch64_neon_ld1x2:
4736 if (VT == MVT::v8i8) {
4737 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4738 return;
4739 } else if (VT == MVT::v16i8) {
4740 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4741 return;
4742 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4743 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4744 return;
4745 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4746 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
4747 return;
4748 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4749 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
4750 return;
4751 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4752 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
4753 return;
4754 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4755 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4756 return;
4757 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4758 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
4759 return;
4760 }
4761 break;
4762 case Intrinsic::aarch64_neon_ld1x3:
4763 if (VT == MVT::v8i8) {
4764 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
4765 return;
4766 } else if (VT == MVT::v16i8) {
4767 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
4768 return;
4769 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4770 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
4771 return;
4772 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4773 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
4774 return;
4775 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4776 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
4777 return;
4778 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4779 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
4780 return;
4781 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4782 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4783 return;
4784 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4785 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
4786 return;
4787 }
4788 break;
4789 case Intrinsic::aarch64_neon_ld1x4:
4790 if (VT == MVT::v8i8) {
4791 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
4792 return;
4793 } else if (VT == MVT::v16i8) {
4794 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
4795 return;
4796 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4797 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
4798 return;
4799 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4800 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
4801 return;
4802 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4803 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
4804 return;
4805 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4806 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
4807 return;
4808 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4809 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4810 return;
4811 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4812 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
4813 return;
4814 }
4815 break;
4816 case Intrinsic::aarch64_neon_ld2:
4817 if (VT == MVT::v8i8) {
4818 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
4819 return;
4820 } else if (VT == MVT::v16i8) {
4821 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
4822 return;
4823 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4824 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
4825 return;
4826 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4827 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
4828 return;
4829 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4830 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
4831 return;
4832 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4833 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
4834 return;
4835 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4836 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4837 return;
4838 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4839 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
4840 return;
4841 }
4842 break;
4843 case Intrinsic::aarch64_neon_ld3:
4844 if (VT == MVT::v8i8) {
4845 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
4846 return;
4847 } else if (VT == MVT::v16i8) {
4848 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
4849 return;
4850 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4851 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
4852 return;
4853 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4854 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
4855 return;
4856 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4857 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
4858 return;
4859 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4860 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
4861 return;
4862 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4863 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4864 return;
4865 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4866 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
4867 return;
4868 }
4869 break;
4870 case Intrinsic::aarch64_neon_ld4:
4871 if (VT == MVT::v8i8) {
4872 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
4873 return;
4874 } else if (VT == MVT::v16i8) {
4875 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
4876 return;
4877 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4878 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
4879 return;
4880 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4881 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
4882 return;
4883 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4884 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
4885 return;
4886 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4887 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
4888 return;
4889 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4890 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4891 return;
4892 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4893 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
4894 return;
4895 }
4896 break;
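      // The LDnR (load-and-replicate) forms broadcast a single loaded element
      // into every lane of each result vector.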
4897 case Intrinsic::aarch64_neon_ld2r:
4898 if (VT == MVT::v8i8) {
4899 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
4900 return;
4901 } else if (VT == MVT::v16i8) {
4902 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
4903 return;
4904 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4905 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
4906 return;
4907 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4908 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
4909 return;
4910 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4911 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
4912 return;
4913 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4914 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
4915 return;
4916 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4917 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
4918 return;
4919 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4920 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
4921 return;
4922 }
4923 break;
4924 case Intrinsic::aarch64_neon_ld3r:
4925 if (VT == MVT::v8i8) {
4926 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
4927 return;
4928 } else if (VT == MVT::v16i8) {
4929 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
4930 return;
4931 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4932 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
4933 return;
4934 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4935 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
4936 return;
4937 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4938 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
4939 return;
4940 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4941 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
4942 return;
4943 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4944 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
4945 return;
4946 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4947 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
4948 return;
4949 }
4950 break;
4951 case Intrinsic::aarch64_neon_ld4r:
4952 if (VT == MVT::v8i8) {
4953 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
4954 return;
4955 } else if (VT == MVT::v16i8) {
4956 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
4957 return;
4958 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4959 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
4960 return;
4961 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4962 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
4963 return;
4964 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4965 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
4966 return;
4967 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4968 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
4969 return;
4970 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4971 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
4972 return;
4973 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4974 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
4975 return;
4976 }
4977 break;
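      // Lane loads insert into one lane of each result vector, so only the
      // element size matters and the 64-bit and 128-bit vector types share an
      // opcode.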
4978 case Intrinsic::aarch64_neon_ld2lane:
4979 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4980 SelectLoadLane(Node, 2, AArch64::LD2i8);
4981 return;
4982 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4983 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4984 SelectLoadLane(Node, 2, AArch64::LD2i16);
4985 return;
4986 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4987 VT == MVT::v2f32) {
4988 SelectLoadLane(Node, 2, AArch64::LD2i32);
4989 return;
4990 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4991 VT == MVT::v1f64) {
4992 SelectLoadLane(Node, 2, AArch64::LD2i64);
4993 return;
4994 }
4995 break;
4996 case Intrinsic::aarch64_neon_ld3lane:
4997 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4998 SelectLoadLane(Node, 3, AArch64::LD3i8);
4999 return;
5000 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5001 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5002 SelectLoadLane(Node, 3, AArch64::LD3i16);
5003 return;
5004 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5005 VT == MVT::v2f32) {
5006 SelectLoadLane(Node, 3, AArch64::LD3i32);
5007 return;
5008 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5009 VT == MVT::v1f64) {
5010 SelectLoadLane(Node, 3, AArch64::LD3i64);
5011 return;
5012 }
5013 break;
5014 case Intrinsic::aarch64_neon_ld4lane:
5015 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5016 SelectLoadLane(Node, 4, AArch64::LD4i8);
5017 return;
5018 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5019 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5020 SelectLoadLane(Node, 4, AArch64::LD4i16);
5021 return;
5022 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5023 VT == MVT::v2f32) {
5024 SelectLoadLane(Node, 4, AArch64::LD4i32);
5025 return;
5026 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5027 VT == MVT::v1f64) {
5028 SelectLoadLane(Node, 4, AArch64::LD4i64);
5029 return;
5030 }
5031 break;
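      // FEAT_LS64: LD64B loads 64 bytes into eight consecutive X registers;
      // the eight results are extracted starting at subregister x8sub_0.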
5032 case Intrinsic::aarch64_ld64b:
5033 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
5034 return;
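      // SVE structured loads returning a tuple of scalable vectors. The scale
      // argument is log2 of the element size (0 for bytes through 4 for the
      // 128-bit Q forms).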
5035 case Intrinsic::aarch64_sve_ld2q_sret: {
5036 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
5037 return;
5038 }
5039 case Intrinsic::aarch64_sve_ld3q_sret: {
5040 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
5041 return;
5042 }
5043 case Intrinsic::aarch64_sve_ld4q_sret: {
5044 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
5045 return;
5046 }
5047 case Intrinsic::aarch64_sve_ld2_sret: {
5048 if (VT == MVT::nxv16i8) {
5049 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
5050 true);
5051 return;
5052 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5053 VT == MVT::nxv8bf16) {
5054 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
5055 true);
5056 return;
5057 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5058 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
5059 true);
5060 return;
5061 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5062 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
5063 true);
5064 return;
5065 }
5066 break;
5067 }
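      // Multi-vector loads governed by a predicate-as-counter: SME2 selects
      // the pseudo form, SVE2p1 the real instruction; with neither feature the
      // node falls through to default selection.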
5068 case Intrinsic::aarch64_sve_ld1_pn_x2: {
5069 if (VT == MVT::nxv16i8) {
5070 if (Subtarget->hasSME2())
5071 SelectContiguousMultiVectorLoad(
5072 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
5073 else if (Subtarget->hasSVE2p1())
5074 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
5075 AArch64::LD1B_2Z);
5076 else
5077 break;
5078 return;
5079 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5080 VT == MVT::nxv8bf16) {
5081 if (Subtarget->hasSME2())
5082 SelectContiguousMultiVectorLoad(
5083 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
5084 else if (Subtarget->hasSVE2p1())
5085 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
5086 AArch64::LD1H_2Z);
5087 else
5088 break;
5089 return;
5090 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5091 if (Subtarget->hasSME2())
5092 SelectContiguousMultiVectorLoad(
5093 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
5094 else if (Subtarget->hasSVE2p1())
5095 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
5096 AArch64::LD1W_2Z);
5097 else
5098 break;
5099 return;
5100 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5101 if (Subtarget->hasSME2())
5102 SelectContiguousMultiVectorLoad(
5103 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
5104 else if (Subtarget->hasSVE2p1())
5105 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
5106 AArch64::LD1D_2Z);
5107 else
5108 break;
5109 return;
5110 }
5111 break;
5112 }
5113 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5114 if (VT == MVT::nxv16i8) {
5115 if (Subtarget->hasSME2())
5116 SelectContiguousMultiVectorLoad(
5117 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5118 else if (Subtarget->hasSVE2p1())
5119 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5120 AArch64::LD1B_4Z);
5121 else
5122 break;
5123 return;
5124 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5125 VT == MVT::nxv8bf16) {
5126 if (Subtarget->hasSME2())
5127 SelectContiguousMultiVectorLoad(
5128 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5129 else if (Subtarget->hasSVE2p1())
5130 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5131 AArch64::LD1H_4Z);
5132 else
5133 break;
5134 return;
5135 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5136 if (Subtarget->hasSME2())
5137 SelectContiguousMultiVectorLoad(
5138 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5139 else if (Subtarget->hasSVE2p1())
5140 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5141 AArch64::LD1W_4Z);
5142 else
5143 break;
5144 return;
5145 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5146 if (Subtarget->hasSME2())
5147 SelectContiguousMultiVectorLoad(
5148 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5149 else if (Subtarget->hasSVE2p1())
5150 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5151 AArch64::LD1D_4Z);
5152 else
5153 break;
5154 return;
5155 }
5156 break;
5157 }
5158 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5159 if (VT == MVT::nxv16i8) {
5160 if (Subtarget->hasSME2())
5161 SelectContiguousMultiVectorLoad(Node, 2, 0,
5162 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5163 AArch64::LDNT1B_2Z_PSEUDO);
5164 else if (Subtarget->hasSVE2p1())
5165 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5166 AArch64::LDNT1B_2Z);
5167 else
5168 break;
5169 return;
5170 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5171 VT == MVT::nxv8bf16) {
5172 if (Subtarget->hasSME2())
5173 SelectContiguousMultiVectorLoad(Node, 2, 1,
5174 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5175 AArch64::LDNT1H_2Z_PSEUDO);
5176 else if (Subtarget->hasSVE2p1())
5177 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5178 AArch64::LDNT1H_2Z);
5179 else
5180 break;
5181 return;
5182 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5183 if (Subtarget->hasSME2())
5184 SelectContiguousMultiVectorLoad(Node, 2, 2,
5185 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5186 AArch64::LDNT1W_2Z_PSEUDO);
5187 else if (Subtarget->hasSVE2p1())
5188 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5189 AArch64::LDNT1W_2Z);
5190 else
5191 break;
5192 return;
5193 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5194 if (Subtarget->hasSME2())
5195 SelectContiguousMultiVectorLoad(Node, 2, 3,
5196 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5197 AArch64::LDNT1D_2Z_PSEUDO);
5198 else if (Subtarget->hasSVE2p1())
5199 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5200 AArch64::LDNT1D_2Z);
5201 else
5202 break;
5203 return;
5204 }
5205 break;
5206 }
5207 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5208 if (VT == MVT::nxv16i8) {
5209 if (Subtarget->hasSME2())
5210 SelectContiguousMultiVectorLoad(Node, 4, 0,
5211 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5212 AArch64::LDNT1B_4Z_PSEUDO);
5213 else if (Subtarget->hasSVE2p1())
5214 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5215 AArch64::LDNT1B_4Z);
5216 else
5217 break;
5218 return;
5219 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5220 VT == MVT::nxv8bf16) {
5221 if (Subtarget->hasSME2())
5222 SelectContiguousMultiVectorLoad(Node, 4, 1,
5223 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5224 AArch64::LDNT1H_4Z_PSEUDO);
5225 else if (Subtarget->hasSVE2p1())
5226 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5227 AArch64::LDNT1H_4Z);
5228 else
5229 break;
5230 return;
5231 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5232 if (Subtarget->hasSME2())
5233 SelectContiguousMultiVectorLoad(Node, 4, 2,
5234 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5235 AArch64::LDNT1W_4Z_PSEUDO);
5236 else if (Subtarget->hasSVE2p1())
5237 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5238 AArch64::LDNT1W_4Z);
5239 else
5240 break;
5241 return;
5242 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5243 if (Subtarget->hasSME2())
5244 SelectContiguousMultiVectorLoad(Node, 4, 3,
5245 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5246 AArch64::LDNT1D_4Z_PSEUDO);
5247 else if (Subtarget->hasSVE2p1())
5248 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5249 AArch64::LDNT1D_4Z);
5250 else
5251 break;
5252 return;
5253 }
5254 break;
5255 }
5256 case Intrinsic::aarch64_sve_ld3_sret: {
5257 if (VT == MVT::nxv16i8) {
5258 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5259 true);
5260 return;
5261 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5262 VT == MVT::nxv8bf16) {
5263 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5264 true);
5265 return;
5266 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5267 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5268 true);
5269 return;
5270 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5271 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5272 true);
5273 return;
5274 }
5275 break;
5276 }
5277 case Intrinsic::aarch64_sve_ld4_sret: {
5278 if (VT == MVT::nxv16i8) {
5279 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5280 true);
5281 return;
5282 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5283 VT == MVT::nxv8bf16) {
5284 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5285 true);
5286 return;
5287 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5288 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5289 true);
5290 return;
5291 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5292 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5293 true);
5294 return;
5295 }
5296 break;
5297 }
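      // SME MOVA reads of two or four ZA tile slices (horizontal or vertical).
      // The template arguments give the maximum slice-index immediate and its
      // scale for each element size.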
5298 case Intrinsic::aarch64_sme_read_hor_vg2: {
5299 if (VT == MVT::nxv16i8) {
5300 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5301 AArch64::MOVA_2ZMXI_H_B);
5302 return;
5303 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5304 VT == MVT::nxv8bf16) {
5305 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5306 AArch64::MOVA_2ZMXI_H_H);
5307 return;
5308 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5309 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5310 AArch64::MOVA_2ZMXI_H_S);
5311 return;
5312 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5313 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5314 AArch64::MOVA_2ZMXI_H_D);
5315 return;
5316 }
5317 break;
5318 }
5319 case Intrinsic::aarch64_sme_read_ver_vg2: {
5320 if (VT == MVT::nxv16i8) {
5321 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5322 AArch64::MOVA_2ZMXI_V_B);
5323 return;
5324 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5325 VT == MVT::nxv8bf16) {
5326 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5327 AArch64::MOVA_2ZMXI_V_H);
5328 return;
5329 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5330 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5331 AArch64::MOVA_2ZMXI_V_S);
5332 return;
5333 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5334 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5335 AArch64::MOVA_2ZMXI_V_D);
5336 return;
5337 }
5338 break;
5339 }
5340 case Intrinsic::aarch64_sme_read_hor_vg4: {
5341 if (VT == MVT::nxv16i8) {
5342 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5343 AArch64::MOVA_4ZMXI_H_B);
5344 return;
5345 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5346 VT == MVT::nxv8bf16) {
5347 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5348 AArch64::MOVA_4ZMXI_H_H);
5349 return;
5350 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5351 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5352 AArch64::MOVA_4ZMXI_H_S);
5353 return;
5354 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5355 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5356 AArch64::MOVA_4ZMXI_H_D);
5357 return;
5358 }
5359 break;
5360 }
5361 case Intrinsic::aarch64_sme_read_ver_vg4: {
5362 if (VT == MVT::nxv16i8) {
5363 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5364 AArch64::MOVA_4ZMXI_V_B);
5365 return;
5366 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5367 VT == MVT::nxv8bf16) {
5368 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5369 AArch64::MOVA_4ZMXI_V_H);
5370 return;
5371 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5372 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5373 AArch64::MOVA_4ZMXI_V_S);
5374 return;
5375 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5376 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5377 AArch64::MOVA_4ZMXI_V_D);
5378 return;
5379 }
5380 break;
5381 }
5382 case Intrinsic::aarch64_sme_read_vg1x2: {
5383 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5384 AArch64::MOVA_VG2_2ZMXI);
5385 return;
5386 }
5387 case Intrinsic::aarch64_sme_read_vg1x4: {
5388 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5389 AArch64::MOVA_VG4_4ZMXI);
5390 return;
5391 }
5392 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5393 if (VT == MVT::nxv16i8) {
5394 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5395 return;
5396 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5397 VT == MVT::nxv8bf16) {
5398 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5399 return;
5400 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5401 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5402 return;
5403 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5404 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5405 return;
5406 }
5407 break;
5408 }
5409 case Intrinsic::aarch64_sme_readz_vert_x2: {
5410 if (VT == MVT::nxv16i8) {
5411 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5412 return;
5413 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5414 VT == MVT::nxv8bf16) {
5415 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5416 return;
5417 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5418 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5419 return;
5420 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5421 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5422 return;
5423 }
5424 break;
5425 }
5426 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5427 if (VT == MVT::nxv16i8) {
5428 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5429 return;
5430 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5431 VT == MVT::nxv8bf16) {
5432 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5433 return;
5434 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5435 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5436 return;
5437 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5438 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5439 return;
5440 }
5441 break;
5442 }
5443 case Intrinsic::aarch64_sme_readz_vert_x4: {
5444 if (VT == MVT::nxv16i8) {
5445 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5446 return;
5447 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5448 VT == MVT::nxv8bf16) {
5449 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5450 return;
5451 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5452 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5453 return;
5454 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5455 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5456 return;
5457 }
5458 break;
5459 }
5460 case Intrinsic::aarch64_sme_readz_x2: {
5461 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
5462 AArch64::ZA);
5463 return;
5464 }
5465 case Intrinsic::aarch64_sme_readz_x4: {
5466 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
5467 AArch64::ZA);
5468 return;
5469 }
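      // Swift async: the async context is stored just below the frame pointer,
      // so its address is FP - 8.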
5470 case Intrinsic::swift_async_context_addr: {
5471 SDLoc DL(Node);
5472 SDValue Chain = Node->getOperand(0);
5473 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5474 SDValue Res = SDValue(
5475 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5476 CurDAG->getTargetConstant(8, DL, MVT::i32),
5477 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5478 0);
5479 ReplaceUses(SDValue(Node, 0), Res);
5480 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5481 CurDAG->RemoveDeadNode(Node);
5482
5483 auto &MF = CurDAG->getMachineFunction();
5484 MF.getFrameInfo().setFrameAddressIsTaken(true);
5485 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5486 return;
5487 }
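      // SME2 LUTI2/LUTI4 lookups expanding ZT0 entries into 2- or 4-vector
      // groups; the trailing argument is the largest legal lane-index
      // immediate.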
5488 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5489 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5490 Node->getValueType(0),
5491 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5492 AArch64::LUTI2_4ZTZI_S}))
5493 // Second Immediate must be <= 3:
5494 SelectMultiVectorLuti(Node, 4, Opc, 3);
5495 return;
5496 }
5497 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5498 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5499 Node->getValueType(0),
5500 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5501 // Second Immediate must be <= 1:
5502 SelectMultiVectorLuti(Node, 4, Opc, 1);
5503 return;
5504 }
5505 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5506 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5507 Node->getValueType(0),
5508 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5509 AArch64::LUTI2_2ZTZI_S}))
5510 // Second Immediate must be <= 7:
5511 SelectMultiVectorLuti(Node, 2, Opc, 7);
5512 return;
5513 }
5514 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5515 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5516 Node->getValueType(0),
5517 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5518 AArch64::LUTI4_2ZTZI_S}))
5519 // Second Immediate must be <= 3:
5520 SelectMultiVectorLuti(Node, 2, Opc, 3);
5521 return;
5522 }
5523 }
5524   } break;
5525   case ISD::INTRINSIC_WO_CHAIN: {
5526     unsigned IntNo = Node->getConstantOperandVal(0);
5527 switch (IntNo) {
5528 default:
5529 break;
5530 case Intrinsic::aarch64_tagp:
5531 SelectTagP(Node);
5532 return;
5533
5534 case Intrinsic::ptrauth_auth:
5535 SelectPtrauthAuth(Node);
5536 return;
5537
5538 case Intrinsic::ptrauth_resign:
5539 SelectPtrauthResign(Node);
5540 return;
5541
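    // NEON table lookups: the trailing boolean selects TBX, which merges into
    // the existing destination, rather than TBL.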
5542 case Intrinsic::aarch64_neon_tbl2:
5543 SelectTable(Node, 2,
5544 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5545 false);
5546 return;
5547 case Intrinsic::aarch64_neon_tbl3:
5548 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5549 : AArch64::TBLv16i8Three,
5550 false);
5551 return;
5552 case Intrinsic::aarch64_neon_tbl4:
5553 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5554 : AArch64::TBLv16i8Four,
5555 false);
5556 return;
5557 case Intrinsic::aarch64_neon_tbx2:
5558 SelectTable(Node, 2,
5559 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5560 true);
5561 return;
5562 case Intrinsic::aarch64_neon_tbx3:
5563 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5564 : AArch64::TBXv16i8Three,
5565 true);
5566 return;
5567 case Intrinsic::aarch64_neon_tbx4:
5568 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5569 : AArch64::TBXv16i8Four,
5570 true);
5571 return;
5572 case Intrinsic::aarch64_sve_srshl_single_x2:
5573 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5574 Node->getValueType(0),
5575 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5576 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
5577 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5578 return;
5579 case Intrinsic::aarch64_sve_srshl_single_x4:
5580 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5581 Node->getValueType(0),
5582 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
5583 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
5584 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5585 return;
5586 case Intrinsic::aarch64_sve_urshl_single_x2:
5587 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5588 Node->getValueType(0),
5589 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
5590 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
5591 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5592 return;
5593 case Intrinsic::aarch64_sve_urshl_single_x4:
5594 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5595 Node->getValueType(0),
5596 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
5597 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
5598 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5599 return;
5600 case Intrinsic::aarch64_sve_srshl_x2:
5601 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5602 Node->getValueType(0),
5603 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
5604 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
5605 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5606 return;
5607 case Intrinsic::aarch64_sve_srshl_x4:
5608 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5609 Node->getValueType(0),
5610 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
5611 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
5612 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5613 return;
5614 case Intrinsic::aarch64_sve_urshl_x2:
5615 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5616 Node->getValueType(0),
5617 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
5618 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
5619 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5620 return;
5621 case Intrinsic::aarch64_sve_urshl_x4:
5622 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5623 Node->getValueType(0),
5624 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
5625 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
5626 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5627 return;
5628 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
5629 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5630 Node->getValueType(0),
5631 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
5632 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
5633 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5634 return;
5635 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
5636 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5637 Node->getValueType(0),
5638 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
5639 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
5640 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5641 return;
5642 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
5643 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5644 Node->getValueType(0),
5645 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
5646 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
5647 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5648 return;
5649 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
5650 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5651 Node->getValueType(0),
5652 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
5653 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
5654 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5655 return;
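    // WHILE* pair forms (_2PXX) compare two X-register operands and produce a
    // pair of predicate results.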
5656 case Intrinsic::aarch64_sve_whilege_x2:
5657 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5658 Node->getValueType(0),
5659 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
5660 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
5661 SelectWhilePair(Node, Op);
5662 return;
5663 case Intrinsic::aarch64_sve_whilegt_x2:
5664 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5665 Node->getValueType(0),
5666 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
5667 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
5668 SelectWhilePair(Node, Op);
5669 return;
5670 case Intrinsic::aarch64_sve_whilehi_x2:
5671 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5672 Node->getValueType(0),
5673 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
5674 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
5675 SelectWhilePair(Node, Op);
5676 return;
5677 case Intrinsic::aarch64_sve_whilehs_x2:
5678 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5679 Node->getValueType(0),
5680 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
5681 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
5682 SelectWhilePair(Node, Op);
5683 return;
5684 case Intrinsic::aarch64_sve_whilele_x2:
5685 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5686 Node->getValueType(0),
5687 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
5688 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
5689 SelectWhilePair(Node, Op);
5690 return;
5691 case Intrinsic::aarch64_sve_whilelo_x2:
5692 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5693 Node->getValueType(0),
5694 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
5695 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
5696 SelectWhilePair(Node, Op);
5697 return;
5698 case Intrinsic::aarch64_sve_whilels_x2:
5699 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5700 Node->getValueType(0),
5701 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
5702 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
5703 SelectWhilePair(Node, Op);
5704 return;
5705 case Intrinsic::aarch64_sve_whilelt_x2:
5706 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5707 Node->getValueType(0),
5708 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
5709 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
5710 SelectWhilePair(Node, Op);
5711 return;
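    // Multi-vector min/max: the '_single' variants share one vector operand
    // across the group (tuple flag false); the full variants take a second
    // tuple (tuple flag true).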
5712 case Intrinsic::aarch64_sve_smax_single_x2:
5713 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5714 Node->getValueType(0),
5715 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
5716 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
5717 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5718 return;
5719 case Intrinsic::aarch64_sve_umax_single_x2:
5720 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5721 Node->getValueType(0),
5722 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
5723 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
5724 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5725 return;
5726 case Intrinsic::aarch64_sve_fmax_single_x2:
5727 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5728 Node->getValueType(0),
5729 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
5730 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
5731 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5732 return;
5733 case Intrinsic::aarch64_sve_smax_single_x4:
5734 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5735 Node->getValueType(0),
5736 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
5737 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
5738 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5739 return;
5740 case Intrinsic::aarch64_sve_umax_single_x4:
5741 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5742 Node->getValueType(0),
5743 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
5744 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
5745 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5746 return;
5747 case Intrinsic::aarch64_sve_fmax_single_x4:
5748 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5749 Node->getValueType(0),
5750 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
5751 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
5752 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5753 return;
5754 case Intrinsic::aarch64_sve_smin_single_x2:
5755 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5756 Node->getValueType(0),
5757 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
5758 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
5759 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5760 return;
5761 case Intrinsic::aarch64_sve_umin_single_x2:
5762 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5763 Node->getValueType(0),
5764 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
5765 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
5766 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5767 return;
5768 case Intrinsic::aarch64_sve_fmin_single_x2:
5769 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5770 Node->getValueType(0),
5771 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
5772 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
5773 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5774 return;
5775 case Intrinsic::aarch64_sve_smin_single_x4:
5776 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5777 Node->getValueType(0),
5778 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
5779 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
5780 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5781 return;
5782 case Intrinsic::aarch64_sve_umin_single_x4:
5783 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5784 Node->getValueType(0),
5785 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
5786 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
5787 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5788 return;
5789 case Intrinsic::aarch64_sve_fmin_single_x4:
5790 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5791 Node->getValueType(0),
5792 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
5793 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
5794 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5795 return;
5796 case Intrinsic::aarch64_sve_smax_x2:
5797 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5798 Node->getValueType(0),
5799 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
5800 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
5801 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5802 return;
5803 case Intrinsic::aarch64_sve_umax_x2:
5804 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5805 Node->getValueType(0),
5806 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
5807 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
5808 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5809 return;
5810 case Intrinsic::aarch64_sve_fmax_x2:
5811 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5812 Node->getValueType(0),
5813 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
5814 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
5815 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5816 return;
5817 case Intrinsic::aarch64_sve_smax_x4:
5818 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5819 Node->getValueType(0),
5820 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
5821 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
5822 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5823 return;
5824 case Intrinsic::aarch64_sve_umax_x4:
5825 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5826 Node->getValueType(0),
5827 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
5828 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
5829 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5830 return;
5831 case Intrinsic::aarch64_sve_fmax_x4:
5832 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5833 Node->getValueType(0),
5834 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
5835 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
5836 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5837 return;
5838 case Intrinsic::aarch64_sve_smin_x2:
5839 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5840 Node->getValueType(0),
5841 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
5842 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
5843 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5844 return;
5845 case Intrinsic::aarch64_sve_umin_x2:
5846 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5847 Node->getValueType(0),
5848 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
5849 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
5850 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5851 return;
5852 case Intrinsic::aarch64_sve_fmin_x2:
5853 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5854 Node->getValueType(0),
5855 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
5856 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
5857 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5858 return;
5859 case Intrinsic::aarch64_sve_smin_x4:
5860 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5861 Node->getValueType(0),
5862 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
5863 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
5864 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5865 return;
5866 case Intrinsic::aarch64_sve_umin_x4:
5867 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5868 Node->getValueType(0),
5869 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
5870 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
5871 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5872 return;
5873 case Intrinsic::aarch64_sve_fmin_x4:
5874 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5875 Node->getValueType(0),
5876 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
5877 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
5878 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5879 return;
5880     case Intrinsic::aarch64_sve_fmaxnm_single_x2:
5881 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5882 Node->getValueType(0),
5883 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
5884 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
5885 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5886 return;
5887     case Intrinsic::aarch64_sve_fmaxnm_single_x4:
5888 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5889 Node->getValueType(0),
5890 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
5891 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
5892 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5893 return;
5894 case Intrinsic::aarch64_sve_fminnm_single_x2:
5895 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5896 Node->getValueType(0),
5897 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
5898 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
5899 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5900 return;
5901 case Intrinsic::aarch64_sve_fminnm_single_x4:
5902 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5903 Node->getValueType(0),
5904 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
5905 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
5906 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5907 return;
5908 case Intrinsic::aarch64_sve_fmaxnm_x2:
5909 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5910 Node->getValueType(0),
5911 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
5912 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
5913 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5914 return;
5915 case Intrinsic::aarch64_sve_fmaxnm_x4:
5916 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5917 Node->getValueType(0),
5918 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
5919 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
5920 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5921 return;
5922 case Intrinsic::aarch64_sve_fminnm_x2:
5923 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5924 Node->getValueType(0),
5925 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
5926 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
5927 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5928 return;
5929 case Intrinsic::aarch64_sve_fminnm_x4:
5930 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5931 Node->getValueType(0),
5932 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
5933 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
5934 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5935 return;
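    // Multi-vector conversions: float<->int on 32-bit elements, plus the
    // widening FP converts below.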
5936 case Intrinsic::aarch64_sve_fcvtzs_x2:
5937 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
5938 return;
5939 case Intrinsic::aarch64_sve_scvtf_x2:
5940 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
5941 return;
5942 case Intrinsic::aarch64_sve_fcvtzu_x2:
5943 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
5944 return;
5945 case Intrinsic::aarch64_sve_ucvtf_x2:
5946 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
5947 return;
5948 case Intrinsic::aarch64_sve_fcvtzs_x4:
5949 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
5950 return;
5951 case Intrinsic::aarch64_sve_scvtf_x4:
5952 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
5953 return;
5954 case Intrinsic::aarch64_sve_fcvtzu_x4:
5955 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
5956 return;
5957 case Intrinsic::aarch64_sve_ucvtf_x4:
5958 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
5959 return;
5960 case Intrinsic::aarch64_sve_fcvt_widen_x2:
5961 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
5962 return;
5963 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
5964 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
5965 return;
5966 case Intrinsic::aarch64_sve_sclamp_single_x2:
5967 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5968 Node->getValueType(0),
5969 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
5970 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
5971 SelectClamp(Node, 2, Op);
5972 return;
5973 case Intrinsic::aarch64_sve_uclamp_single_x2:
5974 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5975 Node->getValueType(0),
5976 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
5977 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
5978 SelectClamp(Node, 2, Op);
5979 return;
5980 case Intrinsic::aarch64_sve_fclamp_single_x2:
5981 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5982 Node->getValueType(0),
5983 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
5984 AArch64::FCLAMP_VG2_2Z2Z_D}))
5985 SelectClamp(Node, 2, Op);
5986 return;
5987 case Intrinsic::aarch64_sve_bfclamp_single_x2:
5988 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
5989 return;
5990 case Intrinsic::aarch64_sve_sclamp_single_x4:
5991 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5992 Node->getValueType(0),
5993 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
5994 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
5995 SelectClamp(Node, 4, Op);
5996 return;
5997 case Intrinsic::aarch64_sve_uclamp_single_x4:
5998 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5999 Node->getValueType(0),
6000 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6001 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6002 SelectClamp(Node, 4, Op);
6003 return;
6004 case Intrinsic::aarch64_sve_fclamp_single_x4:
6005 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6006 Node->getValueType(0),
6007 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6008 AArch64::FCLAMP_VG4_4Z4Z_D}))
6009 SelectClamp(Node, 4, Op);
6010 return;
6011 case Intrinsic::aarch64_sve_bfclamp_single_x4:
6012 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
6013 return;
6014 case Intrinsic::aarch64_sve_add_single_x2:
6015 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6016 Node->getValueType(0),
6017 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6018 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6019 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6020 return;
6021 case Intrinsic::aarch64_sve_add_single_x4:
6022 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6023 Node->getValueType(0),
6024 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6025 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6026 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6027 return;
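    // Multi-vector ZIP/UZP permutes; the x4 forms consume their input as a
    // register tuple (IsTupleInput=true).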
6028 case Intrinsic::aarch64_sve_zip_x2:
6029 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6030 Node->getValueType(0),
6031 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6032 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6033 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6034 return;
6035 case Intrinsic::aarch64_sve_zipq_x2:
6036 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6037 AArch64::ZIP_VG2_2ZZZ_Q);
6038 return;
6039 case Intrinsic::aarch64_sve_zip_x4:
6040 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6041 Node->getValueType(0),
6042 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6043 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6044 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6045 return;
6046 case Intrinsic::aarch64_sve_zipq_x4:
6047 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6048 AArch64::ZIP_VG4_4Z4Z_Q);
6049 return;
6050 case Intrinsic::aarch64_sve_uzp_x2:
6051 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6052 Node->getValueType(0),
6053 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6054 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6055 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6056 return;
6057 case Intrinsic::aarch64_sve_uzpq_x2:
6058 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6059 AArch64::UZP_VG2_2ZZZ_Q);
6060 return;
6061 case Intrinsic::aarch64_sve_uzp_x4:
6062 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6063 Node->getValueType(0),
6064 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6065 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6066 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6067 return;
6068 case Intrinsic::aarch64_sve_uzpq_x4:
6069 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6070 AArch64::UZP_VG4_4Z4Z_Q);
6071 return;
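    // Multi-vector SEL chooses between two tuples under a predicate, hence the
    // extra /*HasPred=*/true operand.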
6072 case Intrinsic::aarch64_sve_sel_x2:
6073 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6074 Node->getValueType(0),
6075 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6076 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6077 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
6078 return;
6079 case Intrinsic::aarch64_sve_sel_x4:
6080 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6081 Node->getValueType(0),
6082 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6083 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6084 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
6085 return;
6086 case Intrinsic::aarch64_sve_frinta_x2:
6087 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
6088 return;
6089 case Intrinsic::aarch64_sve_frinta_x4:
6090 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
6091 return;
6092 case Intrinsic::aarch64_sve_frintm_x2:
6093 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
6094 return;
6095 case Intrinsic::aarch64_sve_frintm_x4:
6096 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
6097 return;
6098 case Intrinsic::aarch64_sve_frintn_x2:
6099 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
6100 return;
6101 case Intrinsic::aarch64_sve_frintn_x4:
6102 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
6103 return;
6104 case Intrinsic::aarch64_sve_frintp_x2:
6105 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
6106 return;
6107 case Intrinsic::aarch64_sve_frintp_x4:
6108 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
6109 return;
6110 case Intrinsic::aarch64_sve_sunpk_x2:
6111 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6112 Node->getValueType(0),
6113 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6114 AArch64::SUNPK_VG2_2ZZ_D}))
6115 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6116 return;
6117 case Intrinsic::aarch64_sve_uunpk_x2:
6118 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6119 Node->getValueType(0),
6120 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6121 AArch64::UUNPK_VG2_2ZZ_D}))
6122 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6123 return;
6124 case Intrinsic::aarch64_sve_sunpk_x4:
6125 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6126 Node->getValueType(0),
6127 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6128 AArch64::SUNPK_VG4_4Z2Z_D}))
6129 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6130 return;
6131 case Intrinsic::aarch64_sve_uunpk_x4:
6132 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6133 Node->getValueType(0),
6134 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6135 AArch64::UUNPK_VG4_4Z2Z_D}))
6136 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6137 return;
6138 case Intrinsic::aarch64_sve_pext_x2: {
6139 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6140 Node->getValueType(0),
6141 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6142 AArch64::PEXT_2PCI_D}))
6143 SelectPExtPair(Node, Op);
6144 return;
6145 }
6146 }
6147 break;
6148 }
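  // Intrinsics with no results (the stores below): the type used to pick an
  // opcode comes from the first stored vector operand.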
6149 case ISD::INTRINSIC_VOID: {
6150 unsigned IntNo = Node->getConstantOperandVal(1);
6151 if (Node->getNumOperands() >= 3)
6152 VT = Node->getOperand(2)->getValueType(0);
6153 switch (IntNo) {
6154 default:
6155 break;
6156 case Intrinsic::aarch64_neon_st1x2: {
6157 if (VT == MVT::v8i8) {
6158 SelectStore(Node, 2, AArch64::ST1Twov8b);
6159 return;
6160 } else if (VT == MVT::v16i8) {
6161 SelectStore(Node, 2, AArch64::ST1Twov16b);
6162 return;
6163 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6164 VT == MVT::v4bf16) {
6165 SelectStore(Node, 2, AArch64::ST1Twov4h);
6166 return;
6167 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6168 VT == MVT::v8bf16) {
6169 SelectStore(Node, 2, AArch64::ST1Twov8h);
6170 return;
6171 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6172 SelectStore(Node, 2, AArch64::ST1Twov2s);
6173 return;
6174 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6175 SelectStore(Node, 2, AArch64::ST1Twov4s);
6176 return;
6177 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6178 SelectStore(Node, 2, AArch64::ST1Twov2d);
6179 return;
6180 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6181 SelectStore(Node, 2, AArch64::ST1Twov1d);
6182 return;
6183 }
6184 break;
6185 }
6186 case Intrinsic::aarch64_neon_st1x3: {
6187 if (VT == MVT::v8i8) {
6188 SelectStore(Node, 3, AArch64::ST1Threev8b);
6189 return;
6190 } else if (VT == MVT::v16i8) {
6191 SelectStore(Node, 3, AArch64::ST1Threev16b);
6192 return;
6193 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6194 VT == MVT::v4bf16) {
6195 SelectStore(Node, 3, AArch64::ST1Threev4h);
6196 return;
6197 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6198 VT == MVT::v8bf16) {
6199 SelectStore(Node, 3, AArch64::ST1Threev8h);
6200 return;
6201 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6202 SelectStore(Node, 3, AArch64::ST1Threev2s);
6203 return;
6204 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6205 SelectStore(Node, 3, AArch64::ST1Threev4s);
6206 return;
6207 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6208 SelectStore(Node, 3, AArch64::ST1Threev2d);
6209 return;
6210 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6211 SelectStore(Node, 3, AArch64::ST1Threev1d);
6212 return;
6213 }
6214 break;
6215 }
6216 case Intrinsic::aarch64_neon_st1x4: {
6217 if (VT == MVT::v8i8) {
6218 SelectStore(Node, 4, AArch64::ST1Fourv8b);
6219 return;
6220 } else if (VT == MVT::v16i8) {
6221 SelectStore(Node, 4, AArch64::ST1Fourv16b);
6222 return;
6223 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6224 VT == MVT::v4bf16) {
6225 SelectStore(Node, 4, AArch64::ST1Fourv4h);
6226 return;
6227 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6228 VT == MVT::v8bf16) {
6229 SelectStore(Node, 4, AArch64::ST1Fourv8h);
6230 return;
6231 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6232 SelectStore(Node, 4, AArch64::ST1Fourv2s);
6233 return;
6234 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6235 SelectStore(Node, 4, AArch64::ST1Fourv4s);
6236 return;
6237 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6238 SelectStore(Node, 4, AArch64::ST1Fourv2d);
6239 return;
6240 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6241 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6242 return;
6243 }
6244 break;
6245 }
6246 case Intrinsic::aarch64_neon_st2: {
6247 if (VT == MVT::v8i8) {
6248 SelectStore(Node, 2, AArch64::ST2Twov8b);
6249 return;
6250 } else if (VT == MVT::v16i8) {
6251 SelectStore(Node, 2, AArch64::ST2Twov16b);
6252 return;
6253 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6254 VT == MVT::v4bf16) {
6255 SelectStore(Node, 2, AArch64::ST2Twov4h);
6256 return;
6257 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6258 VT == MVT::v8bf16) {
6259 SelectStore(Node, 2, AArch64::ST2Twov8h);
6260 return;
6261 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6262 SelectStore(Node, 2, AArch64::ST2Twov2s);
6263 return;
6264 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6265 SelectStore(Node, 2, AArch64::ST2Twov4s);
6266 return;
6267 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6268 SelectStore(Node, 2, AArch64::ST2Twov2d);
6269 return;
6270 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6271 SelectStore(Node, 2, AArch64::ST1Twov1d);
6272 return;
6273 }
6274 break;
6275 }
6276 case Intrinsic::aarch64_neon_st3: {
6277 if (VT == MVT::v8i8) {
6278 SelectStore(Node, 3, AArch64::ST3Threev8b);
6279 return;
6280 } else if (VT == MVT::v16i8) {
6281 SelectStore(Node, 3, AArch64::ST3Threev16b);
6282 return;
6283 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6284 VT == MVT::v4bf16) {
6285 SelectStore(Node, 3, AArch64::ST3Threev4h);
6286 return;
6287 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6288 VT == MVT::v8bf16) {
6289 SelectStore(Node, 3, AArch64::ST3Threev8h);
6290 return;
6291 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6292 SelectStore(Node, 3, AArch64::ST3Threev2s);
6293 return;
6294 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6295 SelectStore(Node, 3, AArch64::ST3Threev4s);
6296 return;
6297 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6298 SelectStore(Node, 3, AArch64::ST3Threev2d);
6299 return;
6300 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6301 SelectStore(Node, 3, AArch64::ST1Threev1d);
6302 return;
6303 }
6304 break;
6305 }
6306 case Intrinsic::aarch64_neon_st4: {
6307 if (VT == MVT::v8i8) {
6308 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6309 return;
6310 } else if (VT == MVT::v16i8) {
6311 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6312 return;
6313 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6314 VT == MVT::v4bf16) {
6315 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6316 return;
6317 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6318 VT == MVT::v8bf16) {
6319 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6320 return;
6321 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6322 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6323 return;
6324 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6325 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6326 return;
6327 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6328 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6329 return;
6330 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6331 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6332 return;
6333 }
6334 break;
6335 }
6336 case Intrinsic::aarch64_neon_st2lane: {
6337 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6338 SelectStoreLane(Node, 2, AArch64::ST2i8);
6339 return;
6340 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6341 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6342 SelectStoreLane(Node, 2, AArch64::ST2i16);
6343 return;
6344 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6345 VT == MVT::v2f32) {
6346 SelectStoreLane(Node, 2, AArch64::ST2i32);
6347 return;
6348 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6349 VT == MVT::v1f64) {
6350 SelectStoreLane(Node, 2, AArch64::ST2i64);
6351 return;
6352 }
6353 break;
6354 }
6355 case Intrinsic::aarch64_neon_st3lane: {
6356 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6357 SelectStoreLane(Node, 3, AArch64::ST3i8);
6358 return;
6359 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6360 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6361 SelectStoreLane(Node, 3, AArch64::ST3i16);
6362 return;
6363 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6364 VT == MVT::v2f32) {
6365 SelectStoreLane(Node, 3, AArch64::ST3i32);
6366 return;
6367 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6368 VT == MVT::v1f64) {
6369 SelectStoreLane(Node, 3, AArch64::ST3i64);
6370 return;
6371 }
6372 break;
6373 }
6374 case Intrinsic::aarch64_neon_st4lane: {
6375 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6376 SelectStoreLane(Node, 4, AArch64::ST4i8);
6377 return;
6378 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6379 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6380 SelectStoreLane(Node, 4, AArch64::ST4i16);
6381 return;
6382 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6383 VT == MVT::v2f32) {
6384 SelectStoreLane(Node, 4, AArch64::ST4i32);
6385 return;
6386 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6387 VT == MVT::v1f64) {
6388 SelectStoreLane(Node, 4, AArch64::ST4i64);
6389 return;
6390 }
6391 break;
6392 }
6393 case Intrinsic::aarch64_sve_st2q: {
6394 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6395 return;
6396 }
6397 case Intrinsic::aarch64_sve_st3q: {
6398 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6399 return;
6400 }
6401 case Intrinsic::aarch64_sve_st4q: {
6402 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
6403 return;
6404 }
6405 case Intrinsic::aarch64_sve_st2: {
6406 if (VT == MVT::nxv16i8) {
6407 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
6408 return;
6409 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6410 VT == MVT::nxv8bf16) {
6411 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
6412 return;
6413 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6414 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
6415 return;
6416 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6417 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
6418 return;
6419 }
6420 break;
6421 }
6422 case Intrinsic::aarch64_sve_st3: {
6423 if (VT == MVT::nxv16i8) {
6424 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
6425 return;
6426 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6427 VT == MVT::nxv8bf16) {
6428 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
6429 return;
6430 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6431 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
6432 return;
6433 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6434 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
6435 return;
6436 }
6437 break;
6438 }
6439 case Intrinsic::aarch64_sve_st4: {
6440 if (VT == MVT::nxv16i8) {
6441 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
6442 return;
6443 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6444 VT == MVT::nxv8bf16) {
6445 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
6446 return;
6447 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6448 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
6449 return;
6450 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6451 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
6452 return;
6453 }
6454 break;
6455 }
6456 }
6457 break;
6458 }
6459 case AArch64ISD::LD2post: {
6460 if (VT == MVT::v8i8) {
6461 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
6462 return;
6463 } else if (VT == MVT::v16i8) {
6464 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
6465 return;
6466 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6467 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
6468 return;
6469 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6470 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
6471 return;
6472 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6473 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
6474 return;
6475 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6476 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
6477 return;
6478 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6479 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6480 return;
6481 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6482 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
6483 return;
6484 }
6485 break;
6486 }
6487 case AArch64ISD::LD3post: {
6488 if (VT == MVT::v8i8) {
6489 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
6490 return;
6491 } else if (VT == MVT::v16i8) {
6492 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
6493 return;
6494 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6495 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
6496 return;
6497 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6498 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
6499 return;
6500 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6501 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
6502 return;
6503 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6504 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
6505 return;
6506 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6507 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6508 return;
6509 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6510 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
6511 return;
6512 }
6513 break;
6514 }
6515 case AArch64ISD::LD4post: {
6516 if (VT == MVT::v8i8) {
6517 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
6518 return;
6519 } else if (VT == MVT::v16i8) {
6520 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
6521 return;
6522 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6523 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
6524 return;
6525 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6526 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
6527 return;
6528 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6529 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
6530 return;
6531 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6532 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
6533 return;
6534 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6535 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6536 return;
6537 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6538 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
6539 return;
6540 }
6541 break;
6542 }
6543 case AArch64ISD::LD1x2post: {
6544 if (VT == MVT::v8i8) {
6545 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
6546 return;
6547 } else if (VT == MVT::v16i8) {
6548 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
6549 return;
6550 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6551 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
6552 return;
6553 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6554 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
6555 return;
6556 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6557 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
6558 return;
6559 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6560 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
6561 return;
6562 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6563 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6564 return;
6565 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6566 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
6567 return;
6568 }
6569 break;
6570 }
6571 case AArch64ISD::LD1x3post: {
6572 if (VT == MVT::v8i8) {
6573 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
6574 return;
6575 } else if (VT == MVT::v16i8) {
6576 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
6577 return;
6578 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6579 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
6580 return;
6581 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6582 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
6583 return;
6584 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6585 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
6586 return;
6587 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6588 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
6589 return;
6590 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6591 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6592 return;
6593 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6594 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
6595 return;
6596 }
6597 break;
6598 }
6599 case AArch64ISD::LD1x4post: {
6600 if (VT == MVT::v8i8) {
6601 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
6602 return;
6603 } else if (VT == MVT::v16i8) {
6604 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
6605 return;
6606 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6607 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
6608 return;
6609 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6610 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
6611 return;
6612 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6613 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
6614 return;
6615 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6616 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
6617 return;
6618 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6619 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6620 return;
6621 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6622 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
6623 return;
6624 }
6625 break;
6626 }
6627   case AArch64ISD::LD1DUPpost: {
6628     if (VT == MVT::v8i8) {
6629 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
6630 return;
6631 } else if (VT == MVT::v16i8) {
6632 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
6633 return;
6634 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6635 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
6636 return;
6637 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6638 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
6639 return;
6640 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6641 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
6642 return;
6643 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6644 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
6645 return;
6646 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6647 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
6648 return;
6649 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6650 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
6651 return;
6652 }
6653 break;
6654 }
6655   case AArch64ISD::LD2DUPpost: {
6656     if (VT == MVT::v8i8) {
6657 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
6658 return;
6659 } else if (VT == MVT::v16i8) {
6660 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
6661 return;
6662 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6663 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
6664 return;
6665 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6666 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
6667 return;
6668 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6669 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
6670 return;
6671 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6672 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
6673 return;
6674 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6675 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
6676 return;
6677 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6678 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
6679 return;
6680 }
6681 break;
6682 }
6683   case AArch64ISD::LD3DUPpost: {
6684     if (VT == MVT::v8i8) {
6685 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
6686 return;
6687 } else if (VT == MVT::v16i8) {
6688 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
6689 return;
6690 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6691 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
6692 return;
6693 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6694 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
6695 return;
6696 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6697 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
6698 return;
6699 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6700 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
6701 return;
6702 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6703 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
6704 return;
6705 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6706 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
6707 return;
6708 }
6709 break;
6710 }
6711   case AArch64ISD::LD4DUPpost: {
6712     if (VT == MVT::v8i8) {
6713 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
6714 return;
6715 } else if (VT == MVT::v16i8) {
6716 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
6717 return;
6718 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6719 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
6720 return;
6721 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6722 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
6723 return;
6724 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6725 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
6726 return;
6727 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6728 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
6729 return;
6730 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6731 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
6732 return;
6733 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6734 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
6735 return;
6736 }
6737 break;
6738 }
6739   case AArch64ISD::LD1LANEpost: {
6740     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6741 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
6742 return;
6743 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6744 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6745 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
6746 return;
6747 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6748 VT == MVT::v2f32) {
6749 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
6750 return;
6751 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6752 VT == MVT::v1f64) {
6753 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
6754 return;
6755 }
6756 break;
6757 }
6758   case AArch64ISD::LD2LANEpost: {
6759     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6760 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
6761 return;
6762 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6763 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6764 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
6765 return;
6766 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6767 VT == MVT::v2f32) {
6768 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
6769 return;
6770 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6771 VT == MVT::v1f64) {
6772 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
6773 return;
6774 }
6775 break;
6776 }
6777   case AArch64ISD::LD3LANEpost: {
6778     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6779 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
6780 return;
6781 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6782 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6783 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
6784 return;
6785 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6786 VT == MVT::v2f32) {
6787 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
6788 return;
6789 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6790 VT == MVT::v1f64) {
6791 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
6792 return;
6793 }
6794 break;
6795 }
6796   case AArch64ISD::LD4LANEpost: {
6797     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6798 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
6799 return;
6800 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6801 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6802 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
6803 return;
6804 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6805 VT == MVT::v2f32) {
6806 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
6807 return;
6808 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6809 VT == MVT::v1f64) {
6810 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
6811 return;
6812 }
6813 break;
6814 }
6815 case AArch64ISD::ST2post: {
6816 VT = Node->getOperand(1).getValueType();
6817 if (VT == MVT::v8i8) {
6818 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
6819 return;
6820 } else if (VT == MVT::v16i8) {
6821 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
6822 return;
6823 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6824 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
6825 return;
6826 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6827 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
6828 return;
6829 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6830 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
6831 return;
6832 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6833 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
6834 return;
6835 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6836 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
6837 return;
6838 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6839 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
6840 return;
6841 }
6842 break;
6843 }
6844 case AArch64ISD::ST3post: {
6845 VT = Node->getOperand(1).getValueType();
6846 if (VT == MVT::v8i8) {
6847 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
6848 return;
6849 } else if (VT == MVT::v16i8) {
6850 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
6851 return;
6852 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6853 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
6854 return;
6855 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6856 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
6857 return;
6858 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6859 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
6860 return;
6861 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6862 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
6863 return;
6864 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6865 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
6866 return;
6867 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6868 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
6869 return;
6870 }
6871 break;
6872 }
6873 case AArch64ISD::ST4post: {
6874 VT = Node->getOperand(1).getValueType();
6875 if (VT == MVT::v8i8) {
6876 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
6877 return;
6878 } else if (VT == MVT::v16i8) {
6879 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
6880 return;
6881 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6882 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
6883 return;
6884 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6885 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
6886 return;
6887 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6888 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
6889 return;
6890 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6891 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
6892 return;
6893 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6894 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
6895 return;
6896 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6897 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
6898 return;
6899 }
6900 break;
6901 }
6902 case AArch64ISD::ST1x2post: {
6903 VT = Node->getOperand(1).getValueType();
6904 if (VT == MVT::v8i8) {
6905 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
6906 return;
6907 } else if (VT == MVT::v16i8) {
6908 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
6909 return;
6910 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6911 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
6912 return;
6913 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6914 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
6915 return;
6916 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6917 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
6918 return;
6919 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6920 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
6921 return;
6922 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6923 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
6924 return;
6925 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6926 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
6927 return;
6928 }
6929 break;
6930 }
6931 case AArch64ISD::ST1x3post: {
6932 VT = Node->getOperand(1).getValueType();
6933 if (VT == MVT::v8i8) {
6934 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
6935 return;
6936 } else if (VT == MVT::v16i8) {
6937 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
6938 return;
6939 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6940 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
6941 return;
6942     } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6943 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
6944 return;
6945 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6946 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
6947 return;
6948 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6949 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
6950 return;
6951 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6952 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
6953 return;
6954 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6955 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
6956 return;
6957 }
6958 break;
6959 }
6960 case AArch64ISD::ST1x4post: {
6961 VT = Node->getOperand(1).getValueType();
6962 if (VT == MVT::v8i8) {
6963 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
6964 return;
6965 } else if (VT == MVT::v16i8) {
6966 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
6967 return;
6968 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6969 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
6970 return;
6971 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6972 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
6973 return;
6974 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6975 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
6976 return;
6977 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6978 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
6979 return;
6980 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6981 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
6982 return;
6983 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6984 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
6985 return;
6986 }
6987 break;
6988 }
6989   case AArch64ISD::ST2LANEpost: {
6990     VT = Node->getOperand(1).getValueType();
6991 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6992 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
6993 return;
6994 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6995 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6996 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
6997 return;
6998 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6999 VT == MVT::v2f32) {
7000 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
7001 return;
7002 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7003 VT == MVT::v1f64) {
7004 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
7005 return;
7006 }
7007 break;
7008 }
7009   case AArch64ISD::ST3LANEpost: {
7010     VT = Node->getOperand(1).getValueType();
7011 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7012 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
7013 return;
7014 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7015 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7016 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
7017 return;
7018 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7019 VT == MVT::v2f32) {
7020 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
7021 return;
7022 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7023 VT == MVT::v1f64) {
7024 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
7025 return;
7026 }
7027 break;
7028 }
7029   case AArch64ISD::ST4LANEpost: {
7030     VT = Node->getOperand(1).getValueType();
7031 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7032 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
7033 return;
7034 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7035 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7036 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
7037 return;
7038 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7039 VT == MVT::v2f32) {
7040 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
7041 return;
7042 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7043 VT == MVT::v1f64) {
7044 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
7045 return;
7046 }
7047 break;
7048 }
7049   case AArch64ISD::SVE_LD2_MERGE_ZERO: {
7050     if (VT == MVT::nxv16i8) {
7051 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
7052 return;
7053 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7054 VT == MVT::nxv8bf16) {
7055 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
7056 return;
7057 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7058 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
7059 return;
7060 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7061 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
7062 return;
7063 }
7064 break;
7065 }
7066   case AArch64ISD::SVE_LD3_MERGE_ZERO: {
7067     if (VT == MVT::nxv16i8) {
7068 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
7069 return;
7070 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7071 VT == MVT::nxv8bf16) {
7072 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
7073 return;
7074 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7075 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
7076 return;
7077 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7078 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
7079 return;
7080 }
7081 break;
7082 }
7083   case AArch64ISD::SVE_LD4_MERGE_ZERO: {
7084     if (VT == MVT::nxv16i8) {
7085 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
7086 return;
7087 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7088 VT == MVT::nxv8bf16) {
7089 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
7090 return;
7091 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7092 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
7093 return;
7094 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7095 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
7096 return;
7097 }
7098 break;
7099 }
7100 }
7101
7102 // Select the default instruction
7103 SelectCode(Node);
7104}
7105
7106 /// createAArch64ISelDag - This pass converts a legalized DAG into an
7107/// AArch64-specific DAG, ready for instruction scheduling.
7108 FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
7109                                          CodeGenOptLevel OptLevel) {
7110 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7111}
7112
7113/// When \p PredVT is a scalable vector predicate in the form
7114 /// MVT::nx<M>xi1, it builds the corresponding scalable vector of
7115 /// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7116 /// structured vectors (NumVec > 1), the output data type is
7117/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7118/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7119/// EVT.
7120 static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
7121                                                 unsigned NumVec) {
7122 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
7123 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
7124 return EVT();
7125
7126 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7127 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7128 return EVT();
7129
7130 ElementCount EC = PredVT.getVectorElementCount();
7131 EVT ScalarVT =
7132 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7133 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
7134
7135 return MemVT;
7136}
7137
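As a worked example of the mapping implemented by getPackedVectorTypeFromPredicateType above: nxv16i1 maps to nxv16i8, nxv8i1 to nxv8i16, nxv4i1 to nxv4i32 and nxv2i1 to nxv2i64, and with NumVec = 2 an nxv4i1 predicate yields nxv8i32. The short standalone C++ sketch below reproduces only that arithmetic; it deliberately avoids the EVT API, and the name packedTypeFor is illustrative rather than anything defined in LLVM.

#include <cassert>
#include <cstdio>
#include <utility>

// Mirrors the rule "M x bits = 128": a predicate nx<M>xi1 maps to a data
// vector nx<M*NumVec>xi<128/M>. Returns {element bits, min element count}.
static std::pair<unsigned, unsigned> packedTypeFor(unsigned M, unsigned NumVec) {
  assert((M == 2 || M == 4 || M == 8 || M == 16) && "not an SVE predicate form");
  assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
  return {128u / M, M * NumVec};
}

int main() {
  auto [Bits, Elems] = packedTypeFor(4, 2); // nxv4i1 predicate, two vectors
  std::printf("nxv%ui%u\n", Elems, Bits);   // prints "nxv8i32"
  return 0;
}
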
7138 /// Return the EVT of the data associated with a memory operation in \p
7139 /// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
7140 static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
7141   if (isa<MemSDNode>(Root))
7142 return cast<MemSDNode>(Root)->getMemoryVT();
7143
7144 if (isa<MemIntrinsicSDNode>(Root))
7145 return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();
7146
7147 const unsigned Opcode = Root->getOpcode();
7148 // For custom ISD nodes, we have to look at them individually to extract the
7149 // type of the data moved to/from memory.
7150 switch (Opcode) {
7151   case AArch64ISD::LD1_MERGE_ZERO:
7152   case AArch64ISD::LD1S_MERGE_ZERO:
7153   case AArch64ISD::LDNF1_MERGE_ZERO:
7154   case AArch64ISD::LDNF1S_MERGE_ZERO:
7155     return cast<VTSDNode>(Root->getOperand(3))->getVT();
7156   case AArch64ISD::ST1_PRED:
7157     return cast<VTSDNode>(Root->getOperand(4))->getVT();
7158   case AArch64ISD::SVE_LD2_MERGE_ZERO:
7159     return getPackedVectorTypeFromPredicateType(
7160         Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
7161   case AArch64ISD::SVE_LD3_MERGE_ZERO:
7162     return getPackedVectorTypeFromPredicateType(
7163         Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
7164   case AArch64ISD::SVE_LD4_MERGE_ZERO:
7165     return getPackedVectorTypeFromPredicateType(
7166         Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
7167 default:
7168 break;
7169 }
7170
7171 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7172 return EVT();
7173
7174 switch (Root->getConstantOperandVal(1)) {
7175 default:
7176 return EVT();
7177 case Intrinsic::aarch64_sme_ldr:
7178 case Intrinsic::aarch64_sme_str:
7179 return MVT::nxv16i8;
7180 case Intrinsic::aarch64_sve_prf:
7181 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7182 // width of the predicate.
7183     return getPackedVectorTypeFromPredicateType(
7184         Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7185 case Intrinsic::aarch64_sve_ld2_sret:
7186 case Intrinsic::aarch64_sve_ld2q_sret:
7187     return getPackedVectorTypeFromPredicateType(
7188         Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7189 case Intrinsic::aarch64_sve_st2q:
7190     return getPackedVectorTypeFromPredicateType(
7191         Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7192 case Intrinsic::aarch64_sve_ld3_sret:
7193 case Intrinsic::aarch64_sve_ld3q_sret:
7194     return getPackedVectorTypeFromPredicateType(
7195         Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7196 case Intrinsic::aarch64_sve_st3q:
7197     return getPackedVectorTypeFromPredicateType(
7198         Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7199 case Intrinsic::aarch64_sve_ld4_sret:
7200 case Intrinsic::aarch64_sve_ld4q_sret:
7201     return getPackedVectorTypeFromPredicateType(
7202         Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7203 case Intrinsic::aarch64_sve_st4q:
7204     return getPackedVectorTypeFromPredicateType(
7205         Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
7206 case Intrinsic::aarch64_sve_ld1udq:
7207 case Intrinsic::aarch64_sve_st1dq:
7208 return EVT(MVT::nxv1i64);
7209 case Intrinsic::aarch64_sve_ld1uwq:
7210 case Intrinsic::aarch64_sve_st1wq:
7211 return EVT(MVT::nxv1i32);
7212 }
7213}
7214
7215/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7216 /// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
7217/// where Root is the memory access using N for its address.
7218template <int64_t Min, int64_t Max>
7219bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7220 SDValue &Base,
7221 SDValue &OffImm) {
7222 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
7223 const DataLayout &DL = CurDAG->getDataLayout();
7224 const MachineFrameInfo &MFI = MF->getFrameInfo();
7225
7226 if (N.getOpcode() == ISD::FrameIndex) {
7227 int FI = cast<FrameIndexSDNode>(N)->getIndex();
7228 // We can only encode VL scaled offsets, so only fold in frame indexes
7229 // referencing SVE objects.
7230     if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
7231       Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7232 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7233 return true;
7234 }
7235
7236 return false;
7237 }
7238
7239 if (MemVT == EVT())
7240 return false;
7241
7242 if (N.getOpcode() != ISD::ADD)
7243 return false;
7244
7245 SDValue VScale = N.getOperand(1);
7246 if (VScale.getOpcode() != ISD::VSCALE)
7247 return false;
7248
7249 TypeSize TS = MemVT.getSizeInBits();
7250 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7251 int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7252
7253 if ((MulImm % MemWidthBytes) != 0)
7254 return false;
7255
7256 int64_t Offset = MulImm / MemWidthBytes;
7257 if (Offset < Min || Offset > Max)
7258 return false;
7259
7260 Base = N.getOperand(0);
7261 if (Base.getOpcode() == ISD::FrameIndex) {
7262 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7263 // We can only encode VL scaled offsets, so only fold in frame indexes
7264 // referencing SVE objects.
7265     if (MFI.getStackID(FI) == TargetStackID::ScalableVector)
7266       Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7267 }
7268
7269 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7270 return true;
7271}
7272
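To make the Min/Max check above concrete: for a full-vector access the known-minimum size of MemVT is 16 bytes, so an address of the form Base + vscale * 48 folds to the immediate "#3, mul vl", while Base + vscale * 40 does not fold because 40 is not a multiple of 16. The plain C++ sketch below restates just that arithmetic; foldVLOffset is an illustrative name rather than an LLVM function, and the [-8, 7] range used in the example is the immediate range typical of SVE contiguous loads and stores.

#include <cstdint>
#include <cstdio>
#include <optional>

// Given the VSCALE multiplier MulImm (in bytes) and the known-minimum byte
// width of the memory type, return the VL-scaled immediate if it is encodable.
static std::optional<int64_t> foldVLOffset(int64_t MulImm, int64_t MemWidthBytes,
                                           int64_t Min, int64_t Max) {
  if (MulImm % MemWidthBytes != 0)
    return std::nullopt;                    // not a whole number of vectors
  int64_t Offset = MulImm / MemWidthBytes;  // would become "#imm, mul vl"
  if (Offset < Min || Offset > Max)
    return std::nullopt;                    // outside the encodable range
  return Offset;
}

int main() {
  auto Folds = foldVLOffset(48, 16, -8, 7);   // offset #3, mul vl
  auto NoFold = foldVLOffset(40, 16, -8, 7);  // 40 % 16 != 0, not folded
  std::printf("%lld %d\n", Folds ? (long long)*Folds : 0, NoFold.has_value());
  return 0;
}
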
7273/// Select register plus register addressing mode for SVE, with scaled
7274/// offset.
7275bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7276 SDValue &Base,
7277 SDValue &Offset) {
7278 if (N.getOpcode() != ISD::ADD)
7279 return false;
7280
7281 // Process an ADD node.
7282 const SDValue LHS = N.getOperand(0);
7283 const SDValue RHS = N.getOperand(1);
7284
7285 // 8 bit data does not come with the SHL node, so it is treated
7286 // separately.
7287 if (Scale == 0) {
7288 Base = LHS;
7289 Offset = RHS;
7290 return true;
7291 }
7292
7293 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7294 int64_t ImmOff = C->getSExtValue();
7295 unsigned Size = 1 << Scale;
7296
7297 // To use the reg+reg addressing mode, the immediate must be a multiple of
7298 // the vector element's byte size.
7299 if (ImmOff % Size)
7300 return false;
7301
7302 SDLoc DL(N);
7303 Base = LHS;
7304 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7305 SDValue Ops[] = {Offset};
7306 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7307 Offset = SDValue(MI, 0);
7308 return true;
7309 }
7310
7311 // Check if the RHS is a shift node with a constant.
7312 if (RHS.getOpcode() != ISD::SHL)
7313 return false;
7314
7315 const SDValue ShiftRHS = RHS.getOperand(1);
7316 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7317 if (C->getZExtValue() == Scale) {
7318 Base = LHS;
7319 Offset = RHS.getOperand(0);
7320 return true;
7321 }
7322
7323 return false;
7324}
7325
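SelectSVERegRegAddrMode accepts two shapes of ADD: an index that is already shifted left by exactly Scale (used directly as the scaled register offset), or a constant byte offset that is a multiple of the element size, which is divided by the element size and materialised with MOVi64imm. The standalone sketch below shows only the constant case; scaledIndexFor is an illustrative name and the integer inputs stand in for the SDValue operands.

#include <cstdint>
#include <cstdio>
#include <optional>

// For elements of 1 << Scale bytes, a constant byte offset can use the
// reg+reg (scaled index) form only if it is a multiple of the element size;
// the value placed in the index register is then ImmOff >> Scale.
static std::optional<int64_t> scaledIndexFor(int64_t ImmOff, unsigned Scale) {
  int64_t Size = int64_t(1) << Scale;
  if (ImmOff % Size != 0)
    return std::nullopt;
  return ImmOff >> Scale;
}

int main() {
  auto A = scaledIndexFor(24, 3);  // 8-byte elements: index register holds 3
  auto B = scaledIndexFor(20, 3);  // 20 is not a multiple of 8: no reg+reg form
  std::printf("%lld %d\n", A ? (long long)*A : 0, B.has_value());
  return 0;
}
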
7326bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7327 const AArch64TargetLowering *TLI =
7328 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7329
7330 return TLI->isAllActivePredicate(*CurDAG, N);
7331}
7332
7333bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7334 EVT VT = N.getValueType();
7335 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7336}
7337
7338bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7339                                              SDValue &Base, SDValue &Offset,
7340                                              unsigned Scale) {
7341 // Try to untangle an ADD node into a 'reg + offset'
7342 if (N.getOpcode() == ISD::ADD)
7343 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
7344 int64_t ImmOff = C->getSExtValue();
7345 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0))) {
7346 Base = N.getOperand(0);
7347 Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7348 return true;
7349 }
7350 }
7351
7352 // By default, just match reg + 0.
7353 Base = N;
7354 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7355 return true;
7356}
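SelectSMETileSlice folds an immediate slice offset only when it is positive, at most MaxSize, and a multiple of Scale; anything else falls back to matching "reg + 0". A plain-integer sketch of that rule follows; encodedSliceOffset is an illustrative name and simply returns the value that would be encoded, with 0 doubling as the fallback.

#include <cstdint>
#include <cstdio>

// Returns ImmOff / Scale when the immediate can be folded into the tile slice,
// or 0 when the routine above would fall back to "reg + 0".
static int64_t encodedSliceOffset(int64_t ImmOff, int64_t MaxSize, int64_t Scale) {
  if (ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0))
    return ImmOff / Scale;
  return 0;
}

int main() {
  std::printf("%lld\n", (long long)encodedSliceOffset(6, 14, 2));  // folds: 3
  std::printf("%lld\n", (long long)encodedSliceOffset(16, 14, 2)); // too large: 0
  return 0;
}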
unsigned SubReg
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms)
static int getIntOperandFromRegisterString(StringRef RegString)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, unsigned NumberOfIgnoredHighBits, EVT VT)
Does DstMask form a complementary pair with the mask provided by BitsToBeInserted,...
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N)
Instructions that accept extend modifiers like UXTW expect the register being extended to be a GPR32,...
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op, SDValue &Src, int &DstLSB, int &Width)
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, SDValue &Src, int &DstLSB, int &Width)
Does this tree qualify as an attempt to move a bitfield into position, essentially "(and (shl VAL,...
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB, unsigned NumberOfIgnoredLowBits, bool BiggerPattern)
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits=0, bool BiggerPattern=false)
static bool isShiftedMask(uint64_t Mask, EVT VT)
bool SelectSMETile(unsigned &BaseReg, unsigned TileNum)
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root)
Return the EVT of the data associated to a memory operation in Root.
static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isWorthFoldingADDlow(SDValue N)
If there's a use of this ADDlow that's not itself a load/store then we'll need to create a real ADD i...
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N)
getShiftTypeForNode - Translate a shift node to the corresponding ShiftType value.
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB)
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef< unsigned > Opcodes)
This function selects an opcode from a list of opcodes, which is expected to be the opcode for { 8-bi...
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, unsigned NumVec)
When PredVT is a scalable vector predicate in the form MVT::nx<M>xi1, it builds the correspondent sca...
static bool isPreferredADD(int64_t ImmOff)
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, uint64_t Imm, uint64_t MSB, unsigned Depth)
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount)
Create a machine node performing a notional SHL of Op by ShlAmount.
static bool isWorthFoldingSHL(SDValue V)
Determine whether it is worth it to fold SHL into the addressing mode.
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, bool BiggerPattern)
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, SDValue Src, SDValue Dst, SelectionDAG *CurDAG, const bool BiggerPattern)
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, SDValue Orig, unsigned Depth)
static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG)
static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth)
#define PASS_NAME
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth=0)
#define DEBUG_TYPE
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected)
static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore=false)
getExtendTypeForNode - Translate an extend node to the corresponding ExtendType value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, SDValue &ShiftedOperand, uint64_t &EncodedShiftImm)
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range, unsigned Size)
Check if the immediate offset is valid as a scaled immediate.
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
aarch64 promote const
amdgpu AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Size
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
#define R2(n)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t High
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Value * RHS
Value * LHS
support::ulittle16_t & Lo
Definition: aarch32.cpp:206
support::ulittle16_t & Hi
Definition: aarch32.cpp:205
DEMANGLE_DUMP_METHOD void dump() const
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
bool getExactInverse(APFloat *inv) const
Definition: APFloat.h:1393
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1235
Class for arbitrary precision integers.
Definition: APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1500
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1629
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:238
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1448
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1598
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1557
void flipAllBits()
Toggle every bit to its opposite value.
Definition: APInt.h:1414
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition: APInt.h:490
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:838
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
const GlobalValue * getGlobal() const
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint8_t getStackID(int ObjectIdx) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
A description of a memory reference used in the backend.
An SDNode that represents everything that will be needed to construct a MachineInstr.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
iterator_range< use_iterator > uses()
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps)
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:227
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:452
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:691
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
size_t size() const
Definition: SmallVector.h:91
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:685
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
unsigned getID() const
Return the register class ID number.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:927
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint32_t parseGenericRegister(StringRef Name)
const SysReg * lookupSysRegByName(StringRef)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static bool processLogicalImmediate(uint64_t Imm, unsigned RegSize, uint64_t &Encoding)
processLogicalImmediate - Determine if an immediate value can be encoded as the immediate operand of ...
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static constexpr unsigned SVEBitsPerBlock
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:573
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1284
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1074
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:813
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:933
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:804
@ WRITE_REGISTER
Definition: ISDOpcodes.h:125
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1280
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:641
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition: ISDOpcodes.h:68
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:734
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:587
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:124
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:810
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1372
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1291
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:828
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:708
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition: ISDOpcodes.h:223
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:816
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ AssertZext
Definition: ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1527
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1558
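
The ISD opcodes and enums listed above are what the DAG-to-DAG selector pattern-matches on. As a rough illustration only (the helper below is hypothetical and not part of this file, though it relies only on the SelectionDAG headers this file already includes), a typical selection predicate inspects node opcodes and constant operands like this:

static bool isShlOfMaskedConstant(SDValue V) {
  // Match (shl (and X, C1), C2) where both C1 and C2 are plain constants.
  if (V.getOpcode() != ISD::SHL)
    return false;
  auto *ShiftAmt = dyn_cast<ConstantSDNode>(V.getOperand(1));
  if (!ShiftAmt)
    return false;
  SDValue LHS = V.getOperand(0);
  return LHS.getOpcode() == ISD::AND &&
         isa<ConstantSDNode>(LHS.getOperand(1));
}
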
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not<Pred>
Reg
All possible values of the reg field in the ModR/M byte.
DiagnosticInfoOptimizationBase::Argument NV
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
@ Offset
Definition: DWP.cpp:480
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition: MathExtras.h:279
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:346
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:285
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1928
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:273
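
A hedged sketch of how the mask helpers listed above (isShiftedMask_64, countr_zero, countr_one) are commonly combined; decomposeShiftedMask is an illustrative name, not an LLVM API:

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"

// Split a shifted run of ones (e.g. 0x0ff0) into its start bit and length,
// the form needed when building bitfield extract/insert style immediates.
static bool decomposeShiftedMask(uint64_t Mask, unsigned &LSB,
                                 unsigned &Width) {
  if (!llvm::isShiftedMask_64(Mask))
    return false;
  LSB = llvm::countr_zero(Mask);         // index of the lowest set bit
  Width = llvm::countr_one(Mask >> LSB); // length of the contiguous run
  return true;
}
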
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAArch64ISelDag(AArch64TargetMachine &TM, CodeGenOptLevel OptLevel)
createAArch64ISelDag - This pass converts a legalized DAG into an AArch64-specific DAG,...
@ And
Bitwise or logical AND of integers.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
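
For context on createAArch64ISelDag above: the pass it returns is installed from the target's pass configuration. The snippet below is a simplified sketch of that wiring (the real code lives in AArch64TargetMachine.cpp and adds further passes in some configurations):

bool AArch64PassConfig::addInstSelector() {
  // Install the SelectionDAG instruction selector defined in this file.
  addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
  return false;
}
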
#define N
EVT
Extended Value Type.
Definition: ValueTypes.h:34
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:349
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:203
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:198
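
A small, hypothetical example (not from this file) of the EVT helpers listed above, showing how a scalable vector type is built and queried:

#include "llvm/CodeGen/ValueTypes.h"
#include <cassert>
#include <cstdint>

static void evtExample(llvm::LLVMContext &Ctx) {
  using llvm::EVT;
  EVT I32 = EVT::getIntegerVT(Ctx, 32);
  // <vscale x 4 x i32>: scalable, with a minimum of 4 lanes.
  EVT NxV4I32 = EVT::getVectorVT(Ctx, I32, 4, /*IsScalable=*/true);
  assert(NxV4I32.isScalableVector() && !NxV4I32.isFixedLengthVector());
  // Minimum size in bits: 4 lanes * 32 bits = 128.
  uint64_t MinBits =
      NxV4I32.getVectorMinNumElements() * NxV4I32.getScalarSizeInBits();
  (void)MinBits;
}
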
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40