1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
16#include "llvm/ADT/APSInt.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34//===--------------------------------------------------------------------===//
35/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
36/// instructions for SelectionDAG operations.
37///
38namespace {
39
40class AArch64DAGToDAGISel : public SelectionDAGISel {
41
42 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
43 /// make the right decision when generating code for different targets.
44 const AArch64Subtarget *Subtarget;
45
46public:
47 static char ID;
48
49 AArch64DAGToDAGISel() = delete;
50
51 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
52 CodeGenOptLevel OptLevel)
53 : SelectionDAGISel(ID, tm, OptLevel), Subtarget(nullptr) {}
54
55 bool runOnMachineFunction(MachineFunction &MF) override {
56 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
57 return SelectionDAGISel::runOnMachineFunction(MF);
58 }
59
60 void Select(SDNode *Node) override;
61
62 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
63 /// inline asm expressions.
64 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
65 InlineAsm::ConstraintCode ConstraintID,
66 std::vector<SDValue> &OutOps) override;
67
68 template <signed Low, signed High, signed Scale>
69 bool SelectRDVLImm(SDValue N, SDValue &Imm);
70
71 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
72 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
73 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
74 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
75 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
76 return SelectShiftedRegister(N, false, Reg, Shift);
77 }
78 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
79 return SelectShiftedRegister(N, true, Reg, Shift);
80 }
81 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
82 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
83 }
84 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
85 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
86 }
87 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
88 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
89 }
90 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
91 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
92 }
93 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
94 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
95 }
96 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
97 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
98 }
99 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
100 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
101 }
102 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
103 return SelectAddrModeIndexed(N, 1, Base, OffImm);
104 }
105 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
106 return SelectAddrModeIndexed(N, 2, Base, OffImm);
107 }
108 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
109 return SelectAddrModeIndexed(N, 4, Base, OffImm);
110 }
111 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
112 return SelectAddrModeIndexed(N, 8, Base, OffImm);
113 }
114 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
115 return SelectAddrModeIndexed(N, 16, Base, OffImm);
116 }
117 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
118 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
119 }
120 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
121 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
122 }
123 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
124 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
125 }
126 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
127 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
128 }
129 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
130 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
131 }
132 template <unsigned Size, unsigned Max>
133 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
134 // Test if there is an appropriate addressing mode and check if the
135 // immediate fits.
136 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
137 if (Found) {
138 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
139 int64_t C = CI->getSExtValue();
140 if (C <= Max)
141 return true;
142 }
143 }
144
145 // Otherwise, base only, materialize address in register.
146 Base = N;
147 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
148 return true;
149 }
150
151 template<int Width>
152 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
153 SDValue &SignExtend, SDValue &DoShift) {
154 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
155 }
156
157 template<int Width>
158 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
159 SDValue &SignExtend, SDValue &DoShift) {
160 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
161 }
162
163 bool SelectExtractHigh(SDValue N, SDValue &Res) {
164 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
165 N = N->getOperand(0);
166 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
167 !isa<ConstantSDNode>(N->getOperand(1)))
168 return false;
169 EVT VT = N->getValueType(0);
170 EVT LVT = N->getOperand(0).getValueType();
171 unsigned Index = N->getConstantOperandVal(1);
172 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
173 Index != VT.getVectorNumElements())
174 return false;
175 Res = N->getOperand(0);
176 return true;
177 }
178
179 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
180 if (N.getOpcode() != AArch64ISD::VLSHR)
181 return false;
182 SDValue Op = N->getOperand(0);
183 EVT VT = Op.getValueType();
184 unsigned ShtAmt = N->getConstantOperandVal(1);
185 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
186 return false;
187
188 APInt Imm;
189 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
190 Imm = APInt(VT.getScalarSizeInBits(),
191 Op.getOperand(1).getConstantOperandVal(0)
192 << Op.getOperand(1).getConstantOperandVal(1));
193 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
194 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
195 Imm = APInt(VT.getScalarSizeInBits(),
196 Op.getOperand(1).getConstantOperandVal(0));
197 else
198 return false;
199
200 if (Imm != 1ULL << (ShtAmt - 1))
201 return false;
202
203 Res1 = Op.getOperand(0);
204 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
205 return true;
206 }
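// Illustrative match (example values): for (AArch64ISD::VLSHR (add X, (dup 8)), 4)
// on a type with 16-bit elements, the splatted constant equals 1 << (4 - 1), so
// Res1 = X and Res2 = 4; this is the "add rounding bias, then shift" shape used
// by rounding shift-right instructions.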
207
208 bool SelectDupZeroOrUndef(SDValue N) {
209 switch(N->getOpcode()) {
210 case ISD::UNDEF:
211 return true;
212 case AArch64ISD::DUP:
213 case ISD::SPLAT_VECTOR: {
214 auto Opnd0 = N->getOperand(0);
215 if (isNullConstant(Opnd0))
216 return true;
217 if (isNullFPConstant(Opnd0))
218 return true;
219 break;
220 }
221 default:
222 break;
223 }
224
225 return false;
226 }
227
228 bool SelectDupZero(SDValue N) {
229 switch(N->getOpcode()) {
230 case AArch64ISD::DUP:
231 case ISD::SPLAT_VECTOR: {
232 auto Opnd0 = N->getOperand(0);
233 if (isNullConstant(Opnd0))
234 return true;
235 if (isNullFPConstant(Opnd0))
236 return true;
237 break;
238 }
239 }
240
241 return false;
242 }
243
244 bool SelectDupNegativeZero(SDValue N) {
245 switch(N->getOpcode()) {
246 case AArch64ISD::DUP:
247 case ISD::SPLAT_VECTOR: {
248 ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
249 return Const && Const->isZero() && Const->isNegative();
250 }
251 }
252
253 return false;
254 }
255
256 template<MVT::SimpleValueType VT>
257 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
258 return SelectSVEAddSubImm(N, VT, Imm, Shift);
259 }
260
261 template <MVT::SimpleValueType VT, bool Negate>
262 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
263 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
264 }
265
266 template <MVT::SimpleValueType VT>
267 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
268 return SelectSVECpyDupImm(N, VT, Imm, Shift);
269 }
270
271 template <MVT::SimpleValueType VT, bool Invert = false>
272 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
273 return SelectSVELogicalImm(N, VT, Imm, Invert);
274 }
275
276 template <MVT::SimpleValueType VT>
277 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
278 return SelectSVEArithImm(N, VT, Imm);
279 }
280
281 template <unsigned Low, unsigned High, bool AllowSaturation = false>
282 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
283 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
284 }
285
286 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
287 if (N->getOpcode() != ISD::SPLAT_VECTOR)
288 return false;
289
290 EVT EltVT = N->getValueType(0).getVectorElementType();
291 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
292 /* High */ EltVT.getFixedSizeInBits(),
293 /* AllowSaturation */ true, Imm);
294 }
295
296 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
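// For example (illustrative values): with Scale = 16, Shift = false and a
// constant operand of 32, the multiplier 32 / 16 = 2 is returned, provided
// 2 lies within [Min, Max]; otherwise the pattern is rejected.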
297 template<signed Min, signed Max, signed Scale, bool Shift>
298 bool SelectCntImm(SDValue N, SDValue &Imm) {
299 if (!isa<ConstantSDNode>(N))
300 return false;
301
302 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
303 if (Shift)
304 MulImm = 1LL << MulImm;
305
306 if ((MulImm % std::abs(Scale)) != 0)
307 return false;
308
309 MulImm /= Scale;
310 if ((MulImm >= Min) && (MulImm <= Max)) {
311 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
312 return true;
313 }
314
315 return false;
316 }
317
318 template <signed Max, signed Scale>
319 bool SelectEXTImm(SDValue N, SDValue &Imm) {
320 if (!isa<ConstantSDNode>(N))
321 return false;
322
323 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
324
325 if (MulImm >= 0 && MulImm <= Max) {
326 MulImm *= Scale;
327 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
328 return true;
329 }
330
331 return false;
332 }
333
334 template <unsigned BaseReg, unsigned Max>
335 bool ImmToReg(SDValue N, SDValue &Imm) {
336 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
337 uint64_t C = CI->getZExtValue();
338
339 if (C > Max)
340 return false;
341
342 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
343 return true;
344 }
345 return false;
346 }
347
348 /// Form sequences of consecutive 64/128-bit registers for use in NEON
349 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
350 /// between 1 and 4 elements. If it contains a single element, that is returned
351 /// unchanged; otherwise a REG_SEQUENCE value is returned.
352 SDValue createDTuple(ArrayRef<SDValue> Vecs);
353 SDValue createQTuple(ArrayRef<SDValue> Vecs);
354 // Form a sequence of SVE registers for instructions using list of vectors,
355 // e.g. structured loads and stores (ldN, stN).
356 SDValue createZTuple(ArrayRef<SDValue> Vecs);
357
358 // Similar to above, except the register must start at a multiple of the
359 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
360 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
361
362 /// Generic helper for the createDTuple/createQTuple
363 /// functions. Those should almost always be called instead.
364 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
365 const unsigned SubRegs[]);
366
367 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
368
369 bool tryIndexedLoad(SDNode *N);
370
371 bool trySelectStackSlotTagP(SDNode *N);
372 void SelectTagP(SDNode *N);
373
374 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
375 unsigned SubRegIdx);
376 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
377 unsigned SubRegIdx);
378 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
379 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
380 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
381 unsigned Opc_rr, unsigned Opc_ri,
382 bool IsIntr = false);
383 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
384 unsigned Scale, unsigned Opc_ri,
385 unsigned Opc_rr);
386 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
387 bool IsZmMulti, unsigned Opcode,
388 bool HasPred = false);
389 void SelectPExtPair(SDNode *N, unsigned Opc);
390 void SelectWhilePair(SDNode *N, unsigned Opc);
391 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
392 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
393 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
394 bool IsTupleInput, unsigned Opc);
395 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
396
397 template <unsigned MaxIdx, unsigned Scale>
398 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
399 unsigned Op);
400
401 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
402 /// SVE Reg+Imm addressing mode.
403 template <int64_t Min, int64_t Max>
404 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
405 SDValue &OffImm);
406 /// SVE Reg+Reg address mode.
407 template <unsigned Scale>
408 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
409 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
410 }
411
412 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc,
413 uint32_t MaxImm);
414
415 template <unsigned MaxIdx, unsigned Scale>
416 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
417 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
418 }
419
420 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
421 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
422 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
423 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
424 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
425 unsigned Opc_rr, unsigned Opc_ri);
426 std::tuple<unsigned, SDValue, SDValue>
427 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
428 const SDValue &OldBase, const SDValue &OldOffset,
429 unsigned Scale);
430
431 bool tryBitfieldExtractOp(SDNode *N);
432 bool tryBitfieldExtractOpFromSExt(SDNode *N);
433 bool tryBitfieldInsertOp(SDNode *N);
434 bool tryBitfieldInsertInZeroOp(SDNode *N);
435 bool tryShiftAmountMod(SDNode *N);
436
437 bool tryReadRegister(SDNode *N);
438 bool tryWriteRegister(SDNode *N);
439
440 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
441 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
442
443 bool trySelectXAR(SDNode *N);
444
445// Include the pieces autogenerated from the target description.
446#include "AArch64GenDAGISel.inc"
447
448private:
449 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
450 SDValue &Shift);
451 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
452 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
453 SDValue &OffImm) {
454 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
455 }
456 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
457 unsigned Size, SDValue &Base,
458 SDValue &OffImm);
459 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
460 SDValue &OffImm);
461 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
462 SDValue &OffImm);
463 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
464 SDValue &Offset, SDValue &SignExtend,
465 SDValue &DoShift);
466 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
467 SDValue &Offset, SDValue &SignExtend,
468 SDValue &DoShift);
469 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
470 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
471 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
472 SDValue &Offset, SDValue &SignExtend);
473
474 template<unsigned RegWidth>
475 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
476 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
477 }
478
479 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
480
481 template<unsigned RegWidth>
482 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
483 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
484 }
485
486 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
487 unsigned Width);
488
489 bool SelectCMP_SWAP(SDNode *N);
490
491 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
492 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
493 bool Negate);
494 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
495 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
496
497 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
498 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
499 bool AllowSaturation, SDValue &Imm);
500
501 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
502 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
503 SDValue &Offset);
504 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
505 SDValue &Offset, unsigned Scale = 1);
506
507 bool SelectAllActivePredicate(SDValue N);
508 bool SelectAnyPredicate(SDValue N);
509};
510} // end anonymous namespace
511
512char AArch64DAGToDAGISel::ID = 0;
513
514INITIALIZE_PASS(AArch64DAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)
515
516/// isIntImmediate - This method tests to see if the node is a constant
517/// operand. If so Imm will receive the 32-bit value.
518static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
519 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
520 Imm = C->getZExtValue();
521 return true;
522 }
523 return false;
524}
525
526 // isIntImmediate - This method tests to see if N is a constant operand.
527// If so Imm will receive the value.
528static bool isIntImmediate(SDValue N, uint64_t &Imm) {
529 return isIntImmediate(N.getNode(), Imm);
530}
531
532// isOpcWithIntImmediate - This method tests to see if the node is a specific
533 // opcode and that it has an immediate integer right operand.
534// If so Imm will receive the 32 bit value.
535static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
536 uint64_t &Imm) {
537 return N->getOpcode() == Opc &&
538 isIntImmediate(N->getOperand(1).getNode(), Imm);
539}
540
541// isIntImmediateEq - This method tests to see if N is a constant operand that
542// is equivalent to 'ImmExpected'.
543#ifndef NDEBUG
544static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
545 uint64_t Imm;
546 if (!isIntImmediate(N.getNode(), Imm))
547 return false;
548 return Imm == ImmExpected;
549}
550#endif
551
552bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
553 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
554 std::vector<SDValue> &OutOps) {
555 switch(ConstraintID) {
556 default:
557 llvm_unreachable("Unexpected asm memory constraint");
558 case InlineAsm::ConstraintCode::m:
559 case InlineAsm::ConstraintCode::o:
560 case InlineAsm::ConstraintCode::Q:
561 // We need to make sure that this one operand does not end up in XZR, thus
562 // require the address to be in a PointerRegClass register.
563 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
564 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
565 SDLoc dl(Op);
566 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
567 SDValue NewOp =
568 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
569 dl, Op.getValueType(),
570 Op, RC), 0);
571 OutOps.push_back(NewOp);
572 return false;
573 }
574 return true;
575}
576
577/// SelectArithImmed - Select an immediate value that can be represented as
578/// a 12-bit value shifted left by either 0 or 12. If so, return true with
579/// Val set to the 12-bit value and Shift set to the shifter operand.
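///
/// Illustrative matches (example values, not from the original comment):
///   0x000fff  -> Val = 0xfff, Shift = LSL #0   (fits in the low 12 bits)
///   0xabc000  -> Val = 0xabc, Shift = LSL #12  (low 12 bits are clear)
///   0x001001  -> no match (spans more than one 12-bit chunk)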
580bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
581 SDValue &Shift) {
582 // This function is called from the addsub_shifted_imm ComplexPattern,
583 // which lists [imm] as the list of opcodes it's interested in; however,
584 // we still need to check whether the operand is actually an immediate
585 // here because the ComplexPattern opcode list is only used in
586 // root-level opcode matching.
587 if (!isa<ConstantSDNode>(N.getNode()))
588 return false;
589
590 uint64_t Immed = N.getNode()->getAsZExtVal();
591 unsigned ShiftAmt;
592
593 if (Immed >> 12 == 0) {
594 ShiftAmt = 0;
595 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
596 ShiftAmt = 12;
597 Immed = Immed >> 12;
598 } else
599 return false;
600
601 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
602 SDLoc dl(N);
603 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
604 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
605 return true;
606}
607
608/// SelectNegArithImmed - As above, but negates the value before trying to
609/// select it.
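///
/// Sketch of the intent (illustrative values): an immediate of -16 is negated
/// to 16, which SelectArithImmed accepts, so the matcher can switch to the
/// complementary ADD/SUB (or CMP/CMN) form of the instruction.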
610bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
611 SDValue &Shift) {
612 // This function is called from the addsub_shifted_imm ComplexPattern,
613 // which lists [imm] as the list of opcodes it's interested in; however,
614 // we still need to check whether the operand is actually an immediate
615 // here because the ComplexPattern opcode list is only used in
616 // root-level opcode matching.
617 if (!isa<ConstantSDNode>(N.getNode()))
618 return false;
619
620 // The immediate operand must be a 24-bit zero-extended immediate.
621 uint64_t Immed = N.getNode()->getAsZExtVal();
622
623 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
624 // have the opposite effect on the C flag, so this pattern mustn't match under
625 // those circumstances.
626 if (Immed == 0)
627 return false;
628
629 if (N.getValueType() == MVT::i32)
630 Immed = ~((uint32_t)Immed) + 1;
631 else
632 Immed = ~Immed + 1ULL;
633 if (Immed & 0xFFFFFFFFFF000000ULL)
634 return false;
635
636 Immed &= 0xFFFFFFULL;
637 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
638 Shift);
639}
640
641/// getShiftTypeForNode - Translate a shift node to the corresponding
642/// ShiftType value.
643 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
644 switch (N.getOpcode()) {
645 default:
646 return AArch64_AM::InvalidShiftExtend;
647 case ISD::SHL:
648 return AArch64_AM::LSL;
649 case ISD::SRL:
650 return AArch64_AM::LSR;
651 case ISD::SRA:
652 return AArch64_AM::ASR;
653 case ISD::ROTR:
654 return AArch64_AM::ROR;
655 }
656}
657
658/// Determine whether it is worth it to fold SHL into the addressing
659/// mode.
660 static bool isWorthFoldingSHL(SDValue V) {
661 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
662 // It is worth folding logical shift of up to three places.
663 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
664 if (!CSD)
665 return false;
666 unsigned ShiftVal = CSD->getZExtValue();
667 if (ShiftVal > 3)
668 return false;
669
670 // Check if this particular node is reused in any non-memory related
671 // operation. If yes, do not try to fold this node into the address
672 // computation, since the computation will be kept.
673 const SDNode *Node = V.getNode();
674 for (SDNode *UI : Node->uses())
675 if (!isa<MemSDNode>(*UI))
676 for (SDNode *UII : UI->uses())
677 if (!isa<MemSDNode>(*UII))
678 return false;
679 return true;
680}
681
682 /// Determine whether it is worth folding V into an extended register addressing
683/// mode.
684bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
685 // Trivial if we are optimizing for code size or if there is only
686 // one use of the value.
687 if (CurDAG->shouldOptForSize() || V.hasOneUse())
688 return true;
689
690 // If a subtarget has a slow shift, folding a shift into multiple loads
691 // costs additional micro-ops.
692 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
693 return false;
694
695 // Check whether we're going to emit the address arithmetic anyway because
696 // it's used by a non-address operation.
697 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
698 return true;
699 if (V.getOpcode() == ISD::ADD) {
700 const SDValue LHS = V.getOperand(0);
701 const SDValue RHS = V.getOperand(1);
702 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
703 return true;
704 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
705 return true;
706 }
707
708 // It hurts otherwise, since the value will be reused.
709 return false;
710}
711
712/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
713/// to select more shifted register
714bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
715 SDValue &Shift) {
716 EVT VT = N.getValueType();
717 if (VT != MVT::i32 && VT != MVT::i64)
718 return false;
719
720 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
721 return false;
722 SDValue LHS = N.getOperand(0);
723 if (!LHS->hasOneUse())
724 return false;
725
726 unsigned LHSOpcode = LHS->getOpcode();
727 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
728 return false;
729
730 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
731 if (!ShiftAmtNode)
732 return false;
733
734 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
735 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
736 if (!RHSC)
737 return false;
738
739 APInt AndMask = RHSC->getAPIntValue();
740 unsigned LowZBits, MaskLen;
741 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
742 return false;
743
744 unsigned BitWidth = N.getValueSizeInBits();
745 SDLoc DL(LHS);
746 uint64_t NewShiftC;
747 unsigned NewShiftOp;
748 if (LHSOpcode == ISD::SHL) {
749 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
750 // BitWidth != LowZBits + MaskLen doesn't match the pattern
751 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
752 return false;
753
754 NewShiftC = LowZBits - ShiftAmtC;
755 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
756 } else {
757 if (LowZBits == 0)
758 return false;
759
760 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
761 NewShiftC = LowZBits + ShiftAmtC;
762 if (NewShiftC >= BitWidth)
763 return false;
764
765 // SRA need all high bits
766 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
767 return false;
768
769 // SRL high bits can be 0 or 1
770 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
771 return false;
772
773 if (LHSOpcode == ISD::SRL)
774 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
775 else
776 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
777 }
778
779 assert(NewShiftC < BitWidth && "Invalid shift amount");
780 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
781 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
782 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
783 NewShiftAmt, BitWidthMinus1),
784 0);
785 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
786 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
787 return true;
788}
789
790/// getExtendTypeForNode - Translate an extend node to the corresponding
791/// ExtendType value.
792 static AArch64_AM::ShiftExtendType
793 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
794 if (N.getOpcode() == ISD::SIGN_EXTEND ||
795 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
796 EVT SrcVT;
797 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
798 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
799 else
800 SrcVT = N.getOperand(0).getValueType();
801
802 if (!IsLoadStore && SrcVT == MVT::i8)
803 return AArch64_AM::SXTB;
804 else if (!IsLoadStore && SrcVT == MVT::i16)
805 return AArch64_AM::SXTH;
806 else if (SrcVT == MVT::i32)
807 return AArch64_AM::SXTW;
808 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
809
810 return AArch64_AM::InvalidShiftExtend;
811 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
812 N.getOpcode() == ISD::ANY_EXTEND) {
813 EVT SrcVT = N.getOperand(0).getValueType();
814 if (!IsLoadStore && SrcVT == MVT::i8)
815 return AArch64_AM::UXTB;
816 else if (!IsLoadStore && SrcVT == MVT::i16)
817 return AArch64_AM::UXTH;
818 else if (SrcVT == MVT::i32)
819 return AArch64_AM::UXTW;
820 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
821
822 return AArch64_AM::InvalidShiftExtend;
823 } else if (N.getOpcode() == ISD::AND) {
824 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
825 if (!CSD)
826 return AArch64_AM::InvalidShiftExtend;
827 uint64_t AndMask = CSD->getZExtValue();
828
829 switch (AndMask) {
830 default:
831 return AArch64_AM::InvalidShiftExtend;
832 case 0xFF:
833 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
834 case 0xFFFF:
835 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
836 case 0xFFFFFFFF:
837 return AArch64_AM::UXTW;
838 }
839 }
840
841 return AArch64_AM::InvalidShiftExtend;
842 }
843
844 /// Determine whether it is worth folding V into an extended register of an
845 /// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
846 /// instruction, and the shift should be treated as worth folding even if it
847 /// has multiple uses.
848bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
849 // Trivial if we are optimizing for code size or if there is only
850 // one use of the value.
851 if (CurDAG->shouldOptForSize() || V.hasOneUse())
852 return true;
853
854 // If a subtarget has a fastpath LSL we can fold a logical shift into
855 // the add/sub and save a cycle.
856 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
857 V.getConstantOperandVal(1) <= 4 &&
858 getExtendTypeForNode(V.getOperand(0)) == AArch64_AM::InvalidShiftExtend)
859 return true;
860
861 // It hurts otherwise, since the value will be reused.
862 return false;
863}
864
865/// SelectShiftedRegister - Select a "shifted register" operand. If the value
866/// is not shifted, set the Shift operand to default of "LSL 0". The logical
867/// instructions allow the shifted register to be rotated, but the arithmetic
868/// instructions do not. The AllowROR parameter specifies whether ROR is
869/// supported.
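///
/// For example (illustrative): the operand (shl w1, 3) under an ADD becomes
/// Reg = w1 with Shift encoding "LSL #3", so the whole ADD can be emitted as
/// "add w0, w2, w1, lsl #3"; under a logical op such as AND, a ROR shift is
/// also accepted.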
870bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
871 SDValue &Reg, SDValue &Shift) {
872 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
873 return true;
874
875 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
876 if (ShType == AArch64_AM::InvalidShiftExtend)
877 return false;
878 if (!AllowROR && ShType == AArch64_AM::ROR)
879 return false;
880
881 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
882 unsigned BitSize = N.getValueSizeInBits();
883 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
884 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
885
886 Reg = N.getOperand(0);
887 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
888 return isWorthFoldingALU(N, true);
889 }
890
891 return false;
892}
893
894/// Instructions that accept extend modifiers like UXTW expect the register
895/// being extended to be a GPR32, but the incoming DAG might be acting on a
896/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
897/// this is the case.
898 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
899 if (N.getValueType() == MVT::i32)
900 return N;
901
902 SDLoc dl(N);
903 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
904}
905
906// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
907template<signed Low, signed High, signed Scale>
908bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
909 if (!isa<ConstantSDNode>(N))
910 return false;
911
912 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
913 if ((MulImm % std::abs(Scale)) == 0) {
914 int64_t RDVLImm = MulImm / Scale;
915 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
916 Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
917 return true;
918 }
919 }
920
921 return false;
922}
923
924 /// SelectArithExtendedRegister - Select an "extended register" operand. This
925/// operand folds in an extend followed by an optional left shift.
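///
/// For example (illustrative): (add x0, (shl (sext_inreg w1, i16), #2)) can be
/// selected as "add x0, x0, w1, sxth #2", i.e. Reg = w1 and Shift encoding
/// SXTH with a left shift of 2.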
926bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
927 SDValue &Shift) {
928 unsigned ShiftVal = 0;
929 AArch64_AM::ShiftExtendType Ext;
930
931 if (N.getOpcode() == ISD::SHL) {
932 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
933 if (!CSD)
934 return false;
935 ShiftVal = CSD->getZExtValue();
936 if (ShiftVal > 4)
937 return false;
938
939 Ext = getExtendTypeForNode(N.getOperand(0));
940 if (Ext == AArch64_AM::InvalidShiftExtend)
941 return false;
942
943 Reg = N.getOperand(0).getOperand(0);
944 } else {
945 Ext = getExtendTypeForNode(N);
946 if (Ext == AArch64_AM::InvalidShiftExtend)
947 return false;
948
949 Reg = N.getOperand(0);
950
951 // Don't match if a free 32-bit -> 64-bit zext can be used instead. Use
952 // isDef32 as a heuristic for when the operand is likely to be a 32-bit def.
953 auto isDef32 = [](SDValue N) {
954 unsigned Opc = N.getOpcode();
955 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
956 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
957 Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
958 Opc != ISD::FREEZE;
959 };
960 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
961 isDef32(Reg))
962 return false;
963 }
964
965 // AArch64 mandates that the RHS of the operation must use the smallest
966 // register class that could contain the size being extended from. Thus,
967 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
968 // there might not be an actual 32-bit value in the program. We can
969 // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
970 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
971 Reg = narrowIfNeeded(CurDAG, Reg);
972 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
973 MVT::i32);
974 return isWorthFoldingALU(N);
975}
976
977/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
978 /// operand is used by instructions that accept an SP operand.
979bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
980 SDValue &Shift) {
981 unsigned ShiftVal = 0;
982 AArch64_AM::ShiftExtendType Ext;
983
984 if (N.getOpcode() != ISD::SHL)
985 return false;
986
987 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
988 if (!CSD)
989 return false;
990 ShiftVal = CSD->getZExtValue();
991 if (ShiftVal > 4)
992 return false;
993
994 Ext = AArch64_AM::UXTX;
995 Reg = N.getOperand(0);
996 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
997 MVT::i32);
998 return isWorthFoldingALU(N);
999}
1000
1001/// If there's a use of this ADDlow that's not itself a load/store then we'll
1002/// need to create a real ADD instruction from it anyway and there's no point in
1003/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1004/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1005/// leads to duplicated ADRP instructions.
1006 static bool isWorthFoldingADDlow(SDValue N) {
1007 for (auto *Use : N->uses()) {
1008 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
1009 Use->getOpcode() != ISD::ATOMIC_LOAD &&
1010 Use->getOpcode() != ISD::ATOMIC_STORE)
1011 return false;
1012
1013 // ldar and stlr have much more restrictive addressing modes (just a
1014 // register).
1015 if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getSuccessOrdering()))
1016 return false;
1017 }
1018
1019 return true;
1020}
1021
1022/// Check if the immediate offset is valid as a scaled immediate.
1023static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1024 unsigned Size) {
1025 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1026 Offset < (Range << Log2_32(Size)))
1027 return true;
1028 return false;
1029}
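// Illustrative checks (example values): Offset = 40, Range = 0x1000, Size = 8
// is valid (8-aligned, non-negative, and below 0x1000 * 8), whereas Offset = 36
// (misaligned) or Offset = -8 (negative) is rejected.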
1030
1031/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1032/// immediate" address. The "Size" argument is the size in bytes of the memory
1033/// reference, which determines the scale.
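///
/// Illustrative ranges (assuming the usual scaled-immediate encodings): with
/// BW = 7 (signed) and Size = 8 this accepts byte offsets in [-512, 504] that
/// are multiples of 8, i.e. the LDP/STP imm7 range; with BW = 6 (unsigned) and
/// Size = 16 it accepts offsets in [0, 1008] that are multiples of 16.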
1034bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1035 unsigned BW, unsigned Size,
1036 SDValue &Base,
1037 SDValue &OffImm) {
1038 SDLoc dl(N);
1039 const DataLayout &DL = CurDAG->getDataLayout();
1040 const TargetLowering *TLI = getTargetLowering();
1041 if (N.getOpcode() == ISD::FrameIndex) {
1042 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1043 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1044 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1045 return true;
1046 }
1047
1048 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
1049 // addressing mode selected here doesn't support labels/immediates, only base+offset.
1050 if (CurDAG->isBaseWithConstantOffset(N)) {
1051 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1052 if (IsSignedImm) {
1053 int64_t RHSC = RHS->getSExtValue();
1054 unsigned Scale = Log2_32(Size);
1055 int64_t Range = 0x1LL << (BW - 1);
1056
1057 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1058 RHSC < (Range << Scale)) {
1059 Base = N.getOperand(0);
1060 if (Base.getOpcode() == ISD::FrameIndex) {
1061 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1062 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1063 }
1064 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1065 return true;
1066 }
1067 } else {
1068 // unsigned Immediate
1069 uint64_t RHSC = RHS->getZExtValue();
1070 unsigned Scale = Log2_32(Size);
1071 uint64_t Range = 0x1ULL << BW;
1072
1073 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1074 Base = N.getOperand(0);
1075 if (Base.getOpcode() == ISD::FrameIndex) {
1076 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1077 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1078 }
1079 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1080 return true;
1081 }
1082 }
1083 }
1084 }
1085 // Base only. The address will be materialized into a register before
1086 // the memory is accessed.
1087 // add x0, Xbase, #offset
1088 // stp x1, x2, [x0]
1089 Base = N;
1090 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1091 return true;
1092}
1093
1094/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1095/// immediate" address. The "Size" argument is the size in bytes of the memory
1096/// reference, which determines the scale.
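///
/// For example (illustrative): with Size = 8, an address of the form
/// (add xN, #32) is selected as base xN with OffImm = 4, since the unsigned
/// 12-bit immediate is scaled by the access size (byte offsets 0..32760 in
/// steps of 8).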
1097bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1098 SDValue &Base, SDValue &OffImm) {
1099 SDLoc dl(N);
1100 const DataLayout &DL = CurDAG->getDataLayout();
1101 const TargetLowering *TLI = getTargetLowering();
1102 if (N.getOpcode() == ISD::FrameIndex) {
1103 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1104 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1105 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1106 return true;
1107 }
1108
1109 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1110 GlobalAddressSDNode *GAN =
1111 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1112 Base = N.getOperand(0);
1113 OffImm = N.getOperand(1);
1114 if (!GAN)
1115 return true;
1116
1117 if (GAN->getOffset() % Size == 0 &&
1118 GAN->getGlobal()->getPointerAlignment(DL) >= Size)
1119 return true;
1120 }
1121
1122 if (CurDAG->isBaseWithConstantOffset(N)) {
1123 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1124 int64_t RHSC = (int64_t)RHS->getZExtValue();
1125 unsigned Scale = Log2_32(Size);
1126 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1127 Base = N.getOperand(0);
1128 if (Base.getOpcode() == ISD::FrameIndex) {
1129 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1130 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1131 }
1132 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1133 return true;
1134 }
1135 }
1136 }
1137
1138 // Before falling back to our general case, check if the unscaled
1139 // instructions can handle this. If so, that's preferable.
1140 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1141 return false;
1142
1143 // Base only. The address will be materialized into a register before
1144 // the memory is accessed.
1145 // add x0, Xbase, #offset
1146 // ldr x0, [x0]
1147 Base = N;
1148 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1149 return true;
1150}
1151
1152/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1153/// immediate" address. This should only match when there is an offset that
1154/// is not valid for a scaled immediate addressing mode. The "Size" argument
1155/// is the size in bytes of the memory reference, which is needed here to know
1156/// what is valid for a scaled immediate.
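///
/// Illustrative case: a byte offset of -17 cannot use the scaled form above,
/// but it fits the signed 9-bit unscaled range [-256, 255], so it is selected
/// here and ends up on an LDUR/STUR-style instruction.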
1157bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1158 SDValue &Base,
1159 SDValue &OffImm) {
1160 if (!CurDAG->isBaseWithConstantOffset(N))
1161 return false;
1162 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1163 int64_t RHSC = RHS->getSExtValue();
1164 if (RHSC >= -256 && RHSC < 256) {
1165 Base = N.getOperand(0);
1166 if (Base.getOpcode() == ISD::FrameIndex) {
1167 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1168 const TargetLowering *TLI = getTargetLowering();
1169 Base = CurDAG->getTargetFrameIndex(
1170 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1171 }
1172 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1173 return true;
1174 }
1175 }
1176 return false;
1177}
1178
1179 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
1180 SDLoc dl(N);
1181 SDValue ImpDef = SDValue(
1182 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1183 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1184 N);
1185}
1186
1187/// Check if the given SHL node (\p N), can be used to form an
1188/// extended register for an addressing mode.
1189bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1190 bool WantExtend, SDValue &Offset,
1191 SDValue &SignExtend) {
1192 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1193 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1194 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1195 return false;
1196
1197 SDLoc dl(N);
1198 if (WantExtend) {
1199 AArch64_AM::ShiftExtendType Ext =
1200 getExtendTypeForNode(N.getOperand(0), true);
1201 if (Ext == AArch64_AM::InvalidShiftExtend)
1202 return false;
1203
1204 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1205 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1206 MVT::i32);
1207 } else {
1208 Offset = N.getOperand(0);
1209 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1210 }
1211
1212 unsigned LegalShiftVal = Log2_32(Size);
1213 unsigned ShiftVal = CSD->getZExtValue();
1214
1215 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1216 return false;
1217
1218 return isWorthFoldingAddr(N, Size);
1219}
1220
1221bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1222 SDValue &Base, SDValue &Offset,
1223 SDValue &SignExtend,
1224 SDValue &DoShift) {
1225 if (N.getOpcode() != ISD::ADD)
1226 return false;
1227 SDValue LHS = N.getOperand(0);
1228 SDValue RHS = N.getOperand(1);
1229 SDLoc dl(N);
1230
1231 // We don't want to match immediate adds here, because they are better lowered
1232 // to the register-immediate addressing modes.
1233 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
1234 return false;
1235
1236 // Check if this particular node is reused in any non-memory related
1237 // operation. If yes, do not try to fold this node into the address
1238 // computation, since the computation will be kept.
1239 const SDNode *Node = N.getNode();
1240 for (SDNode *UI : Node->uses()) {
1241 if (!isa<MemSDNode>(*UI))
1242 return false;
1243 }
1244
1245 // Remember if it is worth folding N when it produces extended register.
1246 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1247
1248 // Try to match a shifted extend on the RHS.
1249 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1250 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1251 Base = LHS;
1252 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1253 return true;
1254 }
1255
1256 // Try to match a shifted extend on the LHS.
1257 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1258 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1259 Base = RHS;
1260 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1261 return true;
1262 }
1263
1264 // There was no shift, whatever else we find.
1265 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1266
1267 AArch64_AM::ShiftExtendType Ext;
1268 // Try to match an unshifted extend on the LHS.
1269 if (IsExtendedRegisterWorthFolding &&
1270 (Ext = getExtendTypeForNode(LHS, true)) !=
1271 AArch64_AM::InvalidShiftExtend) {
1272 Base = RHS;
1273 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1274 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1275 MVT::i32);
1276 if (isWorthFoldingAddr(LHS, Size))
1277 return true;
1278 }
1279
1280 // Try to match an unshifted extend on the RHS.
1281 if (IsExtendedRegisterWorthFolding &&
1282 (Ext = getExtendTypeForNode(RHS, true)) !=
1283 AArch64_AM::InvalidShiftExtend) {
1284 Base = LHS;
1285 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1286 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1287 MVT::i32);
1288 if (isWorthFoldingAddr(RHS, Size))
1289 return true;
1290 }
1291
1292 return false;
1293}
1294
1295// Check if the given immediate is preferred by ADD. If an immediate can be
1296// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
1297// encoded by one MOVZ, return true.
1298static bool isPreferredADD(int64_t ImmOff) {
1299 // Constant in [0x0, 0xfff] can be encoded in ADD.
1300 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1301 return true;
1302 // Check if it can be encoded in an "ADD LSL #12".
1303 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1304 // As a single MOVZ is faster than an "ADD with LSL #12", ignore such constants.
1305 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1306 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1307 return false;
1308}
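// Illustrative values (derived from the masks above, examples assumed):
// 0x123 is encodable directly in an ADD; 0x123000 needs "ADD ..., LSL #12" but
// cannot be built by a single MOVZ, so it is still preferred; 0x10000 returns
// false because "MOVZ #1, LSL #16" materializes it in one instruction.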
1309
1310bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1311 SDValue &Base, SDValue &Offset,
1312 SDValue &SignExtend,
1313 SDValue &DoShift) {
1314 if (N.getOpcode() != ISD::ADD)
1315 return false;
1316 SDValue LHS = N.getOperand(0);
1317 SDValue RHS = N.getOperand(1);
1318 SDLoc DL(N);
1319
1320 // Check if this particular node is reused in any non-memory related
1321 // operation. If yes, do not try to fold this node into the address
1322 // computation, since the computation will be kept.
1323 const SDNode *Node = N.getNode();
1324 for (SDNode *UI : Node->uses()) {
1325 if (!isa<MemSDNode>(*UI))
1326 return false;
1327 }
1328
1329 // Watch out if RHS is a wide immediate: it cannot be selected into the
1330 // [BaseReg+Imm] addressing mode, and it may not be encodable in an
1331 // ADD/SUB. Instead it will use the [BaseReg + 0] address mode and generate
1332 // instructions like:
1333 // MOV X0, WideImmediate
1334 // ADD X1, BaseReg, X0
1335 // LDR X2, [X1, 0]
1336 // For such situation, using [BaseReg, XReg] addressing mode can save one
1337 // ADD/SUB:
1338 // MOV X0, WideImmediate
1339 // LDR X2, [BaseReg, X0]
1340 if (isa<ConstantSDNode>(RHS)) {
1341 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1342 // Skip if the immediate can be selected by a load/store addressing mode.
1343 // Also skip if the immediate can be encoded by a single ADD (SUB is also
1344 // checked by using -ImmOff).
1345 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1346 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1347 return false;
1348
1349 SDValue Ops[] = { RHS };
1350 SDNode *MOVI =
1351 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1352 SDValue MOVIV = SDValue(MOVI, 0);
1353 // This ADD of two X register will be selected into [Reg+Reg] mode.
1354 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1355 }
1356
1357 // Remember if it is worth folding N when it produces extended register.
1358 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1359
1360 // Try to match a shifted extend on the RHS.
1361 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1362 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1363 Base = LHS;
1364 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1365 return true;
1366 }
1367
1368 // Try to match a shifted extend on the LHS.
1369 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1370 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1371 Base = RHS;
1372 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1373 return true;
1374 }
1375
1376 // Match any non-shifted, non-extend, non-immediate add expression.
1377 Base = LHS;
1378 Offset = RHS;
1379 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1380 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1381 // Reg1 + Reg2 is free: no check needed.
1382 return true;
1383}
1384
1385SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1386 static const unsigned RegClassIDs[] = {
1387 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1388 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1389 AArch64::dsub2, AArch64::dsub3};
1390
1391 return createTuple(Regs, RegClassIDs, SubRegs);
1392}
1393
1394SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1395 static const unsigned RegClassIDs[] = {
1396 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1397 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1398 AArch64::qsub2, AArch64::qsub3};
1399
1400 return createTuple(Regs, RegClassIDs, SubRegs);
1401}
1402
1403SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1404 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1405 AArch64::ZPR3RegClassID,
1406 AArch64::ZPR4RegClassID};
1407 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1408 AArch64::zsub2, AArch64::zsub3};
1409
1410 return createTuple(Regs, RegClassIDs, SubRegs);
1411}
1412
1413SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1414 assert(Regs.size() == 2 || Regs.size() == 4);
1415
1416 // The createTuple interface requires 3 RegClassIDs for each possible
1417 // tuple type even though we only have them for ZPR2 and ZPR4.
1418 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1419 AArch64::ZPR4Mul4RegClassID};
1420 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1421 AArch64::zsub2, AArch64::zsub3};
1422 return createTuple(Regs, RegClassIDs, SubRegs);
1423}
1424
1425SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1426 const unsigned RegClassIDs[],
1427 const unsigned SubRegs[]) {
1428 // There's no special register-class for a vector-list of 1 element: it's just
1429 // a vector.
1430 if (Regs.size() == 1)
1431 return Regs[0];
1432
1433 assert(Regs.size() >= 2 && Regs.size() <= 4);
1434
1435 SDLoc DL(Regs[0]);
1436 SDLoc DL(Regs[0]);
1437
1438 SmallVector<SDValue, 4> Ops;
1439 // First operand of REG_SEQUENCE is the desired RegClass.
1440 Ops.push_back(
1441 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1442
1443 // Then we get pairs of source & subregister-position for the components.
1444 for (unsigned i = 0; i < Regs.size(); ++i) {
1445 Ops.push_back(Regs[i]);
1446 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1447 }
1448
1449 SDNode *N =
1450 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1451 return SDValue(N, 0);
1452}
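// For illustration (register names assumed): passing two 128-bit vectors to
// createQTuple yields a REG_SEQUENCE in the QQ register class, i.e. a
// consecutive register pair such as "{ v0, v1 }" that instructions like TBL
// or LD2 consume as a single vector-list operand.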
1453
1454void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1455 bool isExt) {
1456 SDLoc dl(N);
1457 EVT VT = N->getValueType(0);
1458
1459 unsigned ExtOff = isExt;
1460
1461 // Form a REG_SEQUENCE to force register allocation.
1462 unsigned Vec0Off = ExtOff + 1;
1463 SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1464 N->op_begin() + Vec0Off + NumVecs);
1465 SDValue RegSeq = createQTuple(Regs);
1466
1467 SmallVector<SDValue, 6> Ops;
1468 if (isExt)
1469 Ops.push_back(N->getOperand(1));
1470 Ops.push_back(RegSeq);
1471 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1472 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1473}
1474
1475bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1476 LoadSDNode *LD = cast<LoadSDNode>(N);
1477 if (LD->isUnindexed())
1478 return false;
1479 EVT VT = LD->getMemoryVT();
1480 EVT DstVT = N->getValueType(0);
1481 ISD::MemIndexedMode AM = LD->getAddressingMode();
1482 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1483
1484 // We're not doing validity checking here. That was done when checking
1485 // if we should mark the load as indexed or not. We're just selecting
1486 // the right instruction.
1487 unsigned Opcode = 0;
1488
1489 ISD::LoadExtType ExtType = LD->getExtensionType();
1490 bool InsertTo64 = false;
1491 if (VT == MVT::i64)
1492 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1493 else if (VT == MVT::i32) {
1494 if (ExtType == ISD::NON_EXTLOAD)
1495 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1496 else if (ExtType == ISD::SEXTLOAD)
1497 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1498 else {
1499 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1500 InsertTo64 = true;
1501 // The result of the load is only i32. It's the subreg_to_reg that makes
1502 // it into an i64.
1503 DstVT = MVT::i32;
1504 }
1505 } else if (VT == MVT::i16) {
1506 if (ExtType == ISD::SEXTLOAD) {
1507 if (DstVT == MVT::i64)
1508 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1509 else
1510 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1511 } else {
1512 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1513 InsertTo64 = DstVT == MVT::i64;
1514 // The result of the load is only i32. It's the subreg_to_reg that makes
1515 // it into an i64.
1516 DstVT = MVT::i32;
1517 }
1518 } else if (VT == MVT::i8) {
1519 if (ExtType == ISD::SEXTLOAD) {
1520 if (DstVT == MVT::i64)
1521 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1522 else
1523 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1524 } else {
1525 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1526 InsertTo64 = DstVT == MVT::i64;
1527 // The result of the load is only i32. It's the subreg_to_reg that makes
1528 // it into an i64.
1529 DstVT = MVT::i32;
1530 }
1531 } else if (VT == MVT::f16) {
1532 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1533 } else if (VT == MVT::bf16) {
1534 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1535 } else if (VT == MVT::f32) {
1536 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1537 } else if (VT == MVT::f64 || VT.is64BitVector()) {
1538 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1539 } else if (VT.is128BitVector()) {
1540 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1541 } else
1542 return false;
1543 SDValue Chain = LD->getChain();
1544 SDValue Base = LD->getBasePtr();
1545 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1546 int OffsetVal = (int)OffsetOp->getZExtValue();
1547 SDLoc dl(N);
1548 SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1549 SDValue Ops[] = { Base, Offset, Chain };
1550 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1551 MVT::Other, Ops);
1552
1553 // Transfer memoperands.
1554 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1555 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1556
1557 // Either way, we're replacing the node, so tell the caller that.
1558 SDValue LoadedVal = SDValue(Res, 1);
1559 if (InsertTo64) {
1560 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1561 LoadedVal =
1562 SDValue(CurDAG->getMachineNode(
1563 AArch64::SUBREG_TO_REG, dl, MVT::i64,
1564 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1565 SubReg),
1566 0);
1567 }
1568
1569 ReplaceUses(SDValue(N, 0), LoadedVal);
1570 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1571 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1572 CurDAG->RemoveDeadNode(N);
1573 return true;
1574}
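// Illustrative mapping (assumed): a post-incremented 64-bit load such as
// "x1 = *x0; x0 += 8" selects LDRXpost, whose results (written-back base,
// loaded value, chain) are wired back to the indexed load's value, offset and
// chain uses above.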
1575
1576void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1577 unsigned SubRegIdx) {
1578 SDLoc dl(N);
1579 EVT VT = N->getValueType(0);
1580 SDValue Chain = N->getOperand(0);
1581
1582 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1583 Chain};
1584
1585 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1586
1587 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1588 SDValue SuperReg = SDValue(Ld, 0);
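// The NumVecs vectors come back as one untyped tuple register; peel each
// one off with an EXTRACT_SUBREG starting at SubRegIdx.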
1589 for (unsigned i = 0; i < NumVecs; ++i)
1590 ReplaceUses(SDValue(N, i),
1591 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1592
1593 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1594
1595 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1596 // because it's too simple to have needed special treatment during lowering.
1597 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1598 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1599 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1600 }
1601
1602 CurDAG->RemoveDeadNode(N);
1603}
1604
1605void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1606 unsigned Opc, unsigned SubRegIdx) {
1607 SDLoc dl(N);
1608 EVT VT = N->getValueType(0);
1609 SDValue Chain = N->getOperand(0);
1610
1611 SDValue Ops[] = {N->getOperand(1), // Mem operand
1612 N->getOperand(2), // Incremental
1613 Chain};
1614
1615 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1616 MVT::Untyped, MVT::Other};
1617
1618 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1619
1620 // Update uses of write back register
1621 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1622
1623 // Update uses of vector list
1624 SDValue SuperReg = SDValue(Ld, 1);
1625 if (NumVecs == 1)
1626 ReplaceUses(SDValue(N, 0), SuperReg);
1627 else
1628 for (unsigned i = 0; i < NumVecs; ++i)
1629 ReplaceUses(SDValue(N, i),
1630 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1631
1632 // Update the chain
1633 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1634 CurDAG->RemoveDeadNode(N);
1635}
1636
1637/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1638/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1639/// new Base and an SDValue representing the new offset.
1640std::tuple<unsigned, SDValue, SDValue>
1641AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1642 unsigned Opc_ri,
1643 const SDValue &OldBase,
1644 const SDValue &OldOffset,
1645 unsigned Scale) {
1646 SDValue NewBase = OldBase;
1647 SDValue NewOffset = OldOffset;
1648 // Detect a possible Reg+Imm addressing mode.
1649 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1650 N, OldBase, NewBase, NewOffset);
1651
1652 // Detect a possible reg+reg addressing mode, but only if we haven't already
1653 // detected a Reg+Imm one.
1654 const bool IsRegReg =
1655 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1656
1657 // Select the instruction.
1658 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1659}
1660
1661enum class SelectTypeKind {
1662 Int1 = 0,
1663 Int = 1,
1664 FP = 2,
1665 AnyType = 3,
1666};
1667
1668/// This function selects an opcode from a list of opcodes, which is
1669/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
1670/// element types, in this order.
1671template <SelectTypeKind Kind>
1672static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1673 // Only match scalable vector VTs
1674 if (!VT.isScalableVector())
1675 return 0;
1676
1677 EVT EltVT = VT.getVectorElementType();
1678 unsigned Key = VT.getVectorMinNumElements();
1679 switch (Kind) {
1680 case SelectTypeKind::AnyType:
1681 break;
1682 case SelectTypeKind::Int:
1683 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1684 EltVT != MVT::i64)
1685 return 0;
1686 break;
1687 case SelectTypeKind::Int1:
1688 if (EltVT != MVT::i1)
1689 return 0;
1690 break;
1691 case SelectTypeKind::FP:
1692 if (EltVT == MVT::bf16)
1693 Key = 16;
1694 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
1695 EltVT != MVT::f64)
1696 return 0;
1697 break;
1698 }
1699
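// Map Key (the minimum element count, or 16 for bf16) to an index into
// Opcodes, which is laid out in { 8-bit, 16-bit, 32-bit, 64-bit } order.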
1700 unsigned Offset;
1701 switch (Key) {
1702 case 16: // 8-bit or bf16
1703 Offset = 0;
1704 break;
1705 case 8: // 16-bit
1706 Offset = 1;
1707 break;
1708 case 4: // 32-bit
1709 Offset = 2;
1710 break;
1711 case 2: // 64-bit
1712 Offset = 3;
1713 break;
1714 default:
1715 return 0;
1716 }
1717
1718 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1719}
1720
1721// This function is almost identical to SelectWhilePair, but has an
1722// extra check on the range of the immediate operand.
1723// TODO: Merge these two functions together at some point?
1724void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
1725 // Immediate can be either 0 or 1.
1726 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
1727 if (Imm->getZExtValue() > 1)
1728 return;
1729
1730 SDLoc DL(N);
1731 EVT VT = N->getValueType(0);
1732 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1733 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1734 SDValue SuperReg = SDValue(WhilePair, 0);
1735
1736 for (unsigned I = 0; I < 2; ++I)
1737 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1738 AArch64::psub0 + I, DL, VT, SuperReg));
1739
1740 CurDAG->RemoveDeadNode(N);
1741}
1742
1743void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1744 SDLoc DL(N);
1745 EVT VT = N->getValueType(0);
1746
1747 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1748
1749 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1750 SDValue SuperReg = SDValue(WhilePair, 0);
1751
1752 for (unsigned I = 0; I < 2; ++I)
1753 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1754 AArch64::psub0 + I, DL, VT, SuperReg));
1755
1756 CurDAG->RemoveDeadNode(N);
1757}
1758
1759void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1760 unsigned Opcode) {
1761 EVT VT = N->getValueType(0);
1762 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1763 SDValue Ops = createZTuple(Regs);
1764 SDLoc DL(N);
1765 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1766 SDValue SuperReg = SDValue(Intrinsic, 0);
1767 for (unsigned i = 0; i < NumVecs; ++i)
1768 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1769 AArch64::zsub0 + i, DL, VT, SuperReg));
1770
1771 CurDAG->RemoveDeadNode(N);
1772}
1773
1774void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
1775 unsigned NumVecs,
1776 bool IsZmMulti,
1777 unsigned Opcode,
1778 bool HasPred) {
1779 assert(Opcode != 0 && "Unexpected opcode");
1780
1781 SDLoc DL(N);
1782 EVT VT = N->getValueType(0);
1783 unsigned FirstVecIdx = HasPred ? 2 : 1;
1784
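// Collect NumVecs consecutive vector operands starting at StartIdx and
// form a single Z-register tuple from them.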
1785 auto GetMultiVecOperand = [=](unsigned StartIdx) {
1786 SmallVector<SDValue, 4> Regs(N->op_begin() + StartIdx,
1787 N->op_begin() + StartIdx + NumVecs);
1788 return createZMulTuple(Regs);
1789 };
1790
1791 SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
1792
1793 SDValue Zm;
1794 if (IsZmMulti)
1795 Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
1796 else
1797 Zm = N->getOperand(NumVecs + FirstVecIdx);
1798
1799 SDNode *Intrinsic;
1800 if (HasPred)
1801 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
1802 N->getOperand(1), Zdn, Zm);
1803 else
1804 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
1805 SDValue SuperReg = SDValue(Intrinsic, 0);
1806 for (unsigned i = 0; i < NumVecs; ++i)
1807 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1808 AArch64::zsub0 + i, DL, VT, SuperReg));
1809
1810 CurDAG->RemoveDeadNode(N);
1811}
1812
1813void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
1814 unsigned Scale, unsigned Opc_ri,
1815 unsigned Opc_rr, bool IsIntr) {
1816 assert(Scale < 5 && "Invalid scaling value.");
1817 SDLoc DL(N);
1818 EVT VT = N->getValueType(0);
1819 SDValue Chain = N->getOperand(0);
1820
1821 // Optimize addressing mode.
1822 SDValue Base, Offset;
1823 unsigned Opc;
1824 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1825 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
1826 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
1827
1828 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
1829 Base, // Memory operand
1830 Offset, Chain};
1831
1832 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1833
1834 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1835 SDValue SuperReg = SDValue(Load, 0);
1836 for (unsigned i = 0; i < NumVecs; ++i)
1837 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1838 AArch64::zsub0 + i, DL, VT, SuperReg));
1839
1840 // Copy chain
1841 unsigned ChainIdx = NumVecs;
1842 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1843 CurDAG->RemoveDeadNode(N);
1844}
1845
1846void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
1847 unsigned NumVecs,
1848 unsigned Scale,
1849 unsigned Opc_ri,
1850 unsigned Opc_rr) {
1851 assert(Scale < 4 && "Invalid scaling value.");
1852 SDLoc DL(N);
1853 EVT VT = N->getValueType(0);
1854 SDValue Chain = N->getOperand(0);
1855
1856 SDValue PNg = N->getOperand(2);
1857 SDValue Base = N->getOperand(3);
1858 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
1859 unsigned Opc;
1860 std::tie(Opc, Base, Offset) =
1861 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
1862
1863 SDValue Ops[] = {PNg, // Predicate-as-counter
1864 Base, // Memory operand
1865 Offset, Chain};
1866
1867 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1868
1869 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1870 SDValue SuperReg = SDValue(Load, 0);
1871 for (unsigned i = 0; i < NumVecs; ++i)
1872 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1873 AArch64::zsub0 + i, DL, VT, SuperReg));
1874
1875 // Copy chain
1876 unsigned ChainIdx = NumVecs;
1877 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1878 CurDAG->RemoveDeadNode(N);
1879}
1880
1881void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
1882 unsigned Opcode) {
1883 if (N->getValueType(0) != MVT::nxv4f32)
1884 return;
1885 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
1886}
1887
1888void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
1889 unsigned NumOutVecs,
1890 unsigned Opc, uint32_t MaxImm) {
1891 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
1892 if (Imm->getZExtValue() > MaxImm)
1893 return;
1894
1895 SDValue ZtValue;
1896 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
1897 return;
1898 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)};
1899 SDLoc DL(Node);
1900 EVT VT = Node->getValueType(0);
1901
1902 SDNode *Instruction =
1903 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
1904 SDValue SuperReg = SDValue(Instruction, 0);
1905
1906 for (unsigned I = 0; I < NumOutVecs; ++I)
1907 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
1908 AArch64::zsub0 + I, DL, VT, SuperReg));
1909
1910 // Copy chain
1911 unsigned ChainIdx = NumOutVecs;
1912 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
1913 CurDAG->RemoveDeadNode(Node);
1914}
1915
1916void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
1917 unsigned Op) {
1918 SDLoc DL(N);
1919 EVT VT = N->getValueType(0);
1920
1921 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1922 SDValue Zd = createZMulTuple(Regs);
1923 SDValue Zn = N->getOperand(1 + NumVecs);
1924 SDValue Zm = N->getOperand(2 + NumVecs);
1925
1926 SDValue Ops[] = {Zd, Zn, Zm};
1927
1928 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
1929 SDValue SuperReg = SDValue(Intrinsic, 0);
1930 for (unsigned i = 0; i < NumVecs; ++i)
1931 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1932 AArch64::zsub0 + i, DL, VT, SuperReg));
1933
1934 CurDAG->RemoveDeadNode(N);
1935}
1936
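// Returns false if TileNum is out of range for the tile granularity implied
// by BaseReg; otherwise folds the tile number into BaseReg, relying on the
// ZA tile registers being numbered consecutively.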
1937bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
1938 switch (BaseReg) {
1939 default:
1940 return false;
1941 case AArch64::ZA:
1942 case AArch64::ZAB0:
1943 if (TileNum == 0)
1944 break;
1945 return false;
1946 case AArch64::ZAH0:
1947 if (TileNum <= 1)
1948 break;
1949 return false;
1950 case AArch64::ZAS0:
1951 if (TileNum <= 3)
1952 break;
1953 return false;
1954 case AArch64::ZAD0:
1955 if (TileNum <= 7)
1956 break;
1957 return false;
1958 }
1959
1960 BaseReg += TileNum;
1961 return true;
1962}
1963
1964template <unsigned MaxIdx, unsigned Scale>
1965void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
1966 unsigned BaseReg, unsigned Op) {
1967 unsigned TileNum = 0;
1968 if (BaseReg != AArch64::ZA)
1969 TileNum = N->getConstantOperandVal(2);
1970
1971 if (!SelectSMETile(BaseReg, TileNum))
1972 return;
1973
1974 SDValue SliceBase, Base, Offset;
1975 if (BaseReg == AArch64::ZA)
1976 SliceBase = N->getOperand(2);
1977 else
1978 SliceBase = N->getOperand(3);
1979
1980 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
1981 return;
1982
1983 SDLoc DL(N);
1984 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
1985 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
1986 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
1987
1988 EVT VT = N->getValueType(0);
1989 for (unsigned I = 0; I < NumVecs; ++I)
1990 ReplaceUses(SDValue(N, I),
1991 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
1992 SDValue(Mov, 0)));
1993 // Copy chain
1994 unsigned ChainIdx = NumVecs;
1995 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
1996 CurDAG->RemoveDeadNode(N);
1997}
1998
1999void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2000 unsigned NumOutVecs,
2001 bool IsTupleInput,
2002 unsigned Opc) {
2003 SDLoc DL(N);
2004 EVT VT = N->getValueType(0);
2005 unsigned NumInVecs = N->getNumOperands() - 1;
2006
2007 SmallVector<SDValue, 4> Ops;
2008 if (IsTupleInput) {
2009 assert((NumInVecs == 2 || NumInVecs == 4) &&
2010 "Don't know how to handle multi-register input!");
2011 SmallVector<SDValue, 4> Regs(N->op_begin() + 1,
2012 N->op_begin() + 1 + NumInVecs);
2013 Ops.push_back(createZMulTuple(Regs));
2014 } else {
2015 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2016 for (unsigned I = 0; I < NumInVecs; I++)
2017 Ops.push_back(N->getOperand(1 + I));
2018 }
2019
2020 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2021 SDValue SuperReg = SDValue(Res, 0);
2022
2023 for (unsigned I = 0; I < NumOutVecs; I++)
2024 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2025 AArch64::zsub0 + I, DL, VT, SuperReg));
2026 CurDAG->RemoveDeadNode(N);
2027}
2028
2029void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2030 unsigned Opc) {
2031 SDLoc dl(N);
2032 EVT VT = N->getOperand(2)->getValueType(0);
2033
2034 // Form a REG_SEQUENCE to force register allocation.
2035 bool Is128Bit = VT.getSizeInBits() == 128;
2036 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2037 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2038
2039 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2040 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2041
2042 // Transfer memoperands.
2043 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2044 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2045
2046 ReplaceNode(N, St);
2047}
2048
2049void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2050 unsigned Scale, unsigned Opc_rr,
2051 unsigned Opc_ri) {
2052 SDLoc dl(N);
2053
2054 // Form a REG_SEQUENCE to force register allocation.
2055 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2056 SDValue RegSeq = createZTuple(Regs);
2057
2058 // Optimize addressing mode.
2059 unsigned Opc;
2060 SDValue Base, Offset;
2061 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2062 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2063 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2064
2065 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2066 Base, // address
2067 Offset, // offset
2068 N->getOperand(0)}; // chain
2069 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2070
2071 ReplaceNode(N, St);
2072}
2073
2074bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2075 SDValue &OffImm) {
2076 SDLoc dl(N);
2077 const DataLayout &DL = CurDAG->getDataLayout();
2078 const TargetLowering *TLI = getTargetLowering();
2079
2080 // Try to match it for the frame address
2081 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2082 int FI = FINode->getIndex();
2083 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2084 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2085 return true;
2086 }
2087
2088 return false;
2089}
2090
2091void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2092 unsigned Opc) {
2093 SDLoc dl(N);
2094 EVT VT = N->getOperand(2)->getValueType(0);
2095 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2096 MVT::Other}; // Type for the Chain
2097
2098 // Form a REG_SEQUENCE to force register allocation.
2099 bool Is128Bit = VT.getSizeInBits() == 128;
2100 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2101 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2102
2103 SDValue Ops[] = {RegSeq,
2104 N->getOperand(NumVecs + 1), // base register
2105 N->getOperand(NumVecs + 2), // Incremental
2106 N->getOperand(0)}; // Chain
2107 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2108
2109 ReplaceNode(N, St);
2110}
2111
2112namespace {
2113/// WidenVector - Given a value in the V64 register class, produce the
2114/// equivalent value in the V128 register class.
2115class WidenVector {
2116 SelectionDAG &DAG;
2117
2118public:
2119 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2120
2121 SDValue operator()(SDValue V64Reg) {
2122 EVT VT = V64Reg.getValueType();
2123 unsigned NarrowSize = VT.getVectorNumElements();
2124 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2125 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2126 SDLoc DL(V64Reg);
2127
2128 SDValue Undef =
2129 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2130 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2131 }
2132};
2133} // namespace
2134
2135/// NarrowVector - Given a value in the V128 register class, produce the
2136/// equivalent value in the V64 register class.
2137 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2138 EVT VT = V128Reg.getValueType();
2139 unsigned WideSize = VT.getVectorNumElements();
2140 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2141 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2142
2143 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2144 V128Reg);
2145}
2146
2147void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2148 unsigned Opc) {
2149 SDLoc dl(N);
2150 EVT VT = N->getValueType(0);
2151 bool Narrow = VT.getSizeInBits() == 64;
2152
2153 // Form a REG_SEQUENCE to force register allocation.
2154 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2155
2156 if (Narrow)
2157 transform(Regs, Regs.begin(),
2158 WidenVector(*CurDAG));
2159
2160 SDValue RegSeq = createQTuple(Regs);
2161
2162 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2163
2164 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2165
2166 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2167 N->getOperand(NumVecs + 3), N->getOperand(0)};
2168 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2169 SDValue SuperReg = SDValue(Ld, 0);
2170
2171 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2172 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2173 AArch64::qsub2, AArch64::qsub3 };
2174 for (unsigned i = 0; i < NumVecs; ++i) {
2175 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2176 if (Narrow)
2177 NV = NarrowVector(NV, *CurDAG);
2178 ReplaceUses(SDValue(N, i), NV);
2179 }
2180
2181 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2182 CurDAG->RemoveDeadNode(N);
2183}
2184
2185void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2186 unsigned Opc) {
2187 SDLoc dl(N);
2188 EVT VT = N->getValueType(0);
2189 bool Narrow = VT.getSizeInBits() == 64;
2190
2191 // Form a REG_SEQUENCE to force register allocation.
2192 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2193
2194 if (Narrow)
2195 transform(Regs, Regs.begin(),
2196 WidenVector(*CurDAG));
2197
2198 SDValue RegSeq = createQTuple(Regs);
2199
2200 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2201 RegSeq->getValueType(0), MVT::Other};
2202
2203 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2204
2205 SDValue Ops[] = {RegSeq,
2206 CurDAG->getTargetConstant(LaneNo, dl,
2207 MVT::i64), // Lane Number
2208 N->getOperand(NumVecs + 2), // Base register
2209 N->getOperand(NumVecs + 3), // Incremental
2210 N->getOperand(0)};
2211 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2212
2213 // Update uses of the write back register
2214 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2215
2216 // Update uses of the vector list
2217 SDValue SuperReg = SDValue(Ld, 1);
2218 if (NumVecs == 1) {
2219 ReplaceUses(SDValue(N, 0),
2220 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2221 } else {
2222 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2223 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2224 AArch64::qsub2, AArch64::qsub3 };
2225 for (unsigned i = 0; i < NumVecs; ++i) {
2226 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2227 SuperReg);
2228 if (Narrow)
2229 NV = NarrowVector(NV, *CurDAG);
2230 ReplaceUses(SDValue(N, i), NV);
2231 }
2232 }
2233
2234 // Update the Chain
2235 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2236 CurDAG->RemoveDeadNode(N);
2237}
2238
2239void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2240 unsigned Opc) {
2241 SDLoc dl(N);
2242 EVT VT = N->getOperand(2)->getValueType(0);
2243 bool Narrow = VT.getSizeInBits() == 64;
2244
2245 // Form a REG_SEQUENCE to force register allocation.
2246 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2247
2248 if (Narrow)
2249 transform(Regs, Regs.begin(),
2250 WidenVector(*CurDAG));
2251
2252 SDValue RegSeq = createQTuple(Regs);
2253
2254 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2255
2256 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2257 N->getOperand(NumVecs + 3), N->getOperand(0)};
2258 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2259
2260 // Transfer memoperands.
2261 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2262 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2263
2264 ReplaceNode(N, St);
2265}
2266
2267void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2268 unsigned Opc) {
2269 SDLoc dl(N);
2270 EVT VT = N->getOperand(2)->getValueType(0);
2271 bool Narrow = VT.getSizeInBits() == 64;
2272
2273 // Form a REG_SEQUENCE to force register allocation.
2274 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2275
2276 if (Narrow)
2277 transform(Regs, Regs.begin(),
2278 WidenVector(*CurDAG));
2279
2280 SDValue RegSeq = createQTuple(Regs);
2281
2282 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2283 MVT::Other};
2284
2285 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2286
2287 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2288 N->getOperand(NumVecs + 2), // Base Register
2289 N->getOperand(NumVecs + 3), // Incremental
2290 N->getOperand(0)};
2291 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2292
2293 // Transfer memoperands.
2294 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2295 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2296
2297 ReplaceNode(N, St);
2298}
2299
2300 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2301 unsigned &Opc, SDValue &Opd0,
2302 unsigned &LSB, unsigned &MSB,
2303 unsigned NumberOfIgnoredLowBits,
2304 bool BiggerPattern) {
2305 assert(N->getOpcode() == ISD::AND &&
2306 "N must be a AND operation to call this function");
2307
2308 EVT VT = N->getValueType(0);
2309
2310 // Here we can test the type of VT and return false when the type does not
2311 // match, but since it is done prior to that call in the current context
2312 // we turned that into an assert to avoid redundant code.
2313 assert((VT == MVT::i32 || VT == MVT::i64) &&
2314 "Type checking must have been done before calling this function");
2315
2316 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2317 // changed the AND node to a 32-bit mask operation. We'll have to
2318 // undo that as part of the transform here if we want to catch all
2319 // the opportunities.
2320 // Currently the NumberOfIgnoredLowBits argument helps to recover
2321 // from these situations when matching bigger pattern (bitfield insert).
2322
2323 // For unsigned extracts, check for a shift right and mask
2324 uint64_t AndImm = 0;
2325 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2326 return false;
2327
2328 const SDNode *Op0 = N->getOperand(0).getNode();
2329
2330 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2331 // simplified. Try to undo that
2332 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2333
2334 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2335 if (AndImm & (AndImm + 1))
2336 return false;
2337
2338 bool ClampMSB = false;
2339 uint64_t SrlImm = 0;
2340 // Handle the SRL + ANY_EXTEND case.
2341 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2342 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2343 // Extend the incoming operand of the SRL to 64-bit.
2344 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2345 // Make sure to clamp the MSB so that we preserve the semantics of the
2346 // original operations.
2347 ClampMSB = true;
2348 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2349 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
2350 SrlImm)) {
2351 // If the shift result was truncated, we can still combine them.
2352 Opd0 = Op0->getOperand(0).getOperand(0);
2353
2354 // Use the type of SRL node.
2355 VT = Opd0->getValueType(0);
2356 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2357 Opd0 = Op0->getOperand(0);
2358 ClampMSB = (VT == MVT::i32);
2359 } else if (BiggerPattern) {
2360 // Let's pretend a 0 shift right has been performed.
2361 // The resulting code will be at least as good as the original one
2362 // plus it may expose more opportunities for bitfield insert pattern.
2363 // FIXME: Currently we limit this to the bigger pattern, because
2364 // some optimizations expect AND and not UBFM.
2365 Opd0 = N->getOperand(0);
2366 } else
2367 return false;
2368
2369 // Bail out on large immediates. This happens when no proper
2370 // combining/constant folding was performed.
2371 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2372 LLVM_DEBUG(
2373 (dbgs() << N
2374 << ": Found large shift immediate, this should not happen\n"));
2375 return false;
2376 }
2377
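// The extracted field starts at the shift amount and spans as many bits as
// the mask has trailing ones.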
2378 LSB = SrlImm;
2379 MSB = SrlImm +
2380 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2381 : llvm::countr_one<uint64_t>(AndImm)) -
2382 1;
2383 if (ClampMSB)
2384 // Since we're moving the extend before the right shift operation, we need
2385 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2386 // the zeros which would get shifted in with the original right shift
2387 // operation.
2388 MSB = MSB > 31 ? 31 : MSB;
2389
2390 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2391 return true;
2392}
2393
2394static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2395 SDValue &Opd0, unsigned &Immr,
2396 unsigned &Imms) {
2397 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2398
2399 EVT VT = N->getValueType(0);
2400 unsigned BitWidth = VT.getSizeInBits();
2401 assert((VT == MVT::i32 || VT == MVT::i64) &&
2402 "Type checking must have been done before calling this function");
2403
2404 SDValue Op = N->getOperand(0);
2405 if (Op->getOpcode() == ISD::TRUNCATE) {
2406 Op = Op->getOperand(0);
2407 VT = Op->getValueType(0);
2408 BitWidth = VT.getSizeInBits();
2409 }
2410
2411 uint64_t ShiftImm;
2412 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2413 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2414 return false;
2415
2416 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2417 if (ShiftImm + Width > BitWidth)
2418 return false;
2419
2420 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2421 Opd0 = Op.getOperand(0);
2422 Immr = ShiftImm;
2423 Imms = ShiftImm + Width - 1;
2424 return true;
2425}
2426
2427static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2428 SDValue &Opd0, unsigned &LSB,
2429 unsigned &MSB) {
2430 // We are looking for the following pattern which basically extracts several
2431 // contiguous bits from the source value and places them at the LSB of the
2432 // destination value; all other bits of the destination value are set to zero:
2433 //
2434 // Value2 = AND Value, MaskImm
2435 // SRL Value2, ShiftImm
2436 //
2437 // where MaskImm >> ShiftImm determines the width of the extracted field.
2438 //
2439 // This gets selected into a single UBFM:
2440 //
2441 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2442 //
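// For example, (srl (and X, 0xff0), 4) extracts bits [11:4] of X and is
// selected as UBFM X, 4, 11.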
2443
2444 if (N->getOpcode() != ISD::SRL)
2445 return false;
2446
2447 uint64_t AndMask = 0;
2448 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2449 return false;
2450
2451 Opd0 = N->getOperand(0).getOperand(0);
2452
2453 uint64_t SrlImm = 0;
2454 if (!isIntImmediate(N->getOperand(1), SrlImm))
2455 return false;
2456
2457 // Check whether we really have several bits extract here.
2458 if (!isMask_64(AndMask >> SrlImm))
2459 return false;
2460
2461 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2462 LSB = SrlImm;
2463 MSB = llvm::Log2_64(AndMask);
2464 return true;
2465}
2466
2467static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2468 unsigned &Immr, unsigned &Imms,
2469 bool BiggerPattern) {
2470 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2471 "N must be a SHR/SRA operation to call this function");
2472
2473 EVT VT = N->getValueType(0);
2474
2475 // Here we can test the type of VT and return false when the type does not
2476 // match, but since it is done prior to that call in the current context
2477 // we turned that into an assert to avoid redundant code.
2478 assert((VT == MVT::i32 || VT == MVT::i64) &&
2479 "Type checking must have been done before calling this function");
2480
2481 // Check for AND + SRL doing several bits extract.
2482 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2483 return true;
2484
2485 // We're looking for a shift of a shift.
2486 uint64_t ShlImm = 0;
2487 uint64_t TruncBits = 0;
2488 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2489 Opd0 = N->getOperand(0).getOperand(0);
2490 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2491 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2492 // We are looking for a shift of a truncate. A truncate from i64 to i32 can
2493 // be considered as setting the high 32 bits to zero. Our strategy here is to
2494 // always generate a 64-bit UBFM. This consistency will help the CSE pass
2495 // later find more redundancy.
2496 Opd0 = N->getOperand(0).getOperand(0);
2497 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2498 VT = Opd0.getValueType();
2499 assert(VT == MVT::i64 && "the promoted type should be i64");
2500 } else if (BiggerPattern) {
2501 // Let's pretend a 0 shift left has been performed.
2502 // FIXME: Currently we limit this to the bigger pattern case,
2503 // because some optimizations expect AND and not UBFM
2504 Opd0 = N->getOperand(0);
2505 } else
2506 return false;
2507
2508 // Missing combines/constant folding may have left us with strange
2509 // constants.
2510 if (ShlImm >= VT.getSizeInBits()) {
2511 LLVM_DEBUG(
2512 (dbgs() << N
2513 << ": Found large shift immediate, this should not happen\n"));
2514 return false;
2515 }
2516
2517 uint64_t SrlImm = 0;
2518 if (!isIntImmediate(N->getOperand(1), SrlImm))
2519 return false;
2520
2521 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2522 "bad amount in shift node!");
2523 int immr = SrlImm - ShlImm;
2524 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2525 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2526 // SRA requires a signed extraction
2527 if (VT == MVT::i32)
2528 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2529 else
2530 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2531 return true;
2532}
2533
2534bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2535 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2536
2537 EVT VT = N->getValueType(0);
2538 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2539 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2540 return false;
2541
2542 uint64_t ShiftImm;
2543 SDValue Op = N->getOperand(0);
2544 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2545 return false;
2546
2547 SDLoc dl(N);
2548 // Extend the incoming operand of the shift to 64-bits.
2549 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2550 unsigned Immr = ShiftImm;
2551 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2552 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2553 CurDAG->getTargetConstant(Imms, dl, VT)};
2554 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2555 return true;
2556}
2557
2558static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2559 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2560 unsigned NumberOfIgnoredLowBits = 0,
2561 bool BiggerPattern = false) {
2562 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2563 return false;
2564
2565 switch (N->getOpcode()) {
2566 default:
2567 if (!N->isMachineOpcode())
2568 return false;
2569 break;
2570 case ISD::AND:
2571 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2572 NumberOfIgnoredLowBits, BiggerPattern);
2573 case ISD::SRL:
2574 case ISD::SRA:
2575 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2576
2577 case ISD::SIGN_EXTEND_INREG:
2578 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2579 }
2580
2581 unsigned NOpc = N->getMachineOpcode();
2582 switch (NOpc) {
2583 default:
2584 return false;
2585 case AArch64::SBFMWri:
2586 case AArch64::UBFMWri:
2587 case AArch64::SBFMXri:
2588 case AArch64::UBFMXri:
2589 Opc = NOpc;
2590 Opd0 = N->getOperand(0);
2591 Immr = N->getConstantOperandVal(1);
2592 Imms = N->getConstantOperandVal(2);
2593 return true;
2594 }
2595 // Unreachable
2596 return false;
2597}
2598
2599bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2600 unsigned Opc, Immr, Imms;
2601 SDValue Opd0;
2602 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2603 return false;
2604
2605 EVT VT = N->getValueType(0);
2606 SDLoc dl(N);
2607
2608 // If the bit extract operation is 64bit but the original type is 32bit, we
2609 // need to add one EXTRACT_SUBREG.
2610 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2611 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2612 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2613
2614 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2615 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
2616 MVT::i32, SDValue(BFM, 0));
2617 ReplaceNode(N, Inner.getNode());
2618 return true;
2619 }
2620
2621 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2622 CurDAG->getTargetConstant(Imms, dl, VT)};
2623 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2624 return true;
2625}
2626
2627/// Does DstMask form a complementary pair with the mask provided by
2628/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2629/// this asks whether DstMask zeroes precisely those bits that will be set by
2630/// the other half.
2631static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2632 unsigned NumberOfIgnoredHighBits, EVT VT) {
2633 assert((VT == MVT::i32 || VT == MVT::i64) &&
2634 "i32 or i64 mask type expected!");
2635 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2636
2637 APInt SignificantDstMask = APInt(BitWidth, DstMask);
2638 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2639
2640 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2641 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2642}
2643
2644// Look for bits that will be useful for later uses.
2645 // A bit is considered useless as soon as it is dropped and never used
2646 // before it has been dropped.
2647 // E.g., looking for the useful bits of x
2648 // 1. y = x & 0x7
2649 // 2. z = y >> 2
2650 // After #1, the useful bits of x are 0x7; they live through
2651 // y.
2652 // After #2, the useful bits of x are 0x4.
2653 // However, if x is used by an unpredictable instruction, then all its bits
2654 // are useful.
2655// E.g.
2656// 1. y = x & 0x7
2657// 2. z = y >> 2
2658// 3. str x, [@x]
2659static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2660
2661 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2662 unsigned Depth) {
2663 uint64_t Imm =
2664 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2665 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2666 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2667 getUsefulBits(Op, UsefulBits, Depth + 1);
2668}
2669
2670 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2671 uint64_t Imm, uint64_t MSB,
2672 unsigned Depth) {
2673 // inherit the bitwidth value
2674 APInt OpUsefulBits(UsefulBits);
2675 OpUsefulBits = 1;
2676
2677 if (MSB >= Imm) {
2678 OpUsefulBits <<= MSB - Imm + 1;
2679 --OpUsefulBits;
2680 // The interesting part will be in the lower part of the result
2681 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2682 // The interesting part was starting at Imm in the argument
2683 OpUsefulBits <<= Imm;
2684 } else {
2685 OpUsefulBits <<= MSB + 1;
2686 --OpUsefulBits;
2687 // The interesting part will be shifted in the result
2688 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2689 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2690 // The interesting part was at zero in the argument
2691 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2692 }
2693
2694 UsefulBits &= OpUsefulBits;
2695}
2696
2697static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2698 unsigned Depth) {
2699 uint64_t Imm =
2700 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2701 uint64_t MSB =
2702 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2703
2704 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2705}
2706
2707 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2708 unsigned Depth) {
2709 uint64_t ShiftTypeAndValue =
2710 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2711 APInt Mask(UsefulBits);
2712 Mask.clearAllBits();
2713 Mask.flipAllBits();
2714
2715 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2716 // Shift Left
2717 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2718 Mask <<= ShiftAmt;
2719 getUsefulBits(Op, Mask, Depth + 1);
2720 Mask.lshrInPlace(ShiftAmt);
2721 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2722 // Shift Right
2723 // We do not handle AArch64_AM::ASR, because the sign will change the
2724 // number of useful bits
2725 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2726 Mask.lshrInPlace(ShiftAmt);
2727 getUsefulBits(Op, Mask, Depth + 1);
2728 Mask <<= ShiftAmt;
2729 } else
2730 return;
2731
2732 UsefulBits &= Mask;
2733}
2734
2735static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
2736 unsigned Depth) {
2737 uint64_t Imm =
2738 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2739 uint64_t MSB =
2740 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
2741
2742 APInt OpUsefulBits(UsefulBits);
2743 OpUsefulBits = 1;
2744
2745 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
2746 ResultUsefulBits.flipAllBits();
2747 APInt Mask(UsefulBits.getBitWidth(), 0);
2748
2749 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
2750
2751 if (MSB >= Imm) {
2752 // The instruction is a BFXIL.
2753 uint64_t Width = MSB - Imm + 1;
2754 uint64_t LSB = Imm;
2755
2756 OpUsefulBits <<= Width;
2757 --OpUsefulBits;
2758
2759 if (Op.getOperand(1) == Orig) {
2760 // Copy the low bits from the result to bits starting from LSB.
2761 Mask = ResultUsefulBits & OpUsefulBits;
2762 Mask <<= LSB;
2763 }
2764
2765 if (Op.getOperand(0) == Orig)
2766 // Bits starting from LSB in the input contribute to the result.
2767 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2768 } else {
2769 // The instruction is a BFI.
2770 uint64_t Width = MSB + 1;
2771 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
2772
2773 OpUsefulBits <<= Width;
2774 --OpUsefulBits;
2775 OpUsefulBits <<= LSB;
2776
2777 if (Op.getOperand(1) == Orig) {
2778 // Copy the bits from the result to the zero bits.
2779 Mask = ResultUsefulBits & OpUsefulBits;
2780 Mask.lshrInPlace(LSB);
2781 }
2782
2783 if (Op.getOperand(0) == Orig)
2784 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2785 }
2786
2787 UsefulBits &= Mask;
2788}
2789
2790static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
2791 SDValue Orig, unsigned Depth) {
2792
2793 // Users of this node should have already been instruction selected
2794 // FIXME: Can we turn that into an assert?
2795 if (!UserNode->isMachineOpcode())
2796 return;
2797
2798 switch (UserNode->getMachineOpcode()) {
2799 default:
2800 return;
2801 case AArch64::ANDSWri:
2802 case AArch64::ANDSXri:
2803 case AArch64::ANDWri:
2804 case AArch64::ANDXri:
2805 // We increment Depth only when we call the getUsefulBits
2806 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
2807 Depth);
2808 case AArch64::UBFMWri:
2809 case AArch64::UBFMXri:
2810 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
2811
2812 case AArch64::ORRWrs:
2813 case AArch64::ORRXrs:
2814 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
2815 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
2816 Depth);
2817 return;
2818 case AArch64::BFMWri:
2819 case AArch64::BFMXri:
2820 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
2821
2822 case AArch64::STRBBui:
2823 case AArch64::STURBBi:
2824 if (UserNode->getOperand(0) != Orig)
2825 return;
2826 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
2827 return;
2828
2829 case AArch64::STRHHui:
2830 case AArch64::STURHHi:
2831 if (UserNode->getOperand(0) != Orig)
2832 return;
2833 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
2834 return;
2835 }
2836}
2837
2838static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
2839 if (Depth >= SelectionDAG::MaxRecursionDepth)
2840 return;
2841 // Initialize UsefulBits
2842 if (!Depth) {
2843 unsigned Bitwidth = Op.getScalarValueSizeInBits();
2844 // At the beginning, assume every produced bit is useful
2845 UsefulBits = APInt(Bitwidth, 0);
2846 UsefulBits.flipAllBits();
2847 }
2848 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
2849
2850 for (SDNode *Node : Op.getNode()->uses()) {
2851 // A use cannot produce useful bits
2852 APInt UsefulBitsForUse = APInt(UsefulBits);
2853 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
2854 UsersUsefulBits |= UsefulBitsForUse;
2855 }
2856 // UsefulBits contains the produced bits that are meaningful for the
2857 // current definition, thus a user cannot make a bit meaningful at
2858 // this point
2859 UsefulBits &= UsersUsefulBits;
2860}
2861
2862/// Create a machine node performing a notional SHL of Op by ShlAmount. If
2863/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
2864/// 0, return Op unchanged.
2865static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
2866 if (ShlAmount == 0)
2867 return Op;
2868
2869 EVT VT = Op.getValueType();
2870 SDLoc dl(Op);
2871 unsigned BitWidth = VT.getSizeInBits();
2872 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2873
2874 SDNode *ShiftNode;
2875 if (ShlAmount > 0) {
2876 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
2877 ShiftNode = CurDAG->getMachineNode(
2878 UBFMOpc, dl, VT, Op,
2879 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
2880 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
2881 } else {
2882 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
2883 assert(ShlAmount < 0 && "expected right shift");
2884 int ShrAmount = -ShlAmount;
2885 ShiftNode = CurDAG->getMachineNode(
2886 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
2887 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
2888 }
2889
2890 return SDValue(ShiftNode, 0);
2891}
2892
2893// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
2894 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
2895 bool BiggerPattern,
2896 const uint64_t NonZeroBits,
2897 SDValue &Src, int &DstLSB,
2898 int &Width);
2899
2900 // For bit-field-positioning pattern "(shl VAL, N)".
2901 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
2902 bool BiggerPattern,
2903 const uint64_t NonZeroBits,
2904 SDValue &Src, int &DstLSB,
2905 int &Width);
2906
2907/// Does this tree qualify as an attempt to move a bitfield into position,
2908/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
2909 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
2910 bool BiggerPattern, SDValue &Src,
2911 int &DstLSB, int &Width) {
2912 EVT VT = Op.getValueType();
2913 unsigned BitWidth = VT.getSizeInBits();
2914 (void)BitWidth;
2915 assert(BitWidth == 32 || BitWidth == 64);
2916
2917 KnownBits Known = CurDAG->computeKnownBits(Op);
2918
2919 // Non-zero in the sense that they're not provably zero, which is the key
2920 // point if we want to use this value
2921 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
2922 if (!isShiftedMask_64(NonZeroBits))
2923 return false;
2924
2925 switch (Op.getOpcode()) {
2926 default:
2927 break;
2928 case ISD::AND:
2929 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
2930 NonZeroBits, Src, DstLSB, Width);
2931 case ISD::SHL:
2932 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
2933 NonZeroBits, Src, DstLSB, Width);
2934 }
2935
2936 return false;
2937}
2938
2939 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
2940 bool BiggerPattern,
2941 const uint64_t NonZeroBits,
2942 SDValue &Src, int &DstLSB,
2943 int &Width) {
2944 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
2945
2946 EVT VT = Op.getValueType();
2947 assert((VT == MVT::i32 || VT == MVT::i64) &&
2948 "Caller guarantees VT is one of i32 or i64");
2949 (void)VT;
2950
2951 uint64_t AndImm;
2952 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
2953 return false;
2954
2955 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
2956 // 1) (AndImm & (1 << POS) == 0)
2957 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
2958 //
2959 // 1) and 2) don't agree so something must be wrong (e.g., in
2960 // 'SelectionDAG::computeKnownBits')
2961 assert((~AndImm & NonZeroBits) == 0 &&
2962 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
2963
2964 SDValue AndOp0 = Op.getOperand(0);
2965
2966 uint64_t ShlImm;
2967 SDValue ShlOp0;
2968 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
2969 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
2970 ShlOp0 = AndOp0.getOperand(0);
2971 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
2972 isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
2973 ShlImm)) {
2974 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
2975
2976 // ShlVal == shl(val, N), which is a left shift on a smaller type.
2977 SDValue ShlVal = AndOp0.getOperand(0);
2978
2979 // Since this is after type legalization and ShlVal is extended to MVT::i64,
2980 // expect VT to be MVT::i32.
2981 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
2982
2983 // Widens 'val' to MVT::i64 as the source of bit field positioning.
2984 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
2985 } else
2986 return false;
2987
2988 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
2989 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
2990 // AndOp0+AND.
2991 if (!BiggerPattern && !AndOp0.hasOneUse())
2992 return false;
2993
2994 DstLSB = llvm::countr_zero(NonZeroBits);
2995 Width = llvm::countr_one(NonZeroBits >> DstLSB);
2996
2997 // Bail out on large Width. This happens when no proper combining / constant
2998 // folding was performed.
2999 if (Width >= (int)VT.getSizeInBits()) {
3000 // If VT is i64, Width > 64 is not possible since NonZeroBits is uint64_t, and
3001 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3002 // "val".
3003 // If VT is i32, what Width >= 32 means:
3004 // - For "(and (any_extend(shl val, N)), shifted-mask)", the`and` Op
3005 // demands at least 'Width' bits (after dag-combiner). This together with
3006 // `any_extend` Op (undefined higher bits) indicates missed combination
3007 // when lowering the 'and' IR instruction to a machine IR instruction.
3008 LLVM_DEBUG(
3009 dbgs()
3010 << "Found large Width in bit-field-positioning -- this indicates no "
3011 "proper combining / constant folding was performed\n");
3012 return false;
3013 }
3014
3015 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3016 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3017 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3018 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3019 // which case it is not profitable to insert an extra shift.
3020 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3021 return false;
3022
3023 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3024 return true;
3025}
3026
3027 // For node (shl (and val, mask), N), returns true if the node is equivalent to
3028 // UBFIZ.
3029 static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3030 SDValue &Src, int &DstLSB,
3031 int &Width) {
3032 // The caller should have verified that Op is a left shift with a constant
3033 // shift amount; the asserts below check that.
3034 assert(Op.getOpcode() == ISD::SHL &&
3035 "Op.getNode() should be a SHL node to call this function");
3036 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3037 "Op.getNode() should shift ShlImm to call this function");
3038
3039 uint64_t AndImm = 0;
3040 SDValue Op0 = Op.getOperand(0);
3041 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3042 return false;
3043
3044 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3045 if (isMask_64(ShiftedAndImm)) {
3046 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3047 // should end with Mask, and could be prefixed with random bits if those
3048 // bits are shifted out.
3049 //
3050 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3051 // the AND result bits corresponding to them are shifted out, so it's fine
3052 // to not extract them.
3053 Width = llvm::countr_one(ShiftedAndImm);
3054 DstLSB = ShlImm;
3055 Src = Op0.getOperand(0);
3056 return true;
3057 }
3058 return false;
3059}
3060
3061 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3062 bool BiggerPattern,
3063 const uint64_t NonZeroBits,
3064 SDValue &Src, int &DstLSB,
3065 int &Width) {
3066 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3067
3068 EVT VT = Op.getValueType();
3069 assert((VT == MVT::i32 || VT == MVT::i64) &&
3070 "Caller guarantees that type is i32 or i64");
3071 (void)VT;
3072
3073 uint64_t ShlImm;
3074 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3075 return false;
3076
3077 if (!BiggerPattern && !Op.hasOneUse())
3078 return false;
3079
3080 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3081 return true;
3082
3083 DstLSB = llvm::countr_zero(NonZeroBits);
3084 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3085
3086 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3087 return false;
3088
3089 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3090 return true;
3091}
3092
3093static bool isShiftedMask(uint64_t Mask, EVT VT) {
3094 assert(VT == MVT::i32 || VT == MVT::i64);
3095 if (VT == MVT::i32)
3096 return isShiftedMask_32(Mask);
3097 return isShiftedMask_64(Mask);
3098}
3099
3100// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3101// inserted only sets known zero bits.
3102 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
3103 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3104
3105 EVT VT = N->getValueType(0);
3106 if (VT != MVT::i32 && VT != MVT::i64)
3107 return false;
3108
3109 unsigned BitWidth = VT.getSizeInBits();
3110
3111 uint64_t OrImm;
3112 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3113 return false;
3114
3115 // Skip this transformation if the ORR immediate can be encoded in the ORR.
3116 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3117 // performance neutral.
3118 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
3119 return false;
3120
3121 uint64_t MaskImm;
3122 SDValue And = N->getOperand(0);
3123 // Must be a single use AND with an immediate operand.
3124 if (!And.hasOneUse() ||
3125 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3126 return false;
3127
3128 // Compute the Known Zero for the AND as this allows us to catch more general
3129 // cases than just looking for AND with imm.
3130 KnownBits Known = CurDAG->computeKnownBits(And);
3131
3132 // Non-zero in the sense that they're not provably zero, which is the key
3133 // point if we want to use this value.
3134 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3135
3136 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3137 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3138 return false;
3139
3140 // The bits being inserted must only set those bits that are known to be zero.
3141 if ((OrImm & NotKnownZero) != 0) {
3142 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3143 // currently handle this case.
3144 return false;
3145 }
3146
3147 // BFI/BFXIL dst, src, #lsb, #width.
3148 int LSB = llvm::countr_one(NotKnownZero);
3149 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3150
3151 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3152 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3153 unsigned ImmS = Width - 1;
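// For example, a 32-bit BFI at LSB 8 with Width 4 gives ImmR = 24 and
// ImmS = 3.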
3154
3155 // If we're creating a BFI instruction avoid cases where we need more
3156 // instructions to materialize the BFI constant as compared to the original
3157 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3158 // should be no worse in this case.
3159 bool IsBFI = LSB != 0;
3160 uint64_t BFIImm = OrImm >> LSB;
3161 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3162 // We have a BFI instruction and we know the constant can't be materialized
3163 // with a ORR-immediate with the zero register.
3164 unsigned OrChunks = 0, BFIChunks = 0;
3165 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3166 if (((OrImm >> Shift) & 0xFFFF) != 0)
3167 ++OrChunks;
3168 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3169 ++BFIChunks;
3170 }
3171 if (BFIChunks > OrChunks)
3172 return false;
3173 }
3174
3175 // Materialize the constant to be inserted.
3176 SDLoc DL(N);
3177 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3178 SDNode *MOVI = CurDAG->getMachineNode(
3179 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3180
3181 // Create the BFI/BFXIL instruction.
3182 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3183 CurDAG->getTargetConstant(ImmR, DL, VT),
3184 CurDAG->getTargetConstant(ImmS, DL, VT)};
3185 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3186 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3187 return true;
3188}
3189
3190 static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
3191 SDValue &ShiftedOperand,
3192 uint64_t &EncodedShiftImm) {
3193 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3194 if (!Dst.hasOneUse())
3195 return false;
3196
3197 EVT VT = Dst.getValueType();
3198 assert((VT == MVT::i32 || VT == MVT::i64) &&
3199 "Caller should guarantee that VT is one of i32 or i64");
3200 const unsigned SizeInBits = VT.getSizeInBits();
3201
3202 SDLoc DL(Dst.getNode());
3203 uint64_t AndImm, ShlImm;
3204 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3205 isShiftedMask_64(AndImm)) {
3206 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3207 SDValue DstOp0 = Dst.getOperand(0);
3208 if (!DstOp0.hasOneUse())
3209 return false;
3210
3211 // An example to illustrate the transformation
3212 // From:
3213 // lsr x8, x1, #1
3214 // and x8, x8, #0x3f80
3215 // bfxil x8, x1, #0, #7
3216 // To:
3217 // and x8, x23, #0x7f
3218 // ubfx x9, x23, #8, #7
3219 // orr x23, x8, x9, lsl #7
3220 //
3221 // The number of instructions remains the same, but ORR is faster than BFXIL
3222 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3223 // the dependency chain is improved after the transformation.
3224 uint64_t SrlImm;
3225 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3226 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3227 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3228 unsigned MaskWidth =
3229 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3230 unsigned UBFMOpc =
3231 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3232 SDNode *UBFMNode = CurDAG->getMachineNode(
3233 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3234 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3235 VT),
3236 CurDAG->getTargetConstant(
3237 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3238 ShiftedOperand = SDValue(UBFMNode, 0);
3239 EncodedShiftImm = AArch64_AM::getShifterImm(
3240 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3241 return true;
3242 }
3243 }
3244 return false;
3245 }
3246
3247 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3248 ShiftedOperand = Dst.getOperand(0);
3249 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3250 return true;
3251 }
3252
3253 uint64_t SrlImm;
3254 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3255 ShiftedOperand = Dst.getOperand(0);
3256 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3257 return true;
3258 }
3259 return false;
3260}
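// Editorial sketch (hypothetical helper, not used by the selector): the
// immediates produced above for the comment's example, where Dst is
// ((x >> 1) & 0x3f80), i.e. SrlImm == 1 and AndImm == 0x3f80.
static constexpr bool exampleOrrWithShiftImms() {
  constexpr uint64_t AndImm = 0x3f80;   // shifted mask: 7 ones from bit 7
  constexpr uint64_t SrlImm = 1;
  constexpr unsigned TrailingZeros = 7; // countr_zero(AndImm)
  constexpr unsigned MaskWidth = 7;     // countr_one(AndImm >> TrailingZeros)
  constexpr unsigned UbfmImmR = SrlImm + TrailingZeros;                 // 8
  constexpr unsigned UbfmImmS = SrlImm + TrailingZeros + MaskWidth - 1; // 14
  constexpr unsigned OrrLslAmt = TrailingZeros;                         // 7
  // This reproduces the comment: ubfx x9, x1, #8, #7 ; orr .., x9, lsl #7.
  return AndImm == (0x7fULL << TrailingZeros) && UbfmImmR == 8 &&
         UbfmImmS == 14 && OrrLslAmt == 7;
}
static_assert(exampleOrrWithShiftImms(), "ORR-with-shift immediates");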
3261
3262// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3263// the operands and select it to AArch64::ORR with shifted registers if
3264// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3265static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3266 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3267 const bool BiggerPattern) {
3268 EVT VT = N->getValueType(0);
3269 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3270 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3271 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3272 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3273 assert((VT == MVT::i32 || VT == MVT::i64) &&
3274 "Expect result type to be i32 or i64 since N is combinable to BFM");
3275 SDLoc DL(N);
3276
3277 // Bail out if BFM simplifies away one node in BFM Dst.
3278 if (OrOpd1 != Dst)
3279 return false;
3280
3281 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3282 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3283 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3284 if (BiggerPattern) {
3285 uint64_t SrcAndImm;
3286 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3287 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3288 // OrOpd0 = AND Src, #Mask
3289 // So BFM simplifies away one AND node from Src and doesn't simplify away
3290 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3291 // one node (from Rd), ORR is better since it has higher throughput and
3292 // smaller latency than BFM on many AArch64 processors (and for the rest
3293 // ORR is at least as good as BFM).
3294 SDValue ShiftedOperand;
3295 uint64_t EncodedShiftImm;
3296 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3297 EncodedShiftImm)) {
3298 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3299 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3300 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3301 return true;
3302 }
3303 }
3304 return false;
3305 }
3306
3307 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3308
3309 uint64_t ShlImm;
3310 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3311 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3312 SDValue Ops[] = {
3313 Dst, Src,
3314 CurDAG->getTargetConstant(
3315 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3316 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3317 return true;
3318 }
3319
3320 // Select the following pattern to left-shifted operand rather than BFI.
3321 // %val1 = op ..
3322 // %val2 = shl %val1, #imm
3323 // %res = or %val1, %val2
3324 //
3325 // If N is selected to be BFI, we know that
3326 // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3327 // into BFI) 2) OrOpd1 would be the destination operand (i.e., preserved)
3328 //
3329 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3330 if (OrOpd0.getOperand(0) == OrOpd1) {
3331 SDValue Ops[] = {
3332 OrOpd1, OrOpd1,
3333 CurDAG->getTargetConstant(
3334 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3335 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3336 return true;
3337 }
3338 }
3339
3340 uint64_t SrlImm;
3341 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3342 // Select the following pattern to right-shifted operand rather than BFXIL.
3343 // %val1 = op ..
3344 // %val2 = lshr %val1, #imm
3345 // %res = or %val1, %val2
3346 //
3347 // If N is selected to be BFXIL, we know that
3348 // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3349 // into BFXIL) 2) OrOpd1 would be the destination operand (i.e., preserved)
3350 //
3351 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3352 if (OrOpd0.getOperand(0) == OrOpd1) {
3353 SDValue Ops[] = {
3354 OrOpd1, OrOpd1,
3355 CurDAG->getTargetConstant(
3356 AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
3357 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3358 return true;
3359 }
3360 }
3361
3362 return false;
3363}
3364
3365static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3366 SelectionDAG *CurDAG) {
3367 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3368
3369 EVT VT = N->getValueType(0);
3370 if (VT != MVT::i32 && VT != MVT::i64)
3371 return false;
3372
3373 unsigned BitWidth = VT.getSizeInBits();
3374
3375 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3376 // have the expected shape. Try to undo that.
3377
3378 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3379 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3380
3381 // Given a OR operation, check if we have the following pattern
3382 // ubfm c, b, imm, imm2 (or something that does the same jobs, see
3383 // isBitfieldExtractOp)
3384 // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
3385 // countTrailingZeros(mask2) == imm2 - imm + 1
3386 // f = d | c
3387 // if yes, replace the OR instruction with:
3388 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3389
3390 // OR is commutative, check all combinations of operand order and values of
3391 // BiggerPattern, i.e.
3392 // Opd0, Opd1, BiggerPattern=false
3393 // Opd1, Opd0, BiggerPattern=false
3394 // Opd0, Opd1, BiggerPattern=true
3395 // Opd1, Opd0, BiggerPattern=true
3396 // Several of these combinations may match, so check with BiggerPattern=false
3397 // first since that will produce better results by matching more instructions
3398 // and/or inserting fewer extra instructions.
3399 for (int I = 0; I < 4; ++I) {
3400
3401 SDValue Dst, Src;
3402 unsigned ImmR, ImmS;
3403 bool BiggerPattern = I / 2;
3404 SDValue OrOpd0Val = N->getOperand(I % 2);
3405 SDNode *OrOpd0 = OrOpd0Val.getNode();
3406 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3407 SDNode *OrOpd1 = OrOpd1Val.getNode();
3408
3409 unsigned BFXOpc;
3410 int DstLSB, Width;
3411 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3412 NumberOfIgnoredLowBits, BiggerPattern)) {
3413 // Check that the returned opcode is compatible with the pattern,
3414 // i.e., same type and zero extended (U and not S)
3415 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3416 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3417 continue;
3418
3419 // Compute the width of the bitfield insertion
3420 DstLSB = 0;
3421 Width = ImmS - ImmR + 1;
3422 // FIXME: This constraint is to catch bitfield insertion; we may
3423 // want to widen the pattern if we want to grab the general bitfield
3424 // move case.
3425 if (Width <= 0)
3426 continue;
3427
3428 // If the mask on the insertee is correct, we have a BFXIL operation. We
3429 // can share the ImmR and ImmS values from the already-computed UBFM.
3430 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3431 BiggerPattern,
3432 Src, DstLSB, Width)) {
3433 ImmR = (BitWidth - DstLSB) % BitWidth;
3434 ImmS = Width - 1;
3435 } else
3436 continue;
3437
3438 // Check the second part of the pattern
3439 EVT VT = OrOpd1Val.getValueType();
3440 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3441
3442 // Compute the Known Zero for the candidate of the first operand.
3443 // This allows to catch more general case than just looking for
3444 // AND with imm. Indeed, simplify-demanded-bits may have removed
3445 // the AND instruction because it proves it was useless.
3446 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3447
3448 // Check if there is enough room for the second operand to appear
3449 // in the first one
3450 APInt BitsToBeInserted =
3451 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3452
3453 if ((BitsToBeInserted & ~Known.Zero) != 0)
3454 continue;
3455
3456 // Set the first operand
3457 uint64_t Imm;
3458 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3459 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3460 // In that case, we can eliminate the AND
3461 Dst = OrOpd1->getOperand(0);
3462 else
3463 // Maybe the AND has been removed by simplify-demanded-bits
3464 // or is useful because it discards more bits
3465 Dst = OrOpd1Val;
3466
3467 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3468 // with shifted operand is more efficient.
3469 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3470 BiggerPattern))
3471 return true;
3472
3473 // both parts match
3474 SDLoc DL(N);
3475 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3476 CurDAG->getTargetConstant(ImmS, DL, VT)};
3477 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3478 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3479 return true;
3480 }
3481
3482 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3483 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3484 // mask (e.g., 0x000ffff0).
3485 uint64_t Mask0Imm, Mask1Imm;
3486 SDValue And0 = N->getOperand(0);
3487 SDValue And1 = N->getOperand(1);
3488 if (And0.hasOneUse() && And1.hasOneUse() &&
3489 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3490 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3491 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3492 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3493
3494 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3495 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3496 // bits to be inserted.
3497 if (isShiftedMask(Mask0Imm, VT)) {
3498 std::swap(And0, And1);
3499 std::swap(Mask0Imm, Mask1Imm);
3500 }
3501
3502 SDValue Src = And1->getOperand(0);
3503 SDValue Dst = And0->getOperand(0);
3504 unsigned LSB = llvm::countr_zero(Mask1Imm);
3505 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3506
3507 // The BFXIL inserts the low-order bits from a source register, so right
3508 // shift the needed bits into place.
3509 SDLoc DL(N);
3510 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3511 uint64_t LsrImm = LSB;
3512 if (Src->hasOneUse() &&
3513 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3514 (LsrImm + LSB) < BitWidth) {
3515 Src = Src->getOperand(0);
3516 LsrImm += LSB;
3517 }
3518
3519 SDNode *LSR = CurDAG->getMachineNode(
3520 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3521 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3522
3523 // BFXIL is an alias of BFM, so translate to BFM operands.
3524 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3525 unsigned ImmS = Width - 1;
3526
3527 // Create the BFXIL instruction.
3528 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3529 CurDAG->getTargetConstant(ImmR, DL, VT),
3530 CurDAG->getTargetConstant(ImmS, DL, VT)};
3531 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3532 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3533 return true;
3534 }
3535
3536 return false;
3537}
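// Editorial sketch (hypothetical helper, not used by the selector): the
// two-AND path above on concrete masks, for VT == i32 with
// Mask1Imm == 0x000ffff0 and Mask0Imm == ~Mask1Imm.
static constexpr bool exampleBfxilFromTwoAnds() {
  constexpr unsigned BitWidth = 32;
  constexpr uint32_t Mask1Imm = 0x000ffff0; // shifted mask covering bits 19:4
  constexpr uint32_t Mask0Imm = ~Mask1Imm;  // its complement
  constexpr unsigned LSB = 4;               // countr_zero(Mask1Imm)
  constexpr int Width = 16;                 // BitWidth - popcount(Mask0Imm)
  constexpr unsigned ImmR = (BitWidth - LSB) % BitWidth; // 28
  constexpr unsigned ImmS = Width - 1;                   // 15
  // Src is first shifted right by LSB (folded into the UBFM), and
  // BFM Dst, Src', #28, #15 then inserts the 16-bit field at bit 4.
  return (Mask0Imm & Mask1Imm) == 0 && ImmR == 28 && ImmS == 15;
}
static_assert(exampleBfxilFromTwoAnds(), "BFM operands from two ANDs");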
3538
3539bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3540 if (N->getOpcode() != ISD::OR)
3541 return false;
3542
3543 APInt NUsefulBits;
3544 getUsefulBits(SDValue(N, 0), NUsefulBits);
3545
3546 // If none of the bits are useful, just return UNDEF.
3547 if (!NUsefulBits) {
3548 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3549 return true;
3550 }
3551
3552 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3553 return true;
3554
3555 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3556}
3557
3558 /// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3559/// equivalent of a left shift by a constant amount followed by an and masking
3560/// out a contiguous set of bits.
3561bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3562 if (N->getOpcode() != ISD::AND)
3563 return false;
3564
3565 EVT VT = N->getValueType(0);
3566 if (VT != MVT::i32 && VT != MVT::i64)
3567 return false;
3568
3569 SDValue Op0;
3570 int DstLSB, Width;
3571 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3572 Op0, DstLSB, Width))
3573 return false;
3574
3575 // ImmR is the rotate right amount.
3576 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3577 // ImmS is the most significant bit of the source to be moved.
3578 unsigned ImmS = Width - 1;
3579
3580 SDLoc DL(N);
3581 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3582 CurDAG->getTargetConstant(ImmS, DL, VT)};
3583 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3584 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3585 return true;
3586}
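// Editorial sketch (hypothetical helper, not used by the selector): the
// shl+and pattern above for i32 with DstLSB == 5 and Width == 8, i.e.
// (x << 5) & 0x1fe0, which becomes UBFIZ Wd, Wn, #5, #8, the alias of
// UBFM Wd, Wn, #27, #7.
static constexpr bool exampleUbfizImms() {
  constexpr unsigned BitWidth = 32;
  constexpr unsigned DstLSB = 5;
  constexpr unsigned Width = 8;
  constexpr unsigned ImmR = (BitWidth - DstLSB) % BitWidth; // 27
  constexpr unsigned ImmS = Width - 1;                      // 7
  constexpr uint32_t Mask = ((1u << Width) - 1) << DstLSB;  // 0x1fe0
  return ImmR == 27 && ImmS == 7 && Mask == 0x1fe0;
}
static_assert(exampleUbfizImms(), "UBFIZ/UBFM operand example");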
3587
3588/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3589/// variable shift/rotate instructions.
3590bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3591 EVT VT = N->getValueType(0);
3592
3593 unsigned Opc;
3594 switch (N->getOpcode()) {
3595 case ISD::ROTR:
3596 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3597 break;
3598 case ISD::SHL:
3599 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3600 break;
3601 case ISD::SRL:
3602 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3603 break;
3604 case ISD::SRA:
3605 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3606 break;
3607 default:
3608 return false;
3609 }
3610
3611 uint64_t Size;
3612 uint64_t Bits;
3613 if (VT == MVT::i32) {
3614 Bits = 5;
3615 Size = 32;
3616 } else if (VT == MVT::i64) {
3617 Bits = 6;
3618 Size = 64;
3619 } else
3620 return false;
3621
3622 SDValue ShiftAmt = N->getOperand(1);
3623 SDLoc DL(N);
3624 SDValue NewShiftAmt;
3625
3626 // Skip over an extend of the shift amount.
3627 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3628 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3629 ShiftAmt = ShiftAmt->getOperand(0);
3630
3631 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3632 SDValue Add0 = ShiftAmt->getOperand(0);
3633 SDValue Add1 = ShiftAmt->getOperand(1);
3634 uint64_t Add0Imm;
3635 uint64_t Add1Imm;
3636 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
3637 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3638 // to avoid the ADD/SUB.
3639 NewShiftAmt = Add0;
3640 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3641 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
3642 (Add0Imm % Size == 0)) {
3643 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3644 // to generate a NEG instead of a SUB from a constant.
3645 unsigned NegOpc;
3646 unsigned ZeroReg;
3647 EVT SubVT = ShiftAmt->getValueType(0);
3648 if (SubVT == MVT::i32) {
3649 NegOpc = AArch64::SUBWrr;
3650 ZeroReg = AArch64::WZR;
3651 } else {
3652 assert(SubVT == MVT::i64);
3653 NegOpc = AArch64::SUBXrr;
3654 ZeroReg = AArch64::XZR;
3655 }
3656 SDValue Zero =
3657 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3658 MachineSDNode *Neg =
3659 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
3660 NewShiftAmt = SDValue(Neg, 0);
3661 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3662 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
3663 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3664 // to generate a NOT instead of a SUB from a constant.
3665 unsigned NotOpc;
3666 unsigned ZeroReg;
3667 EVT SubVT = ShiftAmt->getValueType(0);
3668 if (SubVT == MVT::i32) {
3669 NotOpc = AArch64::ORNWrr;
3670 ZeroReg = AArch64::WZR;
3671 } else {
3672 assert(SubVT == MVT::i64);
3673 NotOpc = AArch64::ORNXrr;
3674 ZeroReg = AArch64::XZR;
3675 }
3676 SDValue Zero =
3677 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3678 MachineSDNode *Not =
3679 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
3680 NewShiftAmt = SDValue(Not, 0);
3681 } else
3682 return false;
3683 } else {
3684 // If the shift amount is masked with an AND, check that the mask covers the
3685 // bits that are implicitly ANDed off by the above opcodes and if so, skip
3686 // the AND.
3687 uint64_t MaskImm;
3688 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
3689 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
3690 return false;
3691
3692 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
3693 return false;
3694
3695 NewShiftAmt = ShiftAmt->getOperand(0);
3696 }
3697
3698 // Narrow/widen the shift amount to match the size of the shift operation.
3699 if (VT == MVT::i32)
3700 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
3701 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
3702 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
3703 MachineSDNode *Ext = CurDAG->getMachineNode(
3704 AArch64::SUBREG_TO_REG, DL, VT,
3705 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
3706 NewShiftAmt = SDValue(Ext, 0);
3707 }
3708
3709 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
3710 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3711 return true;
3712}
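// Editorial sketch (hypothetical helper, not used by the selector): why the
// ADD/SUB/AND feeding the shift amount can be dropped. The variable shift
// instructions only consume the low 5 (W) or 6 (X) bits of the amount, so for
// i32 an amount of x, x + 32, or x & 0x3f selects the same shift, and 32 - x
// reduces to a plain NEG.
static constexpr bool exampleShiftAmountMod() {
  constexpr unsigned X = 7;
  constexpr unsigned Direct = X & 31;          // LSLV uses amt & 31
  constexpr unsigned PlusSize = (X + 32) & 31; // x + 32: same low 5 bits
  constexpr unsigned Masked = (X & 0x3f) & 31; // wider mask is redundant
  constexpr unsigned Neg = (0u - X) & 31;      // neg(x) mod 32
  return Direct == PlusSize && Direct == Masked && Neg == ((32 - X) & 31);
}
static_assert(exampleShiftAmountMod(), "shift amount modulo example");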
3713
3714 static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
3715 SDValue &FixedPos,
3716 unsigned RegWidth,
3717 bool isReciprocal) {
3718 APFloat FVal(0.0);
3719 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
3720 FVal = CN->getValueAPF();
3721 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
3722 // Some otherwise illegal constants are allowed in this case.
3723 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
3724 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
3725 return false;
3726
3727 ConstantPoolSDNode *CN =
3728 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
3729 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
3730 } else
3731 return false;
3732
3733 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
3734 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
3735 // x-register.
3736 //
3737 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
3738 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
3739 // integers.
3740 bool IsExact;
3741
3742 if (isReciprocal)
3743 if (!FVal.getExactInverse(&FVal))
3744 return false;
3745
3746 // fbits is between 1 and 64 in the worst-case, which means the fmul
3747 // could have 2^64 as an actual operand. Need 65 bits of precision.
3748 APSInt IntVal(65, true);
3749 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
3750
3751 // N.b. isPowerOf2 also checks for > 0.
3752 if (!IsExact || !IntVal.isPowerOf2())
3753 return false;
3754 unsigned FBits = IntVal.logBase2();
3755
3756 // Checks above should have guaranteed that we haven't lost information in
3757 // finding FBits, but it must still be in range.
3758 if (FBits == 0 || FBits > RegWidth) return false;
3759
3760 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
3761 return true;
3762}
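// Editorial sketch (hypothetical helper, not used by the selector): the
// power-of-two check above. For (fp_to_sint (fmul x, 65536.0)) the exact
// integer value of the multiplier is 2^16, so FBits == 16 and the pair can
// become a single fixed-point FCVTZS with #16 fractional bits; a multiplier
// such as 3.0 has no such FBits and is rejected.
static constexpr bool exampleFcvtFixedPointFBits() {
  constexpr uint64_t IntVal = 65536;
  constexpr bool IsPow2 = IntVal != 0 && (IntVal & (IntVal - 1)) == 0;
  constexpr unsigned FBits = 16;    // logBase2(IntVal)
  constexpr unsigned RegWidth = 32; // destination is a W register
  return IsPow2 && (uint64_t(1) << FBits) == IntVal && FBits >= 1 &&
         FBits <= RegWidth;
}
static_assert(exampleFcvtFixedPointFBits(), "fixed-point FBits example");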
3763
3764bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
3765 unsigned RegWidth) {
3766 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3767 false);
3768}
3769
3770bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
3771 SDValue &FixedPos,
3772 unsigned RegWidth) {
3773 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3774 true);
3775}
3776
3777 // Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
3778 // fields of the string, converts them to integer values and combines these
3779// into a single value to be used in the MRS/MSR instruction.
3780 static int getIntOperandFromRegisterString(StringRef RegString) {
3781 SmallVector<StringRef, 5> Fields;
3782 RegString.split(Fields, ':');
3783
3784 if (Fields.size() == 1)
3785 return -1;
3786
3787 assert(Fields.size() == 5
3788 && "Invalid number of fields in read register string");
3789
3790 SmallVector<int, 5> Ops;
3791 bool AllIntFields = true;
3792
3793 for (StringRef Field : Fields) {
3794 unsigned IntField;
3795 AllIntFields &= !Field.getAsInteger(10, IntField);
3796 Ops.push_back(IntField);
3797 }
3798
3799 assert(AllIntFields &&
3800 "Unexpected non-integer value in special register string.");
3801 (void)AllIntFields;
3802
3803 // Need to combine the integer fields of the string into a single value
3804 // based on the bit encoding of MRS/MSR instruction.
3805 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
3806 (Ops[3] << 3) | (Ops[4]);
3807}
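// Editorial sketch (hypothetical helper, not used by the selector): the field
// packing above on a concrete register string. "3:3:13:0:2" is the generic
// o0:op1:CRn:CRm:op2 spelling of TPIDR_EL0, and the packed value is the
// immediate carried by the MRS/MSR machine node.
static constexpr int examplePackSysRegFields() {
  constexpr int O0 = 3, Op1 = 3, CRn = 13, CRm = 0, Op2 = 2;
  return (O0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2;
}
static_assert(examplePackSysRegFields() == 0xde82,
              "s3_3_c13_c0_2 (TPIDR_EL0) packs to 0xde82");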
3808
3809// Lower the read_register intrinsic to an MRS instruction node if the special
3810 // register string argument is either of the form detailed in the ACLE (the
3811 // form described in getIntOperandFromRegisterString) or is a named register
3812// known by the MRS SysReg mapper.
3813bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
3814 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
3815 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3816 SDLoc DL(N);
3817
3818 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
3819
3820 unsigned Opcode64Bit = AArch64::MRS;
3821 int Imm = getIntOperandFromRegisterString(RegString->getString());
3822 if (Imm == -1) {
3823 // No match, so use the sysreg mapper to map the remaining possible strings to
3824 // the value for the register to be used for the instruction operand.
3825 const auto *TheReg =
3826 AArch64SysReg::lookupSysRegByName(RegString->getString());
3827 if (TheReg && TheReg->Readable &&
3828 TheReg->haveFeatures(Subtarget->getFeatureBits()))
3829 Imm = TheReg->Encoding;
3830 else
3831 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
3832
3833 if (Imm == -1) {
3834 // Still no match, see if this is "pc" or give up.
3835 if (!ReadIs128Bit && RegString->getString() == "pc") {
3836 Opcode64Bit = AArch64::ADR;
3837 Imm = 0;
3838 } else {
3839 return false;
3840 }
3841 }
3842 }
3843
3844 SDValue InChain = N->getOperand(0);
3845 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
3846 if (!ReadIs128Bit) {
3847 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
3848 {SysRegImm, InChain});
3849 } else {
3850 SDNode *MRRS = CurDAG->getMachineNode(
3851 AArch64::MRRS, DL,
3852 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
3853 {SysRegImm, InChain});
3854
3855 // Sysregs are not endian. The even register always contains the low half
3856 // of the register.
3857 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
3858 SDValue(MRRS, 0));
3859 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
3860 SDValue(MRRS, 0));
3861 SDValue OutChain = SDValue(MRRS, 1);
3862
3863 ReplaceUses(SDValue(N, 0), Lo);
3864 ReplaceUses(SDValue(N, 1), Hi);
3865 ReplaceUses(SDValue(N, 2), OutChain);
3866 }
3867 return true;
3868}
3869
3870// Lower the write_register intrinsic to an MSR instruction node if the special
3871 // register string argument is either of the form detailed in the ACLE (the
3872 // form described in getIntOperandFromRegisterString) or is a named register
3873// known by the MSR SysReg mapper.
3874bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
3875 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
3876 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3877 SDLoc DL(N);
3878
3879 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
3880
3881 if (!WriteIs128Bit) {
3882 // Check if the register was one of those allowed as the pstatefield value
3883 // in the MSR (immediate) instruction. To accept the values allowed in the
3884 // pstatefield for the MSR (immediate) instruction, we also require that an
3885 // immediate value has been provided as an argument; we know that this is
3886 // the case as it has been ensured by semantic checking.
3887 auto trySelectPState = [&](auto PMapper, unsigned State) {
3888 if (PMapper) {
3889 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
3890 "Expected a constant integer expression.");
3891 unsigned Reg = PMapper->Encoding;
3892 uint64_t Immed = N->getConstantOperandVal(2);
3893 CurDAG->SelectNodeTo(
3894 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
3895 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
3896 return true;
3897 }
3898 return false;
3899 };
3900
3901 if (trySelectPState(
3902 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
3903 AArch64::MSRpstateImm4))
3904 return true;
3905 if (trySelectPState(
3906 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
3907 AArch64::MSRpstateImm1))
3908 return true;
3909 }
3910
3911 int Imm = getIntOperandFromRegisterString(RegString->getString());
3912 if (Imm == -1) {
3913 // Use the sysreg mapper to attempt to map the remaining possible strings
3914 // to the value for the register to be used for the MSR (register)
3915 // instruction operand.
3916 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
3917 if (TheReg && TheReg->Writeable &&
3918 TheReg->haveFeatures(Subtarget->getFeatureBits()))
3919 Imm = TheReg->Encoding;
3920 else
3921 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
3922
3923 if (Imm == -1)
3924 return false;
3925 }
3926
3927 SDValue InChain = N->getOperand(0);
3928 if (!WriteIs128Bit) {
3929 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
3930 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
3931 N->getOperand(2), InChain);
3932 } else {
3933 // No endian swap. The lower half always goes into the even subreg, and the
3934 // higher half always into the odd subreg.
3935 SDNode *Pair = CurDAG->getMachineNode(
3936 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
3937 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
3938 MVT::i32),
3939 N->getOperand(2),
3940 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
3941 N->getOperand(3),
3942 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
3943
3944 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
3945 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
3946 SDValue(Pair, 0), InChain);
3947 }
3948
3949 return true;
3950}
3951
3952/// We've got special pseudo-instructions for these
3953bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3954 unsigned Opcode;
3955 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
3956
3957 // Leave IR for LSE if subtarget supports it.
3958 if (Subtarget->hasLSE()) return false;
3959
3960 if (MemTy == MVT::i8)
3961 Opcode = AArch64::CMP_SWAP_8;
3962 else if (MemTy == MVT::i16)
3963 Opcode = AArch64::CMP_SWAP_16;
3964 else if (MemTy == MVT::i32)
3965 Opcode = AArch64::CMP_SWAP_32;
3966 else if (MemTy == MVT::i64)
3967 Opcode = AArch64::CMP_SWAP_64;
3968 else
3969 llvm_unreachable("Unknown AtomicCmpSwap type");
3970
3971 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
3972 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
3973 N->getOperand(0)};
3974 SDNode *CmpSwap = CurDAG->getMachineNode(
3975 Opcode, SDLoc(N),
3976 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
3977
3978 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
3979 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
3980
3981 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
3982 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
3983 CurDAG->RemoveDeadNode(N);
3984
3985 return true;
3986}
3987
3988bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
3989 SDValue &Shift) {
3990 if (!isa<ConstantSDNode>(N))
3991 return false;
3992
3993 SDLoc DL(N);
3994 uint64_t Val = cast<ConstantSDNode>(N)
3995 ->getAPIntValue()
3996 .trunc(VT.getFixedSizeInBits())
3997 .getZExtValue();
3998
3999 switch (VT.SimpleTy) {
4000 case MVT::i8:
4001 // All immediates are supported.
4002 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4003 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4004 return true;
4005 case MVT::i16:
4006 case MVT::i32:
4007 case MVT::i64:
4008 // Support 8bit unsigned immediates.
4009 if (Val <= 255) {
4010 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4011 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4012 return true;
4013 }
4014 // Support 16bit unsigned immediates that are a multiple of 256.
4015 if (Val <= 65280 && Val % 256 == 0) {
4016 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4017 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4018 return true;
4019 }
4020 break;
4021 default:
4022 break;
4023 }
4024
4025 return false;
4026}
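// Editorial sketch (hypothetical helper, not used by the selector): the
// immediate splitting above. For i16/i32/i64 elements a value of 8960
// (0x2300) is too big for the unshifted 8-bit form but is a multiple of 256,
// so it is encoded as #0x23 with an LSL #8 shift.
static constexpr bool exampleSveAddSubImmSplit() {
  constexpr uint64_t Val = 0x2300;
  constexpr bool FitsUnshifted = Val <= 255;
  constexpr bool FitsShifted = Val <= 65280 && Val % 256 == 0;
  constexpr uint64_t Imm = Val >> 8; // 0x23
  constexpr unsigned Shift = 8;
  return !FitsUnshifted && FitsShifted && (Imm << Shift) == Val;
}
static_assert(exampleSveAddSubImmSplit(), "SVE ADD/SUB immediate split");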
4027
4028bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4029 SDValue &Imm, SDValue &Shift,
4030 bool Negate) {
4031 if (!isa<ConstantSDNode>(N))
4032 return false;
4033
4034 SDLoc DL(N);
4035 int64_t Val = cast<ConstantSDNode>(N)
4036 ->getAPIntValue()
4037 .trunc(VT.getFixedSizeInBits())
4038 .getSExtValue();
4039
4040 if (Negate)
4041 Val = -Val;
4042
4043 // Signed saturating instructions treat their immediate operand as unsigned,
4044 // whereas the related intrinsics define their operands to be signed. This
4045 // means we can only use the immediate form when the operand is non-negative.
4046 if (Val < 0)
4047 return false;
4048
4049 switch (VT.SimpleTy) {
4050 case MVT::i8:
4051 // All positive immediates are supported.
4052 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4053 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4054 return true;
4055 case MVT::i16:
4056 case MVT::i32:
4057 case MVT::i64:
4058 // Support 8bit positive immediates.
4059 if (Val <= 255) {
4060 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4061 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4062 return true;
4063 }
4064 // Support 16bit positive immediates that are a multiple of 256.
4065 if (Val <= 65280 && Val % 256 == 0) {
4066 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4067 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4068 return true;
4069 }
4070 break;
4071 default:
4072 break;
4073 }
4074
4075 return false;
4076}
4077
4078bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4079 SDValue &Shift) {
4080 if (!isa<ConstantSDNode>(N))
4081 return false;
4082
4083 SDLoc DL(N);
4084 int64_t Val = cast<ConstantSDNode>(N)
4085 ->getAPIntValue()
4086 .trunc(VT.getFixedSizeInBits())
4087 .getSExtValue();
4088
4089 switch (VT.SimpleTy) {
4090 case MVT::i8:
4091 // All immediates are supported.
4092 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4093 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4094 return true;
4095 case MVT::i16:
4096 case MVT::i32:
4097 case MVT::i64:
4098 // Support 8bit signed immediates.
4099 if (Val >= -128 && Val <= 127) {
4100 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4101 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4102 return true;
4103 }
4104 // Support 16bit signed immediates that are a multiple of 256.
4105 if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
4106 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4107 Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32);
4108 return true;
4109 }
4110 break;
4111 default:
4112 break;
4113 }
4114
4115 return false;
4116}
4117
4118bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4119 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4120 int64_t ImmVal = CNode->getSExtValue();
4121 SDLoc DL(N);
4122 if (ImmVal >= -128 && ImmVal < 128) {
4123 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
4124 return true;
4125 }
4126 }
4127 return false;
4128}
4129
4130bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4131 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4132 uint64_t ImmVal = CNode->getZExtValue();
4133
4134 switch (VT.SimpleTy) {
4135 case MVT::i8:
4136 ImmVal &= 0xFF;
4137 break;
4138 case MVT::i16:
4139 ImmVal &= 0xFFFF;
4140 break;
4141 case MVT::i32:
4142 ImmVal &= 0xFFFFFFFF;
4143 break;
4144 case MVT::i64:
4145 break;
4146 default:
4147 llvm_unreachable("Unexpected type");
4148 }
4149
4150 if (ImmVal < 256) {
4151 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4152 return true;
4153 }
4154 }
4155 return false;
4156}
4157
4158bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4159 bool Invert) {
4160 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4161 uint64_t ImmVal = CNode->getZExtValue();
4162 SDLoc DL(N);
4163
4164 if (Invert)
4165 ImmVal = ~ImmVal;
4166
4167 // Shift mask depending on type size.
4168 switch (VT.SimpleTy) {
4169 case MVT::i8:
4170 ImmVal &= 0xFF;
4171 ImmVal |= ImmVal << 8;
4172 ImmVal |= ImmVal << 16;
4173 ImmVal |= ImmVal << 32;
4174 break;
4175 case MVT::i16:
4176 ImmVal &= 0xFFFF;
4177 ImmVal |= ImmVal << 16;
4178 ImmVal |= ImmVal << 32;
4179 break;
4180 case MVT::i32:
4181 ImmVal &= 0xFFFFFFFF;
4182 ImmVal |= ImmVal << 32;
4183 break;
4184 case MVT::i64:
4185 break;
4186 default:
4187 llvm_unreachable("Unexpected type");
4188 }
4189
4190 uint64_t encoding;
4191 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
4192 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
4193 return true;
4194 }
4195 }
4196 return false;
4197}
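// Editorial sketch (hypothetical helper, not used by the selector): the
// replication above. A per-element i8 mask of 0xf0 is splatted across 64 bits
// before being handed to the logical-immediate encoder, which always operates
// on a 64-bit pattern.
static constexpr bool exampleSveLogicalImmSplat() {
  uint64_t ImmVal = 0xf0;
  ImmVal &= 0xFF;
  ImmVal |= ImmVal << 8;
  ImmVal |= ImmVal << 16;
  ImmVal |= ImmVal << 32;
  return ImmVal == 0xf0f0f0f0f0f0f0f0ULL;
}
static_assert(exampleSveLogicalImmSplat(), "SVE logical immediate splat");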
4198
4199// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4200// Rather than attempt to normalise everything we can sometimes saturate the
4201// shift amount during selection. This function also allows for consistent
4202// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4203// required by the instructions.
4204bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4205 uint64_t High, bool AllowSaturation,
4206 SDValue &Imm) {
4207 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4208 uint64_t ImmVal = CN->getZExtValue();
4209
4210 // Reject shift amounts that are too small.
4211 if (ImmVal < Low)
4212 return false;
4213
4214 // Reject or saturate shift amounts that are too big.
4215 if (ImmVal > High) {
4216 if (!AllowSaturation)
4217 return false;
4218 ImmVal = High;
4219 }
4220
4221 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4222 return true;
4223 }
4224
4225 return false;
4226}
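// Editorial sketch (hypothetical helper, not used by the selector): the
// saturation above. For a right shift on i8 elements the valid immediate
// range is [1, 8]; a requested shift of 27 is clamped to 8 when saturation is
// allowed, while an amount below Low is always rejected.
static constexpr uint64_t exampleSaturateShiftImm(uint64_t ImmVal,
                                                  uint64_t High) {
  return ImmVal > High ? High : ImmVal;
}
static_assert(exampleSaturateShiftImm(27, 8) == 8, "clamped to High");
static_assert(exampleSaturateShiftImm(3, 8) == 3, "in range, unchanged");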
4227
4228bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4229 // tagp(FrameIndex, IRGstack, tag_offset):
4230 // since the offset between FrameIndex and IRGstack is a compile-time
4231 // constant, this can be lowered to a single ADDG instruction.
4232 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4233 return false;
4234 }
4235
4236 SDValue IRG_SP = N->getOperand(2);
4237 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4238 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4239 return false;
4240 }
4241
4242 const TargetLowering *TLI = getTargetLowering();
4243 SDLoc DL(N);
4244 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4245 SDValue FiOp = CurDAG->getTargetFrameIndex(
4246 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4247 int TagOffset = N->getConstantOperandVal(3);
4248
4249 SDNode *Out = CurDAG->getMachineNode(
4250 AArch64::TAGPstack, DL, MVT::i64,
4251 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4252 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4253 ReplaceNode(N, Out);
4254 return true;
4255}
4256
4257void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4258 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4259 "llvm.aarch64.tagp third argument must be an immediate");
4260 if (trySelectStackSlotTagP(N))
4261 return;
4262 // FIXME: above applies in any case when offset between Op1 and Op2 is a
4263 // compile-time constant, not just for stack allocations.
4264
4265 // General case for unrelated pointers in Op1 and Op2.
4266 SDLoc DL(N);
4267 int TagOffset = N->getConstantOperandVal(3);
4268 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4269 {N->getOperand(1), N->getOperand(2)});
4270 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4271 {SDValue(N1, 0), N->getOperand(2)});
4272 SDNode *N3 = CurDAG->getMachineNode(
4273 AArch64::ADDG, DL, MVT::i64,
4274 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4275 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4276 ReplaceNode(N, N3);
4277}
4278
4279bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4280 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4281
4282 // Bail when not a "cast" like insert_subvector.
4283 if (N->getConstantOperandVal(2) != 0)
4284 return false;
4285 if (!N->getOperand(0).isUndef())
4286 return false;
4287
4288 // Bail when normal isel should do the job.
4289 EVT VT = N->getValueType(0);
4290 EVT InVT = N->getOperand(1).getValueType();
4291 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4292 return false;
4293 if (InVT.getSizeInBits() <= 128)
4294 return false;
4295
4296 // NOTE: We can only get here when doing fixed length SVE code generation.
4297 // We do manual selection because the types involved are not linked to real
4298 // registers (despite being legal) and must be coerced into SVE registers.
4299
4299
4300 assert(VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
4301 "Expected to insert into a packed scalable vector!");
4302
4303 SDLoc DL(N);
4304 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4305 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4306 N->getOperand(1), RC));
4307 return true;
4308}
4309
4310bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4311 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4312
4313 // Bail when not a "cast" like extract_subvector.
4314 if (N->getConstantOperandVal(1) != 0)
4315 return false;
4316
4317 // Bail when normal isel can do the job.
4318 EVT VT = N->getValueType(0);
4319 EVT InVT = N->getOperand(0).getValueType();
4320 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4321 return false;
4322 if (VT.getSizeInBits() <= 128)
4323 return false;
4324
4325 // NOTE: We can only get here when doing fixed length SVE code generation.
4326 // We do manual selection because the types involved are not linked to real
4327 // registers (despite being legal) and must be coerced into SVE registers.
4328
4328
4329 assert(InVT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
4330 "Expected to extract from a packed scalable vector!");
4331
4332 SDLoc DL(N);
4333 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4334 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4335 N->getOperand(0), RC));
4336 return true;
4337}
4338
4339bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4340 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4341
4342 SDValue N0 = N->getOperand(0);
4343 SDValue N1 = N->getOperand(1);
4344 EVT VT = N->getValueType(0);
4345
4346 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4347 // Rotate by a constant is a funnel shift in IR which is expanded to
4348 // an OR with shifted operands.
4349 // We do the following transform:
4350 // OR N0, N1 -> xar (x, y, imm)
4351 // Where:
4352 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4353 // N0 = SHL_PRED true, V, splat(bits-imm)
4354 // V = (xor x, y)
4355 if (VT.isScalableVector() && Subtarget->hasSVE2orSME()) {
4356 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4357 N1.getOpcode() != AArch64ISD::SRL_PRED)
4358 std::swap(N0, N1);
4359 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4360 N1.getOpcode() != AArch64ISD::SRL_PRED)
4361 return false;
4362
4363 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4364 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4365 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4366 return false;
4367
4368 SDValue XOR = N0.getOperand(1);
4369 if (XOR.getOpcode() != ISD::XOR || XOR != N1.getOperand(1))
4370 return false;
4371
4372 APInt ShlAmt, ShrAmt;
4373 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4374 !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
4375 return false;
4376
4377 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4378 return false;
4379
4380 SDLoc DL(N);
4381 SDValue Imm =
4382 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4383
4384 SDValue Ops[] = {XOR.getOperand(0), XOR.getOperand(1), Imm};
4385 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4386 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4387 AArch64::XAR_ZZZI_D})) {
4388 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4389 return true;
4390 }
4391 return false;
4392 }
4393
4394 if (!Subtarget->hasSHA3())
4395 return false;
4396
4397 if (N0->getOpcode() != AArch64ISD::VSHL ||
4398 N1->getOpcode() != AArch64ISD::VLSHR)
4399 return false;
4400
4401 if (N0->getOperand(0) != N1->getOperand(0) ||
4402 N1->getOperand(0)->getOpcode() != ISD::XOR)
4403 return false;
4404
4405 SDValue XOR = N0.getOperand(0);
4406 SDValue R1 = XOR.getOperand(0);
4407 SDValue R2 = XOR.getOperand(1);
4408
4409 unsigned HsAmt = N0.getConstantOperandVal(1);
4410 unsigned ShAmt = N1.getConstantOperandVal(1);
4411
4412 SDLoc DL = SDLoc(N0.getOperand(1));
4413 SDValue Imm = CurDAG->getTargetConstant(
4414 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4415
4416 if (ShAmt + HsAmt != 64)
4417 return false;
4418
4419 SDValue Ops[] = {R1, R2, Imm};
4420 CurDAG->SelectNodeTo(N, AArch64::XAR, N0.getValueType(), Ops);
4421
4422 return true;
4423}
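// Editorial sketch (hypothetical helper, not used by the selector): the
// rotate decomposition matched above. rotr(x ^ y, 9) on a 64-bit lane is
// legalized to (v << 55) | (v >> 9); the two shift amounts summing to the
// element size is exactly what the XAR pattern requires, and rotating the
// result back by 9 recovers v.
static constexpr bool exampleXarRotate() {
  constexpr uint64_t V = 0x0123456789abcdefULL ^ 0xfedcba9876543210ULL;
  constexpr unsigned ShAmt = 9, HsAmt = 55; // srl and shl amounts
  constexpr uint64_t RotR = (V << HsAmt) | (V >> ShAmt);
  constexpr uint64_t Back = (RotR << ShAmt) | (RotR >> HsAmt);
  return (ShAmt + HsAmt == 64) && Back == V;
}
static_assert(exampleXarRotate(), "XAR rotate-by-9 example");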
4424
4425void AArch64DAGToDAGISel::Select(SDNode *Node) {
4426 // If we have a custom node, we already have selected!
4427 if (Node->isMachineOpcode()) {
4428 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4429 Node->setNodeId(-1);
4430 return;
4431 }
4432
4433 // A few custom selection cases.
4434 EVT VT = Node->getValueType(0);
4435
4436 switch (Node->getOpcode()) {
4437 default:
4438 break;
4439
4440 case ISD::ATOMIC_CMP_SWAP:
4441 if (SelectCMP_SWAP(Node))
4442 return;
4443 break;
4444
4445 case ISD::READ_REGISTER:
4446 case AArch64ISD::MRRS:
4447 if (tryReadRegister(Node))
4448 return;
4449 break;
4450
4451 case ISD::WRITE_REGISTER:
4452 case AArch64ISD::MSRR:
4453 if (tryWriteRegister(Node))
4454 return;
4455 break;
4456
4457 case ISD::LOAD: {
4458 // Try to select as an indexed load. Fall through to normal processing
4459 // if we can't.
4460 if (tryIndexedLoad(Node))
4461 return;
4462 break;
4463 }
4464
4465 case ISD::SRL:
4466 case ISD::AND:
4467 case ISD::SRA:
4468 case ISD::SIGN_EXTEND_INREG:
4469 if (tryBitfieldExtractOp(Node))
4470 return;
4471 if (tryBitfieldInsertInZeroOp(Node))
4472 return;
4473 [[fallthrough]];
4474 case ISD::ROTR:
4475 case ISD::SHL:
4476 if (tryShiftAmountMod(Node))
4477 return;
4478 break;
4479
4480 case ISD::SIGN_EXTEND:
4481 if (tryBitfieldExtractOpFromSExt(Node))
4482 return;
4483 break;
4484
4485 case ISD::OR:
4486 if (tryBitfieldInsertOp(Node))
4487 return;
4488 if (trySelectXAR(Node))
4489 return;
4490 break;
4491
4492 case ISD::EXTRACT_SUBVECTOR: {
4493 if (trySelectCastScalableToFixedLengthVector(Node))
4494 return;
4495 break;
4496 }
4497
4498 case ISD::INSERT_SUBVECTOR: {
4499 if (trySelectCastFixedLengthToScalableVector(Node))
4500 return;
4501 break;
4502 }
4503
4504 case ISD::Constant: {
4505 // Materialize zero constants as copies from WZR/XZR. This allows
4506 // the coalescer to propagate these into other instructions.
4507 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4508 if (ConstNode->isZero()) {
4509 if (VT == MVT::i32) {
4510 SDValue New = CurDAG->getCopyFromReg(
4511 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4512 ReplaceNode(Node, New.getNode());
4513 return;
4514 } else if (VT == MVT::i64) {
4515 SDValue New = CurDAG->getCopyFromReg(
4516 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4517 ReplaceNode(Node, New.getNode());
4518 return;
4519 }
4520 }
4521 break;
4522 }
4523
4524 case ISD::FrameIndex: {
4525 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4526 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4527 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4528 const TargetLowering *TLI = getTargetLowering();
4529 SDValue TFI = CurDAG->getTargetFrameIndex(
4530 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4531 SDLoc DL(Node);
4532 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4533 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4534 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4535 return;
4536 }
4537 case ISD::INTRINSIC_W_CHAIN: {
4538 unsigned IntNo = Node->getConstantOperandVal(1);
4539 switch (IntNo) {
4540 default:
4541 break;
4542 case Intrinsic::aarch64_ldaxp:
4543 case Intrinsic::aarch64_ldxp: {
4544 unsigned Op =
4545 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4546 SDValue MemAddr = Node->getOperand(2);
4547 SDLoc DL(Node);
4548 SDValue Chain = Node->getOperand(0);
4549
4550 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4551 MVT::Other, MemAddr, Chain);
4552
4553 // Transfer memoperands.
4554 MachineMemOperand *MemOp =
4555 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4556 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4557 ReplaceNode(Node, Ld);
4558 return;
4559 }
4560 case Intrinsic::aarch64_stlxp:
4561 case Intrinsic::aarch64_stxp: {
4562 unsigned Op =
4563 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4564 SDLoc DL(Node);
4565 SDValue Chain = Node->getOperand(0);
4566 SDValue ValLo = Node->getOperand(2);
4567 SDValue ValHi = Node->getOperand(3);
4568 SDValue MemAddr = Node->getOperand(4);
4569
4570 // Place arguments in the right order.
4571 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4572
4573 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4574 // Transfer memoperands.
4575 MachineMemOperand *MemOp =
4576 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4577 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4578
4579 ReplaceNode(Node, St);
4580 return;
4581 }
4582 case Intrinsic::aarch64_neon_ld1x2:
4583 if (VT == MVT::v8i8) {
4584 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4585 return;
4586 } else if (VT == MVT::v16i8) {
4587 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4588 return;
4589 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4590 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4591 return;
4592 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4593 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
4594 return;
4595 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4596 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
4597 return;
4598 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4599 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
4600 return;
4601 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4602 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4603 return;
4604 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4605 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
4606 return;
4607 }
4608 break;
4609 case Intrinsic::aarch64_neon_ld1x3:
4610 if (VT == MVT::v8i8) {
4611 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
4612 return;
4613 } else if (VT == MVT::v16i8) {
4614 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
4615 return;
4616 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4617 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
4618 return;
4619 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4620 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
4621 return;
4622 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4623 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
4624 return;
4625 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4626 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
4627 return;
4628 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4629 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4630 return;
4631 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4632 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
4633 return;
4634 }
4635 break;
4636 case Intrinsic::aarch64_neon_ld1x4:
4637 if (VT == MVT::v8i8) {
4638 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
4639 return;
4640 } else if (VT == MVT::v16i8) {
4641 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
4642 return;
4643 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4644 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
4645 return;
4646 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4647 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
4648 return;
4649 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4650 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
4651 return;
4652 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4653 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
4654 return;
4655 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4656 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4657 return;
4658 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4659 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
4660 return;
4661 }
4662 break;
4663 case Intrinsic::aarch64_neon_ld2:
4664 if (VT == MVT::v8i8) {
4665 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
4666 return;
4667 } else if (VT == MVT::v16i8) {
4668 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
4669 return;
4670 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4671 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
4672 return;
4673 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4674 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
4675 return;
4676 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4677 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
4678 return;
4679 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4680 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
4681 return;
4682 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4683 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4684 return;
4685 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4686 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
4687 return;
4688 }
4689 break;
4690 case Intrinsic::aarch64_neon_ld3:
4691 if (VT == MVT::v8i8) {
4692 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
4693 return;
4694 } else if (VT == MVT::v16i8) {
4695 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
4696 return;
4697 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4698 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
4699 return;
4700 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4701 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
4702 return;
4703 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4704 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
4705 return;
4706 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4707 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
4708 return;
4709 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4710 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4711 return;
4712 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4713 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
4714 return;
4715 }
4716 break;
4717 case Intrinsic::aarch64_neon_ld4:
4718 if (VT == MVT::v8i8) {
4719 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
4720 return;
4721 } else if (VT == MVT::v16i8) {
4722 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
4723 return;
4724 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4725 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
4726 return;
4727 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4728 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
4729 return;
4730 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4731 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
4732 return;
4733 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4734 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
4735 return;
4736 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4737 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4738 return;
4739 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4740 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
4741 return;
4742 }
4743 break;
4744 case Intrinsic::aarch64_neon_ld2r:
4745 if (VT == MVT::v8i8) {
4746 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
4747 return;
4748 } else if (VT == MVT::v16i8) {
4749 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
4750 return;
4751 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4752 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
4753 return;
4754 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4755 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
4756 return;
4757 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4758 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
4759 return;
4760 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4761 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
4762 return;
4763 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4764 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
4765 return;
4766 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4767 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
4768 return;
4769 }
4770 break;
4771 case Intrinsic::aarch64_neon_ld3r:
4772 if (VT == MVT::v8i8) {
4773 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
4774 return;
4775 } else if (VT == MVT::v16i8) {
4776 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
4777 return;
4778 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4779 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
4780 return;
4781 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4782 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
4783 return;
4784 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4785 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
4786 return;
4787 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4788 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
4789 return;
4790 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4791 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
4792 return;
4793 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4794 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
4795 return;
4796 }
4797 break;
4798 case Intrinsic::aarch64_neon_ld4r:
4799 if (VT == MVT::v8i8) {
4800 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
4801 return;
4802 } else if (VT == MVT::v16i8) {
4803 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
4804 return;
4805 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4806 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
4807 return;
4808 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4809 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
4810 return;
4811 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4812 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
4813 return;
4814 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4815 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
4816 return;
4817 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4818 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
4819 return;
4820 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4821 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
4822 return;
4823 }
4824 break;
4825 case Intrinsic::aarch64_neon_ld2lane:
4826 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4827 SelectLoadLane(Node, 2, AArch64::LD2i8);
4828 return;
4829 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4830 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4831 SelectLoadLane(Node, 2, AArch64::LD2i16);
4832 return;
4833 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4834 VT == MVT::v2f32) {
4835 SelectLoadLane(Node, 2, AArch64::LD2i32);
4836 return;
4837 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4838 VT == MVT::v1f64) {
4839 SelectLoadLane(Node, 2, AArch64::LD2i64);
4840 return;
4841 }
4842 break;
4843 case Intrinsic::aarch64_neon_ld3lane:
4844 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4845 SelectLoadLane(Node, 3, AArch64::LD3i8);
4846 return;
4847 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4848 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4849 SelectLoadLane(Node, 3, AArch64::LD3i16);
4850 return;
4851 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4852 VT == MVT::v2f32) {
4853 SelectLoadLane(Node, 3, AArch64::LD3i32);
4854 return;
4855 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4856 VT == MVT::v1f64) {
4857 SelectLoadLane(Node, 3, AArch64::LD3i64);
4858 return;
4859 }
4860 break;
4861 case Intrinsic::aarch64_neon_ld4lane:
4862 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4863 SelectLoadLane(Node, 4, AArch64::LD4i8);
4864 return;
4865 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4866 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4867 SelectLoadLane(Node, 4, AArch64::LD4i16);
4868 return;
4869 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4870 VT == MVT::v2f32) {
4871 SelectLoadLane(Node, 4, AArch64::LD4i32);
4872 return;
4873 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4874 VT == MVT::v1f64) {
4875 SelectLoadLane(Node, 4, AArch64::LD4i64);
4876 return;
4877 }
4878 break;
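    // LS64: LD64B loads 64 bytes into eight consecutive X registers, extracted
    // here through the x8sub_* subregister indices.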
4879 case Intrinsic::aarch64_ld64b:
4880 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
4881 return;
4882 case Intrinsic::aarch64_sve_ld2q_sret: {
4883 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
4884 return;
4885 }
4886 case Intrinsic::aarch64_sve_ld3q_sret: {
4887 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
4888 return;
4889 }
4890 case Intrinsic::aarch64_sve_ld4q_sret: {
4891 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
4892 return;
4893 }
4894 case Intrinsic::aarch64_sve_ld2_sret: {
4895 if (VT == MVT::nxv16i8) {
4896 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
4897 true);
4898 return;
4899 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4900 VT == MVT::nxv8bf16) {
4901 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
4902 true);
4903 return;
4904 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4905 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
4906 true);
4907 return;
4908 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4909 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
4910 true);
4911 return;
4912 }
4913 break;
4914 }
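    // Predicate-as-counter LD1 into multiple Z registers: SME2 selects the
    // pseudo opcodes, SVE2p1 the real LD1*_2Z/_4Z encodings; otherwise bail
    // out to the default selection.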
4915 case Intrinsic::aarch64_sve_ld1_pn_x2: {
4916 if (VT == MVT::nxv16i8) {
4917 if (Subtarget->hasSME2())
4918 SelectContiguousMultiVectorLoad(
4919 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
4920 else if (Subtarget->hasSVE2p1())
4921 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
4922 AArch64::LD1B_2Z);
4923 else
4924 break;
4925 return;
4926 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4927 VT == MVT::nxv8bf16) {
4928 if (Subtarget->hasSME2())
4929 SelectContiguousMultiVectorLoad(
4930 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
4931 else if (Subtarget->hasSVE2p1())
4932 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
4933 AArch64::LD1H_2Z);
4934 else
4935 break;
4936 return;
4937 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4938 if (Subtarget->hasSME2())
4939 SelectContiguousMultiVectorLoad(
4940 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
4941 else if (Subtarget->hasSVE2p1())
4942 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
4943 AArch64::LD1W_2Z);
4944 else
4945 break;
4946 return;
4947 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4948 if (Subtarget->hasSME2())
4949 SelectContiguousMultiVectorLoad(
4950 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
4951 else if (Subtarget->hasSVE2p1())
4952 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
4953 AArch64::LD1D_2Z);
4954 else
4955 break;
4956 return;
4957 }
4958 break;
4959 }
4960 case Intrinsic::aarch64_sve_ld1_pn_x4: {
4961 if (VT == MVT::nxv16i8) {
4962 if (Subtarget->hasSME2())
4963 SelectContiguousMultiVectorLoad(
4964 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
4965 else if (Subtarget->hasSVE2p1())
4966 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
4967 AArch64::LD1B_4Z);
4968 else
4969 break;
4970 return;
4971 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4972 VT == MVT::nxv8bf16) {
4973 if (Subtarget->hasSME2())
4974 SelectContiguousMultiVectorLoad(
4975 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
4976 else if (Subtarget->hasSVE2p1())
4977 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
4978 AArch64::LD1H_4Z);
4979 else
4980 break;
4981 return;
4982 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4983 if (Subtarget->hasSME2())
4984 SelectContiguousMultiVectorLoad(
4985 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
4986 else if (Subtarget->hasSVE2p1())
4987 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
4988 AArch64::LD1W_4Z);
4989 else
4990 break;
4991 return;
4992 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4993 if (Subtarget->hasSME2())
4994 SelectContiguousMultiVectorLoad(
4995 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
4996 else if (Subtarget->hasSVE2p1())
4997 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
4998 AArch64::LD1D_4Z);
4999 else
5000 break;
5001 return;
5002 }
5003 break;
5004 }
5005 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5006 if (VT == MVT::nxv16i8) {
5007 if (Subtarget->hasSME2())
5008 SelectContiguousMultiVectorLoad(Node, 2, 0,
5009 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5010 AArch64::LDNT1B_2Z_PSEUDO);
5011 else if (Subtarget->hasSVE2p1())
5012 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5013 AArch64::LDNT1B_2Z);
5014 else
5015 break;
5016 return;
5017 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5018 VT == MVT::nxv8bf16) {
5019 if (Subtarget->hasSME2())
5020 SelectContiguousMultiVectorLoad(Node, 2, 1,
5021 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5022 AArch64::LDNT1H_2Z_PSEUDO);
5023 else if (Subtarget->hasSVE2p1())
5024 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5025 AArch64::LDNT1H_2Z);
5026 else
5027 break;
5028 return;
5029 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5030 if (Subtarget->hasSME2())
5031 SelectContiguousMultiVectorLoad(Node, 2, 2,
5032 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5033 AArch64::LDNT1W_2Z_PSEUDO);
5034 else if (Subtarget->hasSVE2p1())
5035 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5036 AArch64::LDNT1W_2Z);
5037 else
5038 break;
5039 return;
5040 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5041 if (Subtarget->hasSME2())
5042 SelectContiguousMultiVectorLoad(Node, 2, 3,
5043 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5044 AArch64::LDNT1D_2Z_PSEUDO);
5045 else if (Subtarget->hasSVE2p1())
5046 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5047 AArch64::LDNT1D_2Z);
5048 else
5049 break;
5050 return;
5051 }
5052 break;
5053 }
5054 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5055 if (VT == MVT::nxv16i8) {
5056 if (Subtarget->hasSME2())
5057 SelectContiguousMultiVectorLoad(Node, 4, 0,
5058 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5059 AArch64::LDNT1B_4Z_PSEUDO);
5060 else if (Subtarget->hasSVE2p1())
5061 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5062 AArch64::LDNT1B_4Z);
5063 else
5064 break;
5065 return;
5066 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5067 VT == MVT::nxv8bf16) {
5068 if (Subtarget->hasSME2())
5069 SelectContiguousMultiVectorLoad(Node, 4, 1,
5070 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5071 AArch64::LDNT1H_4Z_PSEUDO);
5072 else if (Subtarget->hasSVE2p1())
5073 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5074 AArch64::LDNT1H_4Z);
5075 else
5076 break;
5077 return;
5078 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5079 if (Subtarget->hasSME2())
5080 SelectContiguousMultiVectorLoad(Node, 4, 2,
5081 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5082 AArch64::LDNT1W_4Z_PSEUDO);
5083 else if (Subtarget->hasSVE2p1())
5084 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5085 AArch64::LDNT1W_4Z);
5086 else
5087 break;
5088 return;
5089 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5090 if (Subtarget->hasSME2())
5091 SelectContiguousMultiVectorLoad(Node, 4, 3,
5092 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5093 AArch64::LDNT1D_4Z_PSEUDO);
5094 else if (Subtarget->hasSVE2p1())
5095 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5096 AArch64::LDNT1D_4Z);
5097 else
5098 break;
5099 return;
5100 }
5101 break;
5102 }
5103 case Intrinsic::aarch64_sve_ld3_sret: {
5104 if (VT == MVT::nxv16i8) {
5105 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5106 true);
5107 return;
5108 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5109 VT == MVT::nxv8bf16) {
5110 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5111 true);
5112 return;
5113 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5114 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5115 true);
5116 return;
5117 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5118 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5119 true);
5120 return;
5121 }
5122 break;
5123 }
5124 case Intrinsic::aarch64_sve_ld4_sret: {
5125 if (VT == MVT::nxv16i8) {
5126 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5127 true);
5128 return;
5129 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5130 VT == MVT::nxv8bf16) {
5131 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5132 true);
5133 return;
5134 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5135 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5136 true);
5137 return;
5138 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5139 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5140 true);
5141 return;
5142 }
5143 break;
5144 }
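    // SME ZA tile reads via MOVA: each element size uses its own base tile
    // register and slice-index immediate range (the template arguments),
    // moving two or four slices into Z registers.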
5145 case Intrinsic::aarch64_sme_read_hor_vg2: {
5146 if (VT == MVT::nxv16i8) {
5147 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5148 AArch64::MOVA_2ZMXI_H_B);
5149 return;
5150 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5151 VT == MVT::nxv8bf16) {
5152 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5153 AArch64::MOVA_2ZMXI_H_H);
5154 return;
5155 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5156 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5157 AArch64::MOVA_2ZMXI_H_S);
5158 return;
5159 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5160 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5161 AArch64::MOVA_2ZMXI_H_D);
5162 return;
5163 }
5164 break;
5165 }
5166 case Intrinsic::aarch64_sme_read_ver_vg2: {
5167 if (VT == MVT::nxv16i8) {
5168 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5169 AArch64::MOVA_2ZMXI_V_B);
5170 return;
5171 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5172 VT == MVT::nxv8bf16) {
5173 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5174 AArch64::MOVA_2ZMXI_V_H);
5175 return;
5176 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5177 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5178 AArch64::MOVA_2ZMXI_V_S);
5179 return;
5180 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5181 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5182 AArch64::MOVA_2ZMXI_V_D);
5183 return;
5184 }
5185 break;
5186 }
5187 case Intrinsic::aarch64_sme_read_hor_vg4: {
5188 if (VT == MVT::nxv16i8) {
5189 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5190 AArch64::MOVA_4ZMXI_H_B);
5191 return;
5192 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5193 VT == MVT::nxv8bf16) {
5194 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5195 AArch64::MOVA_4ZMXI_H_H);
5196 return;
5197 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5198 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5199 AArch64::MOVA_4ZMXI_H_S);
5200 return;
5201 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5202 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5203 AArch64::MOVA_4ZMXI_H_D);
5204 return;
5205 }
5206 break;
5207 }
5208 case Intrinsic::aarch64_sme_read_ver_vg4: {
5209 if (VT == MVT::nxv16i8) {
5210 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5211 AArch64::MOVA_4ZMXI_V_B);
5212 return;
5213 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5214 VT == MVT::nxv8bf16) {
5215 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5216 AArch64::MOVA_4ZMXI_V_H);
5217 return;
5218 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5219 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5220 AArch64::MOVA_4ZMXI_V_S);
5221 return;
5222 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5223 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5224 AArch64::MOVA_4ZMXI_V_D);
5225 return;
5226 }
5227 break;
5228 }
5229 case Intrinsic::aarch64_sme_read_vg1x2: {
5230 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5231 AArch64::MOVA_VG2_2ZMXI);
5232 return;
5233 }
5234 case Intrinsic::aarch64_sme_read_vg1x4: {
5235 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5236 AArch64::MOVA_VG4_4ZMXI);
5237 return;
5238 }
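    // Swift async: the async context lives at FP - 8 in the extended frame
    // record, so materialise that address and mark the frame address as taken.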
5239 case Intrinsic::swift_async_context_addr: {
5240 SDLoc DL(Node);
5241 SDValue Chain = Node->getOperand(0);
5242 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5243 SDValue Res = SDValue(
5244 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5245 CurDAG->getTargetConstant(8, DL, MVT::i32),
5246 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5247 0);
5248 ReplaceUses(SDValue(Node, 0), Res);
5249 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5250 CurDAG->RemoveDeadNode(Node);
5251
5252 auto &MF = CurDAG->getMachineFunction();
5253 MF.getFrameInfo().setFrameAddressIsTaken(true);
5254 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5255 return;
5256 }
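    // SME2 ZT0 lookup tables: the last argument to SelectMultiVectorLuti is the
    // largest legal index immediate for the selected form (see the per-case comments).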
5257 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5258 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5259 Node->getValueType(0),
5260 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5261 AArch64::LUTI2_4ZTZI_S}))
5262 // Second Immediate must be <= 3:
5263 SelectMultiVectorLuti(Node, 4, Opc, 3);
5264 return;
5265 }
5266 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5267 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5268 Node->getValueType(0),
5269 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5270 // Second Immediate must be <= 1:
5271 SelectMultiVectorLuti(Node, 4, Opc, 1);
5272 return;
5273 }
5274 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5275 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5276 Node->getValueType(0),
5277 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5278 AArch64::LUTI2_2ZTZI_S}))
5279 // Second Immediate must be <= 7:
5280 SelectMultiVectorLuti(Node, 2, Opc, 7);
5281 return;
5282 }
5283 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5284 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5285 Node->getValueType(0),
5286 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5287 AArch64::LUTI4_2ZTZI_S}))
5288 // Second Immediate must be <= 3:
5289 SelectMultiVectorLuti(Node, 2, Opc, 3);
5290 return;
5291 }
5292 }
5293 } break;
5294 case ISD::INTRINSIC_WO_CHAIN: {
5295 unsigned IntNo = Node->getConstantOperandVal(0);
5296 switch (IntNo) {
5297 default:
5298 break;
5299 case Intrinsic::aarch64_tagp:
5300 SelectTagP(Node);
5301 return;
5302 case Intrinsic::aarch64_neon_tbl2:
5303 SelectTable(Node, 2,
5304 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5305 false);
5306 return;
5307 case Intrinsic::aarch64_neon_tbl3:
5308 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5309 : AArch64::TBLv16i8Three,
5310 false);
5311 return;
5312 case Intrinsic::aarch64_neon_tbl4:
5313 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5314 : AArch64::TBLv16i8Four,
5315 false);
5316 return;
5317 case Intrinsic::aarch64_neon_tbx2:
5318 SelectTable(Node, 2,
5319 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5320 true);
5321 return;
5322 case Intrinsic::aarch64_neon_tbx3:
5323 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5324 : AArch64::TBXv16i8Three,
5325 true);
5326 return;
5327 case Intrinsic::aarch64_neon_tbx4:
5328 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5329 : AArch64::TBXv16i8Four,
5330 true);
5331 return;
5332 case Intrinsic::aarch64_sve_srshl_single_x2:
5333 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5334 Node->getValueType(0),
5335 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5336 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
5337 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5338 return;
5339 case Intrinsic::aarch64_sve_srshl_single_x4:
5340 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5341 Node->getValueType(0),
5342 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
5343 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
5344 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5345 return;
5346 case Intrinsic::aarch64_sve_urshl_single_x2:
5347 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5348 Node->getValueType(0),
5349 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
5350 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
5351 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5352 return;
5353 case Intrinsic::aarch64_sve_urshl_single_x4:
5354 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5355 Node->getValueType(0),
5356 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
5357 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
5358 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5359 return;
5360 case Intrinsic::aarch64_sve_srshl_x2:
5361 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5362 Node->getValueType(0),
5363 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
5364 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
5365 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5366 return;
5367 case Intrinsic::aarch64_sve_srshl_x4:
5368 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5369 Node->getValueType(0),
5370 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
5371 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
5372 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5373 return;
5374 case Intrinsic::aarch64_sve_urshl_x2:
5375 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5376 Node->getValueType(0),
5377 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
5378 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
5379 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5380 return;
5381 case Intrinsic::aarch64_sve_urshl_x4:
5382 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5383 Node->getValueType(0),
5384 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
5385 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
5386 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5387 return;
5388 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
5389 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5390 Node->getValueType(0),
5391 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
5392 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
5393 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5394 return;
5395 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
5396 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5397 Node->getValueType(0),
5398 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
5399 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
5400 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5401 return;
5402 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
5403 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5404 Node->getValueType(0),
5405 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
5406 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
5407 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5408 return;
5409 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
5410 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5411 Node->getValueType(0),
5412 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
5413 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
5414 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5415 return;
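    // WHILE* pairs produce two predicate registers from scalar bounds;
    // the opcode is chosen from the predicate element width.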
5416 case Intrinsic::aarch64_sve_whilege_x2:
5417 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5418 Node->getValueType(0),
5419 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
5420 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
5421 SelectWhilePair(Node, Op);
5422 return;
5423 case Intrinsic::aarch64_sve_whilegt_x2:
5424 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5425 Node->getValueType(0),
5426 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
5427 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
5428 SelectWhilePair(Node, Op);
5429 return;
5430 case Intrinsic::aarch64_sve_whilehi_x2:
5431 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5432 Node->getValueType(0),
5433 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
5434 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
5435 SelectWhilePair(Node, Op);
5436 return;
5437 case Intrinsic::aarch64_sve_whilehs_x2:
5438 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5439 Node->getValueType(0),
5440 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
5441 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
5442 SelectWhilePair(Node, Op);
5443 return;
5444 case Intrinsic::aarch64_sve_whilele_x2:
5445 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5446 Node->getValueType(0),
5447 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
5448 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
5449 SelectWhilePair(Node, Op);
5450 return;
5451 case Intrinsic::aarch64_sve_whilelo_x2:
5452 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5453 Node->getValueType(0),
5454 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
5455 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
5456 SelectWhilePair(Node, Op);
5457 return;
5458 case Intrinsic::aarch64_sve_whilels_x2:
5459 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5460 Node->getValueType(0),
5461 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
5462 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
5463 SelectWhilePair(Node, Op);
5464 return;
5465 case Intrinsic::aarch64_sve_whilelt_x2:
5466 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5467 Node->getValueType(0),
5468 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
5469 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
5470 SelectWhilePair(Node, Op);
5471 return;
5472 case Intrinsic::aarch64_sve_smax_single_x2:
5473 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5474 Node->getValueType(0),
5475 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
5476 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
5477 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5478 return;
5479 case Intrinsic::aarch64_sve_umax_single_x2:
5480 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5481 Node->getValueType(0),
5482 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
5483 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
5484 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5485 return;
5486 case Intrinsic::aarch64_sve_fmax_single_x2:
5487 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5488 Node->getValueType(0),
5489 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
5490 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
5491 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5492 return;
5493 case Intrinsic::aarch64_sve_smax_single_x4:
5494 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5495 Node->getValueType(0),
5496 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
5497 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
5498 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5499 return;
5500 case Intrinsic::aarch64_sve_umax_single_x4:
5501 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5502 Node->getValueType(0),
5503 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
5504 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
5505 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5506 return;
5507 case Intrinsic::aarch64_sve_fmax_single_x4:
5508 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5509 Node->getValueType(0),
5510 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
5511 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
5512 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5513 return;
5514 case Intrinsic::aarch64_sve_smin_single_x2:
5515 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5516 Node->getValueType(0),
5517 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
5518 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
5519 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5520 return;
5521 case Intrinsic::aarch64_sve_umin_single_x2:
5522 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5523 Node->getValueType(0),
5524 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
5525 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
5526 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5527 return;
5528 case Intrinsic::aarch64_sve_fmin_single_x2:
5529 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5530 Node->getValueType(0),
5531 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
5532 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
5533 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5534 return;
5535 case Intrinsic::aarch64_sve_smin_single_x4:
5536 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5537 Node->getValueType(0),
5538 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
5539 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
5540 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5541 return;
5542 case Intrinsic::aarch64_sve_umin_single_x4:
5543 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5544 Node->getValueType(0),
5545 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
5546 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
5547 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5548 return;
5549 case Intrinsic::aarch64_sve_fmin_single_x4:
5550 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5551 Node->getValueType(0),
5552 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
5553 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
5554 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5555 return;
5556 case Intrinsic::aarch64_sve_smax_x2:
5557 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5558 Node->getValueType(0),
5559 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
5560 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
5561 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5562 return;
5563 case Intrinsic::aarch64_sve_umax_x2:
5564 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5565 Node->getValueType(0),
5566 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
5567 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
5568 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5569 return;
5570 case Intrinsic::aarch64_sve_fmax_x2:
5571 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5572 Node->getValueType(0),
5573 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
5574 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
5575 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5576 return;
5577 case Intrinsic::aarch64_sve_smax_x4:
5578 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5579 Node->getValueType(0),
5580 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
5581 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
5582 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5583 return;
5584 case Intrinsic::aarch64_sve_umax_x4:
5585 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5586 Node->getValueType(0),
5587 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
5588 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
5589 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5590 return;
5591 case Intrinsic::aarch64_sve_fmax_x4:
5592 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5593 Node->getValueType(0),
5594 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
5595 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
5596 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5597 return;
5598 case Intrinsic::aarch64_sve_smin_x2:
5599 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5600 Node->getValueType(0),
5601 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
5602 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
5603 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5604 return;
5605 case Intrinsic::aarch64_sve_umin_x2:
5606 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5607 Node->getValueType(0),
5608 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
5609 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
5610 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5611 return;
5612 case Intrinsic::aarch64_sve_fmin_x2:
5613 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5614 Node->getValueType(0),
5615 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
5616 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
5617 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5618 return;
5619 case Intrinsic::aarch64_sve_smin_x4:
5620 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5621 Node->getValueType(0),
5622 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
5623 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
5624 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5625 return;
5626 case Intrinsic::aarch64_sve_umin_x4:
5627 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5628 Node->getValueType(0),
5629 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
5630 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
5631 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5632 return;
5633 case Intrinsic::aarch64_sve_fmin_x4:
5634 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5635 Node->getValueType(0),
5636 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
5637 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
5638 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5639 return;
5640 case Intrinsic::aarch64_sve_fmaxnm_single_x2:
5641 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5642 Node->getValueType(0),
5643 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
5644 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
5645 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5646 return;
5647 case Intrinsic::aarch64_sve_fmaxnm_single_x4:
5648 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5649 Node->getValueType(0),
5650 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
5651 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
5652 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5653 return;
5654 case Intrinsic::aarch64_sve_fminnm_single_x2:
5655 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5656 Node->getValueType(0),
5657 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
5658 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
5659 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5660 return;
5661 case Intrinsic::aarch64_sve_fminnm_single_x4:
5662 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5663 Node->getValueType(0),
5664 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
5665 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
5666 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5667 return;
5668 case Intrinsic::aarch64_sve_fmaxnm_x2:
5669 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5670 Node->getValueType(0),
5671 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
5672 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
5673 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5674 return;
5675 case Intrinsic::aarch64_sve_fmaxnm_x4:
5676 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5677 Node->getValueType(0),
5678 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
5679 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
5680 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5681 return;
5682 case Intrinsic::aarch64_sve_fminnm_x2:
5683 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5684 Node->getValueType(0),
5685 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
5686 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
5687 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5688 return;
5689 case Intrinsic::aarch64_sve_fminnm_x4:
5690 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5691 Node->getValueType(0),
5692 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
5693 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
5694 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5695 return;
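    // Multi-vector FP<->integer conversions: only the word-element (StoS)
    // forms are handled here, so no per-type opcode table is needed.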
5696 case Intrinsic::aarch64_sve_fcvtzs_x2:
5697 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
5698 return;
5699 case Intrinsic::aarch64_sve_scvtf_x2:
5700 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
5701 return;
5702 case Intrinsic::aarch64_sve_fcvtzu_x2:
5703 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
5704 return;
5705 case Intrinsic::aarch64_sve_ucvtf_x2:
5706 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
5707 return;
5708 case Intrinsic::aarch64_sve_fcvtzs_x4:
5709 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
5710 return;
5711 case Intrinsic::aarch64_sve_scvtf_x4:
5712 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
5713 return;
5714 case Intrinsic::aarch64_sve_fcvtzu_x4:
5715 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
5716 return;
5717 case Intrinsic::aarch64_sve_ucvtf_x4:
5718 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
5719 return;
5720 case Intrinsic::aarch64_sve_sclamp_single_x2:
5721 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5722 Node->getValueType(0),
5723 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
5724 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
5725 SelectClamp(Node, 2, Op);
5726 return;
5727 case Intrinsic::aarch64_sve_uclamp_single_x2:
5728 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5729 Node->getValueType(0),
5730 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
5731 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
5732 SelectClamp(Node, 2, Op);
5733 return;
5734 case Intrinsic::aarch64_sve_fclamp_single_x2:
5735 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5736 Node->getValueType(0),
5737 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
5738 AArch64::FCLAMP_VG2_2Z2Z_D}))
5739 SelectClamp(Node, 2, Op);
5740 return;
5741 case Intrinsic::aarch64_sve_sclamp_single_x4:
5742 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5743 Node->getValueType(0),
5744 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
5745 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
5746 SelectClamp(Node, 4, Op);
5747 return;
5748 case Intrinsic::aarch64_sve_uclamp_single_x4:
5749 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5750 Node->getValueType(0),
5751 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
5752 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
5753 SelectClamp(Node, 4, Op);
5754 return;
5755 case Intrinsic::aarch64_sve_fclamp_single_x4:
5756 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5757 Node->getValueType(0),
5758 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
5759 AArch64::FCLAMP_VG4_4Z4Z_D}))
5760 SelectClamp(Node, 4, Op);
5761 return;
5762 case Intrinsic::aarch64_sve_add_single_x2:
5763 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5764 Node->getValueType(0),
5765 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
5766 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
5767 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5768 return;
5769 case Intrinsic::aarch64_sve_add_single_x4:
5770 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5771 Node->getValueType(0),
5772 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
5773 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
5774 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5775 return;
5776 case Intrinsic::aarch64_sve_zip_x2:
5777 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5778 Node->getValueType(0),
5779 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
5780 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
5781 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
5782 return;
5783 case Intrinsic::aarch64_sve_zipq_x2:
5784 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
5785 AArch64::ZIP_VG2_2ZZZ_Q);
5786 return;
5787 case Intrinsic::aarch64_sve_zip_x4:
5788 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5789 Node->getValueType(0),
5790 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
5791 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
5792 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
5793 return;
5794 case Intrinsic::aarch64_sve_zipq_x4:
5795 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
5796 AArch64::ZIP_VG4_4Z4Z_Q);
5797 return;
5798 case Intrinsic::aarch64_sve_uzp_x2:
5799 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5800 Node->getValueType(0),
5801 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
5802 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
5803 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
5804 return;
5805 case Intrinsic::aarch64_sve_uzpq_x2:
5806 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
5807 AArch64::UZP_VG2_2ZZZ_Q);
5808 return;
5809 case Intrinsic::aarch64_sve_uzp_x4:
5810 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5811 Node->getValueType(0),
5812 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
5813 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
5814 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
5815 return;
5816 case Intrinsic::aarch64_sve_uzpq_x4:
5817 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
5818 AArch64::UZP_VG4_4Z4Z_Q);
5819 return;
5820 case Intrinsic::aarch64_sve_sel_x2:
5821 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5822 Node->getValueType(0),
5823 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
5824 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
5825 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
5826 return;
5827 case Intrinsic::aarch64_sve_sel_x4:
5828 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5829 Node->getValueType(0),
5830 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
5831 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
5832 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
5833 return;
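    // FRINT* multi-vector rounding: only the single-precision (_S) opcodes are selected here.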
5834 case Intrinsic::aarch64_sve_frinta_x2:
5835 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
5836 return;
5837 case Intrinsic::aarch64_sve_frinta_x4:
5838 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
5839 return;
5840 case Intrinsic::aarch64_sve_frintm_x2:
5841 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
5842 return;
5843 case Intrinsic::aarch64_sve_frintm_x4:
5844 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
5845 return;
5846 case Intrinsic::aarch64_sve_frintn_x2:
5847 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
5848 return;
5849 case Intrinsic::aarch64_sve_frintn_x4:
5850 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
5851 return;
5852 case Intrinsic::aarch64_sve_frintp_x2:
5853 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
5854 return;
5855 case Intrinsic::aarch64_sve_frintp_x4:
5856 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
5857 return;
5858 case Intrinsic::aarch64_sve_sunpk_x2:
5859 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5860 Node->getValueType(0),
5861 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
5862 AArch64::SUNPK_VG2_2ZZ_D}))
5863 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
5864 return;
5865 case Intrinsic::aarch64_sve_uunpk_x2:
5866 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5867 Node->getValueType(0),
5868 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
5869 AArch64::UUNPK_VG2_2ZZ_D}))
5870 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
5871 return;
5872 case Intrinsic::aarch64_sve_sunpk_x4:
5873 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5874 Node->getValueType(0),
5875 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
5876 AArch64::SUNPK_VG4_4Z2Z_D}))
5877 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
5878 return;
5879 case Intrinsic::aarch64_sve_uunpk_x4:
5880 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5881 Node->getValueType(0),
5882 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
5883 AArch64::UUNPK_VG4_4Z2Z_D}))
5884 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
5885 return;
5886 case Intrinsic::aarch64_sve_pext_x2: {
5887 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5888 Node->getValueType(0),
5889 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
5890 AArch64::PEXT_2PCI_D}))
5891 SelectPExtPair(Node, Op);
5892 return;
5893 }
5894 }
5895 break;
5896 }
5897 case ISD::INTRINSIC_VOID: {
5898 unsigned IntNo = Node->getConstantOperandVal(1);
5899 if (Node->getNumOperands() >= 3)
5900 VT = Node->getOperand(2)->getValueType(0);
5901 switch (IntNo) {
5902 default:
5903 break;
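    // NEON multi-vector stores mirror the loads above: opcode keyed by vector
    // type, with ST2/ST3/ST4 of v1i64/v1f64 falling back to the ST1 multi-register forms.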
5904 case Intrinsic::aarch64_neon_st1x2: {
5905 if (VT == MVT::v8i8) {
5906 SelectStore(Node, 2, AArch64::ST1Twov8b);
5907 return;
5908 } else if (VT == MVT::v16i8) {
5909 SelectStore(Node, 2, AArch64::ST1Twov16b);
5910 return;
5911 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
5912 VT == MVT::v4bf16) {
5913 SelectStore(Node, 2, AArch64::ST1Twov4h);
5914 return;
5915 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
5916 VT == MVT::v8bf16) {
5917 SelectStore(Node, 2, AArch64::ST1Twov8h);
5918 return;
5919 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5920 SelectStore(Node, 2, AArch64::ST1Twov2s);
5921 return;
5922 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5923 SelectStore(Node, 2, AArch64::ST1Twov4s);
5924 return;
5925 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5926 SelectStore(Node, 2, AArch64::ST1Twov2d);
5927 return;
5928 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5929 SelectStore(Node, 2, AArch64::ST1Twov1d);
5930 return;
5931 }
5932 break;
5933 }
5934 case Intrinsic::aarch64_neon_st1x3: {
5935 if (VT == MVT::v8i8) {
5936 SelectStore(Node, 3, AArch64::ST1Threev8b);
5937 return;
5938 } else if (VT == MVT::v16i8) {
5939 SelectStore(Node, 3, AArch64::ST1Threev16b);
5940 return;
5941 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
5942 VT == MVT::v4bf16) {
5943 SelectStore(Node, 3, AArch64::ST1Threev4h);
5944 return;
5945 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
5946 VT == MVT::v8bf16) {
5947 SelectStore(Node, 3, AArch64::ST1Threev8h);
5948 return;
5949 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5950 SelectStore(Node, 3, AArch64::ST1Threev2s);
5951 return;
5952 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5953 SelectStore(Node, 3, AArch64::ST1Threev4s);
5954 return;
5955 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5956 SelectStore(Node, 3, AArch64::ST1Threev2d);
5957 return;
5958 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5959 SelectStore(Node, 3, AArch64::ST1Threev1d);
5960 return;
5961 }
5962 break;
5963 }
5964 case Intrinsic::aarch64_neon_st1x4: {
5965 if (VT == MVT::v8i8) {
5966 SelectStore(Node, 4, AArch64::ST1Fourv8b);
5967 return;
5968 } else if (VT == MVT::v16i8) {
5969 SelectStore(Node, 4, AArch64::ST1Fourv16b);
5970 return;
5971 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
5972 VT == MVT::v4bf16) {
5973 SelectStore(Node, 4, AArch64::ST1Fourv4h);
5974 return;
5975 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
5976 VT == MVT::v8bf16) {
5977 SelectStore(Node, 4, AArch64::ST1Fourv8h);
5978 return;
5979 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5980 SelectStore(Node, 4, AArch64::ST1Fourv2s);
5981 return;
5982 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5983 SelectStore(Node, 4, AArch64::ST1Fourv4s);
5984 return;
5985 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5986 SelectStore(Node, 4, AArch64::ST1Fourv2d);
5987 return;
5988 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5989 SelectStore(Node, 4, AArch64::ST1Fourv1d);
5990 return;
5991 }
5992 break;
5993 }
5994 case Intrinsic::aarch64_neon_st2: {
5995 if (VT == MVT::v8i8) {
5996 SelectStore(Node, 2, AArch64::ST2Twov8b);
5997 return;
5998 } else if (VT == MVT::v16i8) {
5999 SelectStore(Node, 2, AArch64::ST2Twov16b);
6000 return;
6001 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6002 VT == MVT::v4bf16) {
6003 SelectStore(Node, 2, AArch64::ST2Twov4h);
6004 return;
6005 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6006 VT == MVT::v8bf16) {
6007 SelectStore(Node, 2, AArch64::ST2Twov8h);
6008 return;
6009 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6010 SelectStore(Node, 2, AArch64::ST2Twov2s);
6011 return;
6012 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6013 SelectStore(Node, 2, AArch64::ST2Twov4s);
6014 return;
6015 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6016 SelectStore(Node, 2, AArch64::ST2Twov2d);
6017 return;
6018 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6019 SelectStore(Node, 2, AArch64::ST1Twov1d);
6020 return;
6021 }
6022 break;
6023 }
6024 case Intrinsic::aarch64_neon_st3: {
6025 if (VT == MVT::v8i8) {
6026 SelectStore(Node, 3, AArch64::ST3Threev8b);
6027 return;
6028 } else if (VT == MVT::v16i8) {
6029 SelectStore(Node, 3, AArch64::ST3Threev16b);
6030 return;
6031 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6032 VT == MVT::v4bf16) {
6033 SelectStore(Node, 3, AArch64::ST3Threev4h);
6034 return;
6035 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6036 VT == MVT::v8bf16) {
6037 SelectStore(Node, 3, AArch64::ST3Threev8h);
6038 return;
6039 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6040 SelectStore(Node, 3, AArch64::ST3Threev2s);
6041 return;
6042 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6043 SelectStore(Node, 3, AArch64::ST3Threev4s);
6044 return;
6045 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6046 SelectStore(Node, 3, AArch64::ST3Threev2d);
6047 return;
6048 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6049 SelectStore(Node, 3, AArch64::ST1Threev1d);
6050 return;
6051 }
6052 break;
6053 }
6054 case Intrinsic::aarch64_neon_st4: {
6055 if (VT == MVT::v8i8) {
6056 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6057 return;
6058 } else if (VT == MVT::v16i8) {
6059 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6060 return;
6061 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6062 VT == MVT::v4bf16) {
6063 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6064 return;
6065 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6066 VT == MVT::v8bf16) {
6067 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6068 return;
6069 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6070 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6071 return;
6072 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6073 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6074 return;
6075 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6076 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6077 return;
6078 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6079 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6080 return;
6081 }
6082 break;
6083 }
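    // Lane-wise stores follow the same element-size-only dispatch as the lane loads.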
6084 case Intrinsic::aarch64_neon_st2lane: {
6085 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6086 SelectStoreLane(Node, 2, AArch64::ST2i8);
6087 return;
6088 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6089 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6090 SelectStoreLane(Node, 2, AArch64::ST2i16);
6091 return;
6092 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6093 VT == MVT::v2f32) {
6094 SelectStoreLane(Node, 2, AArch64::ST2i32);
6095 return;
6096 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6097 VT == MVT::v1f64) {
6098 SelectStoreLane(Node, 2, AArch64::ST2i64);
6099 return;
6100 }
6101 break;
6102 }
6103 case Intrinsic::aarch64_neon_st3lane: {
6104 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6105 SelectStoreLane(Node, 3, AArch64::ST3i8);
6106 return;
6107 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6108 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6109 SelectStoreLane(Node, 3, AArch64::ST3i16);
6110 return;
6111 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6112 VT == MVT::v2f32) {
6113 SelectStoreLane(Node, 3, AArch64::ST3i32);
6114 return;
6115 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6116 VT == MVT::v1f64) {
6117 SelectStoreLane(Node, 3, AArch64::ST3i64);
6118 return;
6119 }
6120 break;
6121 }
6122 case Intrinsic::aarch64_neon_st4lane: {
6123 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6124 SelectStoreLane(Node, 4, AArch64::ST4i8);
6125 return;
6126 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6127 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6128 SelectStoreLane(Node, 4, AArch64::ST4i16);
6129 return;
6130 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6131 VT == MVT::v2f32) {
6132 SelectStoreLane(Node, 4, AArch64::ST4i32);
6133 return;
6134 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6135 VT == MVT::v1f64) {
6136 SelectStoreLane(Node, 4, AArch64::ST4i64);
6137 return;
6138 }
6139 break;
6140 }
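    // SVE structured stores: each case passes both the reg+imm (*_IMM) and
    // reg+reg opcodes and lets SelectPredicatedStore choose the addressing form.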
6141 case Intrinsic::aarch64_sve_st2q: {
6142 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6143 return;
6144 }
6145 case Intrinsic::aarch64_sve_st3q: {
6146 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6147 return;
6148 }
6149 case Intrinsic::aarch64_sve_st4q: {
6150 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
6151 return;
6152 }
6153 case Intrinsic::aarch64_sve_st2: {
6154 if (VT == MVT::nxv16i8) {
6155 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
6156 return;
6157 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6158 VT == MVT::nxv8bf16) {
6159 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
6160 return;
6161 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6162 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
6163 return;
6164 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6165 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
6166 return;
6167 }
6168 break;
6169 }
6170 case Intrinsic::aarch64_sve_st3: {
6171 if (VT == MVT::nxv16i8) {
6172 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
6173 return;
6174 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6175 VT == MVT::nxv8bf16) {
6176 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
6177 return;
6178 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6179 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
6180 return;
6181 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6182 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
6183 return;
6184 }
6185 break;
6186 }
6187 case Intrinsic::aarch64_sve_st4: {
6188 if (VT == MVT::nxv16i8) {
6189 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
6190 return;
6191 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6192 VT == MVT::nxv8bf16) {
6193 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
6194 return;
6195 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6196 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
6197 return;
6198 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6199 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
6200 return;
6201 }
6202 break;
6203 }
6204 }
6205 break;
6206 }
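  // Post-increment structure loads: the *_POST opcodes write back the updated
  // base register; v1i64/v1f64 again fall back to the LD1 multi-register forms.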
6207 case AArch64ISD::LD2post: {
6208 if (VT == MVT::v8i8) {
6209 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
6210 return;
6211 } else if (VT == MVT::v16i8) {
6212 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
6213 return;
6214 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6215 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
6216 return;
6217 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6218 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
6219 return;
6220 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6221 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
6222 return;
6223 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6224 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
6225 return;
6226 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6227 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6228 return;
6229 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6230 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
6231 return;
6232 }
6233 break;
6234 }
6235 case AArch64ISD::LD3post: {
6236 if (VT == MVT::v8i8) {
6237 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
6238 return;
6239 } else if (VT == MVT::v16i8) {
6240 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
6241 return;
6242 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6243 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
6244 return;
6245 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6246 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
6247 return;
6248 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6249 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
6250 return;
6251 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6252 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
6253 return;
6254 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6255 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6256 return;
6257 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6258 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
6259 return;
6260 }
6261 break;
6262 }
6263 case AArch64ISD::LD4post: {
6264 if (VT == MVT::v8i8) {
6265 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
6266 return;
6267 } else if (VT == MVT::v16i8) {
6268 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
6269 return;
6270 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6271 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
6272 return;
6273 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6274 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
6275 return;
6276 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6277 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
6278 return;
6279 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6280 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
6281 return;
6282 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6283 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6284 return;
6285 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6286 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
6287 return;
6288 }
6289 break;
6290 }
6291 case AArch64ISD::LD1x2post: {
6292 if (VT == MVT::v8i8) {
6293 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
6294 return;
6295 } else if (VT == MVT::v16i8) {
6296 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
6297 return;
6298 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6299 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
6300 return;
6301 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6302 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
6303 return;
6304 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6305 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
6306 return;
6307 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6308 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
6309 return;
6310 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6311 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6312 return;
6313 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6314 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
6315 return;
6316 }
6317 break;
6318 }
6319 case AArch64ISD::LD1x3post: {
6320 if (VT == MVT::v8i8) {
6321 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
6322 return;
6323 } else if (VT == MVT::v16i8) {
6324 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
6325 return;
6326 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6327 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
6328 return;
6329 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6330 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
6331 return;
6332 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6333 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
6334 return;
6335 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6336 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
6337 return;
6338 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6339 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6340 return;
6341 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6342 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
6343 return;
6344 }
6345 break;
6346 }
6347 case AArch64ISD::LD1x4post: {
6348 if (VT == MVT::v8i8) {
6349 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
6350 return;
6351 } else if (VT == MVT::v16i8) {
6352 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
6353 return;
6354 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6355 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
6356 return;
6357 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6358 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
6359 return;
6360 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6361 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
6362 return;
6363 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6364 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
6365 return;
6366 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6367 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6368 return;
6369 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6370 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
6371 return;
6372 }
6373 break;
6374 }
6375 case AArch64ISD::LD1DUPpost: {
6376 if (VT == MVT::v8i8) {
6377 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
6378 return;
6379 } else if (VT == MVT::v16i8) {
6380 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
6381 return;
6382 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6383 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
6384 return;
6385 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6386 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
6387 return;
6388 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6389 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
6390 return;
6391 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6392 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
6393 return;
6394 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6395 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
6396 return;
6397 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6398 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
6399 return;
6400 }
6401 break;
6402 }
6403 case AArch64ISD::LD2DUPpost: {
6404 if (VT == MVT::v8i8) {
6405 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
6406 return;
6407 } else if (VT == MVT::v16i8) {
6408 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
6409 return;
6410 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6411 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
6412 return;
6413 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6414 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
6415 return;
6416 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6417 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
6418 return;
6419 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6420 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
6421 return;
6422 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6423 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
6424 return;
6425 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6426 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
6427 return;
6428 }
6429 break;
6430 }
6431 case AArch64ISD::LD3DUPpost: {
6432 if (VT == MVT::v8i8) {
6433 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
6434 return;
6435 } else if (VT == MVT::v16i8) {
6436 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
6437 return;
6438 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6439 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
6440 return;
6441 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6442 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
6443 return;
6444 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6445 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
6446 return;
6447 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6448 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
6449 return;
6450 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6451 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
6452 return;
6453 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6454 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
6455 return;
6456 }
6457 break;
6458 }
6459 case AArch64ISD::LD4DUPpost: {
6460 if (VT == MVT::v8i8) {
6461 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
6462 return;
6463 } else if (VT == MVT::v16i8) {
6464 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
6465 return;
6466 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6467 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
6468 return;
6469 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6470 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
6471 return;
6472 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6473 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
6474 return;
6475 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6476 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
6477 return;
6478 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6479 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
6480 return;
6481 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6482 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
6483 return;
6484 }
6485 break;
6486 }
6487 case AArch64ISD::LD1LANEpost: {
6488 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6489 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
6490 return;
6491 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6492 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6493 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
6494 return;
6495 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6496 VT == MVT::v2f32) {
6497 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
6498 return;
6499 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6500 VT == MVT::v1f64) {
6501 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
6502 return;
6503 }
6504 break;
6505 }
6506 case AArch64ISD::LD2LANEpost: {
6507 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6508 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
6509 return;
6510 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6511 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6512 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
6513 return;
6514 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6515 VT == MVT::v2f32) {
6516 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
6517 return;
6518 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6519 VT == MVT::v1f64) {
6520 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
6521 return;
6522 }
6523 break;
6524 }
6525 case AArch64ISD::LD3LANEpost: {
6526 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6527 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
6528 return;
6529 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6530 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6531 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
6532 return;
6533 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6534 VT == MVT::v2f32) {
6535 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
6536 return;
6537 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6538 VT == MVT::v1f64) {
6539 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
6540 return;
6541 }
6542 break;
6543 }
6544 case AArch64ISD::LD4LANEpost: {
6545 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6546 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
6547 return;
6548 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6549 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6550 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
6551 return;
6552 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6553 VT == MVT::v2f32) {
6554 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
6555 return;
6556 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6557 VT == MVT::v1f64) {
6558 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
6559 return;
6560 }
6561 break;
6562 }
6563 case AArch64ISD::ST2post: {
6564 VT = Node->getOperand(1).getValueType();
6565 if (VT == MVT::v8i8) {
6566 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
6567 return;
6568 } else if (VT == MVT::v16i8) {
6569 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
6570 return;
6571 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6572 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
6573 return;
6574 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6575 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
6576 return;
6577 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6578 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
6579 return;
6580 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6581 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
6582 return;
6583 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6584 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
6585 return;
6586 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6587 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
6588 return;
6589 }
6590 break;
6591 }
6592 case AArch64ISD::ST3post: {
6593 VT = Node->getOperand(1).getValueType();
6594 if (VT == MVT::v8i8) {
6595 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
6596 return;
6597 } else if (VT == MVT::v16i8) {
6598 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
6599 return;
6600 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6601 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
6602 return;
6603 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6604 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
6605 return;
6606 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6607 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
6608 return;
6609 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6610 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
6611 return;
6612 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6613 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
6614 return;
6615 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6616 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
6617 return;
6618 }
6619 break;
6620 }
6621 case AArch64ISD::ST4post: {
6622 VT = Node->getOperand(1).getValueType();
6623 if (VT == MVT::v8i8) {
6624 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
6625 return;
6626 } else if (VT == MVT::v16i8) {
6627 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
6628 return;
6629 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6630 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
6631 return;
6632 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6633 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
6634 return;
6635 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6636 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
6637 return;
6638 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6639 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
6640 return;
6641 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6642 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
6643 return;
6644 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6645 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
6646 return;
6647 }
6648 break;
6649 }
6650 case AArch64ISD::ST1x2post: {
6651 VT = Node->getOperand(1).getValueType();
6652 if (VT == MVT::v8i8) {
6653 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
6654 return;
6655 } else if (VT == MVT::v16i8) {
6656 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
6657 return;
6658 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6659 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
6660 return;
6661 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6662 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
6663 return;
6664 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6665 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
6666 return;
6667 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6668 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
6669 return;
6670 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6671 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
6672 return;
6673 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6674 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
6675 return;
6676 }
6677 break;
6678 }
6679 case AArch64ISD::ST1x3post: {
6680 VT = Node->getOperand(1).getValueType();
6681 if (VT == MVT::v8i8) {
6682 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
6683 return;
6684 } else if (VT == MVT::v16i8) {
6685 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
6686 return;
6687 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6688 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
6689 return;
6690 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6691 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
6692 return;
6693 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6694 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
6695 return;
6696 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6697 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
6698 return;
6699 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6700 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
6701 return;
6702 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6703 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
6704 return;
6705 }
6706 break;
6707 }
6708 case AArch64ISD::ST1x4post: {
6709 VT = Node->getOperand(1).getValueType();
6710 if (VT == MVT::v8i8) {
6711 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
6712 return;
6713 } else if (VT == MVT::v16i8) {
6714 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
6715 return;
6716 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6717 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
6718 return;
6719 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6720 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
6721 return;
6722 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6723 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
6724 return;
6725 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6726 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
6727 return;
6728 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6729 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
6730 return;
6731 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6732 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
6733 return;
6734 }
6735 break;
6736 }
6737 case AArch64ISD::ST2LANEpost: {
6738 VT = Node->getOperand(1).getValueType();
6739 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6740 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
6741 return;
6742 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6743 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6744 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
6745 return;
6746 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6747 VT == MVT::v2f32) {
6748 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
6749 return;
6750 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6751 VT == MVT::v1f64) {
6752 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
6753 return;
6754 }
6755 break;
6756 }
6757 case AArch64ISD::ST3LANEpost: {
6758 VT = Node->getOperand(1).getValueType();
6759 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6760 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
6761 return;
6762 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6763 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6764 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
6765 return;
6766 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6767 VT == MVT::v2f32) {
6768 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
6769 return;
6770 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6771 VT == MVT::v1f64) {
6772 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
6773 return;
6774 }
6775 break;
6776 }
6777 case AArch64ISD::ST4LANEpost: {
6778 VT = Node->getOperand(1).getValueType();
6779 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6780 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
6781 return;
6782 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6783 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6784 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
6785 return;
6786 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6787 VT == MVT::v2f32) {
6788 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
6789 return;
6790 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6791 VT == MVT::v1f64) {
6792 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
6793 return;
6794 }
6795 break;
6796 }
6797 case AArch64ISD::SVE_LD2_MERGE_ZERO: {
6798 if (VT == MVT::nxv16i8) {
6799 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
6800 return;
6801 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6802 VT == MVT::nxv8bf16) {
6803 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
6804 return;
6805 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6806 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
6807 return;
6808 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6809 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
6810 return;
6811 }
6812 break;
6813 }
6814 case AArch64ISD::SVE_LD3_MERGE_ZERO: {
6815 if (VT == MVT::nxv16i8) {
6816 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
6817 return;
6818 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6819 VT == MVT::nxv8bf16) {
6820 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
6821 return;
6822 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6823 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
6824 return;
6825 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6826 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
6827 return;
6828 }
6829 break;
6830 }
6831 case AArch64ISD::SVE_LD4_MERGE_ZERO: {
6832 if (VT == MVT::nxv16i8) {
6833 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
6834 return;
6835 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6836 VT == MVT::nxv8bf16) {
6837 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
6838 return;
6839 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6840 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
6841 return;
6842 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6843 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
6844 return;
6845 }
6846 break;
6847 }
6848 }
6849
6850 // Select the default instruction
6851 SelectCode(Node);
6852}
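// Illustrative walk-through of the post-increment selection above (values
// chosen for exposition, not an exhaustive mapping): an AArch64ISD::LD2post
// node producing MVT::v4f32 is handled by
//   SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
// i.e. a post-indexed LD2 of two 4S Q-registers, with the individual results
// later extracted from the register tuple starting at qsub0. Any node or type
// not listed in the switch falls through to the TableGen-generated SelectCode.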
6853
6854 /// createAArch64ISelDag - This pass converts a legalized DAG into an
6855/// AArch64-specific DAG, ready for instruction scheduling.
6856 FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
6857 CodeGenOptLevel OptLevel) {
6858 return new AArch64DAGToDAGISel(TM, OptLevel);
6859}
6860
6861/// When \p PredVT is a scalable vector predicate in the form
6862 /// MVT::nx<M>xi1, it builds the corresponding scalable vector of
6863/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
6864/// structured vectors (NumVec >1), the output data type is
6865/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
6866/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
6867/// EVT.
6868 static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
6869 unsigned NumVec) {
6870 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
6871 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
6872 return EVT();
6873
6874 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
6875 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
6876 return EVT();
6877
6878 ElementCount EC = PredVT.getVectorElementCount();
6879 EVT ScalarVT =
6880 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
6881 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
6882
6883 return MemVT;
6884}
6885
6886/// Return the EVT of the data associated to a memory operation in \p
6887 /// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
6888 static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
6889 if (isa<MemSDNode>(Root))
6890 return cast<MemSDNode>(Root)->getMemoryVT();
6891
6892 if (isa<MemIntrinsicSDNode>(Root))
6893 return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();
6894
6895 const unsigned Opcode = Root->getOpcode();
6896 // For custom ISD nodes, we have to look at them individually to extract the
6897 // type of the data moved to/from memory.
6898 switch (Opcode) {
6899 case AArch64ISD::LD1_MERGE_ZERO:
6900 case AArch64ISD::LD1S_MERGE_ZERO:
6901 case AArch64ISD::LDNF1_MERGE_ZERO:
6902 case AArch64ISD::LDNF1S_MERGE_ZERO:
6903 return cast<VTSDNode>(Root->getOperand(3))->getVT();
6904 case AArch64ISD::ST1_PRED:
6905 return cast<VTSDNode>(Root->getOperand(4))->getVT();
6906 case AArch64ISD::SVE_LD2_MERGE_ZERO:
6907 return getPackedVectorTypeFromPredicateType(
6908 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
6909 case AArch64ISD::SVE_LD3_MERGE_ZERO:
6910 return getPackedVectorTypeFromPredicateType(
6911 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
6912 case AArch64ISD::SVE_LD4_MERGE_ZERO:
6913 return getPackedVectorTypeFromPredicateType(
6914 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
6915 default:
6916 break;
6917 }
6918
6919 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
6920 return EVT();
6921
6922 switch (Root->getConstantOperandVal(1)) {
6923 default:
6924 return EVT();
6925 case Intrinsic::aarch64_sme_ldr:
6926 case Intrinsic::aarch64_sme_str:
6927 return MVT::nxv16i8;
6928 case Intrinsic::aarch64_sve_prf:
6929 // We are using an SVE prefetch intrinsic. Type must be inferred from the
6930 // width of the predicate.
6931 return getPackedVectorTypeFromPredicateType(
6932 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
6933 case Intrinsic::aarch64_sve_ld2_sret:
6934 case Intrinsic::aarch64_sve_ld2q_sret:
6935 return getPackedVectorTypeFromPredicateType(
6936 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
6937 case Intrinsic::aarch64_sve_st2q:
6938 return getPackedVectorTypeFromPredicateType(
6939 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
6940 case Intrinsic::aarch64_sve_ld3_sret:
6941 case Intrinsic::aarch64_sve_ld3q_sret:
6942 return getPackedVectorTypeFromPredicateType(
6943 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
6944 case Intrinsic::aarch64_sve_st3q:
6945 return getPackedVectorTypeFromPredicateType(
6946 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
6947 case Intrinsic::aarch64_sve_ld4_sret:
6948 case Intrinsic::aarch64_sve_ld4q_sret:
6949 return getPackedVectorTypeFromPredicateType(
6950 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
6951 case Intrinsic::aarch64_sve_st4q:
6952 return getPackedVectorTypeFromPredicateType(
6953 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
6954 case Intrinsic::aarch64_sve_ld1udq:
6955 case Intrinsic::aarch64_sve_st1dq:
6956 return EVT(MVT::nxv1i64);
6957 case Intrinsic::aarch64_sve_ld1uwq:
6958 case Intrinsic::aarch64_sve_st1wq:
6959 return EVT(MVT::nxv1i32);
6960 }
6961}
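// Worked example (illustrative): for a call to the aarch64_sve_ld2_sret
// intrinsic whose governing predicate operand (operand 2) has type
// MVT::nxv16i1, the helper above returns MVT::nxv32i8 -- i8 elements
// (128 / 16 bits) with the element count doubled for the two-vector form.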
6962
6963/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
6964 /// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
6965/// where Root is the memory access using N for its address.
6966template <int64_t Min, int64_t Max>
6967bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
6968 SDValue &Base,
6969 SDValue &OffImm) {
6970 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
6971 const DataLayout &DL = CurDAG->getDataLayout();
6972 const MachineFrameInfo &MFI = MF->getFrameInfo();
6973
6974 if (N.getOpcode() == ISD::FrameIndex) {
6975 int FI = cast<FrameIndexSDNode>(N)->getIndex();
6976 // We can only encode VL scaled offsets, so only fold in frame indexes
6977 // referencing SVE objects.
6978 if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
6979 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
6980 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
6981 return true;
6982 }
6983
6984 return false;
6985 }
6986
6987 if (MemVT == EVT())
6988 return false;
6989
6990 if (N.getOpcode() != ISD::ADD)
6991 return false;
6992
6993 SDValue VScale = N.getOperand(1);
6994 if (VScale.getOpcode() != ISD::VSCALE)
6995 return false;
6996
6997 TypeSize TS = MemVT.getSizeInBits();
6998 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
6999 int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7000
7001 if ((MulImm % MemWidthBytes) != 0)
7002 return false;
7003
7004 int64_t Offset = MulImm / MemWidthBytes;
7005 if (Offset < Min || Offset > Max)
7006 return false;
7007
7008 Base = N.getOperand(0);
7009 if (Base.getOpcode() == ISD::FrameIndex) {
7010 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7011 // We can only encode VL scaled offsets, so only fold in frame indexes
7012 // referencing SVE objects.
7013 if (MFI.getStackID(FI) == TargetStackID::ScalableVector)
7014 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7015 }
7016
7017 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7018 return true;
7019}
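// Worked example (illustrative numbers): with MemVT = MVT::nxv4i32 the minimum
// memory width is 16 bytes, so an address of the form
//   (add %base, (vscale * 48))
// is folded to Base = %base, OffImm = 3 (48 / 16), provided 3 lies within
// [Min, Max]; a vscale multiplier that is not a multiple of 16 is rejected.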
7020
7021/// Select register plus register addressing mode for SVE, with scaled
7022/// offset.
7023bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7024 SDValue &Base,
7025 SDValue &Offset) {
7026 if (N.getOpcode() != ISD::ADD)
7027 return false;
7028
7029 // Process an ADD node.
7030 const SDValue LHS = N.getOperand(0);
7031 const SDValue RHS = N.getOperand(1);
7032
7033 // 8 bit data does not come with the SHL node, so it is treated
7034 // separately.
7035 if (Scale == 0) {
7036 Base = LHS;
7037 Offset = RHS;
7038 return true;
7039 }
7040
7041 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7042 int64_t ImmOff = C->getSExtValue();
7043 unsigned Size = 1 << Scale;
7044
7045 // To use the reg+reg addressing mode, the immediate must be a multiple of
7046 // the vector element's byte size.
7047 if (ImmOff % Size)
7048 return false;
7049
7050 SDLoc DL(N);
7051 Base = LHS;
7052 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7053 SDValue Ops[] = {Offset};
7054 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7055 Offset = SDValue(MI, 0);
7056 return true;
7057 }
7058
7059 // Check if the RHS is a shift node with a constant.
7060 if (RHS.getOpcode() != ISD::SHL)
7061 return false;
7062
7063 const SDValue ShiftRHS = RHS.getOperand(1);
7064 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7065 if (C->getZExtValue() == Scale) {
7066 Base = LHS;
7067 Offset = RHS.getOperand(0);
7068 return true;
7069 }
7070
7071 return false;
7072}
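// Illustrative matches for Scale = 2 (32-bit elements, hypothetical operands):
//   (add %base, (shl %idx, 2))  -> Base = %base, Offset = %idx
//   (add %base, 24)             -> Base = %base, Offset = MOVi64imm 6 (24 >> 2)
// A constant offset that is not a multiple of the element size (here 4 bytes)
// is rejected, as is a shift amount different from Scale.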
7073
7074bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7075 const AArch64TargetLowering *TLI =
7076 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7077
7078 return TLI->isAllActivePredicate(*CurDAG, N);
7079}
7080
7081bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7082 EVT VT = N.getValueType();
7083 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7084}
7085
7086bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7087 SDValue &Base, SDValue &Offset,
7088 unsigned Scale) {
7089 // Try to untangle an ADD node into a 'reg + offset'
7090 if (N.getOpcode() == ISD::ADD)
7091 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
7092 int64_t ImmOff = C->getSExtValue();
7093 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0))) {
7094 Base = N.getOperand(0);
7095 Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7096 return true;
7097 }
7098 }
7099
7100 // By default, just match reg + 0.
7101 Base = N;
7102 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7103 return true;
7104}
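// Illustrative matches (hypothetical MaxSize = 15, Scale = 1):
//   (add %reg, 7)  -> Base = %reg, Offset = 7
//   (add %reg, 23) -> out of range, falls back to Base = the whole node,
//                     Offset = 0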