LLVM 19.0.0git
AArch64ISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
16#include "llvm/ADT/APSInt.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34//===--------------------------------------------------------------------===//
35/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
36/// instructions for SelectionDAG operations.
37///
38namespace {
39
40class AArch64DAGToDAGISel : public SelectionDAGISel {
41
42 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
43 /// make the right decision when generating code for different targets.
44 const AArch64Subtarget *Subtarget;
45
46public:
47 static char ID;
48
49 AArch64DAGToDAGISel() = delete;
50
51 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
52 CodeGenOptLevel OptLevel)
53 : SelectionDAGISel(ID, tm, OptLevel), Subtarget(nullptr) {}
54
55 bool runOnMachineFunction(MachineFunction &MF) override {
56 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
58 }
59
60 void Select(SDNode *Node) override;
61
62 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
63 /// inline asm expressions.
65 InlineAsm::ConstraintCode ConstraintID,
66 std::vector<SDValue> &OutOps) override;
67
68 template <signed Low, signed High, signed Scale>
69 bool SelectRDVLImm(SDValue N, SDValue &Imm);
70
71 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
72 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
73 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
74 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
75 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
76 return SelectShiftedRegister(N, false, Reg, Shift);
77 }
78 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
79 return SelectShiftedRegister(N, true, Reg, Shift);
80 }
81 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
82 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
83 }
84 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
85 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
86 }
87 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
88 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
89 }
90 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
91 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
92 }
93 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
94 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
95 }
96 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
97 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
98 }
99 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
100 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
101 }
102 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
103 return SelectAddrModeIndexed(N, 1, Base, OffImm);
104 }
105 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
106 return SelectAddrModeIndexed(N, 2, Base, OffImm);
107 }
108 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
109 return SelectAddrModeIndexed(N, 4, Base, OffImm);
110 }
111 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
112 return SelectAddrModeIndexed(N, 8, Base, OffImm);
113 }
114 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
115 return SelectAddrModeIndexed(N, 16, Base, OffImm);
116 }
117 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
118 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
119 }
120 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
121 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
122 }
123 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
124 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
125 }
126 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
127 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
128 }
129 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
130 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
131 }
132 template <unsigned Size, unsigned Max>
133 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
134 // Test if there is an appropriate addressing mode and check if the
135 // immediate fits.
136 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
137 if (Found) {
138 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
139 int64_t C = CI->getSExtValue();
140 if (C <= Max)
141 return true;
142 }
143 }
144
145 // Otherwise, base only, materialize address in register.
146 Base = N;
147 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
148 return true;
149 }
150
151 template<int Width>
152 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
153 SDValue &SignExtend, SDValue &DoShift) {
154 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
155 }
156
157 template<int Width>
158 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
159 SDValue &SignExtend, SDValue &DoShift) {
160 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
161 }
162
163 bool SelectExtractHigh(SDValue N, SDValue &Res) {
164 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
165 N = N->getOperand(0);
166 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
167 !isa<ConstantSDNode>(N->getOperand(1)))
168 return false;
169 EVT VT = N->getValueType(0);
170 EVT LVT = N->getOperand(0).getValueType();
171 unsigned Index = N->getConstantOperandVal(1);
172 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
174 return false;
175 Res = N->getOperand(0);
176 return true;
177 }
178
179 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
180 if (N.getOpcode() != AArch64ISD::VLSHR)
181 return false;
182 SDValue Op = N->getOperand(0);
183 EVT VT = Op.getValueType();
184 unsigned ShtAmt = N->getConstantOperandVal(1);
185 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
186 return false;
187
188 APInt Imm;
189 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
190 Imm = APInt(VT.getScalarSizeInBits(),
191 Op.getOperand(1).getConstantOperandVal(0)
192 << Op.getOperand(1).getConstantOperandVal(1));
193 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
194 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
195 Imm = APInt(VT.getScalarSizeInBits(),
196 Op.getOperand(1).getConstantOperandVal(0));
197 else
198 return false;
199
200 if (Imm != 1ULL << (ShtAmt - 1))
201 return false;
202
203 Res1 = Op.getOperand(0);
204 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
205 return true;
206 }
207
208 bool SelectDupZeroOrUndef(SDValue N) {
209 switch(N->getOpcode()) {
210 case ISD::UNDEF:
211 return true;
212 case AArch64ISD::DUP:
213 case ISD::SPLAT_VECTOR: {
214 auto Opnd0 = N->getOperand(0);
215 if (isNullConstant(Opnd0))
216 return true;
217 if (isNullFPConstant(Opnd0))
218 return true;
219 break;
220 }
221 default:
222 break;
223 }
224
225 return false;
226 }
227
228 bool SelectDupZero(SDValue N) {
229 switch(N->getOpcode()) {
230 case AArch64ISD::DUP:
231 case ISD::SPLAT_VECTOR: {
232 auto Opnd0 = N->getOperand(0);
233 if (isNullConstant(Opnd0))
234 return true;
235 if (isNullFPConstant(Opnd0))
236 return true;
237 break;
238 }
239 }
240
241 return false;
242 }
243
244 bool SelectDupNegativeZero(SDValue N) {
245 switch(N->getOpcode()) {
246 case AArch64ISD::DUP:
247 case ISD::SPLAT_VECTOR: {
248 ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
249 return Const && Const->isZero() && Const->isNegative();
250 }
251 }
252
253 return false;
254 }
255
256 template<MVT::SimpleValueType VT>
257 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
258 return SelectSVEAddSubImm(N, VT, Imm, Shift);
259 }
260
261 template <MVT::SimpleValueType VT, bool Negate>
262 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
263 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
264 }
265
266 template <MVT::SimpleValueType VT>
267 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
268 return SelectSVECpyDupImm(N, VT, Imm, Shift);
269 }
270
271 template <MVT::SimpleValueType VT, bool Invert = false>
272 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
273 return SelectSVELogicalImm(N, VT, Imm, Invert);
274 }
275
276 template <MVT::SimpleValueType VT>
277 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
278 return SelectSVEArithImm(N, VT, Imm);
279 }
280
281 template <unsigned Low, unsigned High, bool AllowSaturation = false>
282 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
283 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
284 }
285
286 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
287 if (N->getOpcode() != ISD::SPLAT_VECTOR)
288 return false;
289
290 EVT EltVT = N->getValueType(0).getVectorElementType();
291 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
292 /* High */ EltVT.getFixedSizeInBits(),
293 /* AllowSaturation */ true, Imm);
294 }
295
296 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
297 template<signed Min, signed Max, signed Scale, bool Shift>
298 bool SelectCntImm(SDValue N, SDValue &Imm) {
299 if (!isa<ConstantSDNode>(N))
300 return false;
301
302 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
303 if (Shift)
304 MulImm = 1LL << MulImm;
305
306 if ((MulImm % std::abs(Scale)) != 0)
307 return false;
308
309 MulImm /= Scale;
310 if ((MulImm >= Min) && (MulImm <= Max)) {
311 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
312 return true;
313 }
314
315 return false;
316 }
317
318 template <signed Max, signed Scale>
319 bool SelectEXTImm(SDValue N, SDValue &Imm) {
320 if (!isa<ConstantSDNode>(N))
321 return false;
322
323 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
324
325 if (MulImm >= 0 && MulImm <= Max) {
326 MulImm *= Scale;
327 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
328 return true;
329 }
330
331 return false;
332 }
333
334 template <unsigned BaseReg, unsigned Max>
335 bool ImmToReg(SDValue N, SDValue &Imm) {
336 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
337 uint64_t C = CI->getZExtValue();
338
339 if (C > Max)
340 return false;
341
342 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
343 return true;
344 }
345 return false;
346 }
347
348 /// Form sequences of consecutive 64/128-bit registers for use in NEON
349 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
350 /// between 1 and 4 elements. If it contains a single element that is returned
351 /// unchanged; otherwise a REG_SEQUENCE value is returned.
354 // Form a sequence of SVE registers for instructions using list of vectors,
355 // e.g. structured loads and stores (ldN, stN).
356 SDValue createZTuple(ArrayRef<SDValue> Vecs);
357
358 // Similar to above, except the register must start at a multiple of the
359 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
360 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
361
362 /// Generic helper for the createDTuple/createQTuple
363 /// functions. Those should almost always be called instead.
364 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
365 const unsigned SubRegs[]);
366
367 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
368
369 bool tryIndexedLoad(SDNode *N);
370
371 bool trySelectStackSlotTagP(SDNode *N);
372 void SelectTagP(SDNode *N);
373
374 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
375 unsigned SubRegIdx);
376 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
377 unsigned SubRegIdx);
378 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
379 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
380 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
381 unsigned Opc_rr, unsigned Opc_ri,
382 bool IsIntr = false);
383 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
384 unsigned Scale, unsigned Opc_ri,
385 unsigned Opc_rr);
386 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
387 bool IsZmMulti, unsigned Opcode,
388 bool HasPred = false);
389 void SelectPExtPair(SDNode *N, unsigned Opc);
390 void SelectWhilePair(SDNode *N, unsigned Opc);
391 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
392 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
393 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
394 bool IsTupleInput, unsigned Opc);
395 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
396
397 template <unsigned MaxIdx, unsigned Scale>
398 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
399 unsigned Op);
400
401 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
402 /// SVE Reg+Imm addressing mode.
403 template <int64_t Min, int64_t Max>
404 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
405 SDValue &OffImm);
406 /// SVE Reg+Reg address mode.
407 template <unsigned Scale>
408 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
409 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
410 }
411
412 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc,
413 uint32_t MaxImm);
414
415 template <unsigned MaxIdx, unsigned Scale>
416 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
417 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
418 }
419
420 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
421 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
422 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
423 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
424 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
425 unsigned Opc_rr, unsigned Opc_ri);
426 std::tuple<unsigned, SDValue, SDValue>
427 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
428 const SDValue &OldBase, const SDValue &OldOffset,
429 unsigned Scale);
430
431 bool tryBitfieldExtractOp(SDNode *N);
432 bool tryBitfieldExtractOpFromSExt(SDNode *N);
433 bool tryBitfieldInsertOp(SDNode *N);
434 bool tryBitfieldInsertInZeroOp(SDNode *N);
435 bool tryShiftAmountMod(SDNode *N);
436
437 bool tryReadRegister(SDNode *N);
438 bool tryWriteRegister(SDNode *N);
439
440 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
441 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
442
443 bool trySelectXAR(SDNode *N);
444
445// Include the pieces autogenerated from the target description.
446#include "AArch64GenDAGISel.inc"
447
448private:
449 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
450 SDValue &Shift);
451 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
452 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
453 SDValue &OffImm) {
454 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
455 }
456 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
457 unsigned Size, SDValue &Base,
458 SDValue &OffImm);
459 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
460 SDValue &OffImm);
461 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
462 SDValue &OffImm);
463 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
464 SDValue &Offset, SDValue &SignExtend,
465 SDValue &DoShift);
466 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
467 SDValue &Offset, SDValue &SignExtend,
468 SDValue &DoShift);
469 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
470 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
471 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
472 SDValue &Offset, SDValue &SignExtend);
473
474 template<unsigned RegWidth>
475 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
476 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
477 }
478
479 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
480
481 template<unsigned RegWidth>
482 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
483 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
484 }
485
486 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
487 unsigned Width);
488
489 bool SelectCMP_SWAP(SDNode *N);
490
491 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
492 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
493 bool Negate);
494 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
495 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
496
497 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
498 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
499 bool AllowSaturation, SDValue &Imm);
500
501 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
502 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
503 SDValue &Offset);
504 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
505 SDValue &Offset, unsigned Scale = 1);
506
507 bool SelectAllActivePredicate(SDValue N);
508 bool SelectAnyPredicate(SDValue N);
509};
510} // end anonymous namespace
511
512char AArch64DAGToDAGISel::ID = 0;
513
514INITIALIZE_PASS(AArch64DAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)
515
516/// isIntImmediate - This method tests to see if the node is a constant
517/// operand. If so Imm will receive the 32-bit value.
518static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
519 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
520 Imm = C->getZExtValue();
521 return true;
522 }
523 return false;
524}
525
526// isIntImmediate - This method tests to see if a constant operand.
527// If so Imm will receive the value.
528static bool isIntImmediate(SDValue N, uint64_t &Imm) {
529 return isIntImmediate(N.getNode(), Imm);
530}
531
532// isOpcWithIntImmediate - This method tests to see if the node is a specific
533// opcode and that it has a immediate integer right operand.
534// If so Imm will receive the 32 bit value.
535static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
536 uint64_t &Imm) {
537 return N->getOpcode() == Opc &&
538 isIntImmediate(N->getOperand(1).getNode(), Imm);
539}
540
541// isIntImmediateEq - This method tests to see if N is a constant operand that
542// is equivalent to 'ImmExpected'.
543#ifndef NDEBUG
544static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
545 uint64_t Imm;
546 if (!isIntImmediate(N.getNode(), Imm))
547 return false;
548 return Imm == ImmExpected;
549}
550#endif
551
552bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
553 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
554 std::vector<SDValue> &OutOps) {
555 switch(ConstraintID) {
556 default:
557 llvm_unreachable("Unexpected asm memory constraint");
558 case InlineAsm::ConstraintCode::m:
559 case InlineAsm::ConstraintCode::o:
560 case InlineAsm::ConstraintCode::Q:
561 // We need to make sure that this one operand does not end up in XZR, thus
562 // require the address to be in a PointerRegClass register.
563 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
564 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
565 SDLoc dl(Op);
566 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
567 SDValue NewOp =
568 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
569 dl, Op.getValueType(),
570 Op, RC), 0);
571 OutOps.push_back(NewOp);
572 return false;
573 }
574 return true;
575}
576
577/// SelectArithImmed - Select an immediate value that can be represented as
578/// a 12-bit value shifted left by either 0 or 12. If so, return true with
579/// Val set to the 12-bit value and Shift set to the shifter operand.
580bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
581 SDValue &Shift) {
582 // This function is called from the addsub_shifted_imm ComplexPattern,
583 // which lists [imm] as the list of opcode it's interested in, however
584 // we still need to check whether the operand is actually an immediate
585 // here because the ComplexPattern opcode list is only used in
586 // root-level opcode matching.
587 if (!isa<ConstantSDNode>(N.getNode()))
588 return false;
589
590 uint64_t Immed = N.getNode()->getAsZExtVal();
591 unsigned ShiftAmt;
592
593 if (Immed >> 12 == 0) {
594 ShiftAmt = 0;
595 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
596 ShiftAmt = 12;
597 Immed = Immed >> 12;
598 } else
599 return false;
600
601 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
602 SDLoc dl(N);
603 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
604 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
605 return true;
606}
607
608/// SelectNegArithImmed - As above, but negates the value before trying to
609/// select it.
610bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
611 SDValue &Shift) {
612 // This function is called from the addsub_shifted_imm ComplexPattern,
613 // which lists [imm] as the list of opcode it's interested in, however
614 // we still need to check whether the operand is actually an immediate
615 // here because the ComplexPattern opcode list is only used in
616 // root-level opcode matching.
617 if (!isa<ConstantSDNode>(N.getNode()))
618 return false;
619
620 // The immediate operand must be a 24-bit zero-extended immediate.
621 uint64_t Immed = N.getNode()->getAsZExtVal();
622
623 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
624 // have the opposite effect on the C flag, so this pattern mustn't match under
625 // those circumstances.
626 if (Immed == 0)
627 return false;
628
629 if (N.getValueType() == MVT::i32)
630 Immed = ~((uint32_t)Immed) + 1;
631 else
632 Immed = ~Immed + 1ULL;
633 if (Immed & 0xFFFFFFFFFF000000ULL)
634 return false;
635
636 Immed &= 0xFFFFFFULL;
637 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
638 Shift);
639}
640
641/// getShiftTypeForNode - Translate a shift node to the corresponding
642/// ShiftType value.
644 switch (N.getOpcode()) {
645 default:
647 case ISD::SHL:
648 return AArch64_AM::LSL;
649 case ISD::SRL:
650 return AArch64_AM::LSR;
651 case ISD::SRA:
652 return AArch64_AM::ASR;
653 case ISD::ROTR:
654 return AArch64_AM::ROR;
655 }
656}
657
658/// Determine whether it is worth it to fold SHL into the addressing
659/// mode.
661 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
662 // It is worth folding logical shift of up to three places.
663 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
664 if (!CSD)
665 return false;
666 unsigned ShiftVal = CSD->getZExtValue();
667 if (ShiftVal > 3)
668 return false;
669
670 // Check if this particular node is reused in any non-memory related
671 // operation. If yes, do not try to fold this node into the address
672 // computation, since the computation will be kept.
673 const SDNode *Node = V.getNode();
674 for (SDNode *UI : Node->uses())
675 if (!isa<MemSDNode>(*UI))
676 for (SDNode *UII : UI->uses())
677 if (!isa<MemSDNode>(*UII))
678 return false;
679 return true;
680}
681
682/// Determine whether it is worth to fold V into an extended register addressing
683/// mode.
684bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
685 // Trivial if we are optimizing for code size or if there is only
686 // one use of the value.
687 if (CurDAG->shouldOptForSize() || V.hasOneUse())
688 return true;
689
690 // If a subtarget has a slow shift, folding a shift into multiple loads
691 // costs additional micro-ops.
692 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
693 return false;
694
695 // Check whether we're going to emit the address arithmetic anyway because
696 // it's used by a non-address operation.
697 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
698 return true;
699 if (V.getOpcode() == ISD::ADD) {
700 const SDValue LHS = V.getOperand(0);
701 const SDValue RHS = V.getOperand(1);
702 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
703 return true;
704 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
705 return true;
706 }
707
708 // It hurts otherwise, since the value will be reused.
709 return false;
710}
711
712/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
713/// to select more shifted register
714bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
715 SDValue &Shift) {
716 EVT VT = N.getValueType();
717 if (VT != MVT::i32 && VT != MVT::i64)
718 return false;
719
720 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
721 return false;
722 SDValue LHS = N.getOperand(0);
723 if (!LHS->hasOneUse())
724 return false;
725
726 unsigned LHSOpcode = LHS->getOpcode();
727 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
728 return false;
729
730 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
731 if (!ShiftAmtNode)
732 return false;
733
734 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
735 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
736 if (!RHSC)
737 return false;
738
739 APInt AndMask = RHSC->getAPIntValue();
740 unsigned LowZBits, MaskLen;
741 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
742 return false;
743
744 unsigned BitWidth = N.getValueSizeInBits();
745 SDLoc DL(LHS);
746 uint64_t NewShiftC;
747 unsigned NewShiftOp;
748 if (LHSOpcode == ISD::SHL) {
749 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
750 // BitWidth != LowZBits + MaskLen doesn't match the pattern
751 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
752 return false;
753
754 NewShiftC = LowZBits - ShiftAmtC;
755 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
756 } else {
757 if (LowZBits == 0)
758 return false;
759
760 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
761 NewShiftC = LowZBits + ShiftAmtC;
762 if (NewShiftC >= BitWidth)
763 return false;
764
765 // SRA need all high bits
766 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
767 return false;
768
769 // SRL high bits can be 0 or 1
770 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
771 return false;
772
773 if (LHSOpcode == ISD::SRL)
774 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
775 else
776 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
777 }
778
779 assert(NewShiftC < BitWidth && "Invalid shift amount");
780 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
781 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
782 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
783 NewShiftAmt, BitWidthMinus1),
784 0);
785 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
786 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
787 return true;
788}
789
790/// getExtendTypeForNode - Translate an extend node to the corresponding
791/// ExtendType value.
793getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
794 if (N.getOpcode() == ISD::SIGN_EXTEND ||
795 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
796 EVT SrcVT;
797 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
798 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
799 else
800 SrcVT = N.getOperand(0).getValueType();
801
802 if (!IsLoadStore && SrcVT == MVT::i8)
803 return AArch64_AM::SXTB;
804 else if (!IsLoadStore && SrcVT == MVT::i16)
805 return AArch64_AM::SXTH;
806 else if (SrcVT == MVT::i32)
807 return AArch64_AM::SXTW;
808 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
809
811 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
812 N.getOpcode() == ISD::ANY_EXTEND) {
813 EVT SrcVT = N.getOperand(0).getValueType();
814 if (!IsLoadStore && SrcVT == MVT::i8)
815 return AArch64_AM::UXTB;
816 else if (!IsLoadStore && SrcVT == MVT::i16)
817 return AArch64_AM::UXTH;
818 else if (SrcVT == MVT::i32)
819 return AArch64_AM::UXTW;
820 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
821
823 } else if (N.getOpcode() == ISD::AND) {
824 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
825 if (!CSD)
827 uint64_t AndMask = CSD->getZExtValue();
828
829 switch (AndMask) {
830 default:
832 case 0xFF:
833 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
834 case 0xFFFF:
835 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
836 case 0xFFFFFFFF:
837 return AArch64_AM::UXTW;
838 }
839 }
840
842}
843
844/// Determine whether it is worth to fold V into an extended register of an
845/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
846/// instruction, and the shift should be treated as worth folding even if has
847/// multiple uses.
848bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
849 // Trivial if we are optimizing for code size or if there is only
850 // one use of the value.
851 if (CurDAG->shouldOptForSize() || V.hasOneUse())
852 return true;
853
854 // If a subtarget has a fastpath LSL we can fold a logical shift into
855 // the add/sub and save a cycle.
856 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
857 V.getConstantOperandVal(1) <= 4 &&
859 return true;
860
861 // It hurts otherwise, since the value will be reused.
862 return false;
863}
864
865/// SelectShiftedRegister - Select a "shifted register" operand. If the value
866/// is not shifted, set the Shift operand to default of "LSL 0". The logical
867/// instructions allow the shifted register to be rotated, but the arithmetic
868/// instructions do not. The AllowROR parameter specifies whether ROR is
869/// supported.
870bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
871 SDValue &Reg, SDValue &Shift) {
872 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
873 return true;
874
876 if (ShType == AArch64_AM::InvalidShiftExtend)
877 return false;
878 if (!AllowROR && ShType == AArch64_AM::ROR)
879 return false;
880
881 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
882 unsigned BitSize = N.getValueSizeInBits();
883 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
884 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
885
886 Reg = N.getOperand(0);
887 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
888 return isWorthFoldingALU(N, true);
889 }
890
891 return false;
892}
893
894/// Instructions that accept extend modifiers like UXTW expect the register
895/// being extended to be a GPR32, but the incoming DAG might be acting on a
896/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
897/// this is the case.
899 if (N.getValueType() == MVT::i32)
900 return N;
901
902 SDLoc dl(N);
903 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
904}
905
906// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
907template<signed Low, signed High, signed Scale>
908bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
909 if (!isa<ConstantSDNode>(N))
910 return false;
911
912 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
913 if ((MulImm % std::abs(Scale)) == 0) {
914 int64_t RDVLImm = MulImm / Scale;
915 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
916 Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
917 return true;
918 }
919 }
920
921 return false;
922}
923
924/// SelectArithExtendedRegister - Select a "extended register" operand. This
925/// operand folds in an extend followed by an optional left shift.
926bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
927 SDValue &Shift) {
928 unsigned ShiftVal = 0;
930
931 if (N.getOpcode() == ISD::SHL) {
932 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
933 if (!CSD)
934 return false;
935 ShiftVal = CSD->getZExtValue();
936 if (ShiftVal > 4)
937 return false;
938
939 Ext = getExtendTypeForNode(N.getOperand(0));
941 return false;
942
943 Reg = N.getOperand(0).getOperand(0);
944 } else {
947 return false;
948
949 Reg = N.getOperand(0);
950
951 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
952 // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
953 auto isDef32 = [](SDValue N) {
954 unsigned Opc = N.getOpcode();
955 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
956 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
957 Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
958 Opc != ISD::FREEZE;
959 };
960 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
961 isDef32(Reg))
962 return false;
963 }
964
965 // AArch64 mandates that the RHS of the operation must use the smallest
966 // register class that could contain the size being extended from. Thus,
967 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
968 // there might not be an actual 32-bit value in the program. We can
969 // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here.
970 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
971 Reg = narrowIfNeeded(CurDAG, Reg);
972 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
973 MVT::i32);
974 return isWorthFoldingALU(N);
975}
976
977/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
978/// operand is refered by the instructions have SP operand
979bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
980 SDValue &Shift) {
981 unsigned ShiftVal = 0;
983
984 if (N.getOpcode() != ISD::SHL)
985 return false;
986
987 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
988 if (!CSD)
989 return false;
990 ShiftVal = CSD->getZExtValue();
991 if (ShiftVal > 4)
992 return false;
993
995 Reg = N.getOperand(0);
996 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
997 MVT::i32);
998 return isWorthFoldingALU(N);
999}
1000
1001/// If there's a use of this ADDlow that's not itself a load/store then we'll
1002/// need to create a real ADD instruction from it anyway and there's no point in
1003/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1004/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1005/// leads to duplicated ADRP instructions.
1007 for (auto *Use : N->uses()) {
1008 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
1009 Use->getOpcode() != ISD::ATOMIC_LOAD &&
1010 Use->getOpcode() != ISD::ATOMIC_STORE)
1011 return false;
1012
1013 // ldar and stlr have much more restrictive addressing modes (just a
1014 // register).
1015 if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getSuccessOrdering()))
1016 return false;
1017 }
1018
1019 return true;
1020}
1021
1022/// Check if the immediate offset is valid as a scaled immediate.
1023static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1024 unsigned Size) {
1025 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1026 Offset < (Range << Log2_32(Size)))
1027 return true;
1028 return false;
1029}
1030
1031/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1032/// immediate" address. The "Size" argument is the size in bytes of the memory
1033/// reference, which determines the scale.
1034bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1035 unsigned BW, unsigned Size,
1036 SDValue &Base,
1037 SDValue &OffImm) {
1038 SDLoc dl(N);
1039 const DataLayout &DL = CurDAG->getDataLayout();
1040 const TargetLowering *TLI = getTargetLowering();
1041 if (N.getOpcode() == ISD::FrameIndex) {
1042 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1043 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1044 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1045 return true;
1046 }
1047
1048 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
1049 // selected here doesn't support labels/immediates, only base+offset.
1050 if (CurDAG->isBaseWithConstantOffset(N)) {
1051 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1052 if (IsSignedImm) {
1053 int64_t RHSC = RHS->getSExtValue();
1054 unsigned Scale = Log2_32(Size);
1055 int64_t Range = 0x1LL << (BW - 1);
1056
1057 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1058 RHSC < (Range << Scale)) {
1059 Base = N.getOperand(0);
1060 if (Base.getOpcode() == ISD::FrameIndex) {
1061 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1062 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1063 }
1064 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1065 return true;
1066 }
1067 } else {
1068 // unsigned Immediate
1069 uint64_t RHSC = RHS->getZExtValue();
1070 unsigned Scale = Log2_32(Size);
1071 uint64_t Range = 0x1ULL << BW;
1072
1073 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1074 Base = N.getOperand(0);
1075 if (Base.getOpcode() == ISD::FrameIndex) {
1076 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1077 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1078 }
1079 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1080 return true;
1081 }
1082 }
1083 }
1084 }
1085 // Base only. The address will be materialized into a register before
1086 // the memory is accessed.
1087 // add x0, Xbase, #offset
1088 // stp x1, x2, [x0]
1089 Base = N;
1090 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1091 return true;
1092}
1093
1094/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1095/// immediate" address. The "Size" argument is the size in bytes of the memory
1096/// reference, which determines the scale.
1097bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1098 SDValue &Base, SDValue &OffImm) {
1099 SDLoc dl(N);
1100 const DataLayout &DL = CurDAG->getDataLayout();
1101 const TargetLowering *TLI = getTargetLowering();
1102 if (N.getOpcode() == ISD::FrameIndex) {
1103 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1104 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1105 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1106 return true;
1107 }
1108
1109 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1110 GlobalAddressSDNode *GAN =
1111 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1112 Base = N.getOperand(0);
1113 OffImm = N.getOperand(1);
1114 if (!GAN)
1115 return true;
1116
1117 if (GAN->getOffset() % Size == 0 &&
1119 return true;
1120 }
1121
1122 if (CurDAG->isBaseWithConstantOffset(N)) {
1123 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1124 int64_t RHSC = (int64_t)RHS->getZExtValue();
1125 unsigned Scale = Log2_32(Size);
1126 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1127 Base = N.getOperand(0);
1128 if (Base.getOpcode() == ISD::FrameIndex) {
1129 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1130 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1131 }
1132 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1133 return true;
1134 }
1135 }
1136 }
1137
1138 // Before falling back to our general case, check if the unscaled
1139 // instructions can handle this. If so, that's preferable.
1140 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1141 return false;
1142
1143 // Base only. The address will be materialized into a register before
1144 // the memory is accessed.
1145 // add x0, Xbase, #offset
1146 // ldr x0, [x0]
1147 Base = N;
1148 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1149 return true;
1150}
1151
1152/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1153/// immediate" address. This should only match when there is an offset that
1154/// is not valid for a scaled immediate addressing mode. The "Size" argument
1155/// is the size in bytes of the memory reference, which is needed here to know
1156/// what is valid for a scaled immediate.
1157bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1158 SDValue &Base,
1159 SDValue &OffImm) {
1160 if (!CurDAG->isBaseWithConstantOffset(N))
1161 return false;
1162 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1163 int64_t RHSC = RHS->getSExtValue();
1164 if (RHSC >= -256 && RHSC < 256) {
1165 Base = N.getOperand(0);
1166 if (Base.getOpcode() == ISD::FrameIndex) {
1167 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1168 const TargetLowering *TLI = getTargetLowering();
1169 Base = CurDAG->getTargetFrameIndex(
1170 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1171 }
1172 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1173 return true;
1174 }
1175 }
1176 return false;
1177}
1178
1180 SDLoc dl(N);
1181 SDValue ImpDef = SDValue(
1182 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1183 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1184 N);
1185}
1186
1187/// Check if the given SHL node (\p N), can be used to form an
1188/// extended register for an addressing mode.
1189bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1190 bool WantExtend, SDValue &Offset,
1191 SDValue &SignExtend) {
1192 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1193 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1194 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1195 return false;
1196
1197 SDLoc dl(N);
1198 if (WantExtend) {
1200 getExtendTypeForNode(N.getOperand(0), true);
1202 return false;
1203
1204 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1205 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1206 MVT::i32);
1207 } else {
1208 Offset = N.getOperand(0);
1209 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1210 }
1211
1212 unsigned LegalShiftVal = Log2_32(Size);
1213 unsigned ShiftVal = CSD->getZExtValue();
1214
1215 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1216 return false;
1217
1218 return isWorthFoldingAddr(N, Size);
1219}
1220
1221bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1223 SDValue &SignExtend,
1224 SDValue &DoShift) {
1225 if (N.getOpcode() != ISD::ADD)
1226 return false;
1227 SDValue LHS = N.getOperand(0);
1228 SDValue RHS = N.getOperand(1);
1229 SDLoc dl(N);
1230
1231 // We don't want to match immediate adds here, because they are better lowered
1232 // to the register-immediate addressing modes.
1233 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
1234 return false;
1235
1236 // Check if this particular node is reused in any non-memory related
1237 // operation. If yes, do not try to fold this node into the address
1238 // computation, since the computation will be kept.
1239 const SDNode *Node = N.getNode();
1240 for (SDNode *UI : Node->uses()) {
1241 if (!isa<MemSDNode>(*UI))
1242 return false;
1243 }
1244
1245 // Remember if it is worth folding N when it produces extended register.
1246 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1247
1248 // Try to match a shifted extend on the RHS.
1249 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1250 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1251 Base = LHS;
1252 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1253 return true;
1254 }
1255
1256 // Try to match a shifted extend on the LHS.
1257 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1258 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1259 Base = RHS;
1260 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1261 return true;
1262 }
1263
1264 // There was no shift, whatever else we find.
1265 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1266
1268 // Try to match an unshifted extend on the LHS.
1269 if (IsExtendedRegisterWorthFolding &&
1270 (Ext = getExtendTypeForNode(LHS, true)) !=
1272 Base = RHS;
1273 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1274 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1275 MVT::i32);
1276 if (isWorthFoldingAddr(LHS, Size))
1277 return true;
1278 }
1279
1280 // Try to match an unshifted extend on the RHS.
1281 if (IsExtendedRegisterWorthFolding &&
1282 (Ext = getExtendTypeForNode(RHS, true)) !=
1284 Base = LHS;
1285 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1286 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1287 MVT::i32);
1288 if (isWorthFoldingAddr(RHS, Size))
1289 return true;
1290 }
1291
1292 return false;
1293}
1294
1295// Check if the given immediate is preferred by ADD. If an immediate can be
1296// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
1297// encoded by one MOVZ, return true.
1298static bool isPreferredADD(int64_t ImmOff) {
1299 // Constant in [0x0, 0xfff] can be encoded in ADD.
1300 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1301 return true;
1302 // Check if it can be encoded in an "ADD LSL #12".
1303 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1304 // As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant.
1305 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1306 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1307 return false;
1308}
1309
1310bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1312 SDValue &SignExtend,
1313 SDValue &DoShift) {
1314 if (N.getOpcode() != ISD::ADD)
1315 return false;
1316 SDValue LHS = N.getOperand(0);
1317 SDValue RHS = N.getOperand(1);
1318 SDLoc DL(N);
1319
1320 // Check if this particular node is reused in any non-memory related
1321 // operation. If yes, do not try to fold this node into the address
1322 // computation, since the computation will be kept.
1323 const SDNode *Node = N.getNode();
1324 for (SDNode *UI : Node->uses()) {
1325 if (!isa<MemSDNode>(*UI))
1326 return false;
1327 }
1328
1329 // Watch out if RHS is a wide immediate, it can not be selected into
1330 // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into
1331 // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate
1332 // instructions like:
1333 // MOV X0, WideImmediate
1334 // ADD X1, BaseReg, X0
1335 // LDR X2, [X1, 0]
1336 // For such situation, using [BaseReg, XReg] addressing mode can save one
1337 // ADD/SUB:
1338 // MOV X0, WideImmediate
1339 // LDR X2, [BaseReg, X0]
1340 if (isa<ConstantSDNode>(RHS)) {
1341 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1342 // Skip the immediate can be selected by load/store addressing mode.
1343 // Also skip the immediate can be encoded by a single ADD (SUB is also
1344 // checked by using -ImmOff).
1345 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1346 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1347 return false;
1348
1349 SDValue Ops[] = { RHS };
1350 SDNode *MOVI =
1351 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1352 SDValue MOVIV = SDValue(MOVI, 0);
1353 // This ADD of two X register will be selected into [Reg+Reg] mode.
1354 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1355 }
1356
1357 // Remember if it is worth folding N when it produces extended register.
1358 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1359
1360 // Try to match a shifted extend on the RHS.
1361 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1362 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1363 Base = LHS;
1364 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1365 return true;
1366 }
1367
1368 // Try to match a shifted extend on the LHS.
1369 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1370 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1371 Base = RHS;
1372 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1373 return true;
1374 }
1375
1376 // Match any non-shifted, non-extend, non-immediate add expression.
1377 Base = LHS;
1378 Offset = RHS;
1379 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1380 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1381 // Reg1 + Reg2 is free: no check needed.
1382 return true;
1383}
1384
1385SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1386 static const unsigned RegClassIDs[] = {
1387 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1388 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1389 AArch64::dsub2, AArch64::dsub3};
1390
1391 return createTuple(Regs, RegClassIDs, SubRegs);
1392}
1393
1394SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1395 static const unsigned RegClassIDs[] = {
1396 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1397 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1398 AArch64::qsub2, AArch64::qsub3};
1399
1400 return createTuple(Regs, RegClassIDs, SubRegs);
1401}
1402
1403SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1404 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1405 AArch64::ZPR3RegClassID,
1406 AArch64::ZPR4RegClassID};
1407 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1408 AArch64::zsub2, AArch64::zsub3};
1409
1410 return createTuple(Regs, RegClassIDs, SubRegs);
1411}
1412
1413SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1414 assert(Regs.size() == 2 || Regs.size() == 4);
1415
1416 // The createTuple interface requires 3 RegClassIDs for each possible
1417 // tuple type even though we only have them for ZPR2 and ZPR4.
1418 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1419 AArch64::ZPR4Mul4RegClassID};
1420 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1421 AArch64::zsub2, AArch64::zsub3};
1422 return createTuple(Regs, RegClassIDs, SubRegs);
1423}
1424
1425SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1426 const unsigned RegClassIDs[],
1427 const unsigned SubRegs[]) {
1428 // There's no special register-class for a vector-list of 1 element: it's just
1429 // a vector.
1430 if (Regs.size() == 1)
1431 return Regs[0];
1432
1433 assert(Regs.size() >= 2 && Regs.size() <= 4);
1434
1435 SDLoc DL(Regs[0]);
1436
1438
1439 // First operand of REG_SEQUENCE is the desired RegClass.
1440 Ops.push_back(
1441 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1442
1443 // Then we get pairs of source & subregister-position for the components.
1444 for (unsigned i = 0; i < Regs.size(); ++i) {
1445 Ops.push_back(Regs[i]);
1446 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1447 }
1448
1449 SDNode *N =
1450 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1451 return SDValue(N, 0);
1452}
1453
1454void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1455 bool isExt) {
1456 SDLoc dl(N);
1457 EVT VT = N->getValueType(0);
1458
1459 unsigned ExtOff = isExt;
1460
1461 // Form a REG_SEQUENCE to force register allocation.
1462 unsigned Vec0Off = ExtOff + 1;
1463 SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1464 N->op_begin() + Vec0Off + NumVecs);
1465 SDValue RegSeq = createQTuple(Regs);
1466
1468 if (isExt)
1469 Ops.push_back(N->getOperand(1));
1470 Ops.push_back(RegSeq);
1471 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1472 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1473}
1474
1475bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1476 LoadSDNode *LD = cast<LoadSDNode>(N);
1477 if (LD->isUnindexed())
1478 return false;
1479 EVT VT = LD->getMemoryVT();
1480 EVT DstVT = N->getValueType(0);
1481 ISD::MemIndexedMode AM = LD->getAddressingMode();
1482 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1483
1484 // We're not doing validity checking here. That was done when checking
1485 // if we should mark the load as indexed or not. We're just selecting
1486 // the right instruction.
1487 unsigned Opcode = 0;
1488
1489 ISD::LoadExtType ExtType = LD->getExtensionType();
1490 bool InsertTo64 = false;
1491 if (VT == MVT::i64)
1492 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1493 else if (VT == MVT::i32) {
1494 if (ExtType == ISD::NON_EXTLOAD)
1495 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1496 else if (ExtType == ISD::SEXTLOAD)
1497 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1498 else {
1499 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1500 InsertTo64 = true;
1501 // The result of the load is only i32. It's the subreg_to_reg that makes
1502 // it into an i64.
1503 DstVT = MVT::i32;
1504 }
1505 } else if (VT == MVT::i16) {
1506 if (ExtType == ISD::SEXTLOAD) {
1507 if (DstVT == MVT::i64)
1508 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1509 else
1510 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1511 } else {
1512 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1513 InsertTo64 = DstVT == MVT::i64;
1514 // The result of the load is only i32. It's the subreg_to_reg that makes
1515 // it into an i64.
1516 DstVT = MVT::i32;
1517 }
1518 } else if (VT == MVT::i8) {
1519 if (ExtType == ISD::SEXTLOAD) {
1520 if (DstVT == MVT::i64)
1521 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1522 else
1523 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1524 } else {
1525 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1526 InsertTo64 = DstVT == MVT::i64;
1527 // The result of the load is only i32. It's the subreg_to_reg that makes
1528 // it into an i64.
1529 DstVT = MVT::i32;
1530 }
1531 } else if (VT == MVT::f16) {
1532 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1533 } else if (VT == MVT::bf16) {
1534 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1535 } else if (VT == MVT::f32) {
1536 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1537 } else if (VT == MVT::f64 || VT.is64BitVector()) {
1538 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1539 } else if (VT.is128BitVector()) {
1540 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1541 } else
1542 return false;
1543 SDValue Chain = LD->getChain();
1544 SDValue Base = LD->getBasePtr();
1545 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1546 int OffsetVal = (int)OffsetOp->getZExtValue();
1547 SDLoc dl(N);
1548 SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1549 SDValue Ops[] = { Base, Offset, Chain };
1550 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1551 MVT::Other, Ops);
1552
1553 // Transfer memoperands.
1554 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1555 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1556
1557 // Either way, we're replacing the node, so tell the caller that.
1558 SDValue LoadedVal = SDValue(Res, 1);
1559 if (InsertTo64) {
1560 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1561 LoadedVal =
1562 SDValue(CurDAG->getMachineNode(
1563 AArch64::SUBREG_TO_REG, dl, MVT::i64,
1564 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1565 SubReg),
1566 0);
1567 }
1568
1569 ReplaceUses(SDValue(N, 0), LoadedVal);
1570 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1571 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1572 CurDAG->RemoveDeadNode(N);
1573 return true;
1574}
1575
1576void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1577 unsigned SubRegIdx) {
1578 SDLoc dl(N);
1579 EVT VT = N->getValueType(0);
1580 SDValue Chain = N->getOperand(0);
1581
1582 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1583 Chain};
1584
1585 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1586
1587 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1588 SDValue SuperReg = SDValue(Ld, 0);
1589 for (unsigned i = 0; i < NumVecs; ++i)
1590 ReplaceUses(SDValue(N, i),
1591 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1592
1593 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1594
1595 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1596 // because it's too simple to have needed special treatment during lowering.
1597 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1598 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1599 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1600 }
1601
1602 CurDAG->RemoveDeadNode(N);
1603}
1604
1605void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1606 unsigned Opc, unsigned SubRegIdx) {
1607 SDLoc dl(N);
1608 EVT VT = N->getValueType(0);
1609 SDValue Chain = N->getOperand(0);
1610
1611 SDValue Ops[] = {N->getOperand(1), // Mem operand
1612 N->getOperand(2), // Incremental
1613 Chain};
1614
1615 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1616 MVT::Untyped, MVT::Other};
1617
1618 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1619
1620 // Update uses of write back register
1621 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1622
1623 // Update uses of vector list
1624 SDValue SuperReg = SDValue(Ld, 1);
1625 if (NumVecs == 1)
1626 ReplaceUses(SDValue(N, 0), SuperReg);
1627 else
1628 for (unsigned i = 0; i < NumVecs; ++i)
1629 ReplaceUses(SDValue(N, i),
1630 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1631
1632 // Update the chain
1633 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1634 CurDAG->RemoveDeadNode(N);
1635}
1636
1637/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1638/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1639/// new Base and an SDValue representing the new offset.
1640std::tuple<unsigned, SDValue, SDValue>
1641AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1642 unsigned Opc_ri,
1643 const SDValue &OldBase,
1644 const SDValue &OldOffset,
1645 unsigned Scale) {
1646 SDValue NewBase = OldBase;
1647 SDValue NewOffset = OldOffset;
1648 // Detect a possible Reg+Imm addressing mode.
1649 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1650 N, OldBase, NewBase, NewOffset);
1651
1652 // Detect a possible reg+reg addressing mode, but only if we haven't already
1653 // detected a Reg+Imm one.
1654 const bool IsRegReg =
1655 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1656
1657 // Select the instruction.
1658 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1659}
1660
1661enum class SelectTypeKind {
1662 Int1 = 0,
1663 Int = 1,
1664 FP = 2,
1665 AnyType = 3,
1666};
1667
1668/// This function selects an opcode from a list of opcodes, which is
1669/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
1670/// element types, in this order.
1671template <SelectTypeKind Kind>
1672static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1673 // Only match scalable vector VTs
1674 if (!VT.isScalableVector())
1675 return 0;
1676
1677 EVT EltVT = VT.getVectorElementType();
1678 switch (Kind) {
1680 break;
1682 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1683 EltVT != MVT::i64)
1684 return 0;
1685 break;
1687 if (EltVT != MVT::i1)
1688 return 0;
1689 break;
1690 case SelectTypeKind::FP:
1691 if (EltVT != MVT::f16 && EltVT != MVT::f32 && EltVT != MVT::f64)
1692 return 0;
1693 break;
1694 }
1695
1696 unsigned Offset;
1697 switch (VT.getVectorMinNumElements()) {
1698 case 16: // 8-bit
1699 Offset = 0;
1700 break;
1701 case 8: // 16-bit
1702 Offset = 1;
1703 break;
1704 case 4: // 32-bit
1705 Offset = 2;
1706 break;
1707 case 2: // 64-bit
1708 Offset = 3;
1709 break;
1710 default:
1711 return 0;
1712 }
1713
1714 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1715}
1716
1717// This function is almost identical to SelectWhilePair, but has an
1718// extra check on the range of the immediate operand.
1719// TODO: Merge these two functions together at some point?
1720void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
1721 // Immediate can be either 0 or 1.
1722 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
1723 if (Imm->getZExtValue() > 1)
1724 return;
1725
1726 SDLoc DL(N);
1727 EVT VT = N->getValueType(0);
1728 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1729 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1730 SDValue SuperReg = SDValue(WhilePair, 0);
1731
1732 for (unsigned I = 0; I < 2; ++I)
1733 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1734 AArch64::psub0 + I, DL, VT, SuperReg));
1735
1736 CurDAG->RemoveDeadNode(N);
1737}
1738
1739void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1740 SDLoc DL(N);
1741 EVT VT = N->getValueType(0);
1742
1743 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1744
1745 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1746 SDValue SuperReg = SDValue(WhilePair, 0);
1747
1748 for (unsigned I = 0; I < 2; ++I)
1749 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1750 AArch64::psub0 + I, DL, VT, SuperReg));
1751
1752 CurDAG->RemoveDeadNode(N);
1753}
1754
1755void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1756 unsigned Opcode) {
1757 EVT VT = N->getValueType(0);
1758 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1759 SDValue Ops = createZTuple(Regs);
1760 SDLoc DL(N);
1761 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1762 SDValue SuperReg = SDValue(Intrinsic, 0);
1763 for (unsigned i = 0; i < NumVecs; ++i)
1764 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1765 AArch64::zsub0 + i, DL, VT, SuperReg));
1766
1767 CurDAG->RemoveDeadNode(N);
1768}
1769
1770void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
1771 unsigned NumVecs,
1772 bool IsZmMulti,
1773 unsigned Opcode,
1774 bool HasPred) {
1775 assert(Opcode != 0 && "Unexpected opcode");
1776
1777 SDLoc DL(N);
1778 EVT VT = N->getValueType(0);
1779 unsigned FirstVecIdx = HasPred ? 2 : 1;
1780
1781 auto GetMultiVecOperand = [=](unsigned StartIdx) {
1782 SmallVector<SDValue, 4> Regs(N->op_begin() + StartIdx,
1783 N->op_begin() + StartIdx + NumVecs);
1784 return createZMulTuple(Regs);
1785 };
1786
1787 SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
1788
1789 SDValue Zm;
1790 if (IsZmMulti)
1791 Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
1792 else
1793 Zm = N->getOperand(NumVecs + FirstVecIdx);
1794
1796 if (HasPred)
1797 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
1798 N->getOperand(1), Zdn, Zm);
1799 else
1800 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
1801 SDValue SuperReg = SDValue(Intrinsic, 0);
1802 for (unsigned i = 0; i < NumVecs; ++i)
1803 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1804 AArch64::zsub0 + i, DL, VT, SuperReg));
1805
1806 CurDAG->RemoveDeadNode(N);
1807}
1808
1809void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
1810 unsigned Scale, unsigned Opc_ri,
1811 unsigned Opc_rr, bool IsIntr) {
1812 assert(Scale < 5 && "Invalid scaling value.");
1813 SDLoc DL(N);
1814 EVT VT = N->getValueType(0);
1815 SDValue Chain = N->getOperand(0);
1816
1817 // Optimize addressing mode.
1819 unsigned Opc;
1820 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1821 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
1822 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
1823
1824 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
1825 Base, // Memory operand
1826 Offset, Chain};
1827
1828 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1829
1830 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1831 SDValue SuperReg = SDValue(Load, 0);
1832 for (unsigned i = 0; i < NumVecs; ++i)
1833 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1834 AArch64::zsub0 + i, DL, VT, SuperReg));
1835
1836 // Copy chain
1837 unsigned ChainIdx = NumVecs;
1838 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1839 CurDAG->RemoveDeadNode(N);
1840}
1841
1842void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
1843 unsigned NumVecs,
1844 unsigned Scale,
1845 unsigned Opc_ri,
1846 unsigned Opc_rr) {
1847 assert(Scale < 4 && "Invalid scaling value.");
1848 SDLoc DL(N);
1849 EVT VT = N->getValueType(0);
1850 SDValue Chain = N->getOperand(0);
1851
1852 SDValue PNg = N->getOperand(2);
1853 SDValue Base = N->getOperand(3);
1854 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
1855 unsigned Opc;
1856 std::tie(Opc, Base, Offset) =
1857 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
1858
1859 SDValue Ops[] = {PNg, // Predicate-as-counter
1860 Base, // Memory operand
1861 Offset, Chain};
1862
1863 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1864
1865 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1866 SDValue SuperReg = SDValue(Load, 0);
1867 for (unsigned i = 0; i < NumVecs; ++i)
1868 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1869 AArch64::zsub0 + i, DL, VT, SuperReg));
1870
1871 // Copy chain
1872 unsigned ChainIdx = NumVecs;
1873 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1874 CurDAG->RemoveDeadNode(N);
1875}
1876
1877void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
1878 unsigned Opcode) {
1879 if (N->getValueType(0) != MVT::nxv4f32)
1880 return;
1881 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
1882}
1883
1884void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
1885 unsigned NumOutVecs,
1886 unsigned Opc, uint32_t MaxImm) {
1887 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
1888 if (Imm->getZExtValue() > MaxImm)
1889 return;
1890
1891 SDValue ZtValue;
1892 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
1893 return;
1894 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)};
1895 SDLoc DL(Node);
1896 EVT VT = Node->getValueType(0);
1897
1899 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
1900 SDValue SuperReg = SDValue(Instruction, 0);
1901
1902 for (unsigned I = 0; I < NumOutVecs; ++I)
1903 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
1904 AArch64::zsub0 + I, DL, VT, SuperReg));
1905
1906 // Copy chain
1907 unsigned ChainIdx = NumOutVecs;
1908 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
1909 CurDAG->RemoveDeadNode(Node);
1910}
1911
1912void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
1913 unsigned Op) {
1914 SDLoc DL(N);
1915 EVT VT = N->getValueType(0);
1916
1917 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1918 SDValue Zd = createZMulTuple(Regs);
1919 SDValue Zn = N->getOperand(1 + NumVecs);
1920 SDValue Zm = N->getOperand(2 + NumVecs);
1921
1922 SDValue Ops[] = {Zd, Zn, Zm};
1923
1924 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
1925 SDValue SuperReg = SDValue(Intrinsic, 0);
1926 for (unsigned i = 0; i < NumVecs; ++i)
1927 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1928 AArch64::zsub0 + i, DL, VT, SuperReg));
1929
1930 CurDAG->RemoveDeadNode(N);
1931}
1932
1933bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
1934 switch (BaseReg) {
1935 default:
1936 return false;
1937 case AArch64::ZA:
1938 case AArch64::ZAB0:
1939 if (TileNum == 0)
1940 break;
1941 return false;
1942 case AArch64::ZAH0:
1943 if (TileNum <= 1)
1944 break;
1945 return false;
1946 case AArch64::ZAS0:
1947 if (TileNum <= 3)
1948 break;
1949 return false;
1950 case AArch64::ZAD0:
1951 if (TileNum <= 7)
1952 break;
1953 return false;
1954 }
1955
1956 BaseReg += TileNum;
1957 return true;
1958}
1959
1960template <unsigned MaxIdx, unsigned Scale>
1961void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
1962 unsigned BaseReg, unsigned Op) {
1963 unsigned TileNum = 0;
1964 if (BaseReg != AArch64::ZA)
1965 TileNum = N->getConstantOperandVal(2);
1966
1967 if (!SelectSMETile(BaseReg, TileNum))
1968 return;
1969
1970 SDValue SliceBase, Base, Offset;
1971 if (BaseReg == AArch64::ZA)
1972 SliceBase = N->getOperand(2);
1973 else
1974 SliceBase = N->getOperand(3);
1975
1976 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
1977 return;
1978
1979 SDLoc DL(N);
1980 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
1981 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
1982 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
1983
1984 EVT VT = N->getValueType(0);
1985 for (unsigned I = 0; I < NumVecs; ++I)
1986 ReplaceUses(SDValue(N, I),
1987 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
1988 SDValue(Mov, 0)));
1989 // Copy chain
1990 unsigned ChainIdx = NumVecs;
1991 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
1992 CurDAG->RemoveDeadNode(N);
1993}
1994
1995void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
1996 unsigned NumOutVecs,
1997 bool IsTupleInput,
1998 unsigned Opc) {
1999 SDLoc DL(N);
2000 EVT VT = N->getValueType(0);
2001 unsigned NumInVecs = N->getNumOperands() - 1;
2002
2004 if (IsTupleInput) {
2005 assert((NumInVecs == 2 || NumInVecs == 4) &&
2006 "Don't know how to handle multi-register input!");
2007 SmallVector<SDValue, 4> Regs(N->op_begin() + 1,
2008 N->op_begin() + 1 + NumInVecs);
2009 Ops.push_back(createZMulTuple(Regs));
2010 } else {
2011 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2012 for (unsigned I = 0; I < NumInVecs; I++)
2013 Ops.push_back(N->getOperand(1 + I));
2014 }
2015
2016 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2017 SDValue SuperReg = SDValue(Res, 0);
2018
2019 for (unsigned I = 0; I < NumOutVecs; I++)
2020 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2021 AArch64::zsub0 + I, DL, VT, SuperReg));
2022 CurDAG->RemoveDeadNode(N);
2023}
2024
2025void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2026 unsigned Opc) {
2027 SDLoc dl(N);
2028 EVT VT = N->getOperand(2)->getValueType(0);
2029
2030 // Form a REG_SEQUENCE to force register allocation.
2031 bool Is128Bit = VT.getSizeInBits() == 128;
2032 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2033 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2034
2035 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2036 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2037
2038 // Transfer memoperands.
2039 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2040 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2041
2042 ReplaceNode(N, St);
2043}
2044
2045void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2046 unsigned Scale, unsigned Opc_rr,
2047 unsigned Opc_ri) {
2048 SDLoc dl(N);
2049
2050 // Form a REG_SEQUENCE to force register allocation.
2051 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2052 SDValue RegSeq = createZTuple(Regs);
2053
2054 // Optimize addressing mode.
2055 unsigned Opc;
2057 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2058 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2059 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2060
2061 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2062 Base, // address
2063 Offset, // offset
2064 N->getOperand(0)}; // chain
2065 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2066
2067 ReplaceNode(N, St);
2068}
2069
2070bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2071 SDValue &OffImm) {
2072 SDLoc dl(N);
2073 const DataLayout &DL = CurDAG->getDataLayout();
2074 const TargetLowering *TLI = getTargetLowering();
2075
2076 // Try to match it for the frame address
2077 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2078 int FI = FINode->getIndex();
2079 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2080 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2081 return true;
2082 }
2083
2084 return false;
2085}
2086
2087void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2088 unsigned Opc) {
2089 SDLoc dl(N);
2090 EVT VT = N->getOperand(2)->getValueType(0);
2091 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2092 MVT::Other}; // Type for the Chain
2093
2094 // Form a REG_SEQUENCE to force register allocation.
2095 bool Is128Bit = VT.getSizeInBits() == 128;
2096 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2097 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2098
2099 SDValue Ops[] = {RegSeq,
2100 N->getOperand(NumVecs + 1), // base register
2101 N->getOperand(NumVecs + 2), // Incremental
2102 N->getOperand(0)}; // Chain
2103 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2104
2105 ReplaceNode(N, St);
2106}
2107
2108namespace {
2109/// WidenVector - Given a value in the V64 register class, produce the
2110/// equivalent value in the V128 register class.
2111class WidenVector {
2112 SelectionDAG &DAG;
2113
2114public:
2115 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2116
2117 SDValue operator()(SDValue V64Reg) {
2118 EVT VT = V64Reg.getValueType();
2119 unsigned NarrowSize = VT.getVectorNumElements();
2120 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2121 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2122 SDLoc DL(V64Reg);
2123
2124 SDValue Undef =
2125 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2126 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2127 }
2128};
2129} // namespace
2130
2131/// NarrowVector - Given a value in the V128 register class, produce the
2132/// equivalent value in the V64 register class.
2134 EVT VT = V128Reg.getValueType();
2135 unsigned WideSize = VT.getVectorNumElements();
2136 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2137 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2138
2139 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2140 V128Reg);
2141}
2142
2143void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2144 unsigned Opc) {
2145 SDLoc dl(N);
2146 EVT VT = N->getValueType(0);
2147 bool Narrow = VT.getSizeInBits() == 64;
2148
2149 // Form a REG_SEQUENCE to force register allocation.
2150 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2151
2152 if (Narrow)
2153 transform(Regs, Regs.begin(),
2154 WidenVector(*CurDAG));
2155
2156 SDValue RegSeq = createQTuple(Regs);
2157
2158 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2159
2160 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2161
2162 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2163 N->getOperand(NumVecs + 3), N->getOperand(0)};
2164 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2165 SDValue SuperReg = SDValue(Ld, 0);
2166
2167 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2168 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2169 AArch64::qsub2, AArch64::qsub3 };
2170 for (unsigned i = 0; i < NumVecs; ++i) {
2171 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2172 if (Narrow)
2173 NV = NarrowVector(NV, *CurDAG);
2174 ReplaceUses(SDValue(N, i), NV);
2175 }
2176
2177 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2178 CurDAG->RemoveDeadNode(N);
2179}
2180
2181void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2182 unsigned Opc) {
2183 SDLoc dl(N);
2184 EVT VT = N->getValueType(0);
2185 bool Narrow = VT.getSizeInBits() == 64;
2186
2187 // Form a REG_SEQUENCE to force register allocation.
2188 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2189
2190 if (Narrow)
2191 transform(Regs, Regs.begin(),
2192 WidenVector(*CurDAG));
2193
2194 SDValue RegSeq = createQTuple(Regs);
2195
2196 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2197 RegSeq->getValueType(0), MVT::Other};
2198
2199 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2200
2201 SDValue Ops[] = {RegSeq,
2202 CurDAG->getTargetConstant(LaneNo, dl,
2203 MVT::i64), // Lane Number
2204 N->getOperand(NumVecs + 2), // Base register
2205 N->getOperand(NumVecs + 3), // Incremental
2206 N->getOperand(0)};
2207 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2208
2209 // Update uses of the write back register
2210 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2211
2212 // Update uses of the vector list
2213 SDValue SuperReg = SDValue(Ld, 1);
2214 if (NumVecs == 1) {
2215 ReplaceUses(SDValue(N, 0),
2216 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2217 } else {
2218 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2219 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2220 AArch64::qsub2, AArch64::qsub3 };
2221 for (unsigned i = 0; i < NumVecs; ++i) {
2222 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2223 SuperReg);
2224 if (Narrow)
2225 NV = NarrowVector(NV, *CurDAG);
2226 ReplaceUses(SDValue(N, i), NV);
2227 }
2228 }
2229
2230 // Update the Chain
2231 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2232 CurDAG->RemoveDeadNode(N);
2233}
2234
2235void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2236 unsigned Opc) {
2237 SDLoc dl(N);
2238 EVT VT = N->getOperand(2)->getValueType(0);
2239 bool Narrow = VT.getSizeInBits() == 64;
2240
2241 // Form a REG_SEQUENCE to force register allocation.
2242 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2243
2244 if (Narrow)
2245 transform(Regs, Regs.begin(),
2246 WidenVector(*CurDAG));
2247
2248 SDValue RegSeq = createQTuple(Regs);
2249
2250 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2251
2252 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2253 N->getOperand(NumVecs + 3), N->getOperand(0)};
2254 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2255
2256 // Transfer memoperands.
2257 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2258 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2259
2260 ReplaceNode(N, St);
2261}
2262
2263void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2264 unsigned Opc) {
2265 SDLoc dl(N);
2266 EVT VT = N->getOperand(2)->getValueType(0);
2267 bool Narrow = VT.getSizeInBits() == 64;
2268
2269 // Form a REG_SEQUENCE to force register allocation.
2270 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2271
2272 if (Narrow)
2273 transform(Regs, Regs.begin(),
2274 WidenVector(*CurDAG));
2275
2276 SDValue RegSeq = createQTuple(Regs);
2277
2278 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2279 MVT::Other};
2280
2281 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2282
2283 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2284 N->getOperand(NumVecs + 2), // Base Register
2285 N->getOperand(NumVecs + 3), // Incremental
2286 N->getOperand(0)};
2287 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2288
2289 // Transfer memoperands.
2290 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2291 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2292
2293 ReplaceNode(N, St);
2294}
2295
2297 unsigned &Opc, SDValue &Opd0,
2298 unsigned &LSB, unsigned &MSB,
2299 unsigned NumberOfIgnoredLowBits,
2300 bool BiggerPattern) {
2301 assert(N->getOpcode() == ISD::AND &&
2302 "N must be a AND operation to call this function");
2303
2304 EVT VT = N->getValueType(0);
2305
2306 // Here we can test the type of VT and return false when the type does not
2307 // match, but since it is done prior to that call in the current context
2308 // we turned that into an assert to avoid redundant code.
2309 assert((VT == MVT::i32 || VT == MVT::i64) &&
2310 "Type checking must have been done before calling this function");
2311
2312 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2313 // changed the AND node to a 32-bit mask operation. We'll have to
2314 // undo that as part of the transform here if we want to catch all
2315 // the opportunities.
2316 // Currently the NumberOfIgnoredLowBits argument helps to recover
2317 // from these situations when matching bigger pattern (bitfield insert).
2318
2319 // For unsigned extracts, check for a shift right and mask
2320 uint64_t AndImm = 0;
2321 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2322 return false;
2323
2324 const SDNode *Op0 = N->getOperand(0).getNode();
2325
2326 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2327 // simplified. Try to undo that
2328 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2329
2330 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2331 if (AndImm & (AndImm + 1))
2332 return false;
2333
2334 bool ClampMSB = false;
2335 uint64_t SrlImm = 0;
2336 // Handle the SRL + ANY_EXTEND case.
2337 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2338 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2339 // Extend the incoming operand of the SRL to 64-bit.
2340 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2341 // Make sure to clamp the MSB so that we preserve the semantics of the
2342 // original operations.
2343 ClampMSB = true;
2344 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2346 SrlImm)) {
2347 // If the shift result was truncated, we can still combine them.
2348 Opd0 = Op0->getOperand(0).getOperand(0);
2349
2350 // Use the type of SRL node.
2351 VT = Opd0->getValueType(0);
2352 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2353 Opd0 = Op0->getOperand(0);
2354 ClampMSB = (VT == MVT::i32);
2355 } else if (BiggerPattern) {
2356 // Let's pretend a 0 shift right has been performed.
2357 // The resulting code will be at least as good as the original one
2358 // plus it may expose more opportunities for bitfield insert pattern.
2359 // FIXME: Currently we limit this to the bigger pattern, because
2360 // some optimizations expect AND and not UBFM.
2361 Opd0 = N->getOperand(0);
2362 } else
2363 return false;
2364
2365 // Bail out on large immediates. This happens when no proper
2366 // combining/constant folding was performed.
2367 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2368 LLVM_DEBUG(
2369 (dbgs() << N
2370 << ": Found large shift immediate, this should not happen\n"));
2371 return false;
2372 }
2373
2374 LSB = SrlImm;
2375 MSB = SrlImm +
2376 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2377 : llvm::countr_one<uint64_t>(AndImm)) -
2378 1;
2379 if (ClampMSB)
2380 // Since we're moving the extend before the right shift operation, we need
2381 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2382 // the zeros which would get shifted in with the original right shift
2383 // operation.
2384 MSB = MSB > 31 ? 31 : MSB;
2385
2386 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2387 return true;
2388}
2389
2390static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2391 SDValue &Opd0, unsigned &Immr,
2392 unsigned &Imms) {
2393 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2394
2395 EVT VT = N->getValueType(0);
2396 unsigned BitWidth = VT.getSizeInBits();
2397 assert((VT == MVT::i32 || VT == MVT::i64) &&
2398 "Type checking must have been done before calling this function");
2399
2400 SDValue Op = N->getOperand(0);
2401 if (Op->getOpcode() == ISD::TRUNCATE) {
2402 Op = Op->getOperand(0);
2403 VT = Op->getValueType(0);
2404 BitWidth = VT.getSizeInBits();
2405 }
2406
2407 uint64_t ShiftImm;
2408 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2409 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2410 return false;
2411
2412 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2413 if (ShiftImm + Width > BitWidth)
2414 return false;
2415
2416 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2417 Opd0 = Op.getOperand(0);
2418 Immr = ShiftImm;
2419 Imms = ShiftImm + Width - 1;
2420 return true;
2421}
2422
2423static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2424 SDValue &Opd0, unsigned &LSB,
2425 unsigned &MSB) {
2426 // We are looking for the following pattern which basically extracts several
2427 // continuous bits from the source value and places it from the LSB of the
2428 // destination value, all other bits of the destination value or set to zero:
2429 //
2430 // Value2 = AND Value, MaskImm
2431 // SRL Value2, ShiftImm
2432 //
2433 // with MaskImm >> ShiftImm to search for the bit width.
2434 //
2435 // This gets selected into a single UBFM:
2436 //
2437 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2438 //
2439
2440 if (N->getOpcode() != ISD::SRL)
2441 return false;
2442
2443 uint64_t AndMask = 0;
2444 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2445 return false;
2446
2447 Opd0 = N->getOperand(0).getOperand(0);
2448
2449 uint64_t SrlImm = 0;
2450 if (!isIntImmediate(N->getOperand(1), SrlImm))
2451 return false;
2452
2453 // Check whether we really have several bits extract here.
2454 if (!isMask_64(AndMask >> SrlImm))
2455 return false;
2456
2457 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2458 LSB = SrlImm;
2459 MSB = llvm::Log2_64(AndMask);
2460 return true;
2461}
2462
2463static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2464 unsigned &Immr, unsigned &Imms,
2465 bool BiggerPattern) {
2466 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2467 "N must be a SHR/SRA operation to call this function");
2468
2469 EVT VT = N->getValueType(0);
2470
2471 // Here we can test the type of VT and return false when the type does not
2472 // match, but since it is done prior to that call in the current context
2473 // we turned that into an assert to avoid redundant code.
2474 assert((VT == MVT::i32 || VT == MVT::i64) &&
2475 "Type checking must have been done before calling this function");
2476
2477 // Check for AND + SRL doing several bits extract.
2478 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2479 return true;
2480
2481 // We're looking for a shift of a shift.
2482 uint64_t ShlImm = 0;
2483 uint64_t TruncBits = 0;
2484 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2485 Opd0 = N->getOperand(0).getOperand(0);
2486 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2487 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2488 // We are looking for a shift of truncate. Truncate from i64 to i32 could
2489 // be considered as setting high 32 bits as zero. Our strategy here is to
2490 // always generate 64bit UBFM. This consistency will help the CSE pass
2491 // later find more redundancy.
2492 Opd0 = N->getOperand(0).getOperand(0);
2493 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2494 VT = Opd0.getValueType();
2495 assert(VT == MVT::i64 && "the promoted type should be i64");
2496 } else if (BiggerPattern) {
2497 // Let's pretend a 0 shift left has been performed.
2498 // FIXME: Currently we limit this to the bigger pattern case,
2499 // because some optimizations expect AND and not UBFM
2500 Opd0 = N->getOperand(0);
2501 } else
2502 return false;
2503
2504 // Missing combines/constant folding may have left us with strange
2505 // constants.
2506 if (ShlImm >= VT.getSizeInBits()) {
2507 LLVM_DEBUG(
2508 (dbgs() << N
2509 << ": Found large shift immediate, this should not happen\n"));
2510 return false;
2511 }
2512
2513 uint64_t SrlImm = 0;
2514 if (!isIntImmediate(N->getOperand(1), SrlImm))
2515 return false;
2516
2517 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2518 "bad amount in shift node!");
2519 int immr = SrlImm - ShlImm;
2520 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2521 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2522 // SRA requires a signed extraction
2523 if (VT == MVT::i32)
2524 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2525 else
2526 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2527 return true;
2528}
2529
2530bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2531 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2532
2533 EVT VT = N->getValueType(0);
2534 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2535 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2536 return false;
2537
2538 uint64_t ShiftImm;
2539 SDValue Op = N->getOperand(0);
2540 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2541 return false;
2542
2543 SDLoc dl(N);
2544 // Extend the incoming operand of the shift to 64-bits.
2545 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2546 unsigned Immr = ShiftImm;
2547 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2548 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2549 CurDAG->getTargetConstant(Imms, dl, VT)};
2550 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2551 return true;
2552}
2553
2554static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2555 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2556 unsigned NumberOfIgnoredLowBits = 0,
2557 bool BiggerPattern = false) {
2558 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2559 return false;
2560
2561 switch (N->getOpcode()) {
2562 default:
2563 if (!N->isMachineOpcode())
2564 return false;
2565 break;
2566 case ISD::AND:
2567 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2568 NumberOfIgnoredLowBits, BiggerPattern);
2569 case ISD::SRL:
2570 case ISD::SRA:
2571 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2572
2574 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2575 }
2576
2577 unsigned NOpc = N->getMachineOpcode();
2578 switch (NOpc) {
2579 default:
2580 return false;
2581 case AArch64::SBFMWri:
2582 case AArch64::UBFMWri:
2583 case AArch64::SBFMXri:
2584 case AArch64::UBFMXri:
2585 Opc = NOpc;
2586 Opd0 = N->getOperand(0);
2587 Immr = N->getConstantOperandVal(1);
2588 Imms = N->getConstantOperandVal(2);
2589 return true;
2590 }
2591 // Unreachable
2592 return false;
2593}
2594
2595bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2596 unsigned Opc, Immr, Imms;
2597 SDValue Opd0;
2598 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2599 return false;
2600
2601 EVT VT = N->getValueType(0);
2602 SDLoc dl(N);
2603
2604 // If the bit extract operation is 64bit but the original type is 32bit, we
2605 // need to add one EXTRACT_SUBREG.
2606 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2607 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2608 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2609
2610 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2611 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
2612 MVT::i32, SDValue(BFM, 0));
2613 ReplaceNode(N, Inner.getNode());
2614 return true;
2615 }
2616
2617 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2618 CurDAG->getTargetConstant(Imms, dl, VT)};
2619 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2620 return true;
2621}
2622
2623/// Does DstMask form a complementary pair with the mask provided by
2624/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2625/// this asks whether DstMask zeroes precisely those bits that will be set by
2626/// the other half.
2627static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2628 unsigned NumberOfIgnoredHighBits, EVT VT) {
2629 assert((VT == MVT::i32 || VT == MVT::i64) &&
2630 "i32 or i64 mask type expected!");
2631 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2632
2633 APInt SignificantDstMask = APInt(BitWidth, DstMask);
2634 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2635
2636 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2637 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2638}
2639
2640// Look for bits that will be useful for later uses.
2641// A bit is consider useless as soon as it is dropped and never used
2642// before it as been dropped.
2643// E.g., looking for useful bit of x
2644// 1. y = x & 0x7
2645// 2. z = y >> 2
2646// After #1, x useful bits are 0x7, then the useful bits of x, live through
2647// y.
2648// After #2, the useful bits of x are 0x4.
2649// However, if x is used on an unpredicatable instruction, then all its bits
2650// are useful.
2651// E.g.
2652// 1. y = x & 0x7
2653// 2. z = y >> 2
2654// 3. str x, [@x]
2655static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2656
2658 unsigned Depth) {
2659 uint64_t Imm =
2660 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2661 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2662 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2663 getUsefulBits(Op, UsefulBits, Depth + 1);
2664}
2665
2667 uint64_t Imm, uint64_t MSB,
2668 unsigned Depth) {
2669 // inherit the bitwidth value
2670 APInt OpUsefulBits(UsefulBits);
2671 OpUsefulBits = 1;
2672
2673 if (MSB >= Imm) {
2674 OpUsefulBits <<= MSB - Imm + 1;
2675 --OpUsefulBits;
2676 // The interesting part will be in the lower part of the result
2677 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2678 // The interesting part was starting at Imm in the argument
2679 OpUsefulBits <<= Imm;
2680 } else {
2681 OpUsefulBits <<= MSB + 1;
2682 --OpUsefulBits;
2683 // The interesting part will be shifted in the result
2684 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2685 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2686 // The interesting part was at zero in the argument
2687 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2688 }
2689
2690 UsefulBits &= OpUsefulBits;
2691}
2692
2693static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2694 unsigned Depth) {
2695 uint64_t Imm =
2696 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2697 uint64_t MSB =
2698 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2699
2700 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2701}
2702
2704 unsigned Depth) {
2705 uint64_t ShiftTypeAndValue =
2706 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2707 APInt Mask(UsefulBits);
2708 Mask.clearAllBits();
2709 Mask.flipAllBits();
2710
2711 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2712 // Shift Left
2713 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2714 Mask <<= ShiftAmt;
2715 getUsefulBits(Op, Mask, Depth + 1);
2716 Mask.lshrInPlace(ShiftAmt);
2717 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2718 // Shift Right
2719 // We do not handle AArch64_AM::ASR, because the sign will change the
2720 // number of useful bits
2721 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2722 Mask.lshrInPlace(ShiftAmt);
2723 getUsefulBits(Op, Mask, Depth + 1);
2724 Mask <<= ShiftAmt;
2725 } else
2726 return;
2727
2728 UsefulBits &= Mask;
2729}
2730
2731static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
2732 unsigned Depth) {
2733 uint64_t Imm =
2734 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2735 uint64_t MSB =
2736 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
2737
2738 APInt OpUsefulBits(UsefulBits);
2739 OpUsefulBits = 1;
2740
2741 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
2742 ResultUsefulBits.flipAllBits();
2743 APInt Mask(UsefulBits.getBitWidth(), 0);
2744
2745 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
2746
2747 if (MSB >= Imm) {
2748 // The instruction is a BFXIL.
2749 uint64_t Width = MSB - Imm + 1;
2750 uint64_t LSB = Imm;
2751
2752 OpUsefulBits <<= Width;
2753 --OpUsefulBits;
2754
2755 if (Op.getOperand(1) == Orig) {
2756 // Copy the low bits from the result to bits starting from LSB.
2757 Mask = ResultUsefulBits & OpUsefulBits;
2758 Mask <<= LSB;
2759 }
2760
2761 if (Op.getOperand(0) == Orig)
2762 // Bits starting from LSB in the input contribute to the result.
2763 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2764 } else {
2765 // The instruction is a BFI.
2766 uint64_t Width = MSB + 1;
2767 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
2768
2769 OpUsefulBits <<= Width;
2770 --OpUsefulBits;
2771 OpUsefulBits <<= LSB;
2772
2773 if (Op.getOperand(1) == Orig) {
2774 // Copy the bits from the result to the zero bits.
2775 Mask = ResultUsefulBits & OpUsefulBits;
2776 Mask.lshrInPlace(LSB);
2777 }
2778
2779 if (Op.getOperand(0) == Orig)
2780 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2781 }
2782
2783 UsefulBits &= Mask;
2784}
2785
2786static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
2787 SDValue Orig, unsigned Depth) {
2788
2789 // Users of this node should have already been instruction selected
2790 // FIXME: Can we turn that into an assert?
2791 if (!UserNode->isMachineOpcode())
2792 return;
2793
2794 switch (UserNode->getMachineOpcode()) {
2795 default:
2796 return;
2797 case AArch64::ANDSWri:
2798 case AArch64::ANDSXri:
2799 case AArch64::ANDWri:
2800 case AArch64::ANDXri:
2801 // We increment Depth only when we call the getUsefulBits
2802 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
2803 Depth);
2804 case AArch64::UBFMWri:
2805 case AArch64::UBFMXri:
2806 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
2807
2808 case AArch64::ORRWrs:
2809 case AArch64::ORRXrs:
2810 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
2811 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
2812 Depth);
2813 return;
2814 case AArch64::BFMWri:
2815 case AArch64::BFMXri:
2816 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
2817
2818 case AArch64::STRBBui:
2819 case AArch64::STURBBi:
2820 if (UserNode->getOperand(0) != Orig)
2821 return;
2822 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
2823 return;
2824
2825 case AArch64::STRHHui:
2826 case AArch64::STURHHi:
2827 if (UserNode->getOperand(0) != Orig)
2828 return;
2829 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
2830 return;
2831 }
2832}
2833
2834static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
2836 return;
2837 // Initialize UsefulBits
2838 if (!Depth) {
2839 unsigned Bitwidth = Op.getScalarValueSizeInBits();
2840 // At the beginning, assume every produced bits is useful
2841 UsefulBits = APInt(Bitwidth, 0);
2842 UsefulBits.flipAllBits();
2843 }
2844 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
2845
2846 for (SDNode *Node : Op.getNode()->uses()) {
2847 // A use cannot produce useful bits
2848 APInt UsefulBitsForUse = APInt(UsefulBits);
2849 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
2850 UsersUsefulBits |= UsefulBitsForUse;
2851 }
2852 // UsefulBits contains the produced bits that are meaningful for the
2853 // current definition, thus a user cannot make a bit meaningful at
2854 // this point
2855 UsefulBits &= UsersUsefulBits;
2856}
2857
2858/// Create a machine node performing a notional SHL of Op by ShlAmount. If
2859/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
2860/// 0, return Op unchanged.
2861static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
2862 if (ShlAmount == 0)
2863 return Op;
2864
2865 EVT VT = Op.getValueType();
2866 SDLoc dl(Op);
2867 unsigned BitWidth = VT.getSizeInBits();
2868 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2869
2870 SDNode *ShiftNode;
2871 if (ShlAmount > 0) {
2872 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
2873 ShiftNode = CurDAG->getMachineNode(
2874 UBFMOpc, dl, VT, Op,
2875 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
2876 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
2877 } else {
2878 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
2879 assert(ShlAmount < 0 && "expected right shift");
2880 int ShrAmount = -ShlAmount;
2881 ShiftNode = CurDAG->getMachineNode(
2882 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
2883 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
2884 }
2885
2886 return SDValue(ShiftNode, 0);
2887}
2888
2889// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
2891 bool BiggerPattern,
2892 const uint64_t NonZeroBits,
2893 SDValue &Src, int &DstLSB,
2894 int &Width);
2895
2896// For bit-field-positioning pattern "shl VAL, N)".
2898 bool BiggerPattern,
2899 const uint64_t NonZeroBits,
2900 SDValue &Src, int &DstLSB,
2901 int &Width);
2902
2903/// Does this tree qualify as an attempt to move a bitfield into position,
2904/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
2906 bool BiggerPattern, SDValue &Src,
2907 int &DstLSB, int &Width) {
2908 EVT VT = Op.getValueType();
2909 unsigned BitWidth = VT.getSizeInBits();
2910 (void)BitWidth;
2911 assert(BitWidth == 32 || BitWidth == 64);
2912
2913 KnownBits Known = CurDAG->computeKnownBits(Op);
2914
2915 // Non-zero in the sense that they're not provably zero, which is the key
2916 // point if we want to use this value
2917 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
2918 if (!isShiftedMask_64(NonZeroBits))
2919 return false;
2920
2921 switch (Op.getOpcode()) {
2922 default:
2923 break;
2924 case ISD::AND:
2925 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
2926 NonZeroBits, Src, DstLSB, Width);
2927 case ISD::SHL:
2928 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
2929 NonZeroBits, Src, DstLSB, Width);
2930 }
2931
2932 return false;
2933}
2934
2936 bool BiggerPattern,
2937 const uint64_t NonZeroBits,
2938 SDValue &Src, int &DstLSB,
2939 int &Width) {
2940 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
2941
2942 EVT VT = Op.getValueType();
2943 assert((VT == MVT::i32 || VT == MVT::i64) &&
2944 "Caller guarantees VT is one of i32 or i64");
2945 (void)VT;
2946
2947 uint64_t AndImm;
2948 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
2949 return false;
2950
2951 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
2952 // 1) (AndImm & (1 << POS) == 0)
2953 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
2954 //
2955 // 1) and 2) don't agree so something must be wrong (e.g., in
2956 // 'SelectionDAG::computeKnownBits')
2957 assert((~AndImm & NonZeroBits) == 0 &&
2958 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
2959
2960 SDValue AndOp0 = Op.getOperand(0);
2961
2962 uint64_t ShlImm;
2963 SDValue ShlOp0;
2964 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
2965 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
2966 ShlOp0 = AndOp0.getOperand(0);
2967 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
2969 ShlImm)) {
2970 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
2971
2972 // ShlVal == shl(val, N), which is a left shift on a smaller type.
2973 SDValue ShlVal = AndOp0.getOperand(0);
2974
2975 // Since this is after type legalization and ShlVal is extended to MVT::i64,
2976 // expect VT to be MVT::i32.
2977 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
2978
2979 // Widens 'val' to MVT::i64 as the source of bit field positioning.
2980 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
2981 } else
2982 return false;
2983
2984 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
2985 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
2986 // AndOp0+AND.
2987 if (!BiggerPattern && !AndOp0.hasOneUse())
2988 return false;
2989
2990 DstLSB = llvm::countr_zero(NonZeroBits);
2991 Width = llvm::countr_one(NonZeroBits >> DstLSB);
2992
2993 // Bail out on large Width. This happens when no proper combining / constant
2994 // folding was performed.
2995 if (Width >= (int)VT.getSizeInBits()) {
2996 // If VT is i64, Width > 64 is insensible since NonZeroBits is uint64_t, and
2997 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
2998 // "val".
2999 // If VT is i32, what Width >= 32 means:
3000 // - For "(and (any_extend(shl val, N)), shifted-mask)", the`and` Op
3001 // demands at least 'Width' bits (after dag-combiner). This together with
3002 // `any_extend` Op (undefined higher bits) indicates missed combination
3003 // when lowering the 'and' IR instruction to an machine IR instruction.
3004 LLVM_DEBUG(
3005 dbgs()
3006 << "Found large Width in bit-field-positioning -- this indicates no "
3007 "proper combining / constant folding was performed\n");
3008 return false;
3009 }
3010
3011 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3012 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3013 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3014 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3015 // which case it is not profitable to insert an extra shift.
3016 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3017 return false;
3018
3019 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3020 return true;
3021}
3022
3023// For node (shl (and val, mask), N)), returns true if the node is equivalent to
3024// UBFIZ.
3026 SDValue &Src, int &DstLSB,
3027 int &Width) {
3028 // Caller should have verified that N is a left shift with constant shift
3029 // amount; asserts that.
3030 assert(Op.getOpcode() == ISD::SHL &&
3031 "Op.getNode() should be a SHL node to call this function");
3032 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3033 "Op.getNode() should shift ShlImm to call this function");
3034
3035 uint64_t AndImm = 0;
3036 SDValue Op0 = Op.getOperand(0);
3037 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3038 return false;
3039
3040 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3041 if (isMask_64(ShiftedAndImm)) {
3042 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3043 // should end with Mask, and could be prefixed with random bits if those
3044 // bits are shifted out.
3045 //
3046 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3047 // the AND result corresponding to those bits are shifted out, so it's fine
3048 // to not extract them.
3049 Width = llvm::countr_one(ShiftedAndImm);
3050 DstLSB = ShlImm;
3051 Src = Op0.getOperand(0);
3052 return true;
3053 }
3054 return false;
3055}
3056
3058 bool BiggerPattern,
3059 const uint64_t NonZeroBits,
3060 SDValue &Src, int &DstLSB,
3061 int &Width) {
3062 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3063
3064 EVT VT = Op.getValueType();
3065 assert((VT == MVT::i32 || VT == MVT::i64) &&
3066 "Caller guarantees that type is i32 or i64");
3067 (void)VT;
3068
3069 uint64_t ShlImm;
3070 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3071 return false;
3072
3073 if (!BiggerPattern && !Op.hasOneUse())
3074 return false;
3075
3076 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3077 return true;
3078
3079 DstLSB = llvm::countr_zero(NonZeroBits);
3080 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3081
3082 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3083 return false;
3084
3085 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3086 return true;
3087}
3088
3089static bool isShiftedMask(uint64_t Mask, EVT VT) {
3090 assert(VT == MVT::i32 || VT == MVT::i64);
3091 if (VT == MVT::i32)
3092 return isShiftedMask_32(Mask);
3093 return isShiftedMask_64(Mask);
3094}
3095
3096// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3097// inserted only sets known zero bits.
3099 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3100
3101 EVT VT = N->getValueType(0);
3102 if (VT != MVT::i32 && VT != MVT::i64)
3103 return false;
3104
3105 unsigned BitWidth = VT.getSizeInBits();
3106
3107 uint64_t OrImm;
3108 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3109 return false;
3110
3111 // Skip this transformation if the ORR immediate can be encoded in the ORR.
3112 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3113 // performance neutral.
3115 return false;
3116
3117 uint64_t MaskImm;
3118 SDValue And = N->getOperand(0);
3119 // Must be a single use AND with an immediate operand.
3120 if (!And.hasOneUse() ||
3121 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3122 return false;
3123
3124 // Compute the Known Zero for the AND as this allows us to catch more general
3125 // cases than just looking for AND with imm.
3126 KnownBits Known = CurDAG->computeKnownBits(And);
3127
3128 // Non-zero in the sense that they're not provably zero, which is the key
3129 // point if we want to use this value.
3130 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3131
3132 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3133 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3134 return false;
3135
3136 // The bits being inserted must only set those bits that are known to be zero.
3137 if ((OrImm & NotKnownZero) != 0) {
3138 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3139 // currently handle this case.
3140 return false;
3141 }
3142
3143 // BFI/BFXIL dst, src, #lsb, #width.
3144 int LSB = llvm::countr_one(NotKnownZero);
3145 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3146
3147 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3148 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3149 unsigned ImmS = Width - 1;
3150
3151 // If we're creating a BFI instruction avoid cases where we need more
3152 // instructions to materialize the BFI constant as compared to the original
3153 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3154 // should be no worse in this case.
3155 bool IsBFI = LSB != 0;
3156 uint64_t BFIImm = OrImm >> LSB;
3157 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3158 // We have a BFI instruction and we know the constant can't be materialized
3159 // with a ORR-immediate with the zero register.
3160 unsigned OrChunks = 0, BFIChunks = 0;
3161 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3162 if (((OrImm >> Shift) & 0xFFFF) != 0)
3163 ++OrChunks;
3164 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3165 ++BFIChunks;
3166 }
3167 if (BFIChunks > OrChunks)
3168 return false;
3169 }
3170
3171 // Materialize the constant to be inserted.
3172 SDLoc DL(N);
3173 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3174 SDNode *MOVI = CurDAG->getMachineNode(
3175 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3176
3177 // Create the BFI/BFXIL instruction.
3178 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3179 CurDAG->getTargetConstant(ImmR, DL, VT),
3180 CurDAG->getTargetConstant(ImmS, DL, VT)};
3181 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3182 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3183 return true;
3184}
3185
3187 SDValue &ShiftedOperand,
3188 uint64_t &EncodedShiftImm) {
3189 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3190 if (!Dst.hasOneUse())
3191 return false;
3192
3193 EVT VT = Dst.getValueType();
3194 assert((VT == MVT::i32 || VT == MVT::i64) &&
3195 "Caller should guarantee that VT is one of i32 or i64");
3196 const unsigned SizeInBits = VT.getSizeInBits();
3197
3198 SDLoc DL(Dst.getNode());
3199 uint64_t AndImm, ShlImm;
3200 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3201 isShiftedMask_64(AndImm)) {
3202 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3203 SDValue DstOp0 = Dst.getOperand(0);
3204 if (!DstOp0.hasOneUse())
3205 return false;
3206
3207 // An example to illustrate the transformation
3208 // From:
3209 // lsr x8, x1, #1
3210 // and x8, x8, #0x3f80
3211 // bfxil x8, x1, #0, #7
3212 // To:
3213 // and x8, x23, #0x7f
3214 // ubfx x9, x23, #8, #7
3215 // orr x23, x8, x9, lsl #7
3216 //
3217 // The number of instructions remains the same, but ORR is faster than BFXIL
3218 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3219 // the dependency chain is improved after the transformation.
3220 uint64_t SrlImm;
3221 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3222 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3223 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3224 unsigned MaskWidth =
3225 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3226 unsigned UBFMOpc =
3227 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3228 SDNode *UBFMNode = CurDAG->getMachineNode(
3229 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3230 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3231 VT),
3232 CurDAG->getTargetConstant(
3233 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3234 ShiftedOperand = SDValue(UBFMNode, 0);
3235 EncodedShiftImm = AArch64_AM::getShifterImm(
3236 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3237 return true;
3238 }
3239 }
3240 return false;
3241 }
3242
3243 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3244 ShiftedOperand = Dst.getOperand(0);
3245 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3246 return true;
3247 }
3248
3249 uint64_t SrlImm;
3250 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3251 ShiftedOperand = Dst.getOperand(0);
3252 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3253 return true;
3254 }
3255 return false;
3256}
3257
3258// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3259// the operands and select it to AArch64::ORR with shifted registers if
3260// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3261static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3262 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3263 const bool BiggerPattern) {
3264 EVT VT = N->getValueType(0);
3265 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3266 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3267 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3268 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3269 assert((VT == MVT::i32 || VT == MVT::i64) &&
3270 "Expect result type to be i32 or i64 since N is combinable to BFM");
3271 SDLoc DL(N);
3272
3273 // Bail out if BFM simplifies away one node in BFM Dst.
3274 if (OrOpd1 != Dst)
3275 return false;
3276
3277 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3278 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3279 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3280 if (BiggerPattern) {
3281 uint64_t SrcAndImm;
3282 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3283 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3284 // OrOpd0 = AND Src, #Mask
3285 // So BFM simplifies away one AND node from Src and doesn't simplify away
3286 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3287 // one node (from Rd), ORR is better since it has higher throughput and
3288 // smaller latency than BFM on many AArch64 processors (and for the rest
3289 // ORR is at least as good as BFM).
3290 SDValue ShiftedOperand;
3291 uint64_t EncodedShiftImm;
3292 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3293 EncodedShiftImm)) {
3294 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3295 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3296 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3297 return true;
3298 }
3299 }
3300 return false;
3301 }
3302
3303 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3304
3305 uint64_t ShlImm;
3306 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3307 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3308 SDValue Ops[] = {
3309 Dst, Src,
3310 CurDAG->getTargetConstant(
3312 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3313 return true;
3314 }
3315
3316 // Select the following pattern to left-shifted operand rather than BFI.
3317 // %val1 = op ..
3318 // %val2 = shl %val1, #imm
3319 // %res = or %val1, %val2
3320 //
3321 // If N is selected to be BFI, we know that
3322 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3323 // BFI) 2) OrOpd1 would be the destination operand (i.e., preserved)
3324 //
3325 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3326 if (OrOpd0.getOperand(0) == OrOpd1) {
3327 SDValue Ops[] = {
3328 OrOpd1, OrOpd1,
3329 CurDAG->getTargetConstant(
3331 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3332 return true;
3333 }
3334 }
3335
3336 uint64_t SrlImm;
3337 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3338 // Select the following pattern to right-shifted operand rather than BFXIL.
3339 // %val1 = op ..
3340 // %val2 = lshr %val1, #imm
3341 // %res = or %val1, %val2
3342 //
3343 // If N is selected to be BFXIL, we know that
3344 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3345 // BFXIL) 2) OrOpd1 would be the destination operand (i.e., preserved)
3346 //
3347 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3348 if (OrOpd0.getOperand(0) == OrOpd1) {
3349 SDValue Ops[] = {
3350 OrOpd1, OrOpd1,
3351 CurDAG->getTargetConstant(
3353 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3354 return true;
3355 }
3356 }
3357
3358 return false;
3359}
3360
3361static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3362 SelectionDAG *CurDAG) {
3363 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3364
3365 EVT VT = N->getValueType(0);
3366 if (VT != MVT::i32 && VT != MVT::i64)
3367 return false;
3368
3369 unsigned BitWidth = VT.getSizeInBits();
3370
3371 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3372 // have the expected shape. Try to undo that.
3373
3374 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3375 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3376
3377 // Given a OR operation, check if we have the following pattern
3378 // ubfm c, b, imm, imm2 (or something that does the same jobs, see
3379 // isBitfieldExtractOp)
3380 // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and
3381 // countTrailingZeros(mask2) == imm2 - imm + 1
3382 // f = d | c
3383 // if yes, replace the OR instruction with:
3384 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3385
3386 // OR is commutative, check all combinations of operand order and values of
3387 // BiggerPattern, i.e.
3388 // Opd0, Opd1, BiggerPattern=false
3389 // Opd1, Opd0, BiggerPattern=false
3390 // Opd0, Opd1, BiggerPattern=true
3391 // Opd1, Opd0, BiggerPattern=true
3392 // Several of these combinations may match, so check with BiggerPattern=false
3393 // first since that will produce better results by matching more instructions
3394 // and/or inserting fewer extra instructions.
3395 for (int I = 0; I < 4; ++I) {
3396
3397 SDValue Dst, Src;
3398 unsigned ImmR, ImmS;
3399 bool BiggerPattern = I / 2;
3400 SDValue OrOpd0Val = N->getOperand(I % 2);
3401 SDNode *OrOpd0 = OrOpd0Val.getNode();
3402 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3403 SDNode *OrOpd1 = OrOpd1Val.getNode();
3404
3405 unsigned BFXOpc;
3406 int DstLSB, Width;
3407 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3408 NumberOfIgnoredLowBits, BiggerPattern)) {
3409 // Check that the returned opcode is compatible with the pattern,
3410 // i.e., same type and zero extended (U and not S)
3411 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3412 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3413 continue;
3414
3415 // Compute the width of the bitfield insertion
3416 DstLSB = 0;
3417 Width = ImmS - ImmR + 1;
3418 // FIXME: This constraint is to catch bitfield insertion we may
3419 // want to widen the pattern if we want to grab general bitfied
3420 // move case
3421 if (Width <= 0)
3422 continue;
3423
3424 // If the mask on the insertee is correct, we have a BFXIL operation. We
3425 // can share the ImmR and ImmS values from the already-computed UBFM.
3426 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3427 BiggerPattern,
3428 Src, DstLSB, Width)) {
3429 ImmR = (BitWidth - DstLSB) % BitWidth;
3430 ImmS = Width - 1;
3431 } else
3432 continue;
3433
3434 // Check the second part of the pattern
3435 EVT VT = OrOpd1Val.getValueType();
3436 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3437
3438 // Compute the Known Zero for the candidate of the first operand.
3439 // This allows to catch more general case than just looking for
3440 // AND with imm. Indeed, simplify-demanded-bits may have removed
3441 // the AND instruction because it proves it was useless.
3442 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3443
3444 // Check if there is enough room for the second operand to appear
3445 // in the first one
3446 APInt BitsToBeInserted =
3447 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3448
3449 if ((BitsToBeInserted & ~Known.Zero) != 0)
3450 continue;
3451
3452 // Set the first operand
3453 uint64_t Imm;
3454 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3455 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3456 // In that case, we can eliminate the AND
3457 Dst = OrOpd1->getOperand(0);
3458 else
3459 // Maybe the AND has been removed by simplify-demanded-bits
3460 // or is useful because it discards more bits
3461 Dst = OrOpd1Val;
3462
3463 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3464 // with shifted operand is more efficient.
3465 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3466 BiggerPattern))
3467 return true;
3468
3469 // both parts match
3470 SDLoc DL(N);
3471 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3472 CurDAG->getTargetConstant(ImmS, DL, VT)};
3473 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3474 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3475 return true;
3476 }
3477
3478 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3479 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3480 // mask (e.g., 0x000ffff0).
3481 uint64_t Mask0Imm, Mask1Imm;
3482 SDValue And0 = N->getOperand(0);
3483 SDValue And1 = N->getOperand(1);
3484 if (And0.hasOneUse() && And1.hasOneUse() &&
3485 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3486 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3487 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3488 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3489
3490 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3491 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3492 // bits to be inserted.
3493 if (isShiftedMask(Mask0Imm, VT)) {
3494 std::swap(And0, And1);
3495 std::swap(Mask0Imm, Mask1Imm);
3496 }
3497
3498 SDValue Src = And1->getOperand(0);
3499 SDValue Dst = And0->getOperand(0);
3500 unsigned LSB = llvm::countr_zero(Mask1Imm);
3501 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3502
3503 // The BFXIL inserts the low-order bits from a source register, so right
3504 // shift the needed bits into place.
3505 SDLoc DL(N);
3506 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3507 uint64_t LsrImm = LSB;
3508 if (Src->hasOneUse() &&
3509 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3510 (LsrImm + LSB) < BitWidth) {
3511 Src = Src->getOperand(0);
3512 LsrImm += LSB;
3513 }
3514
3515 SDNode *LSR = CurDAG->getMachineNode(
3516 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3517 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3518
3519 // BFXIL is an alias of BFM, so translate to BFM operands.
3520 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3521 unsigned ImmS = Width - 1;
3522
3523 // Create the BFXIL instruction.
3524 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3525 CurDAG->getTargetConstant(ImmR, DL, VT),
3526 CurDAG->getTargetConstant(ImmS, DL, VT)};
3527 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3528 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3529 return true;
3530 }
3531
3532 return false;
3533}
3534
3535bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3536 if (N->getOpcode() != ISD::OR)
3537 return false;
3538
3539 APInt NUsefulBits;
3540 getUsefulBits(SDValue(N, 0), NUsefulBits);
3541
3542 // If all bits are not useful, just return UNDEF.
3543 if (!NUsefulBits) {
3544 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3545 return true;
3546 }
3547
3548 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3549 return true;
3550
3551 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3552}
3553
3554/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3555/// equivalent of a left shift by a constant amount followed by an and masking
3556/// out a contiguous set of bits.
3557bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3558 if (N->getOpcode() != ISD::AND)
3559 return false;
3560
3561 EVT VT = N->getValueType(0);
3562 if (VT != MVT::i32 && VT != MVT::i64)
3563 return false;
3564
3565 SDValue Op0;
3566 int DstLSB, Width;
3567 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3568 Op0, DstLSB, Width))
3569 return false;
3570
3571 // ImmR is the rotate right amount.
3572 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3573 // ImmS is the most significant bit of the source to be moved.
3574 unsigned ImmS = Width - 1;
3575
3576 SDLoc DL(N);
3577 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3578 CurDAG->getTargetConstant(ImmS, DL, VT)};
3579 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3580 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3581 return true;
3582}
3583
3584/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3585/// variable shift/rotate instructions.
3586bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3587 EVT VT = N->getValueType(0);
3588
3589 unsigned Opc;
3590 switch (N->getOpcode()) {
3591 case ISD::ROTR:
3592 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3593 break;
3594 case ISD::SHL:
3595 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3596 break;
3597 case ISD::SRL:
3598 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3599 break;
3600 case ISD::SRA:
3601 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3602 break;
3603 default:
3604 return false;
3605 }
3606
3607 uint64_t Size;
3608 uint64_t Bits;
3609 if (VT == MVT::i32) {
3610 Bits = 5;
3611 Size = 32;
3612 } else if (VT == MVT::i64) {
3613 Bits = 6;
3614 Size = 64;
3615 } else
3616 return false;
3617
3618 SDValue ShiftAmt = N->getOperand(1);
3619 SDLoc DL(N);
3620 SDValue NewShiftAmt;
3621
3622 // Skip over an extend of the shift amount.
3623 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3624 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3625 ShiftAmt = ShiftAmt->getOperand(0);
3626
3627 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3628 SDValue Add0 = ShiftAmt->getOperand(0);
3629 SDValue Add1 = ShiftAmt->getOperand(1);
3630 uint64_t Add0Imm;
3631 uint64_t Add1Imm;
3632 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
3633 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3634 // to avoid the ADD/SUB.
3635 NewShiftAmt = Add0;
3636 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3637 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
3638 (Add0Imm % Size == 0)) {
3639 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3640 // to generate a NEG instead of a SUB from a constant.
3641 unsigned NegOpc;
3642 unsigned ZeroReg;
3643 EVT SubVT = ShiftAmt->getValueType(0);
3644 if (SubVT == MVT::i32) {
3645 NegOpc = AArch64::SUBWrr;
3646 ZeroReg = AArch64::WZR;
3647 } else {
3648 assert(SubVT == MVT::i64);
3649 NegOpc = AArch64::SUBXrr;
3650 ZeroReg = AArch64::XZR;
3651 }
3652 SDValue Zero =
3653 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3654 MachineSDNode *Neg =
3655 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
3656 NewShiftAmt = SDValue(Neg, 0);
3657 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3658 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
3659 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3660 // to generate a NOT instead of a SUB from a constant.
3661 unsigned NotOpc;
3662 unsigned ZeroReg;
3663 EVT SubVT = ShiftAmt->getValueType(0);
3664 if (SubVT == MVT::i32) {
3665 NotOpc = AArch64::ORNWrr;
3666 ZeroReg = AArch64::WZR;
3667 } else {
3668 assert(SubVT == MVT::i64);
3669 NotOpc = AArch64::ORNXrr;
3670 ZeroReg = AArch64::XZR;
3671 }
3672 SDValue Zero =
3673 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3674 MachineSDNode *Not =
3675 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
3676 NewShiftAmt = SDValue(Not, 0);
3677 } else
3678 return false;
3679 } else {
3680 // If the shift amount is masked with an AND, check that the mask covers the
3681 // bits that are implicitly ANDed off by the above opcodes and if so, skip
3682 // the AND.
3683 uint64_t MaskImm;
3684 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
3685 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
3686 return false;
3687
3688 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
3689 return false;
3690
3691 NewShiftAmt = ShiftAmt->getOperand(0);
3692 }
3693
3694 // Narrow/widen the shift amount to match the size of the shift operation.
3695 if (VT == MVT::i32)
3696 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
3697 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
3698 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
3699 MachineSDNode *Ext = CurDAG->getMachineNode(
3700 AArch64::SUBREG_TO_REG, DL, VT,
3701 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
3702 NewShiftAmt = SDValue(Ext, 0);
3703 }
3704
3705 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
3706 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3707 return true;
3708}
3709
3711 SDValue &FixedPos,
3712 unsigned RegWidth,
3713 bool isReciprocal) {
3714 APFloat FVal(0.0);
3715 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
3716 FVal = CN->getValueAPF();
3717 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
3718 // Some otherwise illegal constants are allowed in this case.
3719 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
3720 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
3721 return false;
3722
3723 ConstantPoolSDNode *CN =
3724 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
3725 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
3726 } else
3727 return false;
3728
3729 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
3730 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
3731 // x-register.
3732 //
3733 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
3734 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
3735 // integers.
3736 bool IsExact;
3737
3738 if (isReciprocal)
3739 if (!FVal.getExactInverse(&FVal))
3740 return false;
3741
3742 // fbits is between 1 and 64 in the worst-case, which means the fmul
3743 // could have 2^64 as an actual operand. Need 65 bits of precision.
3744 APSInt IntVal(65, true);
3745 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
3746
3747 // N.b. isPowerOf2 also checks for > 0.
3748 if (!IsExact || !IntVal.isPowerOf2())
3749 return false;
3750 unsigned FBits = IntVal.logBase2();
3751
3752 // Checks above should have guaranteed that we haven't lost information in
3753 // finding FBits, but it must still be in range.
3754 if (FBits == 0 || FBits > RegWidth) return false;
3755
3756 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
3757 return true;
3758}
3759
3760bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
3761 unsigned RegWidth) {
3762 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3763 false);
3764}
3765
3766bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
3767 SDValue &FixedPos,
3768 unsigned RegWidth) {
3769 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3770 true);
3771}
3772
3773// Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields
3774// of the string and obtains the integer values from them and combines these
3775// into a single value to be used in the MRS/MSR instruction.
3778 RegString.split(Fields, ':');
3779
3780 if (Fields.size() == 1)
3781 return -1;
3782
3783 assert(Fields.size() == 5
3784 && "Invalid number of fields in read register string");
3785
3787 bool AllIntFields = true;
3788
3789 for (StringRef Field : Fields) {
3790 unsigned IntField;
3791 AllIntFields &= !Field.getAsInteger(10, IntField);
3792 Ops.push_back(IntField);
3793 }
3794
3795 assert(AllIntFields &&
3796 "Unexpected non-integer value in special register string.");
3797 (void)AllIntFields;
3798
3799 // Need to combine the integer fields of the string into a single value
3800 // based on the bit encoding of MRS/MSR instruction.
3801 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
3802 (Ops[3] << 3) | (Ops[4]);
3803}
3804
3805// Lower the read_register intrinsic to an MRS instruction node if the special
3806// register string argument is either of the form detailed in the ALCE (the
3807// form described in getIntOperandsFromRegsterString) or is a named register
3808// known by the MRS SysReg mapper.
3809bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
3810 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
3811 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3812 SDLoc DL(N);
3813
3814 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
3815
3816 unsigned Opcode64Bit = AArch64::MRS;
3817 int Imm = getIntOperandFromRegisterString(RegString->getString());
3818 if (Imm == -1) {
3819 // No match, Use the sysreg mapper to map the remaining possible strings to
3820 // the value for the register to be used for the instruction operand.
3821 const auto *TheReg =
3822 AArch64SysReg::lookupSysRegByName(RegString->getString());
3823 if (TheReg && TheReg->Readable &&
3824 TheReg->haveFeatures(Subtarget->getFeatureBits()))
3825 Imm = TheReg->Encoding;
3826 else
3827 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
3828
3829 if (Imm == -1) {
3830 // Still no match, see if this is "pc" or give up.
3831 if (!ReadIs128Bit && RegString->getString() == "pc") {
3832 Opcode64Bit = AArch64::ADR;
3833 Imm = 0;
3834 } else {
3835 return false;
3836 }
3837 }
3838 }
3839
3840 SDValue InChain = N->getOperand(0);
3841 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
3842 if (!ReadIs128Bit) {
3843 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
3844 {SysRegImm, InChain});
3845 } else {
3846 SDNode *MRRS = CurDAG->getMachineNode(
3847 AArch64::MRRS, DL,
3848 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
3849 {SysRegImm, InChain});
3850
3851 // Sysregs are not endian. The even register always contains the low half
3852 // of the register.
3853 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
3854 SDValue(MRRS, 0));
3855 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
3856 SDValue(MRRS, 0));
3857 SDValue OutChain = SDValue(MRRS, 1);
3858
3859 ReplaceUses(SDValue(N, 0), Lo);
3860 ReplaceUses(SDValue(N, 1), Hi);
3861 ReplaceUses(SDValue(N, 2), OutChain);
3862 };
3863 return true;
3864}
3865
3866// Lower the write_register intrinsic to an MSR instruction node if the special
3867// register string argument is either of the form detailed in the ALCE (the
3868// form described in getIntOperandsFromRegsterString) or is a named register
3869// known by the MSR SysReg mapper.
3870bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
3871 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
3872 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3873 SDLoc DL(N);
3874
3875 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
3876
3877 if (!WriteIs128Bit) {
3878 // Check if the register was one of those allowed as the pstatefield value
3879 // in the MSR (immediate) instruction. To accept the values allowed in the
3880 // pstatefield for the MSR (immediate) instruction, we also require that an
3881 // immediate value has been provided as an argument, we know that this is
3882 // the case as it has been ensured by semantic checking.
3883 auto trySelectPState = [&](auto PMapper, unsigned State) {
3884 if (PMapper) {
3885 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
3886 "Expected a constant integer expression.");
3887 unsigned Reg = PMapper->Encoding;
3888 uint64_t Immed = N->getConstantOperandVal(2);
3889 CurDAG->SelectNodeTo(
3890 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
3891 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
3892 return true;
3893 }
3894 return false;
3895 };
3896
3897 if (trySelectPState(
3898 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
3899 AArch64::MSRpstateImm4))
3900 return true;
3901 if (trySelectPState(
3902 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
3903 AArch64::MSRpstateImm1))
3904 return true;
3905 }
3906
3907 int Imm = getIntOperandFromRegisterString(RegString->getString());
3908 if (Imm == -1) {
3909 // Use the sysreg mapper to attempt to map the remaining possible strings
3910 // to the value for the register to be used for the MSR (register)
3911 // instruction operand.
3912 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
3913 if (TheReg && TheReg->Writeable &&
3914 TheReg->haveFeatures(Subtarget->getFeatureBits()))
3915 Imm = TheReg->Encoding;
3916 else
3917 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
3918
3919 if (Imm == -1)
3920 return false;
3921 }
3922
3923 SDValue InChain = N->getOperand(0);
3924 if (!WriteIs128Bit) {
3925 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
3926 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
3927 N->getOperand(2), InChain);
3928 } else {
3929 // No endian swap. The lower half always goes into the even subreg, and the
3930 // higher half always into the odd supreg.
3931 SDNode *Pair = CurDAG->getMachineNode(
3932 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
3933 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
3934 MVT::i32),
3935 N->getOperand(2),
3936 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
3937 N->getOperand(3),
3938 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
3939
3940 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
3941 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
3942 SDValue(Pair, 0), InChain);
3943 }
3944
3945 return true;
3946}
3947
3948/// We've got special pseudo-instructions for these
3949bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3950 unsigned Opcode;
3951 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
3952
3953 // Leave IR for LSE if subtarget supports it.
3954 if (Subtarget->hasLSE()) return false;
3955
3956 if (MemTy == MVT::i8)
3957 Opcode = AArch64::CMP_SWAP_8;
3958 else if (MemTy == MVT::i16)
3959 Opcode = AArch64::CMP_SWAP_16;
3960 else if (MemTy == MVT::i32)
3961 Opcode = AArch64::CMP_SWAP_32;
3962 else if (MemTy == MVT::i64)
3963 Opcode = AArch64::CMP_SWAP_64;
3964 else
3965 llvm_unreachable("Unknown AtomicCmpSwap type");
3966
3967 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
3968 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
3969 N->getOperand(0)};
3970 SDNode *CmpSwap = CurDAG->getMachineNode(
3971 Opcode, SDLoc(N),
3972 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
3973
3974 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
3975 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
3976
3977 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
3978 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
3979 CurDAG->RemoveDeadNode(N);
3980
3981 return true;
3982}
3983
3984bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
3985 SDValue &Shift) {
3986 if (!isa<ConstantSDNode>(N))
3987 return false;
3988
3989 SDLoc DL(N);
3990 uint64_t Val = cast<ConstantSDNode>(N)
3991 ->getAPIntValue()
3992 .trunc(VT.getFixedSizeInBits())
3993 .getZExtValue();
3994
3995 switch (VT.SimpleTy) {
3996 case MVT::i8:
3997 // All immediates are supported.
3998 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
3999 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4000 return true;
4001 case MVT::i16:
4002 case MVT::i32:
4003 case MVT::i64:
4004 // Support 8bit unsigned immediates.
4005 if (Val <= 255) {
4006 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4007 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4008 return true;
4009 }
4010 // Support 16bit unsigned immediates that are a multiple of 256.
4011 if (Val <= 65280 && Val % 256 == 0) {
4012 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4013 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4014 return true;
4015 }
4016 break;
4017 default:
4018 break;
4019 }
4020
4021 return false;
4022}
4023
4024bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4025 SDValue &Imm, SDValue &Shift,
4026 bool Negate) {
4027 if (!isa<ConstantSDNode>(N))
4028 return false;
4029
4030 SDLoc DL(N);
4031 int64_t Val = cast<ConstantSDNode>(N)
4032 ->getAPIntValue()
4033 .trunc(VT.getFixedSizeInBits())
4034 .getSExtValue();
4035
4036 if (Negate)
4037 Val = -Val;
4038
4039 // Signed saturating instructions treat their immediate operand as unsigned,
4040 // whereas the related intrinsics define their operands to be signed. This
4041 // means we can only use the immediate form when the operand is non-negative.
4042 if (Val < 0)
4043 return false;
4044
4045 switch (VT.SimpleTy) {
4046 case MVT::i8:
4047 // All positive immediates are supported.
4048 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4049 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4050 return true;
4051 case MVT::i16:
4052 case MVT::i32:
4053 case MVT::i64:
4054 // Support 8bit positive immediates.
4055 if (Val <= 255) {
4056 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4057 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4058 return true;
4059 }
4060 // Support 16bit positive immediates that are a multiple of 256.
4061 if (Val <= 65280 && Val % 256 == 0) {
4062 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4063 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4064 return true;
4065 }
4066 break;
4067 default:
4068 break;
4069 }
4070
4071 return false;
4072}
4073
4074bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4075 SDValue &Shift) {
4076 if (!isa<ConstantSDNode>(N))
4077 return false;
4078
4079 SDLoc DL(N);
4080 int64_t Val = cast<ConstantSDNode>(N)
4081 ->getAPIntValue()
4082 .trunc(VT.getFixedSizeInBits())
4083 .getSExtValue();
4084
4085 switch (VT.SimpleTy) {
4086 case MVT::i8:
4087 // All immediates are supported.
4088 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4089 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4090 return true;
4091 case MVT::i16:
4092 case MVT::i32:
4093 case MVT::i64:
4094 // Support 8bit signed immediates.
4095 if (Val >= -128 && Val <= 127) {
4096 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4097 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4098 return true;
4099 }
4100 // Support 16bit signed immediates that are a multiple of 256.
4101 if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
4102 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4103 Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32);
4104 return true;
4105 }
4106 break;
4107 default:
4108 break;
4109 }
4110
4111 return false;
4112}
4113
4114bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4115 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4116 int64_t ImmVal = CNode->getSExtValue();
4117 SDLoc DL(N);
4118 if (ImmVal >= -128 && ImmVal < 128) {
4119 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
4120 return true;
4121 }
4122 }
4123 return false;
4124}
4125
4126bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4127 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4128 uint64_t ImmVal = CNode->getZExtValue();
4129
4130 switch (VT.SimpleTy) {
4131 case MVT::i8:
4132 ImmVal &= 0xFF;
4133 break;
4134 case MVT::i16:
4135 ImmVal &= 0xFFFF;
4136 break;
4137 case MVT::i32:
4138 ImmVal &= 0xFFFFFFFF;
4139 break;
4140 case MVT::i64:
4141 break;
4142 default:
4143 llvm_unreachable("Unexpected type");
4144 }
4145
4146 if (ImmVal < 256) {
4147 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4148 return true;
4149 }
4150 }
4151 return false;
4152}
4153
4154bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4155 bool Invert) {
4156 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4157 uint64_t ImmVal = CNode->getZExtValue();
4158 SDLoc DL(N);
4159
4160 if (Invert)
4161 ImmVal = ~ImmVal;
4162
4163 // Shift mask depending on type size.
4164 switch (VT.SimpleTy) {
4165 case MVT::i8:
4166 ImmVal &= 0xFF;
4167 ImmVal |= ImmVal << 8;
4168 ImmVal |= ImmVal << 16;
4169 ImmVal |= ImmVal << 32;
4170 break;
4171 case MVT::i16:
4172 ImmVal &= 0xFFFF;
4173 ImmVal |= ImmVal << 16;
4174 ImmVal |= ImmVal << 32;
4175 break;
4176 case MVT::i32:
4177 ImmVal &= 0xFFFFFFFF;
4178 ImmVal |= ImmVal << 32;
4179 break;
4180 case MVT::i64:
4181 break;
4182 default:
4183 llvm_unreachable("Unexpected type");
4184 }
4185
4186 uint64_t encoding;
4187 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
4188 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
4189 return true;
4190 }
4191 }
4192 return false;
4193}
4194
4195// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4196// Rather than attempt to normalise everything we can sometimes saturate the
4197// shift amount during selection. This function also allows for consistent
4198// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4199// required by the instructions.
4200bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4201 uint64_t High, bool AllowSaturation,
4202 SDValue &Imm) {
4203 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4204 uint64_t ImmVal = CN->getZExtValue();
4205
4206 // Reject shift amounts that are too small.
4207 if (ImmVal < Low)
4208 return false;
4209
4210 // Reject or saturate shift amounts that are too big.
4211 if (ImmVal > High) {
4212 if (!AllowSaturation)
4213 return false;
4214 ImmVal = High;
4215 }
4216
4217 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4218 return true;
4219 }
4220
4221 return false;
4222}
4223
4224bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4225 // tagp(FrameIndex, IRGstack, tag_offset):
4226 // since the offset between FrameIndex and IRGstack is a compile-time
4227 // constant, this can be lowered to a single ADDG instruction.
4228 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4229 return false;
4230 }
4231
4232 SDValue IRG_SP = N->getOperand(2);
4233 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4234 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4235 return false;
4236 }
4237
4238 const TargetLowering *TLI = getTargetLowering();
4239 SDLoc DL(N);
4240 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4241 SDValue FiOp = CurDAG->getTargetFrameIndex(
4242 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4243 int TagOffset = N->getConstantOperandVal(3);
4244
4245 SDNode *Out = CurDAG->getMachineNode(
4246 AArch64::TAGPstack, DL, MVT::i64,
4247 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4248 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4249 ReplaceNode(N, Out);
4250 return true;
4251}
4252
4253void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4254 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4255 "llvm.aarch64.tagp third argument must be an immediate");
4256 if (trySelectStackSlotTagP(N))
4257 return;
4258 // FIXME: above applies in any case when offset between Op1 and Op2 is a
4259 // compile-time constant, not just for stack allocations.
4260
4261 // General case for unrelated pointers in Op1 and Op2.
4262 SDLoc DL(N);
4263 int TagOffset = N->getConstantOperandVal(3);
4264 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4265 {N->getOperand(1), N->getOperand(2)});
4266 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4267 {SDValue(N1, 0), N->getOperand(2)});
4268 SDNode *N3 = CurDAG->getMachineNode(
4269 AArch64::ADDG, DL, MVT::i64,
4270 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4271 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4272 ReplaceNode(N, N3);
4273}
4274
4275bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4276 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4277
4278 // Bail when not a "cast" like insert_subvector.
4279 if (N->getConstantOperandVal(2) != 0)
4280 return false;
4281 if (!N->getOperand(0).isUndef())
4282 return false;
4283
4284 // Bail when normal isel should do the job.
4285 EVT VT = N->getValueType(0);
4286 EVT InVT = N->getOperand(1).getValueType();
4287 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4288 return false;
4289 if (InVT.getSizeInBits() <= 128)
4290 return false;
4291
4292 // NOTE: We can only get here when doing fixed length SVE code generation.
4293 // We do manual selection because the types involved are not linked to real
4294 // registers (despite being legal) and must be coerced into SVE registers.
4295
4297 "Expected to insert into a packed scalable vector!");
4298
4299 SDLoc DL(N);
4300 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4301 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4302 N->getOperand(1), RC));
4303 return true;
4304}
4305
4306bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4307 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4308
4309 // Bail when not a "cast" like extract_subvector.
4310 if (N->getConstantOperandVal(1) != 0)
4311 return false;
4312
4313 // Bail when normal isel can do the job.
4314 EVT VT = N->getValueType(0);
4315 EVT InVT = N->getOperand(0).getValueType();
4316 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4317 return false;
4318 if (VT.getSizeInBits() <= 128)
4319 return false;
4320
4321 // NOTE: We can only get here when doing fixed length SVE code generation.
4322 // We do manual selection because the types involved are not linked to real
4323 // registers (despite being legal) and must be coerced into SVE registers.
4324
4326 "Expected to extract from a packed scalable vector!");
4327
4328 SDLoc DL(N);
4329 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4330 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4331 N->getOperand(0), RC));
4332 return true;
4333}
4334
4335bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4336 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4337
4338 SDValue N0 = N->getOperand(0);
4339 SDValue N1 = N->getOperand(1);
4340 EVT VT = N->getValueType(0);
4341
4342 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4343 // Rotate by a constant is a funnel shift in IR which is exanded to
4344 // an OR with shifted operands.
4345 // We do the following transform:
4346 // OR N0, N1 -> xar (x, y, imm)
4347 // Where:
4348 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4349 // N0 = SHL_PRED true, V, splat(bits-imm)
4350 // V = (xor x, y)
4351 if (VT.isScalableVector() && Subtarget->hasSVE2orSME()) {
4352 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4354 std::swap(N0, N1);
4355 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4357 return false;
4358
4359 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4360 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4361 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4362 return false;
4363
4364 SDValue XOR = N0.getOperand(1);
4365 if (XOR.getOpcode() != ISD::XOR || XOR != N1.getOperand(1))
4366 return false;
4367
4368 APInt ShlAmt, ShrAmt;
4369 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4371 return false;
4372
4373 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4374 return false;
4375
4376 SDLoc DL(N);
4377 SDValue Imm =
4378 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4379
4380 SDValue Ops[] = {XOR.getOperand(0), XOR.getOperand(1), Imm};
4381 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4382 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4383 AArch64::XAR_ZZZI_D})) {
4384 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4385 return true;
4386 }
4387 return false;
4388 }
4389
4390 if (!Subtarget->hasSHA3())
4391 return false;
4392
4393 if (N0->getOpcode() != AArch64ISD::VSHL ||
4395 return false;
4396
4397 if (N0->getOperand(0) != N1->getOperand(0) ||
4398 N1->getOperand(0)->getOpcode() != ISD::XOR)
4399 return false;
4400
4401 SDValue XOR = N0.getOperand(0);
4402 SDValue R1 = XOR.getOperand(0);
4403 SDValue R2 = XOR.getOperand(1);
4404
4405 unsigned HsAmt = N0.getConstantOperandVal(1);
4406 unsigned ShAmt = N1.getConstantOperandVal(1);
4407
4408 SDLoc DL = SDLoc(N0.getOperand(1));
4409 SDValue Imm = CurDAG->getTargetConstant(
4410 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4411
4412 if (ShAmt + HsAmt != 64)
4413 return false;
4414
4415 SDValue Ops[] = {R1, R2, Imm};
4416 CurDAG->SelectNodeTo(N, AArch64::XAR, N0.getValueType(), Ops);
4417
4418 return true;
4419}
4420
4421void AArch64DAGToDAGISel::Select(SDNode *Node) {
4422 // If we have a custom node, we already have selected!
4423 if (Node->isMachineOpcode()) {
4424 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4425 Node->setNodeId(-1);
4426 return;
4427 }
4428
4429 // Few custom selection stuff.
4430 EVT VT = Node->getValueType(0);
4431
4432 switch (Node->getOpcode()) {
4433 default:
4434 break;
4435
4437 if (SelectCMP_SWAP(Node))
4438 return;
4439 break;
4440
4441 case ISD::READ_REGISTER:
4442 case AArch64ISD::MRRS:
4443 if (tryReadRegister(Node))
4444 return;
4445 break;
4446
4448 case AArch64ISD::MSRR:
4449 if (tryWriteRegister(Node))
4450 return;
4451 break;
4452
4453 case ISD::LOAD: {
4454 // Try to select as an indexed load. Fall through to normal processing
4455 // if we can't.
4456 if (tryIndexedLoad(Node))
4457 return;
4458 break;
4459 }
4460
4461 case ISD::SRL:
4462 case ISD::AND:
4463 case ISD::SRA:
4465 if (tryBitfieldExtractOp(Node))
4466 return;
4467 if (tryBitfieldInsertInZeroOp(Node))
4468 return;
4469 [[fallthrough]];
4470 case ISD::ROTR:
4471 case ISD::SHL:
4472 if (tryShiftAmountMod(Node))
4473 return;
4474 break;
4475
4476 case ISD::SIGN_EXTEND:
4477 if (tryBitfieldExtractOpFromSExt(Node))
4478 return;
4479 break;
4480
4481 case ISD::OR:
4482 if (tryBitfieldInsertOp(Node))
4483 return;
4484 if (trySelectXAR(Node))
4485 return;
4486 break;
4487
4489 if (trySelectCastScalableToFixedLengthVector(Node))
4490 return;
4491 break;
4492 }
4493
4494 case ISD::INSERT_SUBVECTOR: {
4495 if (trySelectCastFixedLengthToScalableVector(Node))
4496 return;
4497 break;
4498 }
4499
4500 case ISD::Constant: {
4501 // Materialize zero constants as copies from WZR/XZR. This allows
4502 // the coalescer to propagate these into other instructions.
4503 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4504 if (ConstNode->isZero()) {
4505 if (VT == MVT::i32) {
4506 SDValue New = CurDAG->getCopyFromReg(
4507 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4508 ReplaceNode(Node, New.getNode());
4509 return;
4510 } else if (VT == MVT::i64) {
4511 SDValue New = CurDAG->getCopyFromReg(
4512 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4513 ReplaceNode(Node, New.getNode());
4514 return;
4515 }
4516 }
4517 break;
4518 }
4519
4520 case ISD::FrameIndex: {
4521 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4522 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4523 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4524 const TargetLowering *TLI = getTargetLowering();
4525 SDValue TFI = CurDAG->getTargetFrameIndex(
4526 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4527 SDLoc DL(Node);
4528 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4529 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4530 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4531 return;
4532 }
4534 unsigned IntNo = Node->getConstantOperandVal(1);
4535 switch (IntNo) {
4536 default:
4537 break;
4538 case Intrinsic::aarch64_ldaxp:
4539 case Intrinsic::aarch64_ldxp: {
4540 unsigned Op =
4541 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4542 SDValue MemAddr = Node->getOperand(2);
4543 SDLoc DL(Node);
4544 SDValue Chain = Node->getOperand(0);
4545
4546 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4547 MVT::Other, MemAddr, Chain);
4548
4549 // Transfer memoperands.
4551 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4552 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4553 ReplaceNode(Node, Ld);
4554 return;
4555 }
4556 case Intrinsic::aarch64_stlxp:
4557 case Intrinsic::aarch64_stxp: {
4558 unsigned Op =
4559 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4560 SDLoc DL(Node);
4561 SDValue Chain = Node->getOperand(0);
4562 SDValue ValLo = Node->getOperand(2);
4563 SDValue ValHi = Node->getOperand(3);
4564 SDValue MemAddr = Node->getOperand(4);
4565
4566 // Place arguments in the right order.
4567 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4568
4569 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4570 // Transfer memoperands.
4572 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4573 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4574
4575 ReplaceNode(Node, St);
4576 return;
4577 }
4578 case Intrinsic::aarch64_neon_ld1x2:
4579 if (VT == MVT::v8i8) {
4580 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4581 return;
4582 } else if (VT == MVT::v16i8) {
4583 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4584 return;
4585 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4586 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4587 return;
4588 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4589 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
4590 return;
4591 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4592 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
4593 return;
4594 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4595 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
4596 return;
4597 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4598 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4599 return;
4600 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4601 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
4602 return;
4603 }
4604 break;
4605 case Intrinsic::aarch64_neon_ld1x3:
4606 if (VT == MVT::v8i8) {
4607 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
4608 return;
4609 } else if (VT == MVT::v16i8) {
4610 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
4611 return;
4612 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4613 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
4614 return;
4615 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4616 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
4617 return;
4618 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4619 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
4620 return;
4621 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4622 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
4623 return;
4624 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4625 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4626 return;
4627 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4628 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
4629 return;
4630 }
4631 break;
4632 case Intrinsic::aarch64_neon_ld1x4:
4633 if (VT == MVT::v8i8) {
4634 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
4635 return;
4636 } else if (VT == MVT::v16i8) {
4637 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
4638 return;
4639 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4640 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
4641 return;
4642 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4643 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
4644 return;
4645 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4646 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
4647 return;
4648 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4649 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
4650 return;
4651 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4652 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4653 return;
4654 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4655 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
4656 return;
4657 }
4658 break;
4659 case Intrinsic::aarch64_neon_ld2:
4660 if (VT == MVT::v8i8) {
4661 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
4662 return;
4663 } else if (VT == MVT::v16i8) {
4664 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
4665 return;
4666 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4667 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
4668 return;
4669 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4670 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
4671 return;
4672 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4673 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
4674 return;
4675 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4676 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
4677 return;
4678 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4679 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4680 return;
4681 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4682 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
4683 return;
4684 }
4685 break;
4686 case Intrinsic::aarch64_neon_ld3:
4687 if (VT == MVT::v8i8) {
4688 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
4689 return;
4690 } else if (VT == MVT::v16i8) {
4691 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
4692 return;
4693 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4694 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
4695 return;
4696 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4697 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
4698 return;
4699 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4700 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
4701 return;
4702 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4703 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
4704 return;
4705 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4706 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4707 return;
4708 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4709 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
4710 return;
4711 }
4712 break;
4713 case Intrinsic::aarch64_neon_ld4:
4714 if (VT == MVT::v8i8) {
4715 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
4716 return;
4717 } else if (VT == MVT::v16i8) {
4718 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
4719 return;
4720 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4721 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
4722 return;
4723 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4724 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
4725 return;
4726 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4727 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
4728 return;
4729 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4730 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
4731 return;
4732 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4733 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4734 return;
4735 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4736 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
4737 return;
4738 }
4739 break;
4740 case Intrinsic::aarch64_neon_ld2r:
4741 if (VT == MVT::v8i8) {
4742 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
4743 return;
4744 } else if (VT == MVT::v16i8) {
4745 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
4746 return;
4747 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4748 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
4749 return;
4750 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4751 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
4752 return;
4753 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4754 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
4755 return;
4756 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4757 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
4758 return;
4759 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4760 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
4761 return;
4762 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4763 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
4764 return;
4765 }
4766 break;
4767 case Intrinsic::aarch64_neon_ld3r:
4768 if (VT == MVT::v8i8) {
4769 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
4770 return;
4771 } else if (VT == MVT::v16i8) {
4772 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
4773 return;
4774 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4775 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
4776 return;
4777 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4778 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
4779 return;
4780 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4781 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
4782 return;
4783 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4784 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
4785 return;
4786 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4787 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
4788 return;
4789 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4790 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
4791 return;
4792 }
4793 break;
4794 case Intrinsic::aarch64_neon_ld4r:
4795 if (VT == MVT::v8i8) {
4796 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
4797 return;
4798 } else if (VT == MVT::v16i8) {
4799 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
4800 return;
4801 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4802 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
4803 return;
4804 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4805 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
4806 return;
4807 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4808 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
4809 return;
4810 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4811 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
4812 return;
4813 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4814 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
4815 return;
4816 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4817 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
4818 return;
4819 }
4820 break;
4821 case Intrinsic::aarch64_neon_ld2lane:
4822 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4823 SelectLoadLane(Node, 2, AArch64::LD2i8);
4824 return;
4825 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4826 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4827 SelectLoadLane(Node, 2, AArch64::LD2i16);
4828 return;
4829 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4830 VT == MVT::v2f32) {
4831 SelectLoadLane(Node, 2, AArch64::LD2i32);
4832 return;
4833 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4834 VT == MVT::v1f64) {
4835 SelectLoadLane(Node, 2, AArch64::LD2i64);
4836 return;
4837 }
4838 break;
4839 case Intrinsic::aarch64_neon_ld3lane:
4840 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4841 SelectLoadLane(Node, 3, AArch64::LD3i8);
4842 return;
4843 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4844 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4845 SelectLoadLane(Node, 3, AArch64::LD3i16);
4846 return;
4847 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4848 VT == MVT::v2f32) {
4849 SelectLoadLane(Node, 3, AArch64::LD3i32);
4850 return;
4851 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4852 VT == MVT::v1f64) {
4853 SelectLoadLane(Node, 3, AArch64::LD3i64);
4854 return;
4855 }
4856 break;
4857 case Intrinsic::aarch64_neon_ld4lane:
4858 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4859 SelectLoadLane(Node, 4, AArch64::LD4i8);
4860 return;
4861 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4862 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4863 SelectLoadLane(Node, 4, AArch64::LD4i16);
4864 return;
4865 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4866 VT == MVT::v2f32) {
4867 SelectLoadLane(Node, 4, AArch64::LD4i32);
4868 return;
4869 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4870 VT == MVT::v1f64) {
4871 SelectLoadLane(Node, 4, AArch64::LD4i64);
4872 return;
4873 }
4874 break;
4875 case Intrinsic::aarch64_ld64b:
4876 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
4877 return;
4878 case Intrinsic::aarch64_sve_ld2q_sret: {
4879 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
4880 return;
4881 }
4882 case Intrinsic::aarch64_sve_ld3q_sret: {
4883 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
4884 return;
4885 }
4886 case Intrinsic::aarch64_sve_ld4q_sret: {
4887 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
4888 return;
4889 }
4890 case Intrinsic::aarch64_sve_ld2_sret: {
4891 if (VT == MVT::nxv16i8) {
4892 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
4893 true);
4894 return;
4895 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4896 VT == MVT::nxv8bf16) {
4897 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
4898 true);
4899 return;
4900 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4901 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
4902 true);
4903 return;
4904 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4905 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
4906 true);
4907 return;
4908 }
4909 break;
4910 }
4911 case Intrinsic::aarch64_sve_ld1_pn_x2: {
4912 if (VT == MVT::nxv16i8) {
4913 if (Subtarget->hasSME2())
4914 SelectContiguousMultiVectorLoad(
4915 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
4916 else if (Subtarget->hasSVE2p1())
4917 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
4918 AArch64::LD1B_2Z);
4919 else
4920 break;
4921 return;
4922 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4923 VT == MVT::nxv8bf16) {
4924 if (Subtarget->hasSME2())
4925 SelectContiguousMultiVectorLoad(
4926 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
4927 else if (Subtarget->hasSVE2p1())
4928 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
4929 AArch64::LD1H_2Z);
4930 else
4931 break;
4932 return;
4933 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4934 if (Subtarget->hasSME2())
4935 SelectContiguousMultiVectorLoad(
4936 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
4937 else if (Subtarget->hasSVE2p1())
4938 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
4939 AArch64::LD1W_2Z);
4940 else
4941 break;
4942 return;
4943 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4944 if (Subtarget->hasSME2())
4945 SelectContiguousMultiVectorLoad(
4946 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
4947 else if (Subtarget->hasSVE2p1())
4948 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
4949 AArch64::LD1D_2Z);
4950 else
4951 break;
4952 return;
4953 }
4954 break;
4955 }
4956 case Intrinsic::aarch64_sve_ld1_pn_x4: {
4957 if (VT == MVT::nxv16i8) {
4958 if (Subtarget->hasSME2())
4959 SelectContiguousMultiVectorLoad(
4960 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
4961 else if (Subtarget->hasSVE2p1())
4962 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
4963 AArch64::LD1B_4Z);
4964 else
4965 break;
4966 return;
4967 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4968 VT == MVT::nxv8bf16) {
4969 if (Subtarget->hasSME2())
4970 SelectContiguousMultiVectorLoad(
4971 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
4972 else if (Subtarget->hasSVE2p1())
4973 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
4974 AArch64::LD1H_4Z);
4975 else
4976 break;
4977 return;
4978 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4979 if (Subtarget->hasSME2())
4980 SelectContiguousMultiVectorLoad(
4981 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
4982 else if (Subtarget->hasSVE2p1())
4983 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
4984 AArch64::LD1W_4Z);
4985 else
4986 break;
4987 return;
4988 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4989 if (Subtarget->hasSME2())
4990 SelectContiguousMultiVectorLoad(
4991 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
4992 else if (Subtarget->hasSVE2p1())
4993 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
4994 AArch64::LD1D_4Z);
4995 else
4996 break;
4997 return;
4998 }
4999 break;
5000 }
5001 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5002 if (VT == MVT::nxv16i8) {
5003 if (Subtarget->hasSME2())
5004 SelectContiguousMultiVectorLoad(Node, 2, 0,
5005 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5006 AArch64::LDNT1B_2Z_PSEUDO);
5007 else if (Subtarget->hasSVE2p1())
5008 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5009 AArch64::LDNT1B_2Z);
5010 else
5011 break;
5012 return;
5013 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5014 VT == MVT::nxv8bf16) {
5015 if (Subtarget->hasSME2())
5016 SelectContiguousMultiVectorLoad(Node, 2, 1,
5017 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5018 AArch64::LDNT1H_2Z_PSEUDO);
5019 else if (Subtarget->hasSVE2p1())
5020 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5021 AArch64::LDNT1H_2Z);
5022 else
5023 break;
5024 return;
5025 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5026 if (Subtarget->hasSME2())
5027 SelectContiguousMultiVectorLoad(Node, 2, 2,
5028 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5029 AArch64::LDNT1W_2Z_PSEUDO);
5030 else if (Subtarget->hasSVE2p1())
5031 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5032 AArch64::LDNT1W_2Z);
5033 else
5034 break;
5035 return;
5036 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5037 if (Subtarget->hasSME2())
5038 SelectContiguousMultiVectorLoad(Node, 2, 3,
5039 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5040 AArch64::LDNT1D_2Z_PSEUDO);
5041 else if (Subtarget->hasSVE2p1())
5042 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5043 AArch64::LDNT1D_2Z);
5044 else
5045 break;
5046 return;
5047 }
5048 break;
5049 }
5050 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5051 if (VT == MVT::nxv16i8) {
5052 if (Subtarget->hasSME2())
5053 SelectContiguousMultiVectorLoad(Node, 4, 0,
5054 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5055 AArch64::LDNT1B_4Z_PSEUDO);
5056 else if (Subtarget->hasSVE2p1())
5057 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5058 AArch64::LDNT1B_4Z);
5059 else
5060 break;
5061 return;
5062 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5063 VT == MVT::nxv8bf16) {
5064 if (Subtarget->hasSME2())
5065 SelectContiguousMultiVectorLoad(Node, 4, 1,
5066 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5067 AArch64::LDNT1H_4Z_PSEUDO);
5068 else if (Subtarget->hasSVE2p1())
5069 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5070 AArch64::LDNT1H_4Z);
5071 else
5072 break;
5073 return;
5074 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5075 if (Subtarget->hasSME2())
5076 SelectContiguousMultiVectorLoad(Node, 4, 2,
5077 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5078 AArch64::LDNT1W_4Z_PSEUDO);
5079 else if (Subtarget->hasSVE2p1())
5080 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5081 AArch64::LDNT1W_4Z);
5082 else
5083 break;
5084 return;
5085 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5086 if (Subtarget->hasSME2())
5087 SelectContiguousMultiVectorLoad(Node, 4, 3,
5088 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5089 AArch64::LDNT1D_4Z_PSEUDO);
5090 else if (Subtarget->hasSVE2p1())
5091 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5092 AArch64::LDNT1D_4Z);
5093 else
5094 break;
5095 return;
5096 }
5097 break;
5098 }
5099 case Intrinsic::aarch64_sve_ld3_sret: {
5100 if (VT == MVT::nxv16i8) {
5101 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5102 true);
5103 return;
5104 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5105 VT == MVT::nxv8bf16) {
5106 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5107 true);
5108 return;
5109 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5110 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5111 true);
5112 return;
5113 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5114 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5115 true);
5116 return;
5117 }
5118 break;
5119 }
5120 case Intrinsic::aarch64_sve_ld4_sret: {
5121 if (VT == MVT::nxv16i8) {
5122 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5123 true);
5124 return;
5125 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5126 VT == MVT::nxv8bf16) {
5127 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5128 true);
5129 return;
5130 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5131 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5132 true);
5133 return;
5134 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5135 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5136 true);
5137 return;
5138 }
5139 break;
5140 }
5141 case Intrinsic::aarch64_sme_read_hor_vg2: {
5142 if (VT == MVT::nxv16i8) {
5143 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5144 AArch64::MOVA_2ZMXI_H_B);
5145 return;
5146 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5147 VT == MVT::nxv8bf16) {
5148 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5149 AArch64::MOVA_2ZMXI_H_H);
5150 return;
5151 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5152 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5153 AArch64::MOVA_2ZMXI_H_S);
5154 return;
5155 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5156 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5157 AArch64::MOVA_2ZMXI_H_D);
5158 return;
5159 }
5160 break;
5161 }
5162 case Intrinsic::aarch64_sme_read_ver_vg2: {
5163 if (VT == MVT::nxv16i8) {
5164 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5165 AArch64::MOVA_2ZMXI_V_B);
5166 return;
5167 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5168 VT == MVT::nxv8bf16) {
5169 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5170 AArch64::MOVA_2ZMXI_V_H);
5171 return;
5172 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5173 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5174 AArch64::MOVA_2ZMXI_V_S);
5175 return;
5176 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5177 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5178 AArch64::MOVA_2ZMXI_V_D);
5179 return;
5180 }
5181 break;
5182 }
5183 case Intrinsic::aarch64_sme_read_hor_vg4: {
5184 if (VT == MVT::nxv16i8) {
5185 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5186 AArch64::MOVA_4ZMXI_H_B);
5187 return;
5188 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5189 VT == MVT::nxv8bf16) {
5190 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5191 AArch64::MOVA_4ZMXI_H_H);
5192 return;
5193 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5194 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5195 AArch64::MOVA_4ZMXI_H_S);
5196 return;
5197 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5198 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5199 AArch64::MOVA_4ZMXI_H_D);
5200 return;
5201 }
5202 break;
5203 }
5204 case Intrinsic::aarch64_sme_read_ver_vg4: {
5205 if (VT == MVT::nxv16i8) {
5206 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5207 AArch64::MOVA_4ZMXI_V_B);
5208 return;
5209 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5210 VT == MVT::nxv8bf16) {
5211 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5212 AArch64::MOVA_4ZMXI_V_H);
5213 return;
5214 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5215 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5216 AArch64::MOVA_4ZMXI_V_S);
5217 return;
5218 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5219 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5220 AArch64::MOVA_4ZMXI_V_D);
5221 return;
5222 }
5223 break;
5224 }
5225 case Intrinsic::aarch64_sme_read_vg1x2: {
5226 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5227 AArch64::MOVA_VG2_2ZMXI);
5228 return;
5229 }
5230 case Intrinsic::aarch64_sme_read_vg1x4: {
5231 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5232 AArch64::MOVA_VG4_4ZMXI);
5233 return;
5234 }
5235 case Intrinsic::swift_async_context_addr: {
5236 SDLoc DL(Node);
5237 SDValue Chain = Node->getOperand(0);
5238 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5239 SDValue Res = SDValue(
5240 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5241 CurDAG->getTargetConstant(8, DL, MVT::i32),
5242 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5243 0);
5244 ReplaceUses(SDValue(Node, 0), Res);
5245 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5246 CurDAG->RemoveDeadNode(Node);
5247
5248 auto &MF = CurDAG->getMachineFunction();
5249 MF.getFrameInfo().setFrameAddressIsTaken(true);
5250 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5251 return;
5252 }
5253 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5254 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5255 Node->getValueType(0),
5256 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5257 AArch64::LUTI2_4ZTZI_S}))
5258 // Second Immediate must be <= 3:
5259 SelectMultiVectorLuti(Node, 4, Opc, 3);
5260 return;
5261 }
5262 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5263 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5264 Node->getValueType(0),
5265 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5266 // Second Immediate must be <= 1:
5267 SelectMultiVectorLuti(Node, 4, Opc, 1);
5268 return;
5269 }
5270 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5271 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5272 Node->getValueType(0),
5273 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5274 AArch64::LUTI2_2ZTZI_S}))
5275 // Second Immediate must be <= 7:
5276 SelectMultiVectorLuti(Node, 2, Opc, 7);
5277 return;
5278 }
5279 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5280 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5281 Node->getValueType(0),
5282 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5283 AArch64::LUTI4_2ZTZI_S}))
5284 // Second Immediate must be <= 3:
5285 SelectMultiVectorLuti(Node, 2, Opc, 3);
5286 return;
5287 }
5288 }
5289 } break;
5291 unsigned IntNo = Node->getConstantOperandVal(0);
5292 switch (IntNo) {
5293 default:
5294 break;
5295 case Intrinsic::aarch64_tagp:
5296 SelectTagP(Node);
5297 return;
5298 case Intrinsic::aarch64_neon_tbl2:
5299 SelectTable(Node, 2,
5300 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5301 false);
5302 return;
5303 case Intrinsic::aarch64_neon_tbl3:
5304 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5305 : AArch64::TBLv16i8Three,
5306 false);
5307 return;
5308 case Intrinsic::aarch64_neon_tbl4:
5309 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5310 : AArch64::TBLv16i8Four,
5311 false);
5312 return;
5313 case Intrinsic::aarch64_neon_tbx2:
5314 SelectTable(Node, 2,
5315 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5316 true);
5317 return;
5318 case Intrinsic::aarch64_neon_tbx3:
5319 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5320 : AArch64::TBXv16i8Three,
5321 true);
5322 return;
5323 case Intrinsic::aarch64_neon_tbx4:
5324 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5325 : AArch64::TBXv16i8Four,
5326 true);
5327 return;
5328 case Intrinsic::aarch64_sve_srshl_single_x2:
5329 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5330 Node->getValueType(0),
5331 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5332 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
5333 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5334 return;
5335 case Intrinsic::aarch64_sve_srshl_single_x4:
5336 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5337 Node->getValueType(0),
5338 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
5339 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
5340 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5341 return;
5342 case Intrinsic::aarch64_sve_urshl_single_x2:
5343 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5344 Node->getValueType(0),
5345 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
5346 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
5347 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5348 return;
5349 case Intrinsic::aarch64_sve_urshl_single_x4:
5350 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5351 Node->getValueType(0),
5352 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
5353 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
5354 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5355 return;
5356 case Intrinsic::aarch64_sve_srshl_x2:
5357 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5358 Node->getValueType(0),
5359 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
5360 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
5361 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5362 return;
5363 case Intrinsic::aarch64_sve_srshl_x4:
5364 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5365 Node->getValueType(0),
5366 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
5367 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
5368 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5369 return;
5370 case Intrinsic::aarch64_sve_urshl_x2:
5371 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5372 Node->getValueType(0),
5373 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
5374 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
5375 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5376 return;
5377 case Intrinsic::aarch64_sve_urshl_x4:
5378 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5379 Node->getValueType(0),
5380 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
5381 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
5382 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5383 return;
5384 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
5385 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5386 Node->getValueType(0),
5387 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
5388 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
5389 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5390 return;
5391 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
5392 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5393 Node->getValueType(0),
5394 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
5395 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
5396 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5397 return;
5398 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
5399 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5400 Node->getValueType(0),
5401 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
5402 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
5403 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5404 return;
5405 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
5406 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5407 Node->getValueType(0),
5408 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
5409 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
5410 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5411 return;
5412 case Intrinsic::aarch64_sve_whilege_x2:
5413 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5414 Node->getValueType(0),
5415 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
5416 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
5417 SelectWhilePair(Node, Op);
5418 return;
5419 case Intrinsic::aarch64_sve_whilegt_x2:
5420 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5421 Node->getValueType(0),
5422 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
5423 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
5424 SelectWhilePair(Node, Op);
5425 return;
5426 case Intrinsic::aarch64_sve_whilehi_x2:
5427 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5428 Node->getValueType(0),
5429 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
5430 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
5431 SelectWhilePair(Node, Op);
5432 return;
5433 case Intrinsic::aarch64_sve_whilehs_x2:
5434 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5435 Node->getValueType(0),
5436 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
5437 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
5438 SelectWhilePair(Node, Op);
5439 return;
5440 case Intrinsic::aarch64_sve_whilele_x2:
5441 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5442 Node->getValueType(0),
5443 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
5444 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
5445 SelectWhilePair(Node, Op);
5446 return;
5447 case Intrinsic::aarch64_sve_whilelo_x2:
5448 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5449 Node->getValueType(0),
5450 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
5451 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
5452 SelectWhilePair(Node, Op);
5453 return;
5454 case Intrinsic::aarch64_sve_whilels_x2:
5455 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5456 Node->getValueType(0),
5457 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
5458 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
5459 SelectWhilePair(Node, Op);
5460 return;
5461 case Intrinsic::aarch64_sve_whilelt_x2:
5462 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5463 Node->getValueType(0),
5464 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
5465 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
5466 SelectWhilePair(Node, Op);
5467 return;
5468 case Intrinsic::aarch64_sve_smax_single_x2:
5469 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5470 Node->getValueType(0),
5471 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
5472 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
5473 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5474 return;
5475 case Intrinsic::aarch64_sve_umax_single_x2:
5476 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5477 Node->getValueType(0),
5478 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
5479 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
5480 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5481 return;
5482 case Intrinsic::aarch64_sve_fmax_single_x2:
5483 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5484 Node->getValueType(0),
5485 {0, AArch64::FMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_S,
5486 AArch64::FMAX_VG2_2ZZ_D}))
5487 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5488 return;
5489 case Intrinsic::aarch64_sve_smax_single_x4:
5490 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5491 Node->getValueType(0),
5492 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
5493 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
5494 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5495 return;
5496 case Intrinsic::aarch64_sve_umax_single_x4:
5497 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5498 Node->getValueType(0),
5499 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
5500 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
5501 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5502 return;
5503 case Intrinsic::aarch64_sve_fmax_single_x4:
5504 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5505 Node->getValueType(0),
5506 {0, AArch64::FMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_S,
5507 AArch64::FMAX_VG4_4ZZ_D}))
5508 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5509 return;
5510 case Intrinsic::aarch64_sve_smin_single_x2:
5511 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5512 Node->getValueType(0),
5513 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
5514 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
5515 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5516 return;
5517 case Intrinsic::aarch64_sve_umin_single_x2:
5518 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5519 Node->getValueType(0),
5520 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
5521 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
5522 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5523 return;
5524 case Intrinsic::aarch64_sve_fmin_single_x2:
5525 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5526 Node->getValueType(0),
5527 {0, AArch64::FMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_S,
5528 AArch64::FMIN_VG2_2ZZ_D}))
5529 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5530 return;
5531 case Intrinsic::aarch64_sve_smin_single_x4:
5532 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5533 Node->getValueType(0),
5534 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
5535 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
5536 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5537 return;
5538 case Intrinsic::aarch64_sve_umin_single_x4:
5539 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5540 Node->getValueType(0),
5541 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
5542 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
5543 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5544 return;
5545 case Intrinsic::aarch64_sve_fmin_single_x4:
5546 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5547 Node->getValueType(0),
5548 {0, AArch64::FMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_S,
5549 AArch64::FMIN_VG4_4ZZ_D}))
5550 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5551 return;
5552 case Intrinsic::aarch64_sve_smax_x2:
5553 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5554 Node->getValueType(0),
5555 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
5556 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
5557 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5558 return;
5559 case Intrinsic::aarch64_sve_umax_x2:
5560 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5561 Node->getValueType(0),
5562 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
5563 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
5564 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5565 return;
5566 case Intrinsic::aarch64_sve_fmax_x2:
5567 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5568 Node->getValueType(0),
5569 {0, AArch64::FMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_S,
5570 AArch64::FMAX_VG2_2Z2Z_D}))
5571 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5572 return;
5573 case Intrinsic::aarch64_sve_smax_x4:
5574 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5575 Node->getValueType(0),
5576 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
5577 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
5578 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5579 return;
5580 case Intrinsic::aarch64_sve_umax_x4:
5581 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5582 Node->getValueType(0),
5583 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
5584 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
5585 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5586 return;
5587 case Intrinsic::aarch64_sve_fmax_x4:
5588 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5589 Node->getValueType(0),
5590 {0, AArch64::FMAX_VG4_4Z4Z_H, AArch64::FMAX_VG4_4Z4Z_S,
5591 AArch64::FMAX_VG4_4Z4Z_D}))
5592 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5593 return;
5594 case Intrinsic::aarch64_sve_smin_x2:
5595 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5596 Node->getValueType(0),
5597 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
5598 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
5599 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5600 return;
5601 case Intrinsic::aarch64_sve_umin_x2:
5602 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5603 Node->getValueType(0),
5604 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
5605 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
5606 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5607 return;
5608 case Intrinsic::aarch64_sve_fmin_x2:
5609 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5610 Node->getValueType(0),
5611 {0, AArch64::FMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_S,
5612 AArch64::FMIN_VG2_2Z2Z_D}))
5613 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5614 return;
5615 case Intrinsic::aarch64_sve_smin_x4:
5616 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5617 Node->getValueType(0),
5618 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
5619 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
5620 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5621 return;
5622 case Intrinsic::aarch64_sve_umin_x4:
5623 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5624 Node->getValueType(0),
5625 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
5626 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
5627 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5628 return;
5629 case Intrinsic::aarch64_sve_fmin_x4:
5630 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5631 Node->getValueType(0),
5632 {0, AArch64::FMIN_VG4_4Z4Z_H, AArch64::FMIN_VG4_4Z4Z_S,
5633 AArch64::FMIN_VG4_4Z4Z_D}))
5634 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5635 return;
5636 case Intrinsic::aarch64_sve_fmaxnm_single_x2 :
5637 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5638 Node->getValueType(0),
5639 {0, AArch64::FMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_S,
5640 AArch64::FMAXNM_VG2_2ZZ_D}))
5641 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5642 return;
5643 case Intrinsic::aarch64_sve_fmaxnm_single_x4 :
5644 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5645 Node->getValueType(0),
5646 {0, AArch64::FMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_S,
5647 AArch64::FMAXNM_VG4_4ZZ_D}))
5648 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5649 return;
5650 case Intrinsic::aarch64_sve_fminnm_single_x2:
5651 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5652 Node->getValueType(0),
5653 {0, AArch64::FMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_S,
5654 AArch64::FMINNM_VG2_2ZZ_D}))
5655 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5656 return;
5657 case Intrinsic::aarch64_sve_fminnm_single_x4:
5658 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5659 Node->getValueType(0),
5660 {0, AArch64::FMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_S,
5661 AArch64::FMINNM_VG4_4ZZ_D}))
5662 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5663 return;
5664 case Intrinsic::aarch64_sve_fmaxnm_x2:
5665 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5666 Node->getValueType(0),
5667 {0, AArch64::FMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_S,
5668 AArch64::FMAXNM_VG2_2Z2Z_D}))
5669 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5670 return;
5671 case Intrinsic::aarch64_sve_fmaxnm_x4:
5672 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5673 Node->getValueType(0),
5674 {0, AArch64::FMAXNM_VG4_4Z4Z_H, AArch64::FMAXNM_VG4_4Z4Z_S,
5675 AArch64::FMAXNM_VG4_4Z4Z_D}))
5676 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5677 return;
5678 case Intrinsic::aarch64_sve_fminnm_x2:
5679 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5680 Node->getValueType(0),
5681 {0, AArch64::FMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_S,
5682 AArch64::FMINNM_VG2_2Z2Z_D}))
5683 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5684 return;
5685 case Intrinsic::aarch64_sve_fminnm_x4:
5686 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5687 Node->getValueType(0),
5688 {0, AArch64::FMINNM_VG4_4Z4Z_H, AArch64::FMINNM_VG4_4Z4Z_S,
5689 AArch64::FMINNM_VG4_4Z4Z_D}))
5690 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5691 return;
5692 case Intrinsic::aarch64_sve_fcvtzs_x2:
5693 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
5694 return;
5695 case Intrinsic::aarch64_sve_scvtf_x2:
5696 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
5697 return;
5698 case Intrinsic::aarch64_sve_fcvtzu_x2:
5699 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
5700 return;
5701 case Intrinsic::aarch64_sve_ucvtf_x2:
5702 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
5703 return;
5704 case Intrinsic::aarch64_sve_fcvtzs_x4:
5705 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
5706 return;
5707 case Intrinsic::aarch64_sve_scvtf_x4:
5708 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
5709 return;
5710 case Intrinsic::aarch64_sve_fcvtzu_x4:
5711 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
5712 return;
5713 case Intrinsic::aarch64_sve_ucvtf_x4:
5714 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
5715 return;
5716 case Intrinsic::aarch64_sve_sclamp_single_x2:
5717 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5718 Node->getValueType(0),
5719 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
5720 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
5721 SelectClamp(Node, 2, Op);
5722 return;
5723 case Intrinsic::aarch64_sve_uclamp_single_x2:
5724 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5725 Node->getValueType(0),
5726 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
5727 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
5728 SelectClamp(Node, 2, Op);
5729 return;
5730 case Intrinsic::aarch64_sve_fclamp_single_x2:
5731 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5732 Node->getValueType(0),
5733 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
5734 AArch64::FCLAMP_VG2_2Z2Z_D}))
5735 SelectClamp(Node, 2, Op);
5736 return;
5737 case Intrinsic::aarch64_sve_sclamp_single_x4:
5738 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5739 Node->getValueType(0),
5740 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
5741 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
5742 SelectClamp(Node, 4, Op);
5743 return;
5744 case Intrinsic::aarch64_sve_uclamp_single_x4:
5745 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5746 Node->getValueType(0),
5747 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
5748 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
5749 SelectClamp(Node, 4, Op);
5750 return;
5751 case Intrinsic::aarch64_sve_fclamp_single_x4:
5752 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5753 Node->getValueType(0),
5754 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
5755 AArch64::FCLAMP_VG4_4Z4Z_D}))
5756 SelectClamp(Node, 4, Op);
5757 return;
5758 case Intrinsic::aarch64_sve_add_single_x2:
5759 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5760 Node->getValueType(0),
5761 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
5762 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
5763 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5764 return;
5765 case Intrinsic::aarch64_sve_add_single_x4:
5766 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5767 Node->getValueType(0),
5768 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
5769 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
5770 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5771 return;
5772 case Intrinsic::aarch64_sve_zip_x2:
5773 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5774 Node->getValueType(0),
5775 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
5776 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
5777 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
5778 return;
5779 case Intrinsic::aarch64_sve_zipq_x2:
5780 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
5781 AArch64::ZIP_VG2_2ZZZ_Q);
5782 return;
5783 case Intrinsic::aarch64_sve_zip_x4:
5784 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5785 Node->getValueType(0),
5786 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
5787 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
5788 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
5789 return;
5790 case Intrinsic::aarch64_sve_zipq_x4:
5791 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
5792 AArch64::ZIP_VG4_4Z4Z_Q);
5793 return;
5794 case Intrinsic::aarch64_sve_uzp_x2:
5795 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5796 Node->getValueType(0),
5797 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
5798 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
5799 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
5800 return;
5801 case Intrinsic::aarch64_sve_uzpq_x2:
5802 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
5803 AArch64::UZP_VG2_2ZZZ_Q);
5804 return;
5805 case Intrinsic::aarch64_sve_uzp_x4:
5806 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5807 Node->getValueType(0),
5808 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
5809 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
5810 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
5811 return;
5812 case Intrinsic::aarch64_sve_uzpq_x4:
5813 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
5814 AArch64::UZP_VG4_4Z4Z_Q);
5815 return;
5816 case Intrinsic::aarch64_sve_sel_x2:
5817 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5818 Node->getValueType(0),
5819 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
5820 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
5821 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
5822 return;
5823 case Intrinsic::aarch64_sve_sel_x4:
5824 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5825 Node->getValueType(0),
5826 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
5827 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
5828 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
5829 return;
5830 case Intrinsic::aarch64_sve_frinta_x2:
5831 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
5832 return;
5833 case Intrinsic::aarch64_sve_frinta_x4:
5834 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
5835 return;
5836 case Intrinsic::aarch64_sve_frintm_x2:
5837 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
5838 return;
5839 case Intrinsic::aarch64_sve_frintm_x4:
5840 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
5841 return;
5842 case Intrinsic::aarch64_sve_frintn_x2:
5843 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
5844 return;
5845 case Intrinsic::aarch64_sve_frintn_x4:
5846 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
5847 return;
5848 case Intrinsic::aarch64_sve_frintp_x2:
5849 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
5850 return;
5851 case Intrinsic::aarch64_sve_frintp_x4:
5852 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
5853 return;
5854 case Intrinsic::aarch64_sve_sunpk_x2:
5855 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5856 Node->getValueType(0),
5857 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
5858 AArch64::SUNPK_VG2_2ZZ_D}))
5859 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
5860 return;
5861 case Intrinsic::aarch64_sve_uunpk_x2:
5862 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5863 Node->getValueType(0),
5864 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
5865 AArch64::UUNPK_VG2_2ZZ_D}))
5866 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
5867 return;
5868 case Intrinsic::aarch64_sve_sunpk_x4:
5869 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5870 Node->getValueType(0),
5871 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
5872 AArch64::SUNPK_VG4_4Z2Z_D}))
5873 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
5874 return;
5875 case Intrinsic::aarch64_sve_uunpk_x4:
5876 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5877 Node->getValueType(0),
5878 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
5879 AArch64::UUNPK_VG4_4Z2Z_D}))
5880 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
5881 return;
5882 case Intrinsic::aarch64_sve_pext_x2: {
5883 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5884 Node->getValueType(0),
5885 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
5886 AArch64::PEXT_2PCI_D}))
5887 SelectPExtPair(Node, Op);
5888 return;
5889 }
5890 }
5891 break;
5892 }
5893 case ISD::INTRINSIC_VOID: {
5894 unsigned IntNo = Node->getConstantOperandVal(1);
5895 if (Node->getNumOperands() >= 3)
5896 VT = Node->getOperand(2)->getValueType(0);
5897 switch (IntNo) {
5898 default:
5899 break;
5900 case Intrinsic::aarch64_neon_st1x2: {
5901 if (VT == MVT::v8i8) {
5902 SelectStore(Node, 2, AArch64::ST1Twov8b);
5903 return;
5904 } else if (VT == MVT::v16i8) {
5905 SelectStore(Node, 2, AArch64::ST1Twov16b);
5906 return;
5907 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
5908 VT == MVT::v4bf16) {
5909 SelectStore(Node, 2, AArch64::ST1Twov4h);
5910 return;
5911 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
5912 VT == MVT::v8bf16) {
5913 SelectStore(Node, 2, AArch64::ST1Twov8h);
5914 return;
5915 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5916 SelectStore(Node, 2, AArch64::ST1Twov2s);
5917 return;
5918 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5919 SelectStore(Node, 2, AArch64::ST1Twov4s);
5920 return;
5921 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5922 SelectStore(Node, 2, AArch64::ST1Twov2d);
5923 return;
5924 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5925 SelectStore(Node, 2, AArch64::ST1Twov1d);
5926 return;
5927 }
5928 break;
5929 }
5930 case Intrinsic::aarch64_neon_st1x3: {
5931 if (VT == MVT::v8i8) {
5932 SelectStore(Node, 3, AArch64::ST1Threev8b);
5933 return;
5934 } else if (VT == MVT::v16i8) {
5935 SelectStore(Node, 3, AArch64::ST1Threev16b);
5936 return;
5937 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
5938 VT == MVT::v4bf16) {
5939 SelectStore(Node, 3, AArch64::ST1Threev4h);
5940 return;
5941 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
5942 VT == MVT::v8bf16) {
5943 SelectStore(Node, 3, AArch64::ST1Threev8h);
5944 return;
5945 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5946 SelectStore(Node, 3, AArch64::ST1Threev2s);
5947 return;
5948 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5949 SelectStore(Node, 3, AArch64::ST1Threev4s);
5950 return;
5951 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5952 SelectStore(Node, 3, AArch64::ST1Threev2d);
5953 return;
5954 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5955 SelectStore(Node, 3, AArch64::ST1Threev1d);
5956 return;
5957 }
5958 break;
5959 }
5960 case Intrinsic::aarch64_neon_st1x4: {
5961 if (VT == MVT::v8i8) {
5962 SelectStore(Node, 4, AArch64::ST1Fourv8b);
5963 return;
5964 } else if (VT == MVT::v16i8) {
5965 SelectStore(Node, 4, AArch64::ST1Fourv16b);
5966 return;
5967 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
5968 VT == MVT::v4bf16) {
5969 SelectStore(Node, 4, AArch64::ST1Fourv4h);
5970 return;
5971 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
5972 VT == MVT::v8bf16) {
5973 SelectStore(Node, 4, AArch64::ST1Fourv8h);
5974 return;
5975 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5976 SelectStore(Node, 4, AArch64::ST1Fourv2s);
5977 return;
5978 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5979 SelectStore(Node, 4, AArch64::ST1Fourv4s);
5980 return;
5981 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5982 SelectStore(Node, 4, AArch64::ST1Fourv2d);
5983 return;
5984 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5985 SelectStore(Node, 4, AArch64::ST1Fourv1d);
5986 return;
5987 }
5988 break;
5989 }
5990 case Intrinsic::aarch64_neon_st2: {
5991 if (VT == MVT::v8i8) {
5992 SelectStore(Node, 2, AArch64::ST2Twov8b);
5993 return;
5994 } else if (VT == MVT::v16i8) {
5995 SelectStore(Node, 2, AArch64::ST2Twov16b);
5996 return;
5997 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
5998 VT == MVT::v4bf16) {
5999 SelectStore(Node, 2, AArch64::ST2Twov4h);
6000 return;
6001 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6002 VT == MVT::v8bf16) {
6003 SelectStore(Node, 2, AArch64::ST2Twov8h);
6004 return;
6005 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6006 SelectStore(Node, 2, AArch64::ST2Twov2s);
6007 return;
6008 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6009 SelectStore(Node, 2, AArch64::ST2Twov4s);
6010 return;
6011 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6012 SelectStore(Node, 2, AArch64::ST2Twov2d);
6013 return;
6014 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6015 SelectStore(Node, 2, AArch64::ST1Twov1d);
6016 return;
6017 }
6018 break;
6019 }
6020 case Intrinsic::aarch64_neon_st3: {
6021 if (VT == MVT::v8i8) {
6022 SelectStore(Node, 3, AArch64::ST3Threev8b);
6023 return;
6024 } else if (VT == MVT::v16i8) {
6025 SelectStore(Node, 3, AArch64::ST3Threev16b);
6026 return;
6027 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6028 VT == MVT::v4bf16) {
6029 SelectStore(Node, 3, AArch64::ST3Threev4h);
6030 return;
6031 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6032 VT == MVT::v8bf16) {
6033 SelectStore(Node, 3, AArch64::ST3Threev8h);
6034 return;
6035 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6036 SelectStore(Node, 3, AArch64::ST3Threev2s);
6037 return;
6038 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6039 SelectStore(Node, 3, AArch64::ST3Threev4s);
6040 return;
6041 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6042 SelectStore(Node, 3, AArch64::ST3Threev2d);
6043 return;
6044 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6045 SelectStore(Node, 3, AArch64::ST1Threev1d);
6046 return;
6047 }
6048 break;
6049 }
6050 case Intrinsic::aarch64_neon_st4: {
6051 if (VT == MVT::v8i8) {
6052 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6053 return;
6054 } else if (VT == MVT::v16i8) {
6055 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6056 return;
6057 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6058 VT == MVT::v4bf16) {
6059 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6060 return;
6061 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6062 VT == MVT::v8bf16) {
6063 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6064 return;
6065 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6066 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6067 return;
6068 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6069 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6070 return;
6071 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6072 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6073 return;
6074 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6075 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6076 return;
6077 }
6078 break;
6079 }
6080 case Intrinsic::aarch64_neon_st2lane: {
6081 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6082 SelectStoreLane(Node, 2, AArch64::ST2i8);
6083 return;
6084 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6085 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6086 SelectStoreLane(Node, 2, AArch64::ST2i16);
6087 return;
6088 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6089 VT == MVT::v2f32) {
6090 SelectStoreLane(Node, 2, AArch64::ST2i32);
6091 return;
6092 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6093 VT == MVT::v1f64) {
6094 SelectStoreLane(Node, 2, AArch64::ST2i64);
6095 return;
6096 }
6097 break;
6098 }
6099 case Intrinsic::aarch64_neon_st3lane: {
6100 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6101 SelectStoreLane(Node, 3, AArch64::ST3i8);
6102 return;
6103 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6104 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6105 SelectStoreLane(Node, 3, AArch64::ST3i16);
6106 return;
6107 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6108 VT == MVT::v2f32) {
6109 SelectStoreLane(Node, 3, AArch64::ST3i32);
6110 return;
6111 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6112 VT == MVT::v1f64) {
6113 SelectStoreLane(Node, 3, AArch64::ST3i64);
6114 return;
6115 }
6116 break;
6117 }
6118 case Intrinsic::aarch64_neon_st4lane: {
6119 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6120 SelectStoreLane(Node, 4, AArch64::ST4i8);
6121 return;
6122 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6123 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6124 SelectStoreLane(Node, 4, AArch64::ST4i16);
6125 return;
6126 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6127 VT == MVT::v2f32) {
6128 SelectStoreLane(Node, 4, AArch64::ST4i32);
6129 return;
6130 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6131 VT == MVT::v1f64) {
6132 SelectStoreLane(Node, 4, AArch64::ST4i64);
6133 return;
6134 }
6135 break;
6136 }
6137 case Intrinsic::aarch64_sve_st2q: {
6138 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6139 return;
6140 }
6141 case Intrinsic::aarch64_sve_st3q: {
6142 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6143 return;
6144 }
6145 case Intrinsic::aarch64_sve_st4q: {
6146 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
6147 return;
6148 }
6149 case Intrinsic::aarch64_sve_st2: {
6150 if (VT == MVT::nxv16i8) {
6151 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
6152 return;
6153 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6154 VT == MVT::nxv8bf16) {
6155 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
6156 return;
6157 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6158 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
6159 return;
6160 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6161 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
6162 return;
6163 }
6164 break;
6165 }
6166 case Intrinsic::aarch64_sve_st3: {
6167 if (VT == MVT::nxv16i8) {
6168 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
6169 return;
6170 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6171 VT == MVT::nxv8bf16) {
6172 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
6173 return;
6174 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6175 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
6176 return;
6177 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6178 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
6179 return;
6180 }
6181 break;
6182 }
6183 case Intrinsic::aarch64_sve_st4: {
6184 if (VT == MVT::nxv16i8) {
6185 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
6186 return;
6187 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6188 VT == MVT::nxv8bf16) {
6189 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
6190 return;
6191 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6192 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
6193 return;
6194 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6195 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
6196 return;
6197 }
6198 break;
6199 }
6200 }
6201 break;
6202 }
6203 case AArch64ISD::LD2post: {
6204 if (VT == MVT::v8i8) {
6205 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
6206 return;
6207 } else if (VT == MVT::v16i8) {
6208 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
6209 return;
6210 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6211 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
6212 return;
6213 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6214 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
6215 return;
6216 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6217 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
6218 return;
6219 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6220 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
6221 return;
6222 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6223 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6224 return;
6225 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6226 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
6227 return;
6228 }
6229 break;
6230 }
6231 case AArch64ISD::LD3post: {
6232 if (VT == MVT::v8i8) {
6233 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
6234 return;
6235 } else if (VT == MVT::v16i8) {
6236 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
6237 return;
6238 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6239 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
6240 return;
6241 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6242 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
6243 return;
6244 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6245 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
6246 return;
6247 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6248 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
6249 return;
6250 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6251 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6252 return;
6253 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6254 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
6255 return;
6256 }
6257 break;
6258 }
6259 case AArch64ISD::LD4post: {
6260 if (VT == MVT::v8i8) {
6261 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
6262 return;
6263 } else if (VT == MVT::v16i8) {
6264 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
6265 return;
6266 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6267 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
6268 return;
6269 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6270 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
6271 return;
6272 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6273 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
6274 return;
6275 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6276 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
6277 return;
6278 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6279 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6280 return;
6281 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6282 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
6283 return;
6284 }
6285 break;
6286 }
6287 case AArch64ISD::LD1x2post: {
6288 if (VT == MVT::v8i8) {
6289 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
6290 return;
6291 } else if (VT == MVT::v16i8) {
6292 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
6293 return;
6294 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6295 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
6296 return;
6297 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6298 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
6299 return;
6300 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6301 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
6302 return;
6303 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6304 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
6305 return;
6306 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6307 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6308 return;
6309 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6310 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
6311 return;
6312 }
6313 break;
6314 }
6315 case AArch64ISD::LD1x3post: {
6316 if (VT == MVT::v8i8) {
6317 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
6318 return;
6319 } else if (VT == MVT::v16i8) {
6320 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
6321 return;
6322 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6323 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
6324 return;
6325 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6326 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
6327 return;
6328 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6329 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
6330 return;
6331 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6332 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
6333 return;
6334 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6335 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6336 return;
6337 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6338 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
6339 return;
6340 }
6341 break;
6342 }
6343 case AArch64ISD::LD1x4post: {
6344 if (VT == MVT::v8i8) {
6345 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
6346 return;
6347 } else if (VT == MVT::v16i8) {
6348 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
6349 return;
6350 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6351 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
6352 return;
6353 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6354 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
6355 return;
6356 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6357 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
6358 return;
6359 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6360 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
6361 return;
6362 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6363 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6364 return;
6365 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6366 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
6367 return;
6368 }
6369 break;
6370 }
6372 if (VT == MVT::v8i8) {
6373 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
6374 return;
6375 } else if (VT == MVT::v16i8) {
6376 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
6377 return;
6378 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6379 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
6380 return;
6381 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6382 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
6383 return;
6384 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6385 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
6386 return;
6387 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6388 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
6389 return;
6390 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6391 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
6392 return;
6393 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6394 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
6395 return;
6396 }
6397 break;
6398 }
6400 if (VT == MVT::v8i8) {
6401 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
6402 return;
6403 } else if (VT == MVT::v16i8) {
6404 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
6405 return;
6406 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6407 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
6408 return;
6409 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6410 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
6411 return;
6412 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6413 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
6414 return;
6415 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6416 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
6417 return;
6418 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6419 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
6420 return;
6421 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6422 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
6423 return;
6424 }
6425 break;
6426 }
6428 if (VT == MVT::v8i8) {
6429 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
6430 return;
6431 } else if (VT == MVT::v16i8) {
6432 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
6433 return;
6434 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6435 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
6436 return;
6437 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6438 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
6439 return;
6440 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6441 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
6442 return;
6443 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6444 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
6445 return;
6446 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6447 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
6448 return;
6449 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6450 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
6451 return;
6452 }
6453 break;
6454 }
6456 if (VT == MVT::v8i8) {
6457 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
6458 return;
6459 } else if (VT == MVT::v16i8) {
6460 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
6461 return;
6462 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6463 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
6464 return;
6465 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6466 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
6467 return;
6468 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6469 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
6470 return;
6471 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6472 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
6473 return;
6474 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6475 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
6476 return;
6477 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6478 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
6479 return;
6480 }
6481 break;
6482 }
6484 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6485 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
6486 return;
6487 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6488 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6489 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
6490 return;
6491 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6492 VT == MVT::v2f32) {
6493 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
6494 return;
6495 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6496 VT == MVT::v1f64) {
6497 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
6498 return;
6499 }
6500 break;
6501 }
6503 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6504 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
6505 return;
6506 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6507 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6508 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
6509 return;
6510 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6511 VT == MVT::v2f32) {
6512 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
6513 return;
6514 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6515 VT == MVT::v1f64) {
6516 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
6517 return;
6518 }
6519 break;
6520 }
6522 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6523 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
6524 return;
6525 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6526 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6527 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
6528 return;
6529 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6530 VT == MVT::v2f32) {
6531 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
6532 return;
6533 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6534 VT == MVT::v1f64) {
6535 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
6536 return;
6537 }
6538 break;
6539 }
6541 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6542 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
6543 return;
6544 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6545 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6546 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
6547 return;
6548 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6549 VT == MVT::v2f32) {
6550 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
6551 return;
6552 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6553 VT == MVT::v1f64) {
6554 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
6555 return;
6556 }
6557 break;
6558 }
6559 case AArch64ISD::ST2post: {
6560 VT = Node->getOperand(1).getValueType();
6561 if (VT == MVT::v8i8) {
6562 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
6563 return;
6564 } else if (VT == MVT::v16i8) {
6565 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
6566 return;
6567 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6568 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
6569 return;
6570 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6571 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
6572 return;
6573 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6574 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
6575 return;
6576 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6577 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
6578 return;
6579 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6580 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
6581 return;
6582 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6583 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
6584 return;
6585 }
6586 break;
6587 }
6588 case AArch64ISD::ST3post: {
6589 VT = Node->getOperand(1).getValueType();
6590 if (VT == MVT::v8i8) {
6591 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
6592 return;
6593 } else if (VT == MVT::v16i8) {
6594 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
6595 return;
6596 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6597 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
6598 return;
6599 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6600 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
6601 return;
6602 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6603 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
6604 return;
6605 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6606 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
6607 return;
6608 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6609 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
6610 return;
6611 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6612 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
6613 return;
6614 }
6615 break;
6616 }
6617 case AArch64ISD::ST4post: {
6618 VT = Node->getOperand(1).getValueType();
6619 if (VT == MVT::v8i8) {
6620 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
6621 return;
6622 } else if (VT == MVT::v16i8) {
6623 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
6624 return;
6625 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6626 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
6627 return;
6628 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6629 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
6630 return;
6631 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6632 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
6633 return;
6634 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6635 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
6636 return;
6637 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6638 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
6639 return;
6640 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6641 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
6642 return;
6643 }
6644 break;
6645 }
6646 case AArch64ISD::ST1x2post: {
6647 VT = Node->getOperand(1).getValueType();
6648 if (VT == MVT::v8i8) {
6649 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
6650 return;
6651 } else if (VT == MVT::v16i8) {
6652 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
6653 return;
6654 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6655 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
6656 return;
6657 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6658 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
6659 return;
6660 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6661 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
6662 return;
6663 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6664 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
6665 return;
6666 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6667 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
6668 return;
6669 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6670 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
6671 return;
6672 }
6673 break;
6674 }
6675 case AArch64ISD::ST1x3post: {
6676 VT = Node->getOperand(1).getValueType();
6677 if (VT == MVT::v8i8) {
6678 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
6679 return;
6680 } else if (VT == MVT::v16i8) {
6681 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
6682 return;
6683 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6684 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
6685 return;
6686 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16 ) {
6687 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
6688 return;
6689 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6690 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
6691 return;
6692 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6693 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
6694 return;
6695 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6696 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
6697 return;
6698 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6699 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
6700 return;
6701 }
6702 break;
6703 }
6704 case AArch64ISD::ST1x4post: {
6705 VT = Node->getOperand(1).getValueType();
6706 if (VT == MVT::v8i8) {
6707 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
6708 return;
6709 } else if (VT == MVT::v16i8) {
6710 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
6711 return;
6712 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6713 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
6714 return;
6715 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6716 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
6717 return;
6718 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6719 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
6720 return;
6721 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6722 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
6723 return;
6724 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6725 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
6726 return;
6727 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6728 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
6729 return;
6730 }
6731 break;
6732 }
6734 VT = Node->getOperand(1).getValueType();
6735 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6736 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
6737 return;
6738 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6739 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6740 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
6741 return;
6742 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6743 VT == MVT::v2f32) {
6744 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
6745 return;
6746 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6747 VT == MVT::v1f64) {
6748 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
6749 return;
6750 }
6751 break;
6752 }
6754 VT = Node->getOperand(1).getValueType();
6755 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6756 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
6757 return;
6758 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6759 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6760 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
6761 return;
6762 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6763 VT == MVT::v2f32) {
6764 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
6765 return;
6766 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6767 VT == MVT::v1f64) {
6768 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
6769 return;
6770 }
6771 break;
6772 }
6774 VT = Node->getOperand(1).getValueType();
6775 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6776 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
6777 return;
6778 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6779 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6780 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
6781 return;
6782 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6783 VT == MVT::v2f32) {
6784 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
6785 return;
6786 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6787 VT == MVT::v1f64) {
6788 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
6789 return;
6790 }
6791 break;
6792 }
6794 if (VT == MVT::nxv16i8) {
6795 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
6796 return;
6797 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6798 VT == MVT::nxv8bf16) {
6799 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
6800 return;
6801 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6802 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
6803 return;
6804 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6805 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
6806 return;
6807 }
6808 break;
6809 }
6811 if (VT == MVT::nxv16i8) {
6812 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
6813 return;
6814 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6815 VT == MVT::nxv8bf16) {
6816 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
6817 return;
6818 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6819 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
6820 return;
6821 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6822 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
6823 return;
6824 }
6825 break;
6826 }
6828 if (VT == MVT::nxv16i8) {
6829 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
6830 return;
6831 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6832 VT == MVT::nxv8bf16) {
6833 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
6834 return;
6835 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6836 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
6837 return;
6838 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6839 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
6840 return;
6841 }
6842 break;
6843 }
6844 }
6845
6846 // Select the default instruction
6847 SelectCode(Node);
6848}
6849
6850/// createAArch64ISelDag - This pass converts a legalized DAG into a
6851/// AArch64-specific DAG, ready for instruction scheduling.
6853 CodeGenOptLevel OptLevel) {
6854 return new AArch64DAGToDAGISel(TM, OptLevel);
6855}
6856
6857/// When \p PredVT is a scalable vector predicate in the form
6858/// MVT::nx<M>xi1, it builds the correspondent scalable vector of
6859/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
6860/// structured vectors (NumVec >1), the output data type is
6861/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
6862/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
6863/// EVT.
6865 unsigned NumVec) {
6866 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
6867 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
6868 return EVT();
6869
6870 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
6871 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
6872 return EVT();
6873
6874 ElementCount EC = PredVT.getVectorElementCount();
6875 EVT ScalarVT =
6876 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
6877 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
6878
6879 return MemVT;
6880}
6881
6882/// Return the EVT of the data associated to a memory operation in \p
6883/// Root. If such EVT cannot be retrived, it returns an invalid EVT.
6885 if (isa<MemSDNode>(Root))
6886 return cast<MemSDNode>(Root)->getMemoryVT();
6887
6888 if (isa<MemIntrinsicSDNode>(Root))
6889 return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();
6890
6891 const unsigned Opcode = Root->getOpcode();
6892 // For custom ISD nodes, we have to look at them individually to extract the
6893 // type of the data moved to/from memory.
6894 switch (Opcode) {
6899 return cast<VTSDNode>(Root->getOperand(3))->getVT();
6901 return cast<VTSDNode>(Root->getOperand(4))->getVT();
6904 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
6907 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
6910 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
6911 default:
6912 break;
6913 }
6914
6915 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
6916 return EVT();
6917
6918 switch (Root->getConstantOperandVal(1)) {
6919 default:
6920 return EVT();
6921 case Intrinsic::aarch64_sme_ldr:
6922 case Intrinsic::aarch64_sme_str:
6923 return MVT::nxv16i8;
6924 case Intrinsic::aarch64_sve_prf:
6925 // We are using an SVE prefetch intrinsic. Type must be inferred from the
6926 // width of the predicate.
6928 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
6929 case Intrinsic::aarch64_sve_ld2_sret:
6930 case Intrinsic::aarch64_sve_ld2q_sret:
6932 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
6933 case Intrinsic::aarch64_sve_st2q:
6935 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
6936 case Intrinsic::aarch64_sve_ld3_sret:
6937 case Intrinsic::aarch64_sve_ld3q_sret:
6939 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
6940 case Intrinsic::aarch64_sve_st3q:
6942 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
6943 case Intrinsic::aarch64_sve_ld4_sret:
6944 case Intrinsic::aarch64_sve_ld4q_sret:
6946 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
6947 case Intrinsic::aarch64_sve_st4q:
6949 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
6950 case Intrinsic::aarch64_sve_ld1udq:
6951 case Intrinsic::aarch64_sve_st1dq:
6952 return EVT(MVT::nxv1i64);
6953 case Intrinsic::aarch64_sve_ld1uwq:
6954 case Intrinsic::aarch64_sve_st1wq:
6955 return EVT(MVT::nxv1i32);
6956 }
6957}
6958
6959/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
6960/// Base + OffImm * sizeof(MemVT) for Min >= OffImm <= Max
6961/// where Root is the memory access using N for its address.
6962template <int64_t Min, int64_t Max>
6963bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
6964 SDValue &Base,
6965 SDValue &OffImm) {
6966 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
6967 const DataLayout &DL = CurDAG->getDataLayout();
6968 const MachineFrameInfo &MFI = MF->getFrameInfo();
6969
6970 if (N.getOpcode() == ISD::FrameIndex) {
6971 int FI = cast<FrameIndexSDNode>(N)->getIndex();
6972 // We can only encode VL scaled offsets, so only fold in frame indexes
6973 // referencing SVE objects.
6975 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
6976 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
6977 return true;
6978 }
6979
6980 return false;
6981 }
6982
6983 if (MemVT == EVT())
6984 return false;
6985
6986 if (N.getOpcode() != ISD::ADD)
6987 return false;
6988
6989 SDValue VScale = N.getOperand(1);
6990 if (VScale.getOpcode() != ISD::VSCALE)
6991 return false;
6992
6993 TypeSize TS = MemVT.getSizeInBits();
6994 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
6995 int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
6996
6997 if ((MulImm % MemWidthBytes) != 0)
6998 return false;
6999
7000 int64_t Offset = MulImm / MemWidthBytes;
7001 if (Offset < Min || Offset > Max)
7002 return false;
7003
7004 Base = N.getOperand(0);
7005 if (Base.getOpcode() == ISD::FrameIndex) {
7006 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7007 // We can only encode VL scaled offsets, so only fold in frame indexes
7008 // referencing SVE objects.
7010 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7011 }
7012
7013 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7014 return true;
7015}
7016
7017/// Select register plus register addressing mode for SVE, with scaled
7018/// offset.
7019bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7020 SDValue &Base,
7021 SDValue &Offset) {
7022 if (N.getOpcode() != ISD::ADD)
7023 return false;
7024
7025 // Process an ADD node.
7026 const SDValue LHS = N.getOperand(0);
7027 const SDValue RHS = N.getOperand(1);
7028
7029 // 8 bit data does not come with the SHL node, so it is treated
7030 // separately.
7031 if (Scale == 0) {
7032 Base = LHS;
7033 Offset = RHS;
7034 return true;
7035 }
7036
7037 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7038 int64_t ImmOff = C->getSExtValue();
7039 unsigned Size = 1 << Scale;
7040
7041 // To use the reg+reg addressing mode, the immediate must be a multiple of
7042 // the vector element's byte size.
7043 if (ImmOff % Size)
7044 return false;
7045
7046 SDLoc DL(N);
7047 Base = LHS;
7048 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7049 SDValue Ops[] = {Offset};
7050 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7051 Offset = SDValue(MI, 0);
7052 return true;
7053 }
7054
7055 // Check if the RHS is a shift node with a constant.
7056 if (RHS.getOpcode() != ISD::SHL)
7057 return false;
7058
7059 const SDValue ShiftRHS = RHS.getOperand(1);
7060 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7061 if (C->getZExtValue() == Scale) {
7062 Base = LHS;
7063 Offset = RHS.getOperand(0);
7064 return true;
7065 }
7066
7067 return false;
7068}
7069
7070bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7071 const AArch64TargetLowering *TLI =
7072 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7073
7074 return TLI->isAllActivePredicate(*CurDAG, N);
7075}
7076
7077bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7078 EVT VT = N.getValueType();
7079 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7080}
7081
7082bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7084 unsigned Scale) {
7085 // Try to untangle an ADD node into a 'reg + offset'
7086 if (N.getOpcode() == ISD::ADD)
7087 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
7088 int64_t ImmOff = C->getSExtValue();
7089 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0))) {
7090 Base = N.getOperand(0);
7091 Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7092 return true;
7093 }
7094 }
7095
7096 // By default, just match reg + 0.
7097 Base = N;
7098 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7099 return true;
7100}
unsigned SubReg
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms)
static int getIntOperandFromRegisterString(StringRef RegString)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, unsigned NumberOfIgnoredHighBits, EVT VT)
Does DstMask form a complementary pair with the mask provided by BitsToBeInserted,...
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N)
Instructions that accept extend modifiers like UXTW expect the register being extended to be a GPR32,...
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op, SDValue &Src, int &DstLSB, int &Width)
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, SDValue &Src, int &DstLSB, int &Width)
Does this tree qualify as an attempt to move a bitfield into position, essentially "(and (shl VAL,...
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG)
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB, unsigned NumberOfIgnoredLowBits, bool BiggerPattern)
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits=0, bool BiggerPattern=false)
static bool isShiftedMask(uint64_t Mask, EVT VT)
bool SelectSMETile(unsigned &BaseReg, unsigned TileNum)
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root)
Return the EVT of the data associated to a memory operation in Root.
static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isWorthFoldingADDlow(SDValue N)
If there's a use of this ADDlow that's not itself a load/store then we'll need to create a real ADD i...
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N)
getShiftTypeForNode - Translate a shift node to the corresponding ShiftType value.
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB)
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef< unsigned > Opcodes)
This function selects an opcode from a list of opcodes, which is expected to be the opcode for { 8-bi...
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, unsigned NumVec)
When PredVT is a scalable vector predicate in the form MVT::nx<M>xi1, it builds the correspondent sca...
static bool isPreferredADD(int64_t ImmOff)
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, uint64_t Imm, uint64_t MSB, unsigned Depth)
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount)
Create a machine node performing a notional SHL of Op by ShlAmount.
static bool isWorthFoldingSHL(SDValue V)
Determine whether it is worth it to fold SHL into the addressing mode.
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, bool BiggerPattern)
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, SDValue Src, SDValue Dst, SelectionDAG *CurDAG, const bool BiggerPattern)
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, SDValue Orig, unsigned Depth)
static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG)
static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth)
#define PASS_NAME
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth=0)
#define DEBUG_TYPE
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected)
static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore=false)
getExtendTypeForNode - Translate an extend node to the corresponding ExtendType value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, SDValue &ShiftedOperand, uint64_t &EncodedShiftImm)
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range, unsigned Size)
Check if the immediate offset is valid as a scaled immediate.
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
aarch64 promote const
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Size
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
#define R2(n)
uint64_t High
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Value * RHS
Value * LHS
support::ulittle16_t & Lo
Definition: aarch32.cpp:206
support::ulittle16_t & Hi
Definition: aarch32.cpp:205
DEMANGLE_DUMP_METHOD void dump() const
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
bool getExactInverse(APFloat *inv) const
Definition: APFloat.h:1334
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1185
Class for arbitrary precision integers.
Definition: APInt.h:76
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1620
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:236
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1589
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1548
void flipAllBits()
Toggle every bit to its opposite value.
Definition: APInt.h:1405
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition: APInt.h:488
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:836
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
const GlobalValue * getGlobal() const
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint8_t getStackID(int ObjectIdx) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
A description of a memory reference used in the backend.
An SDNode that represents everything that will be needed to construct a MachineInstr.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
iterator_range< use_iterator > uses()
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps)
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:447
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:675
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
size_t size() const
Definition: SmallVector.h:91
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:696
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
unsigned getID() const
Return the register class ID number.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:926
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint32_t parseGenericRegister(StringRef Name)
const SysReg * lookupSysRegByName(StringRef)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static bool processLogicalImmediate(uint64_t Imm, unsigned RegSize, uint64_t &Encoding)
processLogicalImmediate - Determine if an immediate value can be encoded as the immediate operand of ...
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static constexpr unsigned SVEBitsPerBlock
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:559
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1241
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1031
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:783
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:903
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:774
@ WRITE_REGISTER
Definition: ISDOpcodes.h:119
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1237
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:211
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:627
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition: ISDOpcodes.h:68
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:208
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:705
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:573
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:118
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:780
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1329
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1248
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:798
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:680
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:786
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ AssertZext
Definition: ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1472
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1503
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
@ Offset
Definition: DWP.cpp:456
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition: MathExtras.h:252
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:319
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:258
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1928
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:313
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:246
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAArch64ISelDag(AArch64TargetMachine &TM, CodeGenOptLevel OptLevel)
createAArch64ISelDag - This pass converts a legalized DAG into a AArch64-specific DAG,...
@ And
Bitwise or logical AND of integers.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
Extended Value Type.
Definition: ValueTypes.h:34
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:349
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:203
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:198
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40