1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64MachineFunctionInfo.h"
14#include "AArch64TargetMachine.h"
15#include "MCTargetDesc/AArch64AddressingModes.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/CodeGen/ISDOpcodes.h"
18#include "llvm/CodeGen/SelectionDAGISel.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/ErrorHandling.h"
25#include "llvm/Support/KnownBits.h"
26#include "llvm/Support/MathExtras.h"
27#include "llvm/Support/raw_ostream.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34//===--------------------------------------------------------------------===//
35/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
36/// instructions for SelectionDAG operations.
37///
38namespace {
39
40class AArch64DAGToDAGISel : public SelectionDAGISel {
41
42 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
43 /// make the right decision when generating code for different targets.
44 const AArch64Subtarget *Subtarget;
45
46public:
47 AArch64DAGToDAGISel() = delete;
48
49 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
50 CodeGenOptLevel OptLevel)
51 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
52
53 bool runOnMachineFunction(MachineFunction &MF) override {
54 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
55 return SelectionDAGISel::runOnMachineFunction(MF);
56 }
57
58 void Select(SDNode *Node) override;
59
60 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
61 /// inline asm expressions.
62 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
63 InlineAsm::ConstraintCode ConstraintID,
64 std::vector<SDValue> &OutOps) override;
65
66 template <signed Low, signed High, signed Scale>
67 bool SelectRDVLImm(SDValue N, SDValue &Imm);
68
69 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
70 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
71 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
72 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
73 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
74 return SelectShiftedRegister(N, false, Reg, Shift);
75 }
76 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
77 return SelectShiftedRegister(N, true, Reg, Shift);
78 }
79 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
80 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
81 }
82 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
83 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
84 }
85 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
86 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
87 }
88 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
89 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
90 }
91 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
92 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
93 }
94 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
95 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
96 }
97 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
98 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
99 }
100 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
101 return SelectAddrModeIndexed(N, 1, Base, OffImm);
102 }
103 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
104 return SelectAddrModeIndexed(N, 2, Base, OffImm);
105 }
106 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
107 return SelectAddrModeIndexed(N, 4, Base, OffImm);
108 }
109 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
110 return SelectAddrModeIndexed(N, 8, Base, OffImm);
111 }
112 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
113 return SelectAddrModeIndexed(N, 16, Base, OffImm);
114 }
115 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
116 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
117 }
118 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
119 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
120 }
121 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
122 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
123 }
124 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
125 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
126 }
127 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
128 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
129 }
130 template <unsigned Size, unsigned Max>
131 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
132 // Test if there is an appropriate addressing mode and check if the
133 // immediate fits.
134 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
135 if (Found) {
136 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
137 int64_t C = CI->getSExtValue();
138 if (C <= Max)
139 return true;
140 }
141 }
142
143 // Otherwise, base only, materialize address in register.
144 Base = N;
145 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
146 return true;
147 }
148
149 template<int Width>
150 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
151 SDValue &SignExtend, SDValue &DoShift) {
152 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
153 }
154
155 template<int Width>
156 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
157 SDValue &SignExtend, SDValue &DoShift) {
158 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
159 }
160
161 bool SelectExtractHigh(SDValue N, SDValue &Res) {
162 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
163 N = N->getOperand(0);
164 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
165 !isa<ConstantSDNode>(N->getOperand(1)))
166 return false;
167 EVT VT = N->getValueType(0);
168 EVT LVT = N->getOperand(0).getValueType();
169 unsigned Index = N->getConstantOperandVal(1);
170 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
171 Index != VT.getVectorNumElements())
172 return false;
173 Res = N->getOperand(0);
174 return true;
175 }
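  // Note: this only matches the high half of a 128-bit vector, i.e. an
  // EXTRACT_SUBVECTOR whose index equals the element count of the 64-bit
  // result type (e.g. index 2 when extracting v2i32 from v4i32).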
176
177 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
178 if (N.getOpcode() != AArch64ISD::VLSHR)
179 return false;
180 SDValue Op = N->getOperand(0);
181 EVT VT = Op.getValueType();
182 unsigned ShtAmt = N->getConstantOperandVal(1);
183 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
184 return false;
185
186 APInt Imm;
187 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
188 Imm = APInt(VT.getScalarSizeInBits(),
189 Op.getOperand(1).getConstantOperandVal(0)
190 << Op.getOperand(1).getConstantOperandVal(1));
191 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
192 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
193 Imm = APInt(VT.getScalarSizeInBits(),
194 Op.getOperand(1).getConstantOperandVal(0));
195 else
196 return false;
197
198 if (Imm != 1ULL << (ShtAmt - 1))
199 return false;
200
201 Res1 = Op.getOperand(0);
202 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
203 return true;
204 }
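  // In other words, this recognizes (x + (1 << (ShtAmt - 1))) >> ShtAmt, a
  // rounding right shift, and returns x plus the shift amount so the caller's
  // patterns can fold it into a rounding-shift instruction.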
205
206 bool SelectDupZeroOrUndef(SDValue N) {
207 switch(N->getOpcode()) {
208 case ISD::UNDEF:
209 return true;
210 case AArch64ISD::DUP:
211 case ISD::SPLAT_VECTOR: {
212 auto Opnd0 = N->getOperand(0);
213 if (isNullConstant(Opnd0))
214 return true;
215 if (isNullFPConstant(Opnd0))
216 return true;
217 break;
218 }
219 default:
220 break;
221 }
222
223 return false;
224 }
225
226 bool SelectDupZero(SDValue N) {
227 switch(N->getOpcode()) {
228 case AArch64ISD::DUP:
229 case ISD::SPLAT_VECTOR: {
230 auto Opnd0 = N->getOperand(0);
231 if (isNullConstant(Opnd0))
232 return true;
233 if (isNullFPConstant(Opnd0))
234 return true;
235 break;
236 }
237 }
238
239 return false;
240 }
241
242 bool SelectDupNegativeZero(SDValue N) {
243 switch(N->getOpcode()) {
244 case AArch64ISD::DUP:
245 case ISD::SPLAT_VECTOR: {
246 ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
247 return Const && Const->isZero() && Const->isNegative();
248 }
249 }
250
251 return false;
252 }
253
254 template<MVT::SimpleValueType VT>
255 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
256 return SelectSVEAddSubImm(N, VT, Imm, Shift);
257 }
258
259 template <MVT::SimpleValueType VT, bool Negate>
260 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
261 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
262 }
263
264 template <MVT::SimpleValueType VT>
265 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
266 return SelectSVECpyDupImm(N, VT, Imm, Shift);
267 }
268
269 template <MVT::SimpleValueType VT, bool Invert = false>
270 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
271 return SelectSVELogicalImm(N, VT, Imm, Invert);
272 }
273
274 template <MVT::SimpleValueType VT>
275 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
276 return SelectSVEArithImm(N, VT, Imm);
277 }
278
279 template <unsigned Low, unsigned High, bool AllowSaturation = false>
280 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
281 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
282 }
283
284 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
285 if (N->getOpcode() != ISD::SPLAT_VECTOR)
286 return false;
287
288 EVT EltVT = N->getValueType(0).getVectorElementType();
289 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
290 /* High */ EltVT.getFixedSizeInBits(),
291 /* AllowSaturation */ true, Imm);
292 }
293
294 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
295 template<signed Min, signed Max, signed Scale, bool Shift>
296 bool SelectCntImm(SDValue N, SDValue &Imm) {
297 if (!isa<ConstantSDNode>(N))
298 return false;
299
300 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
301 if (Shift)
302 MulImm = 1LL << MulImm;
303
304 if ((MulImm % std::abs(Scale)) != 0)
305 return false;
306
307 MulImm /= Scale;
308 if ((MulImm >= Min) && (MulImm <= Max)) {
309 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
310 return true;
311 }
312
313 return false;
314 }
315
316 template <signed Max, signed Scale>
317 bool SelectEXTImm(SDValue N, SDValue &Imm) {
318 if (!isa<ConstantSDNode>(N))
319 return false;
320
321 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
322
323 if (MulImm >= 0 && MulImm <= Max) {
324 MulImm *= Scale;
325 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
326 return true;
327 }
328
329 return false;
330 }
331
332 template <unsigned BaseReg, unsigned Max>
333 bool ImmToReg(SDValue N, SDValue &Imm) {
334 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
335 uint64_t C = CI->getZExtValue();
336
337 if (C > Max)
338 return false;
339
340 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
341 return true;
342 }
343 return false;
344 }
345
346 /// Form sequences of consecutive 64/128-bit registers for use in NEON
347 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
348/// between 1 and 4 elements. If it contains a single element, that is returned
349 /// unchanged; otherwise a REG_SEQUENCE value is returned.
350 SDValue createDTuple(ArrayRef<SDValue> Vecs);
351 SDValue createQTuple(ArrayRef<SDValue> Vecs);
352 // Form a sequence of SVE registers for instructions using list of vectors,
353 // e.g. structured loads and stores (ldN, stN).
354 SDValue createZTuple(ArrayRef<SDValue> Vecs);
355
356 // Similar to above, except the register must start at a multiple of the
357 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
358 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
359
360 /// Generic helper for the createDTuple/createQTuple
361 /// functions. Those should almost always be called instead.
362 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
363 const unsigned SubRegs[]);
364
365 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
366
367 bool tryIndexedLoad(SDNode *N);
368
369 bool trySelectStackSlotTagP(SDNode *N);
370 void SelectTagP(SDNode *N);
371
372 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
373 unsigned SubRegIdx);
374 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
375 unsigned SubRegIdx);
376 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
377 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
378 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
379 unsigned Opc_rr, unsigned Opc_ri,
380 bool IsIntr = false);
381 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
382 unsigned Scale, unsigned Opc_ri,
383 unsigned Opc_rr);
384 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
385 bool IsZmMulti, unsigned Opcode,
386 bool HasPred = false);
387 void SelectPExtPair(SDNode *N, unsigned Opc);
388 void SelectWhilePair(SDNode *N, unsigned Opc);
389 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
390 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
391 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
392 bool IsTupleInput, unsigned Opc);
393 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
394
395 template <unsigned MaxIdx, unsigned Scale>
396 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
397 unsigned Op);
398 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
399 unsigned Op, unsigned MaxIdx, unsigned Scale,
400 unsigned BaseReg = 0);
401 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
402 /// SVE Reg+Imm addressing mode.
403 template <int64_t Min, int64_t Max>
404 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
405 SDValue &OffImm);
406 /// SVE Reg+Reg address mode.
407 template <unsigned Scale>
408 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
409 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
410 }
411
412 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc,
413 uint32_t MaxImm);
414
415 template <unsigned MaxIdx, unsigned Scale>
416 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
417 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
418 }
419
420 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
421 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
422 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
423 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
424 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
425 unsigned Opc_rr, unsigned Opc_ri);
426 std::tuple<unsigned, SDValue, SDValue>
427 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
428 const SDValue &OldBase, const SDValue &OldOffset,
429 unsigned Scale);
430
431 bool tryBitfieldExtractOp(SDNode *N);
432 bool tryBitfieldExtractOpFromSExt(SDNode *N);
433 bool tryBitfieldInsertOp(SDNode *N);
434 bool tryBitfieldInsertInZeroOp(SDNode *N);
435 bool tryShiftAmountMod(SDNode *N);
436
437 bool tryReadRegister(SDNode *N);
438 bool tryWriteRegister(SDNode *N);
439
440 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
441 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
442
443 bool trySelectXAR(SDNode *N);
444
445// Include the pieces autogenerated from the target description.
446#include "AArch64GenDAGISel.inc"
447
448private:
449 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
450 SDValue &Shift);
451 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
452 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
453 SDValue &OffImm) {
454 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
455 }
456 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
457 unsigned Size, SDValue &Base,
458 SDValue &OffImm);
459 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
460 SDValue &OffImm);
461 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
462 SDValue &OffImm);
463 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
464 SDValue &Offset, SDValue &SignExtend,
465 SDValue &DoShift);
466 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
467 SDValue &Offset, SDValue &SignExtend,
468 SDValue &DoShift);
469 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
470 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
471 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
472 SDValue &Offset, SDValue &SignExtend);
473
474 template<unsigned RegWidth>
475 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
476 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
477 }
478
479 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
480
481 template<unsigned RegWidth>
482 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
483 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
484 }
485
486 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
487 unsigned Width);
488
489 bool SelectCMP_SWAP(SDNode *N);
490
491 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
492 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
493 bool Negate);
494 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
495 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
496
497 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
498 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
499 bool AllowSaturation, SDValue &Imm);
500
501 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
502 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
503 SDValue &Offset);
504 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
505 SDValue &Offset, unsigned Scale = 1);
506
507 bool SelectAllActivePredicate(SDValue N);
508 bool SelectAnyPredicate(SDValue N);
509};
510
511class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
512public:
513 static char ID;
514 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
515 CodeGenOptLevel OptLevel)
516 : SelectionDAGISelLegacy(
517 ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
518};
519} // end anonymous namespace
520
521char AArch64DAGToDAGISelLegacy::ID = 0;
522
523INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
524
525/// isIntImmediate - This method tests to see if the node is a constant
526/// operand. If so, Imm will receive the (zero-extended) value.
527static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
528 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
529 Imm = C->getZExtValue();
530 return true;
531 }
532 return false;
533}
534
535// isIntImmediate - This method tests to see if N is a constant operand.
536// If so, Imm will receive the value.
537static bool isIntImmediate(SDValue N, uint64_t &Imm) {
538 return isIntImmediate(N.getNode(), Imm);
539}
540
541// isOpcWithIntImmediate - This method tests to see if the node is a specific
542// opcode with an immediate integer right operand.
543// If so, Imm will receive the value.
544static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
545 uint64_t &Imm) {
546 return N->getOpcode() == Opc &&
547 isIntImmediate(N->getOperand(1).getNode(), Imm);
548}
549
550// isIntImmediateEq - This method tests to see if N is a constant operand that
551// is equivalent to 'ImmExpected'.
552#ifndef NDEBUG
553static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
554 uint64_t Imm;
555 if (!isIntImmediate(N.getNode(), Imm))
556 return false;
557 return Imm == ImmExpected;
558}
559#endif
560
561bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
562 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
563 std::vector<SDValue> &OutOps) {
564 switch(ConstraintID) {
565 default:
566 llvm_unreachable("Unexpected asm memory constraint");
567 case InlineAsm::ConstraintCode::m:
568 case InlineAsm::ConstraintCode::o:
569 case InlineAsm::ConstraintCode::Q:
570 // We need to make sure that this one operand does not end up in XZR, thus
571 // require the address to be in a PointerRegClass register.
572 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
573 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
574 SDLoc dl(Op);
575 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
576 SDValue NewOp =
577 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
578 dl, Op.getValueType(),
579 Op, RC), 0);
580 OutOps.push_back(NewOp);
581 return false;
582 }
583 return true;
584}
585
586/// SelectArithImmed - Select an immediate value that can be represented as
587/// a 12-bit value shifted left by either 0 or 12. If so, return true with
588/// Val set to the 12-bit value and Shift set to the shifter operand.
589bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
590 SDValue &Shift) {
591 // This function is called from the addsub_shifted_imm ComplexPattern,
592 // which lists [imm] as the list of opcodes it's interested in; however,
593 // we still need to check whether the operand is actually an immediate
594 // here because the ComplexPattern opcode list is only used in
595 // root-level opcode matching.
596 if (!isa<ConstantSDNode>(N.getNode()))
597 return false;
598
599 uint64_t Immed = N.getNode()->getAsZExtVal();
600 unsigned ShiftAmt;
601
602 if (Immed >> 12 == 0) {
603 ShiftAmt = 0;
604 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
605 ShiftAmt = 12;
606 Immed = Immed >> 12;
607 } else
608 return false;
609
610 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
611 SDLoc dl(N);
612 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
613 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
614 return true;
615}
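// For example, an immediate of 0x7ff000 has its low 12 bits clear and fits in
// 24 bits, so it selects as Val = 0x7ff with Shift = LSL #12 (as in
// "add x0, x1, #0x7ff, lsl #12"); 0x1001000 matches neither form and fails.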
616
617/// SelectNegArithImmed - As above, but negates the value before trying to
618/// select it.
619bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
620 SDValue &Shift) {
621 // This function is called from the addsub_shifted_imm ComplexPattern,
622 // which lists [imm] as the list of opcodes it's interested in; however,
623 // we still need to check whether the operand is actually an immediate
624 // here because the ComplexPattern opcode list is only used in
625 // root-level opcode matching.
626 if (!isa<ConstantSDNode>(N.getNode()))
627 return false;
628
629 // The immediate operand must be a 24-bit zero-extended immediate.
630 uint64_t Immed = N.getNode()->getAsZExtVal();
631
632 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
633 // have the opposite effect on the C flag, so this pattern mustn't match under
634 // those circumstances.
635 if (Immed == 0)
636 return false;
637
638 if (N.getValueType() == MVT::i32)
639 Immed = ~((uint32_t)Immed) + 1;
640 else
641 Immed = ~Immed + 1ULL;
642 if (Immed & 0xFFFFFFFFFF000000ULL)
643 return false;
644
645 Immed &= 0xFFFFFFULL;
646 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
647 Shift);
648}
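// For example, an i32 immediate of -4096 negates to 0x1000, which selects as
// Val = 1 with Shift = LSL #12; the add/sub patterns use this to flip between
// ADD and SUB (or CMP and CMN) when the negated immediate is encodable.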
649
650/// getShiftTypeForNode - Translate a shift node to the corresponding
651/// ShiftType value.
652static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
653 switch (N.getOpcode()) {
654 default:
655 return AArch64_AM::InvalidShiftExtend;
656 case ISD::SHL:
657 return AArch64_AM::LSL;
658 case ISD::SRL:
659 return AArch64_AM::LSR;
660 case ISD::SRA:
661 return AArch64_AM::ASR;
662 case ISD::ROTR:
663 return AArch64_AM::ROR;
664 }
665}
666
667/// Determine whether it is worth it to fold SHL into the addressing
668/// mode.
669static bool isWorthFoldingSHL(SDValue V) {
670 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
671 // It is worth folding a logical shift of up to three places.
672 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
673 if (!CSD)
674 return false;
675 unsigned ShiftVal = CSD->getZExtValue();
676 if (ShiftVal > 3)
677 return false;
678
679 // Check if this particular node is reused in any non-memory related
680 // operation. If yes, do not try to fold this node into the address
681 // computation, since the computation will be kept.
682 const SDNode *Node = V.getNode();
683 for (SDNode *UI : Node->uses())
684 if (!isa<MemSDNode>(*UI))
685 for (SDNode *UII : UI->uses())
686 if (!isa<MemSDNode>(*UII))
687 return false;
688 return true;
689}
690
691/// Determine whether it is worth folding V into an extended register
692/// addressing mode.
693bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
694 // Trivial if we are optimizing for code size or if there is only
695 // one use of the value.
696 if (CurDAG->shouldOptForSize() || V.hasOneUse())
697 return true;
698
699 // If a subtarget has a slow shift, folding a shift into multiple loads
700 // costs additional micro-ops.
701 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
702 return false;
703
704 // Check whether we're going to emit the address arithmetic anyway because
705 // it's used by a non-address operation.
706 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
707 return true;
708 if (V.getOpcode() == ISD::ADD) {
709 const SDValue LHS = V.getOperand(0);
710 const SDValue RHS = V.getOperand(1);
711 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
712 return true;
713 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
714 return true;
715 }
716
717 // It hurts otherwise, since the value will be reused.
718 return false;
719}
720
721/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
722/// so that more shifted-register operands can be selected.
723bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
724 SDValue &Shift) {
725 EVT VT = N.getValueType();
726 if (VT != MVT::i32 && VT != MVT::i64)
727 return false;
728
729 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
730 return false;
731 SDValue LHS = N.getOperand(0);
732 if (!LHS->hasOneUse())
733 return false;
734
735 unsigned LHSOpcode = LHS->getOpcode();
736 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
737 return false;
738
739 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
740 if (!ShiftAmtNode)
741 return false;
742
743 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
744 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
745 if (!RHSC)
746 return false;
747
748 APInt AndMask = RHSC->getAPIntValue();
749 unsigned LowZBits, MaskLen;
750 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
751 return false;
752
753 unsigned BitWidth = N.getValueSizeInBits();
754 SDLoc DL(LHS);
755 uint64_t NewShiftC;
756 unsigned NewShiftOp;
757 if (LHSOpcode == ISD::SHL) {
758 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
759 // BitWidth != LowZBits + MaskLen doesn't match the pattern
760 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
761 return false;
762
763 NewShiftC = LowZBits - ShiftAmtC;
764 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
765 } else {
766 if (LowZBits == 0)
767 return false;
768
769 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
770 NewShiftC = LowZBits + ShiftAmtC;
771 if (NewShiftC >= BitWidth)
772 return false;
773
774 // SRA needs all high bits
775 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
776 return false;
777
778 // SRL high bits can be 0 or 1
779 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
780 return false;
781
782 if (LHSOpcode == ISD::SRL)
783 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
784 else
785 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
786 }
787
788 assert(NewShiftC < BitWidth && "Invalid shift amount");
789 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
790 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
791 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
792 NewShiftAmt, BitWidthMinus1),
793 0);
794 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
795 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
796 return true;
797}
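// Illustrative i32 example: (and (shl x, 2), 0xFFFFFFF0) has LowZBits = 4 and
// MaskLen = 28, so it is rewritten as (ubfm x, #2, #31), i.e. (srl x, 2), used
// as a shifted register with LSL #4, which places bits x[29:2] at [31:4].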
798
799/// getExtendTypeForNode - Translate an extend node to the corresponding
800/// ExtendType value.
801static AArch64_AM::ShiftExtendType
802getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
803 if (N.getOpcode() == ISD::SIGN_EXTEND ||
804 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
805 EVT SrcVT;
806 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
807 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
808 else
809 SrcVT = N.getOperand(0).getValueType();
810
811 if (!IsLoadStore && SrcVT == MVT::i8)
812 return AArch64_AM::SXTB;
813 else if (!IsLoadStore && SrcVT == MVT::i16)
814 return AArch64_AM::SXTH;
815 else if (SrcVT == MVT::i32)
816 return AArch64_AM::SXTW;
817 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
818
819 return AArch64_AM::InvalidShiftExtend;
820 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
821 N.getOpcode() == ISD::ANY_EXTEND) {
822 EVT SrcVT = N.getOperand(0).getValueType();
823 if (!IsLoadStore && SrcVT == MVT::i8)
824 return AArch64_AM::UXTB;
825 else if (!IsLoadStore && SrcVT == MVT::i16)
826 return AArch64_AM::UXTH;
827 else if (SrcVT == MVT::i32)
828 return AArch64_AM::UXTW;
829 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
830
831 return AArch64_AM::InvalidShiftExtend;
832 } else if (N.getOpcode() == ISD::AND) {
833 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
834 if (!CSD)
835 return AArch64_AM::InvalidShiftExtend;
836 uint64_t AndMask = CSD->getZExtValue();
837
838 switch (AndMask) {
839 default:
840 return AArch64_AM::InvalidShiftExtend;
841 case 0xFF:
842 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
843 case 0xFFFF:
844 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
845 case 0xFFFFFFFF:
846 return AArch64_AM::UXTW;
847 }
848 }
849
850 return AArch64_AM::InvalidShiftExtend;
851}
852
853/// Determine whether it is worth folding V into an extended register of an
854/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
855/// instruction, in which case the shift should be treated as worth folding
856/// even if it has multiple uses.
857bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
858 // Trivial if we are optimizing for code size or if there is only
859 // one use of the value.
860 if (CurDAG->shouldOptForSize() || V.hasOneUse())
861 return true;
862
863 // If a subtarget has a fastpath LSL we can fold a logical shift into
864 // the add/sub and save a cycle.
865 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
866 V.getConstantOperandVal(1) <= 4 &&
867 getExtendTypeForNode(V.getOperand(0)) == AArch64_AM::InvalidShiftExtend)
868 return true;
869
870 // It hurts otherwise, since the value will be reused.
871 return false;
872}
873
874/// SelectShiftedRegister - Select a "shifted register" operand. If the value
875/// is not shifted, set the Shift operand to default of "LSL 0". The logical
876/// instructions allow the shifted register to be rotated, but the arithmetic
877/// instructions do not. The AllowROR parameter specifies whether ROR is
878/// supported.
879bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
880 SDValue &Reg, SDValue &Shift) {
881 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
882 return true;
883
884 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
885 if (ShType == AArch64_AM::InvalidShiftExtend)
886 return false;
887 if (!AllowROR && ShType == AArch64_AM::ROR)
888 return false;
889
890 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
891 unsigned BitSize = N.getValueSizeInBits();
892 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
893 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
894
895 Reg = N.getOperand(0);
896 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
897 return isWorthFoldingALU(N, true);
898 }
899
900 return false;
901}
902
903/// Instructions that accept extend modifiers like UXTW expect the register
904/// being extended to be a GPR32, but the incoming DAG might be acting on a
905/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
906/// this is the case.
907static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
908 if (N.getValueType() == MVT::i32)
909 return N;
910
911 SDLoc dl(N);
912 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
913}
914
915// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
916template<signed Low, signed High, signed Scale>
917bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
918 if (!isa<ConstantSDNode>(N))
919 return false;
920
921 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
922 if ((MulImm % std::abs(Scale)) == 0) {
923 int64_t RDVLImm = MulImm / Scale;
924 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
925 Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
926 return true;
927 }
928 }
929
930 return false;
931}
932
933/// SelectArithExtendedRegister - Select a "extended register" operand. This
934/// operand folds in an extend followed by an optional left shift.
935bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
936 SDValue &Shift) {
937 unsigned ShiftVal = 0;
938 AArch64_AM::ShiftExtendType Ext;
939
940 if (N.getOpcode() == ISD::SHL) {
941 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
942 if (!CSD)
943 return false;
944 ShiftVal = CSD->getZExtValue();
945 if (ShiftVal > 4)
946 return false;
947
948 Ext = getExtendTypeForNode(N.getOperand(0));
949 if (Ext == AArch64_AM::InvalidShiftExtend)
950 return false;
951
952 Reg = N.getOperand(0).getOperand(0);
953 } else {
954 Ext = getExtendTypeForNode(N);
955 if (Ext == AArch64_AM::InvalidShiftExtend)
956 return false;
957
958 Reg = N.getOperand(0);
959
960 // Don't match if a free 32-bit -> 64-bit zext can be used instead. Use
961 // isDef32 as a heuristic for when the operand is likely to be a 32-bit def.
962 auto isDef32 = [](SDValue N) {
963 unsigned Opc = N.getOpcode();
964 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
965 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
966 Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
967 Opc != ISD::FREEZE;
968 };
969 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
970 isDef32(Reg))
971 return false;
972 }
973
974 // AArch64 mandates that the RHS of the operation must use the smallest
975 // register class that could contain the size being extended from. Thus,
976 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
977 // there might not be an actual 32-bit value in the program. We can
978 // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
979 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
980 Reg = narrowIfNeeded(CurDAG, Reg);
981 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
982 MVT::i32);
983 return isWorthFoldingALU(N);
984}
985
986/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
987/// operand is referred to by instructions that have an SP operand.
988bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
989 SDValue &Shift) {
990 unsigned ShiftVal = 0;
991 AArch64_AM::ShiftExtendType Ext;
992
993 if (N.getOpcode() != ISD::SHL)
994 return false;
995
996 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
997 if (!CSD)
998 return false;
999 ShiftVal = CSD->getZExtValue();
1000 if (ShiftVal > 4)
1001 return false;
1002
1003 Ext = AArch64_AM::UXTX;
1004 Reg = N.getOperand(0);
1005 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1006 MVT::i32);
1007 return isWorthFoldingALU(N);
1008}
1009
1010/// If there's a use of this ADDlow that's not itself a load/store then we'll
1011/// need to create a real ADD instruction from it anyway and there's no point in
1012/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1013/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1014/// leads to duplicated ADRP instructions.
1015static bool isWorthFoldingADDlow(SDValue N) {
1016 for (auto *Use : N->uses()) {
1017 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
1018 Use->getOpcode() != ISD::ATOMIC_LOAD &&
1019 Use->getOpcode() != ISD::ATOMIC_STORE)
1020 return false;
1021
1022 // ldar and stlr have much more restrictive addressing modes (just a
1023 // register).
1024 if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getSuccessOrdering()))
1025 return false;
1026 }
1027
1028 return true;
1029}
1030
1031/// Check if the immediate offset is valid as a scaled immediate.
1032static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1033 unsigned Size) {
1034 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1035 Offset < (Range << Log2_32(Size)))
1036 return true;
1037 return false;
1038}
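// For example, with Range = 0x1000 and Size = 8 this accepts non-negative
// multiples of 8 below 32768 (0 .. 32760), i.e. the unsigned scaled 12-bit
// immediate range of an 8-byte LDR/STR.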
1039
1040/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1041/// immediate" address. The "Size" argument is the size in bytes of the memory
1042/// reference, which determines the scale.
1043bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1044 unsigned BW, unsigned Size,
1045 SDValue &Base,
1046 SDValue &OffImm) {
1047 SDLoc dl(N);
1048 const DataLayout &DL = CurDAG->getDataLayout();
1049 const TargetLowering *TLI = getTargetLowering();
1050 if (N.getOpcode() == ISD::FrameIndex) {
1051 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1052 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1053 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1054 return true;
1055 }
1056
1057 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
1058 // addressing mode selected here doesn't support labels/immediates, only base+offset.
1059 if (CurDAG->isBaseWithConstantOffset(N)) {
1060 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1061 if (IsSignedImm) {
1062 int64_t RHSC = RHS->getSExtValue();
1063 unsigned Scale = Log2_32(Size);
1064 int64_t Range = 0x1LL << (BW - 1);
1065
1066 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1067 RHSC < (Range << Scale)) {
1068 Base = N.getOperand(0);
1069 if (Base.getOpcode() == ISD::FrameIndex) {
1070 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1071 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1072 }
1073 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1074 return true;
1075 }
1076 } else {
1077 // unsigned Immediate
1078 uint64_t RHSC = RHS->getZExtValue();
1079 unsigned Scale = Log2_32(Size);
1080 uint64_t Range = 0x1ULL << BW;
1081
1082 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1083 Base = N.getOperand(0);
1084 if (Base.getOpcode() == ISD::FrameIndex) {
1085 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1086 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1087 }
1088 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1089 return true;
1090 }
1091 }
1092 }
1093 }
1094 // Base only. The address will be materialized into a register before
1095 // the memory is accessed.
1096 // add x0, Xbase, #offset
1097 // stp x1, x2, [x0]
1098 Base = N;
1099 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1100 return true;
1101}
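// For example, with IsSignedImm = true, BW = 7 and Size = 16 (a Q-register
// LDP/STP), a base+offset address folds when the offset is a multiple of 16 in
// [-1024, 1008], and OffImm becomes the offset divided by 16.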
1102
1103/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1104/// immediate" address. The "Size" argument is the size in bytes of the memory
1105/// reference, which determines the scale.
1106bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1107 SDValue &Base, SDValue &OffImm) {
1108 SDLoc dl(N);
1109 const DataLayout &DL = CurDAG->getDataLayout();
1110 const TargetLowering *TLI = getTargetLowering();
1111 if (N.getOpcode() == ISD::FrameIndex) {
1112 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1113 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1114 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1115 return true;
1116 }
1117
1118 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1119 GlobalAddressSDNode *GAN =
1120 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1121 Base = N.getOperand(0);
1122 OffImm = N.getOperand(1);
1123 if (!GAN)
1124 return true;
1125
1126 if (GAN->getOffset() % Size == 0 &&
1127 GAN->getGlobal()->getPointerAlignment(DL) >= Size)
1128 return true;
1129 }
1130
1131 if (CurDAG->isBaseWithConstantOffset(N)) {
1132 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1133 int64_t RHSC = (int64_t)RHS->getZExtValue();
1134 unsigned Scale = Log2_32(Size);
1135 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1136 Base = N.getOperand(0);
1137 if (Base.getOpcode() == ISD::FrameIndex) {
1138 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1139 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1140 }
1141 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1142 return true;
1143 }
1144 }
1145 }
1146
1147 // Before falling back to our general case, check if the unscaled
1148 // instructions can handle this. If so, that's preferable.
1149 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1150 return false;
1151
1152 // Base only. The address will be materialized into a register before
1153 // the memory is accessed.
1154 // add x0, Xbase, #offset
1155 // ldr x0, [x0]
1156 Base = N;
1157 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1158 return true;
1159}
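// For example, an 8-byte load at base + 32760 folds here with OffImm = 4095
// ("ldr x0, [x1, #32760]"), while base + 32768 is out of range and falls back
// to the base-only form, materializing the whole address first.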
1160
1161/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1162/// immediate" address. This should only match when there is an offset that
1163/// is not valid for a scaled immediate addressing mode. The "Size" argument
1164/// is the size in bytes of the memory reference, which is needed here to know
1165/// what is valid for a scaled immediate.
1166bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1167 SDValue &Base,
1168 SDValue &OffImm) {
1169 if (!CurDAG->isBaseWithConstantOffset(N))
1170 return false;
1171 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1172 int64_t RHSC = RHS->getSExtValue();
1173 if (RHSC >= -256 && RHSC < 256) {
1174 Base = N.getOperand(0);
1175 if (Base.getOpcode() == ISD::FrameIndex) {
1176 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1177 const TargetLowering *TLI = getTargetLowering();
1178 Base = CurDAG->getTargetFrameIndex(
1179 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1180 }
1181 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1182 return true;
1183 }
1184 }
1185 return false;
1186}
1187
1188static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
1189 SDLoc dl(N);
1190 SDValue ImpDef = SDValue(
1191 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1192 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1193 N);
1194}
1195
1196/// Check if the given SHL node (\p N), can be used to form an
1197/// extended register for an addressing mode.
1198bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1199 bool WantExtend, SDValue &Offset,
1200 SDValue &SignExtend) {
1201 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1202 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1203 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1204 return false;
1205
1206 SDLoc dl(N);
1207 if (WantExtend) {
1208 AArch64_AM::ShiftExtendType Ext =
1209 getExtendTypeForNode(N.getOperand(0), true);
1210 if (Ext == AArch64_AM::InvalidShiftExtend)
1211 return false;
1212
1213 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1214 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1215 MVT::i32);
1216 } else {
1217 Offset = N.getOperand(0);
1218 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1219 }
1220
1221 unsigned LegalShiftVal = Log2_32(Size);
1222 unsigned ShiftVal = CSD->getZExtValue();
1223
1224 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1225 return false;
1226
1227 return isWorthFoldingAddr(N, Size);
1228}
1229
1230bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1231 SDValue &Base, SDValue &Offset,
1232 SDValue &SignExtend,
1233 SDValue &DoShift) {
1234 if (N.getOpcode() != ISD::ADD)
1235 return false;
1236 SDValue LHS = N.getOperand(0);
1237 SDValue RHS = N.getOperand(1);
1238 SDLoc dl(N);
1239
1240 // We don't want to match immediate adds here, because they are better lowered
1241 // to the register-immediate addressing modes.
1242 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
1243 return false;
1244
1245 // Check if this particular node is reused in any non-memory related
1246 // operation. If yes, do not try to fold this node into the address
1247 // computation, since the computation will be kept.
1248 const SDNode *Node = N.getNode();
1249 for (SDNode *UI : Node->uses()) {
1250 if (!isa<MemSDNode>(*UI))
1251 return false;
1252 }
1253
1254 // Remember if it is worth folding N when it produces extended register.
1255 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1256
1257 // Try to match a shifted extend on the RHS.
1258 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1259 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1260 Base = LHS;
1261 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1262 return true;
1263 }
1264
1265 // Try to match a shifted extend on the LHS.
1266 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1267 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1268 Base = RHS;
1269 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1270 return true;
1271 }
1272
1273 // There was no shift, whatever else we find.
1274 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1275
1275
1276 AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
1277 // Try to match an unshifted extend on the LHS.
1278 if (IsExtendedRegisterWorthFolding &&
1279 (Ext = getExtendTypeForNode(LHS, true)) !=
1280 AArch64_AM::InvalidShiftExtend) {
1281 Base = RHS;
1282 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1283 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1284 MVT::i32);
1285 if (isWorthFoldingAddr(LHS, Size))
1286 return true;
1287 }
1288
1289 // Try to match an unshifted extend on the RHS.
1290 if (IsExtendedRegisterWorthFolding &&
1291 (Ext = getExtendTypeForNode(RHS, true)) !=
1292 AArch64_AM::InvalidShiftExtend) {
1293 Base = LHS;
1294 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1295 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1296 MVT::i32);
1297 if (isWorthFoldingAddr(RHS, Size))
1298 return true;
1299 }
1300
1301 return false;
1302}
1303
1304// Check if the given immediate is preferred by ADD. If an immediate can be
1305// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
1306// encoded by one MOVZ, return true.
1307static bool isPreferredADD(int64_t ImmOff) {
1308 // Constant in [0x0, 0xfff] can be encoded in ADD.
1309 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1310 return true;
1311 // Check if it can be encoded in an "ADD LSL #12".
1312 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1313 // As a single MOVZ is faster than an "ADD LSL #12", ignore such constants.
1314 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1315 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1316 return false;
1317}
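// For example, 0xfff is preferred (a plain ADD), 0x123000 is preferred (an
// "ADD ..., LSL #12" that no single MOVZ covers), while 0x5000 is not, because
// a single MOVZ already encodes it.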
1318
1319bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1320 SDValue &Base, SDValue &Offset,
1321 SDValue &SignExtend,
1322 SDValue &DoShift) {
1323 if (N.getOpcode() != ISD::ADD)
1324 return false;
1325 SDValue LHS = N.getOperand(0);
1326 SDValue RHS = N.getOperand(1);
1327 SDLoc DL(N);
1328
1329 // Check if this particular node is reused in any non-memory related
1330 // operation. If yes, do not try to fold this node into the address
1331 // computation, since the computation will be kept.
1332 const SDNode *Node = N.getNode();
1333 for (SDNode *UI : Node->uses()) {
1334 if (!isa<MemSDNode>(*UI))
1335 return false;
1336 }
1337
1338 // Watch out: if RHS is a wide immediate, it cannot be selected into the
1339 // [BaseReg+Imm] addressing mode and may not be encodable in ADD/SUB either.
1340 // In that case the [BaseReg + 0] address mode would be used, generating
1341 // instructions like:
1342 // MOV X0, WideImmediate
1343 // ADD X1, BaseReg, X0
1344 // LDR X2, [X1, 0]
1345 // For such situation, using [BaseReg, XReg] addressing mode can save one
1346 // ADD/SUB:
1347 // MOV X0, WideImmediate
1348 // LDR X2, [BaseReg, X0]
1349 if (isa<ConstantSDNode>(RHS)) {
1350 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1351 // Skip if the immediate can be selected by the load/store addressing mode.
1352 // Also skip if the immediate can be encoded by a single ADD (SUB is also
1353 // checked by using -ImmOff).
1354 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1355 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1356 return false;
1357
1358 SDValue Ops[] = { RHS };
1359 SDNode *MOVI =
1360 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1361 SDValue MOVIV = SDValue(MOVI, 0);
1362 // This ADD of two X registers will be selected into [Reg+Reg] mode.
1363 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1364 }
1365
1366 // Remember if it is worth folding N when it produces extended register.
1367 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1368
1369 // Try to match a shifted extend on the RHS.
1370 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1371 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1372 Base = LHS;
1373 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1374 return true;
1375 }
1376
1377 // Try to match a shifted extend on the LHS.
1378 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1379 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1380 Base = RHS;
1381 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1382 return true;
1383 }
1384
1385 // Match any non-shifted, non-extend, non-immediate add expression.
1386 Base = LHS;
1387 Offset = RHS;
1388 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1389 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1390 // Reg1 + Reg2 is free: no check needed.
1391 return true;
1392}
1393
1394SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1395 static const unsigned RegClassIDs[] = {
1396 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1397 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1398 AArch64::dsub2, AArch64::dsub3};
1399
1400 return createTuple(Regs, RegClassIDs, SubRegs);
1401}
1402
1403SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1404 static const unsigned RegClassIDs[] = {
1405 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1406 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1407 AArch64::qsub2, AArch64::qsub3};
1408
1409 return createTuple(Regs, RegClassIDs, SubRegs);
1410}
1411
1412SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1413 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1414 AArch64::ZPR3RegClassID,
1415 AArch64::ZPR4RegClassID};
1416 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1417 AArch64::zsub2, AArch64::zsub3};
1418
1419 return createTuple(Regs, RegClassIDs, SubRegs);
1420}
1421
1422SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1423 assert(Regs.size() == 2 || Regs.size() == 4);
1424
1425 // The createTuple interface requires 3 RegClassIDs for each possible
1426 // tuple type even though we only have them for ZPR2 and ZPR4.
1427 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1428 AArch64::ZPR4Mul4RegClassID};
1429 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1430 AArch64::zsub2, AArch64::zsub3};
1431 return createTuple(Regs, RegClassIDs, SubRegs);
1432}
1433
1434SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1435 const unsigned RegClassIDs[],
1436 const unsigned SubRegs[]) {
1437 // There's no special register-class for a vector-list of 1 element: it's just
1438 // a vector.
1439 if (Regs.size() == 1)
1440 return Regs[0];
1441
1442 assert(Regs.size() >= 2 && Regs.size() <= 4);
1443
1444 SDLoc DL(Regs[0]);
1445
1446 SmallVector<SDValue, 4> Ops;
1447
1448 // First operand of REG_SEQUENCE is the desired RegClass.
1449 Ops.push_back(
1450 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1451
1452 // Then we get pairs of source & subregister-position for the components.
1453 for (unsigned i = 0; i < Regs.size(); ++i) {
1454 Ops.push_back(Regs[i]);
1455 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1456 }
1457
1458 SDNode *N =
1459 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1460 return SDValue(N, 0);
1461}
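// For example, passing three 128-bit vectors through createQTuple yields a
// REG_SEQUENCE of the QQQ register class with the inputs placed in qsub0,
// qsub1 and qsub2.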
1462
1463void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1464 bool isExt) {
1465 SDLoc dl(N);
1466 EVT VT = N->getValueType(0);
1467
1468 unsigned ExtOff = isExt;
1469
1470 // Form a REG_SEQUENCE to force register allocation.
1471 unsigned Vec0Off = ExtOff + 1;
1472 SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1473 N->op_begin() + Vec0Off + NumVecs);
1474 SDValue RegSeq = createQTuple(Regs);
1475
1476 SmallVector<SDValue, 6> Ops;
1477 if (isExt)
1478 Ops.push_back(N->getOperand(1));
1479 Ops.push_back(RegSeq);
1480 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1481 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1482}
1483
1484bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1485 LoadSDNode *LD = cast<LoadSDNode>(N);
1486 if (LD->isUnindexed())
1487 return false;
1488 EVT VT = LD->getMemoryVT();
1489 EVT DstVT = N->getValueType(0);
1490 ISD::MemIndexedMode AM = LD->getAddressingMode();
1491 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1492
1493 // We're not doing validity checking here. That was done when checking
1494 // if we should mark the load as indexed or not. We're just selecting
1495 // the right instruction.
1496 unsigned Opcode = 0;
1497
1498 ISD::LoadExtType ExtType = LD->getExtensionType();
1499 bool InsertTo64 = false;
1500 if (VT == MVT::i64)
1501 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1502 else if (VT == MVT::i32) {
1503 if (ExtType == ISD::NON_EXTLOAD)
1504 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1505 else if (ExtType == ISD::SEXTLOAD)
1506 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1507 else {
1508 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1509 InsertTo64 = true;
1510 // The result of the load is only i32. It's the subreg_to_reg that makes
1511 // it into an i64.
1512 DstVT = MVT::i32;
1513 }
1514 } else if (VT == MVT::i16) {
1515 if (ExtType == ISD::SEXTLOAD) {
1516 if (DstVT == MVT::i64)
1517 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1518 else
1519 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1520 } else {
1521 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1522 InsertTo64 = DstVT == MVT::i64;
1523 // The result of the load is only i32. It's the subreg_to_reg that makes
1524 // it into an i64.
1525 DstVT = MVT::i32;
1526 }
1527 } else if (VT == MVT::i8) {
1528 if (ExtType == ISD::SEXTLOAD) {
1529 if (DstVT == MVT::i64)
1530 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1531 else
1532 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1533 } else {
1534 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1535 InsertTo64 = DstVT == MVT::i64;
1536 // The result of the load is only i32. It's the subreg_to_reg that makes
1537 // it into an i64.
1538 DstVT = MVT::i32;
1539 }
1540 } else if (VT == MVT::f16) {
1541 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1542 } else if (VT == MVT::bf16) {
1543 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1544 } else if (VT == MVT::f32) {
1545 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1546 } else if (VT == MVT::f64 || VT.is64BitVector()) {
1547 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1548 } else if (VT.is128BitVector()) {
1549 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1550 } else
1551 return false;
1552 SDValue Chain = LD->getChain();
1553 SDValue Base = LD->getBasePtr();
1554 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1555 int OffsetVal = (int)OffsetOp->getZExtValue();
1556 SDLoc dl(N);
1557 SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1558 SDValue Ops[] = { Base, Offset, Chain };
1559 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1560 MVT::Other, Ops);
1561
1562 // Transfer memoperands.
1563 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1564 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1565
1566 // Either way, we're replacing the node, so tell the caller that.
1567 SDValue LoadedVal = SDValue(Res, 1);
1568 if (InsertTo64) {
1569 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1570 LoadedVal =
1571 SDValue(CurDAG->getMachineNode(
1572 AArch64::SUBREG_TO_REG, dl, MVT::i64,
1573 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1574 SubReg),
1575 0);
1576 }
1577
1578 ReplaceUses(SDValue(N, 0), LoadedVal);
1579 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1580 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1581 CurDAG->RemoveDeadNode(N);
1582 return true;
1583}
1584
1585void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1586 unsigned SubRegIdx) {
1587 SDLoc dl(N);
1588 EVT VT = N->getValueType(0);
1589 SDValue Chain = N->getOperand(0);
1590
1591 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1592 Chain};
1593
1594 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1595
1596 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1597 SDValue SuperReg = SDValue(Ld, 0);
1598 for (unsigned i = 0; i < NumVecs; ++i)
1599 ReplaceUses(SDValue(N, i),
1600 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1601
1602 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1603
1604 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1605 // because it's too simple to have needed special treatment during lowering.
1606 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1607 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1608 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1609 }
1610
1611 CurDAG->RemoveDeadNode(N);
1612}
1613
1614void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1615 unsigned Opc, unsigned SubRegIdx) {
1616 SDLoc dl(N);
1617 EVT VT = N->getValueType(0);
1618 SDValue Chain = N->getOperand(0);
1619
1620 SDValue Ops[] = {N->getOperand(1), // Mem operand
1621 N->getOperand(2), // Incremental
1622 Chain};
1623
1624 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1625 MVT::Untyped, MVT::Other};
1626
1627 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1628
1629 // Update uses of write back register
1630 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1631
1632 // Update uses of vector list
1633 SDValue SuperReg = SDValue(Ld, 1);
1634 if (NumVecs == 1)
1635 ReplaceUses(SDValue(N, 0), SuperReg);
1636 else
1637 for (unsigned i = 0; i < NumVecs; ++i)
1638 ReplaceUses(SDValue(N, i),
1639 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1640
1641 // Update the chain
1642 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1643 CurDAG->RemoveDeadNode(N);
1644}
1645
1646/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1647/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1648/// new Base and an SDValue representing the new offset.
1649std::tuple<unsigned, SDValue, SDValue>
1650AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1651 unsigned Opc_ri,
1652 const SDValue &OldBase,
1653 const SDValue &OldOffset,
1654 unsigned Scale) {
1655 SDValue NewBase = OldBase;
1656 SDValue NewOffset = OldOffset;
1657 // Detect a possible Reg+Imm addressing mode.
1658 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1659 N, OldBase, NewBase, NewOffset);
1660
1661 // Detect a possible reg+reg addressing mode, but only if we haven't already
1662 // detected a Reg+Imm one.
1663 const bool IsRegReg =
1664 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1665
1666 // Select the instruction.
1667 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1668}
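// Editor's note, not upstream code: a brief illustration of the selection
// order above. The Reg+Imm form (Opc_ri) is preferred because the Reg+Reg
// check only runs when SelectAddrModeIndexedSVE fails; if neither addressing
// mode matches, the original Base/Offset pair is returned unchanged together
// with Opc_ri, so callers fall back to the immediate form with offset zero.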
1669
1670enum class SelectTypeKind {
1671 Int1 = 0,
1672 Int = 1,
1673 FP = 2,
1674 AnyType = 3,
1675};
1676
1677/// This function selects an opcode from a list of opcodes, which is
1678/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
1679/// element types, in this order.
1680template <SelectTypeKind Kind>
1681static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1682 // Only match scalable vector VTs
1683 if (!VT.isScalableVector())
1684 return 0;
1685
1686 EVT EltVT = VT.getVectorElementType();
1687 unsigned Key = VT.getVectorMinNumElements();
1688 switch (Kind) {
1689 case SelectTypeKind::AnyType:
1690 break;
1691 case SelectTypeKind::Int:
1692 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1693 EltVT != MVT::i64)
1694 return 0;
1695 break;
1696 case SelectTypeKind::Int1:
1697 if (EltVT != MVT::i1)
1698 return 0;
1699 break;
1700 case SelectTypeKind::FP:
1701 if (EltVT == MVT::bf16)
1702 Key = 16;
1703 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
1704 EltVT != MVT::f64)
1705 return 0;
1706 break;
1707 }
1708
1709 unsigned Offset;
1710 switch (Key) {
1711 case 16: // 8-bit or bf16
1712 Offset = 0;
1713 break;
1714 case 8: // 16-bit
1715 Offset = 1;
1716 break;
1717 case 4: // 32-bit
1718 Offset = 2;
1719 break;
1720 case 2: // 64-bit
1721 Offset = 3;
1722 break;
1723 default:
1724 return 0;
1725 }
1726
1727 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1728}
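// Editor's note, not upstream code: a worked example of the Key -> Offset
// mapping above. nxv8i16 with Kind == Int has a minimum of 8 elements, so
// Key == 8, Offset == 1 and Opcodes[1] (the 16-bit form) is returned;
// nxv2f64 with Kind == FP gives Key == 2 and Opcodes[3]. If the opcode list
// is shorter than the computed index, 0 is returned and the caller treats it
// as "no matching opcode".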
1729
1730// This function is almost identical to SelectWhilePair, but has an
1731// extra check on the range of the immediate operand.
1732// TODO: Merge these two functions together at some point?
1733void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
1734 // Immediate can be either 0 or 1.
1735 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
1736 if (Imm->getZExtValue() > 1)
1737 return;
1738
1739 SDLoc DL(N);
1740 EVT VT = N->getValueType(0);
1741 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1742 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1743 SDValue SuperReg = SDValue(WhilePair, 0);
1744
1745 for (unsigned I = 0; I < 2; ++I)
1746 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1747 AArch64::psub0 + I, DL, VT, SuperReg));
1748
1749 CurDAG->RemoveDeadNode(N);
1750}
1751
1752void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1753 SDLoc DL(N);
1754 EVT VT = N->getValueType(0);
1755
1756 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1757
1758 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1759 SDValue SuperReg = SDValue(WhilePair, 0);
1760
1761 for (unsigned I = 0; I < 2; ++I)
1762 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1763 AArch64::psub0 + I, DL, VT, SuperReg));
1764
1765 CurDAG->RemoveDeadNode(N);
1766}
1767
1768void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1769 unsigned Opcode) {
1770 EVT VT = N->getValueType(0);
1771 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1772 SDValue Ops = createZTuple(Regs);
1773 SDLoc DL(N);
1774 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1775 SDValue SuperReg = SDValue(Intrinsic, 0);
1776 for (unsigned i = 0; i < NumVecs; ++i)
1777 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1778 AArch64::zsub0 + i, DL, VT, SuperReg));
1779
1780 CurDAG->RemoveDeadNode(N);
1781}
1782
1783void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
1784 unsigned NumVecs,
1785 bool IsZmMulti,
1786 unsigned Opcode,
1787 bool HasPred) {
1788 assert(Opcode != 0 && "Unexpected opcode");
1789
1790 SDLoc DL(N);
1791 EVT VT = N->getValueType(0);
1792 unsigned FirstVecIdx = HasPred ? 2 : 1;
1793
1794 auto GetMultiVecOperand = [=](unsigned StartIdx) {
1795 SmallVector<SDValue, 4> Regs(N->op_begin() + StartIdx,
1796 N->op_begin() + StartIdx + NumVecs);
1797 return createZMulTuple(Regs);
1798 };
1799
1800 SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
1801
1802 SDValue Zm;
1803 if (IsZmMulti)
1804 Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
1805 else
1806 Zm = N->getOperand(NumVecs + FirstVecIdx);
1807
1808 SDNode *Intrinsic;
1809 if (HasPred)
1810 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
1811 N->getOperand(1), Zdn, Zm);
1812 else
1813 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
1814 SDValue SuperReg = SDValue(Intrinsic, 0);
1815 for (unsigned i = 0; i < NumVecs; ++i)
1816 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1817 AArch64::zsub0 + i, DL, VT, SuperReg));
1818
1819 CurDAG->RemoveDeadNode(N);
1820}
1821
1822void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
1823 unsigned Scale, unsigned Opc_ri,
1824 unsigned Opc_rr, bool IsIntr) {
1825 assert(Scale < 5 && "Invalid scaling value.");
1826 SDLoc DL(N);
1827 EVT VT = N->getValueType(0);
1828 SDValue Chain = N->getOperand(0);
1829
1830 // Optimize addressing mode.
1831 SDValue Base, Offset;
1832 unsigned Opc;
1833 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1834 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
1835 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
1836
1837 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
1838 Base, // Memory operand
1839 Offset, Chain};
1840
1841 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1842
1843 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1844 SDValue SuperReg = SDValue(Load, 0);
1845 for (unsigned i = 0; i < NumVecs; ++i)
1846 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1847 AArch64::zsub0 + i, DL, VT, SuperReg));
1848
1849 // Copy chain
1850 unsigned ChainIdx = NumVecs;
1851 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1852 CurDAG->RemoveDeadNode(N);
1853}
1854
1855void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
1856 unsigned NumVecs,
1857 unsigned Scale,
1858 unsigned Opc_ri,
1859 unsigned Opc_rr) {
1860 assert(Scale < 4 && "Invalid scaling value.");
1861 SDLoc DL(N);
1862 EVT VT = N->getValueType(0);
1863 SDValue Chain = N->getOperand(0);
1864
1865 SDValue PNg = N->getOperand(2);
1866 SDValue Base = N->getOperand(3);
1867 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
1868 unsigned Opc;
1869 std::tie(Opc, Base, Offset) =
1870 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
1871
1872 SDValue Ops[] = {PNg, // Predicate-as-counter
1873 Base, // Memory operand
1874 Offset, Chain};
1875
1876 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1877
1878 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1879 SDValue SuperReg = SDValue(Load, 0);
1880 for (unsigned i = 0; i < NumVecs; ++i)
1881 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1882 AArch64::zsub0 + i, DL, VT, SuperReg));
1883
1884 // Copy chain
1885 unsigned ChainIdx = NumVecs;
1886 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1887 CurDAG->RemoveDeadNode(N);
1888}
1889
1890void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
1891 unsigned Opcode) {
1892 if (N->getValueType(0) != MVT::nxv4f32)
1893 return;
1894 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
1895}
1896
1897void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
1898 unsigned NumOutVecs,
1899 unsigned Opc, uint32_t MaxImm) {
1900 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
1901 if (Imm->getZExtValue() > MaxImm)
1902 return;
1903
1904 SDValue ZtValue;
1905 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
1906 return;
1907 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)};
1908 SDLoc DL(Node);
1909 EVT VT = Node->getValueType(0);
1910
1911 SDNode *Instruction =
1912 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
1913 SDValue SuperReg = SDValue(Instruction, 0);
1914
1915 for (unsigned I = 0; I < NumOutVecs; ++I)
1916 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
1917 AArch64::zsub0 + I, DL, VT, SuperReg));
1918
1919 // Copy chain
1920 unsigned ChainIdx = NumOutVecs;
1921 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
1922 CurDAG->RemoveDeadNode(Node);
1923}
1924
1925void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
1926 unsigned Op) {
1927 SDLoc DL(N);
1928 EVT VT = N->getValueType(0);
1929
1930 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1931 SDValue Zd = createZMulTuple(Regs);
1932 SDValue Zn = N->getOperand(1 + NumVecs);
1933 SDValue Zm = N->getOperand(2 + NumVecs);
1934
1935 SDValue Ops[] = {Zd, Zn, Zm};
1936
1937 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
1938 SDValue SuperReg = SDValue(Intrinsic, 0);
1939 for (unsigned i = 0; i < NumVecs; ++i)
1940 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1941 AArch64::zsub0 + i, DL, VT, SuperReg));
1942
1943 CurDAG->RemoveDeadNode(N);
1944}
1945
1946bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
1947 switch (BaseReg) {
1948 default:
1949 return false;
1950 case AArch64::ZA:
1951 case AArch64::ZAB0:
1952 if (TileNum == 0)
1953 break;
1954 return false;
1955 case AArch64::ZAH0:
1956 if (TileNum <= 1)
1957 break;
1958 return false;
1959 case AArch64::ZAS0:
1960 if (TileNum <= 3)
1961 break;
1962 return false;
1963 case AArch64::ZAD0:
1964 if (TileNum <= 7)
1965 break;
1966 return false;
1967 }
1968
1969 BaseReg += TileNum;
1970 return true;
1971}
1972
1973template <unsigned MaxIdx, unsigned Scale>
1974void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
1975 unsigned BaseReg, unsigned Op) {
1976 unsigned TileNum = 0;
1977 if (BaseReg != AArch64::ZA)
1978 TileNum = N->getConstantOperandVal(2);
1979
1980 if (!SelectSMETile(BaseReg, TileNum))
1981 return;
1982
1983 SDValue SliceBase, Base, Offset;
1984 if (BaseReg == AArch64::ZA)
1985 SliceBase = N->getOperand(2);
1986 else
1987 SliceBase = N->getOperand(3);
1988
1989 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
1990 return;
1991
1992 SDLoc DL(N);
1993 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
1994 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
1995 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
1996
1997 EVT VT = N->getValueType(0);
1998 for (unsigned I = 0; I < NumVecs; ++I)
1999 ReplaceUses(SDValue(N, I),
2000 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2001 SDValue(Mov, 0)));
2002 // Copy chain
2003 unsigned ChainIdx = NumVecs;
2004 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2005 CurDAG->RemoveDeadNode(N);
2006}
2007
2008void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2009 unsigned Op, unsigned MaxIdx,
2010 unsigned Scale, unsigned BaseReg) {
2011 // Slice can be in different positions
2012 // The array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2013 // The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2014 SDValue SliceBase = N->getOperand(2);
2015 if (BaseReg != AArch64::ZA)
2016 SliceBase = N->getOperand(3);
2017
2018 SDValue Base, Offset;
2019 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2020 return;
2021 // The correct Za tile number is computed in Machine Instruction
2022 // See EmitZAInstr
2023 // DAG cannot select Za tile as an output register with ZReg
2024 SDLoc DL(N);
2025 SmallVector<SDValue, 6> Ops;
2026 if (BaseReg != AArch64::ZA)
2027 Ops.push_back(N->getOperand(2));
2028 Ops.push_back(Base);
2029 Ops.push_back(Offset);
2030 Ops.push_back(N->getOperand(0)); //Chain
2031 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2032
2033 EVT VT = N->getValueType(0);
2034 for (unsigned I = 0; I < NumVecs; ++I)
2035 ReplaceUses(SDValue(N, I),
2036 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2037 SDValue(Mov, 0)));
2038
2039 // Copy chain
2040 unsigned ChainIdx = NumVecs;
2041 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2042 CurDAG->RemoveDeadNode(N);
2043}
2044
2045void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2046 unsigned NumOutVecs,
2047 bool IsTupleInput,
2048 unsigned Opc) {
2049 SDLoc DL(N);
2050 EVT VT = N->getValueType(0);
2051 unsigned NumInVecs = N->getNumOperands() - 1;
2052
2053 SmallVector<SDValue, 4> Ops;
2054 if (IsTupleInput) {
2055 assert((NumInVecs == 2 || NumInVecs == 4) &&
2056 "Don't know how to handle multi-register input!");
2057 SmallVector<SDValue, 4> Regs(N->op_begin() + 1,
2058 N->op_begin() + 1 + NumInVecs);
2059 Ops.push_back(createZMulTuple(Regs));
2060 } else {
2061 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2062 for (unsigned I = 0; I < NumInVecs; I++)
2063 Ops.push_back(N->getOperand(1 + I));
2064 }
2065
2066 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2067 SDValue SuperReg = SDValue(Res, 0);
2068
2069 for (unsigned I = 0; I < NumOutVecs; I++)
2070 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2071 AArch64::zsub0 + I, DL, VT, SuperReg));
2072 CurDAG->RemoveDeadNode(N);
2073}
2074
2075void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2076 unsigned Opc) {
2077 SDLoc dl(N);
2078 EVT VT = N->getOperand(2)->getValueType(0);
2079
2080 // Form a REG_SEQUENCE to force register allocation.
2081 bool Is128Bit = VT.getSizeInBits() == 128;
2082 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2083 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2084
2085 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2086 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2087
2088 // Transfer memoperands.
2089 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2090 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2091
2092 ReplaceNode(N, St);
2093}
2094
2095void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2096 unsigned Scale, unsigned Opc_rr,
2097 unsigned Opc_ri) {
2098 SDLoc dl(N);
2099
2100 // Form a REG_SEQUENCE to force register allocation.
2101 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2102 SDValue RegSeq = createZTuple(Regs);
2103
2104 // Optimize addressing mode.
2105 unsigned Opc;
2106 SDValue Base, Offset;
2107 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2108 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2109 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2110
2111 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2112 Base, // address
2113 Offset, // offset
2114 N->getOperand(0)}; // chain
2115 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2116
2117 ReplaceNode(N, St);
2118}
2119
2120bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2121 SDValue &OffImm) {
2122 SDLoc dl(N);
2123 const DataLayout &DL = CurDAG->getDataLayout();
2124 const TargetLowering *TLI = getTargetLowering();
2125
2126 // Try to match it for the frame address
2127 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2128 int FI = FINode->getIndex();
2129 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2130 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2131 return true;
2132 }
2133
2134 return false;
2135}
2136
2137void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2138 unsigned Opc) {
2139 SDLoc dl(N);
2140 EVT VT = N->getOperand(2)->getValueType(0);
2141 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2142 MVT::Other}; // Type for the Chain
2143
2144 // Form a REG_SEQUENCE to force register allocation.
2145 bool Is128Bit = VT.getSizeInBits() == 128;
2146 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2147 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2148
2149 SDValue Ops[] = {RegSeq,
2150 N->getOperand(NumVecs + 1), // base register
2151 N->getOperand(NumVecs + 2), // Incremental
2152 N->getOperand(0)}; // Chain
2153 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2154
2155 ReplaceNode(N, St);
2156}
2157
2158namespace {
2159/// WidenVector - Given a value in the V64 register class, produce the
2160/// equivalent value in the V128 register class.
2161class WidenVector {
2162 SelectionDAG &DAG;
2163
2164public:
2165 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2166
2167 SDValue operator()(SDValue V64Reg) {
2168 EVT VT = V64Reg.getValueType();
2169 unsigned NarrowSize = VT.getVectorNumElements();
2170 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2171 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2172 SDLoc DL(V64Reg);
2173
2174 SDValue Undef =
2175 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2176 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2177 }
2178};
2179} // namespace
2180
2181/// NarrowVector - Given a value in the V128 register class, produce the
2182/// equivalent value in the V64 register class.
2183 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2184 EVT VT = V128Reg.getValueType();
2185 unsigned WideSize = VT.getVectorNumElements();
2186 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2187 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2188
2189 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2190 V128Reg);
2191}
2192
2193void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2194 unsigned Opc) {
2195 SDLoc dl(N);
2196 EVT VT = N->getValueType(0);
2197 bool Narrow = VT.getSizeInBits() == 64;
2198
2199 // Form a REG_SEQUENCE to force register allocation.
2200 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2201
2202 if (Narrow)
2203 transform(Regs, Regs.begin(),
2204 WidenVector(*CurDAG));
2205
2206 SDValue RegSeq = createQTuple(Regs);
2207
2208 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2209
2210 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2211
2212 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2213 N->getOperand(NumVecs + 3), N->getOperand(0)};
2214 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2215 SDValue SuperReg = SDValue(Ld, 0);
2216
2217 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2218 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2219 AArch64::qsub2, AArch64::qsub3 };
2220 for (unsigned i = 0; i < NumVecs; ++i) {
2221 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2222 if (Narrow)
2223 NV = NarrowVector(NV, *CurDAG);
2224 ReplaceUses(SDValue(N, i), NV);
2225 }
2226
2227 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2228 CurDAG->RemoveDeadNode(N);
2229}
2230
2231void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2232 unsigned Opc) {
2233 SDLoc dl(N);
2234 EVT VT = N->getValueType(0);
2235 bool Narrow = VT.getSizeInBits() == 64;
2236
2237 // Form a REG_SEQUENCE to force register allocation.
2238 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2239
2240 if (Narrow)
2241 transform(Regs, Regs.begin(),
2242 WidenVector(*CurDAG));
2243
2244 SDValue RegSeq = createQTuple(Regs);
2245
2246 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2247 RegSeq->getValueType(0), MVT::Other};
2248
2249 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2250
2251 SDValue Ops[] = {RegSeq,
2252 CurDAG->getTargetConstant(LaneNo, dl,
2253 MVT::i64), // Lane Number
2254 N->getOperand(NumVecs + 2), // Base register
2255 N->getOperand(NumVecs + 3), // Incremental
2256 N->getOperand(0)};
2257 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2258
2259 // Update uses of the write back register
2260 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2261
2262 // Update uses of the vector list
2263 SDValue SuperReg = SDValue(Ld, 1);
2264 if (NumVecs == 1) {
2265 ReplaceUses(SDValue(N, 0),
2266 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2267 } else {
2268 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2269 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2270 AArch64::qsub2, AArch64::qsub3 };
2271 for (unsigned i = 0; i < NumVecs; ++i) {
2272 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2273 SuperReg);
2274 if (Narrow)
2275 NV = NarrowVector(NV, *CurDAG);
2276 ReplaceUses(SDValue(N, i), NV);
2277 }
2278 }
2279
2280 // Update the Chain
2281 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2282 CurDAG->RemoveDeadNode(N);
2283}
2284
2285void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2286 unsigned Opc) {
2287 SDLoc dl(N);
2288 EVT VT = N->getOperand(2)->getValueType(0);
2289 bool Narrow = VT.getSizeInBits() == 64;
2290
2291 // Form a REG_SEQUENCE to force register allocation.
2292 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2293
2294 if (Narrow)
2295 transform(Regs, Regs.begin(),
2296 WidenVector(*CurDAG));
2297
2298 SDValue RegSeq = createQTuple(Regs);
2299
2300 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2301
2302 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2303 N->getOperand(NumVecs + 3), N->getOperand(0)};
2304 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2305
2306 // Transfer memoperands.
2307 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2308 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2309
2310 ReplaceNode(N, St);
2311}
2312
2313void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2314 unsigned Opc) {
2315 SDLoc dl(N);
2316 EVT VT = N->getOperand(2)->getValueType(0);
2317 bool Narrow = VT.getSizeInBits() == 64;
2318
2319 // Form a REG_SEQUENCE to force register allocation.
2320 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2321
2322 if (Narrow)
2323 transform(Regs, Regs.begin(),
2324 WidenVector(*CurDAG));
2325
2326 SDValue RegSeq = createQTuple(Regs);
2327
2328 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2329 MVT::Other};
2330
2331 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2332
2333 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2334 N->getOperand(NumVecs + 2), // Base Register
2335 N->getOperand(NumVecs + 3), // Incremental
2336 N->getOperand(0)};
2337 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2338
2339 // Transfer memoperands.
2340 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2341 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2342
2343 ReplaceNode(N, St);
2344}
2345
2346 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2347 unsigned &Opc, SDValue &Opd0,
2348 unsigned &LSB, unsigned &MSB,
2349 unsigned NumberOfIgnoredLowBits,
2350 bool BiggerPattern) {
2351 assert(N->getOpcode() == ISD::AND &&
2352 "N must be a AND operation to call this function");
2353
2354 EVT VT = N->getValueType(0);
2355
2356 // Here we can test the type of VT and return false when the type does not
2357 // match, but since it is done prior to that call in the current context
2358 // we turned that into an assert to avoid redundant code.
2359 assert((VT == MVT::i32 || VT == MVT::i64) &&
2360 "Type checking must have been done before calling this function");
2361
2362 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2363 // changed the AND node to a 32-bit mask operation. We'll have to
2364 // undo that as part of the transform here if we want to catch all
2365 // the opportunities.
2366 // Currently the NumberOfIgnoredLowBits argument helps to recover
2367 // from these situations when matching bigger pattern (bitfield insert).
2368
2369 // For unsigned extracts, check for a shift right and mask
2370 uint64_t AndImm = 0;
2371 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2372 return false;
2373
2374 const SDNode *Op0 = N->getOperand(0).getNode();
2375
2376 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2377 // simplified. Try to undo that
2378 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2379
2380 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2381 if (AndImm & (AndImm + 1))
2382 return false;
2383
2384 bool ClampMSB = false;
2385 uint64_t SrlImm = 0;
2386 // Handle the SRL + ANY_EXTEND case.
2387 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2388 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2389 // Extend the incoming operand of the SRL to 64-bit.
2390 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2391 // Make sure to clamp the MSB so that we preserve the semantics of the
2392 // original operations.
2393 ClampMSB = true;
2394 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2395 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
2396 SrlImm)) {
2397 // If the shift result was truncated, we can still combine them.
2398 Opd0 = Op0->getOperand(0).getOperand(0);
2399
2400 // Use the type of SRL node.
2401 VT = Opd0->getValueType(0);
2402 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2403 Opd0 = Op0->getOperand(0);
2404 ClampMSB = (VT == MVT::i32);
2405 } else if (BiggerPattern) {
2406 // Let's pretend a 0 shift right has been performed.
2407 // The resulting code will be at least as good as the original one
2408 // plus it may expose more opportunities for bitfield insert pattern.
2409 // FIXME: Currently we limit this to the bigger pattern, because
2410 // some optimizations expect AND and not UBFM.
2411 Opd0 = N->getOperand(0);
2412 } else
2413 return false;
2414
2415 // Bail out on large immediates. This happens when no proper
2416 // combining/constant folding was performed.
2417 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2418 LLVM_DEBUG(
2419 (dbgs() << N
2420 << ": Found large shift immediate, this should not happen\n"));
2421 return false;
2422 }
2423
2424 LSB = SrlImm;
2425 MSB = SrlImm +
2426 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2427 : llvm::countr_one<uint64_t>(AndImm)) -
2428 1;
2429 if (ClampMSB)
2430 // Since we're moving the extend before the right shift operation, we need
2431 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2432 // the zeros which would get shifted in with the original right shift
2433 // operation.
2434 MSB = MSB > 31 ? 31 : MSB;
2435
2436 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2437 return true;
2438}
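// Editor's note, not upstream code: a worked example of the plain SRL + AND
// path above. For
//   %s = lshr i32 %x, 3
//   %r = and i32 %s, 0x1f
// AndImm == 0x1f passes the low-bit-mask test (0x1f & 0x20 == 0), SrlImm == 3,
// so LSB == 3, MSB == 3 + countr_one(0x1f) - 1 == 7, and the node is selected
// as UBFMWri %x, #3, #7, i.e. a 5-bit unsigned bitfield extract at bit 3.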
2439
2440static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2441 SDValue &Opd0, unsigned &Immr,
2442 unsigned &Imms) {
2443 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2444
2445 EVT VT = N->getValueType(0);
2446 unsigned BitWidth = VT.getSizeInBits();
2447 assert((VT == MVT::i32 || VT == MVT::i64) &&
2448 "Type checking must have been done before calling this function");
2449
2450 SDValue Op = N->getOperand(0);
2451 if (Op->getOpcode() == ISD::TRUNCATE) {
2452 Op = Op->getOperand(0);
2453 VT = Op->getValueType(0);
2454 BitWidth = VT.getSizeInBits();
2455 }
2456
2457 uint64_t ShiftImm;
2458 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2459 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2460 return false;
2461
2462 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2463 if (ShiftImm + Width > BitWidth)
2464 return false;
2465
2466 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2467 Opd0 = Op.getOperand(0);
2468 Immr = ShiftImm;
2469 Imms = ShiftImm + Width - 1;
2470 return true;
2471}
2472
2473static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2474 SDValue &Opd0, unsigned &LSB,
2475 unsigned &MSB) {
2476 // We are looking for the following pattern, which extracts several
2477 // contiguous bits from the source value and places them at the LSB of the
2478 // destination value; all other bits of the destination value are set to zero:
2479 //
2480 // Value2 = AND Value, MaskImm
2481 // SRL Value2, ShiftImm
2482 //
2483 // with MaskImm >> ShiftImm to search for the bit width.
2484 //
2485 // This gets selected into a single UBFM:
2486 //
2487 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2488 //
2489
2490 if (N->getOpcode() != ISD::SRL)
2491 return false;
2492
2493 uint64_t AndMask = 0;
2494 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2495 return false;
2496
2497 Opd0 = N->getOperand(0).getOperand(0);
2498
2499 uint64_t SrlImm = 0;
2500 if (!isIntImmediate(N->getOperand(1), SrlImm))
2501 return false;
2502
2503 // Check whether we really have several bits extract here.
2504 if (!isMask_64(AndMask >> SrlImm))
2505 return false;
2506
2507 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2508 LSB = SrlImm;
2509 MSB = llvm::Log2_64(AndMask);
2510 return true;
2511}
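// Editor's note, not upstream code: a worked example of the AND + SRL pattern
// above. For
//   %v = and i64 %x, 0xff0
//   %r = lshr i64 %v, 4
// AndMask >> SrlImm == 0xff is a mask, so LSB == 4, MSB == Log2_64(0xff0) == 11,
// and the pair becomes a single UBFMXri %x, #4, #11 (an 8-bit unsigned
// bitfield extract starting at bit 4).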
2512
2513static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2514 unsigned &Immr, unsigned &Imms,
2515 bool BiggerPattern) {
2516 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2517 "N must be a SHR/SRA operation to call this function");
2518
2519 EVT VT = N->getValueType(0);
2520
2521 // Here we can test the type of VT and return false when the type does not
2522 // match, but since it is done prior to that call in the current context
2523 // we turned that into an assert to avoid redundant code.
2524 assert((VT == MVT::i32 || VT == MVT::i64) &&
2525 "Type checking must have been done before calling this function");
2526
2527 // Check for AND + SRL doing several bits extract.
2528 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2529 return true;
2530
2531 // We're looking for a shift of a shift.
2532 uint64_t ShlImm = 0;
2533 uint64_t TruncBits = 0;
2534 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2535 Opd0 = N->getOperand(0).getOperand(0);
2536 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2537 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2538 // We are looking for a shift of truncate. Truncate from i64 to i32 could
2539 // be considered as setting high 32 bits as zero. Our strategy here is to
2540 // always generate 64bit UBFM. This consistency will help the CSE pass
2541 // later find more redundancy.
2542 Opd0 = N->getOperand(0).getOperand(0);
2543 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2544 VT = Opd0.getValueType();
2545 assert(VT == MVT::i64 && "the promoted type should be i64");
2546 } else if (BiggerPattern) {
2547 // Let's pretend a 0 shift left has been performed.
2548 // FIXME: Currently we limit this to the bigger pattern case,
2549 // because some optimizations expect AND and not UBFM
2550 Opd0 = N->getOperand(0);
2551 } else
2552 return false;
2553
2554 // Missing combines/constant folding may have left us with strange
2555 // constants.
2556 if (ShlImm >= VT.getSizeInBits()) {
2557 LLVM_DEBUG(
2558 (dbgs() << N
2559 << ": Found large shift immediate, this should not happen\n"));
2560 return false;
2561 }
2562
2563 uint64_t SrlImm = 0;
2564 if (!isIntImmediate(N->getOperand(1), SrlImm))
2565 return false;
2566
2567 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2568 "bad amount in shift node!");
2569 int immr = SrlImm - ShlImm;
2570 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2571 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2572 // SRA requires a signed extraction
2573 if (VT == MVT::i32)
2574 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2575 else
2576 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2577 return true;
2578}
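// Editor's note, not upstream code: a worked example of the shift-of-shift
// path above. For
//   %t = shl i64 %x, 4
//   %r = ashr i64 %t, 12
// ShlImm == 4 and SrlImm == 12 give Immr == 8 and Imms == 59, and since the
// outer shift is an SRA the signed form SBFMXri %x, #8, #59 is chosen,
// sign-extending bits 8..59 of %x into the result.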
2579
2580bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2581 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2582
2583 EVT VT = N->getValueType(0);
2584 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2585 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2586 return false;
2587
2588 uint64_t ShiftImm;
2589 SDValue Op = N->getOperand(0);
2590 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2591 return false;
2592
2593 SDLoc dl(N);
2594 // Extend the incoming operand of the shift to 64-bits.
2595 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2596 unsigned Immr = ShiftImm;
2597 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2598 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2599 CurDAG->getTargetConstant(Imms, dl, VT)};
2600 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2601 return true;
2602}
2603
2604static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2605 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2606 unsigned NumberOfIgnoredLowBits = 0,
2607 bool BiggerPattern = false) {
2608 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2609 return false;
2610
2611 switch (N->getOpcode()) {
2612 default:
2613 if (!N->isMachineOpcode())
2614 return false;
2615 break;
2616 case ISD::AND:
2617 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2618 NumberOfIgnoredLowBits, BiggerPattern);
2619 case ISD::SRL:
2620 case ISD::SRA:
2621 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2622
2623 case ISD::SIGN_EXTEND_INREG:
2624 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2625 }
2626
2627 unsigned NOpc = N->getMachineOpcode();
2628 switch (NOpc) {
2629 default:
2630 return false;
2631 case AArch64::SBFMWri:
2632 case AArch64::UBFMWri:
2633 case AArch64::SBFMXri:
2634 case AArch64::UBFMXri:
2635 Opc = NOpc;
2636 Opd0 = N->getOperand(0);
2637 Immr = N->getConstantOperandVal(1);
2638 Imms = N->getConstantOperandVal(2);
2639 return true;
2640 }
2641 // Unreachable
2642 return false;
2643}
2644
2645bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2646 unsigned Opc, Immr, Imms;
2647 SDValue Opd0;
2648 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2649 return false;
2650
2651 EVT VT = N->getValueType(0);
2652 SDLoc dl(N);
2653
2654 // If the bit extract operation is 64bit but the original type is 32bit, we
2655 // need to add one EXTRACT_SUBREG.
2656 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2657 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2658 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2659
2660 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2661 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
2662 MVT::i32, SDValue(BFM, 0));
2663 ReplaceNode(N, Inner.getNode());
2664 return true;
2665 }
2666
2667 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2668 CurDAG->getTargetConstant(Imms, dl, VT)};
2669 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2670 return true;
2671}
2672
2673/// Does DstMask form a complementary pair with the mask provided by
2674/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2675/// this asks whether DstMask zeroes precisely those bits that will be set by
2676/// the other half.
2677static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2678 unsigned NumberOfIgnoredHighBits, EVT VT) {
2679 assert((VT == MVT::i32 || VT == MVT::i64) &&
2680 "i32 or i64 mask type expected!");
2681 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2682
2683 APInt SignificantDstMask = APInt(BitWidth, DstMask);
2684 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2685
2686 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2687 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2688}
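// Editor's note, not upstream code: for VT == i32 with no ignored high bits,
// DstMask == 0xffff00ff and BitsToBeInserted == 0x0000ff00 form a
// complementary pair (their AND is 0 and their OR is all ones), so a BFI that
// writes bits 8..15 is legal; DstMask == 0xffff000f would be rejected because
// bits 4..7 are covered by neither half and the all-ones test fails.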
2689
2690// Look for bits that will be useful for later uses.
2691 // A bit is considered useless as soon as it is dropped and never used
2692 // before it has been dropped.
2693// E.g., looking for useful bit of x
2694// 1. y = x & 0x7
2695// 2. z = y >> 2
2696// After #1, x useful bits are 0x7, then the useful bits of x, live through
2697// y.
2698// After #2, the useful bits of x are 0x4.
2699 // However, if x is used in an unpredictable instruction, then all its bits
2700// are useful.
2701// E.g.
2702// 1. y = x & 0x7
2703// 2. z = y >> 2
2704// 3. str x, [@x]
2705static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2706
2707 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2708 unsigned Depth) {
2709 uint64_t Imm =
2710 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2711 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2712 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2713 getUsefulBits(Op, UsefulBits, Depth + 1);
2714}
2715
2716 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2717 uint64_t Imm, uint64_t MSB,
2718 unsigned Depth) {
2719 // inherit the bitwidth value
2720 APInt OpUsefulBits(UsefulBits);
2721 OpUsefulBits = 1;
2722
2723 if (MSB >= Imm) {
2724 OpUsefulBits <<= MSB - Imm + 1;
2725 --OpUsefulBits;
2726 // The interesting part will be in the lower part of the result
2727 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2728 // The interesting part was starting at Imm in the argument
2729 OpUsefulBits <<= Imm;
2730 } else {
2731 OpUsefulBits <<= MSB + 1;
2732 --OpUsefulBits;
2733 // The interesting part will be shifted in the result
2734 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2735 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2736 // The interesting part was at zero in the argument
2737 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2738 }
2739
2740 UsefulBits &= OpUsefulBits;
2741}
2742
2743static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2744 unsigned Depth) {
2745 uint64_t Imm =
2746 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2747 uint64_t MSB =
2748 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2749
2750 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2751}
2752
2753 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2754 unsigned Depth) {
2755 uint64_t ShiftTypeAndValue =
2756 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2757 APInt Mask(UsefulBits);
2758 Mask.clearAllBits();
2759 Mask.flipAllBits();
2760
2761 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2762 // Shift Left
2763 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2764 Mask <<= ShiftAmt;
2765 getUsefulBits(Op, Mask, Depth + 1);
2766 Mask.lshrInPlace(ShiftAmt);
2767 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2768 // Shift Right
2769 // We do not handle AArch64_AM::ASR, because the sign will change the
2770 // number of useful bits
2771 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2772 Mask.lshrInPlace(ShiftAmt);
2773 getUsefulBits(Op, Mask, Depth + 1);
2774 Mask <<= ShiftAmt;
2775 } else
2776 return;
2777
2778 UsefulBits &= Mask;
2779}
2780
2781static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
2782 unsigned Depth) {
2783 uint64_t Imm =
2784 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2785 uint64_t MSB =
2786 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
2787
2788 APInt OpUsefulBits(UsefulBits);
2789 OpUsefulBits = 1;
2790
2791 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
2792 ResultUsefulBits.flipAllBits();
2793 APInt Mask(UsefulBits.getBitWidth(), 0);
2794
2795 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
2796
2797 if (MSB >= Imm) {
2798 // The instruction is a BFXIL.
2799 uint64_t Width = MSB - Imm + 1;
2800 uint64_t LSB = Imm;
2801
2802 OpUsefulBits <<= Width;
2803 --OpUsefulBits;
2804
2805 if (Op.getOperand(1) == Orig) {
2806 // Copy the low bits from the result to bits starting from LSB.
2807 Mask = ResultUsefulBits & OpUsefulBits;
2808 Mask <<= LSB;
2809 }
2810
2811 if (Op.getOperand(0) == Orig)
2812 // Bits starting from LSB in the input contribute to the result.
2813 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2814 } else {
2815 // The instruction is a BFI.
2816 uint64_t Width = MSB + 1;
2817 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
2818
2819 OpUsefulBits <<= Width;
2820 --OpUsefulBits;
2821 OpUsefulBits <<= LSB;
2822
2823 if (Op.getOperand(1) == Orig) {
2824 // Copy the bits from the result to the zero bits.
2825 Mask = ResultUsefulBits & OpUsefulBits;
2826 Mask.lshrInPlace(LSB);
2827 }
2828
2829 if (Op.getOperand(0) == Orig)
2830 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2831 }
2832
2833 UsefulBits &= Mask;
2834}
2835
2836static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
2837 SDValue Orig, unsigned Depth) {
2838
2839 // Users of this node should have already been instruction selected
2840 // FIXME: Can we turn that into an assert?
2841 if (!UserNode->isMachineOpcode())
2842 return;
2843
2844 switch (UserNode->getMachineOpcode()) {
2845 default:
2846 return;
2847 case AArch64::ANDSWri:
2848 case AArch64::ANDSXri:
2849 case AArch64::ANDWri:
2850 case AArch64::ANDXri:
2851 // We increment Depth only when we call the getUsefulBits
2852 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
2853 Depth);
2854 case AArch64::UBFMWri:
2855 case AArch64::UBFMXri:
2856 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
2857
2858 case AArch64::ORRWrs:
2859 case AArch64::ORRXrs:
2860 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
2861 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
2862 Depth);
2863 return;
2864 case AArch64::BFMWri:
2865 case AArch64::BFMXri:
2866 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
2867
2868 case AArch64::STRBBui:
2869 case AArch64::STURBBi:
2870 if (UserNode->getOperand(0) != Orig)
2871 return;
2872 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
2873 return;
2874
2875 case AArch64::STRHHui:
2876 case AArch64::STURHHi:
2877 if (UserNode->getOperand(0) != Orig)
2878 return;
2879 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
2880 return;
2881 }
2882}
2883
2884static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
2885 if (Depth >= SelectionDAG::MaxRecursionDepth)
2886 return;
2887 // Initialize UsefulBits
2888 if (!Depth) {
2889 unsigned Bitwidth = Op.getScalarValueSizeInBits();
2890 // At the beginning, assume every produced bits is useful
2891 UsefulBits = APInt(Bitwidth, 0);
2892 UsefulBits.flipAllBits();
2893 }
2894 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
2895
2896 for (SDNode *Node : Op.getNode()->uses()) {
2897 // A use cannot produce useful bits
2898 APInt UsefulBitsForUse = APInt(UsefulBits);
2899 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
2900 UsersUsefulBits |= UsefulBitsForUse;
2901 }
2902 // UsefulBits contains the produced bits that are meaningful for the
2903 // current definition, thus a user cannot make a bit meaningful at
2904 // this point
2905 UsefulBits &= UsersUsefulBits;
2906}
2907
2908/// Create a machine node performing a notional SHL of Op by ShlAmount. If
2909/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
2910/// 0, return Op unchanged.
2911static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
2912 if (ShlAmount == 0)
2913 return Op;
2914
2915 EVT VT = Op.getValueType();
2916 SDLoc dl(Op);
2917 unsigned BitWidth = VT.getSizeInBits();
2918 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2919
2920 SDNode *ShiftNode;
2921 if (ShlAmount > 0) {
2922 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
2923 ShiftNode = CurDAG->getMachineNode(
2924 UBFMOpc, dl, VT, Op,
2925 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
2926 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
2927 } else {
2928 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
2929 assert(ShlAmount < 0 && "expected right shift");
2930 int ShrAmount = -ShlAmount;
2931 ShiftNode = CurDAG->getMachineNode(
2932 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
2933 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
2934 }
2935
2936 return SDValue(ShiftNode, 0);
2937}
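// Editor's note, not upstream code: getLeftShift(CurDAG, Op, 3) on an i32
// value emits UBFMWri Op, #29, #28 (the LSL #3 alias), a negative amount such
// as -3 emits UBFMWri Op, #3, #31 (the LSR #3 alias), and ShlAmount == 0
// simply returns Op without creating any node.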
2938
2939// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
2940 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
2941 bool BiggerPattern,
2942 const uint64_t NonZeroBits,
2943 SDValue &Src, int &DstLSB,
2944 int &Width);
2945
2946 // For bit-field-positioning pattern "(shl VAL, N)".
2947 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
2948 bool BiggerPattern,
2949 const uint64_t NonZeroBits,
2950 SDValue &Src, int &DstLSB,
2951 int &Width);
2952
2953/// Does this tree qualify as an attempt to move a bitfield into position,
2954/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
2955 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
2956 bool BiggerPattern, SDValue &Src,
2957 int &DstLSB, int &Width) {
2958 EVT VT = Op.getValueType();
2959 unsigned BitWidth = VT.getSizeInBits();
2960 (void)BitWidth;
2961 assert(BitWidth == 32 || BitWidth == 64);
2962
2963 KnownBits Known = CurDAG->computeKnownBits(Op);
2964
2965 // Non-zero in the sense that they're not provably zero, which is the key
2966 // point if we want to use this value
2967 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
2968 if (!isShiftedMask_64(NonZeroBits))
2969 return false;
2970
2971 switch (Op.getOpcode()) {
2972 default:
2973 break;
2974 case ISD::AND:
2975 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
2976 NonZeroBits, Src, DstLSB, Width);
2977 case ISD::SHL:
2978 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
2979 NonZeroBits, Src, DstLSB, Width);
2980 }
2981
2982 return false;
2983}
2984
2985 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
2986 bool BiggerPattern,
2987 const uint64_t NonZeroBits,
2988 SDValue &Src, int &DstLSB,
2989 int &Width) {
2990 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
2991
2992 EVT VT = Op.getValueType();
2993 assert((VT == MVT::i32 || VT == MVT::i64) &&
2994 "Caller guarantees VT is one of i32 or i64");
2995 (void)VT;
2996
2997 uint64_t AndImm;
2998 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
2999 return false;
3000
3001 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3002 // 1) (AndImm & (1 << POS) == 0)
3003 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3004 //
3005 // 1) and 2) don't agree so something must be wrong (e.g., in
3006 // 'SelectionDAG::computeKnownBits')
3007 assert((~AndImm & NonZeroBits) == 0 &&
3008 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3009
3010 SDValue AndOp0 = Op.getOperand(0);
3011
3012 uint64_t ShlImm;
3013 SDValue ShlOp0;
3014 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3015 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3016 ShlOp0 = AndOp0.getOperand(0);
3017 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3018 isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
3019 ShlImm)) {
3020 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3021
3022 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3023 SDValue ShlVal = AndOp0.getOperand(0);
3024
3025 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3026 // expect VT to be MVT::i32.
3027 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3028
3029 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3030 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3031 } else
3032 return false;
3033
3034 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3035 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3036 // AndOp0+AND.
3037 if (!BiggerPattern && !AndOp0.hasOneUse())
3038 return false;
3039
3040 DstLSB = llvm::countr_zero(NonZeroBits);
3041 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3042
3043 // Bail out on large Width. This happens when no proper combining / constant
3044 // folding was performed.
3045 if (Width >= (int)VT.getSizeInBits()) {
3046 // If VT is i64, Width > 64 is insensible since NonZeroBits is uint64_t, and
3047 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3048 // "val".
3049 // If VT is i32, what Width >= 32 means:
3050 // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
3051 // demands at least 'Width' bits (after dag-combiner). This together with
3052 // `any_extend` Op (undefined higher bits) indicates missed combination
3053 // when lowering the 'and' IR instruction to a machine IR instruction.
3054 LLVM_DEBUG(
3055 dbgs()
3056 << "Found large Width in bit-field-positioning -- this indicates no "
3057 "proper combining / constant folding was performed\n");
3058 return false;
3059 }
3060
3061 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3062 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3063 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3064 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3065 // which case it is not profitable to insert an extra shift.
3066 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3067 return false;
3068
3069 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3070 return true;
3071}
3072
3073 // For node (shl (and val, mask), N), returns true if the node is equivalent to
3074// UBFIZ.
3075 static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3076 SDValue &Src, int &DstLSB,
3077 int &Width) {
3078 // Caller should have verified that N is a left shift with constant shift
3079 // amount; asserts that.
3080 assert(Op.getOpcode() == ISD::SHL &&
3081 "Op.getNode() should be a SHL node to call this function");
3082 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3083 "Op.getNode() should shift ShlImm to call this function");
3084
3085 uint64_t AndImm = 0;
3086 SDValue Op0 = Op.getOperand(0);
3087 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3088 return false;
3089
3090 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3091 if (isMask_64(ShiftedAndImm)) {
3092 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3093 // should end with Mask, and could be prefixed with random bits if those
3094 // bits are shifted out.
3095 //
3096 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3097 // the AND result corresponding to those bits are shifted out, so it's fine
3098 // to not extract them.
3099 Width = llvm::countr_one(ShiftedAndImm);
3100 DstLSB = ShlImm;
3101 Src = Op0.getOperand(0);
3102 return true;
3103 }
3104 return false;
3105}
3106
3107 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3108 bool BiggerPattern,
3109 const uint64_t NonZeroBits,
3110 SDValue &Src, int &DstLSB,
3111 int &Width) {
3112 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3113
3114 EVT VT = Op.getValueType();
3115 assert((VT == MVT::i32 || VT == MVT::i64) &&
3116 "Caller guarantees that type is i32 or i64");
3117 (void)VT;
3118
3119 uint64_t ShlImm;
3120 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3121 return false;
3122
3123 if (!BiggerPattern && !Op.hasOneUse())
3124 return false;
3125
3126 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3127 return true;
3128
3129 DstLSB = llvm::countr_zero(NonZeroBits);
3130 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3131
3132 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3133 return false;
3134
3135 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3136 return true;
3137}
3138
3139static bool isShiftedMask(uint64_t Mask, EVT VT) {
3140 assert(VT == MVT::i32 || VT == MVT::i64);
3141 if (VT == MVT::i32)
3142 return isShiftedMask_32(Mask);
3143 return isShiftedMask_64(Mask);
3144}
3145
3146// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3147// inserted only sets known zero bits.
3148 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
3149 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3150
3151 EVT VT = N->getValueType(0);
3152 if (VT != MVT::i32 && VT != MVT::i64)
3153 return false;
3154
3155 unsigned BitWidth = VT.getSizeInBits();
3156
3157 uint64_t OrImm;
3158 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3159 return false;
3160
3161 // Skip this transformation if the ORR immediate can be encoded in the ORR.
3162 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3163 // performance neutral.
3164 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
3165 return false;
3166
3167 uint64_t MaskImm;
3168 SDValue And = N->getOperand(0);
3169 // Must be a single use AND with an immediate operand.
3170 if (!And.hasOneUse() ||
3171 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3172 return false;
3173
3174 // Compute the Known Zero for the AND as this allows us to catch more general
3175 // cases than just looking for AND with imm.
3176 KnownBits Known = CurDAG->computeKnownBits(And);
3177
3178 // Non-zero in the sense that they're not provably zero, which is the key
3179 // point if we want to use this value.
3180 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3181
3182 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3183 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3184 return false;
3185
3186 // The bits being inserted must only set those bits that are known to be zero.
3187 if ((OrImm & NotKnownZero) != 0) {
3188 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3189 // currently handle this case.
3190 return false;
3191 }
3192
3193 // BFI/BFXIL dst, src, #lsb, #width.
3194 int LSB = llvm::countr_one(NotKnownZero);
3195 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3196
3197 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3198 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3199 unsigned ImmS = Width - 1;
3200
3201 // If we're creating a BFI instruction avoid cases where we need more
3202 // instructions to materialize the BFI constant as compared to the original
3203 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3204 // should be no worse in this case.
3205 bool IsBFI = LSB != 0;
3206 uint64_t BFIImm = OrImm >> LSB;
3207 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3208 // We have a BFI instruction and we know the constant can't be materialized
3209 // with a ORR-immediate with the zero register.
3210 unsigned OrChunks = 0, BFIChunks = 0;
3211 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3212 if (((OrImm >> Shift) & 0xFFFF) != 0)
3213 ++OrChunks;
3214 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3215 ++BFIChunks;
3216 }
3217 if (BFIChunks > OrChunks)
3218 return false;
3219 }
3220
3221 // Materialize the constant to be inserted.
3222 SDLoc DL(N);
3223 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3224 SDNode *MOVI = CurDAG->getMachineNode(
3225 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3226
3227 // Create the BFI/BFXIL instruction.
3228 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3229 CurDAG->getTargetConstant(ImmR, DL, VT),
3230 CurDAG->getTargetConstant(ImmS, DL, VT)};
3231 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3232 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3233 return true;
3234}
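// A worked example of the translation above, with hypothetical i32 values:
// for 'or (and X, 0xffff00ff), 0x2a00', Known.Zero is 0x0000ff00 (a shifted
// mask) and NotKnownZero is 0xffff00ff, giving LSB == 8 and Width == 8, so
// ImmR == 24 and ImmS == 7, i.e. the alias BFI with lsb 8 and width 8,
// inserting the materialized constant 0x2a00 >> 8 == 0x2a.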
3235
3236static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
3237 SDValue &ShiftedOperand,
3238 uint64_t &EncodedShiftImm) {
3239 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3240 if (!Dst.hasOneUse())
3241 return false;
3242
3243 EVT VT = Dst.getValueType();
3244 assert((VT == MVT::i32 || VT == MVT::i64) &&
3245 "Caller should guarantee that VT is one of i32 or i64");
3246 const unsigned SizeInBits = VT.getSizeInBits();
3247
3248 SDLoc DL(Dst.getNode());
3249 uint64_t AndImm, ShlImm;
3250 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3251 isShiftedMask_64(AndImm)) {
3252 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3253 SDValue DstOp0 = Dst.getOperand(0);
3254 if (!DstOp0.hasOneUse())
3255 return false;
3256
3257 // An example to illustrate the transformation
3258 // From:
3259 // lsr x8, x1, #1
3260 // and x8, x8, #0x3f80
3261 // bfxil x8, x1, #0, #7
3262 // To:
3263 // and x8, x1, #0x7f
3264 // ubfx x9, x1, #8, #7
3265 // orr x8, x8, x9, lsl #7
3266 //
3267 // The number of instructions remains the same, but ORR is faster than BFXIL
3268 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3269 // the dependency chain is improved after the transformation.
3270 uint64_t SrlImm;
3271 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3272 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3273 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3274 unsigned MaskWidth =
3275 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3276 unsigned UBFMOpc =
3277 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3278 SDNode *UBFMNode = CurDAG->getMachineNode(
3279 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3280 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3281 VT),
3282 CurDAG->getTargetConstant(
3283 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3284 ShiftedOperand = SDValue(UBFMNode, 0);
3285 EncodedShiftImm = AArch64_AM::getShifterImm(
3286 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3287 return true;
3288 }
3289 }
3290 return false;
3291 }
3292
3293 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3294 ShiftedOperand = Dst.getOperand(0);
3295 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3296 return true;
3297 }
3298
3299 uint64_t SrlImm;
3300 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3301 ShiftedOperand = Dst.getOperand(0);
3302 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3303 return true;
3304 }
3305 return false;
3306}
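// Plugging the hypothetical numbers from the comment above into the UBFM
// path: with AndImm == 0x3f80 and SrlImm == 1, NumTrailingZeroInShiftedMask
// is 7 and MaskWidth is 7, so the UBFM gets immr == 8 and imms == 14
// (i.e. ubfx #8, #7) and the shift returned for the ORR is LSL #7.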
3307
3308// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3309// the operands and select it to AArch64::ORR with shifted registers if
3310// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3311static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3312 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3313 const bool BiggerPattern) {
3314 EVT VT = N->getValueType(0);
3315 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3316 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3317 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3318 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3319 assert((VT == MVT::i32 || VT == MVT::i64) &&
3320 "Expect result type to be i32 or i64 since N is combinable to BFM");
3321 SDLoc DL(N);
3322
3323 // Bail out if BFM simplifies away one node in BFM Dst.
3324 if (OrOpd1 != Dst)
3325 return false;
3326
3327 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3328 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3329 // nodes from Rn (or inserts an additional shift node) if BiggerPattern is true.
3330 if (BiggerPattern) {
3331 uint64_t SrcAndImm;
3332 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3333 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3334 // OrOpd0 = AND Src, #Mask
3335 // So BFM simplifies away one AND node from Src and doesn't simplify away
3336 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3337 // one node (from Rd), ORR is better since it has higher throughput and
3338 // smaller latency than BFM on many AArch64 processors (and for the rest
3339 // ORR is at least as good as BFM).
3340 SDValue ShiftedOperand;
3341 uint64_t EncodedShiftImm;
3342 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3343 EncodedShiftImm)) {
3344 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3345 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3346 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3347 return true;
3348 }
3349 }
3350 return false;
3351 }
3352
3353 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3354
3355 uint64_t ShlImm;
3356 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3357 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3358 SDValue Ops[] = {
3359 Dst, Src,
3360 CurDAG->getTargetConstant(
3361 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3362 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3363 return true;
3364 }
3365
3366 // Select the following pattern to left-shifted operand rather than BFI.
3367 // %val1 = op ..
3368 // %val2 = shl %val1, #imm
3369 // %res = or %val1, %val2
3370 //
3371 // If N is selected to be BFI, we know that
3372 // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3373 // into BFI), and 2) OrOpd1 would be the destination operand (i.e., preserved)
3374 //
3375 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3376 if (OrOpd0.getOperand(0) == OrOpd1) {
3377 SDValue Ops[] = {
3378 OrOpd1, OrOpd1,
3379 CurDAG->getTargetConstant(
3380 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3381 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3382 return true;
3383 }
3384 }
3385
3386 uint64_t SrlImm;
3387 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3388 // Select the following pattern to right-shifted operand rather than BFXIL.
3389 // %val1 = op ..
3390 // %val2 = lshr %val1, #imm
3391 // %res = or %val1, %val2
3392 //
3393 // If N is selected to be BFXIL, we know that
3394 // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3395 // into BFXIL), and 2) OrOpd1 would be the destination operand (i.e., preserved)
3396 //
3397 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3398 if (OrOpd0.getOperand(0) == OrOpd1) {
3399 SDValue Ops[] = {
3400 OrOpd1, OrOpd1,
3401 CurDAG->getTargetConstant(
3402 AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
3403 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3404 return true;
3405 }
3406 }
3407
3408 return false;
3409}
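// An illustrative instance of the SHL case above (hypothetical IR and
// registers): for '%res = or i64 %v, (shl i64 %v, 3)', OrOpd0.getOperand(0)
// equals OrOpd1, so instead of a BFI the node is selected directly to
// 'orr xd, xv, xv, lsl #3', a single ORRXrs with an encoded LSL shift.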
3410
3411static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3412 SelectionDAG *CurDAG) {
3413 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3414
3415 EVT VT = N->getValueType(0);
3416 if (VT != MVT::i32 && VT != MVT::i64)
3417 return false;
3418
3419 unsigned BitWidth = VT.getSizeInBits();
3420
3421 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3422 // have the expected shape. Try to undo that.
3423
3424 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3425 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3426
3427 // Given a OR operation, check if we have the following pattern
3428 // ubfm c, b, imm, imm2 (or something that does the same job, see
3429 // isBitfieldExtractOp)
3430 // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
3431 // countTrailingZeros(mask2) == imm2 - imm + 1
3432 // f = d | c
3433 // if yes, replace the OR instruction with:
3434 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3435
3436 // OR is commutative, check all combinations of operand order and values of
3437 // BiggerPattern, i.e.
3438 // Opd0, Opd1, BiggerPattern=false
3439 // Opd1, Opd0, BiggerPattern=false
3440 // Opd0, Opd1, BiggerPattern=true
3441 // Opd1, Opd0, BiggerPattern=true
3442 // Several of these combinations may match, so check with BiggerPattern=false
3443 // first since that will produce better results by matching more instructions
3444 // and/or inserting fewer extra instructions.
3445 for (int I = 0; I < 4; ++I) {
3446
3447 SDValue Dst, Src;
3448 unsigned ImmR, ImmS;
3449 bool BiggerPattern = I / 2;
3450 SDValue OrOpd0Val = N->getOperand(I % 2);
3451 SDNode *OrOpd0 = OrOpd0Val.getNode();
3452 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3453 SDNode *OrOpd1 = OrOpd1Val.getNode();
3454
3455 unsigned BFXOpc;
3456 int DstLSB, Width;
3457 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3458 NumberOfIgnoredLowBits, BiggerPattern)) {
3459 // Check that the returned opcode is compatible with the pattern,
3460 // i.e., same type and zero extended (U and not S)
3461 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3462 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3463 continue;
3464
3465 // Compute the width of the bitfield insertion
3466 DstLSB = 0;
3467 Width = ImmS - ImmR + 1;
3468 // FIXME: This constraint is to catch bitfield insertion; we may
3469 // want to widen the pattern if we want to grab the general bitfield
3470 // move case.
3471 if (Width <= 0)
3472 continue;
3473
3474 // If the mask on the insertee is correct, we have a BFXIL operation. We
3475 // can share the ImmR and ImmS values from the already-computed UBFM.
3476 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3477 BiggerPattern,
3478 Src, DstLSB, Width)) {
3479 ImmR = (BitWidth - DstLSB) % BitWidth;
3480 ImmS = Width - 1;
3481 } else
3482 continue;
3483
3484 // Check the second part of the pattern
3485 EVT VT = OrOpd1Val.getValueType();
3486 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3487
3488 // Compute the Known Zero for the candidate of the first operand.
3489 // This allows us to catch more general cases than just looking for
3490 // an AND with imm. Indeed, simplify-demanded-bits may have removed
3491 // the AND instruction because it proved it was useless.
3492 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3493
3494 // Check if there is enough room for the second operand to appear
3495 // in the first one
3496 APInt BitsToBeInserted =
3497 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3498
3499 if ((BitsToBeInserted & ~Known.Zero) != 0)
3500 continue;
3501
3502 // Set the first operand
3503 uint64_t Imm;
3504 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3505 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3506 // In that case, we can eliminate the AND
3507 Dst = OrOpd1->getOperand(0);
3508 else
3509 // Maybe the AND has been removed by simplify-demanded-bits
3510 // or is useful because it discards more bits
3511 Dst = OrOpd1Val;
3512
3513 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3514 // with shifted operand is more efficient.
3515 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3516 BiggerPattern))
3517 return true;
3518
3519 // both parts match
3520 SDLoc DL(N);
3521 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3522 CurDAG->getTargetConstant(ImmS, DL, VT)};
3523 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3524 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3525 return true;
3526 }
3527
3528 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3529 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3530 // mask (e.g., 0x000ffff0).
3531 uint64_t Mask0Imm, Mask1Imm;
3532 SDValue And0 = N->getOperand(0);
3533 SDValue And1 = N->getOperand(1);
3534 if (And0.hasOneUse() && And1.hasOneUse() &&
3535 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3536 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3537 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3538 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3539
3540 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3541 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3542 // bits to be inserted.
3543 if (isShiftedMask(Mask0Imm, VT)) {
3544 std::swap(And0, And1);
3545 std::swap(Mask0Imm, Mask1Imm);
3546 }
3547
3548 SDValue Src = And1->getOperand(0);
3549 SDValue Dst = And0->getOperand(0);
3550 unsigned LSB = llvm::countr_zero(Mask1Imm);
3551 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3552
3553 // The BFXIL inserts the low-order bits from a source register, so right
3554 // shift the needed bits into place.
3555 SDLoc DL(N);
3556 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3557 uint64_t LsrImm = LSB;
3558 if (Src->hasOneUse() &&
3559 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3560 (LsrImm + LSB) < BitWidth) {
3561 Src = Src->getOperand(0);
3562 LsrImm += LSB;
3563 }
3564
3565 SDNode *LSR = CurDAG->getMachineNode(
3566 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3567 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3568
3569 // BFXIL is an alias of BFM, so translate to BFM operands.
3570 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3571 unsigned ImmS = Width - 1;
3572
3573 // Create the BFXIL instruction.
3574 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3575 CurDAG->getTargetConstant(ImmR, DL, VT),
3576 CurDAG->getTargetConstant(ImmS, DL, VT)};
3577 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3578 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3579 return true;
3580 }
3581
3582 return false;
3583}
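// An illustrative run of the two-AND case above (hypothetical i32 values):
// for 'or (and X, 0xfffff00f), (and Y, 0x00000ff0)' the masks are
// complementary and 0x00000ff0 is the shifted mask, so LSB == 4 and
// Width == 8. Y is first shifted right by 4 via UBFM, then inserted with
// BFM ImmR == 28, ImmS == 7, replacing bits [11:4] of X with bits [11:4]
// of Y.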
3584
3585bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3586 if (N->getOpcode() != ISD::OR)
3587 return false;
3588
3589 APInt NUsefulBits;
3590 getUsefulBits(SDValue(N, 0), NUsefulBits);
3591
3592 // If none of the bits are useful, just return UNDEF.
3593 if (!NUsefulBits) {
3594 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3595 return true;
3596 }
3597
3598 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3599 return true;
3600
3601 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3602}
3603
3604/// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3605/// equivalent of a left shift by a constant amount followed by an and masking
3606/// out a contiguous set of bits.
3607bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3608 if (N->getOpcode() != ISD::AND)
3609 return false;
3610
3611 EVT VT = N->getValueType(0);
3612 if (VT != MVT::i32 && VT != MVT::i64)
3613 return false;
3614
3615 SDValue Op0;
3616 int DstLSB, Width;
3617 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3618 Op0, DstLSB, Width))
3619 return false;
3620
3621 // ImmR is the rotate right amount.
3622 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3623 // ImmS is the most significant bit of the source to be moved.
3624 unsigned ImmS = Width - 1;
3625
3626 SDLoc DL(N);
3627 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3628 CurDAG->getTargetConstant(ImmS, DL, VT)};
3629 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3630 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3631 return true;
3632}
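// An illustrative example with hypothetical i32 values: for
// 'and (shl X, 3), 0xf8', DstLSB == 3 and Width == 5, so ImmR == 29 and
// ImmS == 4 and the node becomes UBFMWri, i.e. the alias UBFIZ Wd, Wx, #3, #5.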
3633
3634/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3635/// variable shift/rotate instructions.
3636bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3637 EVT VT = N->getValueType(0);
3638
3639 unsigned Opc;
3640 switch (N->getOpcode()) {
3641 case ISD::ROTR:
3642 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3643 break;
3644 case ISD::SHL:
3645 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3646 break;
3647 case ISD::SRL:
3648 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3649 break;
3650 case ISD::SRA:
3651 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3652 break;
3653 default:
3654 return false;
3655 }
3656
3657 uint64_t Size;
3658 uint64_t Bits;
3659 if (VT == MVT::i32) {
3660 Bits = 5;
3661 Size = 32;
3662 } else if (VT == MVT::i64) {
3663 Bits = 6;
3664 Size = 64;
3665 } else
3666 return false;
3667
3668 SDValue ShiftAmt = N->getOperand(1);
3669 SDLoc DL(N);
3670 SDValue NewShiftAmt;
3671
3672 // Skip over an extend of the shift amount.
3673 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3674 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3675 ShiftAmt = ShiftAmt->getOperand(0);
3676
3677 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3678 SDValue Add0 = ShiftAmt->getOperand(0);
3679 SDValue Add1 = ShiftAmt->getOperand(1);
3680 uint64_t Add0Imm;
3681 uint64_t Add1Imm;
3682 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
3683 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3684 // to avoid the ADD/SUB.
3685 NewShiftAmt = Add0;
3686 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3687 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
3688 (Add0Imm % Size == 0)) {
3689 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3690 // to generate a NEG instead of a SUB from a constant.
3691 unsigned NegOpc;
3692 unsigned ZeroReg;
3693 EVT SubVT = ShiftAmt->getValueType(0);
3694 if (SubVT == MVT::i32) {
3695 NegOpc = AArch64::SUBWrr;
3696 ZeroReg = AArch64::WZR;
3697 } else {
3698 assert(SubVT == MVT::i64);
3699 NegOpc = AArch64::SUBXrr;
3700 ZeroReg = AArch64::XZR;
3701 }
3702 SDValue Zero =
3703 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3704 MachineSDNode *Neg =
3705 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
3706 NewShiftAmt = SDValue(Neg, 0);
3707 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3708 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
3709 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3710 // to generate a NOT instead of a SUB from a constant.
3711 unsigned NotOpc;
3712 unsigned ZeroReg;
3713 EVT SubVT = ShiftAmt->getValueType(0);
3714 if (SubVT == MVT::i32) {
3715 NotOpc = AArch64::ORNWrr;
3716 ZeroReg = AArch64::WZR;
3717 } else {
3718 assert(SubVT == MVT::i64);
3719 NotOpc = AArch64::ORNXrr;
3720 ZeroReg = AArch64::XZR;
3721 }
3722 SDValue Zero =
3723 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3724 MachineSDNode *Not =
3725 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
3726 NewShiftAmt = SDValue(Not, 0);
3727 } else
3728 return false;
3729 } else {
3730 // If the shift amount is masked with an AND, check that the mask covers the
3731 // bits that are implicitly ANDed off by the above opcodes and if so, skip
3732 // the AND.
3733 uint64_t MaskImm;
3734 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
3735 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
3736 return false;
3737
3738 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
3739 return false;
3740
3741 NewShiftAmt = ShiftAmt->getOperand(0);
3742 }
3743
3744 // Narrow/widen the shift amount to match the size of the shift operation.
3745 if (VT == MVT::i32)
3746 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
3747 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
3748 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
3749 MachineSDNode *Ext = CurDAG->getMachineNode(
3750 AArch64::SUBREG_TO_REG, DL, VT,
3751 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
3752 NewShiftAmt = SDValue(Ext, 0);
3753 }
3754
3755 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
3756 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3757 return true;
3758}
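// An illustrative example with hypothetical values: for 'srl i32 X, (sub 32, Y)',
// Add0Imm == 32 is 0 modulo Size, so the SUB is replaced by a NEG of Y and the
// node becomes LSRVWr; the instruction's implicit modulo-32 handling of the
// shift amount keeps this equivalent to shifting by 32 - Y for in-range amounts.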
3759
3760static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
3761 SDValue &FixedPos,
3762 unsigned RegWidth,
3763 bool isReciprocal) {
3764 APFloat FVal(0.0);
3765 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
3766 FVal = CN->getValueAPF();
3767 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
3768 // Some otherwise illegal constants are allowed in this case.
3769 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
3770 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
3771 return false;
3772
3773 ConstantPoolSDNode *CN =
3774 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
3775 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
3776 } else
3777 return false;
3778
3779 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
3780 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
3781 // x-register.
3782 //
3783 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
3784 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
3785 // integers.
3786 bool IsExact;
3787
3788 if (isReciprocal)
3789 if (!FVal.getExactInverse(&FVal))
3790 return false;
3791
3792 // fbits is between 1 and 64 in the worst-case, which means the fmul
3793 // could have 2^64 as an actual operand. Need 65 bits of precision.
3794 APSInt IntVal(65, true);
3795 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
3796
3797 // N.b. isPowerOf2 also checks for > 0.
3798 if (!IsExact || !IntVal.isPowerOf2())
3799 return false;
3800 unsigned FBits = IntVal.logBase2();
3801
3802 // Checks above should have guaranteed that we haven't lost information in
3803 // finding FBits, but it must still be in range.
3804 if (FBits == 0 || FBits > RegWidth) return false;
3805
3806 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
3807 return true;
3808}
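// An illustrative example with hypothetical values: for
// 'fp_to_sint (fmul f32 %x, 65536.0)' and RegWidth == 32, IntVal becomes
// 65536, so FBits == logBase2(65536) == 16 and FixedPos is the constant 16,
// enabling a fixed-point convert such as FCVTZS with #16 fractional bits.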
3809
3810bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
3811 unsigned RegWidth) {
3812 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3813 false);
3814}
3815
3816bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
3817 SDValue &FixedPos,
3818 unsigned RegWidth) {
3819 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3820 true);
3821}
3822
3823 // Inspects a register string of the form o0:op1:CRn:CRm:op2, gets the fields
3824 // of the string, obtains the integer values from them, and combines these
3825// into a single value to be used in the MRS/MSR instruction.
3826static int getIntOperandFromRegisterString(StringRef RegString) {
3827 SmallVector<StringRef, 5> Fields;
3828 RegString.split(Fields, ':');
3829
3830 if (Fields.size() == 1)
3831 return -1;
3832
3833 assert(Fields.size() == 5
3834 && "Invalid number of fields in read register string");
3835
3836 SmallVector<int, 5> Ops;
3837 bool AllIntFields = true;
3838
3839 for (StringRef Field : Fields) {
3840 unsigned IntField;
3841 AllIntFields &= !Field.getAsInteger(10, IntField);
3842 Ops.push_back(IntField);
3843 }
3844
3845 assert(AllIntFields &&
3846 "Unexpected non-integer value in special register string.");
3847 (void)AllIntFields;
3848
3849 // Need to combine the integer fields of the string into a single value
3850 // based on the bit encoding of MRS/MSR instruction.
3851 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
3852 (Ops[3] << 3) | (Ops[4]);
3853}
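// An illustrative encoding with a hypothetical register string: "1:2:7:4:1"
// splits into five integer fields, so the function returns
// (1 << 14) | (2 << 11) | (7 << 7) | (4 << 3) | 1 == 0x53a1, the immediate
// operand used by the MRS/MSR machine instruction.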
3854
3855// Lower the read_register intrinsic to an MRS instruction node if the special
3856// register string argument is either of the form detailed in the ALCE (the
3857 // form described in getIntOperandFromRegisterString) or is a named register
3858// known by the MRS SysReg mapper.
3859bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
3860 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
3861 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3862 SDLoc DL(N);
3863
3864 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
3865
3866 unsigned Opcode64Bit = AArch64::MRS;
3867 int Imm = getIntOperandFromRegisterString(RegString->getString());
3868 if (Imm == -1) {
3869 // No match. Use the sysreg mapper to map the remaining possible strings to
3870 // the value for the register to be used for the instruction operand.
3871 const auto *TheReg =
3872 AArch64SysReg::lookupSysRegByName(RegString->getString());
3873 if (TheReg && TheReg->Readable &&
3874 TheReg->haveFeatures(Subtarget->getFeatureBits()))
3875 Imm = TheReg->Encoding;
3876 else
3877 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
3878
3879 if (Imm == -1) {
3880 // Still no match, see if this is "pc" or give up.
3881 if (!ReadIs128Bit && RegString->getString() == "pc") {
3882 Opcode64Bit = AArch64::ADR;
3883 Imm = 0;
3884 } else {
3885 return false;
3886 }
3887 }
3888 }
3889
3890 SDValue InChain = N->getOperand(0);
3891 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
3892 if (!ReadIs128Bit) {
3893 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
3894 {SysRegImm, InChain});
3895 } else {
3896 SDNode *MRRS = CurDAG->getMachineNode(
3897 AArch64::MRRS, DL,
3898 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
3899 {SysRegImm, InChain});
3900
3901 // Sysregs are not endian. The even register always contains the low half
3902 // of the register.
3903 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
3904 SDValue(MRRS, 0));
3905 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
3906 SDValue(MRRS, 0));
3907 SDValue OutChain = SDValue(MRRS, 1);
3908
3909 ReplaceUses(SDValue(N, 0), Lo);
3910 ReplaceUses(SDValue(N, 1), Hi);
3911 ReplaceUses(SDValue(N, 2), OutChain);
3912 };
3913 return true;
3914}
3915
3916// Lower the write_register intrinsic to an MSR instruction node if the special
3917// register string argument is either of the form detailed in the ALCE (the
3918 // form described in getIntOperandFromRegisterString) or is a named register
3919// known by the MSR SysReg mapper.
3920bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
3921 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
3922 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3923 SDLoc DL(N);
3924
3925 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
3926
3927 if (!WriteIs128Bit) {
3928 // Check if the register was one of those allowed as the pstatefield value
3929 // in the MSR (immediate) instruction. To accept the values allowed in the
3930 // pstatefield for the MSR (immediate) instruction, we also require that an
3931 // immediate value has been provided as an argument; we know that this is
3932 // the case as it has been ensured by semantic checking.
3933 auto trySelectPState = [&](auto PMapper, unsigned State) {
3934 if (PMapper) {
3935 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
3936 "Expected a constant integer expression.");
3937 unsigned Reg = PMapper->Encoding;
3938 uint64_t Immed = N->getConstantOperandVal(2);
3939 CurDAG->SelectNodeTo(
3940 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
3941 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
3942 return true;
3943 }
3944 return false;
3945 };
3946
3947 if (trySelectPState(
3948 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
3949 AArch64::MSRpstateImm4))
3950 return true;
3951 if (trySelectPState(
3952 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
3953 AArch64::MSRpstateImm1))
3954 return true;
3955 }
3956
3957 int Imm = getIntOperandFromRegisterString(RegString->getString());
3958 if (Imm == -1) {
3959 // Use the sysreg mapper to attempt to map the remaining possible strings
3960 // to the value for the register to be used for the MSR (register)
3961 // instruction operand.
3962 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
3963 if (TheReg && TheReg->Writeable &&
3964 TheReg->haveFeatures(Subtarget->getFeatureBits()))
3965 Imm = TheReg->Encoding;
3966 else
3967 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
3968
3969 if (Imm == -1)
3970 return false;
3971 }
3972
3973 SDValue InChain = N->getOperand(0);
3974 if (!WriteIs128Bit) {
3975 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
3976 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
3977 N->getOperand(2), InChain);
3978 } else {
3979 // No endian swap. The lower half always goes into the even subreg, and the
3980 // higher half always into the odd subreg.
3981 SDNode *Pair = CurDAG->getMachineNode(
3982 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
3983 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
3984 MVT::i32),
3985 N->getOperand(2),
3986 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
3987 N->getOperand(3),
3988 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
3989
3990 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
3991 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
3992 SDValue(Pair, 0), InChain);
3993 }
3994
3995 return true;
3996}
3997
3998/// We've got special pseudo-instructions for these
3999bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4000 unsigned Opcode;
4001 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4002
4003 // Leave IR for LSE if subtarget supports it.
4004 if (Subtarget->hasLSE()) return false;
4005
4006 if (MemTy == MVT::i8)
4007 Opcode = AArch64::CMP_SWAP_8;
4008 else if (MemTy == MVT::i16)
4009 Opcode = AArch64::CMP_SWAP_16;
4010 else if (MemTy == MVT::i32)
4011 Opcode = AArch64::CMP_SWAP_32;
4012 else if (MemTy == MVT::i64)
4013 Opcode = AArch64::CMP_SWAP_64;
4014 else
4015 llvm_unreachable("Unknown AtomicCmpSwap type");
4016
4017 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4018 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4019 N->getOperand(0)};
4020 SDNode *CmpSwap = CurDAG->getMachineNode(
4021 Opcode, SDLoc(N),
4022 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4023
4024 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4025 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4026
4027 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4028 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4029 CurDAG->RemoveDeadNode(N);
4030
4031 return true;
4032}
4033
4034bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4035 SDValue &Shift) {
4036 if (!isa<ConstantSDNode>(N))
4037 return false;
4038
4039 SDLoc DL(N);
4040 uint64_t Val = cast<ConstantSDNode>(N)
4041 ->getAPIntValue()
4042 .trunc(VT.getFixedSizeInBits())
4043 .getZExtValue();
4044
4045 switch (VT.SimpleTy) {
4046 case MVT::i8:
4047 // All immediates are supported.
4048 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4049 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4050 return true;
4051 case MVT::i16:
4052 case MVT::i32:
4053 case MVT::i64:
4054 // Support 8bit unsigned immediates.
4055 if (Val <= 255) {
4056 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4057 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4058 return true;
4059 }
4060 // Support 16bit unsigned immediates that are a multiple of 256.
4061 if (Val <= 65280 && Val % 256 == 0) {
4062 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4063 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4064 return true;
4065 }
4066 break;
4067 default:
4068 break;
4069 }
4070
4071 return false;
4072}
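// An illustrative example with a hypothetical value: an i16 add of the
// splatted constant 4608 (0x1200) is not an 8-bit immediate but is a
// multiple of 256 no larger than 65280, so it is encoded as Imm == 0x12
// with Shift == 8, i.e. the '#18, lsl #8' immediate form.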
4073
4074bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4075 SDValue &Imm, SDValue &Shift,
4076 bool Negate) {
4077 if (!isa<ConstantSDNode>(N))
4078 return false;
4079
4080 SDLoc DL(N);
4081 int64_t Val = cast<ConstantSDNode>(N)
4082 ->getAPIntValue()
4083 .trunc(VT.getFixedSizeInBits())
4084 .getSExtValue();
4085
4086 if (Negate)
4087 Val = -Val;
4088
4089 // Signed saturating instructions treat their immediate operand as unsigned,
4090 // whereas the related intrinsics define their operands to be signed. This
4091 // means we can only use the immediate form when the operand is non-negative.
4092 if (Val < 0)
4093 return false;
4094
4095 switch (VT.SimpleTy) {
4096 case MVT::i8:
4097 // All positive immediates are supported.
4098 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4099 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4100 return true;
4101 case MVT::i16:
4102 case MVT::i32:
4103 case MVT::i64:
4104 // Support 8bit positive immediates.
4105 if (Val <= 255) {
4106 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4107 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4108 return true;
4109 }
4110 // Support 16bit positive immediates that are a multiple of 256.
4111 if (Val <= 65280 && Val % 256 == 0) {
4112 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4113 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4114 return true;
4115 }
4116 break;
4117 default:
4118 break;
4119 }
4120
4121 return false;
4122}
4123
4124bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4125 SDValue &Shift) {
4126 if (!isa<ConstantSDNode>(N))
4127 return false;
4128
4129 SDLoc DL(N);
4130 int64_t Val = cast<ConstantSDNode>(N)
4131 ->getAPIntValue()
4132 .trunc(VT.getFixedSizeInBits())
4133 .getSExtValue();
4134
4135 switch (VT.SimpleTy) {
4136 case MVT::i8:
4137 // All immediates are supported.
4138 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4139 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4140 return true;
4141 case MVT::i16:
4142 case MVT::i32:
4143 case MVT::i64:
4144 // Support 8bit signed immediates.
4145 if (Val >= -128 && Val <= 127) {
4146 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4147 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4148 return true;
4149 }
4150 // Support 16bit signed immediates that are a multiple of 256.
4151 if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
4152 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4153 Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32);
4154 return true;
4155 }
4156 break;
4157 default:
4158 break;
4159 }
4160
4161 return false;
4162}
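// An illustrative example with a hypothetical value: a splat of -2 for i16
// elements lies in the signed 8-bit range, so it is encoded with Shift == 0
// and Imm == 0xfe (the value truncated to its low byte) for the CPY/DUP
// immediate forms.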
4163
4164bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4165 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4166 int64_t ImmVal = CNode->getSExtValue();
4167 SDLoc DL(N);
4168 if (ImmVal >= -128 && ImmVal < 128) {
4169 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
4170 return true;
4171 }
4172 }
4173 return false;
4174}
4175
4176bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4177 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4178 uint64_t ImmVal = CNode->getZExtValue();
4179
4180 switch (VT.SimpleTy) {
4181 case MVT::i8:
4182 ImmVal &= 0xFF;
4183 break;
4184 case MVT::i16:
4185 ImmVal &= 0xFFFF;
4186 break;
4187 case MVT::i32:
4188 ImmVal &= 0xFFFFFFFF;
4189 break;
4190 case MVT::i64:
4191 break;
4192 default:
4193 llvm_unreachable("Unexpected type");
4194 }
4195
4196 if (ImmVal < 256) {
4197 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4198 return true;
4199 }
4200 }
4201 return false;
4202}
4203
4204bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4205 bool Invert) {
4206 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4207 uint64_t ImmVal = CNode->getZExtValue();
4208 SDLoc DL(N);
4209
4210 if (Invert)
4211 ImmVal = ~ImmVal;
4212
4213 // Shift mask depending on type size.
4214 switch (VT.SimpleTy) {
4215 case MVT::i8:
4216 ImmVal &= 0xFF;
4217 ImmVal |= ImmVal << 8;
4218 ImmVal |= ImmVal << 16;
4219 ImmVal |= ImmVal << 32;
4220 break;
4221 case MVT::i16:
4222 ImmVal &= 0xFFFF;
4223 ImmVal |= ImmVal << 16;
4224 ImmVal |= ImmVal << 32;
4225 break;
4226 case MVT::i32:
4227 ImmVal &= 0xFFFFFFFF;
4228 ImmVal |= ImmVal << 32;
4229 break;
4230 case MVT::i64:
4231 break;
4232 default:
4233 llvm_unreachable("Unexpected type");
4234 }
4235
4236 uint64_t encoding;
4237 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
4238 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
4239 return true;
4240 }
4241 }
4242 return false;
4243}
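// An illustrative example with a hypothetical value: for i16 elements and
// ImmVal == 0x00ff, the value is replicated to 0x00ff00ff00ff00ff before
// calling processLogicalImmediate; the replicated pattern is a valid logical
// immediate (an 8-bit run of ones per 16-bit element), so selection succeeds
// with the returned 64-bit encoding.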
4244
4245// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4246// Rather than attempt to normalise everything we can sometimes saturate the
4247// shift amount during selection. This function also allows for consistent
4248// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4249// required by the instructions.
4250bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4251 uint64_t High, bool AllowSaturation,
4252 SDValue &Imm) {
4253 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4254 uint64_t ImmVal = CN->getZExtValue();
4255
4256 // Reject shift amounts that are too small.
4257 if (ImmVal < Low)
4258 return false;
4259
4260 // Reject or saturate shift amounts that are too big.
4261 if (ImmVal > High) {
4262 if (!AllowSaturation)
4263 return false;
4264 ImmVal = High;
4265 }
4266
4267 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4268 return true;
4269 }
4270
4271 return false;
4272}
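// An illustrative example, assuming bounds of Low == 1 and High == 8 as used
// for byte-element right shifts: a constant shift amount of 19 is clamped to
// 8 when AllowSaturation is set and rejected otherwise.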
4273
4274bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4275 // tagp(FrameIndex, IRGstack, tag_offset):
4276 // since the offset between FrameIndex and IRGstack is a compile-time
4277 // constant, this can be lowered to a single ADDG instruction.
4278 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4279 return false;
4280 }
4281
4282 SDValue IRG_SP = N->getOperand(2);
4283 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4284 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4285 return false;
4286 }
4287
4288 const TargetLowering *TLI = getTargetLowering();
4289 SDLoc DL(N);
4290 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4291 SDValue FiOp = CurDAG->getTargetFrameIndex(
4292 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4293 int TagOffset = N->getConstantOperandVal(3);
4294
4295 SDNode *Out = CurDAG->getMachineNode(
4296 AArch64::TAGPstack, DL, MVT::i64,
4297 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4298 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4299 ReplaceNode(N, Out);
4300 return true;
4301}
4302
4303void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4304 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4305 "llvm.aarch64.tagp third argument must be an immediate");
4306 if (trySelectStackSlotTagP(N))
4307 return;
4308 // FIXME: above applies in any case when offset between Op1 and Op2 is a
4309 // compile-time constant, not just for stack allocations.
4310
4311 // General case for unrelated pointers in Op1 and Op2.
4312 SDLoc DL(N);
4313 int TagOffset = N->getConstantOperandVal(3);
4314 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4315 {N->getOperand(1), N->getOperand(2)});
4316 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4317 {SDValue(N1, 0), N->getOperand(2)});
4318 SDNode *N3 = CurDAG->getMachineNode(
4319 AArch64::ADDG, DL, MVT::i64,
4320 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4321 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4322 ReplaceNode(N, N3);
4323}
4324
4325bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4326 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4327
4328 // Bail when not a "cast" like insert_subvector.
4329 if (N->getConstantOperandVal(2) != 0)
4330 return false;
4331 if (!N->getOperand(0).isUndef())
4332 return false;
4333
4334 // Bail when normal isel should do the job.
4335 EVT VT = N->getValueType(0);
4336 EVT InVT = N->getOperand(1).getValueType();
4337 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4338 return false;
4339 if (InVT.getSizeInBits() <= 128)
4340 return false;
4341
4342 // NOTE: We can only get here when doing fixed length SVE code generation.
4343 // We do manual selection because the types involved are not linked to real
4344 // registers (despite being legal) and must be coerced into SVE registers.
4345
4347 "Expected to insert into a packed scalable vector!");
4348
4349 SDLoc DL(N);
4350 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4351 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4352 N->getOperand(1), RC));
4353 return true;
4354}
4355
4356bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4357 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4358
4359 // Bail when not a "cast" like extract_subvector.
4360 if (N->getConstantOperandVal(1) != 0)
4361 return false;
4362
4363 // Bail when normal isel can do the job.
4364 EVT VT = N->getValueType(0);
4365 EVT InVT = N->getOperand(0).getValueType();
4366 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4367 return false;
4368 if (VT.getSizeInBits() <= 128)
4369 return false;
4370
4371 // NOTE: We can only get here when doing fixed length SVE code generation.
4372 // We do manual selection because the types involved are not linked to real
4373 // registers (despite being legal) and must be coerced into SVE registers.
4374
4376 "Expected to extract from a packed scalable vector!");
4377
4378 SDLoc DL(N);
4379 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4380 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4381 N->getOperand(0), RC));
4382 return true;
4383}
4384
4385bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4386 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4387
4388 SDValue N0 = N->getOperand(0);
4389 SDValue N1 = N->getOperand(1);
4390 EVT VT = N->getValueType(0);
4391
4392 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4393 // A rotate by a constant is a funnel shift in IR, which is expanded to
4394 // an OR with shifted operands.
4395 // We do the following transform:
4396 // OR N0, N1 -> xar (x, y, imm)
4397 // Where:
4398 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4399 // N0 = SHL_PRED true, V, splat(bits-imm)
4400 // V = (xor x, y)
4401 if (VT.isScalableVector() &&
4402 (Subtarget->hasSVE2() ||
4403 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4404 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4405 N1.getOpcode() != AArch64ISD::SRL_PRED)
4406 std::swap(N0, N1);
4407 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4408 N1.getOpcode() != AArch64ISD::SRL_PRED)
4409 return false;
4410
4411 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4412 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4413 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4414 return false;
4415
4416 SDValue XOR = N0.getOperand(1);
4417 if (XOR.getOpcode() != ISD::XOR || XOR != N1.getOperand(1))
4418 return false;
4419
4420 APInt ShlAmt, ShrAmt;
4421 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4422 !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
4423 return false;
4424
4425 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4426 return false;
4427
4428 SDLoc DL(N);
4429 SDValue Imm =
4430 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4431
4432 SDValue Ops[] = {XOR.getOperand(0), XOR.getOperand(1), Imm};
4433 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4434 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4435 AArch64::XAR_ZZZI_D})) {
4436 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4437 return true;
4438 }
4439 return false;
4440 }
4441
4442 if (!Subtarget->hasSHA3())
4443 return false;
4444
4445 if (N0->getOpcode() != AArch64ISD::VSHL ||
4446 N1->getOpcode() != AArch64ISD::VLSHR)
4447 return false;
4448
4449 if (N0->getOperand(0) != N1->getOperand(0) ||
4450 N1->getOperand(0)->getOpcode() != ISD::XOR)
4451 return false;
4452
4453 SDValue XOR = N0.getOperand(0);
4454 SDValue R1 = XOR.getOperand(0);
4455 SDValue R2 = XOR.getOperand(1);
4456
4457 unsigned HsAmt = N0.getConstantOperandVal(1);
4458 unsigned ShAmt = N1.getConstantOperandVal(1);
4459
4460 SDLoc DL = SDLoc(N0.getOperand(1));
4461 SDValue Imm = CurDAG->getTargetConstant(
4462 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4463
4464 if (ShAmt + HsAmt != 64)
4465 return false;
4466
4467 SDValue Ops[] = {R1, R2, Imm};
4468 CurDAG->SelectNodeTo(N, AArch64::XAR, N0.getValueType(), Ops);
4469
4470 return true;
4471}
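// An illustrative example with hypothetical values: rotr(xor(x, y), 24) on
// v2i64 reaches this point as 'or (AArch64ISD::VSHL V, 40),
// (AArch64ISD::VLSHR V, 24)'; since 40 + 24 == 64, the node is selected to
// XAR with immediate 24.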
4472
4473void AArch64DAGToDAGISel::Select(SDNode *Node) {
4474 // If we have a custom node, we already have selected!
4475 if (Node->isMachineOpcode()) {
4476 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4477 Node->setNodeId(-1);
4478 return;
4479 }
4480
4481 // A few custom selection cases.
4482 EVT VT = Node->getValueType(0);
4483
4484 switch (Node->getOpcode()) {
4485 default:
4486 break;
4487
4488 case ISD::ATOMIC_CMP_SWAP:
4489 if (SelectCMP_SWAP(Node))
4490 return;
4491 break;
4492
4493 case ISD::READ_REGISTER:
4494 case AArch64ISD::MRRS:
4495 if (tryReadRegister(Node))
4496 return;
4497 break;
4498
4499 case ISD::WRITE_REGISTER:
4500 case AArch64ISD::MSRR:
4501 if (tryWriteRegister(Node))
4502 return;
4503 break;
4504
4505 case ISD::LOAD: {
4506 // Try to select as an indexed load. Fall through to normal processing
4507 // if we can't.
4508 if (tryIndexedLoad(Node))
4509 return;
4510 break;
4511 }
4512
4513 case ISD::SRL:
4514 case ISD::AND:
4515 case ISD::SRA:
4516 case ISD::SIGN_EXTEND_INREG:
4517 if (tryBitfieldExtractOp(Node))
4518 return;
4519 if (tryBitfieldInsertInZeroOp(Node))
4520 return;
4521 [[fallthrough]];
4522 case ISD::ROTR:
4523 case ISD::SHL:
4524 if (tryShiftAmountMod(Node))
4525 return;
4526 break;
4527
4528 case ISD::SIGN_EXTEND:
4529 if (tryBitfieldExtractOpFromSExt(Node))
4530 return;
4531 break;
4532
4533 case ISD::OR:
4534 if (tryBitfieldInsertOp(Node))
4535 return;
4536 if (trySelectXAR(Node))
4537 return;
4538 break;
4539
4540 case ISD::EXTRACT_SUBVECTOR: {
4541 if (trySelectCastScalableToFixedLengthVector(Node))
4542 return;
4543 break;
4544 }
4545
4546 case ISD::INSERT_SUBVECTOR: {
4547 if (trySelectCastFixedLengthToScalableVector(Node))
4548 return;
4549 break;
4550 }
4551
4552 case ISD::Constant: {
4553 // Materialize zero constants as copies from WZR/XZR. This allows
4554 // the coalescer to propagate these into other instructions.
4555 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4556 if (ConstNode->isZero()) {
4557 if (VT == MVT::i32) {
4558 SDValue New = CurDAG->getCopyFromReg(
4559 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4560 ReplaceNode(Node, New.getNode());
4561 return;
4562 } else if (VT == MVT::i64) {
4563 SDValue New = CurDAG->getCopyFromReg(
4564 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4565 ReplaceNode(Node, New.getNode());
4566 return;
4567 }
4568 }
4569 break;
4570 }
4571
4572 case ISD::FrameIndex: {
4573 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4574 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4575 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4576 const TargetLowering *TLI = getTargetLowering();
4577 SDValue TFI = CurDAG->getTargetFrameIndex(
4578 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4579 SDLoc DL(Node);
4580 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4581 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4582 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4583 return;
4584 }
4585 case ISD::INTRINSIC_W_CHAIN: {
4586 unsigned IntNo = Node->getConstantOperandVal(1);
4587 switch (IntNo) {
4588 default:
4589 break;
4590 case Intrinsic::aarch64_gcsss: {
4591 SDLoc DL(Node);
4592 SDValue Chain = Node->getOperand(0);
4593 SDValue Val = Node->getOperand(2);
4594 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
4595 SDNode *SS1 =
4596 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
4597 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
4598 MVT::Other, Zero, SDValue(SS1, 0));
4599 ReplaceNode(Node, SS2);
4600 return;
4601 }
4602 case Intrinsic::aarch64_ldaxp:
4603 case Intrinsic::aarch64_ldxp: {
4604 unsigned Op =
4605 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4606 SDValue MemAddr = Node->getOperand(2);
4607 SDLoc DL(Node);
4608 SDValue Chain = Node->getOperand(0);
4609
4610 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4611 MVT::Other, MemAddr, Chain);
4612
4613 // Transfer memoperands.
4614 MachineMemOperand *MemOp =
4615 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4616 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4617 ReplaceNode(Node, Ld);
4618 return;
4619 }
4620 case Intrinsic::aarch64_stlxp:
4621 case Intrinsic::aarch64_stxp: {
4622 unsigned Op =
4623 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4624 SDLoc DL(Node);
4625 SDValue Chain = Node->getOperand(0);
4626 SDValue ValLo = Node->getOperand(2);
4627 SDValue ValHi = Node->getOperand(3);
4628 SDValue MemAddr = Node->getOperand(4);
4629
4630 // Place arguments in the right order.
4631 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4632
4633 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4634 // Transfer memoperands.
4635 MachineMemOperand *MemOp =
4636 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4637 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4638
4639 ReplaceNode(Node, St);
4640 return;
4641 }
4642 case Intrinsic::aarch64_neon_ld1x2:
4643 if (VT == MVT::v8i8) {
4644 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4645 return;
4646 } else if (VT == MVT::v16i8) {
4647 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4648 return;
4649 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4650 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4651 return;
4652 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4653 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
4654 return;
4655 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4656 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
4657 return;
4658 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4659 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
4660 return;
4661 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4662 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4663 return;
4664 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4665 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
4666 return;
4667 }
4668 break;
4669 case Intrinsic::aarch64_neon_ld1x3:
4670 if (VT == MVT::v8i8) {
4671 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
4672 return;
4673 } else if (VT == MVT::v16i8) {
4674 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
4675 return;
4676 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4677 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
4678 return;
4679 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4680 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
4681 return;
4682 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4683 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
4684 return;
4685 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4686 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
4687 return;
4688 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4689 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4690 return;
4691 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4692 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
4693 return;
4694 }
4695 break;
4696 case Intrinsic::aarch64_neon_ld1x4:
4697 if (VT == MVT::v8i8) {
4698 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
4699 return;
4700 } else if (VT == MVT::v16i8) {
4701 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
4702 return;
4703 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4704 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
4705 return;
4706 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4707 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
4708 return;
4709 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4710 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
4711 return;
4712 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4713 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
4714 return;
4715 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4716 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4717 return;
4718 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4719 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
4720 return;
4721 }
4722 break;
4723 case Intrinsic::aarch64_neon_ld2:
4724 if (VT == MVT::v8i8) {
4725 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
4726 return;
4727 } else if (VT == MVT::v16i8) {
4728 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
4729 return;
4730 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4731 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
4732 return;
4733 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4734 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
4735 return;
4736 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4737 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
4738 return;
4739 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4740 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
4741 return;
4742 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4743 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4744 return;
4745 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4746 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
4747 return;
4748 }
4749 break;
4750 case Intrinsic::aarch64_neon_ld3:
4751 if (VT == MVT::v8i8) {
4752 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
4753 return;
4754 } else if (VT == MVT::v16i8) {
4755 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
4756 return;
4757 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4758 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
4759 return;
4760 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4761 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
4762 return;
4763 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4764 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
4765 return;
4766 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4767 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
4768 return;
4769 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4770 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4771 return;
4772 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4773 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
4774 return;
4775 }
4776 break;
4777 case Intrinsic::aarch64_neon_ld4:
4778 if (VT == MVT::v8i8) {
4779 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
4780 return;
4781 } else if (VT == MVT::v16i8) {
4782 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
4783 return;
4784 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4785 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
4786 return;
4787 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4788 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
4789 return;
4790 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4791 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
4792 return;
4793 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4794 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
4795 return;
4796 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4797 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4798 return;
4799 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4800 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
4801 return;
4802 }
4803 break;
4804 case Intrinsic::aarch64_neon_ld2r:
4805 if (VT == MVT::v8i8) {
4806 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
4807 return;
4808 } else if (VT == MVT::v16i8) {
4809 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
4810 return;
4811 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4812 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
4813 return;
4814 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4815 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
4816 return;
4817 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4818 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
4819 return;
4820 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4821 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
4822 return;
4823 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4824 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
4825 return;
4826 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4827 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
4828 return;
4829 }
4830 break;
4831 case Intrinsic::aarch64_neon_ld3r:
4832 if (VT == MVT::v8i8) {
4833 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
4834 return;
4835 } else if (VT == MVT::v16i8) {
4836 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
4837 return;
4838 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4839 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
4840 return;
4841 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4842 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
4843 return;
4844 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4845 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
4846 return;
4847 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4848 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
4849 return;
4850 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4851