LLVM 20.0.0git
AArch64ISelDAGToDAG.cpp
1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
16#include "llvm/ADT/APSInt.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34// https://github.com/llvm/llvm-project/issues/114425
35#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
36#pragma inline_depth(0)
37#endif
38
39//===--------------------------------------------------------------------===//
40/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
41/// instructions for SelectionDAG operations.
42///
43namespace {
44
45class AArch64DAGToDAGISel : public SelectionDAGISel {
46
47 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
48 /// make the right decision when generating code for different targets.
49 const AArch64Subtarget *Subtarget;
50
51public:
52 AArch64DAGToDAGISel() = delete;
53
54 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
55 CodeGenOptLevel OptLevel)
56 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
57
58 bool runOnMachineFunction(MachineFunction &MF) override {
59 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
 60 return SelectionDAGISel::runOnMachineFunction(MF);
 61 }
62
63 void Select(SDNode *Node) override;
64
65 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
66 /// inline asm expressions.
 67 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
 68 InlineAsm::ConstraintCode ConstraintID,
69 std::vector<SDValue> &OutOps) override;
70
71 template <signed Low, signed High, signed Scale>
72 bool SelectRDVLImm(SDValue N, SDValue &Imm);
73
74 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
75 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
76 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
77 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
78 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
79 return SelectShiftedRegister(N, false, Reg, Shift);
80 }
81 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
82 return SelectShiftedRegister(N, true, Reg, Shift);
83 }
84 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
85 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
86 }
87 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
88 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
89 }
90 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
91 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
92 }
93 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
94 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
95 }
96 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
97 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
98 }
99 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
100 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
101 }
102 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
103 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
104 }
105 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
106 return SelectAddrModeIndexed(N, 1, Base, OffImm);
107 }
108 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
109 return SelectAddrModeIndexed(N, 2, Base, OffImm);
110 }
111 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
112 return SelectAddrModeIndexed(N, 4, Base, OffImm);
113 }
114 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
115 return SelectAddrModeIndexed(N, 8, Base, OffImm);
116 }
117 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
118 return SelectAddrModeIndexed(N, 16, Base, OffImm);
119 }
120 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
121 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
122 }
123 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
124 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
125 }
126 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
127 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
128 }
129 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
130 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
131 }
132 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
133 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
134 }
135 template <unsigned Size, unsigned Max>
136 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
137 // Test if there is an appropriate addressing mode and check if the
138 // immediate fits.
139 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
140 if (Found) {
141 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
142 int64_t C = CI->getSExtValue();
143 if (C <= Max)
144 return true;
145 }
146 }
147
148 // Otherwise, base only, materialize address in register.
149 Base = N;
150 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
151 return true;
152 }
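  // Illustration: with Size=8 and Max=7, an address (add x0, #48) is accepted
  // with Base=x0 and OffImm=6 (48/8), since 6 <= Max. For (add x0, #128) the
  // scaled offset 16 exceeds Max, so the whole add is kept as the base and
  // OffImm is 0.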
153
154 template<int Width>
155 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
156 SDValue &SignExtend, SDValue &DoShift) {
157 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
158 }
159
160 template<int Width>
161 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
162 SDValue &SignExtend, SDValue &DoShift) {
163 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
164 }
165
166 bool SelectExtractHigh(SDValue N, SDValue &Res) {
167 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
168 N = N->getOperand(0);
169 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
170 !isa<ConstantSDNode>(N->getOperand(1)))
171 return false;
172 EVT VT = N->getValueType(0);
173 EVT LVT = N->getOperand(0).getValueType();
174 unsigned Index = N->getConstantOperandVal(1);
175 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
176 Index != VT.getVectorNumElements())
177 return false;
178 Res = N->getOperand(0);
179 return true;
180 }
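  // For example, (extract_subvector (v8i16 X), (i64 4)) matches and returns
  // the 128-bit source X in Res: the extracted v4i16 is the high 64-bit half
  // (elements 4..7), i.e. Index == VT.getVectorNumElements().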
181
182 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
183 if (N.getOpcode() != AArch64ISD::VLSHR)
184 return false;
185 SDValue Op = N->getOperand(0);
186 EVT VT = Op.getValueType();
187 unsigned ShtAmt = N->getConstantOperandVal(1);
188 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
189 return false;
190
191 APInt Imm;
192 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
193 Imm = APInt(VT.getScalarSizeInBits(),
194 Op.getOperand(1).getConstantOperandVal(0)
195 << Op.getOperand(1).getConstantOperandVal(1));
196 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
197 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
198 Imm = APInt(VT.getScalarSizeInBits(),
199 Op.getOperand(1).getConstantOperandVal(0));
200 else
201 return false;
202
203 if (Imm != 1ULL << (ShtAmt - 1))
204 return false;
205
206 Res1 = Op.getOperand(0);
207 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
208 return true;
209 }
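  // For example, for a v8i16 value, (vlshr (add X, (dup #8)), #4) matches with
  // Res1=X and Res2=4, because 8 == 1 << (4 - 1): adding half of the rounding
  // increment before the shift makes this a rounding shift right.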
210
211 bool SelectDupZeroOrUndef(SDValue N) {
212 switch(N->getOpcode()) {
213 case ISD::UNDEF:
214 return true;
215 case AArch64ISD::DUP:
216 case ISD::SPLAT_VECTOR: {
217 auto Opnd0 = N->getOperand(0);
218 if (isNullConstant(Opnd0))
219 return true;
220 if (isNullFPConstant(Opnd0))
221 return true;
222 break;
223 }
224 default:
225 break;
226 }
227
228 return false;
229 }
230
231 bool SelectAny(SDValue) { return true; }
232
233 bool SelectDupZero(SDValue N) {
234 switch(N->getOpcode()) {
235 case AArch64ISD::DUP:
236 case ISD::SPLAT_VECTOR: {
237 auto Opnd0 = N->getOperand(0);
238 if (isNullConstant(Opnd0))
239 return true;
240 if (isNullFPConstant(Opnd0))
241 return true;
242 break;
243 }
244 }
245
246 return false;
247 }
248
249 template<MVT::SimpleValueType VT>
250 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
251 return SelectSVEAddSubImm(N, VT, Imm, Shift);
252 }
253
254 template <MVT::SimpleValueType VT, bool Negate>
255 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
256 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
257 }
258
259 template <MVT::SimpleValueType VT>
260 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
261 return SelectSVECpyDupImm(N, VT, Imm, Shift);
262 }
263
264 template <MVT::SimpleValueType VT, bool Invert = false>
265 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
266 return SelectSVELogicalImm(N, VT, Imm, Invert);
267 }
268
269 template <MVT::SimpleValueType VT>
270 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
271 return SelectSVEArithImm(N, VT, Imm);
272 }
273
274 template <unsigned Low, unsigned High, bool AllowSaturation = false>
275 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
276 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
277 }
278
279 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
280 if (N->getOpcode() != ISD::SPLAT_VECTOR)
281 return false;
282
283 EVT EltVT = N->getValueType(0).getVectorElementType();
284 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
285 /* High */ EltVT.getFixedSizeInBits(),
286 /* AllowSaturation */ true, Imm);
287 }
288
289 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
290 template<signed Min, signed Max, signed Scale, bool Shift>
291 bool SelectCntImm(SDValue N, SDValue &Imm) {
292 if (!isa<ConstantSDNode>(N))
293 return false;
294
295 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
296 if (Shift)
297 MulImm = 1LL << MulImm;
298
299 if ((MulImm % std::abs(Scale)) != 0)
300 return false;
301
302 MulImm /= Scale;
303 if ((MulImm >= Min) && (MulImm <= Max)) {
304 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
305 return true;
306 }
307
308 return false;
309 }
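  // Illustration: with Min=1, Max=16, Scale=16 and Shift=false, a constant of
  // 48 gives MulImm = 48/16 = 3, which is in range, so Imm becomes 3. With
  // Shift=true a constant of 5 stands for a multiplier of 1 << 5 = 32, giving
  // Imm = 2.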
310
311 template <signed Max, signed Scale>
312 bool SelectEXTImm(SDValue N, SDValue &Imm) {
313 if (!isa<ConstantSDNode>(N))
314 return false;
315
316 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
317
318 if (MulImm >= 0 && MulImm <= Max) {
319 MulImm *= Scale;
320 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
321 return true;
322 }
323
324 return false;
325 }
326
327 template <unsigned BaseReg, unsigned Max>
328 bool ImmToReg(SDValue N, SDValue &Imm) {
329 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
330 uint64_t C = CI->getZExtValue();
331
332 if (C > Max)
333 return false;
334
335 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
336 return true;
337 }
338 return false;
339 }
340
341 /// Form sequences of consecutive 64/128-bit registers for use in NEON
342 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
 343 /// between 1 and 4 elements. If it contains a single element, that element is
 344 /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
 345 SDValue createDTuple(ArrayRef<SDValue> Vecs);
 346 SDValue createQTuple(ArrayRef<SDValue> Vecs);
 347 // Form a sequence of SVE registers for instructions using a list of vectors,
348 // e.g. structured loads and stores (ldN, stN).
349 SDValue createZTuple(ArrayRef<SDValue> Vecs);
350
351 // Similar to above, except the register must start at a multiple of the
352 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
353 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
354
355 /// Generic helper for the createDTuple/createQTuple
356 /// functions. Those should almost always be called instead.
357 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
358 const unsigned SubRegs[]);
359
360 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
361
362 bool tryIndexedLoad(SDNode *N);
363
364 void SelectPtrauthAuth(SDNode *N);
365 void SelectPtrauthResign(SDNode *N);
366
367 bool trySelectStackSlotTagP(SDNode *N);
368 void SelectTagP(SDNode *N);
369
370 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
371 unsigned SubRegIdx);
372 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
373 unsigned SubRegIdx);
374 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
375 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
376 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
377 unsigned Opc_rr, unsigned Opc_ri,
378 bool IsIntr = false);
379 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
380 unsigned Scale, unsigned Opc_ri,
381 unsigned Opc_rr);
382 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
383 bool IsZmMulti, unsigned Opcode,
384 bool HasPred = false);
385 void SelectPExtPair(SDNode *N, unsigned Opc);
386 void SelectWhilePair(SDNode *N, unsigned Opc);
387 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
388 void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
389 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
390 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
391 bool IsTupleInput, unsigned Opc);
392 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
393
394 template <unsigned MaxIdx, unsigned Scale>
395 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
396 unsigned Op);
397 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
398 unsigned Op, unsigned MaxIdx, unsigned Scale,
399 unsigned BaseReg = 0);
400 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
401 /// SVE Reg+Imm addressing mode.
402 template <int64_t Min, int64_t Max>
403 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
404 SDValue &OffImm);
405 /// SVE Reg+Reg address mode.
406 template <unsigned Scale>
407 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
408 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
409 }
410
411 void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
412 unsigned Opc, uint32_t MaxImm);
413
414 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc);
415
416 template <unsigned MaxIdx, unsigned Scale>
417 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
418 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
419 }
420
421 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
422 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
423 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
424 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
425 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
426 unsigned Opc_rr, unsigned Opc_ri);
427 std::tuple<unsigned, SDValue, SDValue>
428 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
429 const SDValue &OldBase, const SDValue &OldOffset,
430 unsigned Scale);
431
432 bool tryBitfieldExtractOp(SDNode *N);
433 bool tryBitfieldExtractOpFromSExt(SDNode *N);
434 bool tryBitfieldInsertOp(SDNode *N);
435 bool tryBitfieldInsertInZeroOp(SDNode *N);
436 bool tryShiftAmountMod(SDNode *N);
437
438 bool tryReadRegister(SDNode *N);
439 bool tryWriteRegister(SDNode *N);
440
441 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
442 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
443
444 bool trySelectXAR(SDNode *N);
445
446// Include the pieces autogenerated from the target description.
447#include "AArch64GenDAGISel.inc"
448
449private:
450 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
451 SDValue &Shift);
452 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
453 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
454 SDValue &OffImm) {
455 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
456 }
457 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
458 unsigned Size, SDValue &Base,
459 SDValue &OffImm);
460 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
461 SDValue &OffImm);
462 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
463 SDValue &OffImm);
464 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
465 SDValue &Offset, SDValue &SignExtend,
466 SDValue &DoShift);
467 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
468 SDValue &Offset, SDValue &SignExtend,
469 SDValue &DoShift);
470 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
471 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
472 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
473 SDValue &Offset, SDValue &SignExtend);
474
475 template<unsigned RegWidth>
476 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
477 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
478 }
479
480 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
481
482 template<unsigned RegWidth>
483 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
484 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
485 }
486
487 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
488 unsigned Width);
489
490 bool SelectCMP_SWAP(SDNode *N);
491
492 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
493 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
494 bool Negate);
495 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
496 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
497
498 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
499 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
500 bool AllowSaturation, SDValue &Imm);
501
502 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
503 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
504 SDValue &Offset);
505 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
506 SDValue &Offset, unsigned Scale = 1);
507
508 bool SelectAllActivePredicate(SDValue N);
509 bool SelectAnyPredicate(SDValue N);
510};
511
512class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
513public:
514 static char ID;
515 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
516 CodeGenOptLevel OptLevel)
 517 : SelectionDAGISelLegacy(
 518 ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
519};
520} // end anonymous namespace
521
522char AArch64DAGToDAGISelLegacy::ID = 0;
523
524INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
525
526/// isIntImmediate - This method tests to see if the node is a constant
527/// operand. If so, Imm will receive the value.
528static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
529 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
530 Imm = C->getZExtValue();
531 return true;
532 }
533 return false;
534}
535
536// isIntImmediate - This method tests to see if N is a constant operand.
537// If so, Imm will receive the value.
538static bool isIntImmediate(SDValue N, uint64_t &Imm) {
539 return isIntImmediate(N.getNode(), Imm);
540}
541
542// isOpcWithIntImmediate - This method tests to see if the node has a specific
543// opcode and an immediate integer right operand.
544// If so, Imm will receive that value.
545static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
546 uint64_t &Imm) {
547 return N->getOpcode() == Opc &&
548 isIntImmediate(N->getOperand(1).getNode(), Imm);
549}
550
551// isIntImmediateEq - This method tests to see if N is a constant operand that
552// is equivalent to 'ImmExpected'.
553#ifndef NDEBUG
554static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
555 uint64_t Imm;
556 if (!isIntImmediate(N.getNode(), Imm))
557 return false;
558 return Imm == ImmExpected;
559}
560#endif
561
562bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
563 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
564 std::vector<SDValue> &OutOps) {
565 switch(ConstraintID) {
566 default:
567 llvm_unreachable("Unexpected asm memory constraint");
568 case InlineAsm::ConstraintCode::m:
569 case InlineAsm::ConstraintCode::o:
570 case InlineAsm::ConstraintCode::Q:
571 // We need to make sure that this one operand does not end up in XZR, thus
572 // require the address to be in a PointerRegClass register.
573 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
574 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
575 SDLoc dl(Op);
576 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
577 SDValue NewOp =
578 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
579 dl, Op.getValueType(),
580 Op, RC), 0);
581 OutOps.push_back(NewOp);
582 return false;
583 }
584 return true;
585}
586
587/// SelectArithImmed - Select an immediate value that can be represented as
588/// a 12-bit value shifted left by either 0 or 12. If so, return true with
589/// Val set to the 12-bit value and Shift set to the shifter operand.
590bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
591 SDValue &Shift) {
 592 // This function is called from the addsub_shifted_imm ComplexPattern,
 593 // which lists [imm] as the set of opcodes it is interested in. However,
 594 // we still need to check whether the operand is actually an immediate
 595 // here, because the ComplexPattern opcode list is only used in
 596 // root-level opcode matching.
597 if (!isa<ConstantSDNode>(N.getNode()))
598 return false;
599
600 uint64_t Immed = N.getNode()->getAsZExtVal();
601 unsigned ShiftAmt;
602
603 if (Immed >> 12 == 0) {
604 ShiftAmt = 0;
605 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
606 ShiftAmt = 12;
607 Immed = Immed >> 12;
608 } else
609 return false;
610
611 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
612 SDLoc dl(N);
613 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
614 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
615 return true;
616}
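// For example, 0xabc selects as Val=0xabc, Shift=LSL #0; 0xabc000 selects as
// Val=0xabc, Shift=LSL #12; 0xabc001 is rejected because it is neither a
// 12-bit value nor a 12-bit value shifted left by 12.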
617
618/// SelectNegArithImmed - As above, but negates the value before trying to
619/// select it.
620bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
621 SDValue &Shift) {
 622 // This function is called from the addsub_shifted_imm ComplexPattern,
 623 // which lists [imm] as the set of opcodes it is interested in. However,
 624 // we still need to check whether the operand is actually an immediate
 625 // here, because the ComplexPattern opcode list is only used in
 626 // root-level opcode matching.
627 if (!isa<ConstantSDNode>(N.getNode()))
628 return false;
629
630 // The immediate operand must be a 24-bit zero-extended immediate.
631 uint64_t Immed = N.getNode()->getAsZExtVal();
632
633 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
634 // have the opposite effect on the C flag, so this pattern mustn't match under
635 // those circumstances.
636 if (Immed == 0)
637 return false;
638
639 if (N.getValueType() == MVT::i32)
640 Immed = ~((uint32_t)Immed) + 1;
641 else
642 Immed = ~Immed + 1ULL;
643 if (Immed & 0xFFFFFFFFFF000000ULL)
644 return false;
645
646 Immed &= 0xFFFFFFULL;
647 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
648 Shift);
649}
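// For example, for an i32 operand of -5 (0xfffffffb) the negation yields 5,
// which SelectArithImmed accepts, so e.g. a CMP against -5 can be selected as
// a CMN with 5. An operand of 0 is rejected because CMP #0 and CMN #0 set the
// C flag differently.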
650
651/// getShiftTypeForNode - Translate a shift node to the corresponding
652/// ShiftType value.
 653static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
 654 switch (N.getOpcode()) {
 655 default:
 656 return AArch64_AM::InvalidShiftExtend;
 657 case ISD::SHL:
658 return AArch64_AM::LSL;
659 case ISD::SRL:
660 return AArch64_AM::LSR;
661 case ISD::SRA:
662 return AArch64_AM::ASR;
663 case ISD::ROTR:
664 return AArch64_AM::ROR;
665 }
666}
667
668/// Determine whether it is worth it to fold SHL into the addressing
669/// mode.
 670static bool isWorthFoldingSHL(SDValue V) {
 671 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
672 // It is worth folding logical shift of up to three places.
673 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
674 if (!CSD)
675 return false;
676 unsigned ShiftVal = CSD->getZExtValue();
677 if (ShiftVal > 3)
678 return false;
679
680 // Check if this particular node is reused in any non-memory related
681 // operation. If yes, do not try to fold this node into the address
682 // computation, since the computation will be kept.
683 const SDNode *Node = V.getNode();
684 for (SDNode *UI : Node->users())
685 if (!isa<MemSDNode>(*UI))
686 for (SDNode *UII : UI->users())
687 if (!isa<MemSDNode>(*UII))
688 return false;
689 return true;
690}
691
692/// Determine whether it is worth folding V into an extended register
693/// addressing mode.
694bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
695 // Trivial if we are optimizing for code size or if there is only
696 // one use of the value.
697 if (CurDAG->shouldOptForSize() || V.hasOneUse())
698 return true;
699
700 // If a subtarget has a slow shift, folding a shift into multiple loads
701 // costs additional micro-ops.
702 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
703 return false;
704
705 // Check whether we're going to emit the address arithmetic anyway because
706 // it's used by a non-address operation.
707 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
708 return true;
709 if (V.getOpcode() == ISD::ADD) {
710 const SDValue LHS = V.getOperand(0);
711 const SDValue RHS = V.getOperand(1);
712 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
713 return true;
714 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
715 return true;
716 }
717
718 // It hurts otherwise, since the value will be reused.
719 return false;
720}
721
722/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
723/// to select more shifted-register operands.
724bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
725 SDValue &Shift) {
726 EVT VT = N.getValueType();
727 if (VT != MVT::i32 && VT != MVT::i64)
728 return false;
729
730 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
731 return false;
732 SDValue LHS = N.getOperand(0);
733 if (!LHS->hasOneUse())
734 return false;
735
736 unsigned LHSOpcode = LHS->getOpcode();
737 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
738 return false;
739
740 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
741 if (!ShiftAmtNode)
742 return false;
743
744 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
745 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
746 if (!RHSC)
747 return false;
748
749 APInt AndMask = RHSC->getAPIntValue();
750 unsigned LowZBits, MaskLen;
751 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
752 return false;
753
754 unsigned BitWidth = N.getValueSizeInBits();
755 SDLoc DL(LHS);
756 uint64_t NewShiftC;
757 unsigned NewShiftOp;
758 if (LHSOpcode == ISD::SHL) {
759 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
760 // BitWidth != LowZBits + MaskLen doesn't match the pattern
761 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
762 return false;
763
764 NewShiftC = LowZBits - ShiftAmtC;
765 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
766 } else {
767 if (LowZBits == 0)
768 return false;
769
770 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
771 NewShiftC = LowZBits + ShiftAmtC;
772 if (NewShiftC >= BitWidth)
773 return false;
774
 775 // SRA needs all high bits
776 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
777 return false;
778
779 // SRL high bits can be 0 or 1
780 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
781 return false;
782
783 if (LHSOpcode == ISD::SRL)
784 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
785 else
786 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
787 }
788
789 assert(NewShiftC < BitWidth && "Invalid shift amount");
790 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
791 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
792 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
793 NewShiftAmt, BitWidthMinus1),
794 0);
795 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
796 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
797 return true;
798}
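// Worked example: for i32 (and (shl X, 2), 0xfffffff0) the mask has LowZBits=4
// and MaskLen=28, so Reg becomes (UBFMWri X, 2, 31), i.e. X >> 2, and Shift is
// LSL #4: ((X << 2) & 0xfffffff0) == ((X >> 2) << 4).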
799
800/// getExtendTypeForNode - Translate an extend node to the corresponding
801/// ExtendType value.
 802static AArch64_AM::ShiftExtendType
 803getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
804 if (N.getOpcode() == ISD::SIGN_EXTEND ||
805 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
806 EVT SrcVT;
807 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
808 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
809 else
810 SrcVT = N.getOperand(0).getValueType();
811
812 if (!IsLoadStore && SrcVT == MVT::i8)
813 return AArch64_AM::SXTB;
814 else if (!IsLoadStore && SrcVT == MVT::i16)
815 return AArch64_AM::SXTH;
816 else if (SrcVT == MVT::i32)
817 return AArch64_AM::SXTW;
 818 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
 819
 820 return AArch64_AM::InvalidShiftExtend;
821 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
822 N.getOpcode() == ISD::ANY_EXTEND) {
823 EVT SrcVT = N.getOperand(0).getValueType();
824 if (!IsLoadStore && SrcVT == MVT::i8)
825 return AArch64_AM::UXTB;
826 else if (!IsLoadStore && SrcVT == MVT::i16)
827 return AArch64_AM::UXTH;
828 else if (SrcVT == MVT::i32)
829 return AArch64_AM::UXTW;
 830 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
 831
 832 return AArch64_AM::InvalidShiftExtend;
833 } else if (N.getOpcode() == ISD::AND) {
834 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
835 if (!CSD)
 836 return AArch64_AM::InvalidShiftExtend;
 837 uint64_t AndMask = CSD->getZExtValue();
838
839 switch (AndMask) {
840 default:
 841 return AArch64_AM::InvalidShiftExtend;
 842 case 0xFF:
843 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
844 case 0xFFFF:
845 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
846 case 0xFFFFFFFF:
847 return AArch64_AM::UXTW;
848 }
849 }
850
 851 return AArch64_AM::InvalidShiftExtend;
 852}
853
854/// Determine whether it is worth folding V into an extended register of an
855/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
856/// instruction, and the shift should be treated as worth folding even if it
857/// has multiple uses.
858bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
859 // Trivial if we are optimizing for code size or if there is only
860 // one use of the value.
861 if (CurDAG->shouldOptForSize() || V.hasOneUse())
862 return true;
863
864 // If a subtarget has a fastpath LSL we can fold a logical shift into
865 // the add/sub and save a cycle.
866 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
867 V.getConstantOperandVal(1) <= 4 &&
 868 !isa<LoadSDNode>(V.getOperand(0)))
 869 return true;
870
871 // It hurts otherwise, since the value will be reused.
872 return false;
873}
874
875/// SelectShiftedRegister - Select a "shifted register" operand. If the value
876/// is not shifted, set the Shift operand to the default of "LSL 0". The logical
877/// instructions allow the shifted register to be rotated, but the arithmetic
878/// instructions do not. The AllowROR parameter specifies whether ROR is
879/// supported.
880bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
881 SDValue &Reg, SDValue &Shift) {
882 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
883 return true;
884
 885 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
 886 if (ShType == AArch64_AM::InvalidShiftExtend)
887 return false;
888 if (!AllowROR && ShType == AArch64_AM::ROR)
889 return false;
890
891 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
892 unsigned BitSize = N.getValueSizeInBits();
893 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
894 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
895
896 Reg = N.getOperand(0);
897 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
898 return isWorthFoldingALU(N, true);
899 }
900
901 return false;
902}
903
904/// Instructions that accept extend modifiers like UXTW expect the register
905/// being extended to be a GPR32, but the incoming DAG might be acting on a
906/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
907/// this is the case.
 908static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
 909 if (N.getValueType() == MVT::i32)
910 return N;
911
912 SDLoc dl(N);
913 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
914}
915
916// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
917template<signed Low, signed High, signed Scale>
918bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
919 if (!isa<ConstantSDNode>(N))
920 return false;
921
922 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
923 if ((MulImm % std::abs(Scale)) == 0) {
924 int64_t RDVLImm = MulImm / Scale;
925 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
926 Imm = CurDAG->getSignedTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
927 return true;
928 }
929 }
930
931 return false;
932}
933
934/// SelectArithExtendedRegister - Select an "extended register" operand. This
935/// operand folds in an extend followed by an optional left shift.
936bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
937 SDValue &Shift) {
 938 unsigned ShiftVal = 0;
 939 AArch64_AM::ShiftExtendType Ext;
 940
941 if (N.getOpcode() == ISD::SHL) {
942 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
943 if (!CSD)
944 return false;
945 ShiftVal = CSD->getZExtValue();
946 if (ShiftVal > 4)
947 return false;
948
949 Ext = getExtendTypeForNode(N.getOperand(0));
 950 if (Ext == AArch64_AM::InvalidShiftExtend)
 951 return false;
952
953 Reg = N.getOperand(0).getOperand(0);
954 } else {
 955 Ext = getExtendTypeForNode(N);
 956 if (Ext == AArch64_AM::InvalidShiftExtend)
 957 return false;
958
959 Reg = N.getOperand(0);
960
961 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
962 // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
963 auto isDef32 = [](SDValue N) {
964 unsigned Opc = N.getOpcode();
965 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
966 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
967 Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
968 Opc != ISD::FREEZE;
969 };
970 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
971 isDef32(Reg))
972 return false;
973 }
974
975 // AArch64 mandates that the RHS of the operation must use the smallest
976 // register class that could contain the size being extended from. Thus,
977 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
978 // there might not be an actual 32-bit value in the program. We can
 979 // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
980 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
981 Reg = narrowIfNeeded(CurDAG, Reg);
982 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
983 MVT::i32);
984 return isWorthFoldingALU(N);
985}
986
987/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
988/// operand is referred to by instructions that have an SP operand.
989bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
990 SDValue &Shift) {
 991 unsigned ShiftVal = 0;
 992 AArch64_AM::ShiftExtendType Ext;
 993
994 if (N.getOpcode() != ISD::SHL)
995 return false;
996
997 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
998 if (!CSD)
999 return false;
1000 ShiftVal = CSD->getZExtValue();
1001 if (ShiftVal > 4)
1002 return false;
1003
 1004 Ext = AArch64_AM::UXTX;
 1005 Reg = N.getOperand(0);
1006 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1007 MVT::i32);
1008 return isWorthFoldingALU(N);
1009}
1010
1011/// If there's a use of this ADDlow that's not itself a load/store then we'll
1012/// need to create a real ADD instruction from it anyway and there's no point in
1013/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1014/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1015/// leads to duplicated ADRP instructions.
 1016static bool isWorthFoldingADDlow(SDValue N) {
 1017 for (auto *User : N->users()) {
1018 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
1019 User->getOpcode() != ISD::ATOMIC_LOAD &&
1020 User->getOpcode() != ISD::ATOMIC_STORE)
1021 return false;
1022
1023 // ldar and stlr have much more restrictive addressing modes (just a
1024 // register).
1025 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
1026 return false;
1027 }
1028
1029 return true;
1030}
1031
1032/// Check if the immediate offset is valid as a scaled immediate.
1033static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1034 unsigned Size) {
1035 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1036 Offset < (Range << Log2_32(Size)))
1037 return true;
1038 return false;
1039}
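// For example, with Range=0x1000 and Size=8, the valid offsets are
// 0, 8, ..., 32760: non-negative, 8-byte aligned and below 0x1000 * 8.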
1040
1041/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1042/// immediate" address. The "Size" argument is the size in bytes of the memory
1043/// reference, which determines the scale.
1044bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1045 unsigned BW, unsigned Size,
1046 SDValue &Base,
1047 SDValue &OffImm) {
1048 SDLoc dl(N);
1049 const DataLayout &DL = CurDAG->getDataLayout();
1050 const TargetLowering *TLI = getTargetLowering();
1051 if (N.getOpcode() == ISD::FrameIndex) {
1052 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1053 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1054 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1055 return true;
1056 }
1057
 1058 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
 1059 // addressing mode selected here doesn't support labels/immediates, only base+offset.
1060 if (CurDAG->isBaseWithConstantOffset(N)) {
1061 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1062 if (IsSignedImm) {
1063 int64_t RHSC = RHS->getSExtValue();
1064 unsigned Scale = Log2_32(Size);
1065 int64_t Range = 0x1LL << (BW - 1);
1066
1067 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1068 RHSC < (Range << Scale)) {
1069 Base = N.getOperand(0);
1070 if (Base.getOpcode() == ISD::FrameIndex) {
1071 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1072 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1073 }
1074 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1075 return true;
1076 }
1077 } else {
1078 // unsigned Immediate
1079 uint64_t RHSC = RHS->getZExtValue();
1080 unsigned Scale = Log2_32(Size);
1081 uint64_t Range = 0x1ULL << BW;
1082
1083 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1084 Base = N.getOperand(0);
1085 if (Base.getOpcode() == ISD::FrameIndex) {
1086 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1087 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1088 }
1089 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1090 return true;
1091 }
1092 }
1093 }
1094 }
1095 // Base only. The address will be materialized into a register before
1096 // the memory is accessed.
1097 // add x0, Xbase, #offset
1098 // stp x1, x2, [x0]
1099 Base = N;
1100 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1101 return true;
1102}
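// Illustration: for the signed 7-bit form with Size=8 (the LDP/STP-style
// scaled imm7), the accepted byte offsets are -512 .. 504 in steps of 8, and
// OffImm holds the scaled value (offset / 8) in the range -64 .. 63.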
1103
1104/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1105/// immediate" address. The "Size" argument is the size in bytes of the memory
1106/// reference, which determines the scale.
1107bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1108 SDValue &Base, SDValue &OffImm) {
1109 SDLoc dl(N);
1110 const DataLayout &DL = CurDAG->getDataLayout();
1111 const TargetLowering *TLI = getTargetLowering();
1112 if (N.getOpcode() == ISD::FrameIndex) {
1113 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1114 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1115 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1116 return true;
1117 }
1118
1119 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1120 GlobalAddressSDNode *GAN =
1121 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1122 Base = N.getOperand(0);
1123 OffImm = N.getOperand(1);
1124 if (!GAN)
1125 return true;
1126
1127 if (GAN->getOffset() % Size == 0 &&
 1128 GAN->getGlobal()->getPointerAlignment(DL) >= Size)
 1129 return true;
1130 }
1131
1132 if (CurDAG->isBaseWithConstantOffset(N)) {
1133 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1134 int64_t RHSC = (int64_t)RHS->getZExtValue();
1135 unsigned Scale = Log2_32(Size);
1136 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1137 Base = N.getOperand(0);
1138 if (Base.getOpcode() == ISD::FrameIndex) {
1139 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1140 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1141 }
1142 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1143 return true;
1144 }
1145 }
1146 }
1147
1148 // Before falling back to our general case, check if the unscaled
1149 // instructions can handle this. If so, that's preferable.
1150 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1151 return false;
1152
1153 // Base only. The address will be materialized into a register before
1154 // the memory is accessed.
1155 // add x0, Xbase, #offset
1156 // ldr x0, [x0]
1157 Base = N;
1158 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1159 return true;
1160}
1161
1162/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1163/// immediate" address. This should only match when there is an offset that
1164/// is not valid for a scaled immediate addressing mode. The "Size" argument
1165/// is the size in bytes of the memory reference, which is needed here to know
1166/// what is valid for a scaled immediate.
1167bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1168 SDValue &Base,
1169 SDValue &OffImm) {
1170 if (!CurDAG->isBaseWithConstantOffset(N))
1171 return false;
1172 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1173 int64_t RHSC = RHS->getSExtValue();
1174 if (RHSC >= -256 && RHSC < 256) {
1175 Base = N.getOperand(0);
1176 if (Base.getOpcode() == ISD::FrameIndex) {
1177 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1178 const TargetLowering *TLI = getTargetLowering();
1179 Base = CurDAG->getTargetFrameIndex(
1180 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1181 }
1182 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1183 return true;
1184 }
1185 }
1186 return false;
1187}
1188
 1189static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
 1190 SDLoc dl(N);
1191 SDValue ImpDef = SDValue(
1192 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1193 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1194 N);
1195}
1196
1197/// Check if the given SHL node (\p N) can be used to form an
1198/// extended register for an addressing mode.
1199bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1200 bool WantExtend, SDValue &Offset,
1201 SDValue &SignExtend) {
1202 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1203 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1204 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1205 return false;
1206
1207 SDLoc dl(N);
1208 if (WantExtend) {
 1209 AArch64_AM::ShiftExtendType Ext =
 1210 getExtendTypeForNode(N.getOperand(0), true);
 1211 if (Ext == AArch64_AM::InvalidShiftExtend)
 1212 return false;
1213
1214 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1215 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1216 MVT::i32);
1217 } else {
1218 Offset = N.getOperand(0);
1219 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1220 }
1221
1222 unsigned LegalShiftVal = Log2_32(Size);
1223 unsigned ShiftVal = CSD->getZExtValue();
1224
1225 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1226 return false;
1227
1228 return isWorthFoldingAddr(N, Size);
1229}
1230
1231bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
 1232 SDValue &Base, SDValue &Offset,
 1233 SDValue &SignExtend,
1234 SDValue &DoShift) {
1235 if (N.getOpcode() != ISD::ADD)
1236 return false;
1237 SDValue LHS = N.getOperand(0);
1238 SDValue RHS = N.getOperand(1);
1239 SDLoc dl(N);
1240
1241 // We don't want to match immediate adds here, because they are better lowered
1242 // to the register-immediate addressing modes.
1243 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
1244 return false;
1245
1246 // Check if this particular node is reused in any non-memory related
1247 // operation. If yes, do not try to fold this node into the address
1248 // computation, since the computation will be kept.
1249 const SDNode *Node = N.getNode();
1250 for (SDNode *UI : Node->users()) {
1251 if (!isa<MemSDNode>(*UI))
1252 return false;
1253 }
1254
1255 // Remember if it is worth folding N when it produces extended register.
1256 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1257
1258 // Try to match a shifted extend on the RHS.
1259 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1260 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1261 Base = LHS;
1262 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1263 return true;
1264 }
1265
1266 // Try to match a shifted extend on the LHS.
1267 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1268 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1269 Base = RHS;
1270 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1271 return true;
1272 }
1273
1274 // There was no shift, whatever else we find.
1275 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
 1276
 1277 AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
 1278 // Try to match an unshifted extend on the LHS.
1279 if (IsExtendedRegisterWorthFolding &&
1280 (Ext = getExtendTypeForNode(LHS, true)) !=
 1281 AArch64_AM::InvalidShiftExtend) {
 1282 Base = RHS;
1283 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1284 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1285 MVT::i32);
1286 if (isWorthFoldingAddr(LHS, Size))
1287 return true;
1288 }
1289
1290 // Try to match an unshifted extend on the RHS.
1291 if (IsExtendedRegisterWorthFolding &&
1292 (Ext = getExtendTypeForNode(RHS, true)) !=
 1293 AArch64_AM::InvalidShiftExtend) {
 1294 Base = LHS;
1295 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1296 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1297 MVT::i32);
1298 if (isWorthFoldingAddr(RHS, Size))
1299 return true;
1300 }
1301
1302 return false;
1303}
1304
 1305// Check if the given immediate is preferred by ADD. If an immediate can be
 1306// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
 1307// encoded by one MOVZ, return true.
1308static bool isPreferredADD(int64_t ImmOff) {
1309 // Constant in [0x0, 0xfff] can be encoded in ADD.
1310 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1311 return true;
1312 // Check if it can be encoded in an "ADD LSL #12".
1313 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
 1314 // As a single MOVZ is faster than an "ADD LSL #12", ignore such constants.
1315 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1316 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1317 return false;
1318}
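// Worked example: 0xabc is preferred (it fits the plain 12-bit ADD immediate);
// 0xabc000 is preferred (it fits "ADD ... LSL #12" and cannot be built by a
// single MOVZ, since it straddles a 16-bit halfword boundary); 0xab0000 is not
// preferred, because a single MOVZ materializes it more cheaply.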
1319
1320bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
 1321 SDValue &Base, SDValue &Offset,
 1322 SDValue &SignExtend,
1323 SDValue &DoShift) {
1324 if (N.getOpcode() != ISD::ADD)
1325 return false;
1326 SDValue LHS = N.getOperand(0);
1327 SDValue RHS = N.getOperand(1);
1328 SDLoc DL(N);
1329
1330 // Check if this particular node is reused in any non-memory related
1331 // operation. If yes, do not try to fold this node into the address
1332 // computation, since the computation will be kept.
1333 const SDNode *Node = N.getNode();
1334 for (SDNode *UI : Node->users()) {
1335 if (!isa<MemSDNode>(*UI))
1336 return false;
1337 }
1338
 1339 // Watch out if RHS is a wide immediate: it cannot be selected into the
 1340 // [BaseReg+Imm] addressing mode, and it may not be encodable in an
 1341 // ADD/SUB either. In that case the [BaseReg + 0] address mode is used and
 1342 // instructions like the following are generated:
1343 // MOV X0, WideImmediate
1344 // ADD X1, BaseReg, X0
1345 // LDR X2, [X1, 0]
 1346 // In such situations, using the [BaseReg, XReg] addressing mode can save one
1347 // ADD/SUB:
1348 // MOV X0, WideImmediate
1349 // LDR X2, [BaseReg, X0]
1350 if (isa<ConstantSDNode>(RHS)) {
1351 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
 1352 // Skip immediates that can be selected by the load/store addressing mode.
 1353 // Also skip immediates that can be encoded by a single ADD (SUB is also
 1354 // checked by using -ImmOff).
1355 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1356 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1357 return false;
1358
1359 SDValue Ops[] = { RHS };
1360 SDNode *MOVI =
1361 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1362 SDValue MOVIV = SDValue(MOVI, 0);
1363 // This ADD of two X register will be selected into [Reg+Reg] mode.
1364 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1365 }
1366
1367 // Remember if it is worth folding N when it produces extended register.
1368 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1369
1370 // Try to match a shifted extend on the RHS.
1371 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1372 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1373 Base = LHS;
1374 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1375 return true;
1376 }
1377
1378 // Try to match a shifted extend on the LHS.
1379 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1380 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1381 Base = RHS;
1382 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1383 return true;
1384 }
1385
1386 // Match any non-shifted, non-extend, non-immediate add expression.
1387 Base = LHS;
1388 Offset = RHS;
1389 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1390 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1391 // Reg1 + Reg2 is free: no check needed.
1392 return true;
1393}
1394
1395SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1396 static const unsigned RegClassIDs[] = {
1397 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1398 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1399 AArch64::dsub2, AArch64::dsub3};
1400
1401 return createTuple(Regs, RegClassIDs, SubRegs);
1402}
1403
1404SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1405 static const unsigned RegClassIDs[] = {
1406 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1407 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1408 AArch64::qsub2, AArch64::qsub3};
1409
1410 return createTuple(Regs, RegClassIDs, SubRegs);
1411}
1412
1413SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1414 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1415 AArch64::ZPR3RegClassID,
1416 AArch64::ZPR4RegClassID};
1417 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1418 AArch64::zsub2, AArch64::zsub3};
1419
1420 return createTuple(Regs, RegClassIDs, SubRegs);
1421}
1422
1423SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1424 assert(Regs.size() == 2 || Regs.size() == 4);
1425
1426 // The createTuple interface requires 3 RegClassIDs for each possible
1427 // tuple type even though we only have them for ZPR2 and ZPR4.
1428 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1429 AArch64::ZPR4Mul4RegClassID};
1430 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1431 AArch64::zsub2, AArch64::zsub3};
1432 return createTuple(Regs, RegClassIDs, SubRegs);
1433}
1434
1435SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1436 const unsigned RegClassIDs[],
1437 const unsigned SubRegs[]) {
1438 // There's no special register-class for a vector-list of 1 element: it's just
1439 // a vector.
1440 if (Regs.size() == 1)
1441 return Regs[0];
1442
1443 assert(Regs.size() >= 2 && Regs.size() <= 4);
1444
1445 SDLoc DL(Regs[0]);
1446
 1447 SmallVector<SDValue, 4> Ops;
 1448
1449 // First operand of REG_SEQUENCE is the desired RegClass.
1450 Ops.push_back(
1451 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1452
1453 // Then we get pairs of source & subregister-position for the components.
1454 for (unsigned i = 0; i < Regs.size(); ++i) {
1455 Ops.push_back(Regs[i]);
1456 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1457 }
1458
1459 SDNode *N =
1460 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1461 return SDValue(N, 0);
1462}
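// For example, createQTuple({A, B}) produces
//   REG_SEQUENCE QQRegClassID, A, qsub0, B, qsub1
// as an Untyped machine node, which makes the register allocator place A and B
// in consecutive Q registers.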
1463
1464void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1465 bool isExt) {
1466 SDLoc dl(N);
1467 EVT VT = N->getValueType(0);
1468
1469 unsigned ExtOff = isExt;
1470
1471 // Form a REG_SEQUENCE to force register allocation.
1472 unsigned Vec0Off = ExtOff + 1;
1473 SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1474 N->op_begin() + Vec0Off + NumVecs);
1475 SDValue RegSeq = createQTuple(Regs);
 1476
 1477 SmallVector<SDValue, 6> Ops;
1478 if (isExt)
1479 Ops.push_back(N->getOperand(1));
1480 Ops.push_back(RegSeq);
1481 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1482 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1483}
1484
1485static std::tuple<SDValue, SDValue>
 1486extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
 1487 SDLoc DL(Disc);
1488 SDValue AddrDisc;
1489 SDValue ConstDisc;
1490
1491 // If this is a blend, remember the constant and address discriminators.
1492 // Otherwise, it's either a constant discriminator, or a non-blended
1493 // address discriminator.
1494 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1495 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
1496 AddrDisc = Disc->getOperand(1);
1497 ConstDisc = Disc->getOperand(2);
1498 } else {
1499 ConstDisc = Disc;
1500 }
1501
1502 // If the constant discriminator (either the blend RHS, or the entire
1503 // discriminator value) isn't a 16-bit constant, bail out, and let the
1504 // discriminator be computed separately.
1505 auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
1506 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
1507 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
1508
1509 // If there's no address discriminator, use XZR directly.
1510 if (!AddrDisc)
1511 AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
1512
1513 return std::make_tuple(
1514 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
1515 AddrDisc);
1516}
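// For example, a discriminator of the form (llvm.ptrauth.blend Addr, 1234)
// splits into the constant 1234 and the address part Addr; a plain 16-bit
// constant 1234 yields (1234, XZR); anything else, including a constant that
// does not fit in 16 bits, yields (0, Disc) so the discriminator is computed
// separately.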
1517
1518void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1519 SDLoc DL(N);
1520 // IntrinsicID is operand #0
1521 SDValue Val = N->getOperand(1);
1522 SDValue AUTKey = N->getOperand(2);
1523 SDValue AUTDisc = N->getOperand(3);
1524
1525 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1526 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1527
1528 SDValue AUTAddrDisc, AUTConstDisc;
1529 std::tie(AUTConstDisc, AUTAddrDisc) =
1530 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1531
1532 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1533 AArch64::X16, Val, SDValue());
1534 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
1535
1536 SDNode *AUT = CurDAG->getMachineNode(AArch64::AUT, DL, MVT::i64, Ops);
1537 ReplaceNode(N, AUT);
1538}
1539
1540void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
1541 SDLoc DL(N);
1542 // IntrinsicID is operand #0
1543 SDValue Val = N->getOperand(1);
1544 SDValue AUTKey = N->getOperand(2);
1545 SDValue AUTDisc = N->getOperand(3);
1546 SDValue PACKey = N->getOperand(4);
1547 SDValue PACDisc = N->getOperand(5);
1548
1549 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1550 unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();
1551
1552 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1553 PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);
1554
1555 SDValue AUTAddrDisc, AUTConstDisc;
1556 std::tie(AUTConstDisc, AUTAddrDisc) =
1557 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1558
1559 SDValue PACAddrDisc, PACConstDisc;
1560 std::tie(PACConstDisc, PACAddrDisc) =
1561 extractPtrauthBlendDiscriminators(PACDisc, CurDAG);
1562
1563 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1564 AArch64::X16, Val, SDValue());
1565
1566 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
1567 PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};
1568
1569 SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
1570 ReplaceNode(N, AUTPAC);
1571}
1572
1573bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1574 LoadSDNode *LD = cast<LoadSDNode>(N);
1575 if (LD->isUnindexed())
1576 return false;
1577 EVT VT = LD->getMemoryVT();
1578 EVT DstVT = N->getValueType(0);
1579 ISD::MemIndexedMode AM = LD->getAddressingMode();
1580 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1581
1582 // We're not doing validity checking here. That was done when checking
1583 // if we should mark the load as indexed or not. We're just selecting
1584 // the right instruction.
1585 unsigned Opcode = 0;
1586
1587 ISD::LoadExtType ExtType = LD->getExtensionType();
1588 bool InsertTo64 = false;
1589 if (VT == MVT::i64)
1590 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1591 else if (VT == MVT::i32) {
1592 if (ExtType == ISD::NON_EXTLOAD)
1593 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1594 else if (ExtType == ISD::SEXTLOAD)
1595 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1596 else {
1597 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1598 InsertTo64 = true;
1599 // The result of the load is only i32. It's the subreg_to_reg that makes
1600 // it into an i64.
1601 DstVT = MVT::i32;
1602 }
1603 } else if (VT == MVT::i16) {
1604 if (ExtType == ISD::SEXTLOAD) {
1605 if (DstVT == MVT::i64)
1606 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1607 else
1608 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1609 } else {
1610 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1611 InsertTo64 = DstVT == MVT::i64;
1612 // The result of the load is only i32. It's the subreg_to_reg that makes
1613 // it into an i64.
1614 DstVT = MVT::i32;
1615 }
1616 } else if (VT == MVT::i8) {
1617 if (ExtType == ISD::SEXTLOAD) {
1618 if (DstVT == MVT::i64)
1619 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1620 else
1621 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1622 } else {
1623 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1624 InsertTo64 = DstVT == MVT::i64;
1625 // The result of the load is only i32. It's the subreg_to_reg that makes
1626 // it into an i64.
1627 DstVT = MVT::i32;
1628 }
1629 } else if (VT == MVT::f16) {
1630 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1631 } else if (VT == MVT::bf16) {
1632 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1633 } else if (VT == MVT::f32) {
1634 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1635 } else if (VT == MVT::f64 || VT.is64BitVector()) {
1636 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1637 } else if (VT.is128BitVector()) {
1638 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1639 } else
1640 return false;
1641 SDValue Chain = LD->getChain();
1642 SDValue Base = LD->getBasePtr();
1643 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1644 int OffsetVal = (int)OffsetOp->getZExtValue();
1645 SDLoc dl(N);
1646 SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1647 SDValue Ops[] = { Base, Offset, Chain };
1648 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1649 MVT::Other, Ops);
1650
1651 // Transfer memoperands.
1652 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1653 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1654
1655 // Either way, we're replacing the node, so tell the caller that.
1656 SDValue LoadedVal = SDValue(Res, 1);
1657 if (InsertTo64) {
1658 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1659 LoadedVal =
1660 SDValue(CurDAG->getMachineNode(
1661 AArch64::SUBREG_TO_REG, dl, MVT::i64,
1662 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1663 SubReg),
1664 0);
1665 }
1666
1667 ReplaceUses(SDValue(N, 0), LoadedVal);
1668 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1669 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1670 CurDAG->RemoveDeadNode(N);
1671 return true;
1672}
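// For example (illustrative): a pre-indexed, non-extending i64 load selects
// LDRXpre, whose machine node produces {writeback base, loaded value, chain};
// the ReplaceUses calls above rewire those to the indexed load's
// (value, writeback, chain) results.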
1673
1674void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1675 unsigned SubRegIdx) {
1676 SDLoc dl(N);
1677 EVT VT = N->getValueType(0);
1678 SDValue Chain = N->getOperand(0);
1679
1680 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1681 Chain};
1682
1683 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1684
1685 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1686 SDValue SuperReg = SDValue(Ld, 0);
1687 for (unsigned i = 0; i < NumVecs; ++i)
1688 ReplaceUses(SDValue(N, i),
1689 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1690
1691 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1692
1693 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1694 // because it's too simple to have needed special treatment during lowering.
1695 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1696 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1697 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1698 }
1699
1700 CurDAG->RemoveDeadNode(N);
1701}
1702
1703void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1704 unsigned Opc, unsigned SubRegIdx) {
1705 SDLoc dl(N);
1706 EVT VT = N->getValueType(0);
1707 SDValue Chain = N->getOperand(0);
1708
1709 SDValue Ops[] = {N->getOperand(1), // Mem operand
1710 N->getOperand(2), // Incremental
1711 Chain};
1712
1713 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1714 MVT::Untyped, MVT::Other};
1715
1716 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1717
1718 // Update uses of write back register
1719 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1720
1721 // Update uses of vector list
1722 SDValue SuperReg = SDValue(Ld, 1);
1723 if (NumVecs == 1)
1724 ReplaceUses(SDValue(N, 0), SuperReg);
1725 else
1726 for (unsigned i = 0; i < NumVecs; ++i)
1727 ReplaceUses(SDValue(N, i),
1728 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1729
1730 // Update the chain
1731 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1732 CurDAG->RemoveDeadNode(N);
1733}
1734
1735/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1736/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1737/// new Base and an SDValue representing the new offset.
1738std::tuple<unsigned, SDValue, SDValue>
1739AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1740 unsigned Opc_ri,
1741 const SDValue &OldBase,
1742 const SDValue &OldOffset,
1743 unsigned Scale) {
1744 SDValue NewBase = OldBase;
1745 SDValue NewOffset = OldOffset;
1746 // Detect a possible Reg+Imm addressing mode.
1747 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1748 N, OldBase, NewBase, NewOffset);
1749
1750 // Detect a possible reg+reg addressing mode, but only if we haven't already
1751 // detected a Reg+Imm one.
1752 const bool IsRegReg =
1753 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1754
1755 // Select the instruction.
1756 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1757}
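// Summary of the tuple returned above (illustrative): a reg+imm match within
// the scaled range [-8, 7] yields (Opc_ri, base, imm); otherwise a reg+reg
// match yields (Opc_rr, base, offset-register); if neither matches, the
// original base and offset are returned unchanged together with Opc_ri.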
1758
1759enum class SelectTypeKind {
1760 Int1 = 0,
1761 Int = 1,
1762 FP = 2,
1763 AnyType = 3,
1764};
1765
1766/// This function selects an opcode from a list of opcodes, which is
1767/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
1768/// element types, in this order.
1769template <SelectTypeKind Kind>
1770static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1771 // Only match scalable vector VTs
1772 if (!VT.isScalableVector())
1773 return 0;
1774
1775 EVT EltVT = VT.getVectorElementType();
1776 unsigned Key = VT.getVectorMinNumElements();
1777 switch (Kind) {
1778 case SelectTypeKind::AnyType:
1779 break;
1780 case SelectTypeKind::Int:
1781 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1782 EltVT != MVT::i64)
1783 return 0;
1784 break;
1785 case SelectTypeKind::Int1:
1786 if (EltVT != MVT::i1)
1787 return 0;
1788 break;
1789 case SelectTypeKind::FP:
1790 if (EltVT == MVT::bf16)
1791 Key = 16;
1792 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
1793 EltVT != MVT::f64)
1794 return 0;
1795 break;
1796 }
1797
1798 unsigned Offset;
1799 switch (Key) {
1800 case 16: // 8-bit or bf16
1801 Offset = 0;
1802 break;
1803 case 8: // 16-bit
1804 Offset = 1;
1805 break;
1806 case 4: // 32-bit
1807 Offset = 2;
1808 break;
1809 case 2: // 64-bit
1810 Offset = 3;
1811 break;
1812 default:
1813 return 0;
1814 }
1815
1816 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1817}
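// Worked examples (illustrative): nxv4i32 with Kind == Int has Key == 4 and
// returns Opcodes[2] (the 32-bit opcode); nxv8f16 with Kind == FP has Key == 8
// and returns Opcodes[1]; bf16 vectors are re-keyed to 16 and use Opcodes[0].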
1818
1819// This function is almost identical to SelectWhilePair, but has an
1820// extra check on the range of the immediate operand.
1821// TODO: Merge these two functions together at some point?
1822void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
1823 // Immediate can be either 0 or 1.
1824 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
1825 if (Imm->getZExtValue() > 1)
1826 return;
1827
1828 SDLoc DL(N);
1829 EVT VT = N->getValueType(0);
1830 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1831 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1832 SDValue SuperReg = SDValue(WhilePair, 0);
1833
1834 for (unsigned I = 0; I < 2; ++I)
1835 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1836 AArch64::psub0 + I, DL, VT, SuperReg));
1837
1838 CurDAG->RemoveDeadNode(N);
1839}
1840
1841void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1842 SDLoc DL(N);
1843 EVT VT = N->getValueType(0);
1844
1845 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1846
1847 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1848 SDValue SuperReg = SDValue(WhilePair, 0);
1849
1850 for (unsigned I = 0; I < 2; ++I)
1851 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1852 AArch64::psub0 + I, DL, VT, SuperReg));
1853
1854 CurDAG->RemoveDeadNode(N);
1855}
1856
1857void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1858 unsigned Opcode) {
1859 EVT VT = N->getValueType(0);
1860 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1861 SDValue Ops = createZTuple(Regs);
1862 SDLoc DL(N);
1863 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1864 SDValue SuperReg = SDValue(Intrinsic, 0);
1865 for (unsigned i = 0; i < NumVecs; ++i)
1866 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1867 AArch64::zsub0 + i, DL, VT, SuperReg));
1868
1869 CurDAG->RemoveDeadNode(N);
1870}
1871
1872void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
1873 unsigned Opcode) {
1874 SDLoc DL(N);
1875 EVT VT = N->getValueType(0);
1876 SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
1877 Ops.push_back(/*Chain*/ N->getOperand(0));
1878
1879 SDNode *Instruction =
1880 CurDAG->getMachineNode(Opcode, DL, {MVT::Untyped, MVT::Other}, Ops);
1881 SDValue SuperReg = SDValue(Instruction, 0);
1882
1883 for (unsigned i = 0; i < NumVecs; ++i)
1884 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1885 AArch64::zsub0 + i, DL, VT, SuperReg));
1886
1887 // Copy chain
1888 unsigned ChainIdx = NumVecs;
1889 ReplaceUses(SDValue(N, ChainIdx), SDValue(Instruction, 1));
1890 CurDAG->RemoveDeadNode(N);
1891}
1892
1893void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
1894 unsigned NumVecs,
1895 bool IsZmMulti,
1896 unsigned Opcode,
1897 bool HasPred) {
1898 assert(Opcode != 0 && "Unexpected opcode");
1899
1900 SDLoc DL(N);
1901 EVT VT = N->getValueType(0);
1902 unsigned FirstVecIdx = HasPred ? 2 : 1;
1903
1904 auto GetMultiVecOperand = [=](unsigned StartIdx) {
1905 SmallVector<SDValue, 4> Regs(N->ops().slice(StartIdx, NumVecs));
1906 return createZMulTuple(Regs);
1907 };
1908
1909 SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
1910
1911 SDValue Zm;
1912 if (IsZmMulti)
1913 Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
1914 else
1915 Zm = N->getOperand(NumVecs + FirstVecIdx);
1916
1917 SDNode *Intrinsic;
1918 if (HasPred)
1919 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
1920 N->getOperand(1), Zdn, Zm);
1921 else
1922 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
1923 SDValue SuperReg = SDValue(Intrinsic, 0);
1924 for (unsigned i = 0; i < NumVecs; ++i)
1925 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1926 AArch64::zsub0 + i, DL, VT, SuperReg));
1927
1928 CurDAG->RemoveDeadNode(N);
1929}
1930
1931void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
1932 unsigned Scale, unsigned Opc_ri,
1933 unsigned Opc_rr, bool IsIntr) {
1934 assert(Scale < 5 && "Invalid scaling value.");
1935 SDLoc DL(N);
1936 EVT VT = N->getValueType(0);
1937 SDValue Chain = N->getOperand(0);
1938
1939 // Optimize addressing mode.
1940 SDValue Base, Offset;
1941 unsigned Opc;
1942 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1943 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
1944 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
1945
1946 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
1947 Base, // Memory operand
1948 Offset, Chain};
1949
1950 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1951
1952 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1953 SDValue SuperReg = SDValue(Load, 0);
1954 for (unsigned i = 0; i < NumVecs; ++i)
1955 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1956 AArch64::zsub0 + i, DL, VT, SuperReg));
1957
1958 // Copy chain
1959 unsigned ChainIdx = NumVecs;
1960 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1961 CurDAG->RemoveDeadNode(N);
1962}
1963
1964void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
1965 unsigned NumVecs,
1966 unsigned Scale,
1967 unsigned Opc_ri,
1968 unsigned Opc_rr) {
1969 assert(Scale < 4 && "Invalid scaling value.");
1970 SDLoc DL(N);
1971 EVT VT = N->getValueType(0);
1972 SDValue Chain = N->getOperand(0);
1973
1974 SDValue PNg = N->getOperand(2);
1975 SDValue Base = N->getOperand(3);
1976 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
1977 unsigned Opc;
1978 std::tie(Opc, Base, Offset) =
1979 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
1980
1981 SDValue Ops[] = {PNg, // Predicate-as-counter
1982 Base, // Memory operand
1983 Offset, Chain};
1984
1985 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1986
1987 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1988 SDValue SuperReg = SDValue(Load, 0);
1989 for (unsigned i = 0; i < NumVecs; ++i)
1990 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1991 AArch64::zsub0 + i, DL, VT, SuperReg));
1992
1993 // Copy chain
1994 unsigned ChainIdx = NumVecs;
1995 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1996 CurDAG->RemoveDeadNode(N);
1997}
1998
1999void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
2000 unsigned Opcode) {
2001 if (N->getValueType(0) != MVT::nxv4f32)
2002 return;
2003 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
2004}
2005
2006void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
2007 unsigned NumOutVecs,
2008 unsigned Opc,
2009 uint32_t MaxImm) {
2010 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
2011 if (Imm->getZExtValue() > MaxImm)
2012 return;
2013
2014 SDValue ZtValue;
2015 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2016 return;
2017
2018 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)};
2019 SDLoc DL(Node);
2020 EVT VT = Node->getValueType(0);
2021
2022 SDNode *Instruction =
2023 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2024 SDValue SuperReg = SDValue(Instruction, 0);
2025
2026 for (unsigned I = 0; I < NumOutVecs; ++I)
2027 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2028 AArch64::zsub0 + I, DL, VT, SuperReg));
2029
2030 // Copy chain
2031 unsigned ChainIdx = NumOutVecs;
2032 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2033 CurDAG->RemoveDeadNode(Node);
2034}
2035
2036void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
2037 unsigned NumOutVecs,
2038 unsigned Opc) {
2039
2040 SDValue ZtValue;
2041 SmallVector<SDValue, 4> Ops;
2042 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2043 return;
2044
2045 Ops.push_back(ZtValue);
2046 Ops.push_back(createZMulTuple({Node->getOperand(3), Node->getOperand(4)}));
2047 SDLoc DL(Node);
2048 EVT VT = Node->getValueType(0);
2049
2050 SDNode *Instruction =
2051 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2052 SDValue SuperReg = SDValue(Instruction, 0);
2053
2054 for (unsigned I = 0; I < NumOutVecs; ++I)
2055 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2056 AArch64::zsub0 + I, DL, VT, SuperReg));
2057
2058 // Copy chain
2059 unsigned ChainIdx = NumOutVecs;
2060 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2061 CurDAG->RemoveDeadNode(Node);
2062}
2063
2064void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2065 unsigned Op) {
2066 SDLoc DL(N);
2067 EVT VT = N->getValueType(0);
2068
2069 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2070 SDValue Zd = createZMulTuple(Regs);
2071 SDValue Zn = N->getOperand(1 + NumVecs);
2072 SDValue Zm = N->getOperand(2 + NumVecs);
2073
2074 SDValue Ops[] = {Zd, Zn, Zm};
2075
2076 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
2077 SDValue SuperReg = SDValue(Intrinsic, 0);
2078 for (unsigned i = 0; i < NumVecs; ++i)
2079 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2080 AArch64::zsub0 + i, DL, VT, SuperReg));
2081
2082 CurDAG->RemoveDeadNode(N);
2083}
2084
2085bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2086 switch (BaseReg) {
2087 default:
2088 return false;
2089 case AArch64::ZA:
2090 case AArch64::ZAB0:
2091 if (TileNum == 0)
2092 break;
2093 return false;
2094 case AArch64::ZAH0:
2095 if (TileNum <= 1)
2096 break;
2097 return false;
2098 case AArch64::ZAS0:
2099 if (TileNum <= 3)
2100 break;
2101 return false;
2102 case AArch64::ZAD0:
2103 if (TileNum <= 7)
2104 break;
2105 return false;
2106 }
2107
2108 BaseReg += TileNum;
2109 return true;
2110}
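// For example (illustrative): BaseReg == AArch64::ZAS0 with TileNum == 2
// passes the TileNum <= 3 check and becomes ZAS2, while ZAB0 with a non-zero
// tile number is rejected.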
2111
2112template <unsigned MaxIdx, unsigned Scale>
2113void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
2114 unsigned BaseReg, unsigned Op) {
2115 unsigned TileNum = 0;
2116 if (BaseReg != AArch64::ZA)
2117 TileNum = N->getConstantOperandVal(2);
2118
2119 if (!SelectSMETile(BaseReg, TileNum))
2120 return;
2121
2122 SDValue SliceBase, Base, Offset;
2123 if (BaseReg == AArch64::ZA)
2124 SliceBase = N->getOperand(2);
2125 else
2126 SliceBase = N->getOperand(3);
2127
2128 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2129 return;
2130
2131 SDLoc DL(N);
2132 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
2133 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
2134 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2135
2136 EVT VT = N->getValueType(0);
2137 for (unsigned I = 0; I < NumVecs; ++I)
2138 ReplaceUses(SDValue(N, I),
2139 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2140 SDValue(Mov, 0)));
2141 // Copy chain
2142 unsigned ChainIdx = NumVecs;
2143 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2144 CurDAG->RemoveDeadNode(N);
2145}
2146
2147void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2148 unsigned Op, unsigned MaxIdx,
2149 unsigned Scale, unsigned BaseReg) {
2150 // The slice operand can be in different positions:
2151 // The array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2152 // The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2153 SDValue SliceBase = N->getOperand(2);
2154 if (BaseReg != AArch64::ZA)
2155 SliceBase = N->getOperand(3);
2156
2157 SDValue Base, Offset;
2158 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2159 return;
2160 // The correct ZA tile number is computed when the machine instruction is
2161 // emitted (see EmitZAInstr); the DAG cannot select a ZA tile as an output
2162 // register with ZReg.
2163 SDLoc DL(N);
2164 SmallVector<SDValue, 4> Ops;
2165 if (BaseReg != AArch64::ZA )
2166 Ops.push_back(N->getOperand(2));
2167 Ops.push_back(Base);
2168 Ops.push_back(Offset);
2169 Ops.push_back(N->getOperand(0)); //Chain
2170 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2171
2172 EVT VT = N->getValueType(0);
2173 for (unsigned I = 0; I < NumVecs; ++I)
2174 ReplaceUses(SDValue(N, I),
2175 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2176 SDValue(Mov, 0)));
2177
2178 // Copy chain
2179 unsigned ChainIdx = NumVecs;
2180 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2181 CurDAG->RemoveDeadNode(N);
2182}
2183
2184void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2185 unsigned NumOutVecs,
2186 bool IsTupleInput,
2187 unsigned Opc) {
2188 SDLoc DL(N);
2189 EVT VT = N->getValueType(0);
2190 unsigned NumInVecs = N->getNumOperands() - 1;
2191
2192 SmallVector<SDValue, 4> Ops;
2193 if (IsTupleInput) {
2194 assert((NumInVecs == 2 || NumInVecs == 4) &&
2195 "Don't know how to handle multi-register input!");
2196 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumInVecs));
2197 Ops.push_back(createZMulTuple(Regs));
2198 } else {
2199 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2200 for (unsigned I = 0; I < NumInVecs; I++)
2201 Ops.push_back(N->getOperand(1 + I));
2202 }
2203
2204 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2205 SDValue SuperReg = SDValue(Res, 0);
2206
2207 for (unsigned I = 0; I < NumOutVecs; I++)
2208 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2209 AArch64::zsub0 + I, DL, VT, SuperReg));
2210 CurDAG->RemoveDeadNode(N);
2211}
2212
2213void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2214 unsigned Opc) {
2215 SDLoc dl(N);
2216 EVT VT = N->getOperand(2)->getValueType(0);
2217
2218 // Form a REG_SEQUENCE to force register allocation.
2219 bool Is128Bit = VT.getSizeInBits() == 128;
2220 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2221 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2222
2223 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2224 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2225
2226 // Transfer memoperands.
2227 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2228 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2229
2230 ReplaceNode(N, St);
2231}
2232
2233void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2234 unsigned Scale, unsigned Opc_rr,
2235 unsigned Opc_ri) {
2236 SDLoc dl(N);
2237
2238 // Form a REG_SEQUENCE to force register allocation.
2239 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2240 SDValue RegSeq = createZTuple(Regs);
2241
2242 // Optimize addressing mode.
2243 unsigned Opc;
2244 SDValue Base, Offset;
2245 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2246 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2247 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2248
2249 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2250 Base, // address
2251 Offset, // offset
2252 N->getOperand(0)}; // chain
2253 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2254
2255 ReplaceNode(N, St);
2256}
2257
2258bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2259 SDValue &OffImm) {
2260 SDLoc dl(N);
2261 const DataLayout &DL = CurDAG->getDataLayout();
2262 const TargetLowering *TLI = getTargetLowering();
2263
2264 // Try to match it for the frame address
2265 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2266 int FI = FINode->getIndex();
2267 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2268 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2269 return true;
2270 }
2271
2272 return false;
2273}
2274
2275void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2276 unsigned Opc) {
2277 SDLoc dl(N);
2278 EVT VT = N->getOperand(2)->getValueType(0);
2279 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2280 MVT::Other}; // Type for the Chain
2281
2282 // Form a REG_SEQUENCE to force register allocation.
2283 bool Is128Bit = VT.getSizeInBits() == 128;
2284 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2285 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2286
2287 SDValue Ops[] = {RegSeq,
2288 N->getOperand(NumVecs + 1), // base register
2289 N->getOperand(NumVecs + 2), // Incremental
2290 N->getOperand(0)}; // Chain
2291 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2292
2293 ReplaceNode(N, St);
2294}
2295
2296namespace {
2297/// WidenVector - Given a value in the V64 register class, produce the
2298/// equivalent value in the V128 register class.
2299class WidenVector {
2300 SelectionDAG &DAG;
2301
2302public:
2303 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2304
2305 SDValue operator()(SDValue V64Reg) {
2306 EVT VT = V64Reg.getValueType();
2307 unsigned NarrowSize = VT.getVectorNumElements();
2308 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2309 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2310 SDLoc DL(V64Reg);
2311
2312 SDValue Undef =
2313 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2314 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2315 }
2316};
2317} // namespace
2318
2319/// NarrowVector - Given a value in the V128 register class, produce the
2320/// equivalent value in the V64 register class.
2321 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2322 EVT VT = V128Reg.getValueType();
2323 unsigned WideSize = VT.getVectorNumElements();
2324 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2325 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2326
2327 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2328 V128Reg);
2329}
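// Illustrative round trip: WidenVector turns a v2f32 value into v4f32 by
// inserting it into the dsub lane of an IMPLICIT_DEF, and NarrowVector
// extracts that dsub lane again, e.g. v4f32 -> v2f32.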
2330
2331void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2332 unsigned Opc) {
2333 SDLoc dl(N);
2334 EVT VT = N->getValueType(0);
2335 bool Narrow = VT.getSizeInBits() == 64;
2336
2337 // Form a REG_SEQUENCE to force register allocation.
2338 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2339
2340 if (Narrow)
2341 transform(Regs, Regs.begin(),
2342 WidenVector(*CurDAG));
2343
2344 SDValue RegSeq = createQTuple(Regs);
2345
2346 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2347
2348 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2349
2350 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2351 N->getOperand(NumVecs + 3), N->getOperand(0)};
2352 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2353 SDValue SuperReg = SDValue(Ld, 0);
2354
2355 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2356 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2357 AArch64::qsub2, AArch64::qsub3 };
2358 for (unsigned i = 0; i < NumVecs; ++i) {
2359 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2360 if (Narrow)
2361 NV = NarrowVector(NV, *CurDAG);
2362 ReplaceUses(SDValue(N, i), NV);
2363 }
2364
2365 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2366 CurDAG->RemoveDeadNode(N);
2367}
2368
2369void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2370 unsigned Opc) {
2371 SDLoc dl(N);
2372 EVT VT = N->getValueType(0);
2373 bool Narrow = VT.getSizeInBits() == 64;
2374
2375 // Form a REG_SEQUENCE to force register allocation.
2376 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2377
2378 if (Narrow)
2379 transform(Regs, Regs.begin(),
2380 WidenVector(*CurDAG));
2381
2382 SDValue RegSeq = createQTuple(Regs);
2383
2384 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2385 RegSeq->getValueType(0), MVT::Other};
2386
2387 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2388
2389 SDValue Ops[] = {RegSeq,
2390 CurDAG->getTargetConstant(LaneNo, dl,
2391 MVT::i64), // Lane Number
2392 N->getOperand(NumVecs + 2), // Base register
2393 N->getOperand(NumVecs + 3), // Incremental
2394 N->getOperand(0)};
2395 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2396
2397 // Update uses of the write back register
2398 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2399
2400 // Update uses of the vector list
2401 SDValue SuperReg = SDValue(Ld, 1);
2402 if (NumVecs == 1) {
2403 ReplaceUses(SDValue(N, 0),
2404 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2405 } else {
2406 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2407 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2408 AArch64::qsub2, AArch64::qsub3 };
2409 for (unsigned i = 0; i < NumVecs; ++i) {
2410 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2411 SuperReg);
2412 if (Narrow)
2413 NV = NarrowVector(NV, *CurDAG);
2414 ReplaceUses(SDValue(N, i), NV);
2415 }
2416 }
2417
2418 // Update the Chain
2419 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2420 CurDAG->RemoveDeadNode(N);
2421}
2422
2423void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2424 unsigned Opc) {
2425 SDLoc dl(N);
2426 EVT VT = N->getOperand(2)->getValueType(0);
2427 bool Narrow = VT.getSizeInBits() == 64;
2428
2429 // Form a REG_SEQUENCE to force register allocation.
2430 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2431
2432 if (Narrow)
2433 transform(Regs, Regs.begin(),
2434 WidenVector(*CurDAG));
2435
2436 SDValue RegSeq = createQTuple(Regs);
2437
2438 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2439
2440 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2441 N->getOperand(NumVecs + 3), N->getOperand(0)};
2442 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2443
2444 // Transfer memoperands.
2445 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2446 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2447
2448 ReplaceNode(N, St);
2449}
2450
2451void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2452 unsigned Opc) {
2453 SDLoc dl(N);
2454 EVT VT = N->getOperand(2)->getValueType(0);
2455 bool Narrow = VT.getSizeInBits() == 64;
2456
2457 // Form a REG_SEQUENCE to force register allocation.
2458 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2459
2460 if (Narrow)
2461 transform(Regs, Regs.begin(),
2462 WidenVector(*CurDAG));
2463
2464 SDValue RegSeq = createQTuple(Regs);
2465
2466 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2467 MVT::Other};
2468
2469 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2470
2471 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2472 N->getOperand(NumVecs + 2), // Base Register
2473 N->getOperand(NumVecs + 3), // Incremental
2474 N->getOperand(0)};
2475 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2476
2477 // Transfer memoperands.
2478 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2479 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2480
2481 ReplaceNode(N, St);
2482}
2483
2484 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2485 unsigned &Opc, SDValue &Opd0,
2486 unsigned &LSB, unsigned &MSB,
2487 unsigned NumberOfIgnoredLowBits,
2488 bool BiggerPattern) {
2489 assert(N->getOpcode() == ISD::AND &&
2490 "N must be a AND operation to call this function");
2491
2492 EVT VT = N->getValueType(0);
2493
2494 // Here we can test the type of VT and return false when the type does not
2495 // match, but since it is done prior to that call in the current context
2496 // we turned that into an assert to avoid redundant code.
2497 assert((VT == MVT::i32 || VT == MVT::i64) &&
2498 "Type checking must have been done before calling this function");
2499
2500 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2501 // changed the AND node to a 32-bit mask operation. We'll have to
2502 // undo that as part of the transform here if we want to catch all
2503 // the opportunities.
2504 // Currently the NumberOfIgnoredLowBits argument helps to recover
2505 // from these situations when matching bigger pattern (bitfield insert).
2506
2507 // For unsigned extracts, check for a shift right and mask
2508 uint64_t AndImm = 0;
2509 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2510 return false;
2511
2512 const SDNode *Op0 = N->getOperand(0).getNode();
2513
2514 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2515 // simplified. Try to undo that
2516 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2517
2518 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2519 if (AndImm & (AndImm + 1))
2520 return false;
2521
2522 bool ClampMSB = false;
2523 uint64_t SrlImm = 0;
2524 // Handle the SRL + ANY_EXTEND case.
2525 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2526 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2527 // Extend the incoming operand of the SRL to 64-bit.
2528 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2529 // Make sure to clamp the MSB so that we preserve the semantics of the
2530 // original operations.
2531 ClampMSB = true;
2532 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2533 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
2534 SrlImm)) {
2535 // If the shift result was truncated, we can still combine them.
2536 Opd0 = Op0->getOperand(0).getOperand(0);
2537
2538 // Use the type of SRL node.
2539 VT = Opd0->getValueType(0);
2540 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2541 Opd0 = Op0->getOperand(0);
2542 ClampMSB = (VT == MVT::i32);
2543 } else if (BiggerPattern) {
2544 // Let's pretend a 0 shift right has been performed.
2545 // The resulting code will be at least as good as the original one
2546 // plus it may expose more opportunities for bitfield insert pattern.
2547 // FIXME: Currently we limit this to the bigger pattern, because
2548 // some optimizations expect AND and not UBFM.
2549 Opd0 = N->getOperand(0);
2550 } else
2551 return false;
2552
2553 // Bail out on large immediates. This happens when no proper
2554 // combining/constant folding was performed.
2555 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2556 LLVM_DEBUG(
2557 (dbgs() << N
2558 << ": Found large shift immediate, this should not happen\n"));
2559 return false;
2560 }
2561
2562 LSB = SrlImm;
2563 MSB = SrlImm +
2564 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2565 : llvm::countr_one<uint64_t>(AndImm)) -
2566 1;
2567 if (ClampMSB)
2568 // Since we're moving the extend before the right shift operation, we need
2569 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2570 // the zeros which would get shifted in with the original right shift
2571 // operation.
2572 MSB = MSB > 31 ? 31 : MSB;
2573
2574 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2575 return true;
2576}
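// Worked example (illustrative): for an i64 node (and (srl x, 4), 0xff),
// AndImm is 0xff and SrlImm is 4, so LSB = 4, MSB = 4 + 8 - 1 = 11 and the
// node selects to UBFMXri x, #4, #11 (i.e. ubfx x, #4, #8).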
2577
2578static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2579 SDValue &Opd0, unsigned &Immr,
2580 unsigned &Imms) {
2581 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2582
2583 EVT VT = N->getValueType(0);
2584 unsigned BitWidth = VT.getSizeInBits();
2585 assert((VT == MVT::i32 || VT == MVT::i64) &&
2586 "Type checking must have been done before calling this function");
2587
2588 SDValue Op = N->getOperand(0);
2589 if (Op->getOpcode() == ISD::TRUNCATE) {
2590 Op = Op->getOperand(0);
2591 VT = Op->getValueType(0);
2592 BitWidth = VT.getSizeInBits();
2593 }
2594
2595 uint64_t ShiftImm;
2596 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2597 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2598 return false;
2599
2600 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2601 if (ShiftImm + Width > BitWidth)
2602 return false;
2603
2604 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2605 Opd0 = Op.getOperand(0);
2606 Immr = ShiftImm;
2607 Imms = ShiftImm + Width - 1;
2608 return true;
2609}
2610
2611static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2612 SDValue &Opd0, unsigned &LSB,
2613 unsigned &MSB) {
2614 // We are looking for the following pattern which basically extracts several
2615 // contiguous bits from the source value and places them at the LSB of the
2616 // destination value; all other bits of the destination value are set to zero:
2617 //
2618 // Value2 = AND Value, MaskImm
2619 // SRL Value2, ShiftImm
2620 //
2621 // with MaskImm >> ShiftImm to search for the bit width.
2622 //
2623 // This gets selected into a single UBFM:
2624 //
2625 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2626 //
2627
2628 if (N->getOpcode() != ISD::SRL)
2629 return false;
2630
2631 uint64_t AndMask = 0;
2632 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2633 return false;
2634
2635 Opd0 = N->getOperand(0).getOperand(0);
2636
2637 uint64_t SrlImm = 0;
2638 if (!isIntImmediate(N->getOperand(1), SrlImm))
2639 return false;
2640
2641 // Check whether we really have several bits extract here.
2642 if (!isMask_64(AndMask >> SrlImm))
2643 return false;
2644
2645 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2646 LSB = SrlImm;
2647 MSB = llvm::Log2_64(AndMask);
2648 return true;
2649}
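// Worked example (illustrative): for (srl (and x, 0xff0), 4), AndMask is 0xff0
// and SrlImm is 4; AndMask >> SrlImm == 0xff is a mask, so LSB = 4,
// MSB = Log2_64(0xff0) = 11 and the node becomes UBFM x, #4, #11.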
2650
2651static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2652 unsigned &Immr, unsigned &Imms,
2653 bool BiggerPattern) {
2654 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2655 "N must be a SHR/SRA operation to call this function");
2656
2657 EVT VT = N->getValueType(0);
2658
2659 // Here we can test the type of VT and return false when the type does not
2660 // match, but since it is done prior to that call in the current context
2661 // we turned that into an assert to avoid redundant code.
2662 assert((VT == MVT::i32 || VT == MVT::i64) &&
2663 "Type checking must have been done before calling this function");
2664
2665 // Check for AND + SRL doing several bits extract.
2666 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2667 return true;
2668
2669 // We're looking for a shift of a shift.
2670 uint64_t ShlImm = 0;
2671 uint64_t TruncBits = 0;
2672 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2673 Opd0 = N->getOperand(0).getOperand(0);
2674 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2675 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2676 // We are looking for a shift of a truncate. A truncate from i64 to i32 can
2677 // be considered as setting the high 32 bits to zero. Our strategy here is to
2678 // always generate a 64-bit UBFM. This consistency will help the CSE pass
2679 // later find more redundancy.
2680 Opd0 = N->getOperand(0).getOperand(0);
2681 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2682 VT = Opd0.getValueType();
2683 assert(VT == MVT::i64 && "the promoted type should be i64");
2684 } else if (BiggerPattern) {
2685 // Let's pretend a 0 shift left has been performed.
2686 // FIXME: Currently we limit this to the bigger pattern case,
2687 // because some optimizations expect AND and not UBFM
2688 Opd0 = N->getOperand(0);
2689 } else
2690 return false;
2691
2692 // Missing combines/constant folding may have left us with strange
2693 // constants.
2694 if (ShlImm >= VT.getSizeInBits()) {
2695 LLVM_DEBUG(
2696 (dbgs() << N
2697 << ": Found large shift immediate, this should not happen\n"));
2698 return false;
2699 }
2700
2701 uint64_t SrlImm = 0;
2702 if (!isIntImmediate(N->getOperand(1), SrlImm))
2703 return false;
2704
2705 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2706 "bad amount in shift node!");
2707 int immr = SrlImm - ShlImm;
2708 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2709 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2710 // SRA requires a signed extraction
2711 if (VT == MVT::i32)
2712 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2713 else
2714 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2715 return true;
2716}
2717
2718bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2719 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2720
2721 EVT VT = N->getValueType(0);
2722 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2723 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2724 return false;
2725
2726 uint64_t ShiftImm;
2727 SDValue Op = N->getOperand(0);
2728 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2729 return false;
2730
2731 SDLoc dl(N);
2732 // Extend the incoming operand of the shift to 64-bits.
2733 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2734 unsigned Immr = ShiftImm;
2735 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2736 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2737 CurDAG->getTargetConstant(Imms, dl, VT)};
2738 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2739 return true;
2740}
2741
2742static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2743 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2744 unsigned NumberOfIgnoredLowBits = 0,
2745 bool BiggerPattern = false) {
2746 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2747 return false;
2748
2749 switch (N->getOpcode()) {
2750 default:
2751 if (!N->isMachineOpcode())
2752 return false;
2753 break;
2754 case ISD::AND:
2755 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2756 NumberOfIgnoredLowBits, BiggerPattern);
2757 case ISD::SRL:
2758 case ISD::SRA:
2759 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2760
2761 case ISD::SIGN_EXTEND_INREG:
2762 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2763 }
2764
2765 unsigned NOpc = N->getMachineOpcode();
2766 switch (NOpc) {
2767 default:
2768 return false;
2769 case AArch64::SBFMWri:
2770 case AArch64::UBFMWri:
2771 case AArch64::SBFMXri:
2772 case AArch64::UBFMXri:
2773 Opc = NOpc;
2774 Opd0 = N->getOperand(0);
2775 Immr = N->getConstantOperandVal(1);
2776 Imms = N->getConstantOperandVal(2);
2777 return true;
2778 }
2779 // Unreachable
2780 return false;
2781}
2782
2783bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2784 unsigned Opc, Immr, Imms;
2785 SDValue Opd0;
2786 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2787 return false;
2788
2789 EVT VT = N->getValueType(0);
2790 SDLoc dl(N);
2791
2792 // If the bit extract operation is 64bit but the original type is 32bit, we
2793 // need to add one EXTRACT_SUBREG.
2794 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2795 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2796 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2797
2798 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2799 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
2800 MVT::i32, SDValue(BFM, 0));
2801 ReplaceNode(N, Inner.getNode());
2802 return true;
2803 }
2804
2805 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2806 CurDAG->getTargetConstant(Imms, dl, VT)};
2807 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2808 return true;
2809}
2810
2811/// Does DstMask form a complementary pair with the mask provided by
2812/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2813/// this asks whether DstMask zeroes precisely those bits that will be set by
2814/// the other half.
2815static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2816 unsigned NumberOfIgnoredHighBits, EVT VT) {
2817 assert((VT == MVT::i32 || VT == MVT::i64) &&
2818 "i32 or i64 mask type expected!");
2819 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2820
2821 // Enable implicitTrunc as we're intentionally ignoring high bits.
2822 APInt SignificantDstMask =
2823 APInt(BitWidth, DstMask, /*isSigned=*/false, /*implicitTrunc=*/true);
2824 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2825
2826 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2827 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2828}
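// For example (illustrative, i32 with no ignored high bits): DstMask ==
// 0xffff0000 pairs with BitsToBeInserted == 0x0000ffff, since the two masks
// are disjoint and together cover all 32 bits.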
2829
2830// Look for bits that will be useful for later uses.
2831 // A bit is considered useless as soon as it is dropped and never used
2832 // before it has been dropped.
2833 // E.g., looking for the useful bits of x
2834// 1. y = x & 0x7
2835// 2. z = y >> 2
2836// After #1, x useful bits are 0x7, then the useful bits of x, live through
2837// y.
2838// After #2, the useful bits of x are 0x4.
2839 // However, if x is used by an unpredictable instruction, then all its bits
2840// are useful.
2841// E.g.
2842// 1. y = x & 0x7
2843// 2. z = y >> 2
2844// 3. str x, [@x]
2845static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2846
2847 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2848 unsigned Depth) {
2849 uint64_t Imm =
2850 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2851 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2852 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2853 getUsefulBits(Op, UsefulBits, Depth + 1);
2854}
2855
2856 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2857 uint64_t Imm, uint64_t MSB,
2858 unsigned Depth) {
2859 // inherit the bitwidth value
2860 APInt OpUsefulBits(UsefulBits);
2861 OpUsefulBits = 1;
2862
2863 if (MSB >= Imm) {
2864 OpUsefulBits <<= MSB - Imm + 1;
2865 --OpUsefulBits;
2866 // The interesting part will be in the lower part of the result
2867 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2868 // The interesting part was starting at Imm in the argument
2869 OpUsefulBits <<= Imm;
2870 } else {
2871 OpUsefulBits <<= MSB + 1;
2872 --OpUsefulBits;
2873 // The interesting part will be shifted in the result
2874 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2875 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2876 // The interesting part was at zero in the argument
2877 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2878 }
2879
2880 UsefulBits &= OpUsefulBits;
2881}
2882
2883static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2884 unsigned Depth) {
2885 uint64_t Imm =
2886 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2887 uint64_t MSB =
2888 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2889
2890 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2891}
2892
2893 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2894 unsigned Depth) {
2895 uint64_t ShiftTypeAndValue =
2896 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2897 APInt Mask(UsefulBits);
2898 Mask.clearAllBits();
2899 Mask.flipAllBits();
2900
2901 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2902 // Shift Left
2903 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2904 Mask <<= ShiftAmt;
2905 getUsefulBits(Op, Mask, Depth + 1);
2906 Mask.lshrInPlace(ShiftAmt);
2907 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2908 // Shift Right
2909 // We do not handle AArch64_AM::ASR, because the sign will change the
2910 // number of useful bits
2911 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2912 Mask.lshrInPlace(ShiftAmt);
2913 getUsefulBits(Op, Mask, Depth + 1);
2914 Mask <<= ShiftAmt;
2915 } else
2916 return;
2917
2918 UsefulBits &= Mask;
2919}
2920
2921static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
2922 unsigned Depth) {
2923 uint64_t Imm =
2924 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2925 uint64_t MSB =
2926 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
2927
2928 APInt OpUsefulBits(UsefulBits);
2929 OpUsefulBits = 1;
2930
2931 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
2932 ResultUsefulBits.flipAllBits();
2933 APInt Mask(UsefulBits.getBitWidth(), 0);
2934
2935 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
2936
2937 if (MSB >= Imm) {
2938 // The instruction is a BFXIL.
2939 uint64_t Width = MSB - Imm + 1;
2940 uint64_t LSB = Imm;
2941
2942 OpUsefulBits <<= Width;
2943 --OpUsefulBits;
2944
2945 if (Op.getOperand(1) == Orig) {
2946 // Copy the low bits from the result to bits starting from LSB.
2947 Mask = ResultUsefulBits & OpUsefulBits;
2948 Mask <<= LSB;
2949 }
2950
2951 if (Op.getOperand(0) == Orig)
2952 // Bits starting from LSB in the input contribute to the result.
2953 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2954 } else {
2955 // The instruction is a BFI.
2956 uint64_t Width = MSB + 1;
2957 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
2958
2959 OpUsefulBits <<= Width;
2960 --OpUsefulBits;
2961 OpUsefulBits <<= LSB;
2962
2963 if (Op.getOperand(1) == Orig) {
2964 // Copy the bits from the result to the zero bits.
2965 Mask = ResultUsefulBits & OpUsefulBits;
2966 Mask.lshrInPlace(LSB);
2967 }
2968
2969 if (Op.getOperand(0) == Orig)
2970 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2971 }
2972
2973 UsefulBits &= Mask;
2974}
2975
2976static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
2977 SDValue Orig, unsigned Depth) {
2978
2979 // Users of this node should have already been instruction selected
2980 // FIXME: Can we turn that into an assert?
2981 if (!UserNode->isMachineOpcode())
2982 return;
2983
2984 switch (UserNode->getMachineOpcode()) {
2985 default:
2986 return;
2987 case AArch64::ANDSWri:
2988 case AArch64::ANDSXri:
2989 case AArch64::ANDWri:
2990 case AArch64::ANDXri:
2991 // We increment Depth only when we call the getUsefulBits
2992 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
2993 Depth);
2994 case AArch64::UBFMWri:
2995 case AArch64::UBFMXri:
2996 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
2997
2998 case AArch64::ORRWrs:
2999 case AArch64::ORRXrs:
3000 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
3001 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
3002 Depth);
3003 return;
3004 case AArch64::BFMWri:
3005 case AArch64::BFMXri:
3006 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
3007
3008 case AArch64::STRBBui:
3009 case AArch64::STURBBi:
3010 if (UserNode->getOperand(0) != Orig)
3011 return;
3012 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
3013 return;
3014
3015 case AArch64::STRHHui:
3016 case AArch64::STURHHi:
3017 if (UserNode->getOperand(0) != Orig)
3018 return;
3019 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
3020 return;
3021 }
3022}
3023
3024static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
3025 if (Depth >= SelectionDAG::MaxRecursionDepth)
3026 return;
3027 // Initialize UsefulBits
3028 if (!Depth) {
3029 unsigned Bitwidth = Op.getScalarValueSizeInBits();
3030 // At the beginning, assume every produced bits is useful
3031 UsefulBits = APInt(Bitwidth, 0);
3032 UsefulBits.flipAllBits();
3033 }
3034 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
3035
3036 for (SDNode *Node : Op.getNode()->users()) {
3037 // A use cannot produce useful bits
3038 APInt UsefulBitsForUse = APInt(UsefulBits);
3039 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
3040 UsersUsefulBits |= UsefulBitsForUse;
3041 }
3042 // UsefulBits contains the produced bits that are meaningful for the
3043 // current definition, thus a user cannot make a bit meaningful at
3044 // this point
3045 UsefulBits &= UsersUsefulBits;
3046}
3047
3048/// Create a machine node performing a notional SHL of Op by ShlAmount. If
3049/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3050/// 0, return Op unchanged.
3051static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
3052 if (ShlAmount == 0)
3053 return Op;
3054
3055 EVT VT = Op.getValueType();
3056 SDLoc dl(Op);
3057 unsigned BitWidth = VT.getSizeInBits();
3058 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
3059
3060 SDNode *ShiftNode;
3061 if (ShlAmount > 0) {
3062 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
3063 ShiftNode = CurDAG->getMachineNode(
3064 UBFMOpc, dl, VT, Op,
3065 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
3066 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
3067 } else {
3068 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3069 assert(ShlAmount < 0 && "expected right shift");
3070 int ShrAmount = -ShlAmount;
3071 ShiftNode = CurDAG->getMachineNode(
3072 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
3073 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
3074 }
3075
3076 return SDValue(ShiftNode, 0);
3077}
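// For example (illustrative, 32-bit): getLeftShift(CurDAG, Op, 3) emits
// UBFMWri Op, #29, #28 (an LSL by 3), while getLeftShift(CurDAG, Op, -3)
// emits UBFMWri Op, #3, #31 (an LSR by 3).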
3078
3079// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3080 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3081 bool BiggerPattern,
3082 const uint64_t NonZeroBits,
3083 SDValue &Src, int &DstLSB,
3084 int &Width);
3085
3086 // For bit-field-positioning pattern "(shl VAL, N)".
3087 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3088 bool BiggerPattern,
3089 const uint64_t NonZeroBits,
3090 SDValue &Src, int &DstLSB,
3091 int &Width);
3092
3093/// Does this tree qualify as an attempt to move a bitfield into position,
3094/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
3095 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
3096 bool BiggerPattern, SDValue &Src,
3097 int &DstLSB, int &Width) {
3098 EVT VT = Op.getValueType();
3099 unsigned BitWidth = VT.getSizeInBits();
3100 (void)BitWidth;
3101 assert(BitWidth == 32 || BitWidth == 64);
3102
3103 KnownBits Known = CurDAG->computeKnownBits(Op);
3104
3105 // Non-zero in the sense that they're not provably zero, which is the key
3106 // point if we want to use this value
3107 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
3108 if (!isShiftedMask_64(NonZeroBits))
3109 return false;
3110
3111 switch (Op.getOpcode()) {
3112 default:
3113 break;
3114 case ISD::AND:
3115 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3116 NonZeroBits, Src, DstLSB, Width);
3117 case ISD::SHL:
3118 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3119 NonZeroBits, Src, DstLSB, Width);
3120 }
3121
3122 return false;
3123}
3124
3125 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3126 bool BiggerPattern,
3127 const uint64_t NonZeroBits,
3128 SDValue &Src, int &DstLSB,
3129 int &Width) {
3130 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3131
3132 EVT VT = Op.getValueType();
3133 assert((VT == MVT::i32 || VT == MVT::i64) &&
3134 "Caller guarantees VT is one of i32 or i64");
3135 (void)VT;
3136
3137 uint64_t AndImm;
3138 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
3139 return false;
3140
3141 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3142 // 1) (AndImm & (1 << POS) == 0)
3143 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3144 //
3145 // 1) and 2) don't agree so something must be wrong (e.g., in
3146 // 'SelectionDAG::computeKnownBits')
3147 assert((~AndImm & NonZeroBits) == 0 &&
3148 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3149
3150 SDValue AndOp0 = Op.getOperand(0);
3151
3152 uint64_t ShlImm;
3153 SDValue ShlOp0;
3154 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3155 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3156 ShlOp0 = AndOp0.getOperand(0);
3157 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3158 isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
3159 ShlImm)) {
3160 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3161
3162 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3163 SDValue ShlVal = AndOp0.getOperand(0);
3164
3165 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3166 // expect VT to be MVT::i32.
3167 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3168
3169 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3170 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3171 } else
3172 return false;
3173
3174 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3175 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3176 // AndOp0+AND.
3177 if (!BiggerPattern && !AndOp0.hasOneUse())
3178 return false;
3179
3180 DstLSB = llvm::countr_zero(NonZeroBits);
3181 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3182
3183 // Bail out on large Width. This happens when no proper combining / constant
3184 // folding was performed.
3185 if (Width >= (int)VT.getSizeInBits()) {
3186 // If VT is i64, Width > 64 is insensible since NonZeroBits is uint64_t, and
3187 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3188 // "val".
3189 // If VT is i32, what Width >= 32 means:
3190 // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
3191 // demands at least 'Width' bits (after dag-combiner). This together with
3192 // `any_extend` Op (undefined higher bits) indicates missed combination
3193 // when lowering the 'and' IR instruction to a machine IR instruction.
3194 LLVM_DEBUG(
3195 dbgs()
3196 << "Found large Width in bit-field-positioning -- this indicates no "
3197 "proper combining / constant folding was performed\n");
3198 return false;
3199 }
3200
3201 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3202 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3203 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3204 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3205 // which case it is not profitable to insert an extra shift.
3206 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3207 return false;
3208
3209 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3210 return true;
3211}
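// Worked example (illustrative): for i32 (and (shl x, 3), 0xf8), the non-zero
// bits form the shifted mask 0xf8, giving DstLSB = 3 and Width = 5; ShlImm
// equals DstLSB, so Src is x itself and no extra shift is inserted.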
3212
3213 // For node (shl (and val, mask), N), returns true if the node is equivalent to
3214 // UBFIZ.
3215 static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3216 SDValue &Src, int &DstLSB,
3217 int &Width) {
3218 // Caller should have verified that N is a left shift with constant shift
3219 // amount; asserts that.
3220 assert(Op.getOpcode() == ISD::SHL &&
3221 "Op.getNode() should be a SHL node to call this function");
3222 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3223 "Op.getNode() should shift ShlImm to call this function");
3224
3225 uint64_t AndImm = 0;
3226 SDValue Op0 = Op.getOperand(0);
3227 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3228 return false;
3229
3230 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3231 if (isMask_64(ShiftedAndImm)) {
3232 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3233 // should end with the mask (AllOnes >> ShlImm), and may be prefixed with
3234 // arbitrary bits provided those bits are shifted out.
3235 //
3236 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3237 // the AND result corresponding to those bits is shifted out, so it's fine
3238 // to not extract them.
3239 Width = llvm::countr_one(ShiftedAndImm);
3240 DstLSB = ShlImm;
3241 Src = Op0.getOperand(0);
3242 return true;
3243 }
3244 return false;
3245}
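// Worked example (illustrative): with ShlImm == 8 and AndImm == 0xff,
// ShiftedAndImm == ((0xff << 8) >> 8) == 0xff, which is a mask, so
// "(shl (and val, 0xff), 8)" is reported as Src = val, DstLSB = 8, Width = 8,
// i.e. a UBFIZ that inserts the low 8 bits of 'val' at bit position 8.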
3246
3247static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3248 bool BiggerPattern,
3249 const uint64_t NonZeroBits,
3250 SDValue &Src, int &DstLSB,
3251 int &Width) {
3252 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3253
3254 EVT VT = Op.getValueType();
3255 assert((VT == MVT::i32 || VT == MVT::i64) &&
3256 "Caller guarantees that type is i32 or i64");
3257 (void)VT;
3258
3259 uint64_t ShlImm;
3260 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3261 return false;
3262
3263 if (!BiggerPattern && !Op.hasOneUse())
3264 return false;
3265
3266 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3267 return true;
3268
3269 DstLSB = llvm::countr_zero(NonZeroBits);
3270 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3271
3272 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3273 return false;
3274
3275 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3276 return true;
3277}
3278
3279static bool isShiftedMask(uint64_t Mask, EVT VT) {
3280 assert(VT == MVT::i32 || VT == MVT::i64);
3281 if (VT == MVT::i32)
3282 return isShiftedMask_32(Mask);
3283 return isShiftedMask_64(Mask);
3284}
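// For illustration: 0x0ff0 is a shifted mask (one contiguous run of set bits),
// whereas 0x0f0f is not, so only the former passes this predicate.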
3285
3286// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3287// inserted only sets known zero bits.
3288static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
3289 assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
3290
3291 EVT VT = N->getValueType(0);
3292 if (VT != MVT::i32 && VT != MVT::i64)
3293 return false;
3294
3295 unsigned BitWidth = VT.getSizeInBits();
3296
3297 uint64_t OrImm;
3298 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3299 return false;
3300
3301 // Skip this transformation if the ORR immediate can be encoded in the ORR.
3302 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3303 // performance neutral.
3304 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
3305 return false;
3306
3307 uint64_t MaskImm;
3308 SDValue And = N->getOperand(0);
3309 // Must be a single use AND with an immediate operand.
3310 if (!And.hasOneUse() ||
3311 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3312 return false;
3313
3314 // Compute the Known Zero for the AND as this allows us to catch more general
3315 // cases than just looking for AND with imm.
3316 KnownBits Known = CurDAG->computeKnownBits(And);
3317
3318 // Non-zero in the sense that they're not provably zero, which is the key
3319 // point if we want to use this value.
3320 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3321
3322 // The KnownZero mask must be a shifted mask (e.g., 0x000000ff or 0xfff00000).
3323 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3324 return false;
3325
3326 // The bits being inserted must only set those bits that are known to be zero.
3327 if ((OrImm & NotKnownZero) != 0) {
3328 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3329 // currently handle this case.
3330 return false;
3331 }
3332
3333 // BFI/BFXIL dst, src, #lsb, #width.
3334 int LSB = llvm::countr_one(NotKnownZero);
3335 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3336
3337 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3338 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3339 unsigned ImmS = Width - 1;
3340
3341 // If we're creating a BFI instruction avoid cases where we need more
3342 // instructions to materialize the BFI constant as compared to the original
3343 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3344 // should be no worse in this case.
3345 bool IsBFI = LSB != 0;
3346 uint64_t BFIImm = OrImm >> LSB;
3347 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3348 // We have a BFI instruction and we know the constant can't be materialized
3349 // with a ORR-immediate with the zero register.
3350 unsigned OrChunks = 0, BFIChunks = 0;
3351 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3352 if (((OrImm >> Shift) & 0xFFFF) != 0)
3353 ++OrChunks;
3354 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3355 ++BFIChunks;
3356 }
3357 if (BFIChunks > OrChunks)
3358 return false;
3359 }
3360
3361 // Materialize the constant to be inserted.
3362 SDLoc DL(N);
3363 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3364 SDNode *MOVI = CurDAG->getMachineNode(
3365 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3366
3367 // Create the BFI/BFXIL instruction.
3368 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3369 CurDAG->getTargetConstant(ImmR, DL, VT),
3370 CurDAG->getTargetConstant(ImmS, DL, VT)};
3371 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3372 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3373 return true;
3374}
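// Worked example (illustrative, i32): for "or (and X, 0xffffff00), 0x5a" the
// low byte of the AND result is known zero and 0x5a is not an ORR-encodable
// logical immediate, so LSB == 0 and Width == 8 and the node is selected as a
// MOVi32imm of 0x5a feeding a BFM with ImmR == 0, ImmS == 7 (a BFXIL of the
// low byte), rather than materializing the constant for an AND + ORR.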
3375
3376static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
3377 SDValue &ShiftedOperand,
3378 uint64_t &EncodedShiftImm) {
3379 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3380 if (!Dst.hasOneUse())
3381 return false;
3382
3383 EVT VT = Dst.getValueType();
3384 assert((VT == MVT::i32 || VT == MVT::i64) &&
3385 "Caller should guarantee that VT is one of i32 or i64");
3386 const unsigned SizeInBits = VT.getSizeInBits();
3387
3388 SDLoc DL(Dst.getNode());
3389 uint64_t AndImm, ShlImm;
3390 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3391 isShiftedMask_64(AndImm)) {
3392 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3393 SDValue DstOp0 = Dst.getOperand(0);
3394 if (!DstOp0.hasOneUse())
3395 return false;
3396
3397 // An example to illustrate the transformation
3398 // From:
3399 // lsr x8, x1, #1
3400 // and x8, x8, #0x3f80
3401 // bfxil x8, x1, #0, #7
3402 // To:
3403 // and x8, x1, #0x7f
3404 // ubfx x9, x1, #8, #7
3405 // orr x8, x8, x9, lsl #7
3406 //
3407 // The number of instructions remains the same, but ORR is faster than BFXIL
3408 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3409 // the dependency chain is improved after the transformation.
3410 uint64_t SrlImm;
3411 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3412 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3413 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3414 unsigned MaskWidth =
3415 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3416 unsigned UBFMOpc =
3417 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3418 SDNode *UBFMNode = CurDAG->getMachineNode(
3419 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3420 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3421 VT),
3422 CurDAG->getTargetConstant(
3423 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3424 ShiftedOperand = SDValue(UBFMNode, 0);
3425 EncodedShiftImm = AArch64_AM::getShifterImm(
3426 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3427 return true;
3428 }
3429 }
3430 return false;
3431 }
3432
3433 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3434 ShiftedOperand = Dst.getOperand(0);
3435 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3436 return true;
3437 }
3438
3439 uint64_t SrlImm;
3440 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3441 ShiftedOperand = Dst.getOperand(0);
3442 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3443 return true;
3444 }
3445 return false;
3446}
3447
3448// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3449// the operands and select it to AArch64::ORR with shifted registers if
3450// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3451static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3452 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3453 const bool BiggerPattern) {
3454 EVT VT = N->getValueType(0);
3455 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3456 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3457 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3458 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3459 assert((VT == MVT::i32 || VT == MVT::i64) &&
3460 "Expect result type to be i32 or i64 since N is combinable to BFM");
3461 SDLoc DL(N);
3462
3463 // Bail out if BFM simplifies away one node in BFM Dst.
3464 if (OrOpd1 != Dst)
3465 return false;
3466
3467 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3468 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3469 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3470 if (BiggerPattern) {
3471 uint64_t SrcAndImm;
3472 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3473 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3474 // OrOpd0 = AND Src, #Mask
3475 // So BFM simplifies away one AND node from Src and doesn't simplify away
3476 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3477 // one node (from Rd), ORR is better since it has higher throughput and
3478 // smaller latency than BFM on many AArch64 processors (and for the rest
3479 // ORR is at least as good as BFM).
3480 SDValue ShiftedOperand;
3481 uint64_t EncodedShiftImm;
3482 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3483 EncodedShiftImm)) {
3484 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3485 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3486 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3487 return true;
3488 }
3489 }
3490 return false;
3491 }
3492
3493 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3494
3495 uint64_t ShlImm;
3496 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3497 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3498 SDValue Ops[] = {
3499 Dst, Src,
3500 CurDAG->getTargetConstant(
3501 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3502 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3503 return true;
3504 }
3505
3506 // Select the following pattern to left-shifted operand rather than BFI.
3507 // %val1 = op ..
3508 // %val2 = shl %val1, #imm
3509 // %res = or %val1, %val2
3510 //
3511 // If N is selected to be BFI, we know that
3512 // 1) OrOpd0 would be the operand from which bits are extracted (i.e.,
3513 // folded into BFI), and 2) OrOpd1 would be the destination operand (i.e., preserved)
3514 //
3515 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3516 if (OrOpd0.getOperand(0) == OrOpd1) {
3517 SDValue Ops[] = {
3518 OrOpd1, OrOpd1,
3519 CurDAG->getTargetConstant(
3520 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3521 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3522 return true;
3523 }
3524 }
3525
3526 uint64_t SrlImm;
3527 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3528 // Select the following pattern to right-shifted operand rather than BFXIL.
3529 // %val1 = op ..
3530 // %val2 = lshr %val1, #imm
3531 // %res = or %val1, %val2
3532 //
3533 // If N is selected to be BFXIL, we know that
3534 // 1) OrOpd0 would be the operand from which bits are extracted (i.e.,
3535 // folded into BFXIL), and 2) OrOpd1 would be the destination operand (i.e., preserved)
3536 //
3537 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3538 if (OrOpd0.getOperand(0) == OrOpd1) {
3539 SDValue Ops[] = {
3540 OrOpd1, OrOpd1,
3541 CurDAG->getTargetConstant(
3542 AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
3543 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3544 return true;
3545 }
3546 }
3547
3548 return false;
3549}
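// For illustration: when "or (shl x1, #16), x0" is a BFM candidate with
// Src == x1 and Dst == x0, the code above prefers
//   orr x0, x0, x1, lsl #16
// over the equivalent "bfi x0, x1, #16, #48", since ORR with a shifted
// register is at least as cheap as BFM on most AArch64 cores.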
3550
3551static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3552 SelectionDAG *CurDAG) {
3553 assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
3554
3555 EVT VT = N->getValueType(0);
3556 if (VT != MVT::i32 && VT != MVT::i64)
3557 return false;
3558
3559 unsigned BitWidth = VT.getSizeInBits();
3560
3561 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3562 // have the expected shape. Try to undo that.
3563
3564 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3565 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3566
3567 // Given an OR operation, check if we have the following pattern
3568 // ubfm c, b, imm, imm2 (or something that does the same jobs, see
3569 // isBitfieldExtractOp)
3570 // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
3571 // countTrailingZeros(mask2) == imm2 - imm + 1
3572 // f = d | c
3573 // if yes, replace the OR instruction with:
3574 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3575
3576 // OR is commutative, check all combinations of operand order and values of
3577 // BiggerPattern, i.e.
3578 // Opd0, Opd1, BiggerPattern=false
3579 // Opd1, Opd0, BiggerPattern=false
3580 // Opd0, Opd1, BiggerPattern=true
3581 // Opd1, Opd0, BiggerPattern=true
3582 // Several of these combinations may match, so check with BiggerPattern=false
3583 // first since that will produce better results by matching more instructions
3584 // and/or inserting fewer extra instructions.
3585 for (int I = 0; I < 4; ++I) {
3586
3587 SDValue Dst, Src;
3588 unsigned ImmR, ImmS;
3589 bool BiggerPattern = I / 2;
3590 SDValue OrOpd0Val = N->getOperand(I % 2);
3591 SDNode *OrOpd0 = OrOpd0Val.getNode();
3592 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3593 SDNode *OrOpd1 = OrOpd1Val.getNode();
3594
3595 unsigned BFXOpc;
3596 int DstLSB, Width;
3597 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3598 NumberOfIgnoredLowBits, BiggerPattern)) {
3599 // Check that the returned opcode is compatible with the pattern,
3600 // i.e., same type and zero extended (U and not S)
3601 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3602 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3603 continue;
3604
3605 // Compute the width of the bitfield insertion
3606 DstLSB = 0;
3607 Width = ImmS - ImmR + 1;
3608 // FIXME: This constraint is to catch bitfield insertion; we may
3609 // want to widen the pattern if we want to grab the general bitfield
3610 // move case
3611 if (Width <= 0)
3612 continue;
3613
3614 // If the mask on the insertee is correct, we have a BFXIL operation. We
3615 // can share the ImmR and ImmS values from the already-computed UBFM.
3616 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3617 BiggerPattern,
3618 Src, DstLSB, Width)) {
3619 ImmR = (BitWidth - DstLSB) % BitWidth;
3620 ImmS = Width - 1;
3621 } else
3622 continue;
3623
3624 // Check the second part of the pattern
3625 EVT VT = OrOpd1Val.getValueType();
3626 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3627
3628 // Compute the Known Zero for the candidate of the first operand.
3629 // This allows to catch more general case than just looking for
3630 // AND with imm. Indeed, simplify-demanded-bits may have removed
3631 // the AND instruction because it proves it was useless.
3632 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3633
3634 // Check if there is enough room for the second operand to appear
3635 // in the first one
3636 APInt BitsToBeInserted =
3637 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3638
3639 if ((BitsToBeInserted & ~Known.Zero) != 0)
3640 continue;
3641
3642 // Set the first operand
3643 uint64_t Imm;
3644 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3645 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3646 // In that case, we can eliminate the AND
3647 Dst = OrOpd1->getOperand(0);
3648 else
3649 // Maybe the AND has been removed by simplify-demanded-bits
3650 // or is useful because it discards more bits
3651 Dst = OrOpd1Val;
3652
3653 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3654 // with shifted operand is more efficient.
3655 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3656 BiggerPattern))
3657 return true;
3658
3659 // both parts match
3660 SDLoc DL(N);
3661 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3662 CurDAG->getTargetConstant(ImmS, DL, VT)};
3663 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3664 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3665 return true;
3666 }
3667
3668 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3669 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3670 // mask (e.g., 0x000ffff0).
3671 uint64_t Mask0Imm, Mask1Imm;
3672 SDValue And0 = N->getOperand(0);
3673 SDValue And1 = N->getOperand(1);
3674 if (And0.hasOneUse() && And1.hasOneUse() &&
3675 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3676 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3677 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3678 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3679
3680 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3681 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3682 // bits to be inserted.
3683 if (isShiftedMask(Mask0Imm, VT)) {
3684 std::swap(And0, And1);
3685 std::swap(Mask0Imm, Mask1Imm);
3686 }
3687
3688 SDValue Src = And1->getOperand(0);
3689 SDValue Dst = And0->getOperand(0);
3690 unsigned LSB = llvm::countr_zero(Mask1Imm);
3691 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3692
3693 // The BFXIL inserts the low-order bits from a source register, so right
3694 // shift the needed bits into place.
3695 SDLoc DL(N);
3696 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3697 uint64_t LsrImm = LSB;
3698 if (Src->hasOneUse() &&
3699 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3700 (LsrImm + LSB) < BitWidth) {
3701 Src = Src->getOperand(0);
3702 LsrImm += LSB;
3703 }
3704
3705 SDNode *LSR = CurDAG->getMachineNode(
3706 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3707 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3708
3709 // BFXIL is an alias of BFM, so translate to BFM operands.
3710 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3711 unsigned ImmS = Width - 1;
3712
3713 // Create the BFXIL instruction.
3714 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3715 CurDAG->getTargetConstant(ImmR, DL, VT),
3716 CurDAG->getTargetConstant(ImmS, DL, VT)};
3717 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3718 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3719 return true;
3720 }
3721
3722 return false;
3723}
3724
3725bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3726 if (N->getOpcode() != ISD::OR)
3727 return false;
3728
3729 APInt NUsefulBits;
3730 getUsefulBits(SDValue(N, 0), NUsefulBits);
3731
3732 // If none of the bits are useful, just return UNDEF.
3733 if (!NUsefulBits) {
3734 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3735 return true;
3736 }
3737
3738 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3739 return true;
3740
3741 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3742}
3743
3744/// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3745/// equivalent of a left shift by a constant amount followed by an and masking
3746/// out a contiguous set of bits.
3747bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3748 if (N->getOpcode() != ISD::AND)
3749 return false;
3750
3751 EVT VT = N->getValueType(0);
3752 if (VT != MVT::i32 && VT != MVT::i64)
3753 return false;
3754
3755 SDValue Op0;
3756 int DstLSB, Width;
3757 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3758 Op0, DstLSB, Width))
3759 return false;
3760
3761 // ImmR is the rotate right amount.
3762 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3763 // ImmS is the most significant bit of the source to be moved.
3764 unsigned ImmS = Width - 1;
3765
3766 SDLoc DL(N);
3767 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3768 CurDAG->getTargetConstant(ImmS, DL, VT)};
3769 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3770 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3771 return true;
3772}
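// Worked example (illustrative, i64): "and (shl x0, #4), #0xff0" keeps only
// bits 4..11 of the shifted value, so it is selected as
//   ubfiz x0, x0, #4, #8
// i.e. UBFMXri with ImmR == 60 and ImmS == 7.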
3773
3774/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3775/// variable shift/rotate instructions.
3776bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3777 EVT VT = N->getValueType(0);
3778
3779 unsigned Opc;
3780 switch (N->getOpcode()) {
3781 case ISD::ROTR:
3782 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3783 break;
3784 case ISD::SHL:
3785 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3786 break;
3787 case ISD::SRL:
3788 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3789 break;
3790 case ISD::SRA:
3791 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3792 break;
3793 default:
3794 return false;
3795 }
3796
3797 uint64_t Size;
3798 uint64_t Bits;
3799 if (VT == MVT::i32) {
3800 Bits = 5;
3801 Size = 32;
3802 } else if (VT == MVT::i64) {
3803 Bits = 6;
3804 Size = 64;
3805 } else
3806 return false;
3807
3808 SDValue ShiftAmt = N->getOperand(1);
3809 SDLoc DL(N);
3810 SDValue NewShiftAmt;
3811
3812 // Skip over an extend of the shift amount.
3813 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3814 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3815 ShiftAmt = ShiftAmt->getOperand(0);
3816
3817 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3818 SDValue Add0 = ShiftAmt->getOperand(0);
3819 SDValue Add1 = ShiftAmt->getOperand(1);
3820 uint64_t Add0Imm;
3821 uint64_t Add1Imm;
3822 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
3823 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3824 // to avoid the ADD/SUB.
3825 NewShiftAmt = Add0;
3826 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3827 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
3828 (Add0Imm % Size == 0)) {
3829 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3830 // to generate a NEG instead of a SUB from a constant.
3831 unsigned NegOpc;
3832 unsigned ZeroReg;
3833 EVT SubVT = ShiftAmt->getValueType(0);
3834 if (SubVT == MVT::i32) {
3835 NegOpc = AArch64::SUBWrr;
3836 ZeroReg = AArch64::WZR;
3837 } else {
3838 assert(SubVT == MVT::i64);
3839 NegOpc = AArch64::SUBXrr;
3840 ZeroReg = AArch64::XZR;
3841 }
3842 SDValue Zero =
3843 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3844 MachineSDNode *Neg =
3845 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
3846 NewShiftAmt = SDValue(Neg, 0);
3847 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3848 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
3849 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3850 // to generate a NOT instead of a SUB from a constant.
3851 unsigned NotOpc;
3852 unsigned ZeroReg;
3853 EVT SubVT = ShiftAmt->getValueType(0);
3854 if (SubVT == MVT::i32) {
3855 NotOpc = AArch64::ORNWrr;
3856 ZeroReg = AArch64::WZR;
3857 } else {
3858 assert(SubVT == MVT::i64);
3859 NotOpc = AArch64::ORNXrr;
3860 ZeroReg = AArch64::XZR;
3861 }
3862 SDValue Zero =
3863 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3864 MachineSDNode *Not =
3865 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
3866 NewShiftAmt = SDValue(Not, 0);
3867 } else
3868 return false;
3869 } else {
3870 // If the shift amount is masked with an AND, check that the mask covers the
3871 // bits that are implicitly ANDed off by the above opcodes and if so, skip
3872 // the AND.
3873 uint64_t MaskImm;
3874 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
3875 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
3876 return false;
3877
3878 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
3879 return false;
3880
3881 NewShiftAmt = ShiftAmt->getOperand(0);
3882 }
3883
3884 // Narrow/widen the shift amount to match the size of the shift operation.
3885 if (VT == MVT::i32)
3886 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
3887 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
3888 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
3889 MachineSDNode *Ext = CurDAG->getMachineNode(
3890 AArch64::SUBREG_TO_REG, DL, VT,
3891 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
3892 NewShiftAmt = SDValue(Ext, 0);
3893 }
3894
3895 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
3896 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3897 return true;
3898}
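// For illustration (i64): "shl x0, (and x1, 63)" drops the AND because the
// mask already covers the 6 bits LSLV ignores, giving "lslv x0, x0, x1";
// "shl x0, (sub 64, x1)" becomes a NEG of x1 feeding LSLV, since 64 % 64 == 0.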
3899
3900static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
3901 SDValue &FixedPos,
3902 unsigned RegWidth,
3903 bool isReciprocal) {
3904 APFloat FVal(0.0);
3905 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
3906 FVal = CN->getValueAPF();
3907 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
3908 // Some otherwise illegal constants are allowed in this case.
3909 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
3910 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
3911 return false;
3912
3913 ConstantPoolSDNode *CN =
3914 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
3915 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
3916 } else
3917 return false;
3918
3919 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
3920 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
3921 // x-register.
3922 //
3923 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
3924 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
3925 // integers.
3926 bool IsExact;
3927
3928 if (isReciprocal)
3929 if (!FVal.getExactInverse(&FVal))
3930 return false;
3931
3932 // fbits is between 1 and 64 in the worst-case, which means the fmul
3933 // could have 2^64 as an actual operand. Need 65 bits of precision.
3934 APSInt IntVal(65, true);
3935 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
3936
3937 // N.b. isPowerOf2 also checks for > 0.
3938 if (!IsExact || !IntVal.isPowerOf2())
3939 return false;
3940 unsigned FBits = IntVal.logBase2();
3941
3942 // Checks above should have guaranteed that we haven't lost information in
3943 // finding FBits, but it must still be in range.
3944 if (FBits == 0 || FBits > RegWidth) return false;
3945
3946 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
3947 return true;
3948}
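// For illustration: "(fp_to_sint (fmul x, 65536.0))" passes this check with
// FBits == 16, enabling a fixed-point convert such as "fcvtzs w0, s0, #16";
// with isReciprocal set, a multiplier of 1.0/65536.0 is accepted the same way.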
3949
3950bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
3951 unsigned RegWidth) {
3952 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3953 false);
3954}
3955
3956bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
3957 SDValue &FixedPos,
3958 unsigned RegWidth) {
3959 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3960 true);
3961}
3962
3963// Inspects a register string of the form o0:op1:CRn:CRm:op2, gets the fields
3964// of the string, obtains the integer values from them, and combines these
3965// into a single value to be used in the MRS/MSR instruction.
3966static int getIntOperandFromRegisterString(StringRef RegString) {
3967 SmallVector<StringRef, 5> Fields;
3968 RegString.split(Fields, ':');
3969
3970 if (Fields.size() == 1)
3971 return -1;
3972
3973 assert(Fields.size() == 5
3974 && "Invalid number of fields in read register string");
3975
3976 SmallVector<int, 5> Ops;
3977 bool AllIntFields = true;
3978
3979 for (StringRef Field : Fields) {
3980 unsigned IntField;
3981 AllIntFields &= !Field.getAsInteger(10, IntField);
3982 Ops.push_back(IntField);
3983 }
3984
3985 assert(AllIntFields &&
3986 "Unexpected non-integer value in special register string.");
3987 (void)AllIntFields;
3988
3989 // Need to combine the integer fields of the string into a single value
3990 // based on the bit encoding of MRS/MSR instruction.
3991 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
3992 (Ops[3] << 3) | (Ops[4]);
3993}
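// For illustration: the (arbitrary) string "1:2:7:4:5" splits into five
// integer fields and is packed as (1 << 14) | (2 << 11) | (7 << 7) | (4 << 3) | 5,
// matching the o0/op1/CRn/CRm/op2 layout expected by MRS/MSR.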
3994
3995// Lower the read_register intrinsic to an MRS instruction node if the special
3996// register string argument is either of the form detailed in the ACLE (the
3997// form described in getIntOperandFromRegisterString) or is a named register
3998// known by the MRS SysReg mapper.
3999bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
4000 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4001 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4002 SDLoc DL(N);
4003
4004 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
4005
4006 unsigned Opcode64Bit = AArch64::MRS;
4007 int Imm = getIntOperandFromRegisterString(RegString->getString());
4008 if (Imm == -1) {
4009 // No match. Use the sysreg mapper to map the remaining possible strings to
4010 // the value for the register to be used for the instruction operand.
4011 const auto *TheReg =
4012 AArch64SysReg::lookupSysRegByName(RegString->getString());
4013 if (TheReg && TheReg->Readable &&
4014 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4015 Imm = TheReg->Encoding;
4016 else
4017 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4018
4019 if (Imm == -1) {
4020 // Still no match, see if this is "pc" or give up.
4021 if (!ReadIs128Bit && RegString->getString() == "pc") {
4022 Opcode64Bit = AArch64::ADR;
4023 Imm = 0;
4024 } else {
4025 return false;
4026 }
4027 }
4028 }
4029
4030 SDValue InChain = N->getOperand(0);
4031 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
4032 if (!ReadIs128Bit) {
4033 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
4034 {SysRegImm, InChain});
4035 } else {
4036 SDNode *MRRS = CurDAG->getMachineNode(
4037 AArch64::MRRS, DL,
4038 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
4039 {SysRegImm, InChain});
4040
4041 // Sysregs are not endian. The even register always contains the low half
4042 // of the register.
4043 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
4044 SDValue(MRRS, 0));
4045 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
4046 SDValue(MRRS, 0));
4047 SDValue OutChain = SDValue(MRRS, 1);
4048
4049 ReplaceUses(SDValue(N, 0), Lo);
4050 ReplaceUses(SDValue(N, 1), Hi);
4051 ReplaceUses(SDValue(N, 2), OutChain);
4052 }
4053 return true;
4054}
4055
4056// Lower the write_register intrinsic to an MSR instruction node if the special
4057// register string argument is either of the form detailed in the ACLE (the
4058// form described in getIntOperandFromRegisterString) or is a named register
4059// known by the MSR SysReg mapper.
4060bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
4061 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4062 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4063 SDLoc DL(N);
4064
4065 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
4066
4067 if (!WriteIs128Bit) {
4068 // Check if the register was one of those allowed as the pstatefield value
4069 // in the MSR (immediate) instruction. To accept the values allowed in the
4070 // pstatefield for the MSR (immediate) instruction, we also require that an
4071 // immediate value has been provided as an argument, we know that this is
4072 // the case as it has been ensured by semantic checking.
4073 auto trySelectPState = [&](auto PMapper, unsigned State) {
4074 if (PMapper) {
4075 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
4076 "Expected a constant integer expression.");
4077 unsigned Reg = PMapper->Encoding;
4078 uint64_t Immed = N->getConstantOperandVal(2);
4079 CurDAG->SelectNodeTo(
4080 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
4081 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
4082 return true;
4083 }
4084 return false;
4085 };
4086
4087 if (trySelectPState(
4088 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
4089 AArch64::MSRpstateImm4))
4090 return true;
4091 if (trySelectPState(
4092 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
4093 AArch64::MSRpstateImm1))
4094 return true;
4095 }
4096
4097 int Imm = getIntOperandFromRegisterString(RegString->getString());
4098 if (Imm == -1) {
4099 // Use the sysreg mapper to attempt to map the remaining possible strings
4100 // to the value for the register to be used for the MSR (register)
4101 // instruction operand.
4102 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
4103 if (TheReg && TheReg->Writeable &&
4104 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4105 Imm = TheReg->Encoding;
4106 else
4107 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4108
4109 if (Imm == -1)
4110 return false;
4111 }
4112
4113 SDValue InChain = N->getOperand(0);
4114 if (!WriteIs128Bit) {
4115 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
4116 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4117 N->getOperand(2), InChain);
4118 } else {
4119 // No endian swap. The lower half always goes into the even subreg, and the
4120 // higher half always into the odd subreg.
4121 SDNode *Pair = CurDAG->getMachineNode(
4122 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
4123 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
4124 MVT::i32),
4125 N->getOperand(2),
4126 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
4127 N->getOperand(3),
4128 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
4129
4130 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
4131 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4132 SDValue(Pair, 0), InChain);
4133 }
4134
4135 return true;
4136}
4137
4138/// We've got special pseudo-instructions for these
4139bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4140 unsigned Opcode;
4141 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4142
4143 // Leave IR for LSE if subtarget supports it.
4144 if (Subtarget->hasLSE()) return false;
4145
4146 if (MemTy == MVT::i8)
4147 Opcode = AArch64::CMP_SWAP_8;
4148 else if (MemTy == MVT::i16)
4149 Opcode = AArch64::CMP_SWAP_16;
4150 else if (MemTy == MVT::i32)
4151 Opcode = AArch64::CMP_SWAP_32;
4152 else if (MemTy == MVT::i64)
4153 Opcode = AArch64::CMP_SWAP_64;
4154 else
4155 llvm_unreachable("Unknown AtomicCmpSwap type");
4156
4157 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4158 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4159 N->getOperand(0)};
4160 SDNode *CmpSwap = CurDAG->getMachineNode(
4161 Opcode, SDLoc(N),
4162 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4163
4164 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4165 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4166
4167 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4168 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4169 CurDAG->RemoveDeadNode(N);
4170
4171 return true;
4172}
4173
4174bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4175 SDValue &Shift) {
4176 if (!isa<ConstantSDNode>(N))
4177 return false;
4178
4179 SDLoc DL(N);
4180 uint64_t Val = cast<ConstantSDNode>(N)
4181 ->getAPIntValue()
4182 .trunc(VT.getFixedSizeInBits())
4183 .getZExtValue();
4184
4185 switch (VT.SimpleTy) {
4186 case MVT::i8:
4187 // All immediates are supported.
4188 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4189 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4190 return true;
4191 case MVT::i16:
4192 case MVT::i32:
4193 case MVT::i64:
4194 // Support 8bit unsigned immediates.
4195 if (Val <= 255) {
4196 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4197 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4198 return true;
4199 }
4200 // Support 16bit unsigned immediates that are a multiple of 256.
4201 if (Val <= 65280 && Val % 256 == 0) {
4202 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4203 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4204 return true;
4205 }
4206 break;
4207 default:
4208 break;
4209 }
4210
4211 return false;
4212}
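// For illustration: with an i32 element type, an immediate of 42 yields
// {Imm = 42, Shift = 0}, 0x2a00 yields {Imm = 0x2a, Shift = 8}, and 0x2a01 is
// rejected because it is neither <= 255 nor a multiple of 256.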
4213
4214bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4215 SDValue &Imm, SDValue &Shift,
4216 bool Negate) {
4217 if (!isa<ConstantSDNode>(N))
4218 return false;
4219
4220 SDLoc DL(N);
4221 int64_t Val = cast<ConstantSDNode>(N)
4222 ->getAPIntValue()
4223 .trunc(VT.getFixedSizeInBits())
4224 .getSExtValue();
4225
4226 if (Negate)
4227 Val = -Val;
4228
4229 // Signed saturating instructions treat their immediate operand as unsigned,
4230 // whereas the related intrinsics define their operands to be signed. This
4231 // means we can only use the immediate form when the operand is non-negative.
4232 if (Val < 0)
4233 return false;
4234
4235 switch (VT.SimpleTy) {
4236 case MVT::i8:
4237 // All positive immediates are supported.
4238 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4239 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4240 return true;
4241 case MVT::i16:
4242 case MVT::i32:
4243 case MVT::i64:
4244 // Support 8bit positive immediates.
4245 if (Val <= 255) {
4246 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4247 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4248 return true;
4249 }
4250 // Support 16bit positive immediates that are a multiple of 256.
4251 if (Val <= 65280 && Val % 256 == 0) {
4252 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4253 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4254 return true;
4255 }
4256 break;
4257 default:
4258 break;
4259 }
4260
4261 return false;
4262}
4263
4264bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4265 SDValue &Shift) {
4266 if (!isa<ConstantSDNode>(N))
4267 return false;
4268
4269 SDLoc DL(N);
4270 int64_t Val = cast<ConstantSDNode>(N)
4271 ->getAPIntValue()
4272 .trunc(VT.getFixedSizeInBits())
4273 .getSExtValue();
4274
4275 switch (VT.SimpleTy) {
4276 case MVT::i8:
4277 // All immediates are supported.
4278 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4279 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4280 return true;
4281 case MVT::i16:
4282 case MVT::i32:
4283 case MVT::i64:
4284 // Support 8bit signed immediates.
4285 if (Val >= -128 && Val <= 127) {
4286 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4287 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4288 return true;
4289 }
4290 // Support 16bit signed immediates that are a multiple of 256.
4291 if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
4292 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4293 Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32);
4294 return true;
4295 }
4296 break;
4297 default:
4298 break;
4299 }
4300
4301 return false;
4302}
4303
4304bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4305 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4306 int64_t ImmVal = CNode->getSExtValue();
4307 SDLoc DL(N);
4308 if (ImmVal >= -128 && ImmVal < 128) {
4309 Imm = CurDAG->getSignedTargetConstant(ImmVal, DL, MVT::i32);
4310 return true;
4311 }
4312 }
4313 return false;
4314}
4315
4316bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4317 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4318 uint64_t ImmVal = CNode->getZExtValue();
4319
4320 switch (VT.SimpleTy) {
4321 case MVT::i8:
4322 ImmVal &= 0xFF;
4323 break;
4324 case MVT::i16:
4325 ImmVal &= 0xFFFF;
4326 break;
4327 case MVT::i32:
4328 ImmVal &= 0xFFFFFFFF;
4329 break;
4330 case MVT::i64:
4331 break;
4332 default:
4333 llvm_unreachable("Unexpected type");
4334 }
4335
4336 if (ImmVal < 256) {
4337 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4338 return true;
4339 }
4340 }
4341 return false;
4342}
4343
4344bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4345 bool Invert) {
4346 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4347 uint64_t ImmVal = CNode->getZExtValue();
4348 SDLoc DL(N);
4349
4350 if (Invert)
4351 ImmVal = ~ImmVal;
4352
4353 // Shift mask depending on type size.
4354 switch (VT.SimpleTy) {
4355 case MVT::i8:
4356 ImmVal &= 0xFF;
4357 ImmVal |= ImmVal << 8;
4358 ImmVal |= ImmVal << 16;
4359 ImmVal |= ImmVal << 32;
4360 break;
4361 case MVT::i16:
4362 ImmVal &= 0xFFFF;
4363 ImmVal |= ImmVal << 16;
4364 ImmVal |= ImmVal << 32;
4365 break;
4366 case MVT::i32:
4367 ImmVal &= 0xFFFFFFFF;
4368 ImmVal |= ImmVal << 32;
4369 break;
4370 case MVT::i64:
4371 break;
4372 default:
4373 llvm_unreachable("Unexpected type");
4374 }
4375
4376 uint64_t encoding;
4377 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
4378 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
4379 return true;
4380 }
4381 }
4382 return false;
4383}
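// For illustration: with VT == MVT::i16 an immediate of 0x00f0 is replicated
// to 0x00f000f000f000f0 and then encoded as a 64-bit logical immediate, the
// form expected by the SVE logical-immediate patterns that use this helper.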
4384
4385// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4386// Rather than attempt to normalise everything we can sometimes saturate the
4387// shift amount during selection. This function also allows for consistent
4388// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4389// required by the instructions.
4390bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4391 uint64_t High, bool AllowSaturation,
4392 SDValue &Imm) {
4393 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4394 uint64_t ImmVal = CN->getZExtValue();
4395
4396 // Reject shift amounts that are too small.
4397 if (ImmVal < Low)
4398 return false;
4399
4400 // Reject or saturate shift amounts that are too big.
4401 if (ImmVal > High) {
4402 if (!AllowSaturation)
4403 return false;
4404 ImmVal = High;
4405 }
4406
4407 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4408 return true;
4409 }
4410
4411 return false;
4412}
4413
4414bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4415 // tagp(FrameIndex, IRGstack, tag_offset):
4416 // since the offset between FrameIndex and IRGstack is a compile-time
4417 // constant, this can be lowered to a single ADDG instruction.
4418 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4419 return false;
4420 }
4421
4422 SDValue IRG_SP = N->getOperand(2);
4423 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4424 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4425 return false;
4426 }
4427
4428 const TargetLowering *TLI = getTargetLowering();
4429 SDLoc DL(N);
4430 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4431 SDValue FiOp = CurDAG->getTargetFrameIndex(
4432 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4433 int TagOffset = N->getConstantOperandVal(3);
4434
4435 SDNode *Out = CurDAG->getMachineNode(
4436 AArch64::TAGPstack, DL, MVT::i64,
4437 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4438 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4439 ReplaceNode(N, Out);
4440 return true;
4441}
4442
4443void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4444 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4445 "llvm.aarch64.tagp third argument must be an immediate");
4446 if (trySelectStackSlotTagP(N))
4447 return;
4448 // FIXME: above applies in any case when offset between Op1 and Op2 is a
4449 // compile-time constant, not just for stack allocations.
4450
4451 // General case for unrelated pointers in Op1 and Op2.
4452 SDLoc DL(N);
4453 int TagOffset = N->getConstantOperandVal(3);
4454 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4455 {N->getOperand(1), N->getOperand(2)});
4456 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4457 {SDValue(N1, 0), N->getOperand(2)});
4458 SDNode *N3 = CurDAG->getMachineNode(
4459 AArch64::ADDG, DL, MVT::i64,
4460 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4461 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4462 ReplaceNode(N, N3);
4463}
4464
4465bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4466 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4467
4468 // Bail when not a "cast" like insert_subvector.
4469 if (N->getConstantOperandVal(2) != 0)
4470 return false;
4471 if (!N->getOperand(0).isUndef())
4472 return false;
4473
4474 // Bail when normal isel should do the job.
4475 EVT VT = N->getValueType(0);
4476 EVT InVT = N->getOperand(1).getValueType();
4477 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4478 return false;
4479 if (InVT.getSizeInBits() <= 128)
4480 return false;
4481
4482 // NOTE: We can only get here when doing fixed length SVE code generation.
4483 // We do manual selection because the types involved are not linked to real
4484 // registers (despite being legal) and must be coerced into SVE registers.
4485
4486 assert(VT.isScalableVector() && VT.getSizeInBits().getKnownMinValue() == 128 &&
4487 "Expected to insert into a packed scalable vector!");
4488
4489 SDLoc DL(N);
4490 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4491 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4492 N->getOperand(1), RC));
4493 return true;
4494}
4495
4496bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4497 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4498
4499 // Bail when not a "cast" like extract_subvector.
4500 if (N->getConstantOperandVal(1) != 0)
4501 return false;
4502
4503 // Bail when normal isel can do the job.
4504 EVT VT = N->getValueType(0);
4505 EVT InVT = N->getOperand(0).getValueType();
4506 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4507 return false;
4508 if (VT.getSizeInBits() <= 128)
4509 return false;
4510
4511 // NOTE: We can only get here when doing fixed length SVE code generation.
4512 // We do manual selection because the types involved are not linked to real
4513 // registers (despite being legal) and must be coerced into SVE registers.
4514
4515 assert(InVT.isScalableVector() && InVT.getSizeInBits().getKnownMinValue() == 128 &&
4516 "Expected to extract from a packed scalable vector!");
4517
4518 SDLoc DL(N);
4519 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4520 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4521 N->getOperand(0), RC));
4522 return true;
4523}
4524
4525bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4526 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4527
4528 SDValue N0 = N->getOperand(0);
4529 SDValue N1 = N->getOperand(1);
4530 EVT VT = N->getValueType(0);
4531
4532 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4533 // Rotate by a constant is a funnel shift in IR which is expanded to
4534 // an OR with shifted operands.
4535 // We do the following transform:
4536 // OR N0, N1 -> xar (x, y, imm)
4537 // Where:
4538 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4539 // N0 = SHL_PRED true, V, splat(bits-imm)
4540 // V = (xor x, y)
4541 if (VT.isScalableVector() &&
4542 (Subtarget->hasSVE2() ||
4543 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4544 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4545 N1.getOpcode() != AArch64ISD::SRL_PRED)
4546 std::swap(N0, N1);
4547 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4548 N1.getOpcode() != AArch64ISD::SRL_PRED)
4549 return false;
4550
4551 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4552 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4553 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4554 return false;
4555
4556 SDValue XOR = N0.getOperand(1);
4557 if (XOR.getOpcode() != ISD::XOR || XOR != N1.getOperand(1))
4558 return false;
4559
4560 APInt ShlAmt, ShrAmt;
4561 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4562 !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
4563 return false;
4564
4565 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4566 return false;
4567
4568 SDLoc DL(N);
4569 SDValue Imm =
4570 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4571
4572 SDValue Ops[] = {XOR.getOperand(0), XOR.getOperand(1), Imm};
4573 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4574 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4575 AArch64::XAR_ZZZI_D})) {
4576 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4577 return true;
4578 }
4579 return false;
4580 }
4581
4582 if (!Subtarget->hasSHA3())
4583 return false;
4584
4585 if (N0->getOpcode() != AArch64ISD::VSHL ||
4586 N1->getOpcode() != AArch64ISD::VLSHR)
4587 return false;
4588
4589 if (N0->getOperand(0) != N1->getOperand(0) ||
4590 N1->getOperand(0)->getOpcode() != ISD::XOR)
4591 return false;
4592
4593 SDValue XOR = N0.getOperand(0);
4594 SDValue R1 = XOR.getOperand(0);
4595 SDValue R2 = XOR.getOperand(1);
4596
4597 unsigned HsAmt = N0.getConstantOperandVal(1);
4598 unsigned ShAmt = N1.getConstantOperandVal(1);
4599
4600 SDLoc DL = SDLoc(N0.getOperand(1));
4601 SDValue Imm = CurDAG->getTargetConstant(
4602 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4603
4604 if (ShAmt + HsAmt != 64)
4605 return false;
4606
4607 SDValue Ops[] = {R1, R2, Imm};
4608 CurDAG->SelectNodeTo(N, AArch64::XAR, N0.getValueType(), Ops);
4609
4610 return true;
4611}
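// For illustration (NEON path): a v2i64 "rotr (xor x, y), #17" reaches this
// function as "or (AArch64ISD::VSHL (xor x, y), #47), (AArch64ISD::VLSHR
// (xor x, y), #17)"; since 47 + 17 == 64 and SHA3 is available, it is selected
// to a single "xar v0.2d, vX.2d, vY.2d, #17".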
4612
4613void AArch64DAGToDAGISel::Select(SDNode *Node) {
4614 // If we have a custom node, we already have selected!
4615 if (Node->isMachineOpcode()) {
4616 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4617 Node->setNodeId(-1);
4618 return;
4619 }
4620
4621 // Few custom selection stuff.
4622 EVT VT = Node->getValueType(0);
4623
4624 switch (Node->getOpcode()) {
4625 default:
4626 break;
4627
4628 case ISD::ATOMIC_CMP_SWAP:
4629 if (SelectCMP_SWAP(Node))
4630 return;
4631 break;
4632
4633 case ISD::READ_REGISTER:
4634 case AArch64ISD::MRRS:
4635 if (tryReadRegister(Node))
4636 return;
4637 break;
4638
4639 case ISD::WRITE_REGISTER:
4640 case AArch64ISD::MSRR:
4641 if (tryWriteRegister(Node))
4642 return;
4643 break;
4644
4645 case ISD::LOAD: {
4646 // Try to select as an indexed load. Fall through to normal processing
4647 // if we can't.
4648 if (tryIndexedLoad(Node))
4649 return;
4650 break;
4651 }
4652
4653 case ISD::SRL:
4654 case ISD::AND:
4655 case ISD::SRA:
4656 case ISD::SIGN_EXTEND_INREG:
4657 if (tryBitfieldExtractOp(Node))
4658 return;
4659 if (tryBitfieldInsertInZeroOp(Node))
4660 return;
4661 [[fallthrough]];
4662 case ISD::ROTR:
4663 case ISD::SHL:
4664 if (tryShiftAmountMod(Node))
4665 return;
4666 break;
4667
4668 case ISD::SIGN_EXTEND:
4669 if (tryBitfieldExtractOpFromSExt(Node))
4670 return;
4671 break;
4672
4673 case ISD::OR:
4674 if (tryBitfieldInsertOp(Node))
4675 return;
4676 if (trySelectXAR(Node))
4677 return;
4678 break;
4679
4680 case ISD::EXTRACT_SUBVECTOR: {
4681 if (trySelectCastScalableToFixedLengthVector(Node))
4682 return;
4683 break;
4684 }
4685
4686 case ISD::INSERT_SUBVECTOR: {
4687 if (trySelectCastFixedLengthToScalableVector(Node))
4688 return;
4689 break;
4690 }
4691
4692 case ISD::Constant: {
4693 // Materialize zero constants as copies from WZR/XZR. This allows
4694 // the coalescer to propagate these into other instructions.
4695 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4696 if (ConstNode->isZero()) {
4697 if (VT == MVT::i32) {
4698 SDValue New = CurDAG->getCopyFromReg(
4699 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4700 ReplaceNode(Node, New.getNode());
4701 return;
4702 } else if (VT == MVT::i64) {
4703 SDValue New = CurDAG->getCopyFromReg(
4704 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4705 ReplaceNode(Node, New.getNode());
4706 return;
4707 }
4708 }
4709 break;
4710 }
4711
4712 case ISD::FrameIndex: {
4713 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4714 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4715 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4716 const TargetLowering *TLI = getTargetLowering();
4717 SDValue TFI = CurDAG->getTargetFrameIndex(
4718 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4719 SDLoc DL(Node);
4720 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4721 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4722 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4723 return;
4724 }
4725 case ISD::INTRINSIC_W_CHAIN: {
4726 unsigned IntNo = Node->getConstantOperandVal(1);
4727 switch (IntNo) {
4728 default:
4729 break;
4730 case Intrinsic::aarch64_gcsss: {
4731 SDLoc DL(Node);
4732 SDValue Chain = Node->getOperand(0);
4733 SDValue Val = Node->getOperand(2);
4734 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
4735 SDNode *SS1 =
4736 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
4737 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
4738 MVT::Other, Zero, SDValue(SS1, 0));
4739 ReplaceNode(Node, SS2);
4740 return;
4741 }
4742 case Intrinsic::aarch64_ldaxp:
4743 case Intrinsic::aarch64_ldxp: {
4744 unsigned Op =
4745 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4746 SDValue MemAddr = Node->getOperand(2);
4747 SDLoc DL(Node);
4748 SDValue Chain = Node->getOperand(0);
4749
4750 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4751 MVT::Other, MemAddr, Chain);
4752
4753 // Transfer memoperands.
4754 MachineMemOperand *MemOp =
4755 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4756 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4757 ReplaceNode(Node, Ld);
4758 return;
4759 }
4760 case Intrinsic::aarch64_stlxp:
4761 case Intrinsic::aarch64_stxp: {
4762 unsigned Op =
4763 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4764 SDLoc DL(Node);
4765 SDValue Chain = Node->getOperand(0);
4766 SDValue ValLo = Node->getOperand(2);
4767 SDValue ValHi = Node->getOperand(3);
4768 SDValue MemAddr = Node->getOperand(4);
4769
4770 // Place arguments in the right order.
4771 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4772
4773 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4774 // Transfer memoperands.
4775 MachineMemOperand *MemOp =
4776 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4777 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4778
4779 ReplaceNode(Node, St);
4780 return;
4781 }
4782 case Intrinsic::aarch64_neon_ld1x2:
4783 if (VT == MVT::v8i8) {
4784 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4785 return;
4786 } else if (VT == MVT::v16i8) {
4787 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4788 return;
4789 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4790 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4791 return;
4792 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4793 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
4794 return;
4795 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4796 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
4797 return;
4798 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4799 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
4800 return;
4801 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4802 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4803 return;
4804 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4805 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
4806 return;
4807 }
4808 break;
4809 case Intrinsic::aarch64_neon_ld1x3:
4810 if (VT == MVT::v8i8) {
4811 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
4812 return;
4813 } else if (VT == MVT::v16i8) {
4814 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
4815 return;
4816 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4817 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
4818 return;
4819 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4820 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
4821 return;
4822 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4823 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
4824 return;
4825 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4826 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
4827 return;
4828 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4829 SelectLoad(Node, 3,