1 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the AArch64 target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AArch64MachineFunctionInfo.h"
14 #include "AArch64TargetMachine.h"
15 #include "MCTargetDesc/AArch64AddressingModes.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/CodeGen/SelectionDAGISel.h"
18 #include "llvm/IR/Function.h" // To access function attributes.
19 #include "llvm/IR/GlobalValue.h"
20 #include "llvm/IR/Intrinsics.h"
21 #include "llvm/IR/IntrinsicsAArch64.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/ErrorHandling.h"
24 #include "llvm/Support/KnownBits.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
27 
28 using namespace llvm;
29 
30 #define DEBUG_TYPE "aarch64-isel"
31 
32 //===--------------------------------------------------------------------===//
33 /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
34 /// instructions for SelectionDAG operations.
35 ///
36 namespace {
37 
38 class AArch64DAGToDAGISel : public SelectionDAGISel {
39 
40  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
41  /// make the right decision when generating code for different targets.
42  const AArch64Subtarget *Subtarget;
43 
44 public:
45  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
46  CodeGenOpt::Level OptLevel)
47  : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
48 
49  StringRef getPassName() const override {
50  return "AArch64 Instruction Selection";
51  }
52 
53  bool runOnMachineFunction(MachineFunction &MF) override {
54  Subtarget = &MF.getSubtarget<AArch64Subtarget>();
55  return SelectionDAGISel::runOnMachineFunction(MF);
56  }
57 
58  void Select(SDNode *Node) override;
59 
60  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
61  /// inline asm expressions.
62  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
63  unsigned ConstraintID,
64  std::vector<SDValue> &OutOps) override;
65 
66  template <signed Low, signed High, signed Scale>
67  bool SelectRDVLImm(SDValue N, SDValue &Imm);
68 
69  bool tryMLAV64LaneV128(SDNode *N);
70  bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
71  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
72  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
73  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
74  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
75  return SelectShiftedRegister(N, false, Reg, Shift);
76  }
77  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
78  return SelectShiftedRegister(N, true, Reg, Shift);
79  }
80  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
81  return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
82  }
83  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
84  return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
85  }
86  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
87  return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
88  }
89  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
90  return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
91  }
92  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
93  return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
94  }
95  bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
96  return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
97  }
98  bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
99  return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
100  }
101  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
102  return SelectAddrModeIndexed(N, 1, Base, OffImm);
103  }
104  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
105  return SelectAddrModeIndexed(N, 2, Base, OffImm);
106  }
107  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
108  return SelectAddrModeIndexed(N, 4, Base, OffImm);
109  }
110  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
111  return SelectAddrModeIndexed(N, 8, Base, OffImm);
112  }
113  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
114  return SelectAddrModeIndexed(N, 16, Base, OffImm);
115  }
116  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
117  return SelectAddrModeUnscaled(N, 1, Base, OffImm);
118  }
119  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
120  return SelectAddrModeUnscaled(N, 2, Base, OffImm);
121  }
122  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
123  return SelectAddrModeUnscaled(N, 4, Base, OffImm);
124  }
125  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
126  return SelectAddrModeUnscaled(N, 8, Base, OffImm);
127  }
128  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
129  return SelectAddrModeUnscaled(N, 16, Base, OffImm);
130  }
131 
132  template<int Width>
133  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
134  SDValue &SignExtend, SDValue &DoShift) {
135  return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
136  }
137 
138  template<int Width>
139  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
140  SDValue &SignExtend, SDValue &DoShift) {
141  return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
142  }
143 
144  bool SelectDupZeroOrUndef(SDValue N) {
145  switch(N->getOpcode()) {
146  case ISD::UNDEF:
147  return true;
148  case AArch64ISD::DUP:
149  case ISD::SPLAT_VECTOR: {
150  auto Opnd0 = N->getOperand(0);
151  if (auto CN = dyn_cast<ConstantSDNode>(Opnd0))
152  if (CN->isNullValue())
153  return true;
154  if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
155  if (CN->isZero())
156  return true;
157  break;
158  }
159  default:
160  break;
161  }
162 
163  return false;
164  }
165 
166  bool SelectDupZero(SDValue N) {
167  switch(N->getOpcode()) {
168  case AArch64ISD::DUP:
169  case ISD::SPLAT_VECTOR: {
170  auto Opnd0 = N->getOperand(0);
171  if (auto CN = dyn_cast<ConstantSDNode>(Opnd0))
172  if (CN->isNullValue())
173  return true;
174  if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
175  if (CN->isZero())
176  return true;
177  break;
178  }
179  }
180 
181  return false;
182  }
183 
184  template<MVT::SimpleValueType VT>
185  bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
186  return SelectSVEAddSubImm(N, VT, Imm, Shift);
187  }
188 
189  template<MVT::SimpleValueType VT>
190  bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
191  return SelectSVELogicalImm(N, VT, Imm);
192  }
193 
194  template <MVT::SimpleValueType VT>
195  bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
196  return SelectSVEArithImm(N, VT, Imm);
197  }
198 
199  template <unsigned Low, unsigned High, bool AllowSaturation = false>
200  bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
201  return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
202  }
203 
204  // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
205  template<signed Min, signed Max, signed Scale, bool Shift>
206  bool SelectCntImm(SDValue N, SDValue &Imm) {
207  if (!isa<ConstantSDNode>(N))
208  return false;
209 
210  int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
211  if (Shift)
212  MulImm = 1LL << MulImm;
213 
214  if ((MulImm % std::abs(Scale)) != 0)
215  return false;
216 
217  MulImm /= Scale;
218  if ((MulImm >= Min) && (MulImm <= Max)) {
219  Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
220  return true;
221  }
222 
223  return false;
224  }
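 // Illustrative example (parameter values assumed, not taken from the TableGen
 // patterns): with Min=1, Max=16, Scale=16 and Shift=false, a multiplier
 // constant of 64 gives MulImm = 64 / 16 = 4, which is in range, so Imm becomes
 // the target constant 4; a multiplier of 72 is rejected since 72 % 16 != 0.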
225 
226  /// Form sequences of consecutive 64/128-bit registers for use in NEON
227  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
228 /// between 1 and 4 elements. If it contains a single element, that element is
229 /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
230  SDValue createDTuple(ArrayRef<SDValue> Vecs);
231  SDValue createQTuple(ArrayRef<SDValue> Vecs);
232  // Form a sequence of SVE registers for instructions using list of vectors,
233  // e.g. structured loads and stores (ldN, stN).
234  SDValue createZTuple(ArrayRef<SDValue> Vecs);
235 
236  /// Generic helper for the createDTuple/createQTuple
237  /// functions. Those should almost always be called instead.
238  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
239  const unsigned SubRegs[]);
240 
241  void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
242 
243  bool tryIndexedLoad(SDNode *N);
244 
245  bool trySelectStackSlotTagP(SDNode *N);
246  void SelectTagP(SDNode *N);
247 
248  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
249  unsigned SubRegIdx);
250  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
251  unsigned SubRegIdx);
252  void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
253  void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
254  void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
255  unsigned Opc_rr, unsigned Opc_ri);
256 
257  bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
258  /// SVE Reg+Imm addressing mode.
259  template <int64_t Min, int64_t Max>
260  bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
261  SDValue &OffImm);
262  /// SVE Reg+Reg address mode.
263  template <unsigned Scale>
264  bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
265  return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
266  }
267 
268  void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
269  void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
270  void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
271  void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
272  void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
273  unsigned Opc_rr, unsigned Opc_ri);
274  std::tuple<unsigned, SDValue, SDValue>
275  findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
276  const SDValue &OldBase, const SDValue &OldOffset,
277  unsigned Scale);
278 
279  bool tryBitfieldExtractOp(SDNode *N);
280  bool tryBitfieldExtractOpFromSExt(SDNode *N);
281  bool tryBitfieldInsertOp(SDNode *N);
282  bool tryBitfieldInsertInZeroOp(SDNode *N);
283  bool tryShiftAmountMod(SDNode *N);
284  bool tryHighFPExt(SDNode *N);
285 
286  bool tryReadRegister(SDNode *N);
287  bool tryWriteRegister(SDNode *N);
288 
289 // Include the pieces autogenerated from the target description.
290 #include "AArch64GenDAGISel.inc"
291 
292 private:
293  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
294  SDValue &Shift);
295  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
296  SDValue &OffImm) {
297  return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
298  }
299  bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
300  unsigned Size, SDValue &Base,
301  SDValue &OffImm);
302  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
303  SDValue &OffImm);
304  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
305  SDValue &OffImm);
306  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
307  SDValue &Offset, SDValue &SignExtend,
308  SDValue &DoShift);
309  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
310  SDValue &Offset, SDValue &SignExtend,
311  SDValue &DoShift);
312  bool isWorthFolding(SDValue V) const;
313  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
314  SDValue &Offset, SDValue &SignExtend);
315 
316  template<unsigned RegWidth>
317  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
318  return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
319  }
320 
321  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
322 
323  bool SelectCMP_SWAP(SDNode *N);
324 
325  bool SelectSVE8BitLslImm(SDValue N, SDValue &Imm, SDValue &Shift);
326 
327  bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
328 
329  bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm);
330 
331  bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
332  bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
333  bool AllowSaturation, SDValue &Imm);
334 
335  bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
336  bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
337  SDValue &Offset);
338 };
339 } // end anonymous namespace
340 
341 /// isIntImmediate - This method tests to see if the node is a constant
342 /// operand. If so, Imm will receive the value.
343 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
344  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
345  Imm = C->getZExtValue();
346  return true;
347  }
348  return false;
349 }
350 
351 // isIntImmediate - This method tests to see if the operand is a constant.
352 // If so, Imm will receive the value.
353 static bool isIntImmediate(SDValue N, uint64_t &Imm) {
354  return isIntImmediate(N.getNode(), Imm);
355 }
356 
357 // isOpcWithIntImmediate - This method tests to see if the node is a specific
358 // opcode and that it has an immediate integer right operand.
359 // If so, Imm will receive the value.
360 static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
361  uint64_t &Imm) {
362  return N->getOpcode() == Opc &&
363  isIntImmediate(N->getOperand(1).getNode(), Imm);
364 }
365 
366 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
367  const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
368  switch(ConstraintID) {
369  default:
370  llvm_unreachable("Unexpected asm memory constraint");
371  case InlineAsm::Constraint_m:
372  case InlineAsm::Constraint_o:
373  case InlineAsm::Constraint_Q:
374  // We need to make sure that this one operand does not end up in XZR, thus
375  // require the address to be in a PointerRegClass register.
376  const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
377  const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
378  SDLoc dl(Op);
379  SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
380  SDValue NewOp =
381  SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
382  dl, Op.getValueType(),
383  Op, RC), 0);
384  OutOps.push_back(NewOp);
385  return false;
386  }
387  return true;
388 }
389 
390 /// SelectArithImmed - Select an immediate value that can be represented as
391 /// a 12-bit value shifted left by either 0 or 12. If so, return true with
392 /// Val set to the 12-bit value and Shift set to the shifter operand.
393 bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
394  SDValue &Shift) {
395  // This function is called from the addsub_shifted_imm ComplexPattern,
396  // which lists [imm] as the list of opcodes it's interested in; however,
397  // we still need to check whether the operand is actually an immediate
398  // here because the ComplexPattern opcode list is only used in
399  // root-level opcode matching.
400  if (!isa<ConstantSDNode>(N.getNode()))
401  return false;
402 
403  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
404  unsigned ShiftAmt;
405 
406  if (Immed >> 12 == 0) {
407  ShiftAmt = 0;
408  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
409  ShiftAmt = 12;
410  Immed = Immed >> 12;
411  } else
412  return false;
413 
414  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
415  SDLoc dl(N);
416  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
417  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
418  return true;
419 }
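// For example (illustrative): the constant 0x123 is accepted with Val=0x123 and
// LSL #0, 0x123000 with Val=0x123 and LSL #12, while 0x123456 is rejected since
// it is neither a 12-bit value nor a 12-bit value shifted left by 12.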
420 
421 /// SelectNegArithImmed - As above, but negates the value before trying to
422 /// select it.
423 bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
424  SDValue &Shift) {
425  // This function is called from the addsub_shifted_imm ComplexPattern,
426  // which lists [imm] as the list of opcodes it's interested in; however,
427  // we still need to check whether the operand is actually an immediate
428  // here because the ComplexPattern opcode list is only used in
429  // root-level opcode matching.
430  if (!isa<ConstantSDNode>(N.getNode()))
431  return false;
432 
433  // The immediate operand must be a 24-bit zero-extended immediate.
434  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
435 
436  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
437  // have the opposite effect on the C flag, so this pattern mustn't match under
438  // those circumstances.
439  if (Immed == 0)
440  return false;
441 
442  if (N.getValueType() == MVT::i32)
443  Immed = ~((uint32_t)Immed) + 1;
444  else
445  Immed = ~Immed + 1ULL;
446  if (Immed & 0xFFFFFFFFFF000000ULL)
447  return false;
448 
449  Immed &= 0xFFFFFFULL;
450  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
451  Shift);
452 }
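// For example (illustrative): for an i32 operand of -5, Immed becomes
// ~(uint32_t)0xFFFFFFFB + 1 == 5, which fits in 12 bits, so the negated form is
// selected; this is the kind of fold that lets a compare against -5 be emitted
// as "cmn w0, #5".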
453 
454 /// getShiftTypeForNode - Translate a shift node to the corresponding
455 /// ShiftType value.
456 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
457  switch (N.getOpcode()) {
458  default:
459  return AArch64_AM::InvalidShiftExtend;
460  case ISD::SHL:
461  return AArch64_AM::LSL;
462  case ISD::SRL:
463  return AArch64_AM::LSR;
464  case ISD::SRA:
465  return AArch64_AM::ASR;
466  case ISD::ROTR:
467  return AArch64_AM::ROR;
468  }
469 }
470 
471 /// Determine whether it is worth it to fold SHL into the addressing
472 /// mode.
473 static bool isWorthFoldingSHL(SDValue V) {
474  assert(V.getOpcode() == ISD::SHL && "invalid opcode");
475  // It is worth folding logical shift of up to three places.
476  auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
477  if (!CSD)
478  return false;
479  unsigned ShiftVal = CSD->getZExtValue();
480  if (ShiftVal > 3)
481  return false;
482 
483  // Check if this particular node is reused in any non-memory related
484  // operation. If yes, do not try to fold this node into the address
485  // computation, since the computation will be kept.
486  const SDNode *Node = V.getNode();
487  for (SDNode *UI : Node->uses())
488  if (!isa<MemSDNode>(*UI))
489  for (SDNode *UII : UI->uses())
490  if (!isa<MemSDNode>(*UII))
491  return false;
492  return true;
493 }
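// For example (illustrative): in (load (add x, (shl y, 3))) the shift can be
// folded into the addressing mode as [x, y, lsl #3]; but if (shl y, 3) also
// feeds a non-memory user, the shift has to be computed anyway, so folding it
// is not considered worthwhile.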
494 
495 /// Determine whether it is worth folding V into an extended register.
496 bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
497  // Trivial if we are optimizing for code size or if there is only
498  // one use of the value.
499  if (CurDAG->shouldOptForSize() || V.hasOneUse())
500  return true;
501  // If a subtarget has a fastpath LSL we can fold a logical shift into
502  // the addressing mode and save a cycle.
503  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
504  isWorthFoldingSHL(V))
505  return true;
506  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
507  const SDValue LHS = V.getOperand(0);
508  const SDValue RHS = V.getOperand(1);
509  if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
510  return true;
511  if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
512  return true;
513  }
514 
515  // It hurts otherwise, since the value will be reused.
516  return false;
517 }
518 
519 /// SelectShiftedRegister - Select a "shifted register" operand. If the value
520 /// is not shifted, set the Shift operand to default of "LSL 0". The logical
521 /// instructions allow the shifted register to be rotated, but the arithmetic
522 /// instructions do not. The AllowROR parameter specifies whether ROR is
523 /// supported.
524 bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
525  SDValue &Reg, SDValue &Shift) {
526  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
527  if (ShType == AArch64_AM::InvalidShiftExtend)
528  return false;
529  if (!AllowROR && ShType == AArch64_AM::ROR)
530  return false;
531 
532  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
533  unsigned BitSize = N.getValueSizeInBits();
534  unsigned Val = RHS->getZExtValue() & (BitSize - 1);
535  unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
536 
537  Reg = N.getOperand(0);
538  Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
539  return isWorthFolding(N);
540  }
541 
542  return false;
543 }
544 
545 /// getExtendTypeForNode - Translate an extend node to the corresponding
546 /// ExtendType value.
547 static AArch64_AM::ShiftExtendType
548 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
549  if (N.getOpcode() == ISD::SIGN_EXTEND ||
550  N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
551  EVT SrcVT;
552  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
553  SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
554  else
555  SrcVT = N.getOperand(0).getValueType();
556 
557  if (!IsLoadStore && SrcVT == MVT::i8)
558  return AArch64_AM::SXTB;
559  else if (!IsLoadStore && SrcVT == MVT::i16)
560  return AArch64_AM::SXTH;
561  else if (SrcVT == MVT::i32)
562  return AArch64_AM::SXTW;
563  assert(SrcVT != MVT::i64 && "extend from 64-bits?");
564 
565  return AArch64_AM::InvalidShiftExtend;
566  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
567  N.getOpcode() == ISD::ANY_EXTEND) {
568  EVT SrcVT = N.getOperand(0).getValueType();
569  if (!IsLoadStore && SrcVT == MVT::i8)
570  return AArch64_AM::UXTB;
571  else if (!IsLoadStore && SrcVT == MVT::i16)
572  return AArch64_AM::UXTH;
573  else if (SrcVT == MVT::i32)
574  return AArch64_AM::UXTW;
575  assert(SrcVT != MVT::i64 && "extend from 64-bits?");
576 
577  return AArch64_AM::InvalidShiftExtend;
578  } else if (N.getOpcode() == ISD::AND) {
579  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
580  if (!CSD)
581  return AArch64_AM::InvalidShiftExtend;
582  uint64_t AndMask = CSD->getZExtValue();
583 
584  switch (AndMask) {
585  default:
586  return AArch64_AM::InvalidShiftExtend;
587  case 0xFF:
588  return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
589  case 0xFFFF:
590  return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
591  case 0xFFFFFFFF:
592  return AArch64_AM::UXTW;
593  }
594  }
595 
595 
596  return AArch64_AM::InvalidShiftExtend;
597 }
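// For example (illustrative): with IsLoadStore == false, (and x, 0xFF) maps to
// UXTB, (sign_extend_inreg x, i16) maps to SXTH, and (and x, 0xFFFFFFFF) maps
// to UXTW; any other AND mask yields InvalidShiftExtend.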
598 
599 // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
600 static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
601  if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
602  DL->getOpcode() != AArch64ISD::DUPLANE32)
603  return false;
604 
605  SDValue SV = DL->getOperand(0);
606  if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
607  return false;
608 
609  SDValue EV = SV.getOperand(1);
610  if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
611  return false;
612 
613  ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
614  ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
615  LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
616  LaneOp = EV.getOperand(0);
617 
618  return true;
619 }
620 
621 // Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
622 // high lane extract.
623 static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
624  SDValue &LaneOp, int &LaneIdx) {
625 
626  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
627  std::swap(Op0, Op1);
628  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
629  return false;
630  }
631  StdOp = Op1;
632  return true;
633 }
634 
635 /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
636 /// is a lane in the upper half of a 128-bit vector. Recognize and select this
637 /// so that we don't emit unnecessary lane extracts.
638 bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
639  SDLoc dl(N);
640  SDValue Op0 = N->getOperand(0);
641  SDValue Op1 = N->getOperand(1);
642  SDValue MLAOp1; // Will hold ordinary multiplicand for MLA.
643  SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA.
644  int LaneIdx = -1; // Will hold the lane index.
645 
646  if (Op1.getOpcode() != ISD::MUL ||
647  !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
648  LaneIdx)) {
649  std::swap(Op0, Op1);
650  if (Op1.getOpcode() != ISD::MUL ||
651  !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
652  LaneIdx))
653  return false;
654  }
655 
656  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
657 
658  SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
659 
660  unsigned MLAOpc = ~0U;
661 
662  switch (N->getSimpleValueType(0).SimpleTy) {
663  default:
664  llvm_unreachable("Unrecognized MLA.");
665  case MVT::v4i16:
666  MLAOpc = AArch64::MLAv4i16_indexed;
667  break;
668  case MVT::v8i16:
669  MLAOpc = AArch64::MLAv8i16_indexed;
670  break;
671  case MVT::v2i32:
672  MLAOpc = AArch64::MLAv2i32_indexed;
673  break;
674  case MVT::v4i32:
675  MLAOpc = AArch64::MLAv4i32_indexed;
676  break;
677  }
678 
679  ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
680  return true;
681 }
682 
683 bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
684  SDLoc dl(N);
685  SDValue SMULLOp0;
686  SDValue SMULLOp1;
687  int LaneIdx;
688 
689  if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
690  LaneIdx))
691  return false;
692 
693  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
694 
695  SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
696 
697  unsigned SMULLOpc = ~0U;
698 
699  if (IntNo == Intrinsic::aarch64_neon_smull) {
700  switch (N->getSimpleValueType(0).SimpleTy) {
701  default:
702  llvm_unreachable("Unrecognized SMULL.");
703  case MVT::v4i32:
704  SMULLOpc = AArch64::SMULLv4i16_indexed;
705  break;
706  case MVT::v2i64:
707  SMULLOpc = AArch64::SMULLv2i32_indexed;
708  break;
709  }
710  } else if (IntNo == Intrinsic::aarch64_neon_umull) {
711  switch (N->getSimpleValueType(0).SimpleTy) {
712  default:
713  llvm_unreachable("Unrecognized SMULL.");
714  case MVT::v4i32:
715  SMULLOpc = AArch64::UMULLv4i16_indexed;
716  break;
717  case MVT::v2i64:
718  SMULLOpc = AArch64::UMULLv2i32_indexed;
719  break;
720  }
721  } else
722  llvm_unreachable("Unrecognized intrinsic.");
723 
724  ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
725  return true;
726 }
727 
728 /// Instructions that accept extend modifiers like UXTW expect the register
729 /// being extended to be a GPR32, but the incoming DAG might be acting on a
730 /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
731 /// this is the case.
732 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
733  if (N.getValueType() == MVT::i32)
734  return N;
735 
736  SDLoc dl(N);
737  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
738  MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
739  dl, MVT::i32, N, SubReg);
740  return SDValue(Node, 0);
741 }
742 
743 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
744 template<signed Low, signed High, signed Scale>
745 bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
746  if (!isa<ConstantSDNode>(N))
747  return false;
748 
749  int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
750  if ((MulImm % std::abs(Scale)) == 0) {
751  int64_t RDVLImm = MulImm / Scale;
752  if ((RDVLImm >= Low) && (RDVLImm <= High)) {
753  Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
754  return true;
755  }
756  }
757 
758  return false;
759 }
760 
761 /// SelectArithExtendedRegister - Select a "extended register" operand. This
762 /// operand folds in an extend followed by an optional left shift.
763 bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
764  SDValue &Shift) {
765  unsigned ShiftVal = 0;
766  AArch64_AM::ShiftExtendType Ext;
767 
768  if (N.getOpcode() == ISD::SHL) {
769  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
770  if (!CSD)
771  return false;
772  ShiftVal = CSD->getZExtValue();
773  if (ShiftVal > 4)
774  return false;
775 
776  Ext = getExtendTypeForNode(N.getOperand(0));
777  if (Ext == AArch64_AM::InvalidShiftExtend)
778  return false;
779 
780  Reg = N.getOperand(0).getOperand(0);
781  } else {
782  Ext = getExtendTypeForNode(N);
783  if (Ext == AArch64_AM::InvalidShiftExtend)
784  return false;
785 
786  Reg = N.getOperand(0);
787 
788  // Don't match if free 32-bit -> 64-bit zext can be used instead.
789  if (Ext == AArch64_AM::UXTW &&
790  Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode()))
791  return false;
792  }
793 
794  // AArch64 mandates that the RHS of the operation must use the smallest
795  // register class that could contain the size being extended from. Thus,
796  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
797  // there might not be an actual 32-bit value in the program. We can
798  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
800  Reg = narrowIfNeeded(CurDAG, Reg);
801  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
802  MVT::i32);
803  return isWorthFolding(N);
804 }
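// For example (illustrative): (add x0, (shl (sext_inreg x1, i8), 2)) can be
// folded so the extended-register operand becomes "w1, sxtb #2"; the SXTB comes
// from getExtendTypeForNode and the left-shift amount becomes the extend's
// shift amount.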
805 
806 /// If there's a use of this ADDlow that's not itself a load/store then we'll
807 /// need to create a real ADD instruction from it anyway and there's no point in
808 /// folding it into the mem op. Theoretically, it shouldn't matter, but there's
809 /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
810 /// leads to duplicated ADRP instructions.
811 static bool isWorthFoldingADDlow(SDValue N) {
812  for (auto Use : N->uses()) {
813  if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
814  Use->getOpcode() != ISD::ATOMIC_LOAD &&
815  Use->getOpcode() != ISD::ATOMIC_STORE)
816  return false;
817 
818  // ldar and stlr have much more restrictive addressing modes (just a
819  // register).
820  if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getOrdering()))
821  return false;
822  }
823 
824  return true;
825 }
826 
827 /// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
828 /// immediate" address. The "Size" argument is the size in bytes of the memory
829 /// reference, which determines the scale.
830 bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
831  unsigned BW, unsigned Size,
832  SDValue &Base,
833  SDValue &OffImm) {
834  SDLoc dl(N);
835  const DataLayout &DL = CurDAG->getDataLayout();
836  const TargetLowering *TLI = getTargetLowering();
837  if (N.getOpcode() == ISD::FrameIndex) {
838  int FI = cast<FrameIndexSDNode>(N)->getIndex();
839  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
840  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
841  return true;
842  }
843 
844  // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
845  // addressing mode selected here doesn't support labels/immediates, only base+offset.
846  if (CurDAG->isBaseWithConstantOffset(N)) {
847  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
848  if (IsSignedImm) {
849  int64_t RHSC = RHS->getSExtValue();
850  unsigned Scale = Log2_32(Size);
851  int64_t Range = 0x1LL << (BW - 1);
852 
853  if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
854  RHSC < (Range << Scale)) {
855  Base = N.getOperand(0);
856  if (Base.getOpcode() == ISD::FrameIndex) {
857  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
858  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
859  }
860  OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
861  return true;
862  }
863  } else {
864  // unsigned Immediate
865  uint64_t RHSC = RHS->getZExtValue();
866  unsigned Scale = Log2_32(Size);
867  uint64_t Range = 0x1ULL << BW;
868 
869  if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
870  Base = N.getOperand(0);
871  if (Base.getOpcode() == ISD::FrameIndex) {
872  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
873  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
874  }
875  OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
876  return true;
877  }
878  }
879  }
880  }
881  // Base only. The address will be materialized into a register before
882  // the memory is accessed.
883  // add x0, Xbase, #offset
884  // stp x1, x2, [x0]
885  Base = N;
886  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
887  return true;
888 }
889 
890 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
891 /// immediate" address. The "Size" argument is the size in bytes of the memory
892 /// reference, which determines the scale.
893 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
894  SDValue &Base, SDValue &OffImm) {
895  SDLoc dl(N);
896  const DataLayout &DL = CurDAG->getDataLayout();
897  const TargetLowering *TLI = getTargetLowering();
898  if (N.getOpcode() == ISD::FrameIndex) {
899  int FI = cast<FrameIndexSDNode>(N)->getIndex();
900  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
901  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
902  return true;
903  }
904 
905  if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
906  GlobalAddressSDNode *GAN =
907  dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
908  Base = N.getOperand(0);
909  OffImm = N.getOperand(1);
910  if (!GAN)
911  return true;
912 
913  if (GAN->getOffset() % Size == 0 &&
914  GAN->getGlobal()->getPointerAlignment(DL) >= Size)
915  return true;
916  }
917 
918  if (CurDAG->isBaseWithConstantOffset(N)) {
919  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
920  int64_t RHSC = (int64_t)RHS->getZExtValue();
921  unsigned Scale = Log2_32(Size);
922  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
923  Base = N.getOperand(0);
924  if (Base.getOpcode() == ISD::FrameIndex) {
925  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
926  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
927  }
928  OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
929  return true;
930  }
931  }
932  }
933 
934  // Before falling back to our general case, check if the unscaled
935  // instructions can handle this. If so, that's preferable.
936  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
937  return false;
938 
939  // Base only. The address will be materialized into a register before
940  // the memory is accessed.
941  // add x0, Xbase, #offset
942  // ldr x0, [x0]
943  Base = N;
944  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
945  return true;
946 }
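// For example (illustrative): for an 8-byte access of (add x0, 40), Size is 8,
// 40 is a multiple of 8 and 40 >> 3 == 5 is below 0x1000, so Base = x0 and
// OffImm = 5, i.e. the scaled form "ldr xN, [x0, #40]".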
947 
948 /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
949 /// immediate" address. This should only match when there is an offset that
950 /// is not valid for a scaled immediate addressing mode. The "Size" argument
951 /// is the size in bytes of the memory reference, which is needed here to know
952 /// what is valid for a scaled immediate.
953 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
954  SDValue &Base,
955  SDValue &OffImm) {
956  if (!CurDAG->isBaseWithConstantOffset(N))
957  return false;
958  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
959  int64_t RHSC = RHS->getSExtValue();
960  // If the offset is valid as a scaled immediate, don't match here.
961  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
962  RHSC < (0x1000 << Log2_32(Size)))
963  return false;
964  if (RHSC >= -256 && RHSC < 256) {
965  Base = N.getOperand(0);
966  if (Base.getOpcode() == ISD::FrameIndex) {
967  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
968  const TargetLowering *TLI = getTargetLowering();
969  Base = CurDAG->getTargetFrameIndex(
970  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
971  }
972  OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
973  return true;
974  }
975  }
976  return false;
977 }
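// For example (illustrative): (add x0, -16) with an 8-byte access cannot use a
// scaled unsigned offset, but -16 lies in [-256, 256), so this matches and the
// access can be emitted as "ldur xN, [x0, #-16]".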
978 
979 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
980  SDLoc dl(N);
981  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
982  SDValue ImpDef = SDValue(
983  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
984  MachineSDNode *Node = CurDAG->getMachineNode(
985  TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
986  return SDValue(Node, 0);
987 }
988 
989 /// Check if the given SHL node (\p N), can be used to form an
990 /// extended register for an addressing mode.
991 bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
992  bool WantExtend, SDValue &Offset,
993  SDValue &SignExtend) {
994  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
995  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
996  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
997  return false;
998 
999  SDLoc dl(N);
1000  if (WantExtend) {
1001  AArch64_AM::ShiftExtendType Ext =
1002  getExtendTypeForNode(N.getOperand(0), true);
1003  if (Ext == AArch64_AM::InvalidShiftExtend)
1004  return false;
1005 
1006  Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1007  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1008  MVT::i32);
1009  } else {
1010  Offset = N.getOperand(0);
1011  SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1012  }
1013 
1014  unsigned LegalShiftVal = Log2_32(Size);
1015  unsigned ShiftVal = CSD->getZExtValue();
1016 
1017  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1018  return false;
1019 
1020  return isWorthFolding(N);
1021 }
1022 
1023 bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1024  SDValue &Base, SDValue &Offset,
1025  SDValue &SignExtend,
1026  SDValue &DoShift) {
1027  if (N.getOpcode() != ISD::ADD)
1028  return false;
1029  SDValue LHS = N.getOperand(0);
1030  SDValue RHS = N.getOperand(1);
1031  SDLoc dl(N);
1032 
1033  // We don't want to match immediate adds here, because they are better lowered
1034  // to the register-immediate addressing modes.
1035  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
1036  return false;
1037 
1038  // Check if this particular node is reused in any non-memory related
1039  // operation. If yes, do not try to fold this node into the address
1040  // computation, since the computation will be kept.
1041  const SDNode *Node = N.getNode();
1042  for (SDNode *UI : Node->uses()) {
1043  if (!isa<MemSDNode>(*UI))
1044  return false;
1045  }
1046 
1047  // Remember if it is worth folding N when it produces extended register.
1048  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
1049 
1050  // Try to match a shifted extend on the RHS.
1051  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1052  SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1053  Base = LHS;
1054  DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1055  return true;
1056  }
1057 
1058  // Try to match a shifted extend on the LHS.
1059  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1060  SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1061  Base = RHS;
1062  DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1063  return true;
1064  }
1065 
1066  // There was no shift, whatever else we find.
1067  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1068 
1069  AArch64_AM::ShiftExtendType Ext;
1070  // Try to match an unshifted extend on the LHS.
1071  if (IsExtendedRegisterWorthFolding &&
1072  (Ext = getExtendTypeForNode(LHS, true)) !=
1073  AArch64_AM::InvalidShiftExtend) {
1074  Base = RHS;
1075  Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1076  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1077  MVT::i32);
1078  if (isWorthFolding(LHS))
1079  return true;
1080  }
1081 
1082  // Try to match an unshifted extend on the RHS.
1083  if (IsExtendedRegisterWorthFolding &&
1084  (Ext = getExtendTypeForNode(RHS, true)) !=
1085  AArch64_AM::InvalidShiftExtend) {
1086  Base = LHS;
1087  Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1088  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1089  MVT::i32);
1090  if (isWorthFolding(RHS))
1091  return true;
1092  }
1093 
1094  return false;
1095 }
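// For example (illustrative): a 4-byte load of (add x0, (shl (sext w1), 2))
// matches with Base = x0, Offset = w1, SignExtend = 1 and DoShift = 1, i.e.
// "ldr wN, [x0, w1, sxtw #2]".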
1096 
1097 // Check if the given immediate is preferred by ADD. If an immediate can be
1098 // encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
1099 // encoded by one MOVZ, return true.
1100 static bool isPreferredADD(int64_t ImmOff) {
1101  // Constant in [0x0, 0xfff] can be encoded in ADD.
1102  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1103  return true;
1104  // Check if it can be encoded in an "ADD LSL #12".
1105  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1106  // As a single MOVZ is faster than an "ADD LSL #12", ignore such constants.
1107  return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1108  (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1109  return false;
1110 }
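// For example (illustrative): 0x456 is preferred (a plain ADD) and 0x456000 is
// preferred (an ADD ... LSL #12 that no single MOVZ covers), but 0x340000 is
// not, because "movz xN, #0x34, lsl #16" already materializes it in one
// instruction.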
1111 
1112 bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1113  SDValue &Base, SDValue &Offset,
1114  SDValue &SignExtend,
1115  SDValue &DoShift) {
1116  if (N.getOpcode() != ISD::ADD)
1117  return false;
1118  SDValue LHS = N.getOperand(0);
1119  SDValue RHS = N.getOperand(1);
1120  SDLoc DL(N);
1121 
1122  // Check if this particular node is reused in any non-memory related
1123  // operation. If yes, do not try to fold this node into the address
1124  // computation, since the computation will be kept.
1125  const SDNode *Node = N.getNode();
1126  for (SDNode *UI : Node->uses()) {
1127  if (!isa<MemSDNode>(*UI))
1128  return false;
1129  }
1130 
1131  // Watch out: if RHS is a wide immediate, it cannot be selected into the
1132  // [BaseReg+Imm] addressing mode, and it may not be encodable in an
1133  // ADD/SUB either. Instead, [BaseReg + 0] would be used, generating
1134  // instructions like:
1135  // MOV X0, WideImmediate
1136  // ADD X1, BaseReg, X0
1137  // LDR X2, [X1, 0]
1138  // For such situation, using [BaseReg, XReg] addressing mode can save one
1139  // ADD/SUB:
1140  // MOV X0, WideImmediate
1141  // LDR X2, [BaseReg, X0]
1142  if (isa<ConstantSDNode>(RHS)) {
1143  int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
1144  unsigned Scale = Log2_32(Size);
1145  // Skip immediates that can be selected in the load/store addressing mode,
1146  // and immediates that can be encoded by a single ADD (SUB is also
1147  // checked by using -ImmOff).
1148  if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
1149  isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1150  return false;
1151 
1152  SDValue Ops[] = { RHS };
1153  SDNode *MOVI =
1154  CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1155  SDValue MOVIV = SDValue(MOVI, 0);
1156  // This ADD of two X registers will be selected into [Reg+Reg] mode.
1157  N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1158  }
1159 
1160  // Remember if it is worth folding N when it produces extended register.
1161  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
1162 
1163  // Try to match a shifted extend on the RHS.
1164  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1165  SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1166  Base = LHS;
1167  DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1168  return true;
1169  }
1170 
1171  // Try to match a shifted extend on the LHS.
1172  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1173  SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1174  Base = RHS;
1175  DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1176  return true;
1177  }
1178 
1179  // Match any non-shifted, non-extend, non-immediate add expression.
1180  Base = LHS;
1181  Offset = RHS;
1182  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1183  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1184  // Reg1 + Reg2 is free: no check needed.
1185  return true;
1186 }
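// For example (illustrative): for a load of (add x0, 0x123456), the offset is
// neither a valid scaled immediate nor a preferred ADD immediate, so a
// MOVi64imm of 0x123456 is emitted and the access uses the [Reg+Reg] form
// "ldr xN, [x0, xM]".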
1187 
1188 SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1189  static const unsigned RegClassIDs[] = {
1190  AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1191  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1192  AArch64::dsub2, AArch64::dsub3};
1193 
1194  return createTuple(Regs, RegClassIDs, SubRegs);
1195 }
1196 
1197 SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1198  static const unsigned RegClassIDs[] = {
1199  AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1200  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1201  AArch64::qsub2, AArch64::qsub3};
1202 
1203  return createTuple(Regs, RegClassIDs, SubRegs);
1204 }
1205 
1206 SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1207  static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1208  AArch64::ZPR3RegClassID,
1209  AArch64::ZPR4RegClassID};
1210  static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1211  AArch64::zsub2, AArch64::zsub3};
1212 
1213  return createTuple(Regs, RegClassIDs, SubRegs);
1214 }
1215 
1216 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1217  const unsigned RegClassIDs[],
1218  const unsigned SubRegs[]) {
1219  // There's no special register-class for a vector-list of 1 element: it's just
1220  // a vector.
1221  if (Regs.size() == 1)
1222  return Regs[0];
1223 
1224  assert(Regs.size() >= 2 && Regs.size() <= 4);
1225 
1226  SDLoc DL(Regs[0]);
1227 
1228  SmallVector<SDValue, 4> Ops;
1229 
1230  // First operand of REG_SEQUENCE is the desired RegClass.
1231  Ops.push_back(
1232  CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1233 
1234  // Then we get pairs of source & subregister-position for the components.
1235  for (unsigned i = 0; i < Regs.size(); ++i) {
1236  Ops.push_back(Regs[i]);
1237  Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1238  }
1239 
1240  SDNode *N =
1241  CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1242  return SDValue(N, 0);
1243 }
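// For example (illustrative): createQTuple({q0, q1, q2}) produces
// REG_SEQUENCE(QQQRegClassID, q0, qsub0, q1, qsub1, q2, qsub2), a single
// untyped value that list-consuming instructions such as LD3 or TBL can use.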
1244 
1245 void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1246  bool isExt) {
1247  SDLoc dl(N);
1248  EVT VT = N->getValueType(0);
1249 
1250  unsigned ExtOff = isExt;
1251 
1252  // Form a REG_SEQUENCE to force register allocation.
1253  unsigned Vec0Off = ExtOff + 1;
1254  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1255  N->op_begin() + Vec0Off + NumVecs);
1256  SDValue RegSeq = createQTuple(Regs);
1257 
1258  SmallVector<SDValue, 6> Ops;
1259  if (isExt)
1260  Ops.push_back(N->getOperand(1));
1261  Ops.push_back(RegSeq);
1262  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1263  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1264 }
1265 
1266 bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1267  LoadSDNode *LD = cast<LoadSDNode>(N);
1268  if (LD->isUnindexed())
1269  return false;
1270  EVT VT = LD->getMemoryVT();
1271  EVT DstVT = N->getValueType(0);
1272  ISD::MemIndexedMode AM = LD->getAddressingMode();
1273  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1274 
1275  // We're not doing validity checking here. That was done when checking
1276  // if we should mark the load as indexed or not. We're just selecting
1277  // the right instruction.
1278  unsigned Opcode = 0;
1279 
1280  ISD::LoadExtType ExtType = LD->getExtensionType();
1281  bool InsertTo64 = false;
1282  if (VT == MVT::i64)
1283  Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1284  else if (VT == MVT::i32) {
1285  if (ExtType == ISD::NON_EXTLOAD)
1286  Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1287  else if (ExtType == ISD::SEXTLOAD)
1288  Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1289  else {
1290  Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1291  InsertTo64 = true;
1292  // The result of the load is only i32. It's the subreg_to_reg that makes
1293  // it into an i64.
1294  DstVT = MVT::i32;
1295  }
1296  } else if (VT == MVT::i16) {
1297  if (ExtType == ISD::SEXTLOAD) {
1298  if (DstVT == MVT::i64)
1299  Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1300  else
1301  Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1302  } else {
1303  Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1304  InsertTo64 = DstVT == MVT::i64;
1305  // The result of the load is only i32. It's the subreg_to_reg that makes
1306  // it into an i64.
1307  DstVT = MVT::i32;
1308  }
1309  } else if (VT == MVT::i8) {
1310  if (ExtType == ISD::SEXTLOAD) {
1311  if (DstVT == MVT::i64)
1312  Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1313  else
1314  Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1315  } else {
1316  Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1317  InsertTo64 = DstVT == MVT::i64;
1318  // The result of the load is only i32. It's the subreg_to_reg that makes
1319  // it into an i64.
1320  DstVT = MVT::i32;
1321  }
1322  } else if (VT == MVT::f16) {
1323  Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1324  } else if (VT == MVT::bf16) {
1325  Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1326  } else if (VT == MVT::f32) {
1327  Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1328  } else if (VT == MVT::f64 || VT.is64BitVector()) {
1329  Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1330  } else if (VT.is128BitVector()) {
1331  Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1332  } else
1333  return false;
1334  SDValue Chain = LD->getChain();
1335  SDValue Base = LD->getBasePtr();
1336  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1337  int OffsetVal = (int)OffsetOp->getZExtValue();
1338  SDLoc dl(N);
1339  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1340  SDValue Ops[] = { Base, Offset, Chain };
1341  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1342  MVT::Other, Ops);
1343 
1344  // Transfer memoperands.
1345  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1346  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1347 
1348  // Either way, we're replacing the node, so tell the caller that.
1349  SDValue LoadedVal = SDValue(Res, 1);
1350  if (InsertTo64) {
1351  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1352  LoadedVal =
1353  SDValue(CurDAG->getMachineNode(
1354  AArch64::SUBREG_TO_REG, dl, MVT::i64,
1355  CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1356  SubReg),
1357  0);
1358  }
1359 
1360  ReplaceUses(SDValue(N, 0), LoadedVal);
1361  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1362  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1363  CurDAG->RemoveDeadNode(N);
1364  return true;
1365 }
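// For example (illustrative): a post-incremented zero-extending i8 load whose
// result is used as i64 selects LDRBBpost; the 32-bit result is then wrapped in
// SUBREG_TO_REG to produce the required 64-bit value.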
1366 
1367 void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1368  unsigned SubRegIdx) {
1369  SDLoc dl(N);
1370  EVT VT = N->getValueType(0);
1371  SDValue Chain = N->getOperand(0);
1372 
1373  SDValue Ops[] = {N->getOperand(2), // Mem operand;
1374  Chain};
1375 
1376  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1377 
1378  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1379  SDValue SuperReg = SDValue(Ld, 0);
1380  for (unsigned i = 0; i < NumVecs; ++i)
1381  ReplaceUses(SDValue(N, i),
1382  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1383 
1384  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1385 
1386  // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1387  // because it's too simple to have needed special treatment during lowering.
1388  if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1389  MachineMemOperand *MemOp = MemIntr->getMemOperand();
1390  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1391  }
1392 
1393  CurDAG->RemoveDeadNode(N);
1394 }
1395 
1396 void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1397  unsigned Opc, unsigned SubRegIdx) {
1398  SDLoc dl(N);
1399  EVT VT = N->getValueType(0);
1400  SDValue Chain = N->getOperand(0);
1401 
1402  SDValue Ops[] = {N->getOperand(1), // Mem operand
1403  N->getOperand(2), // Incremental
1404  Chain};
1405 
1406  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1407  MVT::Untyped, MVT::Other};
1408 
1409  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1410 
1411  // Update uses of write back register
1412  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1413 
1414  // Update uses of vector list
1415  SDValue SuperReg = SDValue(Ld, 1);
1416  if (NumVecs == 1)
1417  ReplaceUses(SDValue(N, 0), SuperReg);
1418  else
1419  for (unsigned i = 0; i < NumVecs; ++i)
1420  ReplaceUses(SDValue(N, i),
1421  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1422 
1423  // Update the chain
1424  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1425  CurDAG->RemoveDeadNode(N);
1426 }
1427 
1428 /// Optimize \param OldBase and \param OldOffset selecting the best addressing
1429 /// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1430 /// new Base and an SDValue representing the new offset.
1431 std::tuple<unsigned, SDValue, SDValue>
1432 AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1433  unsigned Opc_ri,
1434  const SDValue &OldBase,
1435  const SDValue &OldOffset,
1436  unsigned Scale) {
1437  SDValue NewBase = OldBase;
1438  SDValue NewOffset = OldOffset;
1439  // Detect a possible Reg+Imm addressing mode.
1440  const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1441  N, OldBase, NewBase, NewOffset);
1442 
1443  // Detect a possible reg+reg addressing mode, but only if we haven't already
1444  // detected a Reg+Imm one.
1445  const bool IsRegReg =
1446  !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1447 
1448  // Select the instruction.
1449  return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1450 }
1451 
1452 void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
1453  unsigned Scale, unsigned Opc_ri,
1454  unsigned Opc_rr) {
1455  assert(Scale < 4 && "Invalid scaling value.");
1456  SDLoc DL(N);
1457  EVT VT = N->getValueType(0);
1458  SDValue Chain = N->getOperand(0);
1459 
1460  // Optimize addressing mode.
1461  SDValue Base, Offset;
1462  unsigned Opc;
1463  std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1464  N, Opc_rr, Opc_ri, N->getOperand(2),
1465  CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
1466 
1467  SDValue Ops[] = {N->getOperand(1), // Predicate
1468  Base, // Memory operand
1469  Offset, Chain};
1470 
1471  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1472 
1473  SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1474  SDValue SuperReg = SDValue(Load, 0);
1475  for (unsigned i = 0; i < NumVecs; ++i)
1476  ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1477  AArch64::zsub0 + i, DL, VT, SuperReg));
1478 
1479  // Copy chain
1480  unsigned ChainIdx = NumVecs;
1481  ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1482  CurDAG->RemoveDeadNode(N);
1483 }
1484 
1485 void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
1486  unsigned Opc) {
1487  SDLoc dl(N);
1488  EVT VT = N->getOperand(2)->getValueType(0);
1489 
1490  // Form a REG_SEQUENCE to force register allocation.
1491  bool Is128Bit = VT.getSizeInBits() == 128;
1492  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1493  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1494 
1495  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
1496  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
1497 
1498  // Transfer memoperands.
1499  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1500  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1501 
1502  ReplaceNode(N, St);
1503 }
1504 
1505 void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
1506  unsigned Scale, unsigned Opc_rr,
1507  unsigned Opc_ri) {
1508  SDLoc dl(N);
1509 
1510  // Form a REG_SEQUENCE to force register allocation.
1511  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1512  SDValue RegSeq = createZTuple(Regs);
1513 
1514  // Optimize addressing mode.
1515  unsigned Opc;
1516  SDValue Offset, Base;
1517  std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1518  N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
1519  CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
1520 
1521  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
1522  Base, // address
1523  Offset, // offset
1524  N->getOperand(0)}; // chain
1525  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
1526 
1527  ReplaceNode(N, St);
1528 }
1529 
1530 bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
1531  SDValue &OffImm) {
1532  SDLoc dl(N);
1533  const DataLayout &DL = CurDAG->getDataLayout();
1534  const TargetLowering *TLI = getTargetLowering();
1535 
1536  // Try to match it for the frame address
1537  if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
1538  int FI = FINode->getIndex();
1539  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1540  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1541  return true;
1542  }
1543 
1544  return false;
1545 }
1546 
1547 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
1548  unsigned Opc) {
1549  SDLoc dl(N);
1550  EVT VT = N->getOperand(2)->getValueType(0);
1551  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1552  MVT::Other}; // Type for the Chain
1553 
1554  // Form a REG_SEQUENCE to force register allocation.
1555  bool Is128Bit = VT.getSizeInBits() == 128;
1556  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1557  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1558 
1559  SDValue Ops[] = {RegSeq,
1560  N->getOperand(NumVecs + 1), // base register
1561  N->getOperand(NumVecs + 2), // Incremental
1562  N->getOperand(0)}; // Chain
1563  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1564 
1565  ReplaceNode(N, St);
1566 }
1567 
1568 namespace {
1569 /// WidenVector - Given a value in the V64 register class, produce the
1570 /// equivalent value in the V128 register class.
1571 class WidenVector {
1572  SelectionDAG &DAG;
1573 
1574 public:
1575  WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
1576 
1577  SDValue operator()(SDValue V64Reg) {
1578  EVT VT = V64Reg.getValueType();
1579  unsigned NarrowSize = VT.getVectorNumElements();
1580  MVT EltTy = VT.getVectorElementType().getSimpleVT();
1581  MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
1582  SDLoc DL(V64Reg);
1583 
1584  SDValue Undef =
1585  SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
1586  return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
1587  }
1588 };
1589 } // namespace
1590 
1591 /// NarrowVector - Given a value in the V128 register class, produce the
1592 /// equivalent value in the V64 register class.
1593 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
1594  EVT VT = V128Reg.getValueType();
1595  unsigned WideSize = VT.getVectorNumElements();
1596  MVT EltTy = VT.getVectorElementType().getSimpleVT();
1597  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
1598 
1599  return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
1600  V128Reg);
1601 }
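// For illustration, assuming a 64-bit vector value %v of type v2f32:
// WidenVector builds an IMPLICIT_DEF of v4f32 and inserts %v into its dsub
// subregister, and NarrowVector extracts dsub again, so
// NarrowVector(WidenVector(%v)) yields the original D-register value.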
1602 
1603 void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
1604  unsigned Opc) {
1605  SDLoc dl(N);
1606  EVT VT = N->getValueType(0);
1607  bool Narrow = VT.getSizeInBits() == 64;
1608 
1609  // Form a REG_SEQUENCE to force register allocation.
1610  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1611 
1612  if (Narrow)
1613  transform(Regs, Regs.begin(),
1614  WidenVector(*CurDAG));
1615 
1616  SDValue RegSeq = createQTuple(Regs);
1617 
1618  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1619 
1620  unsigned LaneNo =
1621  cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1622 
1623  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1624  N->getOperand(NumVecs + 3), N->getOperand(0)};
1625  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1626  SDValue SuperReg = SDValue(Ld, 0);
1627 
1628  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1629  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1630  AArch64::qsub2, AArch64::qsub3 };
1631  for (unsigned i = 0; i < NumVecs; ++i) {
1632  SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
1633  if (Narrow)
1634  NV = NarrowVector(NV, *CurDAG);
1635  ReplaceUses(SDValue(N, i), NV);
1636  }
1637 
1638  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1639  CurDAG->RemoveDeadNode(N);
1640 }
1641 
1642 void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
1643  unsigned Opc) {
1644  SDLoc dl(N);
1645  EVT VT = N->getValueType(0);
1646  bool Narrow = VT.getSizeInBits() == 64;
1647 
1648  // Form a REG_SEQUENCE to force register allocation.
1649  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1650 
1651  if (Narrow)
1652  transform(Regs, Regs.begin(),
1653  WidenVector(*CurDAG));
1654 
1655  SDValue RegSeq = createQTuple(Regs);
1656 
1657  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1658  RegSeq->getValueType(0), MVT::Other};
1659 
1660  unsigned LaneNo =
1661  cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1662 
1663  SDValue Ops[] = {RegSeq,
1664  CurDAG->getTargetConstant(LaneNo, dl,
1665  MVT::i64), // Lane Number
1666  N->getOperand(NumVecs + 2), // Base register
1667  N->getOperand(NumVecs + 3), // Incremental
1668  N->getOperand(0)};
1669  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1670 
1671  // Update uses of the write back register
1672  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1673 
1674  // Update uses of the vector list
1675  SDValue SuperReg = SDValue(Ld, 1);
1676  if (NumVecs == 1) {
1677  ReplaceUses(SDValue(N, 0),
1678  Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
1679  } else {
1680  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1681  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1682  AArch64::qsub2, AArch64::qsub3 };
1683  for (unsigned i = 0; i < NumVecs; ++i) {
1684  SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
1685  SuperReg);
1686  if (Narrow)
1687  NV = NarrowVector(NV, *CurDAG);
1688  ReplaceUses(SDValue(N, i), NV);
1689  }
1690  }
1691 
1692  // Update the Chain
1693  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1694  CurDAG->RemoveDeadNode(N);
1695 }
1696 
1697 void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
1698  unsigned Opc) {
1699  SDLoc dl(N);
1700  EVT VT = N->getOperand(2)->getValueType(0);
1701  bool Narrow = VT.getSizeInBits() == 64;
1702 
1703  // Form a REG_SEQUENCE to force register allocation.
1704  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1705 
1706  if (Narrow)
1707  transform(Regs, Regs.begin(),
1708  WidenVector(*CurDAG));
1709 
1710  SDValue RegSeq = createQTuple(Regs);
1711 
1712  unsigned LaneNo =
1713  cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1714 
1715  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1716  N->getOperand(NumVecs + 3), N->getOperand(0)};
1717  SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
1718 
1719  // Transfer memoperands.
1720  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1721  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1722 
1723  ReplaceNode(N, St);
1724 }
1725 
1726 void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
1727  unsigned Opc) {
1728  SDLoc dl(N);
1729  EVT VT = N->getOperand(2)->getValueType(0);
1730  bool Narrow = VT.getSizeInBits() == 64;
1731 
1732  // Form a REG_SEQUENCE to force register allocation.
1733  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1734 
1735  if (Narrow)
1736  transform(Regs, Regs.begin(),
1737  WidenVector(*CurDAG));
1738 
1739  SDValue RegSeq = createQTuple(Regs);
1740 
1741  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1742  MVT::Other};
1743 
1744  unsigned LaneNo =
1745  cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1746 
1747  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1748  N->getOperand(NumVecs + 2), // Base Register
1749  N->getOperand(NumVecs + 3), // Incremental
1750  N->getOperand(0)};
1751  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1752 
1753  // Transfer memoperands.
1754  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1755  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1756 
1757  ReplaceNode(N, St);
1758 }
1759 
1760 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
1761  unsigned &Opc, SDValue &Opd0,
1762  unsigned &LSB, unsigned &MSB,
1763  unsigned NumberOfIgnoredLowBits,
1764  bool BiggerPattern) {
1765  assert(N->getOpcode() == ISD::AND &&
1766  "N must be an AND operation to call this function");
1767 
1768  EVT VT = N->getValueType(0);
1769 
1770  // We could test the type of VT here and return false when the type does
1771  // not match, but since that check is done prior to this call in the current
1772  // context, we turned it into an assert to avoid redundant code.
1773  assert((VT == MVT::i32 || VT == MVT::i64) &&
1774  "Type checking must have been done before calling this function");
1775 
1776  // FIXME: simplify-demanded-bits in DAGCombine will probably have
1777  // changed the AND node to a 32-bit mask operation. We'll have to
1778  // undo that as part of the transform here if we want to catch all
1779  // the opportunities.
1780  // Currently the NumberOfIgnoredLowBits argument helps to recover
1781  // from these situations when matching a bigger pattern (bitfield insert).
1782 
1783  // For unsigned extracts, check for a shift right and mask
1784  uint64_t AndImm = 0;
1785  if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
1786  return false;
1787 
1788  const SDNode *Op0 = N->getOperand(0).getNode();
1789 
1790  // Because of simplify-demanded-bits in DAGCombine, the mask may have been
1791  // simplified. Try to undo that
1792  AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
1793 
1794  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
1795  if (AndImm & (AndImm + 1))
1796  return false;
1797 
1798  bool ClampMSB = false;
1799  uint64_t SrlImm = 0;
1800  // Handle the SRL + ANY_EXTEND case.
1801  if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
1802  isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
1803  // Extend the incoming operand of the SRL to 64-bit.
1804  Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
1805  // Make sure to clamp the MSB so that we preserve the semantics of the
1806  // original operations.
1807  ClampMSB = true;
1808  } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
1809  isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
1810  SrlImm)) {
1811  // If the shift result was truncated, we can still combine them.
1812  Opd0 = Op0->getOperand(0).getOperand(0);
1813 
1814  // Use the type of SRL node.
1815  VT = Opd0->getValueType(0);
1816  } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
1817  Opd0 = Op0->getOperand(0);
1818  } else if (BiggerPattern) {
1819  // Let's pretend a 0 shift right has been performed.
1820  // The resulting code will be at least as good as the original one
1821  // plus it may expose more opportunities for bitfield insert pattern.
1822  // FIXME: Currently we limit this to the bigger pattern, because
1823  // some optimizations expect AND and not UBFM.
1824  Opd0 = N->getOperand(0);
1825  } else
1826  return false;
1827 
1828  // Bail out on large immediates. This happens when no proper
1829  // combining/constant folding was performed.
1830  if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
1831  LLVM_DEBUG(
1832  (dbgs() << N
1833  << ": Found large shift immediate, this should not happen\n"));
1834  return false;
1835  }
1836 
1837  LSB = SrlImm;
1838  MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm)
1839  : countTrailingOnes<uint64_t>(AndImm)) -
1840  1;
1841  if (ClampMSB)
1842  // Since we're moving the extend before the right shift operation, we need
1843  // to clamp the MSB to make sure we don't shift in undefined bits instead of
1844  // the zeros which would get shifted in with the original right shift
1845  // operation.
1846  MSB = MSB > 31 ? 31 : MSB;
1847 
1848  Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
1849  return true;
1850 }
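// A worked example of the mapping above (illustrative operand names): for i32
//   %s = srl %x, 5
//   %r = and %s, 255
// the code computes LSB = SrlImm = 5 and
// MSB = 5 + countTrailingOnes(0xff) - 1 = 12, so the node becomes
// UBFMWri %x, 5, 12, i.e. ubfx w, w, #5, #8.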
1851 
1852 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
1853  SDValue &Opd0, unsigned &Immr,
1854  unsigned &Imms) {
1855  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
1856 
1857  EVT VT = N->getValueType(0);
1858  unsigned BitWidth = VT.getSizeInBits();
1859  assert((VT == MVT::i32 || VT == MVT::i64) &&
1860  "Type checking must have been done before calling this function");
1861 
1862  SDValue Op = N->getOperand(0);
1863  if (Op->getOpcode() == ISD::TRUNCATE) {
1864  Op = Op->getOperand(0);
1865  VT = Op->getValueType(0);
1866  BitWidth = VT.getSizeInBits();
1867  }
1868 
1869  uint64_t ShiftImm;
1870  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
1871  !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
1872  return false;
1873 
1874  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
1875  if (ShiftImm + Width > BitWidth)
1876  return false;
1877 
1878  Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
1879  Opd0 = Op.getOperand(0);
1880  Immr = ShiftImm;
1881  Imms = ShiftImm + Width - 1;
1882  return true;
1883 }
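// Illustrative example: for i32
//   %s = srl %x, 3
//   %r = sign_extend_inreg %s, i8
// ShiftImm = 3 and Width = 8, so Immr = 3 and Imms = 3 + 8 - 1 = 10, giving
// SBFMWri %x, 3, 10, i.e. sbfx w, w, #3, #8.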
1884 
1885 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
1886  SDValue &Opd0, unsigned &LSB,
1887  unsigned &MSB) {
1888  // We are looking for the following pattern, which basically extracts several
1889  // contiguous bits from the source value and places them at the LSB of the
1890  // destination value; all other bits of the destination value are set to zero:
1891  //
1892  // Value2 = AND Value, MaskImm
1893  // SRL Value2, ShiftImm
1894  //
1895  // with MaskImm >> ShiftImm to search for the bit width.
1896  //
1897  // This gets selected into a single UBFM:
1898  //
1899  // UBFM Value, ShiftImm, BitWide + SrlImm - 1
1900  //
1901 
1902  if (N->getOpcode() != ISD::SRL)
1903  return false;
1904 
1905  uint64_t AndMask = 0;
1906  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
1907  return false;
1908 
1909  Opd0 = N->getOperand(0).getOperand(0);
1910 
1911  uint64_t SrlImm = 0;
1912  if (!isIntImmediate(N->getOperand(1), SrlImm))
1913  return false;
1914 
1915  // Check whether we really have several bits extract here.
1916  unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm));
1917  if (BitWide && isMask_64(AndMask >> SrlImm)) {
1918  if (N->getValueType(0) == MVT::i32)
1919  Opc = AArch64::UBFMWri;
1920  else
1921  Opc = AArch64::UBFMXri;
1922 
1923  LSB = SrlImm;
1924  MSB = BitWide + SrlImm - 1;
1925  return true;
1926  }
1927 
1928  return false;
1929 }
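// Illustrative example: for i32
//   %m = and %x, 0x0ff0
//   %r = srl %m, 4
// Here AndMask >> SrlImm = 0xff is a mask, so BitWide = 8, LSB = 4 and
// MSB = 8 + 4 - 1 = 11, selecting UBFMWri %x, 4, 11, i.e. ubfx #4, #8.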
1930 
1931 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
1932  unsigned &Immr, unsigned &Imms,
1933  bool BiggerPattern) {
1934  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
1935  "N must be a SHR/SRA operation to call this function");
1936 
1937  EVT VT = N->getValueType(0);
1938 
1939  // We could test the type of VT here and return false when the type does
1940  // not match, but since that check is done prior to this call in the current
1941  // context, we turned it into an assert to avoid redundant code.
1942  assert((VT == MVT::i32 || VT == MVT::i64) &&
1943  "Type checking must have been done before calling this function");
1944 
1945  // Check for AND + SRL doing several bits extract.
1946  if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
1947  return true;
1948 
1949  // We're looking for a shift of a shift.
1950  uint64_t ShlImm = 0;
1951  uint64_t TruncBits = 0;
1952  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
1953  Opd0 = N->getOperand(0).getOperand(0);
1954  } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
1955  N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
1956  // We are looking for a shift of a truncate. A truncate from i64 to i32 can
1957  // be considered as setting the high 32 bits to zero. Our strategy here is to
1958  // always generate a 64-bit UBFM. This consistency will help the CSE pass
1959  // later find more redundancy.
1960  Opd0 = N->getOperand(0).getOperand(0);
1961  TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
1962  VT = Opd0.getValueType();
1963  assert(VT == MVT::i64 && "the promoted type should be i64");
1964  } else if (BiggerPattern) {
1965  // Let's pretend a 0 shift left has been performed.
1966  // FIXME: Currently we limit this to the bigger pattern case,
1967  // because some optimizations expect AND and not UBFM
1968  Opd0 = N->getOperand(0);
1969  } else
1970  return false;
1971 
1972  // Missing combines/constant folding may have left us with strange
1973  // constants.
1974  if (ShlImm >= VT.getSizeInBits()) {
1975  LLVM_DEBUG(
1976  (dbgs() << N
1977  << ": Found large shift immediate, this should not happen\n"));
1978  return false;
1979  }
1980 
1981  uint64_t SrlImm = 0;
1982  if (!isIntImmediate(N->getOperand(1), SrlImm))
1983  return false;
1984 
1985  assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
1986  "bad amount in shift node!");
1987  int immr = SrlImm - ShlImm;
1988  Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
1989  Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
1990  // SRA requires a signed extraction
1991  if (VT == MVT::i32)
1992  Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
1993  else
1994  Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
1995  return true;
1996 }
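// Illustrative example: for i64
//   %t = shl %x, 8
//   %r = srl %t, 20
// ShlImm = 8 and SrlImm = 20, so Immr = 20 - 8 = 12 and
// Imms = 64 - 8 - 0 - 1 = 55, selecting UBFMXri %x, 12, 55,
// i.e. ubfx x, x, #12, #44.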
1997 
1998 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
1999  assert(N->getOpcode() == ISD::SIGN_EXTEND);
2000 
2001  EVT VT = N->getValueType(0);
2002  EVT NarrowVT = N->getOperand(0)->getValueType(0);
2003  if (VT != MVT::i64 || NarrowVT != MVT::i32)
2004  return false;
2005 
2006  uint64_t ShiftImm;
2007  SDValue Op = N->getOperand(0);
2008  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2009  return false;
2010 
2011  SDLoc dl(N);
2012  // Extend the incoming operand of the shift to 64-bits.
2013  SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2014  unsigned Immr = ShiftImm;
2015  unsigned Imms = NarrowVT.getSizeInBits() - 1;
2016  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2017  CurDAG->getTargetConstant(Imms, dl, VT)};
2018  CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2019  return true;
2020 }
2021 
2022 /// Try to form fcvtl2 instructions from a floating-point extend of a high-half
2023 /// extract of a subvector.
2024 bool AArch64DAGToDAGISel::tryHighFPExt(SDNode *N) {
2025  assert(N->getOpcode() == ISD::FP_EXTEND);
2026 
2027  // There are 2 forms of fcvtl2 - extend to double or extend to float.
2028  SDValue Extract = N->getOperand(0);
2029  EVT VT = N->getValueType(0);
2030  EVT NarrowVT = Extract.getValueType();
2031  if ((VT != MVT::v2f64 || NarrowVT != MVT::v2f32) &&
2032  (VT != MVT::v4f32 || NarrowVT != MVT::v4f16))
2033  return false;
2034 
2035  // Optionally look past a bitcast.
2036  Extract = peekThroughBitcasts(Extract);
2037  if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2038  return false;
2039 
2040  // Match extract from start of high half index.
2041  // Example: v8i16 -> v4i16 means the extract must begin at index 4.
2042  unsigned ExtractIndex = Extract.getConstantOperandVal(1);
2043  if (ExtractIndex != Extract.getValueType().getVectorNumElements())
2044  return false;
2045 
2046  auto Opcode = VT == MVT::v2f64 ? AArch64::FCVTLv4i32 : AArch64::FCVTLv8i16;
2047  CurDAG->SelectNodeTo(N, Opcode, VT, Extract.getOperand(0));
2048  return true;
2049 }
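// Illustrative example: extending the high half of a v4f32 vector,
//   %hi = extract_subvector %v, 2   ; v2f32
//   %r  = fp_extend %hi             ; v2f64
// matches here because the extract index (2) equals the narrow element count,
// and is selected as FCVTLv4i32, the fcvtl2 form reading the source's upper
// half.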
2050 
2051 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2052  SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2053  unsigned NumberOfIgnoredLowBits = 0,
2054  bool BiggerPattern = false) {
2055  if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2056  return false;
2057 
2058  switch (N->getOpcode()) {
2059  default:
2060  if (!N->isMachineOpcode())
2061  return false;
2062  break;
2063  case ISD::AND:
2064  return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2065  NumberOfIgnoredLowBits, BiggerPattern);
2066  case ISD::SRL:
2067  case ISD::SRA:
2068  return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2069 
2070  case ISD::SIGN_EXTEND_INREG:
2071  return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2072  }
2073 
2074  unsigned NOpc = N->getMachineOpcode();
2075  switch (NOpc) {
2076  default:
2077  return false;
2078  case AArch64::SBFMWri:
2079  case AArch64::UBFMWri:
2080  case AArch64::SBFMXri:
2081  case AArch64::UBFMXri:
2082  Opc = NOpc;
2083  Opd0 = N->getOperand(0);
2084  Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
2085  Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
2086  return true;
2087  }
2088  // Unreachable
2089  return false;
2090 }
2091 
2092 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2093  unsigned Opc, Immr, Imms;
2094  SDValue Opd0;
2095  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2096  return false;
2097 
2098  EVT VT = N->getValueType(0);
2099  SDLoc dl(N);
2100 
2101  // If the bit extract operation is 64bit but the original type is 32bit, we
2102  // need to add one EXTRACT_SUBREG.
2103  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2104  SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2105  CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2106 
2107  SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2108  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
2109  ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
2110  MVT::i32, SDValue(BFM, 0), SubReg));
2111  return true;
2112  }
2113 
2114  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2115  CurDAG->getTargetConstant(Imms, dl, VT)};
2116  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2117  return true;
2118 }
2119 
2120 /// Does DstMask form a complementary pair with the mask provided by
2121 /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2122 /// this asks whether DstMask zeroes precisely those bits that will be set by
2123 /// the other half.
2124 static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2125  unsigned NumberOfIgnoredHighBits, EVT VT) {
2126  assert((VT == MVT::i32 || VT == MVT::i64) &&
2127  "i32 or i64 mask type expected!");
2128  unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2129 
2130  APInt SignificantDstMask = APInt(BitWidth, DstMask);
2131  APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2132 
2133  return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2134  (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();
2135 }
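// Illustrative example: with VT = i32 and no ignored high bits, a DstMask of
// 0xffff0000 is complementary to BitsToBeInserted = 0x0000ffff: the two are
// disjoint and together cover all 32 bits, so the AND with DstMask can be
// folded away when forming the BFM.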
2136 
2137 // Look for bits that will be useful for later uses.
2138 // A bit is considered useless as soon as it is dropped and never used
2139 // before it has been dropped.
2140 // E.g., looking for useful bit of x
2141 // 1. y = x & 0x7
2142 // 2. z = y >> 2
2143 // After #1, the useful bits of x are 0x7; they live on through
2144 // y.
2145 // After #2, the useful bits of x are 0x4.
2146 // However, if x is used on an unpredictable instruction, then all its bits
2147 // are useful.
2148 // E.g.
2149 // 1. y = x & 0x7
2150 // 2. z = y >> 2
2151 // 3. str x, [@x]
2152 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2153 
2154 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2155  unsigned Depth) {
2156  uint64_t Imm =
2157  cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2158  Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2159  UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2160  getUsefulBits(Op, UsefulBits, Depth + 1);
2161 }
2162 
2163 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2164  uint64_t Imm, uint64_t MSB,
2165  unsigned Depth) {
2166  // inherit the bitwidth value
2167  APInt OpUsefulBits(UsefulBits);
2168  OpUsefulBits = 1;
2169 
2170  if (MSB >= Imm) {
2171  OpUsefulBits <<= MSB - Imm + 1;
2172  --OpUsefulBits;
2173  // The interesting part will be in the lower part of the result
2174  getUsefulBits(Op, OpUsefulBits, Depth + 1);
2175  // The interesting part was starting at Imm in the argument
2176  OpUsefulBits <<= Imm;
2177  } else {
2178  OpUsefulBits <<= MSB + 1;
2179  --OpUsefulBits;
2180  // The interesting part will be shifted in the result
2181  OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2182  getUsefulBits(Op, OpUsefulBits, Depth + 1);
2183  // The interesting part was at zero in the argument
2184  OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2185  }
2186 
2187  UsefulBits &= OpUsefulBits;
2188 }
2189 
2190 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2191  unsigned Depth) {
2192  uint64_t Imm =
2193  cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2194  uint64_t MSB =
2195  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2196 
2197  getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2198 }
2199 
2200 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2201  unsigned Depth) {
2202  uint64_t ShiftTypeAndValue =
2203  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2204  APInt Mask(UsefulBits);
2205  Mask.clearAllBits();
2206  Mask.flipAllBits();
2207 
2208  if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2209  // Shift Left
2210  uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2211  Mask <<= ShiftAmt;
2212  getUsefulBits(Op, Mask, Depth + 1);
2213  Mask.lshrInPlace(ShiftAmt);
2214  } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2215  // Shift Right
2216  // We do not handle AArch64_AM::ASR, because the sign will change the
2217  // number of useful bits
2218  uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2219  Mask.lshrInPlace(ShiftAmt);
2220  getUsefulBits(Op, Mask, Depth + 1);
2221  Mask <<= ShiftAmt;
2222  } else
2223  return;
2224 
2225  UsefulBits &= Mask;
2226 }
2227 
2228 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
2229  unsigned Depth) {
2230  uint64_t Imm =
2231  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2232  uint64_t MSB =
2233  cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
2234 
2235  APInt OpUsefulBits(UsefulBits);
2236  OpUsefulBits = 1;
2237 
2238  APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
2239  ResultUsefulBits.flipAllBits();
2240  APInt Mask(UsefulBits.getBitWidth(), 0);
2241 
2242  getUsefulBits(Op, ResultUsefulBits, Depth + 1);
2243 
2244  if (MSB >= Imm) {
2245  // The instruction is a BFXIL.
2246  uint64_t Width = MSB - Imm + 1;
2247  uint64_t LSB = Imm;
2248 
2249  OpUsefulBits <<= Width;
2250  --OpUsefulBits;
2251 
2252  if (Op.getOperand(1) == Orig) {
2253  // Copy the low bits from the result to bits starting from LSB.
2254  Mask = ResultUsefulBits & OpUsefulBits;
2255  Mask <<= LSB;
2256  }
2257 
2258  if (Op.getOperand(0) == Orig)
2259  // Bits starting from LSB in the input contribute to the result.
2260  Mask |= (ResultUsefulBits & ~OpUsefulBits);
2261  } else {
2262  // The instruction is a BFI.
2263  uint64_t Width = MSB + 1;
2264  uint64_t LSB = UsefulBits.getBitWidth() - Imm;
2265 
2266  OpUsefulBits <<= Width;
2267  --OpUsefulBits;
2268  OpUsefulBits <<= LSB;
2269 
2270  if (Op.getOperand(1) == Orig) {
2271  // Copy the bits from the result to the zero bits.
2272  Mask = ResultUsefulBits & OpUsefulBits;
2273  Mask.lshrInPlace(LSB);
2274  }
2275 
2276  if (Op.getOperand(0) == Orig)
2277  Mask |= (ResultUsefulBits & ~OpUsefulBits);
2278  }
2279 
2280  UsefulBits &= Mask;
2281 }
2282 
2283 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
2284  SDValue Orig, unsigned Depth) {
2285 
2286  // Users of this node should have already been instruction selected
2287  // FIXME: Can we turn that into an assert?
2288  if (!UserNode->isMachineOpcode())
2289  return;
2290 
2291  switch (UserNode->getMachineOpcode()) {
2292  default:
2293  return;
2294  case AArch64::ANDSWri:
2295  case AArch64::ANDSXri:
2296  case AArch64::ANDWri:
2297  case AArch64::ANDXri:
2298  // We increment Depth only when we call getUsefulBits
2299  return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
2300  Depth);
2301  case AArch64::UBFMWri:
2302  case AArch64::UBFMXri:
2303  return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
2304 
2305  case AArch64::ORRWrs:
2306  case AArch64::ORRXrs:
2307  if (UserNode->getOperand(1) != Orig)
2308  return;
2309  return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
2310  Depth);
2311  case AArch64::BFMWri:
2312  case AArch64::BFMXri:
2313  return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
2314 
2315  case AArch64::STRBBui:
2316  case AArch64::STURBBi:
2317  if (UserNode->getOperand(0) != Orig)
2318  return;
2319  UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
2320  return;
2321 
2322  case AArch64::STRHHui:
2323  case AArch64::STURHHi:
2324  if (UserNode->getOperand(0) != Orig)
2325  return;
2326  UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
2327  return;
2328  }
2329 }
2330 
2331 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
2332  if (Depth >= SelectionDAG::MaxRecursionDepth)
2333  return;
2334  // Initialize UsefulBits
2335  if (!Depth) {
2336  unsigned Bitwidth = Op.getScalarValueSizeInBits();
2337  // At the beginning, assume every produced bit is useful
2338  UsefulBits = APInt(Bitwidth, 0);
2339  UsefulBits.flipAllBits();
2340  }
2341  APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
2342 
2343  for (SDNode *Node : Op.getNode()->uses()) {
2344  // A use cannot produce useful bits
2345  APInt UsefulBitsForUse = APInt(UsefulBits);
2346  getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
2347  UsersUsefulBits |= UsefulBitsForUse;
2348  }
2349  // UsefulBits contains the produced bits that are meaningful for the
2350  // current definition, thus a user cannot make a bit meaningful at
2351  // this point
2352  UsefulBits &= UsersUsefulBits;
2353 }
2354 
2355 /// Create a machine node performing a notional SHL of Op by ShlAmount. If
2356 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
2357 /// 0, return Op unchanged.
2358 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
2359  if (ShlAmount == 0)
2360  return Op;
2361 
2362  EVT VT = Op.getValueType();
2363  SDLoc dl(Op);
2364  unsigned BitWidth = VT.getSizeInBits();
2365  unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2366 
2367  SDNode *ShiftNode;
2368  if (ShlAmount > 0) {
2369  // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
2370  ShiftNode = CurDAG->getMachineNode(
2371  UBFMOpc, dl, VT, Op,
2372  CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
2373  CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
2374  } else {
2375  // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
2376  assert(ShlAmount < 0 && "expected right shift");
2377  int ShrAmount = -ShlAmount;
2378  ShiftNode = CurDAG->getMachineNode(
2379  UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
2380  CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
2381  }
2382 
2383  return SDValue(ShiftNode, 0);
2384 }
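// Illustrative example: on i32, getLeftShift(Op, 4) emits
// UBFMWri Op, 28, 27 (lsl w, w, #4), getLeftShift(Op, -4) emits
// UBFMWri Op, 4, 31 (lsr w, w, #4), and getLeftShift(Op, 0) returns Op
// unchanged.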
2385 
2386 /// Does this tree qualify as an attempt to move a bitfield into position,
2387 /// essentially "(and (shl VAL, N), Mask)".
2388 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
2389  bool BiggerPattern,
2390  SDValue &Src, int &ShiftAmount,
2391  int &MaskWidth) {
2392  EVT VT = Op.getValueType();
2393  unsigned BitWidth = VT.getSizeInBits();
2394  (void)BitWidth;
2395  assert(BitWidth == 32 || BitWidth == 64);
2396 
2397  KnownBits Known = CurDAG->computeKnownBits(Op);
2398 
2399  // Non-zero in the sense that they're not provably zero, which is the key
2400  // point if we want to use this value
2401  uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
2402 
2403  // Discard a constant AND mask if present. It's safe because the node will
2404  // already have been factored into the computeKnownBits calculation above.
2405  uint64_t AndImm;
2406  if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
2407  assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0);
2408  Op = Op.getOperand(0);
2409  }
2410 
2411  // Don't match if the SHL has more than one use, since then we'll end up
2412  // generating SHL+UBFIZ instead of just keeping SHL+AND.
2413  if (!BiggerPattern && !Op.hasOneUse())
2414  return false;
2415 
2416  uint64_t ShlImm;
2417  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
2418  return false;
2419  Op = Op.getOperand(0);
2420 
2421  if (!isShiftedMask_64(NonZeroBits))
2422  return false;
2423 
2424  ShiftAmount = countTrailingZeros(NonZeroBits);
2425  MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount);
2426 
2427  // BFI encompasses sufficiently many nodes that it's worth inserting an extra
2428  // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
2429  // amount. BiggerPattern is true when this pattern is being matched for BFI,
2430  // BiggerPattern is false when this pattern is being matched for UBFIZ, in
2431  // which case it is not profitable to insert an extra shift.
2432  if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
2433  return false;
2434  Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
2435 
2436  return true;
2437 }
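// Illustrative example: for i32 (and (shl %x, 3), 0x78), the bits that are
// not provably zero form the shifted mask 0x78, so ShiftAmount = 3 and
// MaskWidth = 4; the SHL amount equals ShiftAmount, so Src is %x itself and
// no extra LSL/LSR node is inserted.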
2438 
2439 static bool isShiftedMask(uint64_t Mask, EVT VT) {
2440  assert(VT == MVT::i32 || VT == MVT::i64);
2441  if (VT == MVT::i32)
2442  return isShiftedMask_32(Mask);
2443  return isShiftedMask_64(Mask);
2444 }
2445 
2446 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
2447 // inserted only sets known zero bits.
2448 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
2449  assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
2450 
2451  EVT VT = N->getValueType(0);
2452  if (VT != MVT::i32 && VT != MVT::i64)
2453  return false;
2454 
2455  unsigned BitWidth = VT.getSizeInBits();
2456 
2457  uint64_t OrImm;
2458  if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
2459  return false;
2460 
2461  // Skip this transformation if the OR immediate can be encoded directly as a
2462  // logical immediate in an ORR. Otherwise, we'd trade an AND+ORR for
2463  // ORR+BFI/BFXIL, which is most likely performance neutral.
2464  if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
2465  return false;
2466 
2467  uint64_t MaskImm;
2468  SDValue And = N->getOperand(0);
2469  // Must be a single use AND with an immediate operand.
2470  if (!And.hasOneUse() ||
2471  !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
2472  return false;
2473 
2474  // Compute the Known Zero for the AND as this allows us to catch more general
2475  // cases than just looking for AND with imm.
2476  KnownBits Known = CurDAG->computeKnownBits(And);
2477 
2478  // Non-zero in the sense that they're not provably zero, which is the key
2479  // point if we want to use this value.
2480  uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
2481 
2482  // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
2483  if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
2484  return false;
2485 
2486  // The bits being inserted must only set those bits that are known to be zero.
2487  if ((OrImm & NotKnownZero) != 0) {
2488  // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
2489  // currently handle this case.
2490  return false;
2491  }
2492 
2493  // BFI/BFXIL dst, src, #lsb, #width.
2494  int LSB = countTrailingOnes(NotKnownZero);
2495  int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation();
2496 
2497  // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
2498  unsigned ImmR = (BitWidth - LSB) % BitWidth;
2499  unsigned ImmS = Width - 1;
2500 
2501  // If we're creating a BFI instruction, avoid cases where we need more
2502  // instructions to materialize the BFI constant as compared to the original
2503  // ORR. A BFXIL will use the same constant as the original ORR, so the code
2504  // should be no worse in this case.
2505  bool IsBFI = LSB != 0;
2506  uint64_t BFIImm = OrImm >> LSB;
2507  if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
2508  // We have a BFI instruction and we know the constant can't be materialized
2509  // with an ORR-immediate with the zero register.
2510  unsigned OrChunks = 0, BFIChunks = 0;
2511  for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
2512  if (((OrImm >> Shift) & 0xFFFF) != 0)
2513  ++OrChunks;
2514  if (((BFIImm >> Shift) & 0xFFFF) != 0)
2515  ++BFIChunks;
2516  }
2517  if (BFIChunks > OrChunks)
2518  return false;
2519  }
2520 
2521  // Materialize the constant to be inserted.
2522  SDLoc DL(N);
2523  unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
2524  SDNode *MOVI = CurDAG->getMachineNode(
2525  MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
2526 
2527  // Create the BFI/BFXIL instruction.
2528  SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
2529  CurDAG->getTargetConstant(ImmR, DL, VT),
2530  CurDAG->getTargetConstant(ImmS, DL, VT)};
2531  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2532  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2533  return true;
2534 }
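// Illustrative example: for i32 (or (and %x, 0xfffffff0), 0xa), the OR
// immediate 0xa is not a valid logical immediate, the AND's known-zero mask
// 0xf is a shifted mask, and 0xa only touches known-zero bits, so LSB = 0 and
// Width = 4. The constant is materialized with MOVi32imm and inserted with
// BFMWri (ImmR = 0, ImmS = 3), i.e. a BFXIL replacing the low 4 bits of %x
// with 0xa.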
2535 
2536 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
2537  SelectionDAG *CurDAG) {
2538  assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
2539 
2540  EVT VT = N->getValueType(0);
2541  if (VT != MVT::i32 && VT != MVT::i64)
2542  return false;
2543 
2544  unsigned BitWidth = VT.getSizeInBits();
2545 
2546  // Because of simplify-demanded-bits in DAGCombine, involved masks may not
2547  // have the expected shape. Try to undo that.
2548 
2549  unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
2550  unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
2551 
2552  // Given an OR operation, check if we have the following pattern
2553  // ubfm c, b, imm, imm2 (or something that does the same job, see
2554  // isBitfieldExtractOp)
2555  // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
2556  // countTrailingZeros(mask2) == imm2 - imm + 1
2557  // f = d | c
2558  // if yes, replace the OR instruction with:
2559  // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
2560 
2561  // OR is commutative, check all combinations of operand order and values of
2562  // BiggerPattern, i.e.
2563  // Opd0, Opd1, BiggerPattern=false
2564  // Opd1, Opd0, BiggerPattern=false
2565  // Opd0, Opd1, BiggerPattern=true
2566  // Opd1, Opd0, BiggerPattern=true
2567  // Several of these combinations may match, so check with BiggerPattern=false
2568  // first since that will produce better results by matching more instructions
2569  // and/or inserting fewer extra instructions.
2570  for (int I = 0; I < 4; ++I) {
2571 
2572  SDValue Dst, Src;
2573  unsigned ImmR, ImmS;
2574  bool BiggerPattern = I / 2;
2575  SDValue OrOpd0Val = N->getOperand(I % 2);
2576  SDNode *OrOpd0 = OrOpd0Val.getNode();
2577  SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
2578  SDNode *OrOpd1 = OrOpd1Val.getNode();
2579 
2580  unsigned BFXOpc;
2581  int DstLSB, Width;
2582  if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
2583  NumberOfIgnoredLowBits, BiggerPattern)) {
2584  // Check that the returned opcode is compatible with the pattern,
2585  // i.e., same type and zero extended (U and not S)
2586  if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
2587  (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
2588  continue;
2589 
2590  // Compute the width of the bitfield insertion
2591  DstLSB = 0;
2592  Width = ImmS - ImmR + 1;
2593  // FIXME: This constraint is to catch bitfield insertion; we may
2594  // want to widen the pattern if we want to grab the general bitfield
2595  // move case
2596  if (Width <= 0)
2597  continue;
2598 
2599  // If the mask on the insertee is correct, we have a BFXIL operation. We
2600  // can share the ImmR and ImmS values from the already-computed UBFM.
2601  } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
2602  BiggerPattern,
2603  Src, DstLSB, Width)) {
2604  ImmR = (BitWidth - DstLSB) % BitWidth;
2605  ImmS = Width - 1;
2606  } else
2607  continue;
2608 
2609  // Check the second part of the pattern
2610  EVT VT = OrOpd1Val.getValueType();
2611  assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
2612 
2613  // Compute the Known Zero for the candidate of the first operand.
2614  // This allows us to catch more general cases than just looking for
2615  // AND with imm. Indeed, simplify-demanded-bits may have removed
2616  // the AND instruction because it proved it was useless.
2617  KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
2618 
2619  // Check if there is enough room for the second operand to appear
2620  // in the first one
2621  APInt BitsToBeInserted =
2622  APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
2623 
2624  if ((BitsToBeInserted & ~Known.Zero) != 0)
2625  continue;
2626 
2627  // Set the first operand
2628  uint64_t Imm;
2629  if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
2630  isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
2631  // In that case, we can eliminate the AND
2632  Dst = OrOpd1->getOperand(0);
2633  else
2634  // Maybe the AND has been removed by simplify-demanded-bits
2635  // or is useful because it discards more bits
2636  Dst = OrOpd1Val;
2637 
2638  // both parts match
2639  SDLoc DL(N);
2640  SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
2641  CurDAG->getTargetConstant(ImmS, DL, VT)};
2642  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2643  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2644  return true;
2645  }
2646 
2647  // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
2648  // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
2649  // mask (e.g., 0x000ffff0).
2650  uint64_t Mask0Imm, Mask1Imm;
2651  SDValue And0 = N->getOperand(0);
2652  SDValue And1 = N->getOperand(1);
2653  if (And0.hasOneUse() && And1.hasOneUse() &&
2654  isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
2655  isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
2656  APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
2657  (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
2658 
2659  // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
2660  // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
2661  // bits to be inserted.
2662  if (isShiftedMask(Mask0Imm, VT)) {
2663  std::swap(And0, And1);
2664  std::swap(Mask0Imm, Mask1Imm);
2665  }
2666 
2667  SDValue Src = And1->getOperand(0);
2668  SDValue Dst = And0->getOperand(0);
2669  unsigned LSB = countTrailingZeros(Mask1Imm);
2670  int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation();
2671 
2672  // The BFXIL inserts the low-order bits from a source register, so right
2673  // shift the needed bits into place.
2674  SDLoc DL(N);
2675  unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2676  SDNode *LSR = CurDAG->getMachineNode(
2677  ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT),
2678  CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
2679 
2680  // BFXIL is an alias of BFM, so translate to BFM operands.
2681  unsigned ImmR = (BitWidth - LSB) % BitWidth;
2682  unsigned ImmS = Width - 1;
2683 
2684  // Create the BFXIL instruction.
2685  SDValue Ops[] = {Dst, SDValue(LSR, 0),
2686  CurDAG->getTargetConstant(ImmR, DL, VT),
2687  CurDAG->getTargetConstant(ImmS, DL, VT)};
2688  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2689  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2690  return true;
2691  }
2692 
2693  return false;
2694 }
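// Illustrative example of the two-AND case above: for i32
//   (or (and %y, 0xfff0000f), (and %x, 0x000ffff0))
// the masks are complementary and 0x000ffff0 is the shifted mask, so
// LSB = 4 and Width = 16; %x is shifted right by 4 (UBFMWri %x, 4, 31) and
// merged into %y with BFMWri (ImmR = 28, ImmS = 15), taking bits [19:4] from
// %x and everything else from %y.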
2695 
2696 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
2697  if (N->getOpcode() != ISD::OR)
2698  return false;
2699 
2700  APInt NUsefulBits;
2701  getUsefulBits(SDValue(N, 0), NUsefulBits);
2702 
2703  // If none of the bits are useful, just return UNDEF.
2704  if (!NUsefulBits) {
2705  CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
2706  return true;
2707  }
2708 
2709  if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
2710  return true;
2711 
2712  return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
2713 }
2714 
2715 /// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
2716 /// equivalent of a left shift by a constant amount followed by an and masking
2717 /// out a contiguous set of bits.
2718 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
2719  if (N->getOpcode() != ISD::AND)
2720  return false;
2721 
2722  EVT VT = N->getValueType(0);
2723  if (VT != MVT::i32 && VT != MVT::i64)
2724  return false;
2725 
2726  SDValue Op0;
2727  int DstLSB, Width;
2728  if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
2729  Op0, DstLSB, Width))
2730  return false;
2731 
2732  // ImmR is the rotate right amount.
2733  unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
2734  // ImmS is the most significant bit of the source to be moved.
2735  unsigned ImmS = Width - 1;
2736 
2737  SDLoc DL(N);
2738  SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
2739  CurDAG->getTargetConstant(ImmS, DL, VT)};
2740  unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2741  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2742  return true;
2743 }
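// Continuing the isBitfieldPositioningOp example (illustrative): for i32
// (and (shl %x, 3), 0x78), DstLSB = 3 and Width = 4, so
// ImmR = (32 - 3) % 32 = 29 and ImmS = 3, selecting UBFMWri %x, 29, 3,
// i.e. ubfiz w, w, #3, #4.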
2744 
2745 /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
2746 /// variable shift/rotate instructions.
2747 bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
2748  EVT VT = N->getValueType(0);
2749 
2750  unsigned Opc;
2751  switch (N->getOpcode()) {
2752  case ISD::ROTR:
2753  Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
2754  break;
2755  case ISD::SHL:
2756  Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
2757  break;
2758  case ISD::SRL:
2759  Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
2760  break;
2761  case ISD::SRA:
2762  Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
2763  break;
2764  default:
2765  return false;
2766  }
2767 
2768  uint64_t Size;
2769  uint64_t Bits;
2770  if (VT == MVT::i32) {
2771  Bits = 5;
2772  Size = 32;
2773  } else if (VT == MVT::i64) {
2774  Bits = 6;
2775  Size = 64;
2776  } else
2777  return false;
2778 
2779  SDValue ShiftAmt = N->getOperand(1);
2780  SDLoc DL(N);
2781  SDValue NewShiftAmt;
2782 
2783  // Skip over an extend of the shift amount.
2784  if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
2785  ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
2786  ShiftAmt = ShiftAmt->getOperand(0);
2787 
2788  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
2789  SDValue Add0 = ShiftAmt->getOperand(0);
2790  SDValue Add1 = ShiftAmt->getOperand(1);
2791  uint64_t Add0Imm;
2792  uint64_t Add1Imm;
2793  // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
2794  // to avoid the ADD/SUB.
2795  if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0))
2796  NewShiftAmt = Add0;
2797  // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2798  // generate a NEG instead of a SUB of a constant.
2799  else if (ShiftAmt->getOpcode() == ISD::SUB &&
2800  isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
2801  (Add0Imm % Size == 0)) {
2802  unsigned NegOpc;
2803  unsigned ZeroReg;
2804  EVT SubVT = ShiftAmt->getValueType(0);
2805  if (SubVT == MVT::i32) {
2806  NegOpc = AArch64::SUBWrr;
2807  ZeroReg = AArch64::WZR;
2808  } else {
2809  assert(SubVT == MVT::i64);
2810  NegOpc = AArch64::SUBXrr;
2811  ZeroReg = AArch64::XZR;
2812  }
2813  SDValue Zero =
2814  CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
2815  MachineSDNode *Neg =
2816  CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
2817  NewShiftAmt = SDValue(Neg, 0);
2818  } else
2819  return false;
2820  } else {
2821  // If the shift amount is masked with an AND, check that the mask covers the
2822  // bits that are implicitly ANDed off by the above opcodes and if so, skip
2823  // the AND.
2824  uint64_t MaskImm;
2825  if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
2826  !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
2827  return false;
2828 
2829  if (countTrailingOnes(MaskImm) < Bits)
2830  return false;
2831 
2832  NewShiftAmt = ShiftAmt->getOperand(0);
2833  }
2834 
2835  // Narrow/widen the shift amount to match the size of the shift operation.
2836  if (VT == MVT::i32)
2837  NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
2838  else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
2839  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
2840  MachineSDNode *Ext = CurDAG->getMachineNode(
2841  AArch64::SUBREG_TO_REG, DL, VT,
2842  CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
2843  NewShiftAmt = SDValue(Ext, 0);
2844  }
2845 
2846  SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
2847  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2848  return true;
2849 }
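// Illustrative examples: for i64 (srl %x, (and %amt, 63)) the AND covers the
// 6 bits LSRV already ignores, so it is skipped and we emit LSRVXr %x, %amt;
// for (shl %x, (sub 64, %amt)) the constant is a multiple of the size, so a
// SUBXrr from XZR (a NEG of %amt) feeds LSLVXr instead.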
2850 
2851 bool
2852 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
2853  unsigned RegWidth) {
2854  APFloat FVal(0.0);
2855  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
2856  FVal = CN->getValueAPF();
2857  else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
2858  // Some otherwise illegal constants are allowed in this case.
2859  if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
2860  !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
2861  return false;
2862 
2863  ConstantPoolSDNode *CN =
2864  dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
2865  FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
2866  } else
2867  return false;
2868 
2869  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
2870  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
2871  // x-register.
2872  //
2873  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
2874  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
2875  // integers.
2876  bool IsExact;
2877 
2878  // fbits is between 1 and 64 in the worst-case, which means the fmul
2879  // could have 2^64 as an actual operand. Need 65 bits of precision.
2880  APSInt IntVal(65, true);
2881  FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
2882 
2883  // N.b. isPowerOf2 also checks for > 0.
2884  if (!IsExact || !IntVal.isPowerOf2()) return false;
2885  unsigned FBits = IntVal.logBase2();
2886 
2887  // Checks above should have guaranteed that we haven't lost information in
2888  // finding FBits, but it must still be in range.
2889  if (FBits == 0 || FBits > RegWidth) return false;
2890 
2891  FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
2892  return true;
2893 }
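// Illustrative example: for a pattern like (fp_to_sint (fmul %val, 65536.0))
// with a 32-bit destination register, the constant converts exactly to 2^16,
// so FBits = 16 is in range and FixedPos becomes the #16 fbits operand of the
// fixed-point FCVTZS/FCVTZU form.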
2894 
2895 // Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
2896 // fields of the string as integer values, and combines these into a single
2897 // value to be used in the MRS/MSR instruction.
2898 static int getIntOperandFromRegisterString(StringRef RegString) {
2899  SmallVector<StringRef, 5> Fields;
2900  RegString.split(Fields, ':');
2901 
2902  if (Fields.size() == 1)
2903  return -1;
2904 
2905  assert(Fields.size() == 5
2906  && "Invalid number of fields in read register string");
2907 
2908  SmallVector<int, 5> Ops;
2909  bool AllIntFields = true;
2910 
2911  for (StringRef Field : Fields) {
2912  unsigned IntField;
2913  AllIntFields &= !Field.getAsInteger(10, IntField);
2914  Ops.push_back(IntField);
2915  }
2916 
2917  assert(AllIntFields &&
2918  "Unexpected non-integer value in special register string.");
2919 
2920  // Need to combine the integer fields of the string into a single value
2921  // based on the bit encoding of MRS/MSR instruction.
2922  return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
2923  (Ops[3] << 3) | (Ops[4]);
2924 }
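// Illustrative example: the string "3:3:13:0:2" (o0:op1:CRn:CRm:op2 for
// TPIDR_EL0) yields (3 << 14) | (3 << 11) | (13 << 7) | (0 << 3) | 2 = 0xde82,
// the encoding used as the MRS/MSR operand.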
2925 
2926 // Lower the read_register intrinsic to an MRS instruction node if the special
2927 // register string argument is either of the form detailed in the ACLE (the
2928 // form described in getIntOperandFromRegisterString) or is a named register
2929 // known by the MRS SysReg mapper.
2930 bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
2931  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2932  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2933  SDLoc DL(N);
2934 
2935  int Reg = getIntOperandFromRegisterString(RegString->getString());
2936  if (Reg != -1) {
2937  ReplaceNode(N, CurDAG->getMachineNode(
2938  AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2939  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2940  N->getOperand(0)));
2941  return true;
2942  }
2943 
2944  // Use the sysreg mapper to map the remaining possible strings to the
2945  // value for the register to be used for the instruction operand.
2946  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2947  if (TheReg && TheReg->Readable &&
2948  TheReg->haveFeatures(Subtarget->getFeatureBits()))
2949  Reg = TheReg->Encoding;
2950  else
2951  Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2952 
2953  if (Reg != -1) {
2954  ReplaceNode(N, CurDAG->getMachineNode(
2955  AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2956  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2957  N->getOperand(0)));
2958  return true;
2959  }
2960 
2961  if (RegString->getString() == "pc") {
2962  ReplaceNode(N, CurDAG->getMachineNode(
2963  AArch64::ADR, DL, N->getSimpleValueType(0), MVT::Other,
2964  CurDAG->getTargetConstant(0, DL, MVT::i32),
2965  N->getOperand(0)));
2966  return true;
2967  }
2968 
2969  return false;
2970 }
2971 
2972 // Lower the write_register intrinsic to an MSR instruction node if the special
2973 // register string argument is either of the form detailed in the ACLE (the
2974 // form described in getIntOperandFromRegisterString) or is a named register
2975 // known by the MSR SysReg mapper.
2976 bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
2977  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2978  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2979  SDLoc DL(N);
2980 
2981  int Reg = getIntOperandFromRegisterString(RegString->getString());
2982  if (Reg != -1) {
2983  ReplaceNode(
2984  N, CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
2985  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2986  N->getOperand(2), N->getOperand(0)));
2987  return true;
2988  }
2989 
2990  // Check if the register was one of those allowed as the pstatefield value in
2991  // the MSR (immediate) instruction. To accept the values allowed in the
2992  // pstatefield for the MSR (immediate) instruction, we also require that an
2993  // immediate value has been provided as an argument; we know that this is
2994  // the case, as it has been ensured by semantic checking.
2995  auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());
2996  if (PMapper) {
2997  assert (isa<ConstantSDNode>(N->getOperand(2))
2998  && "Expected a constant integer expression.");
2999  unsigned Reg = PMapper->Encoding;
3000  uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
3001  unsigned State;
3002  if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO || Reg == AArch64PState::SSBS) {
3003  assert(Immed < 2 && "Bad imm");
3004  State = AArch64::MSRpstateImm1;
3005  } else {
3006  assert(Immed < 16 && "Bad imm");
3007  State = AArch64::MSRpstateImm4;
3008  }
3009  ReplaceNode(N, CurDAG->getMachineNode(
3010  State, DL, MVT::Other,
3011  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
3012  CurDAG->getTargetConstant(Immed, DL, MVT::i16),
3013  N->getOperand(0)));
3014  return true;
3015  }
3016 
3017  // Use the sysreg mapper to attempt to map the remaining possible strings
3018  // to the value for the register to be used for the MSR (register)
3019  // instruction operand.
3020  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
3021  if (TheReg && TheReg->Writeable &&
3022  TheReg->haveFeatures(Subtarget->getFeatureBits()))
3023  Reg = TheReg->Encoding;
3024  else
3025  Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
3026  if (Reg != -1) {
3027  ReplaceNode(N, CurDAG->getMachineNode(
3028  AArch64::MSR, DL, MVT::Other,
3029  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
3030  N->getOperand(2), N->getOperand(0)));
3031  return true;
3032  }
3033 
3034  return false;
3035 }
3036 
3037 /// We've got special pseudo-instructions for these
3038 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3039  unsigned Opcode;
3040  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
3041 
3042  // Leave IR for LSE if subtarget supports it.
3043  if (Subtarget->hasLSE()) return false;
3044 
3045  if (MemTy == MVT::i8)
3046  Opcode = AArch64::CMP_SWAP_8;
3047  else if (MemTy == MVT::i16)
3048  Opcode = AArch64::CMP_SWAP_16;
3049  else if (MemTy == MVT::i32)
3050  Opcode = AArch64::CMP_SWAP_32;
3051  else if (MemTy == MVT::i64)
3052  Opcode = AArch64::CMP_SWAP_64;
3053  else
3054  llvm_unreachable("Unknown AtomicCmpSwap type");
3055 
3056  MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
3057  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
3058  N->getOperand(0)};
3059  SDNode *CmpSwap = CurDAG->getMachineNode(
3060  Opcode, SDLoc(N),
3061  CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
3062 
3063  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
3064  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
3065 
3066  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
3067  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
3068  CurDAG->RemoveDeadNode(N);
3069 
3070  return true;
3071 }
3072 
3073 bool AArch64DAGToDAGISel::SelectSVE8BitLslImm(SDValue N, SDValue &Base,
3074  SDValue &Offset) {
3075  auto C = dyn_cast<ConstantSDNode>(N);
3076  if (!C)
3077  return false;
3078 
3079  auto Ty = N->getValueType(0);
3080 
3081  int64_t Imm = C->getSExtValue();
3082  SDLoc DL(N);
3083 
3084  if ((Imm >= -128) && (Imm <= 127)) {
3085  Base = CurDAG->getTargetConstant(Imm, DL, Ty);
3086  Offset = CurDAG->getTargetConstant(0, DL, Ty);
3087  return true;
3088  }
3089 
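  // Otherwise try the shifted form: an 8-bit value with an implicit LSL #8,
  // i.e. a multiple of 256 in the range [-32768, 32512].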
3090  if (((Imm % 256) == 0) && (Imm >= -32768) && (Imm <= 32512)) {
3091  Base = CurDAG->getTargetConstant(Imm/256, DL, Ty);
3092  Offset = CurDAG->getTargetConstant(8, DL, Ty);
3093  return true;
3094  }
3095 
3096  return false;
3097 }
3098 
3099 bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift) {
3100  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
3101  const int64_t ImmVal = CNode->getZExtValue();
3102  SDLoc DL(N);
3103 
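  // The immediate must fit in 8 bits; for elements wider than i8 it may
  // alternatively be an 8-bit value shifted left by 8.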
3104  switch (VT.SimpleTy) {
3105  case MVT::i8:
3106  if ((ImmVal & 0xFF) == ImmVal) {
3107  Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
3108  Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
3109  return true;
3110  }
3111  break;
3112  case MVT::i16:
3113  case MVT::i32:
3114  case MVT::i64:
3115  if ((ImmVal & 0xFF) == ImmVal) {
3116  Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
3117  Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
3118  return true;
3119  } else if ((ImmVal & 0xFF00) == ImmVal) {
3120  Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
3121  Imm = CurDAG->getTargetConstant(ImmVal >> 8, DL, MVT::i32);
3122  return true;
3123  }
3124  break;
3125  default:
3126  break;
3127  }
3128  }
3129 
3130  return false;
3131 }
3132 
3133 bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
3134  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
3135  int64_t ImmVal = CNode->getSExtValue();
3136  SDLoc DL(N);
3137  if (ImmVal >= -128 && ImmVal < 128) {
3138  Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
3139  return true;
3140  }
3141  }
3142  return false;
3143 }
3144 
3145 bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
3146  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
3147  uint64_t ImmVal = CNode->getZExtValue();
3148 
3149  switch (VT.SimpleTy) {
3150  case MVT::i8:
3151  ImmVal &= 0xFF;
3152  break;
3153  case MVT::i16:
3154  ImmVal &= 0xFFFF;
3155  break;
3156  case MVT::i32:
3157  ImmVal &= 0xFFFFFFFF;
3158  break;
3159  case MVT::i64:
3160  break;
3161  default:
3162  llvm_unreachable("Unexpected type");
3163  }
3164 
3165  if (ImmVal < 256) {
3166  Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
3167  return true;
3168  }
3169  }
3170  return false;
3171 }
3172 
3173 bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm) {
3174  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
3175  uint64_t ImmVal = CNode->getZExtValue();
3176  SDLoc DL(N);
3177 
3178  // Replicate the element-sized immediate across the full 64 bits.
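  // e.g. for i8 elements the value 0xF0 becomes 0xF0F0F0F0F0F0F0F0.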
3179  switch (VT.SimpleTy) {
3180  case MVT::i8:
3181  ImmVal &= 0xFF;
3182  ImmVal |= ImmVal << 8;
3183  ImmVal |= ImmVal << 16;
3184  ImmVal |= ImmVal << 32;
3185  break;
3186  case MVT::i16:
3187  ImmVal &= 0xFFFF;
3188  ImmVal |= ImmVal << 16;
3189  ImmVal |= ImmVal << 32;
3190  break;
3191  case MVT::i32:
3192  ImmVal &= 0xFFFFFFFF;
3193  ImmVal |= ImmVal << 32;
3194  break;
3195  case MVT::i64:
3196  break;
3197  default:
3198  llvm_unreachable("Unexpected type");
3199  }
3200 
3201  uint64_t encoding;
3202  if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
3203  Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
3204  return true;
3205  }
3206  }
3207  return false;
3208 }
3209 
3210 // SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
3211 // Rather than attempt to normalise everything we can sometimes saturate the
3212 // shift amount during selection. This function also allows for consistent
3213 // isel patterns by ensuring the resulting "Imm" node is of the i32 type
3214 // required by the instructions.
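// For example, with an allowed range of [1, 16] a shift amount of 200 is
// clamped to 16 when AllowSaturation is set and rejected otherwise.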
3215 bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
3216  uint64_t High, bool AllowSaturation,
3217  SDValue &Imm) {
3218  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
3219  uint64_t ImmVal = CN->getZExtValue();
3220 
3221  // Reject shift amounts that are too small.
3222  if (ImmVal < Low)
3223  return false;
3224 
3225  // Reject or saturate shift amounts that are too big.
3226  if (ImmVal > High) {
3227  if (!AllowSaturation)
3228  return false;
3229  ImmVal = High;
3230  }
3231 
3232  Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
3233  return true;
3234  }
3235 
3236  return false;
3237 }
3238 
3239 bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
3240  // tagp(FrameIndex, IRGstack, tag_offset):
3241  // since the offset between FrameIndex and IRGstack is a compile-time
3242  // constant, this can be lowered to a single ADDG instruction.
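  // The TAGPstack pseudo built below carries the frame index and tag offset
  // and is rewritten to an ADDG once the frame offset is known.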
3243  if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
3244  return false;
3245  }
3246 
3247  SDValue IRG_SP = N->getOperand(2);
3248  if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
3249  cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() !=
3250  Intrinsic::aarch64_irg_sp) {
3251  return false;
3252  }
3253 
3254  const TargetLowering *TLI = getTargetLowering();
3255  SDLoc DL(N);
3256  int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
3257  SDValue FiOp = CurDAG->getTargetFrameIndex(
3258  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
3259  int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
3260 
3261  SDNode *Out = CurDAG->getMachineNode(
3262  AArch64::TAGPstack, DL, MVT::i64,
3263  {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
3264  CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
3265  ReplaceNode(N, Out);
3266  return true;
3267 }
3268 
3269 void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
3270  assert(isa<ConstantSDNode>(N->getOperand(3)) &&
3271  "llvm.aarch64.tagp third argument must be an immediate");
3272  if (trySelectStackSlotTagP(N))
3273  return;
3274  // FIXME: above applies in any case when offset between Op1 and Op2 is a
3275  // compile-time constant, not just for stack allocations.
3276 
3277  // General case for unrelated pointers in Op1 and Op2.
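  // SUBP computes the address difference Op1 - Op2, ADDXrr adds it back onto
  // Op2 (recovering Op1's address), and ADDG then applies the tag offset.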
3278  SDLoc DL(N);
3279  int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
3280  SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
3281  {N->getOperand(1), N->getOperand(2)});
3282  SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
3283  {SDValue(N1, 0), N->getOperand(2)});
3284  SDNode *N3 = CurDAG->getMachineNode(
3285  AArch64::ADDG, DL, MVT::i64,
3286  {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
3287  CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
3288  ReplaceNode(N, N3);
3289 }
3290 
3291 // NOTE: We cannot use EXTRACT_SUBREG in all cases because the fixed length
3292 // vector types larger than NEON don't have a matching SubRegIndex.
3293 static SDNode *extractSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
3294  assert(V.getValueType().isScalableVector() &&
3295  V.getValueType().getSizeInBits().getKnownMinSize() ==
3296  AArch64::SVEBitsPerBlock &&
3297  "Expected to extract from a packed scalable vector!");
3298  assert(VT.isFixedLengthVector() &&
3299  "Expected to extract a fixed length vector!");
3300 
3301  SDLoc DL(V);
3302  switch (VT.getSizeInBits()) {
3303  case 64: {
3304  auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
3305  return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
3306  }
3307  case 128: {
3308  auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
3309  return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
3310  }
3311  default: {
3312  auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
3313  return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
3314  }
3315  }
3316 }
3317 
3318 // NOTE: We cannot use INSERT_SUBREG in all cases because the fixed length
3319 // vector types larger than NEON don't have a matching SubRegIndex.
3320 static SDNode *insertSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
3321  assert(VT.isScalableVector() &&
3322  VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock &&
3323  "Expected to insert into a packed scalable vector!");
3324  assert(V.getValueType().isFixedLengthVector() &&
3325  "Expected to insert a fixed length vector!");
3326 
3327  SDLoc DL(V);
3328  switch (V.getValueType().getSizeInBits()) {
3329  case 64: {
3330  auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
3331  auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
3332  return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
3333  SDValue(Container, 0), V, SubReg);
3334  }
3335  case 128: {
3336  auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
3337  auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
3338  return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
3339  SDValue(Container, 0), V, SubReg);
3340  }
3341  default: {
3342  auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
3343  return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
3344  }
3345  }
3346 }
3347 
3348 void AArch64DAGToDAGISel::Select(SDNode *Node) {
3349  // If we have a custom node, we already have selected!
3350  if (Node->isMachineOpcode()) {
3351  LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
3352  Node->setNodeId(-1);
3353  return;
3354  }
3355 
3356  // Handle a few custom selection cases.
3357  EVT VT = Node->getValueType(0);
3358 
3359  switch (Node->getOpcode()) {
3360  default:
3361  break;
3362 
3363  case ISD::ATOMIC_CMP_SWAP:
3364  if (SelectCMP_SWAP(Node))
3365  return;
3366  break;
3367 
3368  case ISD::READ_REGISTER:
3369  if (tryReadRegister(Node))
3370  return;
3371  break;
3372 
3373  case ISD::WRITE_REGISTER:
3374  if (tryWriteRegister(Node))
3375  return;
3376  break;
3377 
3378  case ISD::ADD:
3379  if (tryMLAV64LaneV128(Node))
3380  return;
3381  break;
3382 
3383  case ISD::LOAD: {
3384  // Try to select as an indexed load. Fall through to normal processing
3385  // if we can't.
3386  if (tryIndexedLoad(Node))
3387  return;
3388  break;
3389  }
3390 
3391  case ISD::SRL:
3392  case ISD::AND:
3393  case ISD::SRA:
3394  case ISD::SIGN_EXTEND_INREG:
3395  if (tryBitfieldExtractOp(Node))
3396  return;
3397  if (tryBitfieldInsertInZeroOp(Node))
3398  return;
3399  LLVM_FALLTHROUGH;
3400  case ISD::ROTR:
3401  case ISD::SHL:
3402  if (tryShiftAmountMod(Node))
3403  return;
3404  break;
3405 
3406  case ISD::SIGN_EXTEND:
3407  if (tryBitfieldExtractOpFromSExt(Node))
3408  return;
3409  break;
3410 
3411  case ISD::FP_EXTEND:
3412  if (tryHighFPExt(Node))
3413  return;
3414  break;
3415 
3416  case ISD::OR:
3417  if (tryBitfieldInsertOp(Node))
3418  return;
3419  break;
3420 
3421  case ISD::EXTRACT_SUBVECTOR: {
3422  // Bail when not a "cast" like extract_subvector.
3423  if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue() != 0)
3424  break;
3425 
3426  // Bail when normal isel can do the job.
3427  EVT InVT = Node->getOperand(0).getValueType();
3428  if (VT.isScalableVector() || InVT.isFixedLengthVector())
3429  break;
3430 
3431  // NOTE: We can only get here when doing fixed length SVE code generation.
3432  // We do manual selection because the types involved are not linked to real
3433  // registers (despite being legal) and must be coerced into SVE registers.
3434  //
3435  // NOTE: If the above changes, be aware that selection will still not work
3436  // because the td definition of extract_vector does not support extracting
3437  // a fixed length vector from a scalable vector.
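  // For example, a 128-bit fixed length result is extracted as the zsub
  // subregister of the scalable operand; wider fixed vectors fall back to a
  // COPY_TO_REGCLASS of the whole ZPR.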
3438 
3439  ReplaceNode(Node, extractSubReg(CurDAG, VT, Node->getOperand(0)));
3440  return;
3441  }
3442 
3443  case ISD::INSERT_SUBVECTOR: {
3444  // Bail when not a "cast" like insert_subvector.
3445  if (cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue() != 0)
3446  break;
3447  if (!Node->getOperand(0).isUndef())
3448  break;
3449 
3450  // Bail when normal isel should do the job.
3451  EVT InVT = Node->getOperand(1).getValueType();
3452  if (VT.isFixedLengthVector() || InVT.isScalableVector())
3453  break;
3454 
3455  // NOTE: We can only get here when doing fixed length SVE code generation.
3456  // We do manual selection because the types involved are not linked to real
3457  // registers (despite being legal) and must be coerced into SVE registers.
3458  //
3459  // NOTE: If the above changes, be aware that selection will still not work
3460  // because the td definition of insert_vector does not support inserting a
3461  // fixed length vector into a scalable vector.
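  // insertSubReg mirrors the extract case: 64/128-bit fixed vectors are
  // inserted into an IMPLICIT_DEF via dsub/zsub, wider ones are re-classed
  // into a ZPR.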
3462 
3463  ReplaceNode(Node, insertSubReg(CurDAG, VT, Node->getOperand(1)));
3464  return;
3465  }
3466 
3467  case ISD::Constant: {
3468  // Materialize zero constants as copies from WZR/XZR. This allows
3469  // the coalescer to propagate these into other instructions.
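  // e.g. a store of zero can then use WZR/XZR directly as its source
  // register instead of materialising the constant with a MOV.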
3470  ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
3471  if (ConstNode->isNullValue()) {
3472  if (VT == MVT::i32) {
3473  SDValue New = CurDAG->getCopyFromReg(
3474  CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
3475  ReplaceNode(Node, New.getNode());
3476  return;
3477  } else if (VT == MVT::i64) {
3478  SDValue New = CurDAG->getCopyFromReg(
3479  CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
3480  ReplaceNode(Node, New.getNode());
3481  return;
3482  }
3483  }
3484  break;
3485  }
3486 
3487  case ISD::FrameIndex: {
3488  // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
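  // e.g. a frame object at SP+16 becomes "add xN, sp, #16".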
3489  int FI = cast<FrameIndexSDNode>(Node)->getIndex();
3490  unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
3491  const TargetLowering *TLI = getTargetLowering();
3492  SDValue TFI = CurDAG->getTargetFrameIndex(
3493  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
3494  SDLoc DL(Node);
3495  SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
3496  CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
3497  CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
3498  return;
3499  }
3500  case ISD::INTRINSIC_W_CHAIN: {
3501  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
3502  switch (IntNo) {
3503  default:
3504  break;
3505  case Intrinsic::aarch64_ldaxp:
3506  case Intrinsic::aarch64_ldxp: {
3507  unsigned Op =
3508  IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
3509  SDValue MemAddr = Node->getOperand(2);
3510  SDLoc DL(Node);
3511  SDValue Chain = Node->getOperand(0);
3512 
3513  SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
3514  MVT::Other, MemAddr, Chain);
3515 
3516  // Transfer memoperands.
3517  MachineMemOperand *MemOp =
3518  cast<MemIntrinsicSDNode>(Node)->getMemOperand();
3519  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
3520  ReplaceNode(Node, Ld);
3521  return;
3522  }
3523  case Intrinsic::aarch64_stlxp:
3524  case Intrinsic::aarch64_stxp: {
3525  unsigned Op =
3526  IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
3527  SDLoc DL(Node);
3528  SDValue Chain = Node->getOperand(0);
3529  SDValue ValLo = Node->getOperand(2);
3530  SDValue ValHi = Node->getOperand(3);
3531  SDValue MemAddr = Node->getOperand(4);
3532 
3533  // Place arguments in the right order.
3534  SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
3535 
3536  SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
3537  // Transfer memoperands.
3538  MachineMemOperand *MemOp =
3539  cast<MemIntrinsicSDNode>(Node)->getMemOperand();
3540  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
3541 
3542  ReplaceNode(Node, St);
3543  return;
3544  }
3545  case Intrinsic::aarch64_neon_ld1x2:
3546  if (VT == MVT::v8i8) {
3547  SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
3548  return;
3549  } else if (VT == MVT::v16i8) {
3550  SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
3551  return;
3552  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
3553  SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
3554  return;
3555  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
3556  SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
3557  return;
3558  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3559  SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
3560  return;
3561  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3562  SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
3563  return;
3564  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3565  SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
3566  return;
3567  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3568  SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
3569  return;
3570  }
3571  break;
3572  case Intrinsic::aarch64_neon_ld1x3:
3573  if (VT == MVT::v8i8) {
3574  SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
3575  return;
3576  } else if (VT == MVT::v16i8) {
3577  SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
3578  return;
3579  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
3580  SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
3581  return;
3582  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
3583  SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
3584  return;
3585  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3586  SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
3587  return;
3588  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3589  SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
3590  return;
3591  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3592  SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
3593  return;
3594  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3595  SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
3596  return;
3597  }
3598  break;
3599  case Intrinsic::aarch64_neon_ld1x4:
3600  if (VT == MVT::v8i8) {
3601  SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
3602  return;
3603  } else if (VT == MVT::v16i8) {
3604  SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
3605  return;
3606  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
3607  SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
3608  return;
3609  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
3610  SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
3611  return;
3612  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3613  SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
3614  return;
3615  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3616  SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
3617  return;
3618  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3619  SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3620  return;
3621  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3622  SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
3623  return;
3624  }
3625  break;
3626  case Intrinsic::aarch64_neon_ld2:
3627  if (VT == MVT::v8i8) {
3628  SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
3629  return;
3630  } else if (VT == MVT::v16i8) {
3631  SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
3632  return;
3633  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
3634  SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
3635  return;
3636  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
3637  SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
3638  return;
3639  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3640  SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
3641  return;
3642  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3643  SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
3644  return;
3645  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3646  SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
3647  return;
3648  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3649  SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
3650  return;
3651  }
3652  break;
3653  case Intrinsic::aarch64_neon_ld3:
3654  if (VT == MVT::v8i8) {
3655  SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
3656  return;
3657  } else if (VT == MVT::v16i8) {
3658  SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
3659  return;
3660  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
3661  SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
3662  return;
3663  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
3664  SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
3665  return;
3666  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3667  SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
3668  return;
3669  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3670  SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
3671  return;
3672  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3673  SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
3674  return;
3675  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3676  SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
3677  return;
3678  }
3679  break;
3680  case Intrinsic::aarch64_neon_ld4:
3681  if (VT == MVT::v8i8) {
3682  SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
3683  return;
3684  } else if (VT == MVT::v16i8) {
3685  SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
3686  return;
3687  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
3688  SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
3689  return;
3690  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
3691  SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
3692  return;
3693  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3694  SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
3695  return;
3696  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3697  SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
3698  return;
3699  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3700  SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3701  return;
3702  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3703  SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
3704  return;
3705  }
3706  break;
3707  case Intrinsic::aarch64_neon_ld2r:
3708  if (VT == MVT::v8i8) {
3709  SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
3710  return;
3711  } else if (VT == MVT::v16i8) {
3712  SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
3713  return;
3714  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
3715  SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
3716  return;
3717  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
3718  SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
3719  return;
3720  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3721  SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
3722  return;
3723  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3724  SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
3725  return;
3726  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3727  SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
3728  return;
3729  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3730  SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
3731  return;
3732  }
3733  break;
3734  case Intrinsic::aarch64_neon_ld3r:
3735  if (VT == MVT::v8i8) {
3736  SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
3737  return;
3738  } else if (VT == MVT::v16i8) {
3739  SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
3740  return;
3741  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
3742  SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
3743  return;
3744  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
3745  SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
3746  return;
3747  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3748  SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
3749  return;
3750  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3751  SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
3752  return;
3753  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3754  SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
3755  return;
3756  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3757  SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
3758  return;
3759  }
3760  break;
3761  case Intrinsic::aarch64_neon_ld4r:
3762  if (VT == MVT::v8i8) {
3763  SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
3764  return;
3765  } else if (VT == MVT::v16i8) {
3766  SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
3767  return;
3768  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
3769  SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
3770  return;
3771  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
3772  SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
3773  return;
3774  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3775  SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
3776  return;
3777  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3778  SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
3779  return;
3780  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3781  SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
3782  return;
3783  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3784  SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
3785  return;
3786  }
3787  break;
3788  case Intrinsic::aarch64_neon_ld2lane:
3789  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3790  SelectLoadLane(Node, 2, AArch64::LD2i8);
3791  return;
3792  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3793  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
3794  SelectLoadLane(Node, 2, AArch64::LD2i16);
3795  return;
3796  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3797  VT == MVT::v2f32) {
3798  SelectLoadLane(Node, 2, AArch64::LD2i32);
3799  return;
3800  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3801  VT == MVT::v1f64) {
3802  SelectLoadLane(Node, 2, AArch64::LD2i64);
3803  return;
3804  }
3805  break;
3806  case Intrinsic::aarch64_neon_ld3lane:
3807  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3808  SelectLoadLane(Node, 3, AArch64::LD3i8);
3809  return;
3810  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3811  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
3812  SelectLoadLane(Node, 3, AArch64::LD3i16);
3813  return;
3814  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3815  VT == MVT::v2f32) {
3816  SelectLoadLane(Node, 3, AArch64::LD3i32);
3817  return;
3818  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3819  VT == MVT::v1f64) {
3820  SelectLoadLane(Node, 3, AArch64::LD3i64);
3821  return;
3822  }
3823  break;
3824  case Intrinsic::aarch64_neon_ld4lane:
3825  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3826  SelectLoadLane(Node, 4, AArch64::LD4i8);
3827  return;
3828  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3829  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
3830  SelectLoadLane(Node, 4, AArch64::LD4i16);
3831  return;
3832  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3833  VT == MVT::v2f32) {
3834  SelectLoadLane(Node, 4, AArch64::LD4i32);
3835  return;
3836  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3837  VT == MVT::v1f64) {
3838  SelectLoadLane(Node, 4, AArch64::LD4i64);
3839  return;
3840  }
3841  break;
3842  case Intrinsic::aarch64_ld64b:
3843  SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
3844  return;
3845  }
3846  } break;
3847  case ISD::INTRINSIC_WO_CHAIN: {
3848  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
3849  switch (IntNo) {
3850  default:
3851  break;
3852  case Intrinsic::aarch64_tagp:
3853  SelectTagP(Node);
3854  return;
3855  case Intrinsic::aarch64_neon_tbl2:
3856  SelectTable(Node, 2,
3857  VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
3858  false);
3859  return;
3860  case Intrinsic::aarch64_neon_tbl3:
3861  SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
3862  : AArch64::TBLv16i8Three,
3863  false);
3864  return;
3865  case Intrinsic::aarch64_neon_tbl4:
3866  SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
3867  : AArch64::TBLv16i8Four,
3868  false);
3869  return;
3870  case Intrinsic::aarch64_neon_tbx2:
3871  SelectTable(Node, 2,
3872  VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
3873  true);
3874  return;
3875  case Intrinsic::aarch64_neon_tbx3:
3876  SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
3877  : AArch64::TBXv16i8Three,
3878  true);
3879  return;
3880  case Intrinsic::aarch64_neon_tbx4:
3881  SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
3882  : AArch64::TBXv16i8Four,
3883  true);
3884  return;
3885  case Intrinsic::aarch64_neon_smull:
3886  case Intrinsic::aarch64_neon_umull:
3887  if (tryMULLV64LaneV128(IntNo, Node))
3888  return;
3889  break;
3890  }
3891  break;
3892  }
3893  case ISD::INTRINSIC_VOID: {
3894  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
3895  if (Node->getNumOperands() >= 3)
3896  VT = Node->getOperand(2)->getValueType(0);
3897  switch (IntNo) {
3898  default:
3899  break;
3900  case Intrinsic::aarch64_neon_st1x2: {
3901  if (VT == MVT::v8i8) {
3902  SelectStore(Node, 2, AArch64::ST1Twov8b);
3903  return;
3904  } else if (VT == MVT::v16i8) {
3905  SelectStore(Node, 2, AArch64::ST1Twov16b);
3906  return;
3907  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
3908  VT == MVT::v4bf16) {
3909  SelectStore(Node, 2, AArch64::ST1Twov4h);
3910  return;
3911  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
3912  VT == MVT::v8bf16) {
3913  SelectStore(Node, 2, AArch64::ST1Twov8h);
3914  return;
3915  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3916  SelectStore(Node, 2, AArch64::ST1Twov2s);
3917  return;
3918  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3919  SelectStore(Node, 2, AArch64::ST1Twov4s);
3920  return;
3921  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3922  SelectStore(Node, 2, AArch64::ST1Twov2d);
3923  return;
3924  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3925  SelectStore(Node, 2, AArch64::ST1Twov1d);
3926  return;
3927  }
3928  break;
3929  }
3930  case Intrinsic::aarch64_neon_st1x3: {
3931  if (VT == MVT::v8i8) {
3932  SelectStore(Node, 3, AArch64::ST1Threev8b);
3933  return;
3934  } else if (VT == MVT::v16i8) {
3935  SelectStore(Node, 3, AArch64::ST1Threev16b);
3936  return;
3937  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
3938  VT == MVT::v4bf16) {
3939  SelectStore(Node, 3, AArch64::ST1Threev4h);
3940  return;
3941  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
3942  VT == MVT::v8bf16) {
3943  SelectStore(Node, 3, AArch64::ST1Threev8h);
3944  return;
3945  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3946  SelectStore(Node, 3, AArch64::ST1Threev2s);
3947  return;
3948  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3949  SelectStore(Node, 3, AArch64::ST1Threev4s);
3950  return;
3951  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3952  SelectStore(Node, 3, AArch64::ST1Threev2d);
3953  return;
3954  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3955  SelectStore(Node, 3, AArch64::ST1Threev1d);
3956  return;
3957  }
3958  break;
3959  }
3960  case Intrinsic::aarch64_neon_st1x4: {
3961  if (VT == MVT::v8i8) {
3962  SelectStore(Node, 4, AArch64::ST1Fourv8b);
3963  return;
3964  } else if (VT == MVT::v16i8) {
3965  SelectStore(Node, 4, AArch64::ST1Fourv16b);
3966  return;
3967  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
3968  VT == MVT::v4bf16) {
3969  SelectStore(Node, 4, AArch64::ST1Fourv4h);
3970  return;
3971  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
3972  VT == MVT::v8bf16) {
3973  SelectStore(Node, 4, AArch64::ST1Fourv8h);
3974  return;
3975  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3976  SelectStore(Node, 4, AArch64::ST1Fourv2s);
3977  return;
3978  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3979  SelectStore(Node, 4, AArch64::ST1Fourv4s);
3980  return;
3981  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3982  SelectStore(Node, 4, AArch64::ST1Fourv2d);
3983  return;
3984  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3985  SelectStore(Node, 4, AArch64::ST1Fourv1d);
3986  return;
3987  }
3988  break;
3989  }
3990  case Intrinsic::aarch64_neon_st2: {
3991  if (VT == MVT::v8i8) {
3992  SelectStore(Node, 2, AArch64::ST2Twov8b);
3993  return;
3994  } else if (VT == MVT::v16i8) {
3995  SelectStore(Node, 2, AArch64::ST2Twov16b);
3996  return;
3997  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
3998  VT == MVT::v4bf16) {
3999  SelectStore(Node, 2, AArch64::ST2Twov4h);
4000  return;
4001  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
4002  VT == MVT::v8bf16) {
4003  SelectStore(Node, 2, AArch64::ST2Twov8h);
4004  return;
4005  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4006  SelectStore(Node, 2, AArch64::ST2Twov2s);
4007  return;
4008  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4009  SelectStore(Node, 2, AArch64::ST2Twov4s);
4010  return;
4011  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4012  SelectStore(Node, 2, AArch64::ST2Twov2d);
4013  return;
4014  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4015  SelectStore(Node, 2, AArch64::ST1Twov1d);
4016  return;
4017  }
4018  break;
4019  }
4020  case Intrinsic::aarch64_neon_st3: {
4021  if (VT == MVT::v8i8) {
4022  SelectStore(Node, 3, AArch64::ST3Threev8b);
4023  return;
4024  } else if (VT == MVT::v16i8) {
4025  SelectStore(Node, 3, AArch64::ST3Threev16b);
4026  return;
4027  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
4028  VT == MVT::v4bf16) {
4029  SelectStore(Node, 3, AArch64::ST3Threev4h);
4030  return;
4031  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
4032  VT == MVT::v8bf16) {
4033  SelectStore(Node, 3, AArch64::ST3Threev8h);
4034  return;
4035  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4036  SelectStore(Node, 3, AArch64::ST3Threev2s);
4037  return;
4038  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4039  SelectStore(Node, 3, AArch64::ST3Threev4s);
4040  return;
4041  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4042  SelectStore(Node, 3, AArch64::ST3Threev2d);
4043  return;
4044  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4045  SelectStore(Node, 3, AArch64::ST1Threev1d);
4046  return;
4047  }
4048  break;
4049  }
4050  case Intrinsic::aarch64_neon_st4: {
4051  if (VT == MVT::v8i8) {
4052  SelectStore(Node, 4, AArch64::ST4Fourv8b);
4053  return;
4054  } else if (VT == MVT::v16i8) {
4055  SelectStore(Node, 4, AArch64::ST4Fourv16b);
4056  return;
4057  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
4058  VT == MVT::v4bf16) {
4059  SelectStore(Node, 4, AArch64::ST4Fourv4h);
4060  return;
4061  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
4062  VT == MVT::v8bf16) {
4063  SelectStore(Node, 4, AArch64::ST4Fourv8h);
4064  return;
4065  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4066  SelectStore(Node, 4, AArch64::ST4Fourv2s);
4067  return;
4068  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4069  SelectStore(Node, 4, AArch64::ST4Fourv4s);
4070  return;
4071  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4072  SelectStore(Node, 4, AArch64::ST4Fourv2d);
4073  return;
4074  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4075  SelectStore(Node, 4, AArch64::ST1Fourv1d);
4076  return;
4077  }
4078  break;
4079  }
4080  case Intrinsic::aarch64_neon_st2lane: {
4081  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4082  SelectStoreLane(Node, 2, AArch64::ST2i8);
4083  return;
4084  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4085  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4086  SelectStoreLane(Node, 2, AArch64::ST2i16);
4087  return;
4088  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4089  VT == MVT::v2f32) {
4090  SelectStoreLane(Node, 2, AArch64::ST2i32);
4091  return;
4092  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4093  VT == MVT::v1f64) {
4094  SelectStoreLane(Node, 2, AArch64::ST2i64);
4095  return;
4096  }
4097  break;
4098  }
4099  case Intrinsic::aarch64_neon_st3lane: {
4100  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4101  SelectStoreLane(Node, 3, AArch64::ST3i8);
4102  return;
4103  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4104  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4105  SelectStoreLane(Node, 3, AArch64::ST3i16);
4106  return;
4107  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4108  VT == MVT::v2f32) {
4109  SelectStoreLane(Node, 3, AArch64::ST3i32);
4110  return;
4111  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4112  VT == MVT::v1f64) {
4113  SelectStoreLane(Node, 3, AArch64::ST3i64);
4114  return;
4115  }
4116  break;
4117  }
4118  case Intrinsic::aarch64_neon_st4lane: {
4119  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4120  SelectStoreLane(Node, 4, AArch64::ST4i8);
4121  return;
4122  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4123  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4124  SelectStoreLane(Node, 4, AArch64::ST4i16);
4125  return;
4126  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4127  VT == MVT::v2f32) {
4128  SelectStoreLane(Node, 4, AArch64::ST4i32);
4129  return;
4130  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4131  VT == MVT::v1f64) {
4132  SelectStoreLane(Node, 4, AArch64::ST4i64);
4133  return;
4134  }
4135  break;
4136  }
4137  case Intrinsic::aarch64_sve_st2: {
4138  if (VT == MVT::nxv16i8) {
4139  SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
4140  return;
4141  } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4142  (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
4143  SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
4144  return;
4145  } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4146  SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
4147  return;
4148  } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4149  SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
4150  return;
4151  }
4152  break;
4153  }
4154  case Intrinsic::aarch64_sve_st3: {
4155  if (VT == MVT::nxv16i8) {
4156  SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
4157  return;
4158  } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4159  (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
4160  SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
4161  return;
4162  } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4163  SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
4164  return;
4165  } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4166  SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
4167  return;
4168  }
4169  break;
4170  }
4171  case Intrinsic::aarch64_sve_st4: {
4172  if (VT == MVT::nxv16i8) {
4173  SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
4174  return;
4175  } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4176  (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
4177  SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
4178  return;
4179  } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4180  SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
4181  return;
4182  } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4183  SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
4184  return;
4185  }
4186  break;
4187  }
4188  }
4189  break;
4190  }
4191  case AArch64ISD::LD2post: {
4192  if (VT == MVT::v8i8) {
4193  SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
4194  return;
4195  } else if (VT == MVT::v16i8) {
4196  SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
4197  return;
4198  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4199  SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
4200  return;
4201  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4202  SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
4203  return;
4204  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4205  SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
4206  return;
4207  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4208  SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
4209  return;
4210  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4211  SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
4212  return;
4213  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4214  SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
4215  return;
4216  }
4217  break;
4218  }
4219  case AArch64ISD::LD3post: {
4220  if (VT == MVT::v8i8) {
4221  SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
4222  return;
4223  } else if (VT == MVT::v16i8) {
4224  SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
4225  return;
4226  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4227  SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
4228  return;
4229  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4230  SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
4231  return;
4232  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4233  SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
4234  return;
4235  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4236  SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
4237  return;
4238  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4239  SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
4240  return;
4241  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4242  SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
4243  return;
4244  }
4245  break;
4246  }
4247  case AArch64ISD::LD4post: {
4248  if (VT == MVT::v8i8) {
4249  SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
4250  return;
4251  } else if (VT == MVT::v16i8) {
4252  SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
4253  return;
4254  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4255  SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
4256  return;
4257  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4258  SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
4259  return;
4260  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4261  SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
4262  return;
4263  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4264  SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
4265  return;
4266  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4267  SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
4268  return;
4269  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4270  SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
4271  return;
4272  }
4273  break;
4274  }
4275  case AArch64ISD::LD1x2post: {
4276  if (VT == MVT::v8i8) {
4277  SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
4278  return;
4279  } else if (VT == MVT::v16i8) {
4280  SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
4281  return;
4282  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4283  SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
4284  return;
4285  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4286  SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
4287  return;
4288  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4289  SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
4290  return;
4291  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4292  SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
4293  return;
4294  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4295  SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
4296  return;
4297  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4298  SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
4299  return;
4300  }
4301  break;
4302  }
4303  case AArch64ISD::LD1x3post: {
4304  if (VT == MVT::v8i8) {
4305  SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
4306  return;
4307  } else if (VT == MVT::v16i8) {
4308  SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
4309  return;
4310  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4311  SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
4312  return;
4313  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4314  SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
4315  return;
4316  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4317  SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
4318  return;
4319  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4320  SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
4321  return;
4322  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4323  SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
4324  return;
4325  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4326  SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
4327  return;
4328  }
4329  break;
4330  }
4331  case AArch64ISD::LD1x4post: {
4332  if (VT == MVT::v8i8) {
4333  SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
4334  return;
4335  } else if (VT == MVT::v16i8) {
4336  SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
4337  return;
4338  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4339  SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
4340  return;
4341  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4342  SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
4343  return;
4344  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4345  SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
4346  return;
4347  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4348  SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
4349  return;
4350  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4351  SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
4352  return;
4353  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4354  SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
4355  return;
4356  }
4357  break;
4358  }
4359  case AArch64ISD::LD1DUPpost: {
4360  if (VT == MVT::v8i8) {
4361  SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
4362  return;
4363  } else if (VT == MVT::v16i8) {
4364  SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
4365  return;
4366  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4367  SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
4368  return;
4369  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4370  SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
4371  return;
4372  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4373  SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
4374  return;
4375  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4376  SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
4377  return;
4378  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4379  SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
4380  return;
4381  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4382  SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
4383  return;
4384  }
4385  break;
4386  }
4387  case AArch64ISD::LD2DUPpost: {
4388  if (VT == MVT::v8i8) {
4389  SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
4390  return;
4391  } else if (VT == MVT::v16i8) {
4392  SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
4393  return;
4394  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4395  SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
4396  return;
4397  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4398  SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
4399  return;
4400  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4401  SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
4402  return;
4403  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4404  SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
4405  return;
4406  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4407  SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
4408  return;
4409  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4410  SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
4411  return;
4412  }
4413  break;
4414  }
4415  case AArch64ISD::LD3DUPpost: {
4416  if (VT == MVT::v8i8) {
4417  SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
4418  return;
4419  } else if (VT == MVT::v16i8) {
4420  SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
4421  return;
4422  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4423  SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
4424  return;
4425  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4426  SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
4427  return;
4428  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4429  SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
4430  return;
4431  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4432  SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
4433  return;
4434  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4435  SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
4436  return;
4437  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4438  SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
4439  return;
4440  }
4441  break;
4442  }
4443  case AArch64ISD::LD4DUPpost: {
4444  if (VT == MVT::v8i8) {
4445  SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
4446  return;
4447  } else if (VT == MVT::v16i8) {
4448  SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
4449  return;
4450  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4451  SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
4452  return;
4453  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4454  SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
4455  return;
4456  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4457  SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
4458  return;
4459  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4460  SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
4461  return;
4462  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4463  SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
4464  return;
4465  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4466  SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
4467  return;
4468  }
4469  break;
4470  }
4471  case AArch64ISD::LD1LANEpost: {
4472  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4473  SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
4474  return;
4475  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4476  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4477  SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
4478  return;
4479  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4480  VT == MVT::v2f32) {
4481  SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
4482  return;
4483  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4484  VT == MVT::v1f64) {
4485  SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
4486  return;
4487  }
4488  break;
4489  }
4490  case AArch64ISD::LD2LANEpost: {
4491  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4492  SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
4493  return;
4494  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4495  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4496  SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
4497  return;
4498  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4499  VT == MVT::v2f32) {
4500  SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
4501  return;
4502  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4503  VT == MVT::v1f64) {
4504  SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
4505  return;
4506  }
4507  break;
4508  }
4509  case AArch64ISD::LD3LANEpost: {
4510  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4511  SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
4512  return;
4513  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4514  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4515  SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
4516  return;
4517  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4518  VT == MVT::v2f32) {
4519  SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
4520  return;
4521  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4522  VT == MVT::v1f64) {
4523  SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
4524  return;
4525  }
4526  break;
4527  }
4528  case AArch64ISD::LD4LANEpost: {
4529  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4530  SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
4531  return;
4532  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4533  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4534  SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
4535  return;
4536  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4537  VT == MVT::v2f32) {
4538  SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
4539  return;
4540  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4541  VT == MVT::v1f64) {
4542  SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
4543  return;
4544  }
4545  break;
4546  }
4547  case AArch64ISD::ST2post: {
4548  VT = Node->getOperand(1).getValueType();
4549  if (VT == MVT::v8i8) {
4550  SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
4551  return;
4552  } else if (VT == MVT::v16i8) {
4553  SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
4554  return;
4555  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4556  SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
4557  return;
4558  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4559  SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
4560  return;
4561  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4562  SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
4563  return;
4564  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4565  SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
4566  return;
4567  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4568  SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
4569  return;
4570  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4571  SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
4572  return;
4573  }
4574  break;
4575  }
4576  case AArch64ISD::ST3post: {
4577  VT = Node->getOperand(1).getValueType();
4578  if (VT == MVT::v8i8) {
4579  SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
4580  return;
4581  } else if (VT == MVT::v16i8) {
4582  SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
4583  return;
4584  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4585  SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
4586  return;
4587  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4588  SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
4589  return;
4590  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4591  SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
4592  return;
4593  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4594  SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
4595  return;
4596  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4597  SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
4598  return;
4599  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4600  SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
4601  return;
4602  }
4603  break;
4604  }
4605  case AArch64ISD::ST4post: {
4606  VT = Node->getOperand(1).getValueType();
4607  if (VT == MVT::v8i8) {
4608  SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
4609  return;
4610  } else if (VT == MVT::v16i8) {
4611  SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
4612  return;
4613  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4614  SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
4615  return;
4616  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4617  SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
4618  return;
4619  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4620  SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
4621  return;
4622  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4623  SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
4624  return;
4625  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4626  SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
4627  return;
4628  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4629  SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
4630  return;
4631  }
4632  break;
4633  }
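  // Illustrative note (editorial, not from the original source): the
  // ST2post/ST3post/ST4post cases above select the whole-register,
  // post-incremented structure stores. For example, an ST2post node whose
  // stored operands are v4f32 is matched to AArch64::ST2Twov4s_POST, roughly:
  //   st2 { v0.4s, v1.4s }, [x0], #32
  // The v1i64/v1f64 cases fall back to the multi-register ST1 forms because
  // the interleaved ST2/ST3/ST4 instructions have no .1d arrangement.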
4634  case AArch64ISD::ST1x2post: {
4635  VT = Node->getOperand(1).getValueType();
4636  if (VT == MVT::v8i8) {
4637  SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
4638  return;
4639  } else if (VT == MVT::v16i8) {
4640  SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
4641  return;
4642  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4643  SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
4644  return;
4645  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4646  SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
4647  return;
4648  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4649  SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
4650  return;
4651  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4652  SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
4653  return;
4654  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4655  SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
4656  return;
4657  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4658  SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
4659  return;
4660  }
4661  break;
4662  }
4663  case AArch64ISD::ST1x3post: {
4664  VT = Node->getOperand(1).getValueType();
4665  if (VT == MVT::v8i8) {
4666  SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
4667  return;
4668  } else if (VT == MVT::v16i8) {
4669  SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
4670  return;
4671  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4672  SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
4673  return;
4674  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4675  SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
4676  return;
4677  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4678  SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
4679  return;
4680  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4681  SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
4682  return;
4683  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4684  SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
4685  return;
4686  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4687  SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
4688  return;
4689  }
4690  break;
4691  }
4692  case AArch64ISD::ST1x4post: {
4693  VT = Node->getOperand(1).getValueType();
4694  if (VT == MVT::v8i8) {
4695  SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
4696  return;
4697  } else if (VT == MVT::v16i8) {
4698  SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
4699  return;
4700  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4701  SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
4702  return;
4703  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4704  SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
4705  return;
4706  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4707  SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
4708  return;
4709  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4710  SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
4711  return;
4712  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4713  SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
4714  return;
4715  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4716  SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
4717  return;
4718  }
4719  break;
4720  }
4721  case AArch64ISD::ST2LANEpost: {
4722  VT = Node->getOperand(1).getValueType();
4723  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4724  SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
4725  return;
4726  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4727  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4728  SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
4729  return;
4730  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4731  VT == MVT::v2f32) {
4732  SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
4733  return;
4734  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4735  VT == MVT::v1f64) {
4736  SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
4737  return;
4738  }
4739  break;
4740  }
4741  case AArch64ISD::ST3LANEpost: {
4742  VT = Node->getOperand(1).getValueType();
4743  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4744  SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
4745  return;
4746  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4747  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4748  SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
4749  return;
4750  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4751  VT == MVT::v2f32) {
4752  SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
4753  return;
4754  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4755  VT == MVT::v1f64) {
4756  SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
4757  return;
4758  }
4759  break;
4760  }
4761  case AArch64ISD::ST4LANEpost: {
4762  VT = Node->getOperand(1).getValueType();
4763  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4764  SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
4765  return;
4766  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4767  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4768  SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
4769  return;
4770  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4771  VT == MVT::v2f32) {
4772  SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
4773  return;
4774  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4775  VT == MVT::v1f64) {
4776  SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
4777  return;
4778  }
4779  break;
4780  }
4781  case AArch64ISD::SVE_LD2_MERGE_ZERO: {
4782  if (VT == MVT::nxv16i8) {
4783  SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
4784  return;
4785  } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4786  (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
4787  SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
4788  return;
4789  } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4790  SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
4791  return;
4792  } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4793  SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
4794  return;
4795  }
4796  break;
4797  }
4798  case AArch64ISD::SVE_LD3_MERGE_ZERO: {
4799  if (VT == MVT::nxv16i8) {
4800  SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
4801  return;
4802  } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4803  (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
4804  SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
4805  return;
4806  } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4807  SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
4808  return;
4809  } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4810  SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
4811  return;
4812  }
4813  break;
4814  }
4815  case AArch64ISD::SVE_LD4_MERGE_ZERO: {
4816  if (VT == MVT::nxv16i8) {
4817  SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
4818  return;
4819  } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4820  (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
4821  SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
4822  return;
4823  } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4824  SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
4825  return;
4826  } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4827  SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
4828  return;
4829  }
4830  break;
4831  }
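  // Illustrative note (editorial, not from the original source): the SVE
  // structured-load cases above pass an element-size scale (0 = bytes,
  // 1 = halfwords, 2 = words, 3 = doublewords) plus an immediate-offset and a
  // register-offset opcode; SelectPredicatedLoad is expected to pick between
  // the two depending on how the address is formed. Roughly, LD2D corresponds
  // to
  //   ld2d { z0.d, z1.d }, p0/z, [x0, x1, lsl #3]
  // while LD2D_IMM corresponds to
  //   ld2d { z0.d, z1.d }, p0/z, [x0, #2, mul vl]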
4832  }
4833 
4834  // Select the default instruction
4835  SelectCode(Node);
4836 }
4837 
4838 /// createAArch64ISelDag - This pass converts a legalized DAG into an
4839 /// AArch64-specific DAG, ready for instruction scheduling.
4840 FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
4841  CodeGenOpt::Level OptLevel) {
4842  return new AArch64DAGToDAGISel(TM, OptLevel);
4843 }
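// Usage sketch (editorial, not from the original source): this factory is
// typically called from the target's pass configuration when instruction
// selection is set up. Assuming the usual AArch64PassConfig in
// AArch64TargetMachine.cpp, the call looks roughly like:
//
//   bool AArch64PassConfig::addInstSelector() {
//     addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
//     return false;
//   }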
4844 
4845 /// When \p PredVT is a scalable vector predicate in the form
4846 /// MVT::nx<M>xi1, it builds the corresponding scalable vector of
4847 /// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
4848 /// structured vectors (NumVec > 1), the output data type is
4849 /// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
4850 /// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
4851 /// EVT.
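/// For example, PredVT == MVT::nxv4i1 gives bits = 128 / 4 = 32, so the
/// result is MVT::nxv4i32 for NumVec == 1 and MVT::nxv8i32 for NumVec == 2;
/// PredVT == MVT::nxv2i1 with NumVec == 4 gives MVT::nxv8i64. (Illustrative
/// examples added editorially; they follow directly from the rule above.)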
4852 static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
4853  unsigned NumVec) {
4854  assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
4855  if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
4856  return EVT();
4857 
4858  if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
4859  PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
4860  return EVT();
4861 
4862  ElementCount EC = PredVT.getVectorElementCount();
4863  EVT ScalarVT =
4864  EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
4865  EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
4866 
4867  return MemVT;
4868 }
4869 
4870 /// Return the EVT of the data associated with a memory operation in \p
4871 /// Root. If such an EVT cannot be retrieved, it returns an invalid EVT.
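/// For example (editorial note): for a plain MemSDNode the memory VT is taken
/// directly from the node, while for the custom SVE_LD2_MERGE_ZERO node the
/// VT is derived from the predicate operand, e.g. an nxv4i1 predicate yields
/// nxv8i32 (NumVec == 2).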
4872 static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
4873  if (isa<MemSDNode>(Root))
4874  return cast<MemSDNode>(Root)->getMemoryVT();
4875 
4876  if (isa<MemIntrinsicSDNode>(Root))
4877  return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();
4878 
4879  const unsigned Opcode = Root->getOpcode();
4880  // For custom ISD nodes, we have to look at them individually to extract the
4881  // type of the data moved to/from memory.
4882  switch (Opcode) {
4883  case AArch64ISD::LD1_MERGE_ZERO:
4884  case AArch64ISD::LD1S_MERGE_ZERO:
4885  case AArch64ISD::LDNF1_MERGE_ZERO:
4886  case AArch64ISD::LDNF1S_MERGE_ZERO:
4887  return cast<VTSDNode>(Root->getOperand(3))->getVT();
4888  case AArch64ISD::ST1_PRED:
4889  return cast<VTSDNode>(Root->getOperand(4))->getVT();
4890  case AArch64ISD::SVE_LD2_MERGE_ZERO:
4891  return getPackedVectorTypeFromPredicateType(
4892  Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
4893  case AArch64ISD::SVE_LD3_MERGE_ZERO:
4894  return getPackedVectorTypeFromPredicateType(
4895  Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
4896  case AArch64ISD::SVE_LD4_MERGE_ZERO:
4897  return getPackedVectorTypeFromPredicateType(
4898  Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
4899  default:
4900  break;
4901  }
4902 
4903  if (Opcode != ISD::INTRINSIC_VOID)
4904  return EVT();
4905 
4906  const unsigned IntNo =
4907  cast<ConstantSDNode>(Root->getOperand(1))->getZExtValue();
4908  if (IntNo != Intrinsic::aarch64_sve_prf)
4909  return EVT();
4910 
4911  // We are using an SVE prefetch intrinsic. Type must be inferred
4912  // from the width of the predicate.
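  // (Editorial example: with an nxv8i1 predicate this yields nxv8i16, since
  // NumVec == 1.)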
4913  return getPackedVectorTypeFromPredicateType(
4914  Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);