1 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the AArch64 target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AArch64MachineFunctionInfo.h"
14 #include "AArch64TargetMachine.h"
15 #include "MCTargetDesc/AArch64AddressingModes.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/CodeGen/SelectionDAGISel.h"
18 #include "llvm/IR/Function.h" // To access function attributes.
19 #include "llvm/IR/GlobalValue.h"
20 #include "llvm/IR/Intrinsics.h"
21 #include "llvm/IR/IntrinsicsAArch64.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/ErrorHandling.h"
24 #include "llvm/Support/KnownBits.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
27 
28 using namespace llvm;
29 
30 #define DEBUG_TYPE "aarch64-isel"
31 
32 //===--------------------------------------------------------------------===//
33 /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
34 /// instructions for SelectionDAG operations.
35 ///
36 namespace {
37 
38 class AArch64DAGToDAGISel : public SelectionDAGISel {
39 
40  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
41  /// make the right decision when generating code for different targets.
42  const AArch64Subtarget *Subtarget;
43 
44 public:
45  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
46  CodeGenOpt::Level OptLevel)
47  : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
48 
49  StringRef getPassName() const override {
50  return "AArch64 Instruction Selection";
51  }
52 
53  bool runOnMachineFunction(MachineFunction &MF) override {
54  Subtarget = &MF.getSubtarget<AArch64Subtarget>();
55  return SelectionDAGISel::runOnMachineFunction(MF);
56  }
57 
58  void Select(SDNode *Node) override;
59 
60  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
61  /// inline asm expressions.
62  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
63  unsigned ConstraintID,
64  std::vector<SDValue> &OutOps) override;
65 
66  template <signed Low, signed High, signed Scale>
67  bool SelectRDVLImm(SDValue N, SDValue &Imm);
68 
69  bool tryMLAV64LaneV128(SDNode *N);
70  bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
71  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
72  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
73  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
74  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
75  return SelectShiftedRegister(N, false, Reg, Shift);
76  }
77  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
78  return SelectShiftedRegister(N, true, Reg, Shift);
79  }
80  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
81  return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
82  }
83  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
84  return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
85  }
86  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
87  return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
88  }
89  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
90  return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
91  }
92  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
93  return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
94  }
95  bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
96  return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
97  }
98  bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
99  return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
100  }
101  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
102  return SelectAddrModeIndexed(N, 1, Base, OffImm);
103  }
104  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
105  return SelectAddrModeIndexed(N, 2, Base, OffImm);
106  }
107  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
108  return SelectAddrModeIndexed(N, 4, Base, OffImm);
109  }
110  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
111  return SelectAddrModeIndexed(N, 8, Base, OffImm);
112  }
113  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
114  return SelectAddrModeIndexed(N, 16, Base, OffImm);
115  }
116  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
117  return SelectAddrModeUnscaled(N, 1, Base, OffImm);
118  }
119  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
120  return SelectAddrModeUnscaled(N, 2, Base, OffImm);
121  }
122  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
123  return SelectAddrModeUnscaled(N, 4, Base, OffImm);
124  }
125  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
126  return SelectAddrModeUnscaled(N, 8, Base, OffImm);
127  }
128  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
129  return SelectAddrModeUnscaled(N, 16, Base, OffImm);
130  }
131  template <unsigned Size, unsigned Max>
132  bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
133  // Test if there is an appropriate addressing mode and check if the
134  // immediate fits.
135  bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
136  if (Found) {
137  if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
138  int64_t C = CI->getSExtValue();
139  if (C <= Max)
140  return true;
141  }
142  }
143 
144  // Otherwise, base only, materialize address in register.
145  Base = N;
146  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
147  return true;
148  }
149 
150  template<int Width>
151  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
152  SDValue &SignExtend, SDValue &DoShift) {
153  return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
154  }
155 
156  template<int Width>
157  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
158  SDValue &SignExtend, SDValue &DoShift) {
159  return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
160  }
161 
162  bool SelectDupZeroOrUndef(SDValue N) {
163  switch(N->getOpcode()) {
164  case ISD::UNDEF:
165  return true;
166  case AArch64ISD::DUP:
167  case ISD::SPLAT_VECTOR: {
168  auto Opnd0 = N->getOperand(0);
169  if (auto CN = dyn_cast<ConstantSDNode>(Opnd0))
170  if (CN->isZero())
171  return true;
172  if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
173  if (CN->isZero())
174  return true;
175  break;
176  }
177  default:
178  break;
179  }
180 
181  return false;
182  }
183 
184  bool SelectDupZero(SDValue N) {
185  switch(N->getOpcode()) {
186  case AArch64ISD::DUP:
187  case ISD::SPLAT_VECTOR: {
188  auto Opnd0 = N->getOperand(0);
189  if (auto CN = dyn_cast<ConstantSDNode>(Opnd0))
190  if (CN->isZero())
191  return true;
192  if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
193  if (CN->isZero())
194  return true;
195  break;
196  }
197  }
198 
199  return false;
200  }
201 
202  template<MVT::SimpleValueType VT>
203  bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
204  return SelectSVEAddSubImm(N, VT, Imm, Shift);
205  }
206 
207  template <MVT::SimpleValueType VT>
208  bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
209  return SelectSVECpyDupImm(N, VT, Imm, Shift);
210  }
211 
212  template <MVT::SimpleValueType VT, bool Invert = false>
213  bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
214  return SelectSVELogicalImm(N, VT, Imm, Invert);
215  }
216 
217  template <MVT::SimpleValueType VT>
218  bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
219  return SelectSVEArithImm(N, VT, Imm);
220  }
221 
222  template <unsigned Low, unsigned High, bool AllowSaturation = false>
223  bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
224  return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
225  }
226 
227  // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
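 // For illustration: with Shift=false and Scale=16, a constant of 32 divides
 // down to a multiplier of 2, which is accepted only if it lies in [Min, Max];
 // with Shift=true the constant is first interpreted as a power of two.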
228  template<signed Min, signed Max, signed Scale, bool Shift>
229  bool SelectCntImm(SDValue N, SDValue &Imm) {
230  if (!isa<ConstantSDNode>(N))
231  return false;
232 
233  int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
234  if (Shift)
235  MulImm = 1LL << MulImm;
236 
237  if ((MulImm % std::abs(Scale)) != 0)
238  return false;
239 
240  MulImm /= Scale;
241  if ((MulImm >= Min) && (MulImm <= Max)) {
242  Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
243  return true;
244  }
245 
246  return false;
247  }
248 
249  template <signed Max, signed Scale>
250  bool SelectEXTImm(SDValue N, SDValue &Imm) {
251  if (!isa<ConstantSDNode>(N))
252  return false;
253 
254  int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
255 
256  if (MulImm >= 0 && MulImm <= Max) {
257  MulImm *= Scale;
258  Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
259  return true;
260  }
261 
262  return false;
263  }
264 
265  /// Form sequences of consecutive 64/128-bit registers for use in NEON
266  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
267 /// between 1 and 4 elements. If it contains a single element, that is returned
268 /// unchanged; otherwise a REG_SEQUENCE value is returned.
269  SDValue createDTuple(ArrayRef<SDValue> Vecs);
270  SDValue createQTuple(ArrayRef<SDValue> Vecs);
271  // Form a sequence of SVE registers for instructions using a list of vectors,
272  // e.g. structured loads and stores (ldN, stN).
273  SDValue createZTuple(ArrayRef<SDValue> Vecs);
274 
275  /// Generic helper for the createDTuple/createQTuple
276  /// functions. Those should almost always be called instead.
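 // E.g. createQTuple on four vectors yields one REG_SEQUENCE node in the
 // QQQQ register class with the inputs in qsub0..qsub3, which is the
 // vector-list operand shape expected by the LD4/ST4-style instructions.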
277  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
278  const unsigned SubRegs[]);
279 
280  void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
281 
282  bool tryIndexedLoad(SDNode *N);
283 
284  bool trySelectStackSlotTagP(SDNode *N);
285  void SelectTagP(SDNode *N);
286 
287  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
288  unsigned SubRegIdx);
289  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
290  unsigned SubRegIdx);
291  void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
292  void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
293  void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
294  unsigned Opc_rr, unsigned Opc_ri,
295  bool IsIntr = false);
296 
297  bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
298  /// SVE Reg+Imm addressing mode.
299  template <int64_t Min, int64_t Max>
300  bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
301  SDValue &OffImm);
302  /// SVE Reg+Reg address mode.
303  template <unsigned Scale>
304  bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
305  return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
306  }
307 
308  void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
309  void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
310  void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
311  void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
312  void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
313  unsigned Opc_rr, unsigned Opc_ri);
314  std::tuple<unsigned, SDValue, SDValue>
315  findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
316  const SDValue &OldBase, const SDValue &OldOffset,
317  unsigned Scale);
318 
319  bool tryBitfieldExtractOp(SDNode *N);
320  bool tryBitfieldExtractOpFromSExt(SDNode *N);
321  bool tryBitfieldInsertOp(SDNode *N);
322  bool tryBitfieldInsertInZeroOp(SDNode *N);
323  bool tryShiftAmountMod(SDNode *N);
324  bool tryHighFPExt(SDNode *N);
325 
326  bool tryReadRegister(SDNode *N);
327  bool tryWriteRegister(SDNode *N);
328 
329 // Include the pieces autogenerated from the target description.
330 #include "AArch64GenDAGISel.inc"
331 
332 private:
333  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
334  SDValue &Shift);
335  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
336  SDValue &OffImm) {
337  return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
338  }
339  bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
340  unsigned Size, SDValue &Base,
341  SDValue &OffImm);
342  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
343  SDValue &OffImm);
344  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
345  SDValue &OffImm);
346  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
347  SDValue &Offset, SDValue &SignExtend,
348  SDValue &DoShift);
349  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
350  SDValue &Offset, SDValue &SignExtend,
351  SDValue &DoShift);
352  bool isWorthFolding(SDValue V) const;
353  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
354  SDValue &Offset, SDValue &SignExtend);
355 
356  template<unsigned RegWidth>
357  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
358  return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
359  }
360 
361  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
362 
363  bool SelectCMP_SWAP(SDNode *N);
364 
365  bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
366  bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
367  bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
368 
369  bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
370  bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
371  bool AllowSaturation, SDValue &Imm);
372 
373  bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
374  bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
375  SDValue &Offset);
376 
377  bool SelectAllActivePredicate(SDValue N);
378 };
379 } // end anonymous namespace
380 
381 /// isIntImmediate - This method tests to see if the node is a constant
382 /// operand. If so Imm will receive the 32-bit value.
383 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
384  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
385  Imm = C->getZExtValue();
386  return true;
387  }
388  return false;
389 }
390 
391 // isIntImmediate - This method tests to see if the value is a constant operand.
392 // If so, Imm will receive the value.
393 static bool isIntImmediate(SDValue N, uint64_t &Imm) {
394  return isIntImmediate(N.getNode(), Imm);
395 }
396 
397 // isOpcWithIntImmediate - This method tests to see if the node is a specific
398 // opcode and that it has an immediate integer right operand.
399 // If so, Imm will receive the 32-bit value.
400 static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
401  uint64_t &Imm) {
402  return N->getOpcode() == Opc &&
403  isIntImmediate(N->getOperand(1).getNode(), Imm);
404 }
405 
406 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
407  const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
408  switch(ConstraintID) {
409  default:
410  llvm_unreachable("Unexpected asm memory constraint");
411  case InlineAsm::Constraint_m:
412  case InlineAsm::Constraint_o:
413  case InlineAsm::Constraint_Q:
414  // We need to make sure that this one operand does not end up in XZR, thus
415  // require the address to be in a PointerRegClass register.
416  const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
417  const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
418  SDLoc dl(Op);
419  SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
420  SDValue NewOp =
421  SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
422  dl, Op.getValueType(),
423  Op, RC), 0);
424  OutOps.push_back(NewOp);
425  return false;
426  }
427  return true;
428 }
429 
430 /// SelectArithImmed - Select an immediate value that can be represented as
431 /// a 12-bit value shifted left by either 0 or 12. If so, return true with
432 /// Val set to the 12-bit value and Shift set to the shifter operand.
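// For example (illustrative): 0xfff selects as Val=0xfff with LSL #0, 0x5000
// selects as Val=0x5 with LSL #12, while 0x1001 has bits in both halves and
// is rejected, so it must be materialized into a register instead.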
433 bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
434  SDValue &Shift) {
435  // This function is called from the addsub_shifted_imm ComplexPattern,
436  // which lists [imm] as the list of opcodes it's interested in; however,
437  // we still need to check whether the operand is actually an immediate
438  // here because the ComplexPattern opcode list is only used in
439  // root-level opcode matching.
440  if (!isa<ConstantSDNode>(N.getNode()))
441  return false;
442 
443  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
444  unsigned ShiftAmt;
445 
446  if (Immed >> 12 == 0) {
447  ShiftAmt = 0;
448  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
449  ShiftAmt = 12;
450  Immed = Immed >> 12;
451  } else
452  return false;
453 
454  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
455  SDLoc dl(N);
456  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
457  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
458  return true;
459 }
460 
461 /// SelectNegArithImmed - As above, but negates the value before trying to
462 /// select it.
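// Roughly: a compare such as (CMP w0, #-5) can instead be selected as
// (CMN w0, #5). Zero is rejected below because CMP #0 and CMN #0 have
// opposite effects on the C flag, so the negated form is not equivalent.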
463 bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
464  SDValue &Shift) {
465  // This function is called from the addsub_shifted_imm ComplexPattern,
466  // which lists [imm] as the list of opcodes it's interested in; however,
467  // we still need to check whether the operand is actually an immediate
468  // here because the ComplexPattern opcode list is only used in
469  // root-level opcode matching.
470  if (!isa<ConstantSDNode>(N.getNode()))
471  return false;
472 
473  // The immediate operand must be a 24-bit zero-extended immediate.
474  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
475 
476  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
477  // have the opposite effect on the C flag, so this pattern mustn't match under
478  // those circumstances.
479  if (Immed == 0)
480  return false;
481 
482  if (N.getValueType() == MVT::i32)
483  Immed = ~((uint32_t)Immed) + 1;
484  else
485  Immed = ~Immed + 1ULL;
486  if (Immed & 0xFFFFFFFFFF000000ULL)
487  return false;
488 
489  Immed &= 0xFFFFFFULL;
490  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
491  Shift);
492 }
493 
494 /// getShiftTypeForNode - Translate a shift node to the corresponding
495 /// ShiftType value.
496 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
497  switch (N.getOpcode()) {
498  default:
499  return AArch64_AM::InvalidShiftExtend;
500  case ISD::SHL:
501  return AArch64_AM::LSL;
502  case ISD::SRL:
503  return AArch64_AM::LSR;
504  case ISD::SRA:
505  return AArch64_AM::ASR;
506  case ISD::ROTR:
507  return AArch64_AM::ROR;
508  }
509 }
510 
511 /// Determine whether it is worth it to fold SHL into the addressing
512 /// mode.
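// Typical case: an (shl x, 2) feeding only memory operations can fold into a
// scaled register-offset access, e.g. something like "ldr w0, [x1, x2, lsl #2]";
// a shift that is also reused by non-memory arithmetic is kept separate.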
513 static bool isWorthFoldingSHL(SDValue V) {
514  assert(V.getOpcode() == ISD::SHL && "invalid opcode");
515  // It is worth folding logical shift of up to three places.
516  auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
517  if (!CSD)
518  return false;
519  unsigned ShiftVal = CSD->getZExtValue();
520  if (ShiftVal > 3)
521  return false;
522 
523  // Check if this particular node is reused in any non-memory related
524  // operation. If yes, do not try to fold this node into the address
525  // computation, since the computation will be kept.
526  const SDNode *Node = V.getNode();
527  for (SDNode *UI : Node->uses())
528  if (!isa<MemSDNode>(*UI))
529  for (SDNode *UII : UI->uses())
530  if (!isa<MemSDNode>(*UII))
531  return false;
532  return true;
533 }
534 
535 /// Determine whether it is worth to fold V into an extended register.
536 bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
537  // Trivial if we are optimizing for code size or if there is only
538  // one use of the value.
539  if (CurDAG->shouldOptForSize() || V.hasOneUse())
540  return true;
541  // If a subtarget has a fastpath LSL we can fold a logical shift into
542  // the addressing mode and save a cycle.
543  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
544  isWorthFoldingSHL(V))
545  return true;
546  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
547  const SDValue LHS = V.getOperand(0);
548  const SDValue RHS = V.getOperand(1);
549  if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
550  return true;
551  if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
552  return true;
553  }
554 
555  // It hurts otherwise, since the value will be reused.
556  return false;
557 }
558 
559 /// SelectShiftedRegister - Select a "shifted register" operand. If the value
560 /// is not shifted, set the Shift operand to default of "LSL 0". The logical
561 /// instructions allow the shifted register to be rotated, but the arithmetic
562 /// instructions do not. The AllowROR parameter specifies whether ROR is
563 /// supported.
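// Illustrative example: for (add x0, (shl x1, 3)) the operands become
// Reg = x1 and Shift = "LSL #3", i.e. roughly "add x0, x0, x1, lsl #3".
// ROR is only accepted for the logical instruction forms.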
564 bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
565  SDValue &Reg, SDValue &Shift) {
566  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
567  if (ShType == AArch64_AM::InvalidShiftExtend)
568  return false;
569  if (!AllowROR && ShType == AArch64_AM::ROR)
570  return false;
571 
572  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
573  unsigned BitSize = N.getValueSizeInBits();
574  unsigned Val = RHS->getZExtValue() & (BitSize - 1);
575  unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
576 
577  Reg = N.getOperand(0);
578  Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
579  return isWorthFolding(N);
580  }
581 
582  return false;
583 }
584 
585 /// getExtendTypeForNode - Translate an extend node to the corresponding
586 /// ExtendType value.
587 static AArch64_AM::ShiftExtendType
588 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
589  if (N.getOpcode() == ISD::SIGN_EXTEND ||
590  N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
591  EVT SrcVT;
592  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
593  SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
594  else
595  SrcVT = N.getOperand(0).getValueType();
596 
597  if (!IsLoadStore && SrcVT == MVT::i8)
598  return AArch64_AM::SXTB;
599  else if (!IsLoadStore && SrcVT == MVT::i16)
600  return AArch64_AM::SXTH;
601  else if (SrcVT == MVT::i32)
602  return AArch64_AM::SXTW;
603  assert(SrcVT != MVT::i64 && "extend from 64-bits?");
604 
605  return AArch64_AM::InvalidShiftExtend;
606  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
607  N.getOpcode() == ISD::ANY_EXTEND) {
608  EVT SrcVT = N.getOperand(0).getValueType();
609  if (!IsLoadStore && SrcVT == MVT::i8)
610  return AArch64_AM::UXTB;
611  else if (!IsLoadStore && SrcVT == MVT::i16)
612  return AArch64_AM::UXTH;
613  else if (SrcVT == MVT::i32)
614  return AArch64_AM::UXTW;
615  assert(SrcVT != MVT::i64 && "extend from 64-bits?");
616 
617  return AArch64_AM::InvalidShiftExtend;
618  } else if (N.getOpcode() == ISD::AND) {
619  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
620  if (!CSD)
621  return AArch64_AM::InvalidShiftExtend;
622  uint64_t AndMask = CSD->getZExtValue();
623 
624  switch (AndMask) {
625  default:
626  return AArch64_AM::InvalidShiftExtend;
627  case 0xFF:
628  return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
629  case 0xFFFF:
630  return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
631  case 0xFFFFFFFF:
632  return AArch64_AM::UXTW;
633  }
634  }
635 
636  return AArch64_AM::InvalidShiftExtend;
637 }
638 
639 // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
640 static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
641  if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
642  DL->getOpcode() != AArch64ISD::DUPLANE32)
643  return false;
644 
645  SDValue SV = DL->getOperand(0);
646  if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
647  return false;
648 
649  SDValue EV = SV.getOperand(1);
650  if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
651  return false;
652 
653  ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
654  ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
655  LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
656  LaneOp = EV.getOperand(0);
657 
658  return true;
659 }
660 
661 // Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
662 // high lane extract.
663 static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
664  SDValue &LaneOp, int &LaneIdx) {
665 
666  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
667  std::swap(Op0, Op1);
668  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
669  return false;
670  }
671  StdOp = Op1;
672  return true;
673 }
674 
675 /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
676 /// is a lane in the upper half of a 128-bit vector. Recognize and select this
677 /// so that we don't emit unnecessary lane extracts.
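// Sketch of the intent: (add acc, (mul x, dup(lane from the high half of a
// 128-bit vector))) can select straight to an indexed MLA, e.g. roughly
// "mla v0.4s, v1.4s, v2.s[3]", instead of first extracting the lane.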
678 bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
679  SDLoc dl(N);
680  SDValue Op0 = N->getOperand(0);
681  SDValue Op1 = N->getOperand(1);
682  SDValue MLAOp1; // Will hold ordinary multiplicand for MLA.
683  SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA.
684  int LaneIdx = -1; // Will hold the lane index.
685 
686  if (Op1.getOpcode() != ISD::MUL ||
687  !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
688  LaneIdx)) {
689  std::swap(Op0, Op1);
690  if (Op1.getOpcode() != ISD::MUL ||
691  !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
692  LaneIdx))
693  return false;
694  }
695 
696  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
697 
698  SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
699 
700  unsigned MLAOpc = ~0U;
701 
702  switch (N->getSimpleValueType(0).SimpleTy) {
703  default:
704  llvm_unreachable("Unrecognized MLA.");
705  case MVT::v4i16:
706  MLAOpc = AArch64::MLAv4i16_indexed;
707  break;
708  case MVT::v8i16:
709  MLAOpc = AArch64::MLAv8i16_indexed;
710  break;
711  case MVT::v2i32:
712  MLAOpc = AArch64::MLAv2i32_indexed;
713  break;
714  case MVT::v4i32:
715  MLAOpc = AArch64::MLAv4i32_indexed;
716  break;
717  }
718 
719  ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
720  return true;
721 }
722 
723 bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
724  SDLoc dl(N);
725  SDValue SMULLOp0;
726  SDValue SMULLOp1;
727  int LaneIdx;
728 
729  if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
730  LaneIdx))
731  return false;
732 
733  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
734 
735  SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
736 
737  unsigned SMULLOpc = ~0U;
738 
739  if (IntNo == Intrinsic::aarch64_neon_smull) {
740  switch (N->getSimpleValueType(0).SimpleTy) {
741  default:
742  llvm_unreachable("Unrecognized SMULL.");
743  case MVT::v4i32:
744  SMULLOpc = AArch64::SMULLv4i16_indexed;
745  break;
746  case MVT::v2i64:
747  SMULLOpc = AArch64::SMULLv2i32_indexed;
748  break;
749  }
750  } else if (IntNo == Intrinsic::aarch64_neon_umull) {
751  switch (N->getSimpleValueType(0).SimpleTy) {
752  default:
753  llvm_unreachable("Unrecognized SMULL.");
754  case MVT::v4i32:
755  SMULLOpc = AArch64::UMULLv4i16_indexed;
756  break;
757  case MVT::v2i64:
758  SMULLOpc = AArch64::UMULLv2i32_indexed;
759  break;
760  }
761  } else
762  llvm_unreachable("Unrecognized intrinsic.");
763 
764  ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
765  return true;
766 }
767 
768 /// Instructions that accept extend modifiers like UXTW expect the register
769 /// being extended to be a GPR32, but the incoming DAG might be acting on a
770 /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
771 /// this is the case.
772 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
773  if (N.getValueType() == MVT::i32)
774  return N;
775 
776  SDLoc dl(N);
777  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
778  MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
779  dl, MVT::i32, N, SubReg);
780  return SDValue(Node, 0);
781 }
782 
783 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
784 template<signed Low, signed High, signed Scale>
785 bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
786  if (!isa<ConstantSDNode>(N))
787  return false;
788 
789  int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
790  if ((MulImm % std::abs(Scale)) == 0) {
791  int64_t RDVLImm = MulImm / Scale;
792  if ((RDVLImm >= Low) && (RDVLImm <= High)) {
793  Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
794  return true;
795  }
796  }
797 
798  return false;
799 }
800 
801 /// SelectArithExtendedRegister - Select a "extended register" operand. This
802 /// operand folds in an extend followed by an optional left shift.
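// For instance, (add x0, (shl (sext_inreg x1, i8), 2)) can become roughly
// "add x0, x0, w1, sxtb #2"; left shifts greater than 4 are rejected below.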
803 bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
804  SDValue &Shift) {
805  unsigned ShiftVal = 0;
806  AArch64_AM::ShiftExtendType Ext;
807 
808  if (N.getOpcode() == ISD::SHL) {
809  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
810  if (!CSD)
811  return false;
812  ShiftVal = CSD->getZExtValue();
813  if (ShiftVal > 4)
814  return false;
815 
816  Ext = getExtendTypeForNode(N.getOperand(0));
817  if (Ext == AArch64_AM::InvalidShiftExtend)
818  return false;
819 
820  Reg = N.getOperand(0).getOperand(0);
821  } else {
822  Ext = getExtendTypeForNode(N);
823  if (Ext == AArch64_AM::InvalidShiftExtend)
824  return false;
825 
826  Reg = N.getOperand(0);
827 
828  // Don't match if free 32-bit -> 64-bit zext can be used instead.
829  if (Ext == AArch64_AM::UXTW &&
830  Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode()))
831  return false;
832  }
833 
834  // AArch64 mandates that the RHS of the operation must use the smallest
835  // register class that could contain the size being extended from. Thus,
836  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
837  // there might not be an actual 32-bit value in the program. We can
838  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
840  Reg = narrowIfNeeded(CurDAG, Reg);
841  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
842  MVT::i32);
843  return isWorthFolding(N);
844 }
845 
846 /// If there's a use of this ADDlow that's not itself a load/store then we'll
847 /// need to create a real ADD instruction from it anyway and there's no point in
848 /// folding it into the mem op. Theoretically, it shouldn't matter, but there's
849 /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
850 /// leads to duplicated ADRP instructions.
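// In other words, the ADDlow is only folded when every user is a plain
// load/store, e.g. "adrp x8, sym" followed by "ldr w0, [x8, :lo12:sym]";
// acquire/release accesses only take a bare register and are excluded.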
851 static bool isWorthFoldingADDlow(SDValue N) {
852  for (auto Use : N->uses()) {
853  if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
854  Use->getOpcode() != ISD::ATOMIC_LOAD &&
855  Use->getOpcode() != ISD::ATOMIC_STORE)
856  return false;
857 
858  // ldar and stlr have much more restrictive addressing modes (just a
859  // register).
860  if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getSuccessOrdering()))
861  return false;
862  }
863 
864  return true;
865 }
866 
867 /// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
868 /// immediate" address. The "Size" argument is the size in bytes of the memory
869 /// reference, which determines the scale.
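// Worked example: with IsSignedImm, BW=7 and Size=8 (an LDP/STP-style access),
// the accepted byte offsets are the multiples of 8 in [-512, 504], and the
// encoded immediate is the byte offset divided by 8.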
870 bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
871  unsigned BW, unsigned Size,
872  SDValue &Base,
873  SDValue &OffImm) {
874  SDLoc dl(N);
875  const DataLayout &DL = CurDAG->getDataLayout();
876  const TargetLowering *TLI = getTargetLowering();
877  if (N.getOpcode() == ISD::FrameIndex) {
878  int FI = cast<FrameIndexSDNode>(N)->getIndex();
879  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
880  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
881  return true;
882  }
883 
884  // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
885  // selected here doesn't support labels/immediates, only base+offset.
886  if (CurDAG->isBaseWithConstantOffset(N)) {
887  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
888  if (IsSignedImm) {
889  int64_t RHSC = RHS->getSExtValue();
890  unsigned Scale = Log2_32(Size);
891  int64_t Range = 0x1LL << (BW - 1);
892 
893  if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
894  RHSC < (Range << Scale)) {
895  Base = N.getOperand(0);
896  if (Base.getOpcode() == ISD::FrameIndex) {
897  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
898  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
899  }
900  OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
901  return true;
902  }
903  } else {
904  // unsigned Immediate
905  uint64_t RHSC = RHS->getZExtValue();
906  unsigned Scale = Log2_32(Size);
907  uint64_t Range = 0x1ULL << BW;
908 
909  if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
910  Base = N.getOperand(0);
911  if (Base.getOpcode() == ISD::FrameIndex) {
912  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
913  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
914  }
915  OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
916  return true;
917  }
918  }
919  }
920  }
921  // Base only. The address will be materialized into a register before
922  // the memory is accessed.
923  // add x0, Xbase, #offset
924  // stp x1, x2, [x0]
925  Base = N;
926  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
927  return true;
928 }
929 
930 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
931 /// immediate" address. The "Size" argument is the size in bytes of the memory
932 /// reference, which determines the scale.
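// Worked example: with Size=8 the scaled unsigned form covers byte offsets
// 0..32760 in steps of 8, so "ldr x0, [x1, #32760]" is still a single
// register-plus-immediate access.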
933 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
934  SDValue &Base, SDValue &OffImm) {
935  SDLoc dl(N);
936  const DataLayout &DL = CurDAG->getDataLayout();
937  const TargetLowering *TLI = getTargetLowering();
938  if (N.getOpcode() == ISD::FrameIndex) {
939  int FI = cast<FrameIndexSDNode>(N)->getIndex();
940  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
941  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
942  return true;
943  }
944 
945  if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
946  GlobalAddressSDNode *GAN =
947  dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
948  Base = N.getOperand(0);
949  OffImm = N.getOperand(1);
950  if (!GAN)
951  return true;
952 
953  if (GAN->getOffset() % Size == 0 &&
954  GAN->getGlobal()->getPointerAlignment(DL) >= Size)
955  return true;
956  }
957 
958  if (CurDAG->isBaseWithConstantOffset(N)) {
959  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
960  int64_t RHSC = (int64_t)RHS->getZExtValue();
961  unsigned Scale = Log2_32(Size);
962  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
963  Base = N.getOperand(0);
964  if (Base.getOpcode() == ISD::FrameIndex) {
965  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
966  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
967  }
968  OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
969  return true;
970  }
971  }
972  }
973 
974  // Before falling back to our general case, check if the unscaled
975  // instructions can handle this. If so, that's preferable.
976  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
977  return false;
978 
979  // Base only. The address will be materialized into a register before
980  // the memory is accessed.
981  // add x0, Xbase, #offset
982  // ldr x0, [x0]
983  Base = N;
984  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
985  return true;
986 }
987 
988 /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
989 /// immediate" address. This should only match when there is an offset that
990 /// is not valid for a scaled immediate addressing mode. The "Size" argument
991 /// is the size in bytes of the memory reference, which is needed here to know
992 /// what is valid for a scaled immediate.
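// Example: a byte offset of -8 on a 32-bit access is not representable as a
// scaled unsigned immediate, but it fits the signed 9-bit range [-256, 255],
// so it selects to an unscaled form such as "ldur w0, [x1, #-8]".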
993 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
994  SDValue &Base,
995  SDValue &OffImm) {
996  if (!CurDAG->isBaseWithConstantOffset(N))
997  return false;
998  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
999  int64_t RHSC = RHS->getSExtValue();
1000  // If the offset is valid as a scaled immediate, don't match here.
1001  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
1002  RHSC < (0x1000 << Log2_32(Size)))
1003  return false;
1004  if (RHSC >= -256 && RHSC < 256) {
1005  Base = N.getOperand(0);
1006  if (Base.getOpcode() == ISD::FrameIndex) {
1007  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1008  const TargetLowering *TLI = getTargetLowering();
1009  Base = CurDAG->getTargetFrameIndex(
1010  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1011  }
1012  OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1013  return true;
1014  }
1015  }
1016  return false;
1017 }
1018 
1019 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
1020  SDLoc dl(N);
1021  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1022  SDValue ImpDef = SDValue(
1023  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1024  MachineSDNode *Node = CurDAG->getMachineNode(
1025  TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
1026  return SDValue(Node, 0);
1027 }
1028 
1029 /// Check if the given SHL node (\p N), can be used to form an
1030 /// extended register for an addressing mode.
1031 bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1032  bool WantExtend, SDValue &Offset,
1033  SDValue &SignExtend) {
1034  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1035  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1036  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1037  return false;
1038 
1039  SDLoc dl(N);
1040  if (WantExtend) {
1041  AArch64_AM::ShiftExtendType Ext =
1042  getExtendTypeForNode(N.getOperand(0), true);
1043  if (Ext == AArch64_AM::InvalidShiftExtend)
1044  return false;
1045 
1046  Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1047  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1048  MVT::i32);
1049  } else {
1050  Offset = N.getOperand(0);
1051  SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1052  }
1053 
1054  unsigned LegalShiftVal = Log2_32(Size);
1055  unsigned ShiftVal = CSD->getZExtValue();
1056 
1057  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1058  return false;
1059 
1060  return isWorthFolding(N);
1061 }
1062 
1063 bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1064  SDValue &Base, SDValue &Offset,
1065  SDValue &SignExtend,
1066  SDValue &DoShift) {
1067  if (N.getOpcode() != ISD::ADD)
1068  return false;
1069  SDValue LHS = N.getOperand(0);
1070  SDValue RHS = N.getOperand(1);
1071  SDLoc dl(N);
1072 
1073  // We don't want to match immediate adds here, because they are better lowered
1074  // to the register-immediate addressing modes.
1075  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
1076  return false;
1077 
1078  // Check if this particular node is reused in any non-memory related
1079  // operation. If yes, do not try to fold this node into the address
1080  // computation, since the computation will be kept.
1081  const SDNode *Node = N.getNode();
1082  for (SDNode *UI : Node->uses()) {
1083  if (!isa<MemSDNode>(*UI))
1084  return false;
1085  }
1086 
1087  // Remember if it is worth folding N when it produces extended register.
1088  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
1089 
1090  // Try to match a shifted extend on the RHS.
1091  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1092  SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1093  Base = LHS;
1094  DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1095  return true;
1096  }
1097 
1098  // Try to match a shifted extend on the LHS.
1099  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1100  SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1101  Base = RHS;
1102  DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1103  return true;
1104  }
1105 
1106  // There was no shift, whatever else we find.
1107  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1108 
1109  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
1110  // Try to match an unshifted extend on the LHS.
1111  if (IsExtendedRegisterWorthFolding &&
1112  (Ext = getExtendTypeForNode(LHS, true)) !=
1113  AArch64_AM::InvalidShiftExtend) {
1114  Base = RHS;
1115  Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1116  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1117  MVT::i32);
1118  if (isWorthFolding(LHS))
1119  return true;
1120  }
1121 
1122  // Try to match an unshifted extend on the RHS.
1123  if (IsExtendedRegisterWorthFolding &&
1124  (Ext = getExtendTypeForNode(RHS, true)) !=
1125  AArch64_AM::InvalidShiftExtend) {
1126  Base = LHS;
1127  Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1128  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1129  MVT::i32);
1130  if (isWorthFolding(RHS))
1131  return true;
1132  }
1133 
1134  return false;
1135 }
1136 
1137 // Check if the given immediate is preferred by ADD. If an immediate can be
1138 // encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
1139 // encoded by one MOVZ, return true.
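// For example (illustrative): 0xabc is directly ADD-encodable; 0x45000 only
// has bits in [12,23] and is not a single MOVZ, so "ADD ..., #0x45, LSL #12"
// is preferred; 0x10000 is a single MOVZ, so it is not considered preferred.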
1140 static bool isPreferredADD(int64_t ImmOff) {
1141  // Constant in [0x0, 0xfff] can be encoded in ADD.
1142  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1143  return true;
1144  // Check if it can be encoded in an "ADD LSL #12".
1145  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1146  // As a single MOVZ is faster than an "ADD LSL #12", ignore such constants.
1147  return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1148  (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1149  return false;
1150 }
1151 
1152 bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1153  SDValue &Base, SDValue &Offset,
1154  SDValue &SignExtend,
1155  SDValue &DoShift) {
1156  if (N.getOpcode() != ISD::ADD)
1157  return false;
1158  SDValue LHS = N.getOperand(0);
1159  SDValue RHS = N.getOperand(1);
1160  SDLoc DL(N);
1161 
1162  // Check if this particular node is reused in any non-memory related
1163  // operation. If yes, do not try to fold this node into the address
1164  // computation, since the computation will be kept.
1165  const SDNode *Node = N.getNode();
1166  for (SDNode *UI : Node->uses()) {
1167  if (!isa<MemSDNode>(*UI))
1168  return false;
1169  }
1170 
1171  // Watch out if RHS is a wide immediate; it cannot be selected into the
1172  // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into
1173  // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate
1174  // instructions like:
1175  // MOV X0, WideImmediate
1176  // ADD X1, BaseReg, X0
1177  // LDR X2, [X1, 0]
1178  // For such situation, using [BaseReg, XReg] addressing mode can save one
1179  // ADD/SUB:
1180  // MOV X0, WideImmediate
1181  // LDR X2, [BaseReg, X0]
1182  if (isa<ConstantSDNode>(RHS)) {
1183  int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
1184  unsigned Scale = Log2_32(Size);
1185  // Skip if the immediate can be selected by the load/store addressing mode.
1186  // Also skip if the immediate can be encoded by a single ADD (SUB is also
1187  // checked by using -ImmOff).
1188  if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
1189  isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1190  return false;
1191 
1192  SDValue Ops[] = { RHS };
1193  SDNode *MOVI =
1194  CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1195  SDValue MOVIV = SDValue(MOVI, 0);
1196  // This ADD of two X register will be selected into [Reg+Reg] mode.
1197  N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1198  }
1199 
1200  // Remember if it is worth folding N when it produces extended register.
1201  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
1202 
1203  // Try to match a shifted extend on the RHS.
1204  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1205  SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1206  Base = LHS;
1207  DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1208  return true;
1209  }
1210 
1211  // Try to match a shifted extend on the LHS.
1212  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1213  SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1214  Base = RHS;
1215  DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1216  return true;
1217  }
1218 
1219  // Match any non-shifted, non-extend, non-immediate add expression.
1220  Base = LHS;
1221  Offset = RHS;
1222  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1223  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1224  // Reg1 + Reg2 is free: no check needed.
1225  return true;
1226 }
1227 
1228 SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1229  static const unsigned RegClassIDs[] = {
1230  AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1231  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1232  AArch64::dsub2, AArch64::dsub3};
1233 
1234  return createTuple(Regs, RegClassIDs, SubRegs);
1235 }
1236 
1237 SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1238  static const unsigned RegClassIDs[] = {
1239  AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1240  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1241  AArch64::qsub2, AArch64::qsub3};
1242 
1243  return createTuple(Regs, RegClassIDs, SubRegs);
1244 }
1245 
1246 SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1247  static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1248  AArch64::ZPR3RegClassID,
1249  AArch64::ZPR4RegClassID};
1250  static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1251  AArch64::zsub2, AArch64::zsub3};
1252 
1253  return createTuple(Regs, RegClassIDs, SubRegs);
1254 }
1255 
1256 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1257  const unsigned RegClassIDs[],
1258  const unsigned SubRegs[]) {
1259  // There's no special register-class for a vector-list of 1 element: it's just
1260  // a vector.
1261  if (Regs.size() == 1)
1262  return Regs[0];
1263 
1264  assert(Regs.size() >= 2 && Regs.size() <= 4);
1265 
1266  SDLoc DL(Regs[0]);
1267 
1268  SmallVector<SDValue, 4> Ops;
1269 
1270  // First operand of REG_SEQUENCE is the desired RegClass.
1271  Ops.push_back(
1272  CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1273 
1274  // Then we get pairs of source & subregister-position for the components.
1275  for (unsigned i = 0; i < Regs.size(); ++i) {
1276  Ops.push_back(Regs[i]);
1277  Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1278  }
1279 
1280  SDNode *N =
1281  CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1282  return SDValue(N, 0);
1283 }
1284 
1285 void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1286  bool isExt) {
1287  SDLoc dl(N);
1288  EVT VT = N->getValueType(0);
1289 
1290  unsigned ExtOff = isExt;
1291 
1292  // Form a REG_SEQUENCE to force register allocation.
1293  unsigned Vec0Off = ExtOff + 1;
1294  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1295  N->op_begin() + Vec0Off + NumVecs);
1296  SDValue RegSeq = createQTuple(Regs);
1297 
1298  SmallVector<SDValue, 6> Ops;
1299  if (isExt)
1300  Ops.push_back(N->getOperand(1));
1301  Ops.push_back(RegSeq);
1302  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1303  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1304 }
1305 
1306 bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1307  LoadSDNode *LD = cast<LoadSDNode>(N);
1308  if (LD->isUnindexed())
1309  return false;
1310  EVT VT = LD->getMemoryVT();
1311  EVT DstVT = N->getValueType(0);
1312  ISD::MemIndexedMode AM = LD->getAddressingMode();
1313  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1314 
1315  // We're not doing validity checking here. That was done when checking
1316  // if we should mark the load as indexed or not. We're just selecting
1317  // the right instruction.
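  // (Pre-indexed forms look like "ldr x0, [x1, #8]!", updating the base before
  // the access; post-indexed forms look like "ldr x0, [x1], #8", updating it
  // afterwards.)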
1318  unsigned Opcode = 0;
1319 
1320  ISD::LoadExtType ExtType = LD->getExtensionType();
1321  bool InsertTo64 = false;
1322  if (VT == MVT::i64)
1323  Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1324  else if (VT == MVT::i32) {
1325  if (ExtType == ISD::NON_EXTLOAD)
1326  Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1327  else if (ExtType == ISD::SEXTLOAD)
1328  Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1329  else {
1330  Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1331  InsertTo64 = true;
1332  // The result of the load is only i32. It's the subreg_to_reg that makes
1333  // it into an i64.
1334  DstVT = MVT::i32;
1335  }
1336  } else if (VT == MVT::i16) {
1337  if (ExtType == ISD::SEXTLOAD) {
1338  if (DstVT == MVT::i64)
1339  Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1340  else
1341  Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1342  } else {
1343  Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1344  InsertTo64 = DstVT == MVT::i64;
1345  // The result of the load is only i32. It's the subreg_to_reg that makes
1346  // it into an i64.
1347  DstVT = MVT::i32;
1348  }
1349  } else if (VT == MVT::i8) {
1350  if (ExtType == ISD::SEXTLOAD) {
1351  if (DstVT == MVT::i64)
1352  Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1353  else
1354  Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1355  } else {
1356  Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1357  InsertTo64 = DstVT == MVT::i64;
1358  // The result of the load is only i32. It's the subreg_to_reg that makes
1359  // it into an i64.
1360  DstVT = MVT::i32;
1361  }
1362  } else if (VT == MVT::f16) {
1363  Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1364  } else if (VT == MVT::bf16) {
1365  Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1366  } else if (VT == MVT::f32) {
1367  Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1368  } else if (VT == MVT::f64 || VT.is64BitVector()) {
1369  Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1370  } else if (VT.is128BitVector()) {
1371  Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1372  } else
1373  return false;
1374  SDValue Chain = LD->getChain();
1375  SDValue Base = LD->getBasePtr();
1376  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1377  int OffsetVal = (int)OffsetOp->getZExtValue();
1378  SDLoc dl(N);
1379  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1380  SDValue Ops[] = { Base, Offset, Chain };
1381  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1382  MVT::Other, Ops);
1383 
1384  // Transfer memoperands.
1385  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1386  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1387 
1388  // Either way, we're replacing the node, so tell the caller that.
1389  SDValue LoadedVal = SDValue(Res, 1);
1390  if (InsertTo64) {
1391  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1392  LoadedVal =
1393  SDValue(CurDAG->getMachineNode(
1394  AArch64::SUBREG_TO_REG, dl, MVT::i64,
1395  CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1396  SubReg),
1397  0);
1398  }
1399 
1400  ReplaceUses(SDValue(N, 0), LoadedVal);
1401  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1402  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1403  CurDAG->RemoveDeadNode(N);
1404  return true;
1405 }
1406 
1407 void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1408  unsigned SubRegIdx) {
1409  SDLoc dl(N);
1410  EVT VT = N->getValueType(0);
1411  SDValue Chain = N->getOperand(0);
1412 
1413  SDValue Ops[] = {N->getOperand(2), // Mem operand;
1414  Chain};
1415 
1416  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1417 
1418  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1419  SDValue SuperReg = SDValue(Ld, 0);
1420  for (unsigned i = 0; i < NumVecs; ++i)
1421  ReplaceUses(SDValue(N, i),
1422  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1423 
1424  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1425 
1426  // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1427  // because it's too simple to have needed special treatment during lowering.
1428  if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1429  MachineMemOperand *MemOp = MemIntr->getMemOperand();
1430  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1431  }
1432 
1433  CurDAG->RemoveDeadNode(N);
1434 }
1435 
1436 void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1437  unsigned Opc, unsigned SubRegIdx) {
1438  SDLoc dl(N);
1439  EVT VT = N->getValueType(0);
1440  SDValue Chain = N->getOperand(0);
1441 
1442  SDValue Ops[] = {N->getOperand(1), // Mem operand
1443  N->getOperand(2), // Incremental
1444  Chain};
1445 
1446  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1447  MVT::Untyped, MVT::Other};
1448 
1449  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1450 
1451  // Update uses of write back register
1452  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1453 
1454  // Update uses of vector list
1455  SDValue SuperReg = SDValue(Ld, 1);
1456  if (NumVecs == 1)
1457  ReplaceUses(SDValue(N, 0), SuperReg);
1458  else
1459  for (unsigned i = 0; i < NumVecs; ++i)
1460  ReplaceUses(SDValue(N, i),
1461  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1462 
1463  // Update the chain
1464  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1465  CurDAG->RemoveDeadNode(N);
1466 }
1467 
1468 /// Optimize \param OldBase and \param OldOffset selecting the best addressing
1469 /// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1470 /// new Base and an SDValue representing the new offset.
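// Sketch: the reg+imm candidate corresponds to forms such as
// "ld1w { z0.s }, p0/z, [x0, #1, mul vl]", while the reg+reg fallback uses
// something like "[x0, x1, lsl #2]"; the result picks Opc_ri or Opc_rr.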
1471 std::tuple<unsigned, SDValue, SDValue>
1472 AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1473  unsigned Opc_ri,
1474  const SDValue &OldBase,
1475  const SDValue &OldOffset,
1476  unsigned Scale) {
1477  SDValue NewBase = OldBase;
1478  SDValue NewOffset = OldOffset;
1479  // Detect a possible Reg+Imm addressing mode.
1480  const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1481  N, OldBase, NewBase, NewOffset);
1482 
1483  // Detect a possible reg+reg addressing mode, but only if we haven't already
1484  // detected a Reg+Imm one.
1485  const bool IsRegReg =
1486  !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1487 
1488  // Select the instruction.
1489  return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1490 }
1491 
1492 void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
1493  unsigned Scale, unsigned Opc_ri,
1494  unsigned Opc_rr, bool IsIntr) {
1495  assert(Scale < 4 && "Invalid scaling value.");
1496  SDLoc DL(N);
1497  EVT VT = N->getValueType(0);
1498  SDValue Chain = N->getOperand(0);
1499 
1500  // Optimize addressing mode.
1501  SDValue Base, Offset;
1502  unsigned Opc;
1503  std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1504  N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
1505  CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
1506 
1507  SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
1508  Base, // Memory operand
1509  Offset, Chain};
1510 
1511  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1512 
1513  SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1514  SDValue SuperReg = SDValue(Load, 0);
1515  for (unsigned i = 0; i < NumVecs; ++i)
1516  ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1517  AArch64::zsub0 + i, DL, VT, SuperReg));
1518 
1519  // Copy chain
1520  unsigned ChainIdx = NumVecs;
1521  ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1522  CurDAG->RemoveDeadNode(N);
1523 }
1524 
1525 void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
1526  unsigned Opc) {
1527  SDLoc dl(N);
1528  EVT VT = N->getOperand(2)->getValueType(0);
1529 
1530  // Form a REG_SEQUENCE to force register allocation.
1531  bool Is128Bit = VT.getSizeInBits() == 128;
1532  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1533  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1534 
1535  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
1536  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
1537 
1538  // Transfer memoperands.
1539  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1540  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1541 
1542  ReplaceNode(N, St);
1543 }
1544 
1545 void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
1546  unsigned Scale, unsigned Opc_rr,
1547  unsigned Opc_ri) {
1548  SDLoc dl(N);
1549 
1550  // Form a REG_SEQUENCE to force register allocation.
1551  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1552  SDValue RegSeq = createZTuple(Regs);
1553 
1554  // Optimize addressing mode.
1555  unsigned Opc;
1556  SDValue Offset, Base;
1557  std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1558  N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
1559  CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
1560 
1561  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
1562  Base, // address
1563  Offset, // offset
1564  N->getOperand(0)}; // chain
1565  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
1566 
1567  ReplaceNode(N, St);
1568 }
1569 
1570 bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
1571  SDValue &OffImm) {
1572  SDLoc dl(N);
1573  const DataLayout &DL = CurDAG->getDataLayout();
1574  const TargetLowering *TLI = getTargetLowering();
1575 
1576  // Try to match it for the frame address
1577  if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
1578  int FI = FINode->getIndex();
1579  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1580  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1581  return true;
1582  }
1583 
1584  return false;
1585 }
1586 
1587 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
1588  unsigned Opc) {
1589  SDLoc dl(N);
1590  EVT VT = N->getOperand(2)->getValueType(0);
1591  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1592  MVT::Other}; // Type for the Chain
1593 
1594  // Form a REG_SEQUENCE to force register allocation.
1595  bool Is128Bit = VT.getSizeInBits() == 128;
1596  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1597  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1598 
1599  SDValue Ops[] = {RegSeq,
1600  N->getOperand(NumVecs + 1), // base register
1601  N->getOperand(NumVecs + 2), // Incremental
1602  N->getOperand(0)}; // Chain
1603  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1604 
1605  ReplaceNode(N, St);
1606 }
1607 
1608 namespace {
1609 /// WidenVector - Given a value in the V64 register class, produce the
1610 /// equivalent value in the V128 register class.
1611 class WidenVector {
1612  SelectionDAG &DAG;
1613 
1614 public:
1615  WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
1616 
1617  SDValue operator()(SDValue V64Reg) {
1618  EVT VT = V64Reg.getValueType();
1619  unsigned NarrowSize = VT.getVectorNumElements();
1620  MVT EltTy = VT.getVectorElementType().getSimpleVT();
1621  MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
1622  SDLoc DL(V64Reg);
1623 
1624  SDValue Undef =
1625  SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
1626  return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
1627  }
1628 };
1629 } // namespace
1630 
1631 /// NarrowVector - Given a value in the V128 register class, produce the
1632 /// equivalent value in the V64 register class.
1633 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
1634  EVT VT = V128Reg.getValueType();
1635  unsigned WideSize = VT.getVectorNumElements();
1636  MVT EltTy = VT.getVectorElementType().getSimpleVT();
1637  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
1638 
1639  return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
1640  V128Reg);
1641 }
1642 
1643 void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
1644  unsigned Opc) {
1645  SDLoc dl(N);
1646  EVT VT = N->getValueType(0);
1647  bool Narrow = VT.getSizeInBits() == 64;
1648 
1649  // Form a REG_SEQUENCE to force register allocation.
1650  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1651 
1652  if (Narrow)
1653  transform(Regs, Regs.begin(),
1654  WidenVector(*CurDAG));
1655 
1656  SDValue RegSeq = createQTuple(Regs);
1657 
1658  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1659 
1660  unsigned LaneNo =
1661  cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1662 
1663  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1664  N->getOperand(NumVecs + 3), N->getOperand(0)};
1665  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1666  SDValue SuperReg = SDValue(Ld, 0);
1667 
1668  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1669  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1670  AArch64::qsub2, AArch64::qsub3 };
1671  for (unsigned i = 0; i < NumVecs; ++i) {
1672  SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
1673  if (Narrow)
1674  NV = NarrowVector(NV, *CurDAG);
1675  ReplaceUses(SDValue(N, i), NV);
1676  }
1677 
1678  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1679  CurDAG->RemoveDeadNode(N);
1680 }
1681 
1682 void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
1683  unsigned Opc) {
1684  SDLoc dl(N);
1685  EVT VT = N->getValueType(0);
1686  bool Narrow = VT.getSizeInBits() == 64;
1687 
1688  // Form a REG_SEQUENCE to force register allocation.
1689  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1690 
1691  if (Narrow)
1692  transform(Regs, Regs.begin(),
1693  WidenVector(*CurDAG));
1694 
1695  SDValue RegSeq = createQTuple(Regs);
1696 
1697  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1698  RegSeq->getValueType(0), MVT::Other};
1699 
1700  unsigned LaneNo =
1701  cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1702 
1703  SDValue Ops[] = {RegSeq,
1704  CurDAG->getTargetConstant(LaneNo, dl,
1705  MVT::i64), // Lane Number
1706  N->getOperand(NumVecs + 2), // Base register
1707  N->getOperand(NumVecs + 3), // Incremental
1708  N->getOperand(0)};
1709  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1710 
1711  // Update uses of the write back register
1712  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1713 
1714  // Update uses of the vector list
1715  SDValue SuperReg = SDValue(Ld, 1);
1716  if (NumVecs == 1) {
1717  ReplaceUses(SDValue(N, 0),
1718  Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
1719  } else {
1720  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1721  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1722  AArch64::qsub2, AArch64::qsub3 };
1723  for (unsigned i = 0; i < NumVecs; ++i) {
1724  SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
1725  SuperReg);
1726  if (Narrow)
1727  NV = NarrowVector(NV, *CurDAG);
1728  ReplaceUses(SDValue(N, i), NV);
1729  }
1730  }
1731 
1732  // Update the Chain
1733  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1734  CurDAG->RemoveDeadNode(N);
1735 }
1736 
1737 void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
1738  unsigned Opc) {
1739  SDLoc dl(N);
1740  EVT VT = N->getOperand(2)->getValueType(0);
1741  bool Narrow = VT.getSizeInBits() == 64;
1742 
1743  // Form a REG_SEQUENCE to force register allocation.
1744  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1745 
1746  if (Narrow)
1747  transform(Regs, Regs.begin(),
1748  WidenVector(*CurDAG));
1749 
1750  SDValue RegSeq = createQTuple(Regs);
1751 
1752  unsigned LaneNo =
1753  cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1754 
1755  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1756  N->getOperand(NumVecs + 3), N->getOperand(0)};
1757  SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
1758 
1759  // Transfer memoperands.
1760  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1761  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1762 
1763  ReplaceNode(N, St);
1764 }
1765 
1766 void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
1767  unsigned Opc) {
1768  SDLoc dl(N);
1769  EVT VT = N->getOperand(2)->getValueType(0);
1770  bool Narrow = VT.getSizeInBits() == 64;
1771 
1772  // Form a REG_SEQUENCE to force register allocation.
1773  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1774 
1775  if (Narrow)
1776  transform(Regs, Regs.begin(),
1777  WidenVector(*CurDAG));
1778 
1779  SDValue RegSeq = createQTuple(Regs);
1780 
1781  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1782  MVT::Other};
1783 
1784  unsigned LaneNo =
1785  cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1786 
1787  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1788  N->getOperand(NumVecs + 2), // Base Register
1789  N->getOperand(NumVecs + 3), // Incremental
1790  N->getOperand(0)};
1791  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1792 
1793  // Transfer memoperands.
1794  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1795  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1796 
1797  ReplaceNode(N, St);
1798 }
1799 
1800 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
1801  unsigned &Opc, SDValue &Opd0,
1802  unsigned &LSB, unsigned &MSB,
1803  unsigned NumberOfIgnoredLowBits,
1804  bool BiggerPattern) {
1805  assert(N->getOpcode() == ISD::AND &&
1806  "N must be a AND operation to call this function");
1807 
1808  EVT VT = N->getValueType(0);
1809 
1810  // We could test the type of VT here and return false when it does not
1811  // match, but since that check is already done before this function is
1812  // called, we turn it into an assert to avoid redundant code.
1813  assert((VT == MVT::i32 || VT == MVT::i64) &&
1814  "Type checking must have been done before calling this function");
1815 
1816  // FIXME: simplify-demanded-bits in DAGCombine will probably have
1817  // changed the AND node to a 32-bit mask operation. We'll have to
1818  // undo that as part of the transform here if we want to catch all
1819  // the opportunities.
1820  // Currently the NumberOfIgnoredLowBits argument helps to recover
1821  // from these situations when matching a bigger pattern (bitfield insert).
1822 
1823  // For unsigned extracts, check for a shift right and mask
1824  uint64_t AndImm = 0;
1825  if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
1826  return false;
1827 
1828  const SDNode *Op0 = N->getOperand(0).getNode();
1829 
1830  // Because of simplify-demanded-bits in DAGCombine, the mask may have been
1831  // simplified. Try to undo that
1832  AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
1833 
1834  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
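  // For example (illustrative values): 0x0f & 0x10 == 0, so 0x0f is a mask of
  // the low bits, while 0x0e & 0x0f != 0, so 0x0e is not.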
1835  if (AndImm & (AndImm + 1))
1836  return false;
1837 
1838  bool ClampMSB = false;
1839  uint64_t SrlImm = 0;
1840  // Handle the SRL + ANY_EXTEND case.
1841  if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
1842  isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
1843  // Extend the incoming operand of the SRL to 64-bit.
1844  Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
1845  // Make sure to clamp the MSB so that we preserve the semantics of the
1846  // original operations.
1847  ClampMSB = true;
1848  } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
1849  isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
1850  SrlImm)) {
1851  // If the shift result was truncated, we can still combine them.
1852  Opd0 = Op0->getOperand(0).getOperand(0);
1853 
1854  // Use the type of SRL node.
1855  VT = Opd0->getValueType(0);
1856  } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
1857  Opd0 = Op0->getOperand(0);
1858  ClampMSB = (VT == MVT::i32);
1859  } else if (BiggerPattern) {
1860  // Let's pretend a 0 shift right has been performed.
1861  // The resulting code will be at least as good as the original one
1862  // plus it may expose more opportunities for bitfield insert pattern.
1863  // FIXME: Currently we limit this to the bigger pattern, because
1864  // some optimizations expect AND and not UBFM.
1865  Opd0 = N->getOperand(0);
1866  } else
1867  return false;
1868 
1869  // Bail out on large immediates. This happens when no proper
1870  // combining/constant folding was performed.
1871  if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
1872  LLVM_DEBUG(
1873  (dbgs() << N
1874  << ": Found large shift immediate, this should not happen\n"));
1875  return false;
1876  }
1877 
1878  LSB = SrlImm;
1879  MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm)
1880  : countTrailingOnes<uint64_t>(AndImm)) -
1881  1;
1882  if (ClampMSB)
1883  // Since we're moving the extend before the right shift operation, we need
1884  // to clamp the MSB to make sure we don't shift in undefined bits instead of
1885  // the zeros which would get shifted in with the original right shift
1886  // operation.
1887  MSB = MSB > 31 ? 31 : MSB;
1888 
1889  Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
1890  return true;
1891 }
1892 
1893 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
1894  SDValue &Opd0, unsigned &Immr,
1895  unsigned &Imms) {
1896  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
1897 
1898  EVT VT = N->getValueType(0);
1899  unsigned BitWidth = VT.getSizeInBits();
1900  assert((VT == MVT::i32 || VT == MVT::i64) &&
1901  "Type checking must have been done before calling this function");
1902 
1903  SDValue Op = N->getOperand(0);
1904  if (Op->getOpcode() == ISD::TRUNCATE) {
1905  Op = Op->getOperand(0);
1906  VT = Op->getValueType(0);
1907  BitWidth = VT.getSizeInBits();
1908  }
1909 
1910  uint64_t ShiftImm;
1911  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
1912  !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
1913  return false;
1914 
1915  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
1916  if (ShiftImm + Width > BitWidth)
1917  return false;
1918 
1919  Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
1920  Opd0 = Op.getOperand(0);
1921  Immr = ShiftImm;
1922  Imms = ShiftImm + Width - 1;
1923  return true;
1924 }
1925 
1926 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
1927  SDValue &Opd0, unsigned &LSB,
1928  unsigned &MSB) {
1929  // We are looking for the following pattern, which extracts a number of
1930  // contiguous bits from the source value and places them starting at the
1931  // LSB of the destination value; all other destination bits are set to zero:
1932  //
1933  // Value2 = AND Value, MaskImm
1934  // SRL Value2, ShiftImm
1935  //
1936  // where MaskImm >> ShiftImm determines the bit width.
1937  //
1938  // This gets selected into a single UBFM:
1939  //
1940  // UBFM Value, ShiftImm, BitWide + ShiftImm - 1
1941  //
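  // For example (illustrative values): with MaskImm = 0xF0 and ShiftImm = 4,
  // MaskImm >> ShiftImm = 0x0F, so BitWide = 4 and the pattern selects to
  // UBFM Value, 4, 7, i.e. an extract of bits [7:4].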
1942 
1943  if (N->getOpcode() != ISD::SRL)
1944  return false;
1945 
1946  uint64_t AndMask = 0;
1947  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
1948  return false;
1949 
1950  Opd0 = N->getOperand(0).getOperand(0);
1951 
1952  uint64_t SrlImm = 0;
1953  if (!isIntImmediate(N->getOperand(1), SrlImm))
1954  return false;
1955 
1956  // Check whether we really have several bits extract here.
1957  unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm));
1958  if (BitWide && isMask_64(AndMask >> SrlImm)) {
1959  if (N->getValueType(0) == MVT::i32)
1960  Opc = AArch64::UBFMWri;
1961  else
1962  Opc = AArch64::UBFMXri;
1963 
1964  LSB = SrlImm;
1965  MSB = BitWide + SrlImm - 1;
1966  return true;
1967  }
1968 
1969  return false;
1970 }
1971 
1972 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
1973  unsigned &Immr, unsigned &Imms,
1974  bool BiggerPattern) {
1975  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
1976  "N must be a SHR/SRA operation to call this function");
1977 
1978  EVT VT = N->getValueType(0);
1979 
1980  // We could test the type of VT here and return false when it does not
1981  // match, but since that check is already done before this function is
1982  // called, we turn it into an assert to avoid redundant code.
1983  assert((VT == MVT::i32 || VT == MVT::i64) &&
1984  "Type checking must have been done before calling this function");
1985 
1986  // Check for AND + SRL doing several bits extract.
1987  if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
1988  return true;
1989 
1990  // We're looking for a shift of a shift.
1991  uint64_t ShlImm = 0;
1992  uint64_t TruncBits = 0;
1993  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
1994  Opd0 = N->getOperand(0).getOperand(0);
1995  } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
1996  N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
1997  // We are looking for a shift of a truncate. Truncating from i64 to i32
1998  // can be considered as setting the high 32 bits to zero. Our strategy here
1999  // is to always generate a 64-bit UBFM. This consistency will help the CSE
2000  // pass find more redundancy later.
2001  Opd0 = N->getOperand(0).getOperand(0);
2002  TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2003  VT = Opd0.getValueType();
2004  assert(VT == MVT::i64 && "the promoted type should be i64");
2005  } else if (BiggerPattern) {
2006  // Let's pretend a 0 shift left has been performed.
2007  // FIXME: Currently we limit this to the bigger pattern case,
2008  // because some optimizations expect AND and not UBFM
2009  Opd0 = N->getOperand(0);
2010  } else
2011  return false;
2012 
2013  // Missing combines/constant folding may have left us with strange
2014  // constants.
2015  if (ShlImm >= VT.getSizeInBits()) {
2016  LLVM_DEBUG(
2017  (dbgs() << N
2018  << ": Found large shift immediate, this should not happen\n"));
2019  return false;
2020  }
2021 
2022  uint64_t SrlImm = 0;
2023  if (!isIntImmediate(N->getOperand(1), SrlImm))
2024  return false;
2025 
2026  assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2027  "bad amount in shift node!");
2028  int immr = SrlImm - ShlImm;
2029  Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2030  Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2031  // SRA requires a signed extraction
2032  if (VT == MVT::i32)
2033  Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2034  else
2035  Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2036  return true;
2037 }
2038 
2039 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2040  assert(N->getOpcode() == ISD::SIGN_EXTEND);
2041 
2042  EVT VT = N->getValueType(0);
2043  EVT NarrowVT = N->getOperand(0)->getValueType(0);
2044  if (VT != MVT::i64 || NarrowVT != MVT::i32)
2045  return false;
2046 
2047  uint64_t ShiftImm;
2048  SDValue Op = N->getOperand(0);
2049  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2050  return false;
2051 
2052  SDLoc dl(N);
2053  // Extend the incoming operand of the shift to 64-bits.
2054  SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2055  unsigned Immr = ShiftImm;
2056  unsigned Imms = NarrowVT.getSizeInBits() - 1;
2057  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2058  CurDAG->getTargetConstant(Imms, dl, VT)};
2059  CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2060  return true;
2061 }
2062 
2063 /// Try to form fcvtl2 instructions from a floating-point extend of a high-half
2064 /// extract of a subvector.
2065 bool AArch64DAGToDAGISel::tryHighFPExt(SDNode *N) {
2066  assert(N->getOpcode() == ISD::FP_EXTEND);
2067 
2068  // There are 2 forms of fcvtl2 - extend to double or extend to float.
2069  SDValue Extract = N->getOperand(0);
2070  EVT VT = N->getValueType(0);
2071  EVT NarrowVT = Extract.getValueType();
2072  if ((VT != MVT::v2f64 || NarrowVT != MVT::v2f32) &&
2073  (VT != MVT::v4f32 || NarrowVT != MVT::v4f16))
2074  return false;
2075 
2076  // Optionally look past a bitcast.
2077  Extract = peekThroughBitcasts(Extract);
2078  if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2079  return false;
2080 
2081  // Match extract from start of high half index.
2082  // Example: v8i16 -> v4i16 means the extract must begin at index 4.
2083  unsigned ExtractIndex = Extract.getConstantOperandVal(1);
2084  if (ExtractIndex != Extract.getValueType().getVectorNumElements())
2085  return false;
2086 
2087  auto Opcode = VT == MVT::v2f64 ? AArch64::FCVTLv4i32 : AArch64::FCVTLv8i16;
2088  CurDAG->SelectNodeTo(N, Opcode, VT, Extract.getOperand(0));
2089  return true;
2090 }
2091 
2092 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2093  SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2094  unsigned NumberOfIgnoredLowBits = 0,
2095  bool BiggerPattern = false) {
2096  if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2097  return false;
2098 
2099  switch (N->getOpcode()) {
2100  default:
2101  if (!N->isMachineOpcode())
2102  return false;
2103  break;
2104  case ISD::AND:
2105  return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2106  NumberOfIgnoredLowBits, BiggerPattern);
2107  case ISD::SRL:
2108  case ISD::SRA:
2109  return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2110 
2111  case ISD::SIGN_EXTEND_INREG:
2112  return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2113  }
2114 
2115  unsigned NOpc = N->getMachineOpcode();
2116  switch (NOpc) {
2117  default:
2118  return false;
2119  case AArch64::SBFMWri:
2120  case AArch64::UBFMWri:
2121  case AArch64::SBFMXri:
2122  case AArch64::UBFMXri:
2123  Opc = NOpc;
2124  Opd0 = N->getOperand(0);
2125  Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
2126  Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
2127  return true;
2128  }
2129  // Unreachable
2130  return false;
2131 }
2132 
2133 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2134  unsigned Opc, Immr, Imms;
2135  SDValue Opd0;
2136  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2137  return false;
2138 
2139  EVT VT = N->getValueType(0);
2140  SDLoc dl(N);
2141 
2142  // If the bit extract operation is 64bit but the original type is 32bit, we
2143  // need to add one EXTRACT_SUBREG.
2144  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2145  SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2146  CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2147 
2148  SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2149  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
2150  ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
2151  MVT::i32, SDValue(BFM, 0), SubReg));
2152  return true;
2153  }
2154 
2155  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2156  CurDAG->getTargetConstant(Imms, dl, VT)};
2157  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2158  return true;
2159 }
2160 
2161 /// Does DstMask form a complementary pair with the mask provided by
2162 /// BitsToBeInserted, suitable for use in a BFI instruction? Roughly speaking,
2163 /// this asks whether DstMask zeroes precisely those bits that will be set by
2164 /// the other half.
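/// For example (illustrative 32-bit values), DstMask = 0xFFFF00FF pairs with
/// bits to be inserted that occupy exactly bits [15:8]: the AND of the two
/// masks is zero and their OR is all ones.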
2165 static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2166  unsigned NumberOfIgnoredHighBits, EVT VT) {
2167  assert((VT == MVT::i32 || VT == MVT::i64) &&
2168  "i32 or i64 mask type expected!");
2169  unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2170 
2171  APInt SignificantDstMask = APInt(BitWidth, DstMask);
2172  APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2173 
2174  return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2175  (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2176 }
2177 
2178 // Look for bits that will be useful for later uses.
2179 // A bit is considered useless as soon as it is dropped and never used
2180 // before it has been dropped.
2181 // E.g., looking for the useful bits of x:
2182 // 1. y = x & 0x7
2183 // 2. z = y >> 2
2184 // After #1, x useful bits are 0x7, then the useful bits of x, live through
2185 // y.
2186 // After #2, the useful bits of x are 0x4.
2187 // However, if x is used by an unpredictable instruction, then all its bits
2188 // are useful.
2189 // E.g.
2190 // 1. y = x & 0x7
2191 // 2. z = y >> 2
2192 // 3. str x, [@x]
2193 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2194 
2195 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2196  unsigned Depth) {
2197  uint64_t Imm =
2198  cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2199  Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2200  UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2201  getUsefulBits(Op, UsefulBits, Depth + 1);
2202 }
2203 
2204 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2205  uint64_t Imm, uint64_t MSB,
2206  unsigned Depth) {
2207  // inherit the bitwidth value
2208  APInt OpUsefulBits(UsefulBits);
2209  OpUsefulBits = 1;
2210 
2211  if (MSB >= Imm) {
2212  OpUsefulBits <<= MSB - Imm + 1;
2213  --OpUsefulBits;
2214  // The interesting part will be in the lower part of the result
2215  getUsefulBits(Op, OpUsefulBits, Depth + 1);
2216  // The interesting part was starting at Imm in the argument
2217  OpUsefulBits <<= Imm;
2218  } else {
2219  OpUsefulBits <<= MSB + 1;
2220  --OpUsefulBits;
2221  // The interesting part will be shifted in the result
2222  OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2223  getUsefulBits(Op, OpUsefulBits, Depth + 1);
2224  // The interesting part was at zero in the argument
2225  OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2226  }
2227 
2228  UsefulBits &= OpUsefulBits;
2229 }
2230 
2231 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2232  unsigned Depth) {
2233  uint64_t Imm =
2234  cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2235  uint64_t MSB =
2236  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2237 
2238  getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2239 }
2240 
2241 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2242  unsigned Depth) {
2243  uint64_t ShiftTypeAndValue =
2244  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2245  APInt Mask(UsefulBits);
2246  Mask.clearAllBits();
2247  Mask.flipAllBits();
2248 
2249  if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2250  // Shift Left
2251  uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2252  Mask <<= ShiftAmt;
2253  getUsefulBits(Op, Mask, Depth + 1);
2254  Mask.lshrInPlace(ShiftAmt);
2255  } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2256  // Shift Right
2257  // We do not handle AArch64_AM::ASR, because the sign will change the
2258  // number of useful bits
2259  uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2260  Mask.lshrInPlace(ShiftAmt);
2261  getUsefulBits(Op, Mask, Depth + 1);
2262  Mask <<= ShiftAmt;
2263  } else
2264  return;
2265 
2266  UsefulBits &= Mask;
2267 }
2268 
2269 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
2270  unsigned Depth) {
2271  uint64_t Imm =
2272  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2273  uint64_t MSB =
2274  cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
2275 
2276  APInt OpUsefulBits(UsefulBits);
2277  OpUsefulBits = 1;
2278 
2279  APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
2280  ResultUsefulBits.flipAllBits();
2281  APInt Mask(UsefulBits.getBitWidth(), 0);
2282 
2283  getUsefulBits(Op, ResultUsefulBits, Depth + 1);
2284 
2285  if (MSB >= Imm) {
2286  // The instruction is a BFXIL.
2287  uint64_t Width = MSB - Imm + 1;
2288  uint64_t LSB = Imm;
2289 
2290  OpUsefulBits <<= Width;
2291  --OpUsefulBits;
2292 
2293  if (Op.getOperand(1) == Orig) {
2294  // Copy the low bits from the result to bits starting from LSB.
2295  Mask = ResultUsefulBits & OpUsefulBits;
2296  Mask <<= LSB;
2297  }
2298 
2299  if (Op.getOperand(0) == Orig)
2300  // Bits starting from LSB in the input contribute to the result.
2301  Mask |= (ResultUsefulBits & ~OpUsefulBits);
2302  } else {
2303  // The instruction is a BFI.
2304  uint64_t Width = MSB + 1;
2305  uint64_t LSB = UsefulBits.getBitWidth() - Imm;
2306 
2307  OpUsefulBits <<= Width;
2308  --OpUsefulBits;
2309  OpUsefulBits <<= LSB;
2310 
2311  if (Op.getOperand(1) == Orig) {
2312  // Copy the bits from the result to the zero bits.
2313  Mask = ResultUsefulBits & OpUsefulBits;
2314  Mask.lshrInPlace(LSB);
2315  }
2316 
2317  if (Op.getOperand(0) == Orig)
2318  Mask |= (ResultUsefulBits & ~OpUsefulBits);
2319  }
2320 
2321  UsefulBits &= Mask;
2322 }
2323 
2324 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
2325  SDValue Orig, unsigned Depth) {
2326 
2327  // Users of this node should have already been instruction selected
2328  // FIXME: Can we turn that into an assert?
2329  if (!UserNode->isMachineOpcode())
2330  return;
2331 
2332  switch (UserNode->getMachineOpcode()) {
2333  default:
2334  return;
2335  case AArch64::ANDSWri:
2336  case AArch64::ANDSXri:
2337  case AArch64::ANDWri:
2338  case AArch64::ANDXri:
2339  // We increment Depth only when we call getUsefulBits
2340  return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
2341  Depth);
2342  case AArch64::UBFMWri:
2343  case AArch64::UBFMXri:
2344  return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
2345 
2346  case AArch64::ORRWrs:
2347  case AArch64::ORRXrs:
2348  if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
2349  getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
2350  Depth);
2351  return;
2352  case AArch64::BFMWri:
2353  case AArch64::BFMXri:
2354  return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
2355 
2356  case AArch64::STRBBui:
2357  case AArch64::STURBBi:
2358  if (UserNode->getOperand(0) != Orig)
2359  return;
2360  UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
2361  return;
2362 
2363  case AArch64::STRHHui:
2364  case AArch64::STURHHi:
2365  if (UserNode->getOperand(0) != Orig)
2366  return;
2367  UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
2368  return;
2369  }
2370 }
2371 
2372 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
2373  if (Depth >= SelectionDAG::MaxRecursionDepth)
2374  return;
2375  // Initialize UsefulBits
2376  if (!Depth) {
2377  unsigned Bitwidth = Op.getScalarValueSizeInBits();
2378  // At the beginning, assume every produced bit is useful
2379  UsefulBits = APInt(Bitwidth, 0);
2380  UsefulBits.flipAllBits();
2381  }
2382  APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
2383 
2384  for (SDNode *Node : Op.getNode()->uses()) {
2385  // A use cannot produce useful bits
2386  APInt UsefulBitsForUse = APInt(UsefulBits);
2387  getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
2388  UsersUsefulBits |= UsefulBitsForUse;
2389  }
2390  // UsefulBits contains the produced bits that are meaningful for the
2391  // current definition, thus a user cannot make a bit meaningful at
2392  // this point
2393  UsefulBits &= UsersUsefulBits;
2394 }
2395 
2396 /// Create a machine node performing a notional SHL of Op by ShlAmount. If
2397 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
2398 /// 0, return Op unchanged.
2399 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
2400  if (ShlAmount == 0)
2401  return Op;
2402 
2403  EVT VT = Op.getValueType();
2404  SDLoc dl(Op);
2405  unsigned BitWidth = VT.getSizeInBits();
2406  unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2407 
2408  SDNode *ShiftNode;
2409  if (ShlAmount > 0) {
2410  // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
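  // For example (illustrative), LSL wD, wN, #8 == UBFM wD, wN, #24, #23.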
2411  ShiftNode = CurDAG->getMachineNode(
2412  UBFMOpc, dl, VT, Op,
2413  CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
2414  CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
2415  } else {
2416  // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
2417  assert(ShlAmount < 0 && "expected right shift");
2418  int ShrAmount = -ShlAmount;
2419  ShiftNode = CurDAG->getMachineNode(
2420  UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
2421  CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
2422  }
2423 
2424  return SDValue(ShiftNode, 0);
2425 }
2426 
2427 /// Does this tree qualify as an attempt to move a bitfield into position,
2428 /// essentially "(and (shl VAL, N), Mask)".
2429 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
2430  bool BiggerPattern,
2431  SDValue &Src, int &ShiftAmount,
2432  int &MaskWidth) {
2433  EVT VT = Op.getValueType();
2434  unsigned BitWidth = VT.getSizeInBits();
2435  (void)BitWidth;
2436  assert(BitWidth == 32 || BitWidth == 64);
2437 
2438  KnownBits Known = CurDAG->computeKnownBits(Op);
2439 
2440  // Non-zero in the sense that they're not provably zero, which is the key
2441  // point if we want to use this value
2442  uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
2443 
2444  // Discard a constant AND mask if present. It's safe because the node will
2445  // already have been factored into the computeKnownBits calculation above.
2446  uint64_t AndImm;
2447  if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
2448  assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0);
2449  Op = Op.getOperand(0);
2450  }
2451 
2452  // Don't match if the SHL has more than one use, since then we'll end up
2453  // generating SHL+UBFIZ instead of just keeping SHL+AND.
2454  if (!BiggerPattern && !Op.hasOneUse())
2455  return false;
2456 
2457  uint64_t ShlImm;
2458  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
2459  return false;
2460  Op = Op.getOperand(0);
2461 
2462  if (!isShiftedMask_64(NonZeroBits))
2463  return false;
2464 
2465  ShiftAmount = countTrailingZeros(NonZeroBits);
2466  MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount);
2467 
2468  // BFI encompasses sufficiently many nodes that it's worth inserting an extra
2469  // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
2470  // amount. BiggerPattern is true when this pattern is being matched for BFI,
2471  // BiggerPattern is false when this pattern is being matched for UBFIZ, in
2472  // which case it is not profitable to insert an extra shift.
2473  if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
2474  return false;
2475  Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
2476 
2477  return true;
2478 }
2479 
2480 static bool isShiftedMask(uint64_t Mask, EVT VT) {
2481  assert(VT == MVT::i32 || VT == MVT::i64);
2482  if (VT == MVT::i32)
2483  return isShiftedMask_32(Mask);
2484  return isShiftedMask_64(Mask);
2485 }
2486 
2487 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
2488 // inserted only sets known zero bits.
2489 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
2490  assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
2491 
2492  EVT VT = N->getValueType(0);
2493  if (VT != MVT::i32 && VT != MVT::i64)
2494  return false;
2495 
2496  unsigned BitWidth = VT.getSizeInBits();
2497 
2498  uint64_t OrImm;
2499  if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
2500  return false;
2501 
2502  // Skip this transformation if the ORR immediate can be encoded in the ORR.
2503  // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
2504  // performance neutral.
2505  if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
2506  return false;
2507 
2508  uint64_t MaskImm;
2509  SDValue And = N->getOperand(0);
2510  // Must be a single use AND with an immediate operand.
2511  if (!And.hasOneUse() ||
2512  !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
2513  return false;
2514 
2515  // Compute the Known Zero for the AND as this allows us to catch more general
2516  // cases than just looking for AND with imm.
2517  KnownBits Known = CurDAG->computeKnownBits(And);
2518 
2519  // Non-zero in the sense that they're not provably zero, which is the key
2520  // point if we want to use this value.
2521  uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
2522 
2523  // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
2524  if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
2525  return false;
2526 
2527  // The bits being inserted must only set those bits that are known to be zero.
2528  if ((OrImm & NotKnownZero) != 0) {
2529  // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
2530  // currently handle this case.
2531  return false;
2532  }
2533 
2534  // BFI/BFXIL dst, src, #lsb, #width.
2535  int LSB = countTrailingOnes(NotKnownZero);
2536  int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation();
2537 
2538  // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
2539  unsigned ImmR = (BitWidth - LSB) % BitWidth;
2540  unsigned ImmS = Width - 1;
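  // For example (illustrative, 32-bit): inserting an 8-bit field at LSB 8
  // gives ImmR = (32 - 8) % 32 = 24 and ImmS = 7, i.e. BFM Wd, Wn, #24, #7.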
2541 
2542  // If we're creating a BFI instruction avoid cases where we need more
2543  // instructions to materialize the BFI constant as compared to the original
2544  // ORR. A BFXIL will use the same constant as the original ORR, so the code
2545  // should be no worse in this case.
2546  bool IsBFI = LSB != 0;
2547  uint64_t BFIImm = OrImm >> LSB;
2548  if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
2549  // We have a BFI instruction and we know the constant can't be materialized
2550  // with a ORR-immediate with the zero register.
2551  unsigned OrChunks = 0, BFIChunks = 0;
2552  for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
2553  if (((OrImm >> Shift) & 0xFFFF) != 0)
2554  ++OrChunks;
2555  if (((BFIImm >> Shift) & 0xFFFF) != 0)
2556  ++BFIChunks;
2557  }
2558  if (BFIChunks > OrChunks)
2559  return false;
2560  }
2561 
2562  // Materialize the constant to be inserted.
2563  SDLoc DL(N);
2564  unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
2565  SDNode *MOVI = CurDAG->getMachineNode(
2566  MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
2567 
2568  // Create the BFI/BFXIL instruction.
2569  SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
2570  CurDAG->getTargetConstant(ImmR, DL, VT),
2571  CurDAG->getTargetConstant(ImmS, DL, VT)};
2572  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2573  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2574  return true;
2575 }
2576 
2577 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
2578  SelectionDAG *CurDAG) {
2579  assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
2580 
2581  EVT VT = N->getValueType(0);
2582  if (VT != MVT::i32 && VT != MVT::i64)
2583  return false;
2584 
2585  unsigned BitWidth = VT.getSizeInBits();
2586 
2587  // Because of simplify-demanded-bits in DAGCombine, involved masks may not
2588  // have the expected shape. Try to undo that.
2589 
2590  unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
2591  unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
2592 
2593  // Given an OR operation, check if we have the following pattern
2594  // ubfm c, b, imm, imm2 (or something that does the same job, see
2595  // isBitfieldExtractOp)
2596  // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
2597  // countTrailingZeros(mask2) == imm2 - imm + 1
2598  // f = d | c
2599  // if yes, replace the OR instruction with:
2600  // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
2601 
2602  // OR is commutative, check all combinations of operand order and values of
2603  // BiggerPattern, i.e.
2604  // Opd0, Opd1, BiggerPattern=false
2605  // Opd1, Opd0, BiggerPattern=false
2606  // Opd0, Opd1, BiggerPattern=true
2607  // Opd1, Opd0, BiggerPattern=true
2608  // Several of these combinations may match, so check with BiggerPattern=false
2609  // first since that will produce better results by matching more instructions
2610  // and/or inserting fewer extra instructions.
2611  for (int I = 0; I < 4; ++I) {
2612 
2613  SDValue Dst, Src;
2614  unsigned ImmR, ImmS;
2615  bool BiggerPattern = I / 2;
2616  SDValue OrOpd0Val = N->getOperand(I % 2);
2617  SDNode *OrOpd0 = OrOpd0Val.getNode();
2618  SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
2619  SDNode *OrOpd1 = OrOpd1Val.getNode();
2620 
2621  unsigned BFXOpc;
2622  int DstLSB, Width;
2623  if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
2624  NumberOfIgnoredLowBits, BiggerPattern)) {
2625  // Check that the returned opcode is compatible with the pattern,
2626  // i.e., same type and zero extended (U and not S)
2627  if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
2628  (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
2629  continue;
2630 
2631  // Compute the width of the bitfield insertion
2632  DstLSB = 0;
2633  Width = ImmS - ImmR + 1;
2634  // FIXME: This constraint is to catch bitfield insertion; we may
2635  // want to widen the pattern if we want to grab the general bitfield
2636  // move case
2637  if (Width <= 0)
2638  continue;
2639 
2640  // If the mask on the insertee is correct, we have a BFXIL operation. We
2641  // can share the ImmR and ImmS values from the already-computed UBFM.
2642  } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
2643  BiggerPattern,
2644  Src, DstLSB, Width)) {
2645  ImmR = (BitWidth - DstLSB) % BitWidth;
2646  ImmS = Width - 1;
2647  } else
2648  continue;
2649 
2650  // Check the second part of the pattern
2651  EVT VT = OrOpd1Val.getValueType();
2652  assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
2653 
2654  // Compute the Known Zero for the candidate of the first operand.
2655  // This allows us to catch more general cases than just looking for
2656  // AND with imm. Indeed, simplify-demanded-bits may have removed
2657  // the AND instruction because it proves it was useless.
2658  KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
2659 
2660  // Check if there is enough room for the second operand to appear
2661  // in the first one
2662  APInt BitsToBeInserted =
2663  APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
2664 
2665  if ((BitsToBeInserted & ~Known.Zero) != 0)
2666  continue;
2667 
2668  // Set the first operand
2669  uint64_t Imm;
2670  if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
2671  isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
2672  // In that case, we can eliminate the AND
2673  Dst = OrOpd1->getOperand(0);
2674  else
2675  // Maybe the AND has been removed by simplify-demanded-bits
2676  // or is useful because it discards more bits
2677  Dst = OrOpd1Val;
2678 
2679  // both parts match
2680  SDLoc DL(N);
2681  SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
2682  CurDAG->getTargetConstant(ImmS, DL, VT)};
2683  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2684  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2685  return true;
2686  }
2687 
2688  // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
2689  // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
2690  // mask (e.g., 0x000ffff0).
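  // For example (illustrative, 32-bit), Mask1Imm = 0x000ffff0 (a shifted mask)
  // with Mask0Imm = 0xfff0000f = ~Mask1Imm qualifies, giving LSB = 4 and
  // Width = 16.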
2691  uint64_t Mask0Imm, Mask1Imm;
2692  SDValue And0 = N->getOperand(0);
2693  SDValue And1 = N->getOperand(1);
2694  if (And0.hasOneUse() && And1.hasOneUse() &&
2695  isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
2696  isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
2697  APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
2698  (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
2699 
2700  // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
2701  // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
2702  // bits to be inserted.
2703  if (isShiftedMask(Mask0Imm, VT)) {
2704  std::swap(And0, And1);
2705  std::swap(Mask0Imm, Mask1Imm);
2706  }
2707 
2708  SDValue Src = And1->getOperand(0);
2709  SDValue Dst = And0->getOperand(0);
2710  unsigned LSB = countTrailingZeros(Mask1Imm);
2711  int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation();
2712 
2713  // The BFXIL inserts the low-order bits from a source register, so right
2714  // shift the needed bits into place.
2715  SDLoc DL(N);
2716  unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2717  uint64_t LsrImm = LSB;
2718  if (Src->hasOneUse() &&
2719  isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
2720  (LsrImm + LSB) < BitWidth) {
2721  Src = Src->getOperand(0);
2722  LsrImm += LSB;
2723  }
2724 
2725  SDNode *LSR = CurDAG->getMachineNode(
2726  ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
2727  CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
2728 
2729  // BFXIL is an alias of BFM, so translate to BFM operands.
2730  unsigned ImmR = (BitWidth - LSB) % BitWidth;
2731  unsigned ImmS = Width - 1;
2732 
2733  // Create the BFXIL instruction.
2734  SDValue Ops[] = {Dst, SDValue(LSR, 0),
2735  CurDAG->getTargetConstant(ImmR, DL, VT),
2736  CurDAG->getTargetConstant(ImmS, DL, VT)};
2737  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2738  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2739  return true;
2740  }
2741 
2742  return false;
2743 }
2744 
2745 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
2746  if (N->getOpcode() != ISD::OR)
2747  return false;
2748 
2749  APInt NUsefulBits;
2750  getUsefulBits(SDValue(N, 0), NUsefulBits);
2751 
2752  // If none of the bits are useful, just return UNDEF.
2753  if (!NUsefulBits) {
2754  CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
2755  return true;
2756  }
2757 
2758  if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
2759  return true;
2760 
2761  return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
2762 }
2763 
2764 /// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
2765 /// equivalent of a left shift by a constant amount followed by an and masking
2766 /// out a contiguous set of bits.
2767 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
2768  if (N->getOpcode() != ISD::AND)
2769  return false;
2770 
2771  EVT VT = N->getValueType(0);
2772  if (VT != MVT::i32 && VT != MVT::i64)
2773  return false;
2774 
2775  SDValue Op0;
2776  int DstLSB, Width;
2777  if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
2778  Op0, DstLSB, Width))
2779  return false;
2780 
2781  // ImmR is the rotate right amount.
2782  unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
2783  // ImmS is the most significant bit of the source to be moved.
2784  unsigned ImmS = Width - 1;
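  // For example (illustrative, 32-bit): (x << 3) & 0xf8 has DstLSB = 3 and
  // Width = 5, giving ImmR = 29 and ImmS = 4, i.e. UBFIZ Wd, Wn, #3, #5.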
2785 
2786  SDLoc DL(N);
2787  SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
2788  CurDAG->getTargetConstant(ImmS, DL, VT)};
2789  unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2790  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2791  return true;
2792 }
2793 
2794 /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
2795 /// variable shift/rotate instructions.
2796 bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
2797  EVT VT = N->getValueType(0);
2798 
2799  unsigned Opc;
2800  switch (N->getOpcode()) {
2801  case ISD::ROTR:
2802  Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
2803  break;
2804  case ISD::SHL:
2805  Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
2806  break;
2807  case ISD::SRL:
2808  Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
2809  break;
2810  case ISD::SRA:
2811  Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
2812  break;
2813  default:
2814  return false;
2815  }
2816 
2817  uint64_t Size;
2818  uint64_t Bits;
2819  if (VT == MVT::i32) {
2820  Bits = 5;
2821  Size = 32;
2822  } else if (VT == MVT::i64) {
2823  Bits = 6;
2824  Size = 64;
2825  } else
2826  return false;
2827 
2828  SDValue ShiftAmt = N->getOperand(1);
2829  SDLoc DL(N);
2830  SDValue NewShiftAmt;
2831 
2832  // Skip over an extend of the shift amount.
2833  if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
2834  ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
2835  ShiftAmt = ShiftAmt->getOperand(0);
2836 
2837  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
2838  SDValue Add0 = ShiftAmt->getOperand(0);
2839  SDValue Add1 = ShiftAmt->getOperand(1);
2840  uint64_t Add0Imm;
2841  uint64_t Add1Imm;
2842  if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
2843  // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
2844  // to avoid the ADD/SUB.
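  // This is valid because the shift only uses the low log2(Size) bits of the
  // amount, and (X +/- k*Size) mod Size == X mod Size.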
2845  NewShiftAmt = Add0;
2846  } else if (ShiftAmt->getOpcode() == ISD::SUB &&
2847  isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
2848  (Add0Imm % Size == 0)) {
2849  // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
2850  // to generate a NEG instead of a SUB from a constant.
2851  unsigned NegOpc;
2852  unsigned ZeroReg;
2853  EVT SubVT = ShiftAmt->getValueType(0);
2854  if (SubVT == MVT::i32) {
2855  NegOpc = AArch64::SUBWrr;
2856  ZeroReg = AArch64::WZR;
2857  } else {
2858  assert(SubVT == MVT::i64);
2859  NegOpc = AArch64::SUBXrr;
2860  ZeroReg = AArch64::XZR;
2861  }
2862  SDValue Zero =
2863  CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
2864  MachineSDNode *Neg =
2865  CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
2866  NewShiftAmt = SDValue(Neg, 0);
2867  } else if (ShiftAmt->getOpcode() == ISD::SUB &&
2868  isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
2869  // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
2870  // to generate a NOT instead of a SUB from a constant.
2871  unsigned NotOpc;
2872  unsigned ZeroReg;
2873  EVT SubVT = ShiftAmt->getValueType(0);
2874  if (SubVT == MVT::i32) {
2875  NotOpc = AArch64::ORNWrr;
2876  ZeroReg = AArch64::WZR;
2877  } else {
2878  assert(SubVT == MVT::i64);
2879  NotOpc = AArch64::ORNXrr;
2880  ZeroReg = AArch64::XZR;
2881  }
2882  SDValue Zero =
2883  CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
2884  MachineSDNode *Not =
2885  CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
2886  NewShiftAmt = SDValue(Not, 0);
2887  } else
2888  return false;
2889  } else {
2890  // If the shift amount is masked with an AND, check that the mask covers the
2891  // bits that are implicitly ANDed off by the above opcodes and if so, skip
2892  // the AND.
2893  uint64_t MaskImm;
2894  if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
2895  !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
2896  return false;
2897 
2898  if (countTrailingOnes(MaskImm) < Bits)
2899  return false;
2900 
2901  NewShiftAmt = ShiftAmt->getOperand(0);
2902  }
2903 
2904  // Narrow/widen the shift amount to match the size of the shift operation.
2905  if (VT == MVT::i32)
2906  NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
2907  else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
2908  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
2909  MachineSDNode *Ext = CurDAG->getMachineNode(
2910  AArch64::SUBREG_TO_REG, DL, VT,
2911  CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
2912  NewShiftAmt = SDValue(Ext, 0);
2913  }
2914 
2915  SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
2916  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2917  return true;
2918 }
2919 
2920 bool
2921 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
2922  unsigned RegWidth) {
2923  APFloat FVal(0.0);
2924  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
2925  FVal = CN->getValueAPF();
2926  else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
2927  // Some otherwise illegal constants are allowed in this case.
2928  if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
2929  !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
2930  return false;
2931 
2932  ConstantPoolSDNode *CN =
2933  dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
2934  FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
2935  } else
2936  return false;
2937 
2938  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
2939  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
2940  // x-register.
2941  //
2942  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
2943  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
2944  // integers.
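  // For example (illustrative), fbits = 16 corresponds to a multiplier of
  // 2^16 = 65536.0.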
2945  bool IsExact;
2946 
2947  // fbits is between 1 and 64 in the worst-case, which means the fmul
2948  // could have 2^64 as an actual operand. Need 65 bits of precision.
2949  APSInt IntVal(65, true);
2950  FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
2951 
2952  // N.b. isPowerOf2 also checks for > 0.
2953  if (!IsExact || !IntVal.isPowerOf2()) return false;
2954  unsigned FBits = IntVal.logBase2();
2955 
2956  // Checks above should have guaranteed that we haven't lost information in
2957  // finding FBits, but it must still be in range.
2958  if (FBits == 0 || FBits > RegWidth) return false;
2959 
2960  FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
2961  return true;
2962 }
2963 
2964 // Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
2965 // fields of the string, obtains the integer values from them, and combines
2966 // these into a single value to be used in the MRS/MSR instruction.
2967 static int getIntOperandFromRegisterString(StringRef RegString) {
2968  SmallVector<StringRef, 5> Fields;
2969  RegString.split(Fields, ':');
2970 
2971  if (Fields.size() == 1)
2972  return -1;
2973 
2974  assert(Fields.size() == 5
2975  && "Invalid number of fields in read register string");
2976 
2977  SmallVector<int, 5> Ops;
2978  bool AllIntFields = true;
2979 
2980  for (StringRef Field : Fields) {
2981  unsigned IntField;
2982  AllIntFields &= !Field.getAsInteger(10, IntField);
2983  Ops.push_back(IntField);
2984  }
2985 
2986  assert(AllIntFields &&
2987  "Unexpected non-integer value in special register string.");
2988  (void)AllIntFields;
2989 
2990  // Need to combine the integer fields of the string into a single value
2991  // based on the bit encoding of MRS/MSR instruction.
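  // For example (illustrative), the string "1:2:7:14:0" yields
  // (1 << 14) | (2 << 11) | (7 << 7) | (14 << 3) | 0.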
2992  return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
2993  (Ops[3] << 3) | (Ops[4]);
2994 }
2995 
2996 // Lower the read_register intrinsic to an MRS instruction node if the special
2997 // register string argument is either of the form detailed in the ACLE (the
2998 // form described in getIntOperandFromRegisterString) or is a named register
2999 // known by the MRS SysReg mapper.
3000 bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
3001  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
3002  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3003  SDLoc DL(N);
3004 
3005  int Reg = getIntOperandFromRegisterString(RegString->getString());
3006  if (Reg != -1) {
3007  ReplaceNode(N, CurDAG->getMachineNode(
3008  AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
3009  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
3010  N->getOperand(0)));
3011  return true;
3012  }
3013 
3014  // Use the sysreg mapper to map the remaining possible strings to the
3015  // value for the register to be used for the instruction operand.
3016  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
3017  if (TheReg && TheReg->Readable &&
3018  TheReg->haveFeatures(Subtarget->getFeatureBits()))
3019  Reg = TheReg->Encoding;
3020  else
3021  Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
3022 
3023  if (Reg != -1) {
3024  ReplaceNode(N, CurDAG->getMachineNode(
3025  AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
3026  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
3027  N->getOperand(0)));
3028  return true;
3029  }
3030 
3031  if (RegString->getString() == "pc") {
3032  ReplaceNode(N, CurDAG->getMachineNode(
3033  AArch64::ADR, DL, N->getSimpleValueType(0), MVT::Other,
3034  CurDAG->getTargetConstant(0, DL, MVT::i32),
3035  N->getOperand(0)));
3036  return true;
3037  }
3038 
3039  return false;
3040 }
3041 
3042 // Lower the write_register intrinsic to an MSR instruction node if the special
3043 // register string argument is either of the form detailed in the ACLE (the
3044 // form described in getIntOperandFromRegisterString) or is a named register
3045 // known by the MSR SysReg mapper.
3046 bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
3047  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
3048  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3049  SDLoc DL(N);
3050 
3051  int Reg = getIntOperandFromRegisterString(RegString->getString());
3052  if (Reg != -1) {
3053  ReplaceNode(
3054  N, CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
3055  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
3056  N->getOperand(2), N->getOperand(0)));
3057  return true;
3058  }
3059 
3060  // Check if the register was one of those allowed as the pstatefield value in
3061  // the MSR (immediate) instruction. To accept the values allowed in the
3062  // pstatefield for the MSR (immediate) instruction, we also require that an
3063 // immediate value has been provided as an argument; we know this is the
3064 // case because it has been ensured by semantic checking.
3065  auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());
3066  if (PMapper) {
3067  assert (isa<ConstantSDNode>(N->getOperand(2))
3068  && "Expected a constant integer expression.");
3069  unsigned Reg = PMapper->Encoding;
3070  uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
3071  unsigned State;
3072  if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO || Reg == AArch64PState::SSBS) {
3073  assert(Immed < 2 && "Bad imm");
3074  State = AArch64::MSRpstateImm1;
3075  } else {
3076  assert(Immed < 16 && "Bad imm");
3077  State = AArch64::MSRpstateImm4;
3078  }
3079  ReplaceNode(N, CurDAG->getMachineNode(
3080  State, DL, MVT::Other,
3081  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
3082  CurDAG->getTargetConstant(Immed, DL, MVT::i16),
3083  N->getOperand(0)));
3084  return true;
3085  }
3086 
3087  // Use the sysreg mapper to attempt to map the remaining possible strings
3088  // to the value for the register to be used for the MSR (register)
3089  // instruction operand.
3090  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
3091  if (TheReg && TheReg->Writeable &&
3092  TheReg->haveFeatures(Subtarget->getFeatureBits()))
3093  Reg = TheReg->Encoding;
3094  else
3095  Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
3096  if (Reg != -1) {
3097  ReplaceNode(N, CurDAG->getMachineNode(
3098  AArch64::MSR, DL, MVT::Other,
3099  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
3100  N->getOperand(2), N->getOperand(0)));
3101  return true;
3102  }
3103 
3104  return false;
3105 }
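// Recap of the selection order implemented in tryWriteRegister above:
//  1. "op0:op1:CRn:CRm:op2" strings select to MSR (register) with the packed
//     immediate.
//  2. Known PSTATE field names with a constant argument select to
//     MSRpstateImm1 or MSRpstateImm4.
//  3. Remaining named or generic system registers that are writeable on this
//     subtarget select to MSR (register).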
3106 
3107 /// We've got special pseudo-instructions for these atomic compare-and-swap nodes.
3108 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3109  unsigned Opcode;
3110  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
3111 
3112  // Leave IR for LSE if subtarget supports it.
3113  if (Subtarget->hasLSE()) return false;
3114 
3115  if (MemTy == MVT::i8)
3116  Opcode = AArch64::CMP_SWAP_8;
3117  else if (MemTy == MVT::i16)
3118  Opcode = AArch64::CMP_SWAP_16;
3119  else if (MemTy == MVT::i32)
3120  Opcode = AArch64::CMP_SWAP_32;
3121  else if (MemTy == MVT::i64)
3122  Opcode = AArch64::CMP_SWAP_64;
3123  else
3124  llvm_unreachable("Unknown AtomicCmpSwap type");
3125 
3126  MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
3127  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
3128  N->getOperand(0)};
3129  SDNode *CmpSwap = CurDAG->getMachineNode(
3130  Opcode, SDLoc(N),
3131  CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
3132 
3133  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
3134  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
3135 
3136  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
3137  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
3138  CurDAG->RemoveDeadNode(N);
3139 
3140  return true;
3141 }
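// Result wiring above: result 0 of the CMP_SWAP_* pseudo is the loaded value
// and replaces the original node's value result; result 2 is the chain and
// replaces the original chain result; the extra i32 result 1 (a status /
// scratch value of the pseudo) has no counterpart on ISD::ATOMIC_CMP_SWAP and
// is left unused.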
3142 
3143 bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
3144  SDValue &Shift) {
3145  if (!isa<ConstantSDNode>(N))
3146  return false;
3147 
3148  SDLoc DL(N);
3149  uint64_t Val = cast<ConstantSDNode>(N)
3150  ->getAPIntValue()
3151  .trunc(VT.getFixedSizeInBits())
3152  .getZExtValue();
3153 
3154  switch (VT.SimpleTy) {
3155  case MVT::i8:
3156  // All immediates are supported.
3157  Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
3158  Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
3159  return true;
3160  case MVT::i16:
3161  case MVT::i32:
3162  case MVT::i64:
3163  // Support 8bit unsigned immediates.
3164  if (Val <= 255) {
3165  Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
3166  Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
3167  return true;
3168  }
3169  // Support 16bit unsigned immediates that are a multiple of 256.
3170  if (Val <= 65280 && Val % 256 == 0) {
3171  Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
3172  Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
3173  return true;
3174  }
3175  break;
3176  default:
3177  break;
3178  }
3179 
3180  return false;
3181 }
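// Worked example for SelectSVEAddSubImm above: with an i16/i32/i64 element
// type, a constant of 0x2300 (8960) is encoded as Imm = 0x23 with Shift = 8,
// while 0x123 (291) is rejected because it is neither <= 255 nor a multiple
// of 256.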
3182 
3183 bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
3184  SDValue &Shift) {
3185  if (!isa<ConstantSDNode>(N))
3186  return false;
3187 
3188  SDLoc DL(N);
3189  int64_t Val = cast<ConstantSDNode>(N)
3190  ->getAPIntValue()
3191  .trunc(VT.getFixedSizeInBits())
3192  .getSExtValue();
3193 
3194  switch (VT.SimpleTy) {
3195  case MVT::i8:
3196  // All immediates are supported.
3197  Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
3198  Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
3199  return true;
3200  case MVT::i16:
3201  case MVT::i32:
3202  case MVT::i64:
3203  // Support 8bit signed immediates.
3204  if (Val >= -128 && Val <= 127) {
3205  Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
3206  Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
3207  return true;
3208  }
3209  // Support 16bit signed immediates that are a multiple of 256.
3210  if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
3211  Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
3212  Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32);
3213  return true;
3214  }
3215  break;
3216  default:
3217  break;
3218  }
3219 
3220  return false;
3221 }
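// Worked example for SelectSVECpyDupImm above: with an i16/i32/i64 element
// type, Val = -3840 is a multiple of 256 inside [-32768, 32512], so it is
// encoded as Shift = 8 with Imm = (-3840 >> 8) & 0xFF = 0xF1, whereas
// Val = -129 is outside [-128, 127] and not a multiple of 256, so it is
// rejected.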
3222 
3223 bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
3224  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
3225  int64_t ImmVal = CNode->getSExtValue();
3226  SDLoc DL(N);
3227  if (ImmVal >= -128 && ImmVal < 128) {
3228  Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
3229  return true;
3230  }
3231  }
3232  return false;
3233 }
3234 
3235 bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
3236  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
3237  uint64_t ImmVal = CNode->getZExtValue();
3238 
3239  switch (VT.SimpleTy) {
3240  case MVT::i8:
3241  ImmVal &= 0xFF;
3242  break;
3243  case MVT::i16:
3244  ImmVal &= 0xFFFF;
3245  break;
3246  case MVT::i32:
3247  ImmVal &= 0xFFFFFFFF;
3248  break;
3249  case MVT::i64:
3250  break;
3251  default:
3252  llvm_unreachable("Unexpected type");
3253  }
3254 
3255  if (ImmVal < 256) {
3256  Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
3257  return true;
3258  }
3259  }
3260  return false;
3261 }
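// Example for SelectSVEArithImm above: for VT == MVT::i8 a constant 0x1FF is
// masked to 0xFF and accepted, while for VT == MVT::i16 the same constant
// stays 0x1FF (>= 256) and is rejected.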
3262 
3263 bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
3264  bool Invert) {
3265  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
3266  uint64_t ImmVal = CNode->getZExtValue();
3267  SDLoc DL(N);
3268 
3269  if (Invert)
3270  ImmVal = ~ImmVal;
3271 
3272  // Shift mask depending on type size.
3273  switch (VT.SimpleTy) {
3274  case MVT::i8:
3275  ImmVal &= 0xFF;
3276  ImmVal |= ImmVal << 8;
3277  ImmVal |= ImmVal << 16;
3278  ImmVal |= ImmVal << 32;
3279  break;
3280  case MVT::i16:
3281  ImmVal &= 0xFFFF;
3282  ImmVal |= ImmVal << 16;
3283  ImmVal |= ImmVal << 32;
3284  break;
3285  case MVT::i32:
3286  ImmVal &= 0xFFFFFFFF;
3287  ImmVal |= ImmVal << 32;
3288  break;
3289  case MVT::i64:
3290  break;
3291  default:
3292  llvm_unreachable("Unexpected type");
3293  }
3294 
3295  uint64_t encoding;
3296  if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
3297  Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
3298  return true;
3299  }
3300  }
3301  return false;
3302 }
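// Example for SelectSVELogicalImm above: with VT == MVT::i16, a constant of
// 0x00FF is replicated to 0x00FF00FF00FF00FF before being handed to
// processLogicalImmediate, which accepts it as a valid 64-bit logical
// immediate (eight contiguous set bits per 16-bit element).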
3303 
3304 // SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
3305 // Rather than attempt to normalise everything, we can sometimes saturate the
3306 // shift amount during selection. This function also allows for consistent
3307 // isel patterns by ensuring the resulting "Imm" node is of the i32 type
3308 // required by the instructions.
3309 bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
3310  uint64_t High, bool AllowSaturation,
3311  SDValue &Imm) {
3312  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
3313  uint64_t ImmVal = CN->getZExtValue();
3314 
3315  // Reject shift amounts that are too small.
3316  if (ImmVal < Low)
3317  return false;
3318 
3319  // Reject or saturate shift amounts that are too big.
3320  if (ImmVal > High) {
3321  if (!AllowSaturation)
3322  return false;
3323  ImmVal = High;
3324  }
3325 
3326  Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
3327  return true;
3328  }
3329 
3330  return false;
3331 }
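// Example for SelectSVEShiftImm above (the concrete Low/High bounds come from
// the isel patterns, so the values here are only assumed for illustration):
// with Low = 1, High = 16 and AllowSaturation = true, a shift amount of 20 is
// clamped to 16, while a shift amount of 0 is rejected as too small.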
3332 
3333 bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
3334  // tagp(FrameIndex, IRGstack, tag_offset):
3335  // since the offset between FrameIndex and IRGstack is a compile-time
3336  // constant, this can be lowered to a single ADDG instruction.
3337  if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
3338  return false;
3339  }
3340 
3341  SDValue IRG_SP = N->getOperand(2);
3342  if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
3343  cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() !=
3344  Intrinsic::aarch64_irg_sp) {
3345  return false;
3346  }
3347 
3348  const TargetLowering *TLI = getTargetLowering();
3349  SDLoc DL(N);
3350  int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
3351  SDValue FiOp = CurDAG->getTargetFrameIndex(
3352  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
3353  int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
3354 
3355  SDNode *Out = CurDAG->getMachineNode(
3356  AArch64::TAGPstack, DL, MVT::i64,
3357  {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
3358  CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
3359  ReplaceNode(N, Out);
3360  return true;
3361 }
3362 
3363 void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
3364  assert(isa<ConstantSDNode>(N->getOperand(3)) &&
3365  "llvm.aarch64.tagp third argument must be an immediate");
3366  if (trySelectStackSlotTagP(N))
3367  return;
3368  // FIXME: the above applies whenever the offset between Op1 and Op2 is a
3369  // compile-time constant, not just for stack allocations.
3370 
3371  // General case for unrelated pointers in Op1 and Op2.
3372  SDLoc DL(N);
3373  int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
3374  SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
3375  {N->getOperand(1), N->getOperand(2)});
3376  SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
3377  {SDValue(N1, 0), N->getOperand(2)});
3378  SDNode *N3 = CurDAG->getMachineNode(
3379  AArch64::ADDG, DL, MVT::i64,
3380  {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
3381  CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
3382  ReplaceNode(N, N3);
3383 }
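// The general-case sequence built above: SUBP computes the signed difference
// between the address bits of Op1 and Op2, ADDXrr adds that difference back
// onto Op2 so the result has Op1's address with Op2's tag, and ADDG then
// applies the constant tag offset.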
3384 
3385 // NOTE: We cannot use EXTRACT_SUBREG in all cases because the fixed length
3386 // vector types larger than NEON don't have a matching SubRegIndex.
3387 static SDNode *extractSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
3388  assert(V.getValueType().isScalableVector() &&
3389  V.getValueType().getSizeInBits().getKnownMinSize() ==
3390  AArch64::SVEBitsPerBlock &&
3391  "Expected to extract from a packed scalable vector!");
3392  assert(VT.isFixedLengthVector() &&
3393  "Expected to extract a fixed length vector!");
3394 
3395  SDLoc DL(V);
3396  switch (VT.getSizeInBits()) {
3397  case 64: {
3398  auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
3399  return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
3400  }
3401  case 128: {
3402  auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
3403  return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
3404  }
3405  default: {
3406  auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
3407  return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
3408  }
3409  }
3410 }
3411 
3412 // NOTE: We cannot use INSERT_SUBREG in all cases because the fixed length
3413 // vector types larger than NEON don't have a matching SubRegIndex.
3414 static SDNode *insertSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
3415  assert(VT.isScalableVector() &&
3416  VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock &&
3417  "Expected to insert into a packed scalable vector!");
3418  assert(V.getValueType().isFixedLengthVector() &&
3419  "Expected to insert a fixed length vector!");
3420 
3421  SDLoc DL(V);
3422  switch (V.getValueType().getSizeInBits()) {
3423  case 64: {
3424  auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
3425  auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
3426  return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
3427  SDValue(Container, 0), V, SubReg);
3428  }
3429  case 128: {
3430  auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
3431  auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
3432  return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
3433  SDValue(Container, 0), V, SubReg);
3434  }
3435  default: {
3436  auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
3437  return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
3438  }
3439  }
3440 }
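// As with extractSubReg above: 64-bit and 128-bit fixed-length vectors are
// inserted as the dsub/zsub subregister of an IMPLICIT_DEF scalable
// container, while wider fixed-length vectors have no matching SubRegIndex
// and are simply re-classified into the ZPR register class via
// COPY_TO_REGCLASS.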
3441 
3442 void AArch64DAGToDAGISel::Select(SDNode *Node) {
3443  // If we have a custom node, we already have selected!
3444  if (Node->isMachineOpcode()) {
3445  LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
3446  Node->setNodeId(-1);
3447  return;
3448  }
3449 
3450  // Handle a few custom selection cases.
3451  EVT VT = Node->getValueType(0);
3452 
3453  switch (Node->getOpcode()) {
3454  default:
3455  break;
3456 
3457  case ISD::ATOMIC_CMP_SWAP:
3458  if (SelectCMP_SWAP(Node))
3459  return;
3460  break;
3461 
3462  case ISD::READ_REGISTER:
3463  if (tryReadRegister(Node))
3464  return;
3465  break;
3466 
3467  case ISD::WRITE_REGISTER:
3468  if (tryWriteRegister(Node))
3469  return;
3470  break;
3471 
3472  case ISD::ADD:
3473  if (tryMLAV64LaneV128(Node))
3474  return;
3475  break;
3476 
3477  case ISD::LOAD: {
3478  // Try to select as an indexed load. Fall through to normal processing
3479  // if we can't.
3480  if (tryIndexedLoad(Node))
3481  return;
3482  break;
3483  }
3484 
3485  case ISD::SRL:
3486  case ISD::AND:
3487  case ISD::SRA:
3488  case ISD::SIGN_EXTEND_INREG:
3489  if (tryBitfieldExtractOp(Node))
3490  return;
3491  if (tryBitfieldInsertInZeroOp(Node))
3492  return;
3493  LLVM_FALLTHROUGH;
3494  case ISD::ROTR:
3495  case ISD::SHL:
3496  if (tryShiftAmountMod(Node))
3497  return;
3498  break;
3499 
3500  case ISD::SIGN_EXTEND:
3501  if (tryBitfieldExtractOpFromSExt(Node))
3502  return;
3503  break;
3504 
3505  case ISD::FP_EXTEND:
3506  if (tryHighFPExt(Node))
3507  return;
3508  break;
3509 
3510  case ISD::OR:
3511  if (tryBitfieldInsertOp(Node))
3512  return;
3513  break;
3514 
3515  case ISD::EXTRACT_SUBVECTOR: {
3516  // Bail when not a "cast" like extract_subvector.
3517  if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue() != 0)
3518  break;
3519 
3520  // Bail when normal isel can do the job.
3521  EVT InVT = Node->getOperand(0).getValueType();
3522  if (VT.isScalableVector() || InVT.isFixedLengthVector())
3523  break;
3524 
3525  // NOTE: We can only get here when doing fixed length SVE code generation.
3526  // We do manual selection because the types involved are not linked to real
3527  // registers (despite being legal) and must be coerced into SVE registers.
3528  //
3529  // NOTE: If the above changes, be aware that selection will still not work
3530  // because the td definition of extract_vector does not support extracting
3531  // a fixed length vector from a scalable vector.
3532 
3533  ReplaceNode(Node, extractSubReg(CurDAG, VT, Node->getOperand(0)));
3534  return;
3535  }
3536 
3537  case ISD::INSERT_SUBVECTOR: {
3538  // Bail when not a "cast" like insert_subvector.
3539  if (cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue() != 0)
3540  break;
3541  if (!Node->getOperand(0).isUndef())
3542  break;
3543 
3544  // Bail when normal isel should do the job.
3545  EVT InVT = Node->getOperand(1).getValueType();
3546  if (VT.isFixedLengthVector() || InVT.isScalableVector())
3547  break;
3548 
3549  // NOTE: We can only get here when doing fixed length SVE code generation.
3550  // We do manual selection because the types involved are not linked to real
3551  // registers (despite being legal) and must be coerced into SVE registers.
3552  //
3553  // NOTE: If the above changes, be aware that selection will still not work
3554  // because the td definition of insert_vector does not support inserting a
3555  // fixed length vector into a scalable vector.
3556 
3557  ReplaceNode(Node, insertSubReg(CurDAG, VT, Node->getOperand(1)));
3558  return;
3559  }
3560 
3561  case ISD::Constant: {
3562  // Materialize zero constants as copies from WZR/XZR. This allows
3563  // the coalescer to propagate these into other instructions.
3564  ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
3565  if (ConstNode->isZero()) {
3566  if (VT == MVT::i32) {
3567  SDValue New = CurDAG->getCopyFromReg(
3568  CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
3569  ReplaceNode(Node, New.getNode());
3570  return;
3571  } else if (VT == MVT::i64) {
3572  SDValue New = CurDAG->getCopyFromReg(
3573  CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
3574  ReplaceNode(Node, New.getNode());
3575  return;
3576  }
3577  }
3578  break;
3579  }
3580 
3581  case ISD::FrameIndex: {
3582  // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
3583  int FI = cast<FrameIndexSDNode>(Node)->getIndex();
3584  unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
3585  const TargetLowering *TLI = getTargetLowering();
3586  SDValue TFI = CurDAG->getTargetFrameIndex(
3587  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
3588  SDLoc DL(Node);
3589  SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
3590  CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
3591  CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
3592  return;
3593  }
3594  case ISD::INTRINSIC_W_CHAIN: {
3595  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
3596  switch (IntNo) {
3597  default:
3598  break;
3599  case Intrinsic::aarch64_ldaxp:
3600  case Intrinsic::aarch64_ldxp: {
3601  unsigned Op =
3602  IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
3603  SDValue MemAddr = Node->getOperand(2);
3604  SDLoc DL(Node);
3605  SDValue Chain = Node->getOperand(0);
3606 
3607  SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
3608  MVT::Other, MemAddr, Chain);
3609 
3610  // Transfer memoperands.
3611  MachineMemOperand *MemOp =
3612  cast<MemIntrinsicSDNode>(Node)->getMemOperand();
3613  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
3614  ReplaceNode(Node, Ld);
3615  return;
3616  }
3617  case Intrinsic::aarch64_stlxp:
3618  case Intrinsic::aarch64_stxp: {
3619  unsigned Op =
3620  IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
3621  SDLoc DL(Node);
3622  SDValue Chain = Node->getOperand(0);
3623  SDValue ValLo = Node->getOperand(2);
3624  SDValue ValHi = Node->getOperand(3);
3625  SDValue MemAddr = Node->getOperand(4);
3626 
3627  // Place arguments in the right order.
3628  SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
3629 
3630  SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
3631  // Transfer memoperands.
3632  MachineMemOperand *MemOp =
3633  cast<MemIntrinsicSDNode>(Node)->getMemOperand();
3634  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
3635 
3636  ReplaceNode(Node, St);
3637  return;
3638  }
3639  case Intrinsic::aarch64_neon_ld1x2:
3640  if (VT == MVT::v8i8) {
3641  SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
3642  return;
3643  } else if (VT == MVT::v16i8) {
3644  SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
3645  return;
3646  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
3647  SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
3648  return;
3649  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
3650  SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
3651  return;
3652  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3653  SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
3654  return;
3655  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3656  SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
3657  return;
3658  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3659  SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
3660  return;
3661  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3662  SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
3663  return;
3664  }
3665  break;
3666  case Intrinsic::aarch64_neon_ld1x3:
3667  if (VT == MVT::v8i8) {
3668  SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
3669  return;
3670  } else if (VT == MVT::v16i8) {
3671  SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
3672  return;
3673  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
3674  SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
3675  return;
3676  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
3677  SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
3678  return;
3679  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3680  SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
3681  return;
3682  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3683  SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
3684  return;
3685  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3686  SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
3687  return;
3688  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3689  SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
3690  return;
3691  }
3692  break;
3693  case Intrinsic::aarch64_neon_ld1x4:
3694  if (VT == MVT::v8i8) {
3695  SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
3696  return;
3697  } else if (VT == MVT::v16i8) {
3698  SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
3699  return;
3700  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
3701  SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
3702  return;
3703  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
3704  SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
3705  return;
3706  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3707  SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
3708  return;
3709  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3710  SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
3711  return;
3712  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3713  SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3714  return;
3715  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3716  SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
3717  return;
3718  }
3719  break;
3720  case Intrinsic::aarch64_neon_ld2:
3721  if (VT == MVT::v8i8) {
3722  SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
3723  return;
3724  } else if (VT == MVT::v16i8) {
3725  SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
3726  return;
3727  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
3728  SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
3729  return;
3730  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
3731  SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
3732  return;
3733  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3734  SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
3735  return;
3736  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3737  SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
3738  return;
3739  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3740  SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
3741  return;
3742  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3743  SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
3744  return;
3745  }
3746  break;
3747  case Intrinsic::aarch64_neon_ld3:
3748  if (VT == MVT::v8i8) {
3749  SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
3750  return;
3751  } else if (VT == MVT::v16i8) {
3752  SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
3753  return;
3754  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
3755  SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
3756  return;
3757  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
3758  SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
3759  return;
3760  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3761  SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
3762  return;
3763  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3764  SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
3765  return;
3766  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3767  SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
3768  return;
3769  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3770  SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
3771  return;
3772  }
3773  break;
3774  case Intrinsic::aarch64_neon_ld4:
3775  if (VT == MVT::v8i8) {
3776  SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
3777  return;
3778  } else if (VT == MVT::v16i8) {
3779  SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
3780  return;
3781  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
3782  SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
3783  return;
3784  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
3785  SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
3786  return;
3787  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3788  SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
3789  return;
3790  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3791  SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
3792  return;
3793  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3794  SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3795  return;
3796  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3797  SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
3798  return;
3799  }
3800  break;
3801  case Intrinsic::aarch64_neon_ld2r:
3802  if (VT == MVT::v8i8) {
3803  SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
3804  return;
3805  } else if (VT == MVT::v16i8) {
3806  SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
3807  return;
3808  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
3809  SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
3810  return;
3811  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
3812  SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
3813  return;
3814  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3815  SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
3816  return;
3817  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3818  SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
3819  return;
3820  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3821  SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
3822  return;
3823  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3824  SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
3825  return;
3826  }
3827  break;
3828  case Intrinsic::aarch64_neon_ld3r:
3829  if (VT == MVT::v8i8) {
3830  SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
3831  return;
3832  } else if (VT == MVT::v16i8) {
3833  SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
3834  return;
3835  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
3836  SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
3837  return;
3838  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
3839  SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
3840  return;
3841  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3842  SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
3843  return;
3844  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3845  SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
3846  return;
3847  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3848  SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
3849  return;
3850  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3851  SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
3852  return;
3853  }
3854  break;
3855  case Intrinsic::aarch64_neon_ld4r:
3856  if (VT == MVT::v8i8) {
3857  SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
3858  return;
3859  } else if (VT == MVT::v16i8) {
3860  SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
3861  return;
3862  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
3863  SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
3864  return;
3865  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
3866  SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
3867  return;
3868  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3869  SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
3870  return;
3871  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3872  SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
3873  return;
3874  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3875  SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
3876  return;
3877  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3878  SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
3879  return;
3880  }
3881  break;
3882  case Intrinsic::aarch64_neon_ld2lane:
3883  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3884  SelectLoadLane(Node, 2, AArch64::LD2i8);
3885  return;
3886  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3887  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
3888  SelectLoadLane(Node, 2, AArch64::LD2i16);
3889  return;
3890  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3891  VT == MVT::v2f32) {
3892  SelectLoadLane(Node, 2, AArch64::LD2i32);
3893  return;
3894  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3895  VT == MVT::v1f64) {
3896  SelectLoadLane(Node, 2, AArch64::LD2i64);
3897  return;
3898  }
3899  break;
3900  case Intrinsic::aarch64_neon_ld3lane:
3901  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3902  SelectLoadLane(Node, 3, AArch64::LD3i8);
3903  return;
3904  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3905  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
3906  SelectLoadLane(Node, 3, AArch64::LD3i16);
3907  return;
3908  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3909  VT == MVT::v2f32) {
3910  SelectLoadLane(Node, 3, AArch64::LD3i32);
3911  return;
3912  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3913  VT == MVT::v1f64) {
3914  SelectLoadLane(Node, 3, AArch64::LD3i64);
3915  return;
3916  }
3917  break;
3918  case Intrinsic::aarch64_neon_ld4lane:
3919  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3920  SelectLoadLane(Node, 4, AArch64::LD4i8);
3921  return;
3922  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3923  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
3924  SelectLoadLane(Node, 4, AArch64::LD4i16);
3925  return;
3926  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3927  VT == MVT::v2f32) {
3928  SelectLoadLane(Node, 4, AArch64::LD4i32);
3929  return;
3930  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3931  VT == MVT::v1f64) {
3932  SelectLoadLane(Node, 4, AArch64::LD4i64);
3933  return;
3934  }
3935  break;
3936  case Intrinsic::aarch64_ld64b:
3937  SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
3938  return;
3939  case Intrinsic::aarch64_sve_ld2_sret: {
3940  if (VT == MVT::nxv16i8) {
3941  SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
3942  true);
3943  return;
3944  } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
3945  VT == MVT::nxv8bf16) {
3946  SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
3947  true);
3948  return;
3949  } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
3950  SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
3951  true);
3952  return;
3953  } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
3954  SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
3955  true);
3956  return;
3957  }
3958  break;
3959  }
3960  case Intrinsic::aarch64_sve_ld3_sret: {
3961  if (VT == MVT::nxv16i8) {
3962  SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
3963  true);
3964  return;
3965  } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
3966  VT == MVT::nxv8bf16) {
3967  SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
3968  true);
3969  return;
3970  } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
3971  SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
3972  true);
3973  return;
3974  } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
3975  SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
3976  true);
3977  return;
3978  }
3979  break;
3980  }
3981  case Intrinsic::aarch64_sve_ld4_sret: {
3982  if (VT == MVT::nxv16i8) {
3983  SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
3984  true);
3985  return;
3986  } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
3987  VT == MVT::nxv8bf16) {
3988  SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
3989  true);
3990  return;
3991  } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
3992  SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
3993  true);
3994  return;
3995  } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
3996  SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
3997  true);
3998  return;
3999  }
4000  break;
4001  }
4002  }
4003  } break;
4004  case ISD::INTRINSIC_WO_CHAIN: {
4005  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
4006  switch (IntNo) {
4007  default:
4008  break;
4009  case Intrinsic::aarch64_tagp:
4010  SelectTagP(Node);
4011  return;
4012  case Intrinsic::aarch64_neon_tbl2:
4013  SelectTable(Node, 2,
4014  VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
4015  false);
4016  return;
4017  case Intrinsic::aarch64_neon_tbl3:
4018  SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
4019  : AArch64::TBLv16i8Three,
4020  false);
4021  return;
4022  case Intrinsic::aarch64_neon_tbl4:
4023  SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
4024  : AArch64::TBLv16i8Four,
4025  false);
4026  return;
4027  case Intrinsic::aarch64_neon_tbx2:
4028  SelectTable(Node, 2,
4029  VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
4030  true);
4031  return;
4032  case Intrinsic::aarch64_neon_tbx3:
4033  SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
4034  : AArch64::TBXv16i8Three,
4035  true);
4036  return;
4037  case Intrinsic::aarch64_neon_tbx4:
4038  SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
4039  : AArch64::TBXv16i8Four,
4040  true);
4041  return;
4042  case Intrinsic::aarch64_neon_smull:
4043  case Intrinsic::aarch64_neon_umull:
4044  if (tryMULLV64LaneV128(IntNo, Node))
4045  return;
4046  break;
4047  case Intrinsic::swift_async_context_addr: {
4048  SDLoc DL(Node);
4049  CurDAG->SelectNodeTo(Node, AArch64::SUBXri, MVT::i64,
4050  CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
4051  AArch64::FP, MVT::i64),
4052  CurDAG->getTargetConstant(8, DL, MVT::i32),
4053  CurDAG->getTargetConstant(0, DL, MVT::i32));
4054  auto &MF = CurDAG->getMachineFunction();
4055  MF.getFrameInfo().setFrameAddressIsTaken(true);
4056  MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
4057  return;
4058  }
4059  }
4060  break;
4061  }
4062  case ISD::INTRINSIC_VOID: {
4063  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
4064  if (Node->getNumOperands() >= 3)
4065  VT = Node->getOperand(2)->getValueType(0);
4066  switch (IntNo) {
4067  default:
4068  break;
4069  case Intrinsic::aarch64_neon_st1x2: {
4070  if (VT == MVT::v8i8) {
4071  SelectStore(Node, 2, AArch64::ST1Twov8b);
4072  return;
4073  } else if (VT == MVT::v16i8) {
4074  SelectStore(Node, 2, AArch64::ST1Twov16b);
4075  return;
4076  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
4077  VT == MVT::v4bf16) {
4078  SelectStore(Node, 2, AArch64::ST1Twov4h);
4079  return;
4080  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
4081  VT == MVT::v8bf16) {
4082  SelectStore(Node, 2, AArch64::ST1Twov8h);
4083  return;
4084  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4085  SelectStore(Node, 2, AArch64::ST1Twov2s);
4086  return;
4087  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4088  SelectStore(Node, 2, AArch64::ST1Twov4s);
4089  return;
4090  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4091  SelectStore(Node, 2, AArch64::ST1Twov2d);
4092  return;
4093  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4094  SelectStore(Node, 2, AArch64::ST1Twov1d);
4095  return;
4096  }
4097  break;
4098  }
4099  case Intrinsic::aarch64_neon_st1x3: {
4100  if (VT == MVT::v8i8) {
4101  SelectStore(Node, 3, AArch64::ST1Threev8b);
4102  return;
4103  } else if (VT == MVT::v16i8) {
4104  SelectStore(Node, 3, AArch64::ST1Threev16b);
4105  return;
4106  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
4107  VT == MVT::v4bf16) {
4108  SelectStore(Node, 3, AArch64::ST1Threev4h);
4109  return;
4110  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
4111  VT == MVT::v8bf16) {
4112  SelectStore(Node, 3, AArch64::ST1Threev8h);
4113  return;
4114  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4115  SelectStore(Node, 3, AArch64::ST1Threev2s);
4116  return;
4117  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4118  SelectStore(Node, 3, AArch64::ST1Threev4s);
4119  return;
4120  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4121  SelectStore(Node, 3, AArch64::ST1Threev2d);
4122  return;
4123  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4124  SelectStore(Node, 3, AArch64::ST1Threev1d);
4125  return;
4126  }
4127  break;
4128  }
4129  case Intrinsic::aarch64_neon_st1x4: {
4130  if (VT == MVT::v8i8) {
4131  SelectStore(Node, 4, AArch64::ST1Fourv8b);
4132  return;
4133  } else if (VT == MVT::v16i8) {
4134  SelectStore(Node, 4, AArch64::ST1Fourv16b);
4135  return;
4136  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
4137  VT == MVT::v4bf16) {
4138  SelectStore(Node, 4, AArch64::ST1Fourv4h);
4139  return;
4140  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
4141  VT == MVT::v8bf16) {
4142  SelectStore(Node, 4, AArch64::ST1Fourv8h);
4143  return;
4144  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4145  SelectStore(Node, 4, AArch64::ST1Fourv2s);
4146  return;
4147  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4148  SelectStore(Node, 4, AArch64::ST1Fourv4s);
4149  return;
4150  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4151  SelectStore(Node, 4, AArch64::ST1Fourv2d);
4152  return;
4153  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4154  SelectStore(Node, 4, AArch64::ST1Fourv1d);
4155  return;
4156  }
4157  break;
4158  }
4159  case Intrinsic::aarch64_neon_st2: {
4160  if (VT == MVT::v8i8) {
4161  SelectStore(Node, 2, AArch64::ST2Twov8b);
4162  return;
4163  } else if (VT == MVT::v16i8) {
4164  SelectStore(Node, 2, AArch64::ST2Twov16b);
4165  return;
4166  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
4167  VT == MVT::v4bf16) {
4168  SelectStore(Node, 2, AArch64::ST2Twov4h);
4169  return;
4170  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
4171  VT == MVT::v8bf16) {
4172  SelectStore(Node, 2, AArch64::ST2Twov8h);
4173  return;
4174  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4175  SelectStore(Node, 2, AArch64::ST2Twov2s);
4176  return;
4177  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4178  SelectStore(Node, 2, AArch64::ST2Twov4s);
4179  return;
4180  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4181  SelectStore(Node, 2, AArch64::ST2Twov2d);
4182  return;
4183  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4184  SelectStore(Node, 2, AArch64::ST1Twov1d);
4185  return;
4186  }
4187  break;
4188  }
4189  case Intrinsic::aarch64_neon_st3: {
4190  if (VT == MVT::v8i8) {
4191  SelectStore(Node, 3, AArch64::ST3Threev8b);
4192  return;
4193  } else if (VT == MVT::v16i8) {
4194  SelectStore(Node, 3, AArch64::ST3Threev16b);
4195  return;
4196  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
4197  VT == MVT::v4bf16) {
4198  SelectStore(Node, 3, AArch64::ST3Threev4h);
4199  return;
4200  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
4201  VT == MVT::v8bf16) {
4202  SelectStore(Node, 3, AArch64::ST3Threev8h);
4203  return;
4204  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4205  SelectStore(Node, 3, AArch64::ST3Threev2s);
4206  return;
4207  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4208  SelectStore(Node, 3, AArch64::ST3Threev4s);
4209  return;
4210  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4211  SelectStore(Node, 3, AArch64::ST3Threev2d);
4212  return;
4213  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4214  SelectStore(Node, 3, AArch64::ST1Threev1d);
4215  return;
4216  }
4217  break;
4218  }
4219  case Intrinsic::aarch64_neon_st4: {
4220  if (VT == MVT::v8i8) {
4221  SelectStore(Node, 4, AArch64::ST4Fourv8b);
4222  return;
4223  } else if (VT == MVT::v16i8) {
4224  SelectStore(Node, 4, AArch64::ST4Fourv16b);
4225  return;
4226  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
4227  VT == MVT::v4bf16) {
4228  SelectStore(Node, 4, AArch64::ST4Fourv4h);
4229  return;
4230  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
4231  VT == MVT::v8bf16) {
4232  SelectStore(Node, 4, AArch64::ST4Fourv8h);
4233  return;
4234  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4235  SelectStore(Node, 4, AArch64::ST4Fourv2s);
4236  return;
4237  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4238  SelectStore(Node, 4, AArch64::ST4Fourv4s);
4239  return;
4240  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4241  SelectStore(Node, 4, AArch64::ST4Fourv2d);
4242  return;
4243  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4244  SelectStore(Node, 4, AArch64::ST1Fourv1d);
4245  return;
4246  }
4247  break;
4248  }
4249  case Intrinsic::aarch64_neon_st2lane: {
4250  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4251  SelectStoreLane(Node, 2, AArch64::ST2i8);
4252  return;
4253  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4254  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4255  SelectStoreLane(Node, 2, AArch64::ST2i16);
4256  return;
4257  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4258  VT == MVT::v2f32) {
4259  SelectStoreLane(Node, 2, AArch64::ST2i32);
4260  return;
4261  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4262  VT == MVT::v1f64) {
4263  SelectStoreLane(Node, 2, AArch64::ST2i64);
4264  return;
4265  }
4266  break;
4267  }
4268  case Intrinsic::aarch64_neon_st3lane: {
4269  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4270  SelectStoreLane(Node, 3, AArch64::ST3i8);
4271  return;
4272  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4273  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4274  SelectStoreLane(Node, 3, AArch64::ST3i16);
4275  return;
4276  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4277  VT == MVT::v2f32) {
4278  SelectStoreLane(Node, 3, AArch64::ST3i32);
4279  return;
4280  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4281  VT == MVT::v1f64) {
4282  SelectStoreLane(Node, 3, AArch64::ST3i64);
4283  return;
4284  }
4285  break;
4286  }
4287  case Intrinsic::aarch64_neon_st4lane: {
4288  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4289  SelectStoreLane(Node, 4, AArch64::ST4i8);
4290  return;
4291  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4292  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4293  SelectStoreLane(Node, 4, AArch64::ST4i16);
4294  return;
4295  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4296  VT == MVT::v2f32) {
4297  SelectStoreLane(Node, 4, AArch64::ST4i32);
4298  return;
4299  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4300  VT == MVT::v1f64) {
4301  SelectStoreLane(Node, 4, AArch64::ST4i64);
4302  return;
4303  }
4304  break;
4305  }
4306  case Intrinsic::aarch64_sve_st2: {
4307  if (VT == MVT::nxv16i8) {
4308  SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
4309  return;
4310  } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4311  VT == MVT::nxv8bf16) {
4312  SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
4313  return;
4314  } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4315  SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
4316  return;
4317  } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4318  SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
4319  return;
4320  }
4321  break;
4322  }
4323  case Intrinsic::aarch64_sve_st3: {
4324  if (VT == MVT::nxv16i8) {
4325  SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
4326  return;
4327  } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4328  VT == MVT::nxv8bf16) {
4329  SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
4330  return;
4331  } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4332  SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
4333  return;
4334  } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4335  SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
4336  return;
4337  }
4338  break;
4339  }
4340  case Intrinsic::aarch64_sve_st4: {
4341  if (VT == MVT::nxv16i8) {
4342  SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
4343  return;
4344  } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4345  VT == MVT::nxv8bf16) {
4346  SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
4347  return;
4348  } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4349  SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
4350  return;
4351  } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4352  SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
4353  return;
4354  }
4355  break;
4356  }
4357  }
4358  break;
4359  }
4360  case AArch64ISD::LD2post: {
4361  if (VT == MVT::v8i8) {
4362  SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
4363  return;
4364  } else if (VT == MVT::v16i8) {
4365  SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
4366  return;
4367  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4368  SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
4369  return;
4370  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4371  SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
4372  return;
4373  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4374  SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
4375  return;
4376  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4377  SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
4378  return;
4379  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4380  SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
4381  return;
4382  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4383  SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
4384  return;
4385  }
4386  break;
4387  }
4388  case AArch64ISD::LD3post: {
4389  if (VT == MVT::v8i8) {
4390  SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
4391  return;
4392  } else if (VT == MVT::v16i8) {
4393  SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
4394  return;
4395  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4396  SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
4397  return;
4398  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4399  SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
4400  return;
4401  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4402  SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
4403  return;
4404  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4405  SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
4406  return;
4407  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4408  SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
4409  return;
4410  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4411  SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
4412  return;
4413  }
4414  break;
4415  }
4416  case AArch64ISD::LD4post: {
4417  if (VT == MVT::v8i8) {
4418  SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
4419  return;
4420  } else if (VT == MVT::v16i8) {
4421  SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
4422  return;
4423  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4424  SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
4425  return;
4426  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4427  SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
4428  return;
4429  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4430  SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
4431  return;
4432  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4433  SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
4434  return;
4435  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4436  SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
4437  return;
4438  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4439  SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
4440  return;
4441  }
4442  break;
4443  }
4444  case AArch64ISD::LD1x2post: {
4445  if (VT == MVT::v8i8) {
4446  SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
4447  return;
4448  } else if (VT == MVT::v16i8) {
4449  SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
4450  return;
4451  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4452  SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
4453  return;
4454  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4455  SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
4456  return;
4457  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4458  SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
4459  return;
4460  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4461  SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
4462  return;
4463  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4464  SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
4465  return;
4466  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4467  SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
4468  return;
4469  }
4470  break;
4471  }
4472  case AArch64ISD::LD1x3post: {
4473  if (VT == MVT::v8i8) {
4474  SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
4475  return;
4476  } else if (VT == MVT::v16i8) {
4477  SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
4478  return;
4479  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4480  SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
4481  return;
4482  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4483  SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
4484  return;
4485  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4486  SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
4487  return;
4488  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4489  SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
4490  return;
4491  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4492  SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
4493  return;
4494  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4495  SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
4496  return;
4497  }
4498  break;
4499  }
4500  case AArch64ISD::LD1x4post: {
4501  if (VT == MVT::v8i8) {
4502  SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
4503  return;
4504  } else if (VT == MVT::v16i8) {
4505  SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
4506  return;
4507  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4508  SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
4509  return;
4510  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4511  SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
4512  return;
4513  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4514  SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
4515  return;
4516  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4517  SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
4518  return;
4519  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4520  SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
4521  return;
4522  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4523  SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
4524  return;
4525  }
4526  break;
4527  }
4528  case AArch64ISD::LD1DUPpost: {
4529  if (VT == MVT::v8i8) {
4530  SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
4531  return;
4532  } else if (VT == MVT::v16i8) {
4533  SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
4534  return;
4535  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4536  SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
4537  return;
4538  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4539  SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
4540  return;
4541  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4542  SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
4543  return;
4544  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4545  SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
4546  return;
4547  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4548  SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
4549  return;
4550  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4551  SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
4552  return;
4553  }
4554  break;
4555  }
4556  case AArch64ISD::LD2DUPpost: {
4557  if (VT == MVT::v8i8) {
4558  SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
4559  return;
4560  } else if (VT == MVT::v16i8) {
4561  SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
4562  return;
4563  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4564  SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
4565  return;
4566  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4567  SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
4568  return;
4569  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4570  SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
4571  return;
4572  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4573  SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
4574  return;
4575  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4576  SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
4577  return;
4578  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4579  SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
4580  return;
4581  }
4582  break;
4583  }
4584  case AArch64ISD::LD3DUPpost: {
4585  if (VT == MVT::v8i8) {
4586  SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
4587  return;
4588  } else if (VT == MVT::v16i8) {
4589  SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
4590  return;
4591  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4592  SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
4593  return;
4594  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4595  SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
4596  return;
4597  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4598  SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
4599  return;
4600  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4601  SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
4602  return;
4603  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4604  SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
4605  return;
4606  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4607  SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
4608  return;
4609  }
4610  break;
4611  }
4612  case AArch64ISD::LD4DUPpost: {
4613  if (VT == MVT::v8i8) {
4614  SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
4615  return;
4616  } else if (VT == MVT::v16i8) {
4617  SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
4618  return;
4619  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4620  SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
4621  return;
4622  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4623  SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
4624  return;
4625  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4626  SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
4627  return;
4628  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4629  SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
4630  return;
4631  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4632  SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
4633  return;
4634  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4635  SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
4636  return;
4637  }
4638  break;
4639  }
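  // Editor's note: LD1LANEpost..LD4LANEpost select the post-incrementing
  // single-lane loads (LD1i8..LD4i64).  Only the element width matters for
  // the opcode choice: D- and Q-sized vectors of the same element type map
  // to the same instruction, and the lane index is carried as an operand of
  // the node rather than being encoded in the opcode.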
4640  case AArch64ISD::LD1LANEpost: {
4641  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4642  SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
4643  return;
4644  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4645  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4646  SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
4647  return;
4648  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4649  VT == MVT::v2f32) {
4650  SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
4651  return;
4652  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4653  VT == MVT::v1f64) {
4654  SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
4655  return;
4656  }
4657  break;
4658  }
4659  case AArch64ISD::LD2LANEpost: {
4660  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4661  SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
4662  return;
4663  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4664  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4665  SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
4666  return;
4667  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4668  VT == MVT::v2f32) {
4669  SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
4670  return;
4671  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4672  VT == MVT::v1f64) {
4673  SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
4674  return;
4675  }
4676  break;
4677  }
4678  case AArch64ISD::LD3LANEpost: {
4679  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4680  SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
4681  return;
4682  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4683  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4684  SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
4685  return;
4686  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4687  VT == MVT::v2f32) {
4688  SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
4689  return;
4690  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4691  VT == MVT::v1f64) {
4692  SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
4693  return;
4694  }
4695  break;
4696  }
4697  case AArch64ISD::LD4LANEpost: {
4698  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4699  SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
4700  return;
4701  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4702  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4703  SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
4704  return;
4705  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4706  VT == MVT::v2f32) {
4707  SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
4708  return;
4709  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4710  VT == MVT::v1f64) {
4711  SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
4712  return;
4713  }
4714  break;
4715  }
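  // Editor's note: the post-incrementing structured stores below return only
  // the updated base pointer and a chain, so VT is re-read from the first
  // stored vector (operand 1).  ST2/ST3/ST4 have no ".1d" arrangement, so
  // the v1i64/v1f64 cases fall back to the equivalent ST1 multi-register
  // post-indexed forms, which store consecutive D registers without
  // interleaving.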
4716  case AArch64ISD::ST2post: {
4717  VT = Node->getOperand(1).getValueType();
4718  if (VT == MVT::v8i8) {
4719  SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
4720  return;
4721  } else if (VT == MVT::v16i8) {
4722  SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
4723  return;
4724  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4725  SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
4726  return;
4727  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4728  SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
4729  return;
4730  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4731  SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
4732  return;
4733  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4734  SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
4735  return;
4736  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4737  SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
4738  return;
4739  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4740  SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
4741  return;
4742  }
4743  break;
4744  }
4745  case AArch64ISD::ST3post: {
4746  VT = Node->getOperand(1).getValueType();
4747  if (VT == MVT::v8i8) {
4748  SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
4749  return;
4750  } else if (VT == MVT::v16i8) {
4751  SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
4752  return;
4753  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4754  SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
4755  return;
4756  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4757  SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
4758  return;
4759  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4760  SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
4761  return;
4762  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4763  SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
4764  return;
4765  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4766  SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
4767  return;
4768  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4769  SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
4770  return;
4771  }
4772  break;
4773  }
4774  case AArch64ISD::ST4post: {
4775  VT = Node->getOperand(1).getValueType();
4776  if (VT == MVT::v8i8) {
4777  SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
4778  return;
4779  } else if (VT == MVT::v16i8) {
4780  SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
4781  return;
4782  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4783  SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
4784  return;
4785  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4786  SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
4787  return;
4788  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4789  SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
4790  return;
4791  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4792  SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
4793  return;
4794  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4795  SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
4796  return;
4797  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4798  SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
4799  return;
4800  }
4801  break;
4802  }
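  // Editor's note: ST1x2post..ST1x4post (corresponding to the
  // aarch64.neon.st1x{2,3,4} intrinsics) store two to four vectors to
  // consecutive memory without interleaving, again keyed off the type of
  // the first stored vector.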
4803  case AArch64ISD::ST1x2post: {
4804  VT = Node->getOperand(1).getValueType();
4805  if (VT == MVT::v8i8) {
4806  SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
4807  return;
4808  } else if (VT == MVT::v16i8) {
4809  SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
4810  return;
4811  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4812  SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
4813  return;
4814  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4815  SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
4816  return;
4817  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4818  SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
4819  return;
4820  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4821  SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
4822  return;
4823  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4824  SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
4825  return;
4826  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4827  SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
4828  return;
4829  }
4830  break;
4831  }
4832  case AArch64ISD::ST1x3post: {
4833  VT = Node->getOperand(1).getValueType();
4834  if (VT == MVT::v8i8) {
4835  SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
4836  return;
4837  } else if (VT == MVT::v16i8) {
4838  SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
4839  return;
4840  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4841  SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
4842  return;
4843  } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4844  SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
4845  return;
4846  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4847  SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
4848  return;
4849  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4850  SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
4851  return;
4852  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4853  SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
4854  return;
4855  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4856  SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
4857  return;
4858  }
4859  break;
4860  }
4861  case AArch64ISD::ST1x4post: {
4862  VT = Node->getOperand(1).getValueType();
4863  if (VT == MVT::v8i8) {
4864  SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
4865  return;
4866  } else if (VT == MVT::v16i8) {
4867  SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
4868  return;
4869  } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4870  SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
4871  return;