1 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the AArch64 target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AArch64TargetMachine.h"
14 #include "MCTargetDesc/AArch64AddressingModes.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/CodeGen/SelectionDAGISel.h"
17 #include "llvm/IR/Function.h" // To access function attributes.
18 #include "llvm/IR/GlobalValue.h"
19 #include "llvm/IR/Intrinsics.h"
20 #include "llvm/Support/Debug.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/KnownBits.h"
23 #include "llvm/Support/MathExtras.h"
24 #include "llvm/Support/raw_ostream.h"
25 
26 using namespace llvm;
27 
28 #define DEBUG_TYPE "aarch64-isel"
29 
30 //===--------------------------------------------------------------------===//
31 /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
32 /// instructions for SelectionDAG operations.
33 ///
34 namespace {
35 
36 class AArch64DAGToDAGISel : public SelectionDAGISel {
37 
38  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
39  /// make the right decision when generating code for different targets.
40  const AArch64Subtarget *Subtarget;
41 
42  bool ForCodeSize;
43 
44 public:
45  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
46  CodeGenOpt::Level OptLevel)
47  : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr),
48  ForCodeSize(false) {}
49 
50  StringRef getPassName() const override {
51  return "AArch64 Instruction Selection";
52  }
53 
54  bool runOnMachineFunction(MachineFunction &MF) override {
55  ForCodeSize = MF.getFunction().hasOptSize();
56  Subtarget = &MF.getSubtarget<AArch64Subtarget>();
57  return SelectionDAGISel::runOnMachineFunction(MF);
58  }
59 
60  void Select(SDNode *Node) override;
61 
62  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
63  /// inline asm expressions.
64  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
65  unsigned ConstraintID,
66  std::vector<SDValue> &OutOps) override;
67 
68  bool tryMLAV64LaneV128(SDNode *N);
69  bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
70  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
71  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
72  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
73  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
74  return SelectShiftedRegister(N, false, Reg, Shift);
75  }
76  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
77  return SelectShiftedRegister(N, true, Reg, Shift);
78  }
79  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
80  return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
81  }
82  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
83  return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
84  }
85  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
86  return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
87  }
88  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
89  return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
90  }
91  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
92  return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
93  }
94  bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
95  return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
96  }
97  bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
98  return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
99  }
100  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
101  return SelectAddrModeIndexed(N, 1, Base, OffImm);
102  }
103  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
104  return SelectAddrModeIndexed(N, 2, Base, OffImm);
105  }
106  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
107  return SelectAddrModeIndexed(N, 4, Base, OffImm);
108  }
109  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
110  return SelectAddrModeIndexed(N, 8, Base, OffImm);
111  }
112  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
113  return SelectAddrModeIndexed(N, 16, Base, OffImm);
114  }
115  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
116  return SelectAddrModeUnscaled(N, 1, Base, OffImm);
117  }
118  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
119  return SelectAddrModeUnscaled(N, 2, Base, OffImm);
120  }
121  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
122  return SelectAddrModeUnscaled(N, 4, Base, OffImm);
123  }
124  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
125  return SelectAddrModeUnscaled(N, 8, Base, OffImm);
126  }
127  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
128  return SelectAddrModeUnscaled(N, 16, Base, OffImm);
129  }
130 
131  template<int Width>
132  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
133  SDValue &SignExtend, SDValue &DoShift) {
134  return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
135  }
136 
137  template<int Width>
138  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
139  SDValue &SignExtend, SDValue &DoShift) {
140  return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
141  }
142 
143 
144  /// Form sequences of consecutive 64/128-bit registers for use in NEON
145  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
146  /// between 1 and 4 elements. If it contains a single element, that element
147  /// is returned unchanged; otherwise a REG_SEQUENCE value is returned.
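 /// For example, passing two v16i8 values yields a single REG_SEQUENCE in the
 /// QQ register class (subregisters qsub0/qsub1) that a following ld2 or tbl
 /// instruction can consume as one vector-list operand.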
148  SDValue createDTuple(ArrayRef<SDValue> Vecs);
149  SDValue createQTuple(ArrayRef<SDValue> Vecs);
150 
151  /// Generic helper for the createDTuple/createQTuple
152  /// functions. Those should almost always be called instead.
153  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
154  const unsigned SubRegs[]);
155 
156  void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
157 
158  bool tryIndexedLoad(SDNode *N);
159 
160  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
161  unsigned SubRegIdx);
162  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
163  unsigned SubRegIdx);
164  void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
165  void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
166 
167  void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
168  void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
169  void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
170  void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
171 
172  bool tryBitfieldExtractOp(SDNode *N);
173  bool tryBitfieldExtractOpFromSExt(SDNode *N);
174  bool tryBitfieldInsertOp(SDNode *N);
175  bool tryBitfieldInsertInZeroOp(SDNode *N);
176  bool tryShiftAmountMod(SDNode *N);
177 
178  bool tryReadRegister(SDNode *N);
179  bool tryWriteRegister(SDNode *N);
180 
181 // Include the pieces autogenerated from the target description.
182 #include "AArch64GenDAGISel.inc"
183 
184 private:
185  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
186  SDValue &Shift);
187  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
188  SDValue &OffImm) {
189  return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
190  }
191  bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
192  unsigned Size, SDValue &Base,
193  SDValue &OffImm);
194  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
195  SDValue &OffImm);
196  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
197  SDValue &OffImm);
198  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
199  SDValue &Offset, SDValue &SignExtend,
200  SDValue &DoShift);
201  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
202  SDValue &Offset, SDValue &SignExtend,
203  SDValue &DoShift);
204  bool isWorthFolding(SDValue V) const;
205  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
206  SDValue &Offset, SDValue &SignExtend);
207 
208  template<unsigned RegWidth>
209  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
210  return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
211  }
212 
213  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
214 
215  bool SelectCMP_SWAP(SDNode *N);
216 
217 };
218 } // end anonymous namespace
219 
220 /// isIntImmediate - This method tests to see if the node is a constant
221 /// operand. If so, Imm will receive the (zero-extended) 64-bit value.
222 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
223  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
224  Imm = C->getZExtValue();
225  return true;
226  }
227  return false;
228 }
229 
230 // isIntImmediate - This method tests to see if the operand is a constant.
231 // If so, Imm will receive the value.
232 static bool isIntImmediate(SDValue N, uint64_t &Imm) {
233  return isIntImmediate(N.getNode(), Imm);
234 }
235 
236 // isOpcWithIntImmediate - This method tests to see if the node has a specific
237 // opcode and an immediate integer right operand.
238 // If so, Imm will receive the value.
239 static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
240  uint64_t &Imm) {
241  return N->getOpcode() == Opc &&
242  isIntImmediate(N->getOperand(1).getNode(), Imm);
243 }
244 
245 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
246  const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
247  switch(ConstraintID) {
248  default:
249  llvm_unreachable("Unexpected asm memory constraint");
250  case InlineAsm::Constraint_i:
251  case InlineAsm::Constraint_m:
252  case InlineAsm::Constraint_Q:
253  // We need to make sure that this one operand does not end up in XZR, thus
254  // require the address to be in a PointerRegClass register.
255  const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
256  const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
257  SDLoc dl(Op);
258  SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
259  SDValue NewOp =
260  SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
261  dl, Op.getValueType(),
262  Op, RC), 0);
263  OutOps.push_back(NewOp);
264  return false;
265  }
266  return true;
267 }
268 
269 /// SelectArithImmed - Select an immediate value that can be represented as
270 /// a 12-bit value shifted left by either 0 or 12. If so, return true with
271 /// Val set to the 12-bit value and Shift set to the shifter operand.
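 /// For example, 0xabc is selected with Val = 0xabc and LSL #0, and 0xabc000
 /// with Val = 0xabc and LSL #12 (e.g. "add x0, x1, #0xabc, lsl #12"), whereas
 /// 0xabc001 matches neither form and is rejected.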
272 bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
273  SDValue &Shift) {
274  // This function is called from the addsub_shifted_imm ComplexPattern,
275  // which lists [imm] as the list of opcodes it's interested in; however,
276  // we still need to check whether the operand is actually an immediate
277  // here because the ComplexPattern opcode list is only used in
278  // root-level opcode matching.
279  if (!isa<ConstantSDNode>(N.getNode()))
280  return false;
281 
282  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
283  unsigned ShiftAmt;
284 
285  if (Immed >> 12 == 0) {
286  ShiftAmt = 0;
287  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
288  ShiftAmt = 12;
289  Immed = Immed >> 12;
290  } else
291  return false;
292 
293  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
294  SDLoc dl(N);
295  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
296  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
297  return true;
298 }
299 
300 /// SelectNegArithImmed - As above, but negates the value before trying to
301 /// select it.
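 /// For example, a (sub x0, x1, #-16) style operation can then be selected as
 /// an ADD of #16, and vice versa; only the zero immediate is excluded because
 /// of its effect on the carry flag in compares.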
302 bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
303  SDValue &Shift) {
304  // This function is called from the addsub_shifted_imm ComplexPattern,
305  // which lists [imm] as the list of opcodes it's interested in; however,
306  // we still need to check whether the operand is actually an immediate
307  // here because the ComplexPattern opcode list is only used in
308  // root-level opcode matching.
309  if (!isa<ConstantSDNode>(N.getNode()))
310  return false;
311 
312  // The immediate operand must be a 24-bit zero-extended immediate.
313  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
314 
315  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
316  // have the opposite effect on the C flag, so this pattern mustn't match under
317  // those circumstances.
318  if (Immed == 0)
319  return false;
320 
321  if (N.getValueType() == MVT::i32)
322  Immed = ~((uint32_t)Immed) + 1;
323  else
324  Immed = ~Immed + 1ULL;
325  if (Immed & 0xFFFFFFFFFF000000ULL)
326  return false;
327 
328  Immed &= 0xFFFFFFULL;
329  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
330  Shift);
331 }
332 
333 /// getShiftTypeForNode - Translate a shift node to the corresponding
334 /// ShiftType value.
335 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
336  switch (N.getOpcode()) {
337  default:
338  return AArch64_AM::InvalidShiftExtend;
339  case ISD::SHL:
340  return AArch64_AM::LSL;
341  case ISD::SRL:
342  return AArch64_AM::LSR;
343  case ISD::SRA:
344  return AArch64_AM::ASR;
345  case ISD::ROTR:
346  return AArch64_AM::ROR;
347  }
348 }
349 
350 /// Determine whether it is worth it to fold SHL into the addressing
351 /// mode.
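/// For example, (add x1, (shl x2, 3)) feeding only loads/stores can be folded
/// into "ldr x0, [x1, x2, lsl #3]", whereas a shift amount above 3 cannot be
/// folded into the addressing mode.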
352 static bool isWorthFoldingSHL(SDValue V) {
353  assert(V.getOpcode() == ISD::SHL && "invalid opcode");
354  // It is worth folding a logical shift of up to three places.
355  auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
356  if (!CSD)
357  return false;
358  unsigned ShiftVal = CSD->getZExtValue();
359  if (ShiftVal > 3)
360  return false;
361 
362  // Check if this particular node is reused in any non-memory related
363  // operation. If yes, do not try to fold this node into the address
364  // computation, since the computation will be kept.
365  const SDNode *Node = V.getNode();
366  for (SDNode *UI : Node->uses())
367  if (!isa<MemSDNode>(*UI))
368  for (SDNode *UII : UI->uses())
369  if (!isa<MemSDNode>(*UII))
370  return false;
371  return true;
372 }
373 
374 /// Determine whether it is worth folding V into an extended register.
375 bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
376  // Trivial if we are optimizing for code size or if there is only
377  // one use of the value.
378  if (ForCodeSize || V.hasOneUse())
379  return true;
380  // If a subtarget has a fastpath LSL we can fold a logical shift into
381  // the addressing mode and save a cycle.
382  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
383  isWorthFoldingSHL(V))
384  return true;
385  if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
386  const SDValue LHS = V.getOperand(0);
387  const SDValue RHS = V.getOperand(1);
388  if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
389  return true;
390  if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
391  return true;
392  }
393 
394  // It hurts otherwise, since the value will be reused.
395  return false;
396 }
397 
398 /// SelectShiftedRegister - Select a "shifted register" operand. If the value
399 /// is not shifted, set the Shift operand to default of "LSL 0". The logical
400 /// instructions allow the shifted register to be rotated, but the arithmetic
401 /// instructions do not. The AllowROR parameter specifies whether ROR is
402 /// supported.
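/// For example, (add w1, (shl w2, 4)) can be selected as "add w0, w1, w2,
/// lsl #4", and (or w1, (rotr w2, 7)) as "orr w0, w1, w2, ror #7", since ROR
/// is only legal for the logical instructions.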
403 bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
404  SDValue &Reg, SDValue &Shift) {
405  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
406  if (ShType == AArch64_AM::InvalidShiftExtend)
407  return false;
408  if (!AllowROR && ShType == AArch64_AM::ROR)
409  return false;
410 
411  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
412  unsigned BitSize = N.getValueSizeInBits();
413  unsigned Val = RHS->getZExtValue() & (BitSize - 1);
414  unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
415 
416  Reg = N.getOperand(0);
417  Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
418  return isWorthFolding(N);
419  }
420 
421  return false;
422 }
423 
424 /// getExtendTypeForNode - Translate an extend node to the corresponding
425 /// ExtendType value.
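/// For example, (and x0, 0xffff) maps to UXTH, (sext_inreg x0, i8) maps to
/// SXTB, and a zero_extend from i32 maps to UXTW.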
426 static AArch64_AM::ShiftExtendType
427 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
428  if (N.getOpcode() == ISD::SIGN_EXTEND ||
429  N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
430  EVT SrcVT;
431  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
432  SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
433  else
434  SrcVT = N.getOperand(0).getValueType();
435 
436  if (!IsLoadStore && SrcVT == MVT::i8)
437  return AArch64_AM::SXTB;
438  else if (!IsLoadStore && SrcVT == MVT::i16)
439  return AArch64_AM::SXTH;
440  else if (SrcVT == MVT::i32)
441  return AArch64_AM::SXTW;
442  assert(SrcVT != MVT::i64 && "extend from 64-bits?");
443 
444  return AArch64_AM::InvalidShiftExtend;
445  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
446  N.getOpcode() == ISD::ANY_EXTEND) {
447  EVT SrcVT = N.getOperand(0).getValueType();
448  if (!IsLoadStore && SrcVT == MVT::i8)
449  return AArch64_AM::UXTB;
450  else if (!IsLoadStore && SrcVT == MVT::i16)
451  return AArch64_AM::UXTH;
452  else if (SrcVT == MVT::i32)
453  return AArch64_AM::UXTW;
454  assert(SrcVT != MVT::i64 && "extend from 64-bits?");
455 
456  return AArch64_AM::InvalidShiftExtend;
457  } else if (N.getOpcode() == ISD::AND) {
458  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
459  if (!CSD)
460  return AArch64_AM::InvalidShiftExtend;
461  uint64_t AndMask = CSD->getZExtValue();
462 
463  switch (AndMask) {
464  default:
465  return AArch64_AM::InvalidShiftExtend;
466  case 0xFF:
467  return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
468  case 0xFFFF:
469  return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
470  case 0xFFFFFFFF:
471  return AArch64_AM::UXTW;
472  }
473  }
474 
475  return AArch64_AM::InvalidShiftExtend;
476 }
477 
478 // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
479 static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
480  if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
481  DL->getOpcode() != AArch64ISD::DUPLANE32)
482  return false;
483 
484  SDValue SV = DL->getOperand(0);
485  if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
486  return false;
487 
488  SDValue EV = SV.getOperand(1);
489  if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
490  return false;
491 
492  ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
493  ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
494  LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
495  LaneOp = EV.getOperand(0);
496 
497  return true;
498 }
499 
500 // Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
501 // high lane extract.
502 static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
503  SDValue &LaneOp, int &LaneIdx) {
504 
505  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
506  std::swap(Op0, Op1);
507  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
508  return false;
509  }
510  StdOp = Op1;
511  return true;
512 }
513 
514 /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
515 /// is a lane in the upper half of a 128-bit vector. Recognize and select this
516 /// so that we don't emit unnecessary lane extracts.
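/// For example, an accumulate of (mul v1.4h, (dup v2.h[5])) can be selected
/// directly as "mla v0.4h, v1.4h, v2.h[5]" rather than first extracting the
/// high half of v2 into a separate 64-bit register.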
517 bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
518  SDLoc dl(N);
519  SDValue Op0 = N->getOperand(0);
520  SDValue Op1 = N->getOperand(1);
521  SDValue MLAOp1; // Will hold ordinary multiplicand for MLA.
522  SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA.
523  int LaneIdx = -1; // Will hold the lane index.
524 
525  if (Op1.getOpcode() != ISD::MUL ||
526  !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
527  LaneIdx)) {
528  std::swap(Op0, Op1);
529  if (Op1.getOpcode() != ISD::MUL ||
530  !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
531  LaneIdx))
532  return false;
533  }
534 
535  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
536 
537  SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
538 
539  unsigned MLAOpc = ~0U;
540 
541  switch (N->getSimpleValueType(0).SimpleTy) {
542  default:
543  llvm_unreachable("Unrecognized MLA.");
544  case MVT::v4i16:
545  MLAOpc = AArch64::MLAv4i16_indexed;
546  break;
547  case MVT::v8i16:
548  MLAOpc = AArch64::MLAv8i16_indexed;
549  break;
550  case MVT::v2i32:
551  MLAOpc = AArch64::MLAv2i32_indexed;
552  break;
553  case MVT::v4i32:
554  MLAOpc = AArch64::MLAv4i32_indexed;
555  break;
556  }
557 
558  ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
559  return true;
560 }
561 
562 bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
563  SDLoc dl(N);
564  SDValue SMULLOp0;
565  SDValue SMULLOp1;
566  int LaneIdx;
567 
568  if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
569  LaneIdx))
570  return false;
571 
572  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
573 
574  SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
575 
576  unsigned SMULLOpc = ~0U;
577 
578  if (IntNo == Intrinsic::aarch64_neon_smull) {
579  switch (N->getSimpleValueType(0).SimpleTy) {
580  default:
581  llvm_unreachable("Unrecognized SMULL.");
582  case MVT::v4i32:
583  SMULLOpc = AArch64::SMULLv4i16_indexed;
584  break;
585  case MVT::v2i64:
586  SMULLOpc = AArch64::SMULLv2i32_indexed;
587  break;
588  }
589  } else if (IntNo == Intrinsic::aarch64_neon_umull) {
590  switch (N->getSimpleValueType(0).SimpleTy) {
591  default:
592  llvm_unreachable("Unrecognized UMULL.");
593  case MVT::v4i32:
594  SMULLOpc = AArch64::UMULLv4i16_indexed;
595  break;
596  case MVT::v2i64:
597  SMULLOpc = AArch64::UMULLv2i32_indexed;
598  break;
599  }
600  } else
601  llvm_unreachable("Unrecognized intrinsic.");
602 
603  ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
604  return true;
605 }
606 
607 /// Instructions that accept extend modifiers like UXTW expect the register
608 /// being extended to be a GPR32, but the incoming DAG might be acting on a
609 /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
610 /// this is the case.
611 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
612  if (N.getValueType() == MVT::i32)
613  return N;
614 
615  SDLoc dl(N);
616  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
617  MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
618  dl, MVT::i32, N, SubReg);
619  return SDValue(Node, 0);
620 }
621 
622 
623 /// SelectArithExtendedRegister - Select a "extended register" operand. This
624 /// operand folds in an extend followed by an optional left shift.
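/// For example, (add x1, (shl (sext_inreg x2, i32), 2)) can be selected as
/// "add x0, x1, w2, sxtw #2"; left shifts of more than 4 are rejected below.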
625 bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
626  SDValue &Shift) {
627  unsigned ShiftVal = 0;
628  AArch64_AM::ShiftExtendType Ext;
629 
630  if (N.getOpcode() == ISD::SHL) {
631  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
632  if (!CSD)
633  return false;
634  ShiftVal = CSD->getZExtValue();
635  if (ShiftVal > 4)
636  return false;
637 
638  Ext = getExtendTypeForNode(N.getOperand(0));
639  if (Ext == AArch64_AM::InvalidShiftExtend)
640  return false;
641 
642  Reg = N.getOperand(0).getOperand(0);
643  } else {
644  Ext = getExtendTypeForNode(N);
645  if (Ext == AArch64_AM::InvalidShiftExtend)
646  return false;
647 
648  Reg = N.getOperand(0);
649 
650  // Don't match if free 32-bit -> 64-bit zext can be used instead.
651  if (Ext == AArch64_AM::UXTW &&
652  Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode()))
653  return false;
654  }
655 
656  // AArch64 mandates that the RHS of the operation must use the smallest
657  // register class that could contain the size being extended from. Thus,
658  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
659  // there might not be an actual 32-bit value in the program. We can
660  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
661  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
662  Reg = narrowIfNeeded(CurDAG, Reg);
663  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
664  MVT::i32);
665  return isWorthFolding(N);
666 }
667 
668 /// If there's a use of this ADDlow that's not itself a load/store then we'll
669 /// need to create a real ADD instruction from it anyway and there's no point in
670 /// folding it into the mem op. Theoretically, it shouldn't matter, but there's
671 /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
672 /// leads to duplicated ADRP instructions.
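/// For example, folding is profitable when every user is a memory access such
/// as "ldr x0, [x8, :lo12:var]"; if any user needs the full address, the
/// "add x8, x8, :lo12:var" must be emitted anyway.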
673 static bool isWorthFoldingADDlow(SDValue N) {
674  for (auto Use : N->uses()) {
675  if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
676  Use->getOpcode() != ISD::ATOMIC_LOAD &&
677  Use->getOpcode() != ISD::ATOMIC_STORE)
678  return false;
679 
680  // ldar and stlr have much more restrictive addressing modes (just a
681  // register).
682  if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getOrdering()))
683  return false;
684  }
685 
686  return true;
687 }
688 
689 /// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
690 /// immediate" address. The "Size" argument is the size in bytes of the memory
691 /// reference, which determines the scale.
692 bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
693  unsigned BW, unsigned Size,
694  SDValue &Base,
695  SDValue &OffImm) {
696  SDLoc dl(N);
697  const DataLayout &DL = CurDAG->getDataLayout();
698  const TargetLowering *TLI = getTargetLowering();
699  if (N.getOpcode() == ISD::FrameIndex) {
700  int FI = cast<FrameIndexSDNode>(N)->getIndex();
701  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
702  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
703  return true;
704  }
705 
706  // As opposed to the (12-bit) Indexed addressing mode below, the (un)signed
707  // BW-bit mode selected here doesn't support labels/immediates, only base+offset.
708  if (CurDAG->isBaseWithConstantOffset(N)) {
709  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
710  if (IsSignedImm) {
711  int64_t RHSC = RHS->getSExtValue();
712  unsigned Scale = Log2_32(Size);
713  int64_t Range = 0x1LL << (BW - 1);
714 
715  if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
716  RHSC < (Range << Scale)) {
717  Base = N.getOperand(0);
718  if (Base.getOpcode() == ISD::FrameIndex) {
719  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
720  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
721  }
722  OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
723  return true;
724  }
725  } else {
726  // unsigned Immediate
727  uint64_t RHSC = RHS->getZExtValue();
728  unsigned Scale = Log2_32(Size);
729  uint64_t Range = 0x1ULL << BW;
730 
731  if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
732  Base = N.getOperand(0);
733  if (Base.getOpcode() == ISD::FrameIndex) {
734  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
735  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
736  }
737  OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
738  return true;
739  }
740  }
741  }
742  }
743  // Base only. The address will be materialized into a register before
744  // the memory is accessed.
745  // add x0, Xbase, #offset
746  // stp x1, x2, [x0]
747  Base = N;
748  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
749  return true;
750 }
751 
752 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
753 /// immediate" address. The "Size" argument is the size in bytes of the memory
754 /// reference, which determines the scale.
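/// For example, with Size == 8 the offset must be a multiple of 8 in
/// [0, 32760], so "ldr x0, [x1, #32760]" is selectable here while an offset
/// of -8 or 32768 is not.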
755 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
756  SDValue &Base, SDValue &OffImm) {
757  SDLoc dl(N);
758  const DataLayout &DL = CurDAG->getDataLayout();
759  const TargetLowering *TLI = getTargetLowering();
760  if (N.getOpcode() == ISD::FrameIndex) {
761  int FI = cast<FrameIndexSDNode>(N)->getIndex();
762  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
763  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
764  return true;
765  }
766 
767  if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
768  GlobalAddressSDNode *GAN =
769  dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
770  Base = N.getOperand(0);
771  OffImm = N.getOperand(1);
772  if (!GAN)
773  return true;
774 
775  if (GAN->getOffset() % Size == 0) {
776  const GlobalValue *GV = GAN->getGlobal();
777  unsigned Alignment = GV->getAlignment();
778  Type *Ty = GV->getValueType();
779  if (Alignment == 0 && Ty->isSized())
780  Alignment = DL.getABITypeAlignment(Ty);
781 
782  if (Alignment >= Size)
783  return true;
784  }
785  }
786 
787  if (CurDAG->isBaseWithConstantOffset(N)) {
788  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
789  int64_t RHSC = (int64_t)RHS->getZExtValue();
790  unsigned Scale = Log2_32(Size);
791  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
792  Base = N.getOperand(0);
793  if (Base.getOpcode() == ISD::FrameIndex) {
794  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
795  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
796  }
797  OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
798  return true;
799  }
800  }
801  }
802 
803  // Before falling back to our general case, check if the unscaled
804  // instructions can handle this. If so, that's preferable.
805  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
806  return false;
807 
808  // Base only. The address will be materialized into a register before
809  // the memory is accessed.
810  // add x0, Xbase, #offset
811  // ldr x0, [x0]
812  Base = N;
813  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
814  return true;
815 }
816 
817 /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
818 /// immediate" address. This should only match when there is an offset that
819 /// is not valid for a scaled immediate addressing mode. The "Size" argument
820 /// is the size in bytes of the memory reference, which is needed here to know
821 /// what is valid for a scaled immediate.
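/// For example, an 8-byte load with offset -8 or 17 cannot use the scaled
/// form and is instead selected as "ldur x0, [x1, #-8]" or
/// "ldur x0, [x1, #17]".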
822 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
823  SDValue &Base,
824  SDValue &OffImm) {
825  if (!CurDAG->isBaseWithConstantOffset(N))
826  return false;
827  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
828  int64_t RHSC = RHS->getSExtValue();
829  // If the offset is valid as a scaled immediate, don't match here.
830  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
831  RHSC < (0x1000 << Log2_32(Size)))
832  return false;
833  if (RHSC >= -256 && RHSC < 256) {
834  Base = N.getOperand(0);
835  if (Base.getOpcode() == ISD::FrameIndex) {
836  int FI = cast<FrameIndexSDNode>(Base)->getIndex();
837  const TargetLowering *TLI = getTargetLowering();
838  Base = CurDAG->getTargetFrameIndex(
839  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
840  }
841  OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
842  return true;
843  }
844  }
845  return false;
846 }
847 
848 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
849  SDLoc dl(N);
850  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
851  SDValue ImpDef = SDValue(
852  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
853  MachineSDNode *Node = CurDAG->getMachineNode(
854  TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
855  return SDValue(Node, 0);
856 }
857 
858 /// Check if the given SHL node (\p N) can be used to form an
859 /// extended register for an addressing mode.
860 bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
861  bool WantExtend, SDValue &Offset,
862  SDValue &SignExtend) {
863  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
864  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
865  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
866  return false;
867 
868  SDLoc dl(N);
869  if (WantExtend) {
870  AArch64_AM::ShiftExtendType Ext =
871  getExtendTypeForNode(N.getOperand(0), true);
872  if (Ext == AArch64_AM::InvalidShiftExtend)
873  return false;
874 
875  Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
876  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
877  MVT::i32);
878  } else {
879  Offset = N.getOperand(0);
880  SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
881  }
882 
883  unsigned LegalShiftVal = Log2_32(Size);
884  unsigned ShiftVal = CSD->getZExtValue();
885 
886  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
887  return false;
888 
889  return isWorthFolding(N);
890 }
891 
892 bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
893  SDValue &Base, SDValue &Offset,
894  SDValue &SignExtend,
895  SDValue &DoShift) {
896  if (N.getOpcode() != ISD::ADD)
897  return false;
898  SDValue LHS = N.getOperand(0);
899  SDValue RHS = N.getOperand(1);
900  SDLoc dl(N);
901 
902  // We don't want to match immediate adds here, because they are better lowered
903  // to the register-immediate addressing modes.
904  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
905  return false;
906 
907  // Check if this particular node is reused in any non-memory related
908  // operation. If yes, do not try to fold this node into the address
909  // computation, since the computation will be kept.
910  const SDNode *Node = N.getNode();
911  for (SDNode *UI : Node->uses()) {
912  if (!isa<MemSDNode>(*UI))
913  return false;
914  }
915 
916  // Remember if it is worth folding N when it produces extended register.
917  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
918 
919  // Try to match a shifted extend on the RHS.
920  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
921  SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
922  Base = LHS;
923  DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
924  return true;
925  }
926 
927  // Try to match a shifted extend on the LHS.
928  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
929  SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
930  Base = RHS;
931  DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
932  return true;
933  }
934 
935  // There was no shift, whatever else we find.
936  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
937 
938  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
939  // Try to match an unshifted extend on the LHS.
940  if (IsExtendedRegisterWorthFolding &&
941  (Ext = getExtendTypeForNode(LHS, true)) !=
942  AArch64_AM::InvalidShiftExtend) {
943  Base = RHS;
944  Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
945  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
946  MVT::i32);
947  if (isWorthFolding(LHS))
948  return true;
949  }
950 
951  // Try to match an unshifted extend on the RHS.
952  if (IsExtendedRegisterWorthFolding &&
953  (Ext = getExtendTypeForNode(RHS, true)) !=
954  AArch64_AM::InvalidShiftExtend) {
955  Base = LHS;
956  Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
957  SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
958  MVT::i32);
959  if (isWorthFolding(RHS))
960  return true;
961  }
962 
963  return false;
964 }
965 
966 // Check if the given immediate is preferred by ADD. If an immediate can be
967 // encoded in an ADD, or if it can be encoded in an "ADD LSL #12" and cannot be
968 // encoded by a single MOVZ, return true.
969 static bool isPreferredADD(int64_t ImmOff) {
970  // Constant in [0x0, 0xfff] can be encoded in ADD.
971  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
972  return true;
973  // Check if it can be encoded in an "ADD LSL #12".
974  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
975  // As a single MOVZ is faster than an "ADD LSL #12", ignore such constants.
976  return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
977  (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
978  return false;
979 }
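// For example, 0x123000 is preferred as "add x0, x1, #0x123, lsl #12", while
// 0xab0000 is not, since a single "movz x2, #0xab, lsl #16" already
// materializes it.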
980 
981 bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
982  SDValue &Base, SDValue &Offset,
983  SDValue &SignExtend,
984  SDValue &DoShift) {
985  if (N.getOpcode() != ISD::ADD)
986  return false;
987  SDValue LHS = N.getOperand(0);
988  SDValue RHS = N.getOperand(1);
989  SDLoc DL(N);
990 
991  // Check if this particular node is reused in any non-memory related
992  // operation. If yes, do not try to fold this node into the address
993  // computation, since the computation will be kept.
994  const SDNode *Node = N.getNode();
995  for (SDNode *UI : Node->uses()) {
996  if (!isa<MemSDNode>(*UI))
997  return false;
998  }
999 
1000  // Watch out if RHS is a wide immediate, as it cannot be selected into the
1001  // [BaseReg+Imm] addressing mode, and it may not be encodable in an
1002  // ADD/SUB. Instead it will use the [BaseReg + 0] address mode and generate
1003  // instructions like:
1004  // MOV X0, WideImmediate
1005  // ADD X1, BaseReg, X0
1006  // LDR X2, [X1, 0]
1007  // For such situation, using [BaseReg, XReg] addressing mode can save one
1008  // ADD/SUB:
1009  // MOV X0, WideImmediate
1010  // LDR X2, [BaseReg, X0]
1011  if (isa<ConstantSDNode>(RHS)) {
1012  int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
1013  unsigned Scale = Log2_32(Size);
1014  // Skip immediates that can be selected in the load/store addressing mode.
1015  // Also skip immediates that can be encoded by a single ADD (SUB is also
1016  // checked by using -ImmOff).
1017  if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
1018  isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1019  return false;
1020 
1021  SDValue Ops[] = { RHS };
1022  SDNode *MOVI =
1023  CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1024  SDValue MOVIV = SDValue(MOVI, 0);
1025  // This ADD of two X registers will be selected into [Reg+Reg] mode.
1026  N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1027  }
1028 
1029  // Remember if it is worth folding N when it produces extended register.
1030  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
1031 
1032  // Try to match a shifted extend on the RHS.
1033  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1034  SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1035  Base = LHS;
1036  DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1037  return true;
1038  }
1039 
1040  // Try to match a shifted extend on the LHS.
1041  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1042  SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1043  Base = RHS;
1044  DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1045  return true;
1046  }
1047 
1048  // Match any non-shifted, non-extend, non-immediate add expression.
1049  Base = LHS;
1050  Offset = RHS;
1051  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1052  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1053  // Reg1 + Reg2 is free: no check needed.
1054  return true;
1055 }
1056 
1057 SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1058  static const unsigned RegClassIDs[] = {
1059  AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1060  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1061  AArch64::dsub2, AArch64::dsub3};
1062 
1063  return createTuple(Regs, RegClassIDs, SubRegs);
1064 }
1065 
1066 SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1067  static const unsigned RegClassIDs[] = {
1068  AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1069  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1070  AArch64::qsub2, AArch64::qsub3};
1071 
1072  return createTuple(Regs, RegClassIDs, SubRegs);
1073 }
1074 
1075 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1076  const unsigned RegClassIDs[],
1077  const unsigned SubRegs[]) {
1078  // There's no special register-class for a vector-list of 1 element: it's just
1079  // a vector.
1080  if (Regs.size() == 1)
1081  return Regs[0];
1082 
1083  assert(Regs.size() >= 2 && Regs.size() <= 4);
1084 
1085  SDLoc DL(Regs[0]);
1086 
1087  SmallVector<SDValue, 4> Ops;
1088 
1089  // First operand of REG_SEQUENCE is the desired RegClass.
1090  Ops.push_back(
1091  CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1092 
1093  // Then we get pairs of source & subregister-position for the components.
1094  for (unsigned i = 0; i < Regs.size(); ++i) {
1095  Ops.push_back(Regs[i]);
1096  Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1097  }
1098 
1099  SDNode *N =
1100  CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1101  return SDValue(N, 0);
1102 }
1103 
1104 void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1105  bool isExt) {
1106  SDLoc dl(N);
1107  EVT VT = N->getValueType(0);
1108 
1109  unsigned ExtOff = isExt;
1110 
1111  // Form a REG_SEQUENCE to force register allocation.
1112  unsigned Vec0Off = ExtOff + 1;
1113  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1114  N->op_begin() + Vec0Off + NumVecs);
1115  SDValue RegSeq = createQTuple(Regs);
1116 
1117  SmallVector<SDValue, 6> Ops;
1118  if (isExt)
1119  Ops.push_back(N->getOperand(1));
1120  Ops.push_back(RegSeq);
1121  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1122  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1123 }
1124 
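// Select pre/post-incremented loads. For example, a post-indexed i64 load is
// selected as "ldr x0, [x1], #8" and a pre-indexed one as "ldr x0, [x1, #8]!",
// with the updated base address produced as an additional result.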
1125 bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1126  LoadSDNode *LD = cast<LoadSDNode>(N);
1127  if (LD->isUnindexed())
1128  return false;
1129  EVT VT = LD->getMemoryVT();
1130  EVT DstVT = N->getValueType(0);
1131  ISD::MemIndexedMode AM = LD->getAddressingMode();
1132  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1133 
1134  // We're not doing validity checking here. That was done when checking
1135  // if we should mark the load as indexed or not. We're just selecting
1136  // the right instruction.
1137  unsigned Opcode = 0;
1138 
1139  ISD::LoadExtType ExtType = LD->getExtensionType();
1140  bool InsertTo64 = false;
1141  if (VT == MVT::i64)
1142  Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1143  else if (VT == MVT::i32) {
1144  if (ExtType == ISD::NON_EXTLOAD)
1145  Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1146  else if (ExtType == ISD::SEXTLOAD)
1147  Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1148  else {
1149  Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1150  InsertTo64 = true;
1151  // The result of the load is only i32. It's the subreg_to_reg that makes
1152  // it into an i64.
1153  DstVT = MVT::i32;
1154  }
1155  } else if (VT == MVT::i16) {
1156  if (ExtType == ISD::SEXTLOAD) {
1157  if (DstVT == MVT::i64)
1158  Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1159  else
1160  Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1161  } else {
1162  Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1163  InsertTo64 = DstVT == MVT::i64;
1164  // The result of the load is only i32. It's the subreg_to_reg that makes
1165  // it into an i64.
1166  DstVT = MVT::i32;
1167  }
1168  } else if (VT == MVT::i8) {
1169  if (ExtType == ISD::SEXTLOAD) {
1170  if (DstVT == MVT::i64)
1171  Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1172  else
1173  Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1174  } else {
1175  Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1176  InsertTo64 = DstVT == MVT::i64;
1177  // The result of the load is only i32. It's the subreg_to_reg that makes
1178  // it into an i64.
1179  DstVT = MVT::i32;
1180  }
1181  } else if (VT == MVT::f16) {
1182  Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1183  } else if (VT == MVT::f32) {
1184  Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1185  } else if (VT == MVT::f64 || VT.is64BitVector()) {
1186  Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1187  } else if (VT.is128BitVector()) {
1188  Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1189  } else
1190  return false;
1191  SDValue Chain = LD->getChain();
1192  SDValue Base = LD->getBasePtr();
1193  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1194  int OffsetVal = (int)OffsetOp->getZExtValue();
1195  SDLoc dl(N);
1196  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1197  SDValue Ops[] = { Base, Offset, Chain };
1198  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1199  MVT::Other, Ops);
1200  // Either way, we're replacing the node, so tell the caller that.
1201  SDValue LoadedVal = SDValue(Res, 1);
1202  if (InsertTo64) {
1203  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1204  LoadedVal =
1205  SDValue(CurDAG->getMachineNode(
1206  AArch64::SUBREG_TO_REG, dl, MVT::i64,
1207  CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1208  SubReg),
1209  0);
1210  }
1211 
1212  ReplaceUses(SDValue(N, 0), LoadedVal);
1213  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1214  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1215  CurDAG->RemoveDeadNode(N);
1216  return true;
1217 }
1218 
1219 void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1220  unsigned SubRegIdx) {
1221  SDLoc dl(N);
1222  EVT VT = N->getValueType(0);
1223  SDValue Chain = N->getOperand(0);
1224 
1225  SDValue Ops[] = {N->getOperand(2), // Mem operand;
1226  Chain};
1227 
1228  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1229 
1230  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1231  SDValue SuperReg = SDValue(Ld, 0);
1232  for (unsigned i = 0; i < NumVecs; ++i)
1233  ReplaceUses(SDValue(N, i),
1234  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1235 
1236  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1237 
1238  // Transfer memoperands.
1239  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1240  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1241 
1242  CurDAG->RemoveDeadNode(N);
1243 }
1244 
1245 void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1246  unsigned Opc, unsigned SubRegIdx) {
1247  SDLoc dl(N);
1248  EVT VT = N->getValueType(0);
1249  SDValue Chain = N->getOperand(0);
1250 
1251  SDValue Ops[] = {N->getOperand(1), // Mem operand
1252  N->getOperand(2), // Incremental
1253  Chain};
1254 
1255  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1256  MVT::Untyped, MVT::Other};
1257 
1258  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1259 
1260  // Update uses of write back register
1261  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1262 
1263  // Update uses of vector list
1264  SDValue SuperReg = SDValue(Ld, 1);
1265  if (NumVecs == 1)
1266  ReplaceUses(SDValue(N, 0), SuperReg);
1267  else
1268  for (unsigned i = 0; i < NumVecs; ++i)
1269  ReplaceUses(SDValue(N, i),
1270  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1271 
1272  // Update the chain
1273  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1274  CurDAG->RemoveDeadNode(N);
1275 }
1276 
1277 void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
1278  unsigned Opc) {
1279  SDLoc dl(N);
1280  EVT VT = N->getOperand(2)->getValueType(0);
1281 
1282  // Form a REG_SEQUENCE to force register allocation.
1283  bool Is128Bit = VT.getSizeInBits() == 128;
1284  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1285  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1286 
1287  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
1288  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
1289 
1290  // Transfer memoperands.
1291  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1292  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1293 
1294  ReplaceNode(N, St);
1295 }
1296 
1297 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
1298  unsigned Opc) {
1299  SDLoc dl(N);
1300  EVT VT = N->getOperand(2)->getValueType(0);
1301  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1302  MVT::Other}; // Type for the Chain
1303 
1304  // Form a REG_SEQUENCE to force register allocation.
1305  bool Is128Bit = VT.getSizeInBits() == 128;
1306  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1307  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1308 
1309  SDValue Ops[] = {RegSeq,
1310  N->getOperand(NumVecs + 1), // base register
1311  N->getOperand(NumVecs + 2), // Incremental
1312  N->getOperand(0)}; // Chain
1313  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1314 
1315  ReplaceNode(N, St);
1316 }
1317 
1318 namespace {
1319 /// WidenVector - Given a value in the V64 register class, produce the
1320 /// equivalent value in the V128 register class.
1321 class WidenVector {
1322  SelectionDAG &DAG;
1323 
1324 public:
1325  WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
1326 
1327  SDValue operator()(SDValue V64Reg) {
1328  EVT VT = V64Reg.getValueType();
1329  unsigned NarrowSize = VT.getVectorNumElements();
1330  MVT EltTy = VT.getVectorElementType().getSimpleVT();
1331  MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
1332  SDLoc DL(V64Reg);
1333 
1334  SDValue Undef =
1335  SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
1336  return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
1337  }
1338 };
1339 } // namespace
1340 
1341 /// NarrowVector - Given a value in the V128 register class, produce the
1342 /// equivalent value in the V64 register class.
1343 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
1344  EVT VT = V128Reg.getValueType();
1345  unsigned WideSize = VT.getVectorNumElements();
1346  MVT EltTy = VT.getVectorElementType().getSimpleVT();
1347  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
1348 
1349  return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
1350  V128Reg);
1351 }
1352 
1353 void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
1354  unsigned Opc) {
1355  SDLoc dl(N);
1356  EVT VT = N->getValueType(0);
1357  bool Narrow = VT.getSizeInBits() == 64;
1358 
1359  // Form a REG_SEQUENCE to force register allocation.
1360  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1361 
1362  if (Narrow)
1363  transform(Regs, Regs.begin(),
1364  WidenVector(*CurDAG));
1365 
1366  SDValue RegSeq = createQTuple(Regs);
1367 
1368  const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1369 
1370  unsigned LaneNo =
1371  cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1372 
1373  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1374  N->getOperand(NumVecs + 3), N->getOperand(0)};
1375  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1376  SDValue SuperReg = SDValue(Ld, 0);
1377 
1378  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1379  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1380  AArch64::qsub2, AArch64::qsub3 };
1381  for (unsigned i = 0; i < NumVecs; ++i) {
1382  SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
1383  if (Narrow)
1384  NV = NarrowVector(NV, *CurDAG);
1385  ReplaceUses(SDValue(N, i), NV);
1386  }
1387 
1388  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1389  CurDAG->RemoveDeadNode(N);
1390 }
1391 
1392 void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
1393  unsigned Opc) {
1394  SDLoc dl(N);
1395  EVT VT = N->getValueType(0);
1396  bool Narrow = VT.getSizeInBits() == 64;
1397 
1398  // Form a REG_SEQUENCE to force register allocation.
1399  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1400 
1401  if (Narrow)
1402  transform(Regs, Regs.begin(),
1403  WidenVector(*CurDAG));
1404 
1405  SDValue RegSeq = createQTuple(Regs);
1406 
1407  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1408  RegSeq->getValueType(0), MVT::Other};
1409 
1410  unsigned LaneNo =
1411  cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1412 
1413  SDValue Ops[] = {RegSeq,
1414  CurDAG->getTargetConstant(LaneNo, dl,
1415  MVT::i64), // Lane Number
1416  N->getOperand(NumVecs + 2), // Base register
1417  N->getOperand(NumVecs + 3), // Incremental
1418  N->getOperand(0)};
1419  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1420 
1421  // Update uses of the write back register
1422  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1423 
1424  // Update uses of the vector list
1425  SDValue SuperReg = SDValue(Ld, 1);
1426  if (NumVecs == 1) {
1427  ReplaceUses(SDValue(N, 0),
1428  Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
1429  } else {
1430  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1431  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1432  AArch64::qsub2, AArch64::qsub3 };
1433  for (unsigned i = 0; i < NumVecs; ++i) {
1434  SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
1435  SuperReg);
1436  if (Narrow)
1437  NV = NarrowVector(NV, *CurDAG);
1438  ReplaceUses(SDValue(N, i), NV);
1439  }
1440  }
1441 
1442  // Update the Chain
1443  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1444  CurDAG->RemoveDeadNode(N);
1445 }
1446 
1447 void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
1448  unsigned Opc) {
1449  SDLoc dl(N);
1450  EVT VT = N->getOperand(2)->getValueType(0);
1451  bool Narrow = VT.getSizeInBits() == 64;
1452 
1453  // Form a REG_SEQUENCE to force register allocation.
1454  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1455 
1456  if (Narrow)
1457  transform(Regs, Regs.begin(),
1458  WidenVector(*CurDAG));
1459 
1460  SDValue RegSeq = createQTuple(Regs);
1461 
1462  unsigned LaneNo =
1463  cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1464 
1465  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1466  N->getOperand(NumVecs + 3), N->getOperand(0)};
1467  SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
1468 
1469  // Transfer memoperands.
1470  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1471  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1472 
1473  ReplaceNode(N, St);
1474 }
1475 
1476 void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
1477  unsigned Opc) {
1478  SDLoc dl(N);
1479  EVT VT = N->getOperand(2)->getValueType(0);
1480  bool Narrow = VT.getSizeInBits() == 64;
1481 
1482  // Form a REG_SEQUENCE to force register allocation.
1483  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1484 
1485  if (Narrow)
1486  transform(Regs, Regs.begin(),
1487  WidenVector(*CurDAG));
1488 
1489  SDValue RegSeq = createQTuple(Regs);
1490 
1491  const EVT ResTys[] = {MVT::i64, // Type of the write back register
1492  MVT::Other};
1493 
1494  unsigned LaneNo =
1495  cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1496 
1497  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1498  N->getOperand(NumVecs + 2), // Base Register
1499  N->getOperand(NumVecs + 3), // Incremental
1500  N->getOperand(0)};
1501  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1502 
1503  // Transfer memoperands.
1504  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1505  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1506 
1507  ReplaceNode(N, St);
1508 }
1509 
1510 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
1511  unsigned &Opc, SDValue &Opd0,
1512  unsigned &LSB, unsigned &MSB,
1513  unsigned NumberOfIgnoredLowBits,
1514  bool BiggerPattern) {
1515  assert(N->getOpcode() == ISD::AND &&
1516  "N must be an AND operation to call this function");
1517 
1518  EVT VT = N->getValueType(0);
1519 
1520  // Here we can test the type of VT and return false when the type does not
1521  // match, but since it is done prior to that call in the current context
1522  // we turned that into an assert to avoid redundant code.
1523  assert((VT == MVT::i32 || VT == MVT::i64) &&
1524  "Type checking must have been done before calling this function");
1525 
1526  // FIXME: simplify-demanded-bits in DAGCombine will probably have
1527  // changed the AND node to a 32-bit mask operation. We'll have to
1528  // undo that as part of the transform here if we want to catch all
1529  // the opportunities.
1530  // Currently the NumberOfIgnoredLowBits argument helps to recover
1531  // from these situations when matching a bigger pattern (bitfield insert).
1532 
1533  // For unsigned extracts, check for a shift right and mask
1534  uint64_t AndImm = 0;
1535  if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
1536  return false;
1537 
1538  const SDNode *Op0 = N->getOperand(0).getNode();
1539 
1540  // Because of simplify-demanded-bits in DAGCombine, the mask may have been
1541  // simplified. Try to undo that
1542  AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
1543 
1544  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
1545  if (AndImm & (AndImm + 1))
1546  return false;
1547 
1548  bool ClampMSB = false;
1549  uint64_t SrlImm = 0;
1550  // Handle the SRL + ANY_EXTEND case.
1551  if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
1552  isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
1553  // Extend the incoming operand of the SRL to 64-bit.
1554  Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
1555  // Make sure to clamp the MSB so that we preserve the semantics of the
1556  // original operations.
1557  ClampMSB = true;
1558  } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
1559  isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
1560  SrlImm)) {
1561  // If the shift result was truncated, we can still combine them.
1562  Opd0 = Op0->getOperand(0).getOperand(0);
1563 
1564  // Use the type of SRL node.
1565  VT = Opd0->getValueType(0);
1566  } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
1567  Opd0 = Op0->getOperand(0);
1568  } else if (BiggerPattern) {
1569  // Let's pretend a 0 shift right has been performed.
1570  // The resulting code will be at least as good as the original one
1571  // plus it may expose more opportunities for bitfield insert pattern.
1572  // FIXME: Currently we limit this to the bigger pattern, because
1573  // some optimizations expect AND and not UBFM.
1574  Opd0 = N->getOperand(0);
1575  } else
1576  return false;
1577 
1578  // Bail out on large immediates. This happens when no proper
1579  // combining/constant folding was performed.
1580  if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
1581  LLVM_DEBUG(
1582  (dbgs() << N
1583  << ": Found large shift immediate, this should not happen\n"));
1584  return false;
1585  }
1586 
1587  LSB = SrlImm;
1588  MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm)
1589  : countTrailingOnes<uint64_t>(AndImm)) -
1590  1;
1591  if (ClampMSB)
1592  // Since we're moving the extend before the right shift operation, we need
1593  // to clamp the MSB to make sure we don't shift in undefined bits instead of
1594  // the zeros which would get shifted in with the original right shift
1595  // operation.
1596  MSB = MSB > 31 ? 31 : MSB;
1597 
1598  Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
1599  return true;
1600 }
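// As a concrete illustration of the matching above: for an i32 node
// (and (srl x, 3), 0xf) we get AndImm = 0xf and SrlImm = 3, so LSB = 3 and
// MSB = 3 + 4 - 1 = 6, and the node is reported as UBFMWri x, #3, #6
// (i.e. UBFX x, #3, #4).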
1601 
1602 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
1603  SDValue &Opd0, unsigned &Immr,
1604  unsigned &Imms) {
1605  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
1606 
1607  EVT VT = N->getValueType(0);
1608  unsigned BitWidth = VT.getSizeInBits();
1609  assert((VT == MVT::i32 || VT == MVT::i64) &&
1610  "Type checking must have been done before calling this function");
1611 
1612  SDValue Op = N->getOperand(0);
1613  if (Op->getOpcode() == ISD::TRUNCATE) {
1614  Op = Op->getOperand(0);
1615  VT = Op->getValueType(0);
1616  BitWidth = VT.getSizeInBits();
1617  }
1618 
1619  uint64_t ShiftImm;
1620  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
1621  !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
1622  return false;
1623 
1624  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
1625  if (ShiftImm + Width > BitWidth)
1626  return false;
1627 
1628  Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
1629  Opd0 = Op.getOperand(0);
1630  Immr = ShiftImm;
1631  Imms = ShiftImm + Width - 1;
1632  return true;
1633 }
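// For example, (sign_extend_inreg (srl x, 5), i8) on an i32 value gives
// ShiftImm = 5 and Width = 8, so Immr = 5, Imms = 12 and the extract becomes
// SBFMWri x, #5, #12 (SBFX x, #5, #8).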
1634 
1635 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
1636  SDValue &Opd0, unsigned &LSB,
1637  unsigned &MSB) {
1638  // We are looking for the following pattern, which basically extracts several
1639  // contiguous bits from the source value and places them at the LSB of the
1640  // destination value; all other bits of the destination value are set to zero:
1641  //
1642  // Value2 = AND Value, MaskImm
1643  // SRL Value2, ShiftImm
1644  //
1645  // where MaskImm >> ShiftImm determines the width of the extracted field.
1646  //
1647  // This gets selected into a single UBFM:
1648  //
1649  // UBFM Value, ShiftImm, BitWide + SrlImm - 1
1650  //
1651 
1652  if (N->getOpcode() != ISD::SRL)
1653  return false;
1654 
1655  uint64_t AndMask = 0;
1656  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
1657  return false;
1658 
1659  Opd0 = N->getOperand(0).getOperand(0);
1660 
1661  uint64_t SrlImm = 0;
1662  if (!isIntImmediate(N->getOperand(1), SrlImm))
1663  return false;
1664 
1665  // Check whether we really have several bits extract here.
1666  unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm));
1667  if (BitWide && isMask_64(AndMask >> SrlImm)) {
1668  if (N->getValueType(0) == MVT::i32)
1669  Opc = AArch64::UBFMWri;
1670  else
1671  Opc = AArch64::UBFMXri;
1672 
1673  LSB = SrlImm;
1674  MSB = BitWide + SrlImm - 1;
1675  return true;
1676  }
1677 
1678  return false;
1679 }
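// For example, (srl (and x, 0xff0), 4): AndMask >> SrlImm == 0xff is a mask of
// width 8, so LSB = 4 and MSB = 11, and the node selects to UBFMWri x, #4, #11
// (UBFX x, #4, #8).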
1680 
1681 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
1682  unsigned &Immr, unsigned &Imms,
1683  bool BiggerPattern) {
1684  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
1685  "N must be a SHR/SRA operation to call this function");
1686 
1687  EVT VT = N->getValueType(0);
1688 
1689  // We could test the type of VT here and return false when the type does not
1690  // match, but since that check is already done before this call in the current
1691  // context, we turn it into an assert to avoid redundant code.
1692  assert((VT == MVT::i32 || VT == MVT::i64) &&
1693  "Type checking must have been done before calling this function");
1694 
1695  // Check for AND + SRL doing several bits extract.
1696  if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
1697  return true;
1698 
1699  // We're looking for a shift of a shift.
1700  uint64_t ShlImm = 0;
1701  uint64_t TruncBits = 0;
1702  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
1703  Opd0 = N->getOperand(0).getOperand(0);
1704  } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
1705  N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
1706  // We are looking for a shift of a truncate. A truncate from i64 to i32 can
1707  // be considered as setting the high 32 bits to zero. Our strategy here is to
1708  // always generate a 64-bit UBFM. This consistency will help the CSE pass
1709  // find more redundancy later.
1710  Opd0 = N->getOperand(0).getOperand(0);
1711  TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
1712  VT = Opd0.getValueType();
1713  assert(VT == MVT::i64 && "the promoted type should be i64");
1714  } else if (BiggerPattern) {
1715  // Let's pretend a 0 shift left has been performed.
1716  // FIXME: Currently we limit this to the bigger pattern case,
1717  // because some optimizations expect AND and not UBFM
1718  Opd0 = N->getOperand(0);
1719  } else
1720  return false;
1721 
1722  // Missing combines/constant folding may have left us with strange
1723  // constants.
1724  if (ShlImm >= VT.getSizeInBits()) {
1725  LLVM_DEBUG(
1726  (dbgs() << N
1727  << ": Found large shift immediate, this should not happen\n"));
1728  return false;
1729  }
1730 
1731  uint64_t SrlImm = 0;
1732  if (!isIntImmediate(N->getOperand(1), SrlImm))
1733  return false;
1734 
1735  assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
1736  "bad amount in shift node!");
1737  int immr = SrlImm - ShlImm;
1738  Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
1739  Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
1740  // SRA requires a signed extraction
1741  if (VT == MVT::i32)
1742  Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
1743  else
1744  Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
1745  return true;
1746 }
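// For example, (sra (shl x, 8), 16) on i32 gives ShlImm = 8 and SrlImm = 16,
// so Immr = 8 and Imms = 32 - 8 - 1 = 23; the node selects to SBFMWri x, #8, #23
// (SBFX x, #8, #16), which extracts bits [23:8] of x and sign-extends them.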
1747 
1748 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
1749  assert(N->getOpcode() == ISD::SIGN_EXTEND);
1750 
1751  EVT VT = N->getValueType(0);
1752  EVT NarrowVT = N->getOperand(0)->getValueType(0);
1753  if (VT != MVT::i64 || NarrowVT != MVT::i32)
1754  return false;
1755 
1756  uint64_t ShiftImm;
1757  SDValue Op = N->getOperand(0);
1758  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
1759  return false;
1760 
1761  SDLoc dl(N);
1762  // Extend the incoming operand of the shift to 64-bits.
1763  SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
1764  unsigned Immr = ShiftImm;
1765  unsigned Imms = NarrowVT.getSizeInBits() - 1;
1766  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
1767  CurDAG->getTargetConstant(Imms, dl, VT)};
1768  CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
1769  return true;
1770 }
1771 
1772 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
1773  SDValue &Opd0, unsigned &Immr, unsigned &Imms,
1774  unsigned NumberOfIgnoredLowBits = 0,
1775  bool BiggerPattern = false) {
1776  if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
1777  return false;
1778 
1779  switch (N->getOpcode()) {
1780  default:
1781  if (!N->isMachineOpcode())
1782  return false;
1783  break;
1784  case ISD::AND:
1785  return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
1786  NumberOfIgnoredLowBits, BiggerPattern);
1787  case ISD::SRL:
1788  case ISD::SRA:
1789  return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
1790 
1791  case ISD::SIGN_EXTEND_INREG:
1792  return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
1793  }
1794 
1795  unsigned NOpc = N->getMachineOpcode();
1796  switch (NOpc) {
1797  default:
1798  return false;
1799  case AArch64::SBFMWri:
1800  case AArch64::UBFMWri:
1801  case AArch64::SBFMXri:
1802  case AArch64::UBFMXri:
1803  Opc = NOpc;
1804  Opd0 = N->getOperand(0);
1805  Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
1806  Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
1807  return true;
1808  }
1809  // Unreachable
1810  return false;
1811 }
1812 
1813 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
1814  unsigned Opc, Immr, Imms;
1815  SDValue Opd0;
1816  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
1817  return false;
1818 
1819  EVT VT = N->getValueType(0);
1820  SDLoc dl(N);
1821 
1822  // If the bit extract operation is 64bit but the original type is 32bit, we
1823  // need to add one EXTRACT_SUBREG.
1824  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
1825  SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
1826  CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
1827 
1828  SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
1829  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1830  ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
1831  MVT::i32, SDValue(BFM, 0), SubReg));
1832  return true;
1833  }
1834 
1835  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
1836  CurDAG->getTargetConstant(Imms, dl, VT)};
1837  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
1838  return true;
1839 }
1840 
1841 /// Does DstMask form a complementary pair with the mask provided by
1842 /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
1843 /// this asks whether DstMask zeroes precisely those bits that will be set by
1844 /// the other half.
1845 static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
1846  unsigned NumberOfIgnoredHighBits, EVT VT) {
1847  assert((VT == MVT::i32 || VT == MVT::i64) &&
1848  "i32 or i64 mask type expected!");
1849  unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
1850 
1851  APInt SignificantDstMask = APInt(BitWidth, DstMask);
1852  APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
1853 
1854  return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
1855  (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();
1856 }
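// For example, DstMask = 0xffff0000 is complementary to inserted bits covering
// 0x0000ffff: the two masks do not overlap and together cover the whole 32-bit
// register, so the insertion can be done with a single BFI.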
1857 
1858 // Look for bits that will be useful for later uses.
1859 // A bit is considered useless as soon as it is dropped and never used
1860 // before it has been dropped.
1861 // E.g., looking for the useful bits of x
1862 // 1. y = x & 0x7
1863 // 2. z = y >> 2
1864 // After #1, the useful bits of x are 0x7; these useful bits of x live through
1865 // y.
1866 // After #2, the useful bits of x are 0x4.
1867 // However, if x is used by an unpredictable instruction, then all its bits
1868 // are useful.
1869 // E.g.
1870 // 1. y = x & 0x7
1871 // 2. z = y >> 2
1872 // 3. str x, [@x]
1873 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
1874 
1875 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
1876  unsigned Depth) {
1877  uint64_t Imm =
1878  cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1879  Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
1880  UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
1881  getUsefulBits(Op, UsefulBits, Depth + 1);
1882 }
1883 
1884 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
1885  uint64_t Imm, uint64_t MSB,
1886  unsigned Depth) {
1887  // inherit the bitwidth value
1888  APInt OpUsefulBits(UsefulBits);
1889  OpUsefulBits = 1;
1890 
1891  if (MSB >= Imm) {
1892  OpUsefulBits <<= MSB - Imm + 1;
1893  --OpUsefulBits;
1894  // The interesting part will be in the lower part of the result
1895  getUsefulBits(Op, OpUsefulBits, Depth + 1);
1896  // The interesting part was starting at Imm in the argument
1897  OpUsefulBits <<= Imm;
1898  } else {
1899  OpUsefulBits <<= MSB + 1;
1900  --OpUsefulBits;
1901  // The interesting part will be shifted in the result
1902  OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
1903  getUsefulBits(Op, OpUsefulBits, Depth + 1);
1904  // The interesting part was at zero in the argument
1905  OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
1906  }
1907 
1908  UsefulBits &= OpUsefulBits;
1909 }
1910 
1911 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
1912  unsigned Depth) {
1913  uint64_t Imm =
1914  cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
1915  uint64_t MSB =
1916  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1917 
1918  getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
1919 }
1920 
1921 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
1922  unsigned Depth) {
1923  uint64_t ShiftTypeAndValue =
1924  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1925  APInt Mask(UsefulBits);
1926  Mask.clearAllBits();
1927  Mask.flipAllBits();
1928 
1929  if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
1930  // Shift Left
1931  uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1932  Mask <<= ShiftAmt;
1933  getUsefulBits(Op, Mask, Depth + 1);
1934  Mask.lshrInPlace(ShiftAmt);
1935  } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
1936  // Shift Right
1937  // We do not handle AArch64_AM::ASR, because the sign will change the
1938  // number of useful bits
1939  uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
1940  Mask.lshrInPlace(ShiftAmt);
1941  getUsefulBits(Op, Mask, Depth + 1);
1942  Mask <<= ShiftAmt;
1943  } else
1944  return;
1945 
1946  UsefulBits &= Mask;
1947 }
1948 
1949 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
1950  unsigned Depth) {
1951  uint64_t Imm =
1952  cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1953  uint64_t MSB =
1954  cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
1955 
1956  APInt OpUsefulBits(UsefulBits);
1957  OpUsefulBits = 1;
1958 
1959  APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
1960  ResultUsefulBits.flipAllBits();
1961  APInt Mask(UsefulBits.getBitWidth(), 0);
1962 
1963  getUsefulBits(Op, ResultUsefulBits, Depth + 1);
1964 
1965  if (MSB >= Imm) {
1966  // The instruction is a BFXIL.
1967  uint64_t Width = MSB - Imm + 1;
1968  uint64_t LSB = Imm;
1969 
1970  OpUsefulBits <<= Width;
1971  --OpUsefulBits;
1972 
1973  if (Op.getOperand(1) == Orig) {
1974  // Copy the low bits from the result to bits starting from LSB.
1975  Mask = ResultUsefulBits & OpUsefulBits;
1976  Mask <<= LSB;
1977  }
1978 
1979  if (Op.getOperand(0) == Orig)
1980  // Bits starting from LSB in the input contribute to the result.
1981  Mask |= (ResultUsefulBits & ~OpUsefulBits);
1982  } else {
1983  // The instruction is a BFI.
1984  uint64_t Width = MSB + 1;
1985  uint64_t LSB = UsefulBits.getBitWidth() - Imm;
1986 
1987  OpUsefulBits <<= Width;
1988  --OpUsefulBits;
1989  OpUsefulBits <<= LSB;
1990 
1991  if (Op.getOperand(1) == Orig) {
1992  // Copy the bits from the result to the zero bits.
1993  Mask = ResultUsefulBits & OpUsefulBits;
1994  Mask.lshrInPlace(LSB);
1995  }
1996 
1997  if (Op.getOperand(0) == Orig)
1998  Mask |= (ResultUsefulBits & ~OpUsefulBits);
1999  }
2000 
2001  UsefulBits &= Mask;
2002 }
2003 
2004 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
2005  SDValue Orig, unsigned Depth) {
2006 
2007  // Users of this node should have already been instruction selected
2008  // FIXME: Can we turn that into an assert?
2009  if (!UserNode->isMachineOpcode())
2010  return;
2011 
2012  switch (UserNode->getMachineOpcode()) {
2013  default:
2014  return;
2015  case AArch64::ANDSWri:
2016  case AArch64::ANDSXri:
2017  case AArch64::ANDWri:
2018  case AArch64::ANDXri:
2019  // We increment Depth only when we call the getUsefulBits
2020  return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
2021  Depth);
2022  case AArch64::UBFMWri:
2023  case AArch64::UBFMXri:
2024  return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
2025 
2026  case AArch64::ORRWrs:
2027  case AArch64::ORRXrs:
2028  if (UserNode->getOperand(1) != Orig)
2029  return;
2030  return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
2031  Depth);
2032  case AArch64::BFMWri:
2033  case AArch64::BFMXri:
2034  return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
2035 
2036  case AArch64::STRBBui:
2037  case AArch64::STURBBi:
2038  if (UserNode->getOperand(0) != Orig)
2039  return;
2040  UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
2041  return;
2042 
2043  case AArch64::STRHHui:
2044  case AArch64::STURHHi:
2045  if (UserNode->getOperand(0) != Orig)
2046  return;
2047  UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
2048  return;
2049  }
2050 }
2051 
2052 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
2053  if (Depth >= 6)
2054  return;
2055  // Initialize UsefulBits
2056  if (!Depth) {
2057  unsigned Bitwidth = Op.getScalarValueSizeInBits();
2058  // At the beginning, assume every produced bit is useful
2059  UsefulBits = APInt(Bitwidth, 0);
2060  UsefulBits.flipAllBits();
2061  }
2062  APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
2063 
2064  for (SDNode *Node : Op.getNode()->uses()) {
2065  // A use cannot produce useful bits
2066  APInt UsefulBitsForUse = APInt(UsefulBits);
2067  getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
2068  UsersUsefulBits |= UsefulBitsForUse;
2069  }
2070  // UsefulBits contains the produced bits that are meaningful for the
2071  // current definition, thus a user cannot make a bit meaningful at
2072  // this point
2073  UsefulBits &= UsersUsefulBits;
2074 }
2075 
2076 /// Create a machine node performing a notional SHL of Op by ShlAmount. If
2077 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
2078 /// 0, return Op unchanged.
2079 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
2080  if (ShlAmount == 0)
2081  return Op;
2082 
2083  EVT VT = Op.getValueType();
2084  SDLoc dl(Op);
2085  unsigned BitWidth = VT.getSizeInBits();
2086  unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2087 
2088  SDNode *ShiftNode;
2089  if (ShlAmount > 0) {
2090  // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
2091  ShiftNode = CurDAG->getMachineNode(
2092  UBFMOpc, dl, VT, Op,
2093  CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
2094  CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
2095  } else {
2096  // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
2097  assert(ShlAmount < 0 && "expected right shift");
2098  int ShrAmount = -ShlAmount;
2099  ShiftNode = CurDAG->getMachineNode(
2100  UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
2101  CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
2102  }
2103 
2104  return SDValue(ShiftNode, 0);
2105 }
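// For example, getLeftShift(CurDAG, Op, 4) on an i32 value emits
// UBFMWri Op, #28, #27 (LSL #4), while getLeftShift(CurDAG, Op, -4) emits
// UBFMWri Op, #4, #31 (LSR #4).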
2106 
2107 /// Does this tree qualify as an attempt to move a bitfield into position,
2108 /// essentially "(and (shl VAL, N), Mask)".
2109 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
2110  bool BiggerPattern,
2111  SDValue &Src, int &ShiftAmount,
2112  int &MaskWidth) {
2113  EVT VT = Op.getValueType();
2114  unsigned BitWidth = VT.getSizeInBits();
2115  (void)BitWidth;
2116  assert(BitWidth == 32 || BitWidth == 64);
2117 
2118  KnownBits Known = CurDAG->computeKnownBits(Op);
2119 
2120  // Non-zero in the sense that they're not provably zero, which is the key
2121  // point if we want to use this value
2122  uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
2123 
2124  // Discard a constant AND mask if present. It's safe because the node will
2125  // already have been factored into the computeKnownBits calculation above.
2126  uint64_t AndImm;
2127  if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
2128  assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0);
2129  Op = Op.getOperand(0);
2130  }
2131 
2132  // Don't match if the SHL has more than one use, since then we'll end up
2133  // generating SHL+UBFIZ instead of just keeping SHL+AND.
2134  if (!BiggerPattern && !Op.hasOneUse())
2135  return false;
2136 
2137  uint64_t ShlImm;
2138  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
2139  return false;
2140  Op = Op.getOperand(0);
2141 
2142  if (!isShiftedMask_64(NonZeroBits))
2143  return false;
2144 
2145  ShiftAmount = countTrailingZeros(NonZeroBits);
2146  MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount);
2147 
2148  // BFI encompasses sufficiently many nodes that it's worth inserting an extra
2149  // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
2150  // amount. BiggerPattern is true when this pattern is being matched for BFI,
2151  // BiggerPattern is false when this pattern is being matched for UBFIZ, in
2152  // which case it is not profitable to insert an extra shift.
2153  if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
2154  return false;
2155  Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
2156 
2157  return true;
2158 }
2159 
2160 static bool isShiftedMask(uint64_t Mask, EVT VT) {
2161  assert(VT == MVT::i32 || VT == MVT::i64);
2162  if (VT == MVT::i32)
2163  return isShiftedMask_32(Mask);
2164  return isShiftedMask_64(Mask);
2165 }
2166 
2167 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
2168 // inserted only sets known zero bits.
2169 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
2170  assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
2171 
2172  EVT VT = N->getValueType(0);
2173  if (VT != MVT::i32 && VT != MVT::i64)
2174  return false;
2175 
2176  unsigned BitWidth = VT.getSizeInBits();
2177 
2178  uint64_t OrImm;
2179  if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
2180  return false;
2181 
2182  // Skip this transformation if the OR immediate can be encoded directly as an
2183  // ORR immediate; in that case we would only trade an AND+ORR for an
2184  // ORR+BFI/BFXIL, which is most likely performance neutral.
2185  if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
2186  return false;
2187 
2188  uint64_t MaskImm;
2189  SDValue And = N->getOperand(0);
2190  // Must be a single use AND with an immediate operand.
2191  if (!And.hasOneUse() ||
2192  !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
2193  return false;
2194 
2195  // Compute the Known Zero for the AND as this allows us to catch more general
2196  // cases than just looking for AND with imm.
2197  KnownBits Known = CurDAG->computeKnownBits(And);
2198 
2199  // Non-zero in the sense that they're not provably zero, which is the key
2200  // point if we want to use this value.
2201  uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
2202 
2203  // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
2204  if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
2205  return false;
2206 
2207  // The bits being inserted must only set those bits that are known to be zero.
2208  if ((OrImm & NotKnownZero) != 0) {
2209  // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
2210  // currently handle this case.
2211  return false;
2212  }
2213 
2214  // BFI/BFXIL dst, src, #lsb, #width.
2215  int LSB = countTrailingOnes(NotKnownZero);
2216  int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation();
2217 
2218  // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
2219  unsigned ImmR = (BitWidth - LSB) % BitWidth;
2220  unsigned ImmS = Width - 1;
2221 
2222  // If we're creating a BFI instruction, avoid cases where we need more
2223  // instructions to materialize the BFI constant as compared to the original
2224  // ORR. A BFXIL will use the same constant as the original ORR, so the code
2225  // should be no worse in this case.
2226  bool IsBFI = LSB != 0;
2227  uint64_t BFIImm = OrImm >> LSB;
2228  if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
2229  // We have a BFI instruction and we know the constant can't be materialized
2230  // with an ORR-immediate with the zero register.
2231  unsigned OrChunks = 0, BFIChunks = 0;
2232  for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
2233  if (((OrImm >> Shift) & 0xFFFF) != 0)
2234  ++OrChunks;
2235  if (((BFIImm >> Shift) & 0xFFFF) != 0)
2236  ++BFIChunks;
2237  }
2238  if (BFIChunks > OrChunks)
2239  return false;
2240  }
2241 
2242  // Materialize the constant to be inserted.
2243  SDLoc DL(N);
2244  unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
2245  SDNode *MOVI = CurDAG->getMachineNode(
2246  MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
2247 
2248  // Create the BFI/BFXIL instruction.
2249  SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
2250  CurDAG->getTargetConstant(ImmR, DL, VT),
2251  CurDAG->getTargetConstant(ImmS, DL, VT)};
2252  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2253  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2254  return true;
2255 }
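// For example, take (or (and x, 0xffff000f), 0x120) on i32 with a single-use AND
// and nothing more known about x: the AND's known-zero mask is 0x0000fff0, so
// LSB = 4 and Width = 12; 0x120 is not a valid logical immediate, so the node is
// selected as MOVi32imm #0x12 followed by BFMWri x, tmp, #28, #11
// (BFI x, tmp, #4, #12).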
2256 
2257 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
2258  SelectionDAG *CurDAG) {
2259  assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
2260 
2261  EVT VT = N->getValueType(0);
2262  if (VT != MVT::i32 && VT != MVT::i64)
2263  return false;
2264 
2265  unsigned BitWidth = VT.getSizeInBits();
2266 
2267  // Because of simplify-demanded-bits in DAGCombine, involved masks may not
2268  // have the expected shape. Try to undo that.
2269 
2270  unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
2271  unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
2272 
2273  // Given an OR operation, check if we have the following pattern
2274  // ubfm c, b, imm, imm2 (or something that does the same job, see
2275  // isBitfieldExtractOp)
2276  // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
2277  // countTrailingZeros(mask2) == imm2 - imm + 1
2278  // f = d | c
2279  // if yes, replace the OR instruction with:
2280  // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
2281 
2282  // OR is commutative, check all combinations of operand order and values of
2283  // BiggerPattern, i.e.
2284  // Opd0, Opd1, BiggerPattern=false
2285  // Opd1, Opd0, BiggerPattern=false
2286  // Opd0, Opd1, BiggerPattern=true
2287  // Opd1, Opd0, BiggerPattern=true
2288  // Several of these combinations may match, so check with BiggerPattern=false
2289  // first since that will produce better results by matching more instructions
2290  // and/or inserting fewer extra instructions.
2291  for (int I = 0; I < 4; ++I) {
2292 
2293  SDValue Dst, Src;
2294  unsigned ImmR, ImmS;
2295  bool BiggerPattern = I / 2;
2296  SDValue OrOpd0Val = N->getOperand(I % 2);
2297  SDNode *OrOpd0 = OrOpd0Val.getNode();
2298  SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
2299  SDNode *OrOpd1 = OrOpd1Val.getNode();
2300 
2301  unsigned BFXOpc;
2302  int DstLSB, Width;
2303  if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
2304  NumberOfIgnoredLowBits, BiggerPattern)) {
2305  // Check that the returned opcode is compatible with the pattern,
2306  // i.e., same type and zero extended (U and not S)
2307  if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
2308  (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
2309  continue;
2310 
2311  // Compute the width of the bitfield insertion
2312  DstLSB = 0;
2313  Width = ImmS - ImmR + 1;
2314  // FIXME: This constraint is to catch bitfield insertion we may
2315  // want to widen the pattern if we want to grab general bitfied
2316  // move case
2317  if (Width <= 0)
2318  continue;
2319 
2320  // If the mask on the insertee is correct, we have a BFXIL operation. We
2321  // can share the ImmR and ImmS values from the already-computed UBFM.
2322  } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
2323  BiggerPattern,
2324  Src, DstLSB, Width)) {
2325  ImmR = (BitWidth - DstLSB) % BitWidth;
2326  ImmS = Width - 1;
2327  } else
2328  continue;
2329 
2330  // Check the second part of the pattern
2331  EVT VT = OrOpd1Val.getValueType();
2332  assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
2333 
2334  // Compute the Known Zero for the candidate of the first operand.
2335  // This allows us to catch more general cases than just looking for an
2336  // AND with an immediate. Indeed, simplify-demanded-bits may have removed
2337  // the AND instruction because it proved it was useless.
2338  KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
2339 
2340  // Check if there is enough room for the second operand to appear
2341  // in the first one
2342  APInt BitsToBeInserted =
2343  APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
2344 
2345  if ((BitsToBeInserted & ~Known.Zero) != 0)
2346  continue;
2347 
2348  // Set the first operand
2349  uint64_t Imm;
2350  if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
2351  isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
2352  // In that case, we can eliminate the AND
2353  Dst = OrOpd1->getOperand(0);
2354  else
2355  // Maybe the AND has been removed by simplify-demanded-bits
2356  // or is useful because it discards more bits
2357  Dst = OrOpd1Val;
2358 
2359  // both parts match
2360  SDLoc DL(N);
2361  SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
2362  CurDAG->getTargetConstant(ImmS, DL, VT)};
2363  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2364  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2365  return true;
2366  }
2367 
2368  // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
2369  // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
2370  // mask (e.g., 0x000ffff0).
2371  uint64_t Mask0Imm, Mask1Imm;
2372  SDValue And0 = N->getOperand(0);
2373  SDValue And1 = N->getOperand(1);
2374  if (And0.hasOneUse() && And1.hasOneUse() &&
2375  isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
2376  isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
2377  APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
2378  (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
2379 
2380  // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
2381  // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
2382  // bits to be inserted.
2383  if (isShiftedMask(Mask0Imm, VT)) {
2384  std::swap(And0, And1);
2385  std::swap(Mask0Imm, Mask1Imm);
2386  }
2387 
2388  SDValue Src = And1->getOperand(0);
2389  SDValue Dst = And0->getOperand(0);
2390  unsigned LSB = countTrailingZeros(Mask1Imm);
2391  int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation();
2392 
2393  // The BFXIL inserts the low-order bits from a source register, so right
2394  // shift the needed bits into place.
2395  SDLoc DL(N);
2396  unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2397  SDNode *LSR = CurDAG->getMachineNode(
2398  ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT),
2399  CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
2400 
2401  // BFXIL is an alias of BFM, so translate to BFM operands.
2402  unsigned ImmR = (BitWidth - LSB) % BitWidth;
2403  unsigned ImmS = Width - 1;
2404 
2405  // Create the BFXIL instruction.
2406  SDValue Ops[] = {Dst, SDValue(LSR, 0),
2407  CurDAG->getTargetConstant(ImmR, DL, VT),
2408  CurDAG->getTargetConstant(ImmS, DL, VT)};
2409  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2410  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2411  return true;
2412  }
2413 
2414  return false;
2415 }
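// For example, (or (and X, 0xff0000ff), (and Y, 0x00ffff00)) on i32 is not caught
// by the loop above (neither operand hides a shift), but the trailing two-AND case
// matches: LSB = 8 and Width = 16, so it emits UBFMWri t, Y, #8, #31 (LSR #8)
// followed by BFMWri X, t, #24, #15, producing (X & 0xff0000ff) | (Y & 0x00ffff00).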
2416 
2417 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
2418  if (N->getOpcode() != ISD::OR)
2419  return false;
2420 
2421  APInt NUsefulBits;
2422  getUsefulBits(SDValue(N, 0), NUsefulBits);
2423 
2424  // If all bits are not useful, just return UNDEF.
2425  if (!NUsefulBits) {
2426  CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
2427  return true;
2428  }
2429 
2430  if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
2431  return true;
2432 
2433  return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
2434 }
2435 
2436 /// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
2437 /// equivalent of a left shift by a constant amount followed by an and masking
2438 /// out a contiguous set of bits.
2439 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
2440  if (N->getOpcode() != ISD::AND)
2441  return false;
2442 
2443  EVT VT = N->getValueType(0);
2444  if (VT != MVT::i32 && VT != MVT::i64)
2445  return false;
2446 
2447  SDValue Op0;
2448  int DstLSB, Width;
2449  if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
2450  Op0, DstLSB, Width))
2451  return false;
2452 
2453  // ImmR is the rotate right amount.
2454  unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
2455  // ImmS is the most significant bit of the source to be moved.
2456  unsigned ImmS = Width - 1;
2457 
2458  SDLoc DL(N);
2459  SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
2460  CurDAG->getTargetConstant(ImmS, DL, VT)};
2461  unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
2462  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2463  return true;
2464 }
2465 
2466 /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
2467 /// variable shift/rotate instructions.
2468 bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
2469  EVT VT = N->getValueType(0);
2470 
2471  unsigned Opc;
2472  switch (N->getOpcode()) {
2473  case ISD::ROTR:
2474  Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
2475  break;
2476  case ISD::SHL:
2477  Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
2478  break;
2479  case ISD::SRL:
2480  Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
2481  break;
2482  case ISD::SRA:
2483  Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
2484  break;
2485  default:
2486  return false;
2487  }
2488 
2489  uint64_t Size;
2490  uint64_t Bits;
2491  if (VT == MVT::i32) {
2492  Bits = 5;
2493  Size = 32;
2494  } else if (VT == MVT::i64) {
2495  Bits = 6;
2496  Size = 64;
2497  } else
2498  return false;
2499 
2500  SDValue ShiftAmt = N->getOperand(1);
2501  SDLoc DL(N);
2502  SDValue NewShiftAmt;
2503 
2504  // Skip over an extend of the shift amount.
2505  if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
2506  ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
2507  ShiftAmt = ShiftAmt->getOperand(0);
2508 
2509  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
2510  SDValue Add0 = ShiftAmt->getOperand(0);
2511  SDValue Add1 = ShiftAmt->getOperand(1);
2512  uint64_t Add0Imm;
2513  uint64_t Add1Imm;
2514  // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
2515  // to avoid the ADD/SUB.
2516  if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0))
2517  NewShiftAmt = Add0;
2518  // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2519  // generate a NEG instead of a SUB of a constant.
2520  else if (ShiftAmt->getOpcode() == ISD::SUB &&
2521  isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
2522  (Add0Imm % Size == 0)) {
2523  unsigned NegOpc;
2524  unsigned ZeroReg;
2525  EVT SubVT = ShiftAmt->getValueType(0);
2526  if (SubVT == MVT::i32) {
2527  NegOpc = AArch64::SUBWrr;
2528  ZeroReg = AArch64::WZR;
2529  } else {
2530  assert(SubVT == MVT::i64);
2531  NegOpc = AArch64::SUBXrr;
2532  ZeroReg = AArch64::XZR;
2533  }
2534  SDValue Zero =
2535  CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
2536  MachineSDNode *Neg =
2537  CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
2538  NewShiftAmt = SDValue(Neg, 0);
2539  } else
2540  return false;
2541  } else {
2542  // If the shift amount is masked with an AND, check that the mask covers the
2543  // bits that are implicitly ANDed off by the above opcodes and if so, skip
2544  // the AND.
2545  uint64_t MaskImm;
2546  if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm))
2547  return false;
2548 
2549  if (countTrailingOnes(MaskImm) < Bits)
2550  return false;
2551 
2552  NewShiftAmt = ShiftAmt->getOperand(0);
2553  }
2554 
2555  // Narrow/widen the shift amount to match the size of the shift operation.
2556  if (VT == MVT::i32)
2557  NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
2558  else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
2559  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
2560  MachineSDNode *Ext = CurDAG->getMachineNode(
2561  AArch64::SUBREG_TO_REG, DL, VT,
2562  CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
2563  NewShiftAmt = SDValue(Ext, 0);
2564  }
2565 
2566  SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
2567  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2568  return true;
2569 }
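// For example, (shl x, (and y, 63)) on i64: the AND already covers the six bits
// the hardware uses, so it is dropped and the node becomes LSLVXr x, y. Likewise
// (srl x, (sub 64, y)) becomes SUBXrr tmp, XZR, y followed by LSRVXr x, tmp, since
// the shift amount is taken modulo 64 by the instruction anyway.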
2570 
2571 bool
2572 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
2573  unsigned RegWidth) {
2574  APFloat FVal(0.0);
2575  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
2576  FVal = CN->getValueAPF();
2577  else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
2578  // Some otherwise illegal constants are allowed in this case.
2579  if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
2580  !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
2581  return false;
2582 
2583  ConstantPoolSDNode *CN =
2584  dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
2585  FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
2586  } else
2587  return false;
2588 
2589  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
2590  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
2591  // x-register.
2592  //
2593  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
2594  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
2595  // integers.
2596  bool IsExact;
2597 
2598  // fbits is between 1 and 64 in the worst-case, which means the fmul
2599  // could have 2^64 as an actual operand. Need 65 bits of precision.
2600  APSInt IntVal(65, true);
2601  FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
2602 
2603  // N.b. isPowerOf2 also checks for > 0.
2604  if (!IsExact || !IntVal.isPowerOf2()) return false;
2605  unsigned FBits = IntVal.logBase2();
2606 
2607  // Checks above should have guaranteed that we haven't lost information in
2608  // finding FBits, but it must still be in range.
2609  if (FBits == 0 || FBits > RegWidth) return false;
2610 
2611  FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
2612  return true;
2613 }
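// For example, in (fp_to_sint (fmul val, 256.0)) the constant is exactly 2^8, so
// FBits = 8 and FixedPos becomes the #8 fbits operand of the fixed-point FCVTZS.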
2614 
2615 // Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields
2616 // of the string and obtains the integer values from them and combines these
2617 // into a single value to be used in the MRS/MSR instruction.
2618 static int getIntOperandFromRegisterString(StringRef RegString) {
2619  SmallVector<StringRef, 5> Fields;
2620  RegString.split(Fields, ':');
2621 
2622  if (Fields.size() == 1)
2623  return -1;
2624 
2625  assert(Fields.size() == 5
2626  && "Invalid number of fields in read register string");
2627 
2628  SmallVector<int, 5> Ops;
2629  bool AllIntFields = true;
2630 
2631  for (StringRef Field : Fields) {
2632  unsigned IntField;
2633  AllIntFields &= !Field.getAsInteger(10, IntField);
2634  Ops.push_back(IntField);
2635  }
2636 
2637  assert(AllIntFields &&
2638  "Unexpected non-integer value in special register string.");
2639 
2640  // Need to combine the integer fields of the string into a single value
2641  // based on the bit encoding of the MRS/MSR instruction.
2642  return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
2643  (Ops[3] << 3) | (Ops[4]);
2644 }
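// For example, the string "1:3:7:4:0" splits into five integer fields and packs
// to (1 << 14) | (3 << 11) | (7 << 7) | (4 << 3) | 0 = 0x5ba0, which is the
// immediate operand given to the MRS/MSR machine node below.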
2645 
2646 // Lower the read_register intrinsic to an MRS instruction node if the special
2647 // register string argument is either of the form detailed in the ACLE (the
2648 // form described in getIntOperandFromRegisterString) or is a named register
2649 // known by the MRS SysReg mapper.
2650 bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
2651  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2652  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2653  SDLoc DL(N);
2654 
2655  int Reg = getIntOperandFromRegisterString(RegString->getString());
2656  if (Reg != -1) {
2657  ReplaceNode(N, CurDAG->getMachineNode(
2658  AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2659  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2660  N->getOperand(0)));
2661  return true;
2662  }
2663 
2664  // Use the sysreg mapper to map the remaining possible strings to the
2665  // value for the register to be used for the instruction operand.
2666  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2667  if (TheReg && TheReg->Readable &&
2668  TheReg->haveFeatures(Subtarget->getFeatureBits()))
2669  Reg = TheReg->Encoding;
2670  else
2671  Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2672 
2673  if (Reg != -1) {
2674  ReplaceNode(N, CurDAG->getMachineNode(
2675  AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
2676  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2677  N->getOperand(0)));
2678  return true;
2679  }
2680 
2681  return false;
2682 }
2683 
2684 // Lower the write_register intrinsic to an MSR instruction node if the special
2685 // register string argument is either of the form detailed in the ACLE (the
2686 // form described in getIntOperandFromRegisterString) or is a named register
2687 // known by the MSR SysReg mapper.
2688 bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
2689  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
2690  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
2691  SDLoc DL(N);
2692 
2693  int Reg = getIntOperandFromRegisterString(RegString->getString());
2694  if (Reg != -1) {
2695  ReplaceNode(
2696  N, CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
2697  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2698  N->getOperand(2), N->getOperand(0)));
2699  return true;
2700  }
2701 
2702  // Check if the register was one of those allowed as the pstatefield value in
2703  // the MSR (immediate) instruction. To accept the values allowed in the
2704  // pstatefield for the MSR (immediate) instruction, we also require that an
2705  // immediate value has been provided as an argument; we know this is the
2706  // case, as it has been ensured by semantic checking.
2707  auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());
2708  if (PMapper) {
2709  assert (isa<ConstantSDNode>(N->getOperand(2))
2710  && "Expected a constant integer expression.");
2711  unsigned Reg = PMapper->Encoding;
2712  uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
2713  unsigned State;
2714  if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO || Reg == AArch64PState::SSBS) {
2715  assert(Immed < 2 && "Bad imm");
2716  State = AArch64::MSRpstateImm1;
2717  } else {
2718  assert(Immed < 16 && "Bad imm");
2719  State = AArch64::MSRpstateImm4;
2720  }
2721  ReplaceNode(N, CurDAG->getMachineNode(
2722  State, DL, MVT::Other,
2723  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2724  CurDAG->getTargetConstant(Immed, DL, MVT::i16),
2725  N->getOperand(0)));
2726  return true;
2727  }
2728 
2729  // Use the sysreg mapper to attempt to map the remaining possible strings
2730  // to the value for the register to be used for the MSR (register)
2731  // instruction operand.
2732  auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
2733  if (TheReg && TheReg->Writeable &&
2734  TheReg->haveFeatures(Subtarget->getFeatureBits()))
2735  Reg = TheReg->Encoding;
2736  else
2737  Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
2738  if (Reg != -1) {
2739  ReplaceNode(N, CurDAG->getMachineNode(
2740  AArch64::MSR, DL, MVT::Other,
2741  CurDAG->getTargetConstant(Reg, DL, MVT::i32),
2742  N->getOperand(2), N->getOperand(0)));
2743  return true;
2744  }
2745 
2746  return false;
2747 }
2748 
2749 /// We've got special pseudo-instructions for these
2750 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2751  unsigned Opcode;
2752  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2753 
2754  // Leave IR for LSE if subtarget supports it.
2755  if (Subtarget->hasLSE()) return false;
2756 
2757  if (MemTy == MVT::i8)
2758  Opcode = AArch64::CMP_SWAP_8;
2759  else if (MemTy == MVT::i16)
2760  Opcode = AArch64::CMP_SWAP_16;
2761  else if (MemTy == MVT::i32)
2762  Opcode = AArch64::CMP_SWAP_32;
2763  else if (MemTy == MVT::i64)
2764  Opcode = AArch64::CMP_SWAP_64;
2765  else
2766  llvm_unreachable("Unknown AtomicCmpSwap type");
2767 
2768  MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
2769  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2770  N->getOperand(0)};
2771  SDNode *CmpSwap = CurDAG->getMachineNode(
2772  Opcode, SDLoc(N),
2773  CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
2774 
2775  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
2776  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
2777 
2778  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2779  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2780  CurDAG->RemoveDeadNode(N);
2781 
2782  return true;
2783 }
2784 
2785 void AArch64DAGToDAGISel::Select(SDNode *Node) {
2786  // If we have a custom node, we already have selected!
2787  if (Node->isMachineOpcode()) {
2788  LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
2789  Node->setNodeId(-1);
2790  return;
2791  }
2792 
2793  // Handle a few custom selection cases.
2794  EVT VT = Node->getValueType(0);
2795 
2796  switch (Node->getOpcode()) {
2797  default:
2798  break;
2799 
2800  case ISD::ATOMIC_CMP_SWAP:
2801  if (SelectCMP_SWAP(Node))
2802  return;
2803  break;
2804 
2805  case ISD::READ_REGISTER:
2806  if (tryReadRegister(Node))
2807  return;
2808  break;
2809 
2810  case ISD::WRITE_REGISTER:
2811  if (tryWriteRegister(Node))
2812  return;
2813  break;
2814 
2815  case ISD::ADD:
2816  if (tryMLAV64LaneV128(Node))
2817  return;
2818  break;
2819 
2820  case ISD::LOAD: {
2821  // Try to select as an indexed load. Fall through to normal processing
2822  // if we can't.
2823  if (tryIndexedLoad(Node))
2824  return;
2825  break;
2826  }
2827 
2828  case ISD::SRL:
2829  case ISD::AND:
2830  case ISD::SRA:
2831  case ISD::SIGN_EXTEND_INREG:
2832  if (tryBitfieldExtractOp(Node))
2833  return;
2834  if (tryBitfieldInsertInZeroOp(Node))
2835  return;
2836  LLVM_FALLTHROUGH;
2837  case ISD::ROTR:
2838  case ISD::SHL:
2839  if (tryShiftAmountMod(Node))
2840  return;
2841  break;
2842 
2843  case ISD::SIGN_EXTEND:
2844  if (tryBitfieldExtractOpFromSExt(Node))
2845  return;
2846  break;
2847 
2848  case ISD::OR:
2849  if (tryBitfieldInsertOp(Node))
2850  return;
2851  break;
2852 
2853  case ISD::EXTRACT_VECTOR_ELT: {
2854  // Extracting lane zero is a special case where we can just use a plain
2855  // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for
2856  // the rest of the compiler, especially the register allocator and copy
2857  // propagation, to reason about, so is preferred when it's possible to
2858  // use it.
2859  ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1));
2860  // Bail and use the default Select() for non-zero lanes.
2861  if (LaneNode->getZExtValue() != 0)
2862  break;
2863  // If the element type is not the same as the result type, likewise
2864  // bail and use the default Select(), as there's more to do than just
2865  // a cross-class COPY. This catches extracts of i8 and i16 elements
2866  // since they will need an explicit zext.
2867  if (VT != Node->getOperand(0).getValueType().getVectorElementType())
2868  break;
2869  unsigned SubReg;
2870  switch (Node->getOperand(0)
2871  .getValueType()
2872  .getVectorElementType()
2873  .getSizeInBits()) {
2874  default:
2875  llvm_unreachable("Unexpected vector element type!");
2876  case 64:
2877  SubReg = AArch64::dsub;
2878  break;
2879  case 32:
2880  SubReg = AArch64::ssub;
2881  break;
2882  case 16:
2883  SubReg = AArch64::hsub;
2884  break;
2885  case 8:
2886  llvm_unreachable("unexpected zext-requiring extract element!");
2887  }
2888  SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT,
2889  Node->getOperand(0));
2890  LLVM_DEBUG(dbgs() << "ISEL: Custom selection!\n=> ");
2891  LLVM_DEBUG(Extract->dumpr(CurDAG));
2892  LLVM_DEBUG(dbgs() << "\n");
2893  ReplaceNode(Node, Extract.getNode());
2894  return;
2895  }
2896  case ISD::Constant: {
2897  // Materialize zero constants as copies from WZR/XZR. This allows
2898  // the coalescer to propagate these into other instructions.
2899  ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
2900  if (ConstNode->isNullValue()) {
2901  if (VT == MVT::i32) {
2902  SDValue New = CurDAG->getCopyFromReg(
2903  CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
2904  ReplaceNode(Node, New.getNode());
2905  return;
2906  } else if (VT == MVT::i64) {
2907  SDValue New = CurDAG->getCopyFromReg(
2908  CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
2909  ReplaceNode(Node, New.getNode());
2910  return;
2911  }
2912  }
2913  break;
2914  }
2915 
2916  case ISD::FrameIndex: {
2917  // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
2918  int FI = cast<FrameIndexSDNode>(Node)->getIndex();
2919  unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
2920  const TargetLowering *TLI = getTargetLowering();
2921  SDValue TFI = CurDAG->getTargetFrameIndex(
2922  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2923  SDLoc DL(Node);
2924  SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
2925  CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
2926  CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
2927  return;
2928  }
2929  case ISD::INTRINSIC_W_CHAIN: {
2930  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
2931  switch (IntNo) {
2932  default:
2933  break;
2934  case Intrinsic::aarch64_ldaxp:
2935  case Intrinsic::aarch64_ldxp: {
2936  unsigned Op =
2937  IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
2938  SDValue MemAddr = Node->getOperand(2);
2939  SDLoc DL(Node);
2940  SDValue Chain = Node->getOperand(0);
2941 
2942  SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
2943  MVT::Other, MemAddr, Chain);
2944 
2945  // Transfer memoperands.
2946  MachineMemOperand *MemOp =
2947  cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2948  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
2949  ReplaceNode(Node, Ld);
2950  return;
2951  }
2952  case Intrinsic::aarch64_stlxp:
2953  case Intrinsic::aarch64_stxp: {
2954  unsigned Op =
2955  IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
2956  SDLoc DL(Node);
2957  SDValue Chain = Node->getOperand(0);
2958  SDValue ValLo = Node->getOperand(2);
2959  SDValue ValHi = Node->getOperand(3);
2960  SDValue MemAddr = Node->getOperand(4);
2961 
2962  // Place arguments in the right order.
2963  SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
2964 
2965  SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
2966  // Transfer memoperands.
2967  MachineMemOperand *MemOp =
2968  cast<MemIntrinsicSDNode>(Node)->getMemOperand();
2969  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2970 
2971  ReplaceNode(Node, St);
2972  return;
2973  }
2974  case Intrinsic::aarch64_neon_ld1x2:
2975  if (VT == MVT::v8i8) {
2976  SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
2977  return;
2978  } else if (VT == MVT::v16i8) {
2979  SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
2980  return;
2981  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
2982  SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
2983  return;
2984  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
2985  SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
2986  return;
2987  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
2988  SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
2989  return;
2990  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
2991  SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
2992  return;
2993  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
2994  SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
2995  return;
2996  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
2997  SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
2998  return;
2999  }
3000  break;
3001  case Intrinsic::aarch64_neon_ld1x3:
3002  if (VT == MVT::v8i8) {
3003  SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
3004  return;
3005  } else if (VT == MVT::v16i8) {
3006  SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
3007  return;
3008  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3009  SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
3010  return;
3011  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3012  SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
3013  return;
3014  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3015  SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
3016  return;
3017  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3018  SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
3019  return;
3020  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3021  SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
3022  return;
3023  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3024  SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
3025  return;
3026  }
3027  break;
3028  case Intrinsic::aarch64_neon_ld1x4:
3029  if (VT == MVT::v8i8) {
3030  SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
3031  return;
3032  } else if (VT == MVT::v16i8) {
3033  SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
3034  return;
3035  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3036  SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
3037  return;
3038  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3039  SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
3040  return;
3041  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3042  SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
3043  return;
3044  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3045  SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
3046  return;
3047  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3048  SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3049  return;
3050  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3051  SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
3052  return;
3053  }
3054  break;
3055  case Intrinsic::aarch64_neon_ld2:
3056  if (VT == MVT::v8i8) {
3057  SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
3058  return;
3059  } else if (VT == MVT::v16i8) {
3060  SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
3061  return;
3062  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3063  SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
3064  return;
3065  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3066  SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
3067  return;
3068  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3069  SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
3070  return;
3071  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3072  SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
3073  return;
3074  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3075  SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
3076  return;
3077  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3078  SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
3079  return;
3080  }
3081  break;
3082  case Intrinsic::aarch64_neon_ld3:
3083  if (VT == MVT::v8i8) {
3084  SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
3085  return;
3086  } else if (VT == MVT::v16i8) {
3087  SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
3088  return;
3089  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3090  SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
3091  return;
3092  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3093  SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
3094  return;
3095  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3096  SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
3097  return;
3098  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3099  SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
3100  return;
3101  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3102  SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
3103  return;
3104  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3105  SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
3106  return;
3107  }
3108  break;
3109  case Intrinsic::aarch64_neon_ld4:
3110  if (VT == MVT::v8i8) {
3111  SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
3112  return;
3113  } else if (VT == MVT::v16i8) {
3114  SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
3115  return;
3116  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3117  SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
3118  return;
3119  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3120  SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
3121  return;
3122  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3123  SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
3124  return;
3125  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3126  SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
3127  return;
3128  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3129  SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
3130  return;
3131  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3132  SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
3133  return;
3134  }
3135  break;
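  // The LDnR ("load and replicate") forms load one element per register and
  // broadcast it to every lane of the destination vectors.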
3136  case Intrinsic::aarch64_neon_ld2r:
3137  if (VT == MVT::v8i8) {
3138  SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
3139  return;
3140  } else if (VT == MVT::v16i8) {
3141  SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
3142  return;
3143  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3144  SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
3145  return;
3146  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3147  SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
3148  return;
3149  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3150  SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
3151  return;
3152  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3153  SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
3154  return;
3155  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3156  SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
3157  return;
3158  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3159  SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
3160  return;
3161  }
3162  break;
3163  case Intrinsic::aarch64_neon_ld3r:
3164  if (VT == MVT::v8i8) {
3165  SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
3166  return;
3167  } else if (VT == MVT::v16i8) {
3168  SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
3169  return;
3170  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3171  SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
3172  return;
3173  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3174  SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
3175  return;
3176  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3177  SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
3178  return;
3179  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3180  SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
3181  return;
3182  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3183  SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
3184  return;
3185  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3186  SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
3187  return;
3188  }
3189  break;
3190  case Intrinsic::aarch64_neon_ld4r:
3191  if (VT == MVT::v8i8) {
3192  SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
3193  return;
3194  } else if (VT == MVT::v16i8) {
3195  SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
3196  return;
3197  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3198  SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
3199  return;
3200  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3201  SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
3202  return;
3203  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3204  SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
3205  return;
3206  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3207  SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
3208  return;
3209  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3210  SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
3211  return;
3212  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3213  SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
3214  return;
3215  }
3216  break;
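  // Lane loads insert into an existing register tuple, so only the element
  // width matters: integer and FP vectors with the same element size, and the
  // 64-bit and 128-bit forms, all share one opcode.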
3217  case Intrinsic::aarch64_neon_ld2lane:
3218  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3219  SelectLoadLane(Node, 2, AArch64::LD2i8);
3220  return;
3221  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3222  VT == MVT::v8f16) {
3223  SelectLoadLane(Node, 2, AArch64::LD2i16);
3224  return;
3225  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3226  VT == MVT::v2f32) {
3227  SelectLoadLane(Node, 2, AArch64::LD2i32);
3228  return;
3229  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3230  VT == MVT::v1f64) {
3231  SelectLoadLane(Node, 2, AArch64::LD2i64);
3232  return;
3233  }
3234  break;
3235  case Intrinsic::aarch64_neon_ld3lane:
3236  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3237  SelectLoadLane(Node, 3, AArch64::LD3i8);
3238  return;
3239  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3240  VT == MVT::v8f16) {
3241  SelectLoadLane(Node, 3, AArch64::LD3i16);
3242  return;
3243  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3244  VT == MVT::v2f32) {
3245  SelectLoadLane(Node, 3, AArch64::LD3i32);
3246  return;
3247  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3248  VT == MVT::v1f64) {
3249  SelectLoadLane(Node, 3, AArch64::LD3i64);
3250  return;
3251  }
3252  break;
3253  case Intrinsic::aarch64_neon_ld4lane:
3254  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3255  SelectLoadLane(Node, 4, AArch64::LD4i8);
3256  return;
3257  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3258  VT == MVT::v8f16) {
3259  SelectLoadLane(Node, 4, AArch64::LD4i16);
3260  return;
3261  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3262  VT == MVT::v2f32) {
3263  SelectLoadLane(Node, 4, AArch64::LD4i32);
3264  return;
3265  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3266  VT == MVT::v1f64) {
3267  SelectLoadLane(Node, 4, AArch64::LD4i64);
3268  return;
3269  }
3270  break;
3271  }
3272  } break;
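  // Chainless intrinsics: the TBL/TBX table lookups, plus smull/umull, which
  // may be matched to the by-lane MULL patterns via tryMULLV64LaneV128.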
3273  case ISD::INTRINSIC_WO_CHAIN: {
3274  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
3275  switch (IntNo) {
3276  default:
3277  break;
3278  case Intrinsic::aarch64_neon_tbl2:
3279  SelectTable(Node, 2,
3280  VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
3281  false);
3282  return;
3283  case Intrinsic::aarch64_neon_tbl3:
3284  SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
3285  : AArch64::TBLv16i8Three,
3286  false);
3287  return;
3288  case Intrinsic::aarch64_neon_tbl4:
3289  SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
3290  : AArch64::TBLv16i8Four,
3291  false);
3292  return;
3293  case Intrinsic::aarch64_neon_tbx2:
3294  SelectTable(Node, 2,
3295  VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
3296  true);
3297  return;
3298  case Intrinsic::aarch64_neon_tbx3:
3299  SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
3300  : AArch64::TBXv16i8Three,
3301  true);
3302  return;
3303  case Intrinsic::aarch64_neon_tbx4:
3304  SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
3305  : AArch64::TBXv16i8Four,
3306  true);
3307  return;
3308  case Intrinsic::aarch64_neon_smull:
3309  case Intrinsic::aarch64_neon_umull:
3310  if (tryMULLV64LaneV128(IntNo, Node))
3311  return;
3312  break;
3313  }
3314  break;
3315  }
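  // Void intrinsics cover the multi-vector and lane stores (st1x2..st1x4,
  // st2..st4, st2lane..st4lane); the stored value type is taken from
  // operand 2 when present.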
3316  case ISD::INTRINSIC_VOID: {
3317  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
3318  if (Node->getNumOperands() >= 3)
3319  VT = Node->getOperand(2)->getValueType(0);
3320  switch (IntNo) {
3321  default:
3322  break;
3323  case Intrinsic::aarch64_neon_st1x2: {
3324  if (VT == MVT::v8i8) {
3325  SelectStore(Node, 2, AArch64::ST1Twov8b);
3326  return;
3327  } else if (VT == MVT::v16i8) {
3328  SelectStore(Node, 2, AArch64::ST1Twov16b);
3329  return;
3330  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3331  SelectStore(Node, 2, AArch64::ST1Twov4h);
3332  return;
3333  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3334  SelectStore(Node, 2, AArch64::ST1Twov8h);
3335  return;
3336  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3337  SelectStore(Node, 2, AArch64::ST1Twov2s);
3338  return;
3339  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3340  SelectStore(Node, 2, AArch64::ST1Twov4s);
3341  return;
3342  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3343  SelectStore(Node, 2, AArch64::ST1Twov2d);
3344  return;
3345  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3346  SelectStore(Node, 2, AArch64::ST1Twov1d);
3347  return;
3348  }
3349  break;
3350  }
3351  case Intrinsic::aarch64_neon_st1x3: {
3352  if (VT == MVT::v8i8) {
3353  SelectStore(Node, 3, AArch64::ST1Threev8b);
3354  return;
3355  } else if (VT == MVT::v16i8) {
3356  SelectStore(Node, 3, AArch64::ST1Threev16b);
3357  return;
3358  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3359  SelectStore(Node, 3, AArch64::ST1Threev4h);
3360  return;
3361  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3362  SelectStore(Node, 3, AArch64::ST1Threev8h);
3363  return;
3364  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3365  SelectStore(Node, 3, AArch64::ST1Threev2s);
3366  return;
3367  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3368  SelectStore(Node, 3, AArch64::ST1Threev4s);
3369  return;
3370  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3371  SelectStore(Node, 3, AArch64::ST1Threev2d);
3372  return;
3373  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3374  SelectStore(Node, 3, AArch64::ST1Threev1d);
3375  return;
3376  }
3377  break;
3378  }
3379  case Intrinsic::aarch64_neon_st1x4: {
3380  if (VT == MVT::v8i8) {
3381  SelectStore(Node, 4, AArch64::ST1Fourv8b);
3382  return;
3383  } else if (VT == MVT::v16i8) {
3384  SelectStore(Node, 4, AArch64::ST1Fourv16b);
3385  return;
3386  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3387  SelectStore(Node, 4, AArch64::ST1Fourv4h);
3388  return;
3389  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3390  SelectStore(Node, 4, AArch64::ST1Fourv8h);
3391  return;
3392  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3393  SelectStore(Node, 4, AArch64::ST1Fourv2s);
3394  return;
3395  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3396  SelectStore(Node, 4, AArch64::ST1Fourv4s);
3397  return;
3398  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3399  SelectStore(Node, 4, AArch64::ST1Fourv2d);
3400  return;
3401  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3402  SelectStore(Node, 4, AArch64::ST1Fourv1d);
3403  return;
3404  }
3405  break;
3406  }
3407  case Intrinsic::aarch64_neon_st2: {
3408  if (VT == MVT::v8i8) {
3409  SelectStore(Node, 2, AArch64::ST2Twov8b);
3410  return;
3411  } else if (VT == MVT::v16i8) {
3412  SelectStore(Node, 2, AArch64::ST2Twov16b);
3413  return;
3414  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3415  SelectStore(Node, 2, AArch64::ST2Twov4h);
3416  return;
3417  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3418  SelectStore(Node, 2, AArch64::ST2Twov8h);
3419  return;
3420  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3421  SelectStore(Node, 2, AArch64::ST2Twov2s);
3422  return;
3423  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3424  SelectStore(Node, 2, AArch64::ST2Twov4s);
3425  return;
3426  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3427  SelectStore(Node, 2, AArch64::ST2Twov2d);
3428  return;
3429  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3430  SelectStore(Node, 2, AArch64::ST1Twov1d);
3431  return;
3432  }
3433  break;
3434  }
3435  case Intrinsic::aarch64_neon_st3: {
3436  if (VT == MVT::v8i8) {
3437  SelectStore(Node, 3, AArch64::ST3Threev8b);
3438  return;
3439  } else if (VT == MVT::v16i8) {
3440  SelectStore(Node, 3, AArch64::ST3Threev16b);
3441  return;
3442  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3443  SelectStore(Node, 3, AArch64::ST3Threev4h);
3444  return;
3445  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3446  SelectStore(Node, 3, AArch64::ST3Threev8h);
3447  return;
3448  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3449  SelectStore(Node, 3, AArch64::ST3Threev2s);
3450  return;
3451  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3452  SelectStore(Node, 3, AArch64::ST3Threev4s);
3453  return;
3454  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3455  SelectStore(Node, 3, AArch64::ST3Threev2d);
3456  return;
3457  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3458  SelectStore(Node, 3, AArch64::ST1Threev1d);
3459  return;
3460  }
3461  break;
3462  }
3463  case Intrinsic::aarch64_neon_st4: {
3464  if (VT == MVT::v8i8) {
3465  SelectStore(Node, 4, AArch64::ST4Fourv8b);
3466  return;
3467  } else if (VT == MVT::v16i8) {
3468  SelectStore(Node, 4, AArch64::ST4Fourv16b);
3469  return;
3470  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3471  SelectStore(Node, 4, AArch64::ST4Fourv4h);
3472  return;
3473  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3474  SelectStore(Node, 4, AArch64::ST4Fourv8h);
3475  return;
3476  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3477  SelectStore(Node, 4, AArch64::ST4Fourv2s);
3478  return;
3479  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3480  SelectStore(Node, 4, AArch64::ST4Fourv4s);
3481  return;
3482  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3483  SelectStore(Node, 4, AArch64::ST4Fourv2d);
3484  return;
3485  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3486  SelectStore(Node, 4, AArch64::ST1Fourv1d);
3487  return;
3488  }
3489  break;
3490  }
3491  case Intrinsic::aarch64_neon_st2lane: {
3492  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3493  SelectStoreLane(Node, 2, AArch64::ST2i8);
3494  return;
3495  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3496  VT == MVT::v8f16) {
3497  SelectStoreLane(Node, 2, AArch64::ST2i16);
3498  return;
3499  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3500  VT == MVT::v2f32) {
3501  SelectStoreLane(Node, 2, AArch64::ST2i32);
3502  return;
3503  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3504  VT == MVT::v1f64) {
3505  SelectStoreLane(Node, 2, AArch64::ST2i64);
3506  return;
3507  }
3508  break;
3509  }
3510  case Intrinsic::aarch64_neon_st3lane: {
3511  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3512  SelectStoreLane(Node, 3, AArch64::ST3i8);
3513  return;
3514  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3515  VT == MVT::v8f16) {
3516  SelectStoreLane(Node, 3, AArch64::ST3i16);
3517  return;
3518  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3519  VT == MVT::v2f32) {
3520  SelectStoreLane(Node, 3, AArch64::ST3i32);
3521  return;
3522  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3523  VT == MVT::v1f64) {
3524  SelectStoreLane(Node, 3, AArch64::ST3i64);
3525  return;
3526  }
3527  break;
3528  }
3529  case Intrinsic::aarch64_neon_st4lane: {
3530  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3531  SelectStoreLane(Node, 4, AArch64::ST4i8);
3532  return;
3533  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3534  VT == MVT::v8f16) {
3535  SelectStoreLane(Node, 4, AArch64::ST4i16);
3536  return;
3537  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3538  VT == MVT::v2f32) {
3539  SelectStoreLane(Node, 4, AArch64::ST4i32);
3540  return;
3541  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3542  VT == MVT::v1f64) {
3543  SelectStoreLane(Node, 4, AArch64::ST4i64);
3544  return;
3545  }
3546  break;
3547  }
3548  }
3549  break;
3550  }
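  // Post-incremented (write-back) loads: these AArch64ISD nodes are produced
  // when an earlier DAG combine folds the pointer update into the load.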
3551  case AArch64ISD::LD2post: {
3552  if (VT == MVT::v8i8) {
3553  SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
3554  return;
3555  } else if (VT == MVT::v16i8) {
3556  SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
3557  return;
3558  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3559  SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
3560  return;
3561  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3562  SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
3563  return;
3564  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3565  SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
3566  return;
3567  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3568  SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
3569  return;
3570  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3571  SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
3572  return;
3573  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3574  SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
3575  return;
3576  }
3577  break;
3578  }
3579  case AArch64ISD::LD3post: {
3580  if (VT == MVT::v8i8) {
3581  SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
3582  return;
3583  } else if (VT == MVT::v16i8) {
3584  SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
3585  return;
3586  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3587  SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
3588  return;
3589  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3590  SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
3591  return;
3592  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3593  SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
3594  return;
3595  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3596  SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
3597  return;
3598  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3599  SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
3600  return;
3601  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3602  SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
3603  return;
3604  }
3605  break;
3606  }
3607  case AArch64ISD::LD4post: {
3608  if (VT == MVT::v8i8) {
3609  SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
3610  return;
3611  } else if (VT == MVT::v16i8) {
3612  SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
3613  return;
3614  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3615  SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
3616  return;
3617  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3618  SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
3619  return;
3620  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3621  SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
3622  return;
3623  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3624  SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
3625  return;
3626  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3627  SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
3628  return;
3629  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3630  SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
3631  return;
3632  }
3633  break;
3634  }
3635  case AArch64ISD::LD1x2post: {
3636  if (VT == MVT::v8i8) {
3637  SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
3638  return;
3639  } else if (VT == MVT::v16i8) {
3640  SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
3641  return;
3642  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3643  SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
3644  return;
3645  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3646  SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
3647  return;
3648  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3649  SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
3650  return;
3651  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3652  SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
3653  return;
3654  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3655  SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
3656  return;
3657  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3658  SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
3659  return;
3660  }
3661  break;
3662  }
3663  case AArch64ISD::LD1x3post: {
3664  if (VT == MVT::v8i8) {
3665  SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
3666  return;
3667  } else if (VT == MVT::v16i8) {
3668  SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
3669  return;
3670  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3671  SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
3672  return;
3673  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3674  SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
3675  return;
3676  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3677  SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
3678  return;
3679  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3680  SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
3681  return;
3682  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3683  SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
3684  return;
3685  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3686  SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
3687  return;
3688  }
3689  break;
3690  }
3691  case AArch64ISD::LD1x4post: {
3692  if (VT == MVT::v8i8) {
3693  SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
3694  return;
3695  } else if (VT == MVT::v16i8) {
3696  SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
3697  return;
3698  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3699  SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
3700  return;
3701  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3702  SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
3703  return;
3704  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3705  SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
3706  return;
3707  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3708  SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
3709  return;
3710  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3711  SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
3712  return;
3713  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3714  SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
3715  return;
3716  }
3717  break;
3718  }
3719  case AArch64ISD::LD1DUPpost: {
3720  if (VT == MVT::v8i8) {
3721  SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
3722  return;
3723  } else if (VT == MVT::v16i8) {
3724  SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
3725  return;
3726  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3727  SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
3728  return;
3729  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3730  SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
3731  return;
3732  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3733  SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
3734  return;
3735  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3736  SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
3737  return;
3738  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3739  SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
3740  return;
3741  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3742  SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
3743  return;
3744  }
3745  break;
3746  }
3747  case AArch64ISD::LD2DUPpost: {
3748  if (VT == MVT::v8i8) {
3749  SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
3750  return;
3751  } else if (VT == MVT::v16i8) {
3752  SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
3753  return;
3754  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3755  SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
3756  return;
3757  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3758  SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
3759  return;
3760  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3761  SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
3762  return;
3763  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3764  SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
3765  return;
3766  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3767  SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
3768  return;
3769  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3770  SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
3771  return;
3772  }
3773  break;
3774  }
3775  case AArch64ISD::LD3DUPpost: {
3776  if (VT == MVT::v8i8) {
3777  SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
3778  return;
3779  } else if (VT == MVT::v16i8) {
3780  SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
3781  return;
3782  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3783  SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
3784  return;
3785  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3786  SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
3787  return;
3788  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3789  SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
3790  return;
3791  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3792  SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
3793  return;
3794  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3795  SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
3796  return;
3797  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3798  SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
3799  return;
3800  }
3801  break;
3802  }
3803  case AArch64ISD::LD4DUPpost: {
3804  if (VT == MVT::v8i8) {
3805  SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
3806  return;
3807  } else if (VT == MVT::v16i8) {
3808  SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
3809  return;
3810  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3811  SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
3812  return;
3813  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3814  SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
3815  return;
3816  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3817  SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
3818  return;
3819  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3820  SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
3821  return;
3822  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3823  SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
3824  return;
3825  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3826  SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
3827  return;
3828  }
3829  break;
3830  }
3831  case AArch64ISD::LD1LANEpost: {
3832  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3833  SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
3834  return;
3835  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3836  VT == MVT::v8f16) {
3837  SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
3838  return;
3839  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3840  VT == MVT::v2f32) {
3841  SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
3842  return;
3843  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3844  VT == MVT::v1f64) {
3845  SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
3846  return;
3847  }
3848  break;
3849  }
3850  case AArch64ISD::LD2LANEpost: {
3851  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3852  SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
3853  return;
3854  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3855  VT == MVT::v8f16) {
3856  SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
3857  return;
3858  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3859  VT == MVT::v2f32) {
3860  SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
3861  return;
3862  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3863  VT == MVT::v1f64) {
3864  SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
3865  return;
3866  }
3867  break;
3868  }
3869  case AArch64ISD::LD3LANEpost: {
3870  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3871  SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
3872  return;
3873  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3874  VT == MVT::v8f16) {
3875  SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
3876  return;
3877  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3878  VT == MVT::v2f32) {
3879  SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
3880  return;
3881  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3882  VT == MVT::v1f64) {
3883  SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
3884  return;
3885  }
3886  break;
3887  }
3888  case AArch64ISD::LD4LANEpost: {
3889  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
3890  SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
3891  return;
3892  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
3893  VT == MVT::v8f16) {
3894  SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
3895  return;
3896  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
3897  VT == MVT::v2f32) {
3898  SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
3899  return;
3900  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
3901  VT == MVT::v1f64) {
3902  SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
3903  return;
3904  }
3905  break;
3906  }
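  // Post-incremented stores: the value type comes from the stored vector
  // (operand 1) rather than from the node's results.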
3907  case AArch64ISD::ST2post: {
3908  VT = Node->getOperand(1).getValueType();
3909  if (VT == MVT::v8i8) {
3910  SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
3911  return;
3912  } else if (VT == MVT::v16i8) {
3913  SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
3914  return;
3915  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3916  SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
3917  return;
3918  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3919  SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
3920  return;
3921  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3922  SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
3923  return;
3924  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3925  SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
3926  return;
3927  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3928  SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
3929  return;
3930  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3931  SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
3932  return;
3933  }
3934  break;
3935  }
3936  case AArch64ISD::ST3post: {
3937  VT = Node->getOperand(1).getValueType();
3938  if (VT == MVT::v8i8) {
3939  SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
3940  return;
3941  } else if (VT == MVT::v16i8) {
3942  SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
3943  return;
3944  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3945  SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
3946  return;
3947  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3948  SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
3949  return;
3950  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3951  SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
3952  return;
3953  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3954  SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
3955  return;
3956  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3957  SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
3958  return;
3959  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3960  SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
3961  return;
3962  }
3963  break;
3964  }
3965  case AArch64ISD::ST4post: {
3966  VT = Node->getOperand(1).getValueType();
3967  if (VT == MVT::v8i8) {
3968  SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
3969  return;
3970  } else if (VT == MVT::v16i8) {
3971  SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
3972  return;
3973  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
3974  SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
3975  return;
3976  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
3977  SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
3978  return;
3979  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
3980  SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
3981  return;
3982  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
3983  SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
3984  return;
3985  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
3986  SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
3987  return;
3988  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
3989  SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
3990  return;
3991  }
3992  break;
3993  }
3994  case AArch64ISD::ST1x2post: {
3995  VT = Node->getOperand(1).getValueType();
3996  if (VT == MVT::v8i8) {
3997  SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
3998  return;
3999  } else if (VT == MVT::v16i8) {
4000  SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
4001  return;
4002  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
4003  SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
4004  return;
4005  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
4006  SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
4007  return;
4008  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4009  SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
4010  return;
4011  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4012  SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
4013  return;
4014  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4015  SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
4016  return;
4017  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4018  SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
4019  return;
4020  }
4021  break;
4022  }
4023  case AArch64ISD::ST1x3post: {
4024  VT = Node->getOperand(1).getValueType();
4025  if (VT == MVT::v8i8) {
4026  SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
4027  return;
4028  } else if (VT == MVT::v16i8) {
4029  SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
4030  return;
4031  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
4032  SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
4033  return;
4034  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
4035  SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
4036  return;
4037  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4038  SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
4039  return;
4040  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4041  SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
4042  return;
4043  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4044  SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
4045  return;
4046  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4047  SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
4048  return;
4049  }
4050  break;
4051  }
4052  case AArch64ISD::ST1x4post: {
4053  VT = Node->getOperand(1).getValueType();
4054  if (VT == MVT::v8i8) {
4055  SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
4056  return;
4057  } else if (VT == MVT::v16i8) {
4058  SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
4059  return;
4060  } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
4061  SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
4062  return;
4063  } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
4064  SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
4065  return;
4066  } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4067  SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
4068  return;
4069  } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4070  SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
4071  return;
4072  } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4073  SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
4074  return;
4075  } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4076  SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
4077  return;
4078  }
4079  break;
4080  }
4081  case AArch64ISD::ST2LANEpost: {
4082  VT = Node->getOperand(1).getValueType();
4083  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4084  SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
4085  return;
4086  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4087  VT == MVT::v8f16) {
4088  SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
4089  return;
4090  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4091  VT == MVT::v2f32) {
4092  SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
4093  return;
4094  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4095  VT == MVT::v1f64) {
4096  SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
4097  return;
4098  }
4099  break;
4100  }
4101  case AArch64ISD::ST3LANEpost: {
4102  VT = Node->getOperand(1).getValueType();
4103  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4104  SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
4105  return;
4106  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4107  VT == MVT::v8f16) {
4108  SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
4109  return;
4110  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4111  VT == MVT::v2f32) {
4112  SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
4113  return;
4114  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4115  VT == MVT::v1f64) {
4116  SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
4117  return;
4118  }
4119  break;
4120  }
4121  case AArch64ISD::ST4LANEpost: {
4122  VT = Node->getOperand(1).getValueType();
4123  if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4124  SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
4125  return;
4126  } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4127  VT == MVT::v8f16) {
4128  SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
4129  return;
4130  } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4131  VT == MVT::v2f32) {
4132  SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
4133  return;
4134  } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4135  VT == MVT::v1f64) {
4136  SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
4137  return;
4138  }
4139  break;
4140  }
4141  }
4142 
4143  // Select the default instruction via the TableGen-generated matcher.
4144  SelectCode(Node);
4145 }
4146 
4147 /// createAArch64ISelDag - This pass converts a legalized DAG into an
4148 /// AArch64-specific DAG, ready for instruction scheduling.
4149 FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
4150                                          CodeGenOpt::Level OptLevel) {
4151  return new AArch64DAGToDAGISel(TM, OptLevel);
4152 }
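// A minimal usage sketch (not part of this file): a target's pass
// configuration is expected to add this pass during instruction selection,
// along the lines of
//
//   bool AArch64PassConfig::addInstSelector() {
//     addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
//     return false;
//   }
//
// AArch64PassConfig and getAArch64TargetMachine are named here only for
// illustration; see AArch64TargetMachine.cpp for the actual wiring.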