//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "arm-isel"

static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
                 cl::desc("Disable isel of shifter-op"),
                 cl::init(false));

//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  StringRef getPassName() const override { return "ARM Instruction Selection"; }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  /// Return true as some complex patterns, like those that call
  /// canExtractShiftFromMul, can modify the DAG in place.
  bool ComplexPatternFuncMutatesDAG() const override { return true; }

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
                                    SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }
  bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
    if (!N.hasOneUse())
      return false;
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr, SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
                                  unsigned Shift);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  template <int Min, int Max>
  bool SelectImmediateInRange(SDValue N, SDValue &OffImm);

  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);
  bool tryMVEIndexedLoad(SDNode *N);
  bool tryFMULFixed(SDNode *N, SDLoc dl);
  bool tryFP_TO_INT(SDNode *N, SDLoc dl);
  bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul,
                                             bool IsUnsigned,
                                             bool FixedToFloat);

  /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
  /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics.  NumVecs should
  /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// Helper functions for setting up clusters of MVE predication operands.
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask);
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask, SDValue Inactive);

  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);

  /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
  void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);

  /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
  void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
                           bool HasSaturationOperand);

  /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
  void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                         uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);

  /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
  /// vector lanes.
  void SelectMVE_VSHLC(SDNode *N, bool Predicated);

  /// Select long MVE vector reductions with two vector operands.
  /// Stride is the number of vector element widths the instruction can operate
  /// on:
  ///   2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
  ///   1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
  /// Stride is used when addressing the OpcodesS array which contains multiple
  /// opcodes for each element width.
  /// TySize is the index into the list of element types listed above.
  void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                             const uint16_t *OpcodesS, const uint16_t *OpcodesU,
                             size_t Stride, size_t TySize);

  /// Select a 64-bit MVE vector reduction with two vector operands,
  /// arm_mve_vmlldava_[predicated].
  void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                         const uint16_t *OpcodesU);
  /// Select a 72-bit MVE vector rounding reduction with two vector operands,
  /// int_arm_mve_vrmlldavha[_predicated].
  void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                           const uint16_t *OpcodesU);

  /// SelectMVE_VLD - Select MVE interleaving load intrinsics.  NumVecs
  /// should be 2 or 4.  The opcode array specifies the instructions
  /// used for 8, 16 and 32-bit lane sizes respectively, and each
  /// pointer points to a set of NumVecs sub-opcodes used for the
  /// different stages (e.g. VLD20 versus VLD21) of each load family.
  void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                     const uint16_t *const *Opcodes, bool HasWriteback);

  /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions.  Opcodes is
  /// an array of 3 elements for the 8, 16 and 32-bit lane sizes.
  void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                       bool Wrapping, bool Predicated);

  /// SelectCDE_CXxD - Select a CDE dual-GPR instruction (one of CX1D,
  /// CX1DA, CX2D, CX2DA, CX3D, CX3DA).
  /// \arg \c NumExtraOps number of extra operands besides the coprocessor,
  ///   the accumulator and the immediate operand, i.e. 0
  ///   for CX1*, 1 for CX2*, 2 for CX3*
  /// \arg \c HasAccum whether the instruction has an accumulator operand
  void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
                      bool HasAccum);

303 
304  /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
305  /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
306  /// for loading D registers.
307  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
308  unsigned NumVecs, const uint16_t *DOpcodes,
309  const uint16_t *QOpcodes0 = nullptr,
310  const uint16_t *QOpcodes1 = nullptr);
311 
312  /// Try to select SBFX/UBFX instructions for ARM.
313  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
314 
315  bool tryInsertVectorElt(SDNode *N);
316 
317  // Select special operations if node forms integer ABS pattern
318  bool tryABSOp(SDNode *N);
319 
320  bool tryReadRegister(SDNode *N);
321  bool tryWriteRegister(SDNode *N);
322 
323  bool tryInlineAsm(SDNode *N);
324 
325  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
326 
327  void SelectCMP_SWAP(SDNode *N);
328 
329  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
330  /// inline asm expressions.
331  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
332  std::vector<SDValue> &OutOps) override;
333 
  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant.  Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};
} // end anonymous namespace

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the value is a 32-bit constant
// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32-bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

/// Check whether a particular node is a constant value representable as
/// (N * Scale) where N is in [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  ScaledConstant = (int) C->getZExtValue();
  if ((ScaledConstant % Scale) != 0)
    return false;

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}

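// Illustrative example (added for exposition, not in the original source):
// a constant node with value 1020 and Scale = 4 gives ScaledConstant = 255,
// so isScaledConstantInRange(Node, /*Scale=*/4, -255, 256, SC) succeeds with
// SC == 255, while a value such as 1022 fails the divisibility check.
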
void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  // We use make_early_inc_range to avoid invalidation issues.
  for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
    if (N.getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as a shifter
    // operand of 'add' and the 'and' and 'srl' would become a bit-extraction
    // node (UBFX).

    SDValue N0 = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    unsigned And_imm = 0;
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. on
      // Swift, a left shift by 1 or 2 is free but other amounts are not.
      // e.g.
      //   ubfx   r3, r1, #16, #8
      //   ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //   mov.w  r9, #1020
      //   and.w  r2, r9, r1, lsr #14
      //   ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(&N, N0, N1);
  }
}

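// Worked example of the transform above (added for exposition): with c1 = 14
// and c2 = 1020 (so TZ = 2), the DAG
//   (add X1, (and (srl X2, 14), 1020))
// is rewritten to
//   (add X1, (shl (and (srl X2, 16), 255), 2))
// where the 'and'/'srl' pair can now select to UBFX and the 'shl' folds into
// the add's shifter operand, matching the assembly shown in the comments.
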
/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  if (OptLevel == CodeGenOpt::None)
    return true;

  if (!Subtarget->hasVMLxHazards())
    return true;

  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    //   vmla
    //   vmla (stall 8 cycles)
    //
    //   vmul (5 cycles)
    //   vadd (5 cycles)
    //   vmla
    // This adds up to about 18 - 19 cycles.
    //
    //   vmla
    //   vmul (stall 4 cycles)
    //   vadd adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}

bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
                                            unsigned ShAmt) {
  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    return true;
  if (Shift.hasOneUse())
    return true;
  // R << 2 is free.
  return ShOpcVal == ARM_AM::lsl &&
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}

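// For example (illustrative): on Swift, a shift feeding several users is
// still folded as a shifter operand when it is 'lsl #1' or 'lsl #2', since
// the check above treats those forms as free there, while a multi-use
// 'lsr #4' is kept as a separate instruction.
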
bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned MaxShift,
                                             unsigned &PowerOfTwo,
                                             SDValue &NewMulConst) const {
  assert(N.getOpcode() == ISD::MUL);
  assert(MaxShift > 0);

  // If the multiply is used in more than one place then changing the constant
  // will make other uses incorrect, so don't.
  if (!N.hasOneUse()) return false;
  // Check if the multiply is by a constant
  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!MulConst) return false;
  // If the constant is used in more than one place then modifying it will mean
  // we need to materialize two constants instead of one, which is a bad idea.
  if (!MulConst->hasOneUse()) return false;
  unsigned MulConstVal = MulConst->getZExtValue();
  if (MulConstVal == 0) return false;

  // Find the largest power of 2 that MulConstVal is a multiple of
  PowerOfTwo = MaxShift;
  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    --PowerOfTwo;
    if (PowerOfTwo == 0) return false;
  }

  // Only optimise if the new cost is better
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
  unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
  return NewCost < OldCost;
}

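// Worked example (illustrative): for (mul X, 40), 40 == 5 << 3, so with
// MaxShift >= 3 this sets PowerOfTwo = 3 and NewMulConst = 5, letting the
// caller select (mul X, 5) with an 'lsl #3' shifter operand instead. The
// rewrite only fires when materializing 5 is cheaper than materializing 40
// on the current subtarget.
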
void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}

bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}

bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

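// For example (illustrative): an address of the form (add R1, -8) matches
// with Base = R1 and OffImm = -8, selectable as 'ldr r0, [r1, #-8]'; an
// offset outside (-4096, 4096) instead falls back to the base-only form
// with OffImm = 0.
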
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = -RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub : ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
      ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
              dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
      ? cast<LoadSDNode>(Op)->getAddressingMode()
      : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
      ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                                  SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
      ? cast<LoadSDNode>(Op)->getAddressingMode()
      : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
      ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
      ? cast<LoadSDNode>(Op)->getAddressingMode()
      : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
      ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
      ? cast<LoadSDNode>(Op)->getAddressingMode()
      : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
      ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
                                        bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}

bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}

bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    llvm::Align MMOAlign = MemN->getAlign();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign.value() >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics.  For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlign().value();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}

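// Note (added for exposition): when the post-increment amount equals the
// access size in bytes, the offset operand becomes register 0, selecting the
// fixed-increment form 'vld1 ..., [rN]!' rather than the register-increment
// form 'vld1 ..., [rN], rM'.
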
bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                                      SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

//===----------------------------------------------------------------------===//
// Thumb Addressing Modes
//===----------------------------------------------------------------------===//

static bool shouldUseZeroOffsetLdSt(SDValue N) {
  // Negative numbers are difficult to materialise in thumb1. If we are
  // selecting the add of a negative, instead try to select ri with a zero
  // offset, so create the add node directly which will become a sub.
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Look for an imm which is not legal for ld/st, but is legal for sub.
  if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
    return C->getSExtValue() < 0 && C->getSExtValue() >= -255;

  return false;
}

bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
                                                SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
    if (!NC || !NC->isZero())
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}

bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
                                            SDValue &Offset) {
  if (shouldUseZeroOffsetLdSt(N))
    return false; // Select ri instead
  return SelectThumbAddrModeRRSext(N, Base, Offset);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}

bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlign(FI) < Align(4))
      MFI.setObjectAlignment(FI, Align(4));
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4))
          MFI.setObjectAlignment(FI, Align(4));
        if (MFI.getObjectAlign(FI) >= Align(4)) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
                                          SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

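// For example (illustrative): with Shift = 1, SelectTAddrModeImm7 above
// accepts even byte offsets in [-254, 254], so (add R0, 26) yields Base = R0
// and OffImm = 26, while an odd offset such as 27 falls back to the
// base-only form.
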
//===----------------------------------------------------------------------===//
// Thumb 2 Addressing Modes
//===----------------------------------------------------------------------===//

bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false;  // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
      ? cast<LoadSDNode>(Op)->getAddressingMode()
      : cast<StoreSDNode>(Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
        ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
        : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm,
                                                 unsigned Shift) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM;
  switch (Opcode) {
  case ISD::LOAD:
    AM = cast<LoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::STORE:
    AM = cast<StoreSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MLOAD:
    AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MSTORE:
    AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
    break;
  default:
    llvm_unreachable("Unexpected Opcode for Imm7Offset");
  }

  int RHSC;
  // 7 bit constant, shifted by Shift.
  if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
    OffImm =
        ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
            ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
            : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
                                        MVT::i32);
    return true;
  }
  return false;
}

template <int Min, int Max>
bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
  int Val;
  if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
    OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
    return true;
  }
  return false;
}

bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  OffImm = CurDAG->getTargetConstant(RHSC / 4, SDLoc(N), MVT::i32);
  return true;
}

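// For example (illustrative): (add R2, 1020) yields Base = R2 and
// OffImm = 255, matching the Thumb-2 ldrex/strex encoding in which the
// immediate is the byte offset divided by 4; (add R2, 1021) keeps the whole
// expression as the base since 1021 is not a multiple of 4.
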
//===--------------------------------------------------------------------===//

/// getAL - Returns an ARMCC::AL immediate node.
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}

void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}

1583 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1584  LoadSDNode *LD = cast<LoadSDNode>(N);
1585  ISD::MemIndexedMode AM = LD->getAddressingMode();
1586  if (AM == ISD::UNINDEXED)
1587  return false;
1588 
1589  EVT LoadedVT = LD->getMemoryVT();
1590  SDValue Offset, AMOpc;
1591  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1592  unsigned Opcode = 0;
1593  bool Match = false;
1594  if (LoadedVT == MVT::i32 && isPre &&
1595  SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1596  Opcode = ARM::LDR_PRE_IMM;
1597  Match = true;
1598  } else if (LoadedVT == MVT::i32 && !isPre &&
1599  SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1600  Opcode = ARM::LDR_POST_IMM;
1601  Match = true;
1602  } else if (LoadedVT == MVT::i32 &&
1603  SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1604  Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1605  Match = true;
1606 
1607  } else if (LoadedVT == MVT::i16 &&
1608  SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1609  Match = true;
1610  Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1611  ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1612  : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1613  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1614  if (LD->getExtensionType() == ISD::SEXTLOAD) {
1615  if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1616  Match = true;
1617  Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1618  }
1619  } else {
1620  if (isPre &&
1621  SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1622  Match = true;
1623  Opcode = ARM::LDRB_PRE_IMM;
1624  } else if (!isPre &&
1625  SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1626  Match = true;
1627  Opcode = ARM::LDRB_POST_IMM;
1628  } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1629  Match = true;
1630  Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1631  }
1632  }
1633  }
1634 
1635  if (Match) {
1636  if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1637  SDValue Chain = LD->getChain();
1638  SDValue Base = LD->getBasePtr();
1639  SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1640  CurDAG->getRegister(0, MVT::i32), Chain };
1641  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1642  MVT::Other, Ops);
1643  transferMemOperands(N, New);
1644  ReplaceNode(N, New);
1645  return true;
1646  } else {
1647  SDValue Chain = LD->getChain();
1648  SDValue Base = LD->getBasePtr();
1649  SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1650  CurDAG->getRegister(0, MVT::i32), Chain };
1651  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1652  MVT::Other, Ops);
1653  transferMemOperands(N, New);
1654  ReplaceNode(N, New);
1655  return true;
1656  }
1657  }
1658 
1659  return false;
1660 }
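// Operand-shape note (editorial): the two arms above differ only in how the
// offset travels. LDR_PRE_IMM and LDRB_PRE_IMM fold it into AMOpc, while the
// register and post-indexed forms pass a separate Offset operand; every
// match produces a (loaded value, updated base, chain) machine node.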
1661 
1662 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1663  LoadSDNode *LD = cast<LoadSDNode>(N);
1664  EVT LoadedVT = LD->getMemoryVT();
1665  ISD::MemIndexedMode AM = LD->getAddressingMode();
1666  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1667  LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1668  return false;
1669 
1670  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1671  if (!COffs || COffs->getZExtValue() != 4)
1672  return false;
1673 
1674  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1675  // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1676  // look, however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1677  // ISel.
1678  SDValue Chain = LD->getChain();
1679  SDValue Base = LD->getBasePtr();
1680  SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1681  CurDAG->getRegister(0, MVT::i32), Chain };
1682  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1683  MVT::i32, MVT::Other, Ops);
1684  transferMemOperands(N, New);
1685  ReplaceNode(N, New);
1686  return true;
1687 }
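// For example (editorial): Thumb1 has no "ldr r1, [r0], #4" encoding, but
// "ldm r0!, {r1}" performs the same post-increment load, which is why the
// tLDR_postidx pseudo above survives until it is swapped for tLDMIA_UPD
// after ISel.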
1688 
1689 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1690  LoadSDNode *LD = cast<LoadSDNode>(N);
1691  ISD::MemIndexedMode AM = LD->getAddressingMode();
1692  if (AM == ISD::UNINDEXED)
1693  return false;
1694 
1695  EVT LoadedVT = LD->getMemoryVT();
1696  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1697  SDValue Offset;
1698  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1699  unsigned Opcode = 0;
1700  bool Match = false;
1701  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1702  switch (LoadedVT.getSimpleVT().SimpleTy) {
1703  case MVT::i32:
1704  Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1705  break;
1706  case MVT::i16:
1707  if (isSExtLd)
1708  Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1709  else
1710  Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1711  break;
1712  case MVT::i8:
1713  case MVT::i1:
1714  if (isSExtLd)
1715  Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1716  else
1717  Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1718  break;
1719  default:
1720  return false;
1721  }
1722  Match = true;
1723  }
1724 
1725  if (Match) {
1726  SDValue Chain = LD->getChain();
1727  SDValue Base = LD->getBasePtr();
1728  SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1729  CurDAG->getRegister(0, MVT::i32), Chain };
1730  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1731  MVT::Other, Ops);
1732  transferMemOperands(N, New);
1733  ReplaceNode(N, New);
1734  return true;
1735  }
1736 
1737  return false;
1738 }
1739 
1740 bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
1741  EVT LoadedVT;
1742  unsigned Opcode = 0;
1743  bool isSExtLd, isPre;
1744  Align Alignment;
1745  ARMVCC::VPTCodes Pred;
1746  SDValue PredReg;
1747  SDValue Chain, Base, Offset;
1748 
1749  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
1750  ISD::MemIndexedMode AM = LD->getAddressingMode();
1751  if (AM == ISD::UNINDEXED)
1752  return false;
1753  LoadedVT = LD->getMemoryVT();
1754  if (!LoadedVT.isVector())
1755  return false;
1756 
1757  Chain = LD->getChain();
1758  Base = LD->getBasePtr();
1759  Offset = LD->getOffset();
1760  Alignment = LD->getAlign();
1761  isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1762  isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1763  Pred = ARMVCC::None;
1764  PredReg = CurDAG->getRegister(0, MVT::i32);
1765  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
1766  ISD::MemIndexedMode AM = LD->getAddressingMode();
1767  if (AM == ISD::UNINDEXED)
1768  return false;
1769  LoadedVT = LD->getMemoryVT();
1770  if (!LoadedVT.isVector())
1771  return false;
1772 
1773  Chain = LD->getChain();
1774  Base = LD->getBasePtr();
1775  Offset = LD->getOffset();
1776  Alignment = LD->getAlign();
1777  isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1778  isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1779  Pred = ARMVCC::Then;
1780  PredReg = LD->getMask();
1781  } else
1782  llvm_unreachable("Expected a Load or a Masked Load!");
1783 
1784  // We allow LE non-masked loads to change the type (for example use a vldrb.8
1785  // as opposed to a vldrw.32). This can allow extra addressing modes or
1786  // alignments for what is otherwise an equivalent instruction.
1787  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);
1788 
1789  SDValue NewOffset;
1790  if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
1791  SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
1792  if (isSExtLd)
1793  Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
1794  else
1795  Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
1796  } else if (LoadedVT == MVT::v8i8 &&
1797  SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
1798  if (isSExtLd)
1799  Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
1800  else
1801  Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
1802  } else if (LoadedVT == MVT::v4i8 &&
1803  SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
1804  if (isSExtLd)
1805  Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
1806  else
1807  Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
1808  } else if (Alignment >= Align(4) &&
1809  (CanChangeType || LoadedVT == MVT::v4i32 ||
1810  LoadedVT == MVT::v4f32) &&
1811  SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
1812  Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
1813  else if (Alignment >= Align(2) &&
1814  (CanChangeType || LoadedVT == MVT::v8i16 ||
1815  LoadedVT == MVT::v8f16) &&
1816  SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
1817  Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
1818  else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
1819  SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
1820  Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
1821  else
1822  return false;
1823 
1824  SDValue Ops[] = {Base,
1825  NewOffset,
1826  CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32),
1827  PredReg,
1828  CurDAG->getRegister(0, MVT::i32), // tp_reg
1829  Chain};
1830  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
1831  N->getValueType(0), MVT::Other, Ops);
1832  transferMemOperands(N, New);
1833  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
1834  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
1835  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
1836  CurDAG->RemoveDeadNode(N);
1837  return true;
1838 }
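// Result-order note (editorial): the machine node above was created with
// value types (i32 writeback, vector data, chain) while the ISD load
// produces (data, writeback, chain), hence the crossed ReplaceUses of
// results 0 and 1.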
1839 
1840 /// Form a GPRPair pseudo register from a pair of GPR regs.
1841 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1842  SDLoc dl(V0.getNode());
1843  SDValue RegClass =
1844  CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1845  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1846  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1847  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1848  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1849 }
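// Illustrative sketch (editorial): REG_SEQUENCE takes the register class
// followed by alternating (value, subregister index) pairs, so the node
// built above places V0 in gsub_0 and V1 in gsub_1 of a single untyped
// GPRPair, the operand shape expected by pair-consuming instructions such
// as ldrexd and strexd.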
1850 
1851 /// Form a D register from a pair of S registers.
1852 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1853  SDLoc dl(V0.getNode());
1854  SDValue RegClass =
1855  CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1856  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1857  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1858  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1859  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1860 }
1861 
1862 /// Form a quad register from a pair of D registers.
1863 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1864  SDLoc dl(V0.getNode());
1865  SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1866  MVT::i32);
1867  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1868  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1869  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1870  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1871 }
1872 
1873 /// Form 4 consecutive D registers from a pair of Q registers.
1874 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1875  SDLoc dl(V0.getNode());
1876  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1877  MVT::i32);
1878  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1879  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1880  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1881  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1882 }
1883 
1884 /// Form 4 consecutive S registers.
1885 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1886  SDValue V2, SDValue V3) {
1887  SDLoc dl(V0.getNode());
1888  SDValue RegClass =
1889  CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1890  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1891  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1892  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1893  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1894  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1895  V2, SubReg2, V3, SubReg3 };
1896  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1897 }
1898 
1899 /// Form 4 consecutive D registers.
1900 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1901  SDValue V2, SDValue V3) {
1902  SDLoc dl(V0.getNode());
1903  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1904  MVT::i32);
1905  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1906  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1907  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1908  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1909  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1910  V2, SubReg2, V3, SubReg3 };
1911  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1912 }
1913 
1914 /// Form 4 consecutive Q registers.
1915 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1916  SDValue V2, SDValue V3) {
1917  SDLoc dl(V0.getNode());
1918  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1919  MVT::i32);
1920  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1921  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1922  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1923  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1924  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1925  V2, SubReg2, V3, SubReg3 };
1926  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1927 }
1928 
1929 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1930 /// of a NEON VLD or VST instruction. The supported values depend on the
1931 /// number of registers being loaded.
1932 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1933  unsigned NumVecs, bool is64BitVector) {
1934  unsigned NumRegs = NumVecs;
1935  if (!is64BitVector && NumVecs < 3)
1936  NumRegs *= 2;
1937 
1938  unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1939  if (Alignment >= 32 && NumRegs == 4)
1940  Alignment = 32;
1941  else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1942  Alignment = 16;
1943  else if (Alignment >= 8)
1944  Alignment = 8;
1945  else
1946  Alignment = 0;
1947 
1948  return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1949 }
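// Worked example (editorial): a vld2 of q-registers has NumRegs = 4
// (NumVecs = 2, doubled for 128-bit vectors), so a requested alignment of
// 64 bytes is clamped to 32, the largest value the four-register VLD/VST
// alignment field can encode.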
1950 
1951 static bool isVLDfixed(unsigned Opc)
1952 {
1953  switch (Opc) {
1954  default: return false;
1955  case ARM::VLD1d8wb_fixed : return true;
1956  case ARM::VLD1d16wb_fixed : return true;
1957  case ARM::VLD1d64Qwb_fixed : return true;
1958  case ARM::VLD1d32wb_fixed : return true;
1959  case ARM::VLD1d64wb_fixed : return true;
1960  case ARM::VLD1d8TPseudoWB_fixed : return true;
1961  case ARM::VLD1d16TPseudoWB_fixed : return true;
1962  case ARM::VLD1d32TPseudoWB_fixed : return true;
1963  case ARM::VLD1d64TPseudoWB_fixed : return true;
1964  case ARM::VLD1d8QPseudoWB_fixed : return true;
1965  case ARM::VLD1d16QPseudoWB_fixed : return true;
1966  case ARM::VLD1d32QPseudoWB_fixed : return true;
1967  case ARM::VLD1d64QPseudoWB_fixed : return true;
1968  case ARM::VLD1q8wb_fixed : return true;
1969  case ARM::VLD1q16wb_fixed : return true;
1970  case ARM::VLD1q32wb_fixed : return true;
1971  case ARM::VLD1q64wb_fixed : return true;
1972  case ARM::VLD1DUPd8wb_fixed : return true;
1973  case ARM::VLD1DUPd16wb_fixed : return true;
1974  case ARM::VLD1DUPd32wb_fixed : return true;
1975  case ARM::VLD1DUPq8wb_fixed : return true;
1976  case ARM::VLD1DUPq16wb_fixed : return true;
1977  case ARM::VLD1DUPq32wb_fixed : return true;
1978  case ARM::VLD2d8wb_fixed : return true;
1979  case ARM::VLD2d16wb_fixed : return true;
1980  case ARM::VLD2d32wb_fixed : return true;
1981  case ARM::VLD2q8PseudoWB_fixed : return true;
1982  case ARM::VLD2q16PseudoWB_fixed : return true;
1983  case ARM::VLD2q32PseudoWB_fixed : return true;
1984  case ARM::VLD2DUPd8wb_fixed : return true;
1985  case ARM::VLD2DUPd16wb_fixed : return true;
1986  case ARM::VLD2DUPd32wb_fixed : return true;
1987  case ARM::VLD2DUPq8OddPseudoWB_fixed: return true;
1988  case ARM::VLD2DUPq16OddPseudoWB_fixed: return true;
1989  case ARM::VLD2DUPq32OddPseudoWB_fixed: return true;
1990  }
1991 }
1992 
1993 static bool isVSTfixed(unsigned Opc)
1994 {
1995  switch (Opc) {
1996  default: return false;
1997  case ARM::VST1d8wb_fixed : return true;
1998  case ARM::VST1d16wb_fixed : return true;
1999  case ARM::VST1d32wb_fixed : return true;
2000  case ARM::VST1d64wb_fixed : return true;
2001  case ARM::VST1q8wb_fixed : return true;
2002  case ARM::VST1q16wb_fixed : return true;
2003  case ARM::VST1q32wb_fixed : return true;
2004  case ARM::VST1q64wb_fixed : return true;
2005  case ARM::VST1d8TPseudoWB_fixed : return true;
2006  case ARM::VST1d16TPseudoWB_fixed : return true;
2007  case ARM::VST1d32TPseudoWB_fixed : return true;
2008  case ARM::VST1d64TPseudoWB_fixed : return true;
2009  case ARM::VST1d8QPseudoWB_fixed : return true;
2010  case ARM::VST1d16QPseudoWB_fixed : return true;
2011  case ARM::VST1d32QPseudoWB_fixed : return true;
2012  case ARM::VST1d64QPseudoWB_fixed : return true;
2013  case ARM::VST2d8wb_fixed : return true;
2014  case ARM::VST2d16wb_fixed : return true;
2015  case ARM::VST2d32wb_fixed : return true;
2016  case ARM::VST2q8PseudoWB_fixed : return true;
2017  case ARM::VST2q16PseudoWB_fixed : return true;
2018  case ARM::VST2q32PseudoWB_fixed : return true;
2019  }
2020 }
2021 
2022 // Get the register stride update opcode of a VLD/VST instruction that
2023 // is otherwise equivalent to the given fixed stride updating instruction.
2024 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
2025  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
2026  && "Incorrect fixed stride updating instruction.");
2027  switch (Opc) {
2028  default: break;
2029  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
2030  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
2031  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
2032  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
2033  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
2034  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
2035  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
2036  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
2037  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
2038  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
2039  case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
2040  case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
2041  case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
2042  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
2043  case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
2044  case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
2045  case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
2046  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
2047  case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
2048  case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
2049  case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
2050  case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
2051  case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
2052  case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
2053  case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;
2054  case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;
2055  case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;
2056 
2057  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
2058  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
2059  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
2060  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
2061  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
2062  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
2063  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
2064  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
2065  case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;
2066  case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;
2067  case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;
2068  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
2069  case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;
2070  case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;
2071  case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;
2072  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
2073 
2074  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
2075  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
2076  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
2077  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
2078  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
2079  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
2080 
2081  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
2082  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
2083  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
2084  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
2085  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
2086  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
2087 
2088  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
2089  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
2090  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
2091  }
2092  return Opc; // If not one we handle, return it unchanged.
2093 }
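// For example: VLD1d8wb_fixed, the "[rN]!" post-increment-by-access-size
// form, maps to VLD1d8wb_register, which takes an explicit increment
// register instead.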
2094 
2095 /// Returns true if the given increment is a Constant known to be equal to the
2096 /// access size performed by a NEON load/store. This means the "[rN]!" form can
2097 /// be used.
2098 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
2099  auto C = dyn_cast<ConstantSDNode>(Inc);
2100  return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
2101 }
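// Worked example (editorial): a vld2 of two v4i32 vectors accesses
// 128 / 8 * 2 = 32 bytes, so only a constant increment of exactly 32
// permits the "[rN]!" form; any other increment requires the
// register-update variant above.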
2102 
2103 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
2104  const uint16_t *DOpcodes,
2105  const uint16_t *QOpcodes0,
2106  const uint16_t *QOpcodes1) {
2107  assert(Subtarget->hasNEON());
2108  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
2109  SDLoc dl(N);
2110 
2111  SDValue MemAddr, Align;
2112  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2113  // nodes are not intrinsics.
2114  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2115  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2116  return;
2117 
2118  SDValue Chain = N->getOperand(0);
2119  EVT VT = N->getValueType(0);
2120  bool is64BitVector = VT.is64BitVector();
2121  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2122 
2123  unsigned OpcodeIndex;
2124  switch (VT.getSimpleVT().SimpleTy) {
2125  default: llvm_unreachable("unhandled vld type");
2126  // Double-register operations:
2127  case MVT::v8i8: OpcodeIndex = 0; break;
2128  case MVT::v4f16:
2129  case MVT::v4bf16:
2130  case MVT::v4i16: OpcodeIndex = 1; break;
2131  case MVT::v2f32:
2132  case MVT::v2i32: OpcodeIndex = 2; break;
2133  case MVT::v1i64: OpcodeIndex = 3; break;
2134  // Quad-register operations:
2135  case MVT::v16i8: OpcodeIndex = 0; break;
2136  case MVT::v8f16:
2137  case MVT::v8bf16:
2138  case MVT::v8i16: OpcodeIndex = 1; break;
2139  case MVT::v4f32:
2140  case MVT::v4i32: OpcodeIndex = 2; break;
2141  case MVT::v2f64:
2142  case MVT::v2i64: OpcodeIndex = 3; break;
2143  }
2144 
2145  EVT ResTy;
2146  if (NumVecs == 1)
2147  ResTy = VT;
2148  else {
2149  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2150  if (!is64BitVector)
2151  ResTyElts *= 2;
2152  ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
2153  }
2154  std::vector<EVT> ResTys;
2155  ResTys.push_back(ResTy);
2156  if (isUpdating)
2157  ResTys.push_back(MVT::i32);
2158  ResTys.push_back(MVT::Other);
2159 
2160  SDValue Pred = getAL(CurDAG, dl);
2161  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2162  SDNode *VLd;
2163  SmallVector<SDValue, 7> Ops;
2164
2165  // Double registers and VLD1/VLD2 quad registers are directly supported.
2166  if (is64BitVector || NumVecs <= 2) {
2167  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2168  QOpcodes0[OpcodeIndex]);
2169  Ops.push_back(MemAddr);
2170  Ops.push_back(Align);
2171  if (isUpdating) {
2172  SDValue Inc = N->getOperand(AddrOpIdx + 1);
2173  bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2174  if (!IsImmUpdate) {
2175  // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
2176  // check for the opcode rather than the number of vector elements.
2177  if (isVLDfixed(Opc))
2178  Opc = getVLDSTRegisterUpdateOpcode(Opc);
2179  Ops.push_back(Inc);
2180  // A fixed-increment VLD1/VLD2 does not take Reg0, so only include it in
2181  // the operands for other opcodes.
2182  } else if (!isVLDfixed(Opc))
2183  Ops.push_back(Reg0);
2184  }
2185  Ops.push_back(Pred);
2186  Ops.push_back(Reg0);
2187  Ops.push_back(Chain);
2188  VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2189 
2190  } else {
2191  // Otherwise, quad registers are loaded with two separate instructions,
2192  // where one loads the even registers and the other loads the odd registers.
2193  EVT AddrTy = MemAddr.getValueType();
2194 
2195  // Load the even subregs. This is always an updating load, so that it
2196  // provides the address to the second load for the odd subregs.
2197  SDValue ImplDef =
2198  SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
2199  const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
2200  SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2201  ResTy, AddrTy, MVT::Other, OpsA);
2202  Chain = SDValue(VLdA, 2);
2203 
2204  // Load the odd subregs.
2205  Ops.push_back(SDValue(VLdA, 1));
2206  Ops.push_back(Align);
2207  if (isUpdating) {
2208  SDValue Inc = N->getOperand(AddrOpIdx + 1);
2209  assert(isa<ConstantSDNode>(Inc.getNode()) &&
2210  "only constant post-increment update allowed for VLD3/4");
2211  (void)Inc;
2212  Ops.push_back(Reg0);
2213  }
2214  Ops.push_back(SDValue(VLdA, 0));
2215  Ops.push_back(Pred);
2216  Ops.push_back(Reg0);
2217  Ops.push_back(Chain);
2218  VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
2219  }
2220 
2221  // Transfer memoperands.
2222  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2223  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});
2224 
2225  if (NumVecs == 1) {
2226  ReplaceNode(N, VLd);
2227  return;
2228  }
2229 
2230  // Extract out the subregisters.
2231  SDValue SuperReg = SDValue(VLd, 0);
2232  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2233  ARM::qsub_3 == ARM::qsub_0 + 3,
2234  "Unexpected subreg numbering");
2235  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
2236  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2237  ReplaceUses(SDValue(N, Vec),
2238  CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2239  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
2240  if (isUpdating)
2241  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
2242  CurDAG->RemoveDeadNode(N);
2243 }
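// Sizing note (editorial): ResTyElts rounds a vld3 up to four elements, so
// the combined result always lands in a legal QQ (v4i64) or QQQQ (v8i64)
// super-register class; the unused fourth slot is simply never extracted.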
2244 
2245 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
2246  const uint16_t *DOpcodes,
2247  const uint16_t *QOpcodes0,
2248  const uint16_t *QOpcodes1) {
2249  assert(Subtarget->hasNEON());
2250  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
2251  SDLoc dl(N);
2252 
2253  SDValue MemAddr, Align;
2254  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2255  // nodes are not intrinsics.
2256  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2257  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2258  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2259  return;
2260 
2261  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2262 
2263  SDValue Chain = N->getOperand(0);
2264  EVT VT = N->getOperand(Vec0Idx).getValueType();
2265  bool is64BitVector = VT.is64BitVector();
2266  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2267 
2268  unsigned OpcodeIndex;
2269  switch (VT.getSimpleVT().SimpleTy) {
2270  default: llvm_unreachable("unhandled vst type");
2271  // Double-register operations:
2272  case MVT::v8i8: OpcodeIndex = 0; break;
2273  case MVT::v4f16:
2274  case MVT::v4bf16:
2275  case MVT::v4i16: OpcodeIndex = 1; break;
2276  case MVT::v2f32:
2277  case MVT::v2i32: OpcodeIndex = 2; break;
2278  case MVT::v1i64: OpcodeIndex = 3; break;
2279  // Quad-register operations:
2280  case MVT::v16i8: OpcodeIndex = 0; break;
2281  case MVT::v8f16:
2282  case MVT::v8bf16:
2283  case MVT::v8i16: OpcodeIndex = 1; break;
2284  case MVT::v4f32:
2285  case MVT::v4i32: OpcodeIndex = 2; break;
2286  case MVT::v2f64:
2287  case MVT::v2i64: OpcodeIndex = 3; break;
2288  }
2289 
2290  std::vector<EVT> ResTys;
2291  if (isUpdating)
2292  ResTys.push_back(MVT::i32);
2293  ResTys.push_back(MVT::Other);
2294 
2295  SDValue Pred = getAL(CurDAG, dl);
2296  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2297  SmallVector<SDValue, 7> Ops;
2298
2299  // Double registers and VST1/VST2 quad registers are directly supported.
2300  if (is64BitVector || NumVecs <= 2) {
2301  SDValue SrcReg;
2302  if (NumVecs == 1) {
2303  SrcReg = N->getOperand(Vec0Idx);
2304  } else if (is64BitVector) {
2305  // Form a REG_SEQUENCE to force register allocation.
2306  SDValue V0 = N->getOperand(Vec0Idx + 0);
2307  SDValue V1 = N->getOperand(Vec0Idx + 1);
2308  if (NumVecs == 2)
2309  SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2310  else {
2311  SDValue V2 = N->getOperand(Vec0Idx + 2);
2312  // If it's a vst3, form a quad D-register and leave the last part as
2313  // an undef.
2314  SDValue V3 = (NumVecs == 3)
2315  ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2316  : N->getOperand(Vec0Idx + 3);
2317  SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2318  }
2319  } else {
2320  // Form a QQ register.
2321  SDValue Q0 = N->getOperand(Vec0Idx);
2322  SDValue Q1 = N->getOperand(Vec0Idx + 1);
2323  SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2324  }
2325 
2326  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2327  QOpcodes0[OpcodeIndex]);
2328  Ops.push_back(MemAddr);
2329  Ops.push_back(Align);
2330  if (isUpdating) {
2331  SDValue Inc = N->getOperand(AddrOpIdx + 1);
2332  bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2333  if (!IsImmUpdate) {
2334  // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
2335  // check for the opcode rather than the number of vector elements.
2336  if (isVSTfixed(Opc))
2337  Opc = getVLDSTRegisterUpdateOpcode(Opc);
2338  Ops.push_back(Inc);
2339  }
2340  // A fixed-increment VST1/VST2 does not take Reg0, so only include it in
2341  // the operands for other opcodes.
2342  else if (!isVSTfixed(Opc))
2343  Ops.push_back(Reg0);
2344  }
2345  Ops.push_back(SrcReg);
2346  Ops.push_back(Pred);
2347  Ops.push_back(Reg0);
2348  Ops.push_back(Chain);
2349  SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2350 
2351  // Transfer memoperands.
2352  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});
2353 
2354  ReplaceNode(N, VSt);
2355  return;
2356  }
2357 
2358  // Otherwise, quad registers are stored with two separate instructions,
2359  // where one stores the even registers and the other stores the odd registers.
2360 
2361  // Form the QQQQ REG_SEQUENCE.
2362  SDValue V0 = N->getOperand(Vec0Idx + 0);
2363  SDValue V1 = N->getOperand(Vec0Idx + 1);
2364  SDValue V2 = N->getOperand(Vec0Idx + 2);
2365  SDValue V3 = (NumVecs == 3)
2366  ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2367  : N->getOperand(Vec0Idx + 3);
2368  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2369 
2370  // Store the even D registers. This is always an updating store, so that it
2371  // provides the address to the second store for the odd subregs.
2372  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2373  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2374  MemAddr.getValueType(),
2375  MVT::Other, OpsA);
2376  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
2377  Chain = SDValue(VStA, 1);
2378 
2379  // Store the odd D registers.
2380  Ops.push_back(SDValue(VStA, 0));
2381  Ops.push_back(Align);
2382  if (isUpdating) {
2383  SDValue Inc = N->getOperand(AddrOpIdx + 1);
2384  assert(isa<ConstantSDNode>(Inc.getNode()) &&
2385  "only constant post-increment update allowed for VST3/4");
2386  (void)Inc;
2387  Ops.push_back(Reg0);
2388  }
2389  Ops.push_back(RegSeq);
2390  Ops.push_back(Pred);
2391  Ops.push_back(Reg0);
2392  Ops.push_back(Chain);
2393  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2394  Ops);
2395  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
2396  ReplaceNode(N, VStB);
2397 }
2398 
2399 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2400  unsigned NumVecs,
2401  const uint16_t *DOpcodes,
2402  const uint16_t *QOpcodes) {
2403  assert(Subtarget->hasNEON());
2404  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2405  SDLoc dl(N);
2406 
2407  SDValue MemAddr, Align;
2408  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2409  // nodes are not intrinsics.
2410  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2411  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2412  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2413  return;
2414 
2415  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2416 
2417  SDValue Chain = N->getOperand(0);
2418  unsigned Lane =
2419  cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2420  EVT VT = N->getOperand(Vec0Idx).getValueType();
2421  bool is64BitVector = VT.is64BitVector();
2422 
2423  unsigned Alignment = 0;
2424  if (NumVecs != 3) {
2425  Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2426  unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2427  if (Alignment > NumBytes)
2428  Alignment = NumBytes;
2429  if (Alignment < 8 && Alignment < NumBytes)
2430  Alignment = 0;
2431  // Alignment must be a power of two; make sure of that.
2432  Alignment = (Alignment & -Alignment);
2433  if (Alignment == 1)
2434  Alignment = 0;
2435  }
2436  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2437 
2438  unsigned OpcodeIndex;
2439  switch (VT.getSimpleVT().SimpleTy) {
2440  default: llvm_unreachable("unhandled vld/vst lane type");
2441  // Double-register operations:
2442  case MVT::v8i8: OpcodeIndex = 0; break;
2443  case MVT::v4f16:
2444  case MVT::v4bf16:
2445  case MVT::v4i16: OpcodeIndex = 1; break;
2446  case MVT::v2f32:
2447  case MVT::v2i32: OpcodeIndex = 2; break;
2448  // Quad-register operations:
2449  case MVT::v8f16:
2450  case MVT::v8bf16:
2451  case MVT::v8i16: OpcodeIndex = 0; break;
2452  case MVT::v4f32:
2453  case MVT::v4i32: OpcodeIndex = 1; break;
2454  }
2455 
2456  std::vector<EVT> ResTys;
2457  if (IsLoad) {
2458  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2459  if (!is64BitVector)
2460  ResTyElts *= 2;
2461  ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2462  MVT::i64, ResTyElts));
2463  }
2464  if (isUpdating)
2465  ResTys.push_back(MVT::i32);
2466  ResTys.push_back(MVT::Other);
2467 
2468  SDValue Pred = getAL(CurDAG, dl);
2469  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2470 
2471  SmallVector<SDValue, 8> Ops;
2472  Ops.push_back(MemAddr);
2473  Ops.push_back(Align);
2474  if (isUpdating) {
2475  SDValue Inc = N->getOperand(AddrOpIdx + 1);
2476  bool IsImmUpdate =
2477  isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2478  Ops.push_back(IsImmUpdate ? Reg0 : Inc);
2479  }
2480 
2481  SDValue SuperReg;
2482  SDValue V0 = N->getOperand(Vec0Idx + 0);
2483  SDValue V1 = N->getOperand(Vec0Idx + 1);
2484  if (NumVecs == 2) {
2485  if (is64BitVector)
2486  SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2487  else
2488  SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2489  } else {
2490  SDValue V2 = N->getOperand(Vec0Idx + 2);
2491  SDValue V3 = (NumVecs == 3)
2492  ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2493  : N->getOperand(Vec0Idx + 3);
2494  if (is64BitVector)
2495  SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2496  else
2497  SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2498  }
2499  Ops.push_back(SuperReg);
2500  Ops.push_back(getI32Imm(Lane, dl));
2501  Ops.push_back(Pred);
2502  Ops.push_back(Reg0);
2503  Ops.push_back(Chain);
2504 
2505  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2506  QOpcodes[OpcodeIndex]);
2507  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2508  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
2509  if (!IsLoad) {
2510  ReplaceNode(N, VLdLn);
2511  return;
2512  }
2513 
2514  // Extract the subregisters.
2515  SuperReg = SDValue(VLdLn, 0);
2516  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2517  ARM::qsub_3 == ARM::qsub_0 + 3,
2518  "Unexpected subreg numbering");
2519  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2520  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2521  ReplaceUses(SDValue(N, Vec),
2522  CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2523  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2524  if (isUpdating)
2525  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2526  CurDAG->RemoveDeadNode(N);
2527 }
2528 
2529 template <typename SDValueVector>
2530 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2531  SDValue PredicateMask) {
2532  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2533  Ops.push_back(PredicateMask);
2534  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2535 }
2536 
2537 template <typename SDValueVector>
2538 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2539  SDValue PredicateMask,
2540  SDValue Inactive) {
2541  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2542  Ops.push_back(PredicateMask);
2543  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2544  Ops.push_back(Inactive);
2545 }
2546 
2547 template <typename SDValueVector>
2548 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
2549  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2550  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2551  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2552 }
2553 
2554 template <typename SDValueVector>
2555 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2556  EVT InactiveTy) {
2557  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2558  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2559  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2560  Ops.push_back(SDValue(
2561  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
2562 }
2563 
2564 void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
2565  bool Predicated) {
2566  SDLoc Loc(N);
2567  SmallVector<SDValue, 8> Ops;
2568
2569  uint16_t Opcode;
2570  switch (N->getValueType(1).getVectorElementType().getSizeInBits()) {
2571  case 32:
2572  Opcode = Opcodes[0];
2573  break;
2574  case 64:
2575  Opcode = Opcodes[1];
2576  break;
2577  default:
2578  llvm_unreachable("bad vector element size in SelectMVE_WB");
2579  }
2580 
2581  Ops.push_back(N->getOperand(2)); // vector of base addresses
2582 
2583  int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
2584  Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset
2585 
2586  if (Predicated)
2587  AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
2588  else
2589  AddEmptyMVEPredicateToOps(Ops, Loc);
2590 
2591  Ops.push_back(N->getOperand(0)); // chain
2592 
2593  SmallVector<EVT, 8> VTs;
2594  VTs.push_back(N->getValueType(1));
2595  VTs.push_back(N->getValueType(0));
2596  VTs.push_back(N->getValueType(2));
2597 
2598  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops);
2599  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
2600  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
2601  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
2602  transferMemOperands(N, New);
2603  CurDAG->RemoveDeadNode(N);
2604 }
2605 
2606 void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
2607  bool Immediate,
2608  bool HasSaturationOperand) {
2609  SDLoc Loc(N);
2610  SmallVector<SDValue, 6> Ops;
2611
2612  // Two 32-bit halves of the value to be shifted
2613  Ops.push_back(N->getOperand(1));
2614  Ops.push_back(N->getOperand(2));
2615 
2616  // The shift count
2617  if (Immediate) {
2618  int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
2619  Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
2620  } else {
2621  Ops.push_back(N->getOperand(3));
2622  }
2623 
2624  // The immediate saturation operand, if any
2625  if (HasSaturationOperand) {
2626  int32_t SatOp = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
2627  int SatBit = (SatOp == 64 ? 0 : 1);
2628  Ops.push_back(getI32Imm(SatBit, Loc));
2629  }
2630 
2631  // MVE scalar shifts are IT-predicable, so include the standard
2632  // predicate arguments.
2633  Ops.push_back(getAL(CurDAG, Loc));
2634  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2635 
2636  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
2637 }
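// Saturation-bit example (editorial, assuming the 48/64-bit saturation
// widths of the MVE long-shift intrinsics): a saturation operand of 64
// selects bit value 0 and 48 selects bit value 1, matching the
// instruction's single-bit saturation field.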
2638 
2639 void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
2640  uint16_t OpcodeWithNoCarry,
2641  bool Add, bool Predicated) {
2642  SDLoc Loc(N);
2643  SmallVector<SDValue, 8> Ops;
2644  uint16_t Opcode;
2645 
2646  unsigned FirstInputOp = Predicated ? 2 : 1;
2647 
2648  // Two input vectors and the input carry flag
2649  Ops.push_back(N->getOperand(FirstInputOp));
2650  Ops.push_back(N->getOperand(FirstInputOp + 1));
2651  SDValue CarryIn = N->getOperand(FirstInputOp + 2);
2652  ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn);
2653  uint32_t CarryMask = 1 << 29;
2654  uint32_t CarryExpected = Add ? 0 : CarryMask;
2655  if (CarryInConstant &&
2656  (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
2657  Opcode = OpcodeWithNoCarry;
2658  } else {
2659  Ops.push_back(CarryIn);
2660  Opcode = OpcodeWithCarry;
2661  }
2662 
2663  if (Predicated)
2664  AddMVEPredicateToOps(Ops, Loc,
2665  N->getOperand(FirstInputOp + 3), // predicate
2666  N->getOperand(FirstInputOp - 1)); // inactive
2667  else
2668  AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
2669 
2670  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
2671 }
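// Background (editorial): 1 << 29 is the carry flag's bit position in
// FPSCR, which VADC/VSBC consume. A constant carry-in that is clear for an
// add, or set for a subtract (meaning "no borrow"), matches the initial
// carry that the VADCI/VSBCI forms supply themselves, so the explicit
// carry operand can be dropped.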
2672 
2673 void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
2674  SDLoc Loc(N);
2675  SmallVector<SDValue, 4> Ops;
2676
2677  // One vector input, followed by a 32-bit word of bits to shift in
2678  // and then an immediate shift count
2679  Ops.push_back(N->getOperand(1));
2680  Ops.push_back(N->getOperand(2));
2681  int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
2682  Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
2683 
2684  if (Predicated)
2685  AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
2686  else
2687  AddEmptyMVEPredicateToOps(Ops, Loc);
2688 
2689  CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), makeArrayRef(Ops));
2690 }
2691 
2692 static bool SDValueToConstBool(SDValue SDVal) {
2693  assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
2694  ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
2695  uint64_t Value = SDValConstant->getZExtValue();
2696  assert((Value == 0 || Value == 1) && "expected value 0 or 1");
2697  return Value;
2698 }
2699 
2700 void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
2701  const uint16_t *OpcodesS,
2702  const uint16_t *OpcodesU,
2703  size_t Stride, size_t TySize) {
2704  assert(TySize < Stride && "Invalid TySize");
2705  bool IsUnsigned = SDValueToConstBool(N->getOperand(1));
2706  bool IsSub = SDValueToConstBool(N->getOperand(2));
2707  bool IsExchange = SDValueToConstBool(N->getOperand(3));
2708  if (IsUnsigned) {
2709  assert(!IsSub &&
2710  "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
2711  assert(!IsExchange &&
2712  "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
2713  }
2714 
2715  auto OpIsZero = [N](size_t OpNo) {
2716  if (ConstantSDNode *OpConst = dyn_cast<ConstantSDNode>(N->getOperand(OpNo)))
2717  if (OpConst->getZExtValue() == 0)
2718  return true;
2719  return false;
2720  };
2721 
2722  // If the input accumulator value is not zero, select an instruction with
2723  // an accumulator; otherwise select one without.
2724  bool IsAccum = !(OpIsZero(4) && OpIsZero(5));
2725 
2726  const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
2727  if (IsSub)
2728  Opcodes += 4 * Stride;
2729  if (IsExchange)
2730  Opcodes += 2 * Stride;
2731  if (IsAccum)
2732  Opcodes += Stride;
2733  uint16_t Opcode = Opcodes[TySize];
2734 
2735  SDLoc Loc(N);
2736  SmallVector<SDValue, 8> Ops;
2737  // Push the accumulator operands, if they are used
2738  if (IsAccum) {
2739  Ops.push_back(N->getOperand(4));
2740  Ops.push_back(N->getOperand(5));
2741  }
2742  // Push the two vector operands
2743  Ops.push_back(N->getOperand(6));
2744  Ops.push_back(N->getOperand(7));
2745 
2746  if (Predicated)
2747  AddMVEPredicateToOps(Ops, Loc, N->getOperand(8));
2748  else
2749  AddEmptyMVEPredicateToOps(Ops, Loc);
2750 
2751  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
2752 }
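// Index arithmetic (editorial summary): the opcode tables are laid out as
// [sub][exchange][accumulate][size], so the pointer bumps above amount to
//   Opcode = Table[(4*IsSub + 2*IsExchange + IsAccum) * Stride + TySize]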
2753 
2754 void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
2755  const uint16_t *OpcodesS,
2756  const uint16_t *OpcodesU) {
2757  EVT VecTy = N->getOperand(6).getValueType();
2758  size_t SizeIndex;
2759  switch (VecTy.getVectorElementType().getSizeInBits()) {
2760  case 16:
2761  SizeIndex = 0;
2762  break;
2763  case 32:
2764  SizeIndex = 1;
2765  break;
2766  default:
2767  llvm_unreachable("bad vector element size");
2768  }
2769 
2770  SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex);
2771 }
2772 
2773 void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
2774  const uint16_t *OpcodesS,
2775  const uint16_t *OpcodesU) {
2776  assert(
2777  N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
2778  32 &&
2779  "bad vector element size");
2780  SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);
2781 }
2782 
2783 void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
2784  const uint16_t *const *Opcodes,
2785  bool HasWriteback) {
2786  EVT VT = N->getValueType(0);
2787  SDLoc Loc(N);
2788 
2789  const uint16_t *OurOpcodes;
2790  switch (VT.getVectorElementType().getSizeInBits()) {
2791  case 8:
2792  OurOpcodes = Opcodes[0];
2793  break;
2794  case 16:
2795  OurOpcodes = Opcodes[1];
2796  break;
2797  case 32:
2798  OurOpcodes = Opcodes[2];
2799  break;
2800  default:
2801  llvm_unreachable("bad vector element size in SelectMVE_VLD");
2802  }
2803 
2804  EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
2805  SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
2806  unsigned PtrOperand = HasWriteback ? 1 : 2;
2807 
2808  auto Data = SDValue(
2809  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
2810  SDValue Chain = N->getOperand(0);
2811  // Add an MVE_VLDn instruction for each Vec except the last.
2812  for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
2813  SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
2814  auto LoadInst =
2815  CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
2816  Data = SDValue(LoadInst, 0);
2817  Chain = SDValue(LoadInst, 1);
2818  transferMemOperands(N, LoadInst);
2819  }
2820  // The last load may need a writeback on it.
2821  if (HasWriteback)
2822  ResultTys = {DataTy, MVT::i32, MVT::Other};
2823  SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
2824  auto LoadInst =
2825  CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops);
2826  transferMemOperands(N, LoadInst);
2827 
2828  unsigned i;
2829  for (i = 0; i < NumVecs; i++)
2830  ReplaceUses(SDValue(N, i),
2831  CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT,
2832  SDValue(LoadInst, 0)));
2833  if (HasWriteback)
2834  ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1));
2835  ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1));
2836  CurDAG->RemoveDeadNode(N);
2837 }
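// Structure note (editorial): an MVE vld2/vld4 is selected as NumVecs
// chained stage instructions; each stage threads the growing register
// tuple and the chain into the next, and only the final stage can carry
// the optional write-back result.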
2838 
2839 void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
2840  bool Wrapping, bool Predicated) {
2841  EVT VT = N->getValueType(0);
2842  SDLoc Loc(N);
2843 
2844  uint16_t Opcode;
2845  switch (VT.getScalarSizeInBits()) {
2846  case 8:
2847  Opcode = Opcodes[0];
2848  break;
2849  case 16:
2850  Opcode = Opcodes[1];
2851  break;
2852  case 32:
2853  Opcode = Opcodes[2];
2854  break;
2855  default:
2856  llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
2857  }
2858 
2859  SmallVector<SDValue, 8> Ops;
2860  unsigned OpIdx = 1;
2861 
2862  SDValue Inactive;
2863  if (Predicated)
2864  Inactive = N->getOperand(OpIdx++);
2865 
2866  Ops.push_back(N->getOperand(OpIdx++)); // base
2867  if (Wrapping)
2868  Ops.push_back(N->getOperand(OpIdx++)); // limit
2869 
2870  SDValue ImmOp = N->getOperand(OpIdx++); // step
2871  int ImmValue = cast<ConstantSDNode>(ImmOp)->getZExtValue();
2872  Ops.push_back(getI32Imm(ImmValue, Loc));
2873 
2874  if (Predicated)
2875  AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive);
2876  else
2877  AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
2878 
2879  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
2880 }
2881 
2882 void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
2883  size_t NumExtraOps, bool HasAccum) {
2884  bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
2885  SDLoc Loc(N);
2886  SmallVector<SDValue, 8> Ops;
2887
2888  unsigned OpIdx = 1;
2889 
2890  // Convert and append the immediate operand designating the coprocessor.
2891  SDValue ImmCorpoc = N->getOperand(OpIdx++);
2892  uint32_t ImmCoprocVal = cast<ConstantSDNode>(ImmCorpoc)->getZExtValue();
2893  Ops.push_back(getI32Imm(ImmCoprocVal, Loc));
2894 
2895  // For accumulating variants copy the low and high order parts of the
2896  // accumulator into a register pair and add it to the operand vector.
2897  if (HasAccum) {
2898  SDValue AccLo = N->getOperand(OpIdx++);
2899  SDValue AccHi = N->getOperand(OpIdx++);
2900  if (IsBigEndian)
2901  std::swap(AccLo, AccHi);
2902  Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));
2903  }
2904 
2905  // Copy extra operands as-is.
2906  for (size_t I = 0; I < NumExtraOps; I++)
2907  Ops.push_back(N->getOperand(OpIdx++));
2908 
2909  // Convert and append the immediate operand
2910  SDValue Imm = N->getOperand(OpIdx);
2911  uint32_t ImmVal = cast<ConstantSDNode>(Imm)->getZExtValue();
2912  Ops.push_back(getI32Imm(ImmVal, Loc));
2913 
2914  // Accumulating variants are IT-predicable, add predicate operands.
2915  if (HasAccum) {
2916  SDValue Pred = getAL(CurDAG, Loc);
2917  SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2918  Ops.push_back(Pred);
2919  Ops.push_back(PredReg);
2920  }
2921 
2922  // Create the CDE instruction.
2923  SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);
2924  SDValue ResultPair = SDValue(InstrNode, 0);
2925 
2926  // The original intrinsic had two outputs, and the output of the dual-register
2927  // CDE instruction is a register pair. We need to extract the two subregisters
2928  // and replace all uses of the original outputs with the extracted
2929  // subregisters.
2930  uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
2931  if (IsBigEndian)
2932  std::swap(SubRegs[0], SubRegs[1]);
2933 
2934  for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
2935  if (SDValue(N, ResIdx).use_empty())
2936  continue;
2937  SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc,
2938  MVT::i32, ResultPair);
2939  ReplaceUses(SDValue(N, ResIdx), SubReg);
2940  }
2941 
2942  CurDAG->RemoveDeadNode(N);
2943 }
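// For example (editorial): a dual-register CDE result comes back as one
// untyped GPRPair, from which gsub_0 and gsub_1 are extracted for the
// intrinsic's two i32 results; the swap on big-endian targets mirrors the
// one applied when the accumulator pair was formed.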
2944 
2945 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
2946  bool isUpdating, unsigned NumVecs,
2947  const uint16_t *DOpcodes,
2948  const uint16_t *QOpcodes0,
2949  const uint16_t *QOpcodes1) {
2950  assert(Subtarget->hasNEON());
2951  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2952  SDLoc dl(N);
2953 
2954  SDValue MemAddr, Align;
2955  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2956  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2957  return;
2958 
2959  SDValue Chain = N->getOperand(0);
2960  EVT VT = N->getValueType(0);
2961  bool is64BitVector = VT.is64BitVector();
2962 
2963  unsigned Alignment = 0;
2964  if (NumVecs != 3) {
2965  Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2966  unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2967  if (Alignment > NumBytes)
2968  Alignment = NumBytes;
2969  if (Alignment < 8 && Alignment < NumBytes)
2970  Alignment = 0;
2971  // Alignment must be a power of two; make sure of that.
2972  Alignment = (Alignment & -Alignment);
2973  if (Alignment == 1)
2974  Alignment = 0;
2975  }
2976  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2977 
2978  unsigned OpcodeIndex;
2979  switch (VT.getSimpleVT().SimpleTy) {
2980  default: llvm_unreachable("unhandled vld-dup type");
2981  case MVT::v8i8:
2982  case MVT::v16i8: OpcodeIndex = 0; break;
2983  case MVT::v4i16:
2984  case MVT::v8i16:
2985  case MVT::v4f16:
2986  case MVT::v8f16:
2987  case MVT::v4bf16:
2988  case MVT::v8bf16:
2989  OpcodeIndex = 1; break;
2990  case MVT::v2f32:
2991  case MVT::v2i32:
2992  case MVT::v4f32:
2993  case MVT::v4i32: OpcodeIndex = 2; break;
2994  case MVT::v1f64:
2995  case MVT::v1i64: OpcodeIndex = 3; break;
2996  }
2997 
2998  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2999  if (!is64BitVector)
3000  ResTyElts *= 2;
3001  EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
3002 
3003  std::vector<EVT> ResTys;
3004  ResTys.push_back(ResTy);
3005  if (isUpdating)
3006  ResTys.push_back(MVT::i32);
3007  ResTys.push_back(MVT::Other);
3008 
3009  SDValue Pred = getAL(CurDAG, dl);
3010  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3011 
3012  SmallVector<SDValue, 6> Ops;
3013  Ops.push_back(MemAddr);
3014  Ops.push_back(Align);
3015  unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex]
3016  : (NumVecs == 1) ? QOpcodes0[OpcodeIndex]
3017  : QOpcodes1[OpcodeIndex];
3018  if (isUpdating) {
3019  SDValue Inc = N->getOperand(2);
3020  bool IsImmUpdate =
3021  isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
3022  if (IsImmUpdate) {
3023  if (!isVLDfixed(Opc))
3024  Ops.push_back(Reg0);
3025  } else {
3026  if (isVLDfixed(Opc))
3027  Opc = getVLDSTRegisterUpdateOpcode(Opc);
3028  Ops.push_back(Inc);
3029  }
3030  }
3031  if (is64BitVector || NumVecs == 1) {
3032  // Double registers and VLD1 quad registers are directly supported.
3033  } else if (NumVecs == 2) {
3034  const SDValue OpsA[] = {MemAddr, Align, Pred, Reg0, Chain};
3035  SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
3036  MVT::Other, OpsA);
3037  Chain = SDValue(VLdA, 1);
3038  } else {
3039  SDValue ImplDef = SDValue(
3040  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
3041  const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};
3042  SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
3043  MVT::Other, OpsA);
3044  Ops.push_back(SDValue(VLdA, 0));
3045  Chain = SDValue(VLdA, 1);
3046  }
3047 
3048  Ops.push_back(Pred);
3049  Ops.push_back(Reg0);
3050  Ops.push_back(Chain);
3051 
3052  SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
3053 
3054  // Transfer memoperands.
3055  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3056  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});
3057 
3058  // Extract the subregisters.
3059  if (NumVecs == 1) {
3060  ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
3061  } else {
3062  SDValue SuperReg = SDValue(VLdDup, 0);
3063  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
3064  unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
3065  for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
3066  ReplaceUses(SDValue(N, Vec),
3067  CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
3068  }
3069  }
3070  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
3071  if (isUpdating)
3072  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
3073  CurDAG->RemoveDeadNode(N);
3074 }
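// Worked example of the alignment clamping above (illustrative only, not part
// of the original source): for a VLD2DUP of v4i16 (NumVecs == 2, 16-bit
// elements), NumBytes = 2 * 16 / 8 = 4. A requested alignment of 8 is first
// clamped to NumBytes = 4 and survives the remaining checks; a requested
// alignment of 2 is below both 8 and NumBytes, so it is dropped to 0 and no
// alignment is encoded.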
3075 
3076 bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
3077  if (!Subtarget->hasMVEIntegerOps())
3078  return false;
3079 
3080  SDLoc dl(N);
3081 
3082  // We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and
3083  // extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
3084  // inserts of the correct type:
3085  SDValue Ins1 = SDValue(N, 0);
3086  SDValue Ins2 = N->getOperand(0);
3087  EVT VT = Ins1.getValueType();
3088  if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||
3089  !isa<ConstantSDNode>(Ins1.getOperand(2)) ||
3090  !isa<ConstantSDNode>(Ins2.getOperand(2)) ||
3091  (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))
3092  return false;
3093 
3094  unsigned Lane1 = Ins1.getConstantOperandVal(2);
3095  unsigned Lane2 = Ins2.getConstantOperandVal(2);
3096  if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)
3097  return false;
3098 
3099  // If the inserted values will be able to use T/B already, leave it to the
3100  // existing tablegen patterns. For example VCVTT/VCVTB.
3101  SDValue Val1 = Ins1.getOperand(1);
3102  SDValue Val2 = Ins2.getOperand(1);
3103  if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)
3104  return false;
3105 
3106  // Check if the inserted values are both extracts.
3107  if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
3108  Val1.getOpcode() == ARMISD::VGETLANEu) &&
3109  (Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
3110  Val2.getOpcode() == ARMISD::VGETLANEu) &&
3111  isa<ConstantSDNode>(Val1.getOperand(1)) &&
3112  isa<ConstantSDNode>(Val2.getOperand(1)) &&
3113  (Val1.getOperand(0).getValueType() == MVT::v8f16 ||
3114  Val1.getOperand(0).getValueType() == MVT::v8i16) &&
3115  (Val2.getOperand(0).getValueType() == MVT::v8f16 ||
3116  Val2.getOperand(0).getValueType() == MVT::v8i16)) {
3117  unsigned ExtractLane1 = Val1.getConstantOperandVal(1);
3118  unsigned ExtractLane2 = Val2.getConstantOperandVal(1);
3119 
3120  // If the two extracted lanes are from the same place and adjacent, this
3121  // simplifies into an f32 lane move.
3122  if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 &&
3123  ExtractLane1 == ExtractLane2 + 1) {
3124  SDValue NewExt = CurDAG->getTargetExtractSubreg(
3125  ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0));
3126  SDValue NewIns = CurDAG->getTargetInsertSubreg(
3127  ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0),
3128  NewExt);
3129  ReplaceUses(Ins1, NewIns);
3130  return true;
3131  }
3132 
3133  // Else match a v8i16 pattern of an extract and an insert, with an
3134  // optional vmovx for extracting odd lanes.
3135  if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) {
3136  SDValue Inp1 = CurDAG->getTargetExtractSubreg(
3137  ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0));
3138  SDValue Inp2 = CurDAG->getTargetExtractSubreg(
3139  ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0));
3140  if (ExtractLane1 % 2 != 0)
3141  Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1), 0);
3142  if (ExtractLane2 % 2 != 0)
3143  Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2), 0);
3144  SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1);
3145  SDValue NewIns =
3146  CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
3147  Ins2.getOperand(0), SDValue(VINS, 0));
3148  ReplaceUses(Ins1, NewIns);
3149  return true;
3150  }
3151  }
3152 
3153  // The inserted values are not extracted - if they are f16 then insert them
3154  // directly using a VINS.
3155  if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) {
3156  SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1);
3157  SDValue NewIns =
3158  CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
3159  Ins2.getOperand(0), SDValue(VINS, 0));
3160  ReplaceUses(Ins1, NewIns);
3161  return true;
3162  }
3163 
3164  return false;
3165 }
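// Illustrative example for the matcher above (not part of the original
// source): given the v8f16 inserts t = (insertelt q, a, 2) followed by
// N = (insertelt t, b, 3), Lane2 == 2 is even and Lane1 == 3 == Lane2 + 1,
// so the two f16 inserts collapse into a single VINS whose result is placed
// into the f32 subregister ssub_1 (lanes 2-3 of the q register).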
3166 
3167 bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N,
3168  SDNode *FMul,
3169  bool IsUnsigned,
3170  bool FixedToFloat) {
3171  auto Type = N->getValueType(0);
3172  unsigned ScalarBits = Type.getScalarSizeInBits();
3173  if (ScalarBits > 32)
3174  return false;
3175 
3176  SDNodeFlags FMulFlags = FMul->getFlags();
3177  // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3178  // allowed in 16-bit unsigned floats.
3179  if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned)
3180  return false;
3181 
3182  SDValue ImmNode = FMul->getOperand(1);
3183  SDValue VecVal = FMul->getOperand(0);
3184  if (VecVal->getOpcode() == ISD::UINT_TO_FP ||
3185  VecVal->getOpcode() == ISD::SINT_TO_FP)
3186  VecVal = VecVal->getOperand(0);
3187 
3188  if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits)
3189  return false;
3190 
3191  if (ImmNode.getOpcode() == ISD::BITCAST) {
3192  if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3193  return false;
3194  ImmNode = ImmNode.getOperand(0);
3195  }
3196 
3197  if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3198  return false;
3199 
3200  APFloat ImmAPF(0.0f);
3201  switch (ImmNode.getOpcode()) {
3202  case ARMISD::VMOVIMM:
3203  case ARMISD::VDUP: {
3204  if (!isa<ConstantSDNode>(ImmNode.getOperand(0)))
3205  return false;
3206  unsigned Imm = ImmNode.getConstantOperandVal(0);
3207  if (ImmNode.getOpcode() == ARMISD::VMOVIMM)
3208  Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits);
3209  ImmAPF =
3210  APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(),
3211  APInt(ScalarBits, Imm));
3212  break;
3213  }
3214  case ARMISD::VMOVFPIMM: {
3215  ImmAPF = APFloat(ARM_AM::getFPImmFloat(ImmNode.getConstantOperandVal(0)));
3216  break;
3217  }
3218  default:
3219  return false;
3220  }
3221 
3222  // Where n is the number of fractional bits, multiplying by 2^n will convert
3223  // from float to fixed and multiplying by 2^-n will convert from fixed to
3224  // float. Taking log2 of the factor (after taking the inverse in the case of
3225  // float to fixed) will give n.
3226  APFloat ToConvert = ImmAPF;
3227  if (FixedToFloat) {
3228  if (!ImmAPF.getExactInverse(&ToConvert))
3229  return false;
3230  }
3231  APSInt Converted(64, false);
3232  bool IsExact;
3233  ToConvert.convertToInteger(Converted, llvm::RoundingMode::NearestTiesToEven,
3234  &IsExact);
3235  if (!IsExact || !Converted.isPowerOf2())
3236  return false;
3237 
3238  unsigned FracBits = Converted.logBase2();
3239  if (FracBits > ScalarBits)
3240  return false;
3241 
3242  SmallVector<SDValue, 3> Ops{
3243  VecVal, CurDAG->getConstant(FracBits, SDLoc(N), MVT::i32)};
3244  AddEmptyMVEPredicateToOps(Ops, SDLoc(N), Type);
3245 
3246  unsigned int Opcode;
3247  switch (ScalarBits) {
3248  case 16:
3249  if (FixedToFloat)
3250  Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix;
3251  else
3252  Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3253  break;
3254  case 32:
3255  if (FixedToFloat)
3256  Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix;
3257  else
3258  Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3259  break;
3260  default:
3261  llvm_unreachable("unexpected number of scalar bits");
3262  break;
3263  }
3264 
3265  ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), Type, Ops));
3266  return true;
3267 }
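// Worked example of the FracBits derivation above (illustrative only): for a
// float-to-fixed conversion FP_TO_SINT (FMUL (x, 256.0)) with 16-bit scalars,
// ToConvert = 256.0 converts exactly to the integer 256 = 2^8, so FracBits
// = 8 and the node is selected to MVE_VCVTs16f16_fix with shift 8. For the
// fixed-to-float direction, FMUL (x, 1/256.0) first takes the exact inverse
// (256.0) and then derives the same 8 fractional bits.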
3268 
3269 bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {
3270  // Transform a floating-point to fixed-point conversion to a VCVT
3271  if (!Subtarget->hasMVEFloatOps())
3272  return false;
3273  EVT Type = N->getValueType(0);
3274  if (!Type.isVector())
3275  return false;
3276  unsigned int ScalarBits = Type.getScalarSizeInBits();
3277 
3278  bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT ||
3279  N->getOpcode() == ISD::FP_TO_UINT_SAT;
3280  SDNode *Node = N->getOperand(0).getNode();
3281 
3282  // floating-point to fixed-point with one fractional bit gets turned into an
3283  // FP_TO_[U|S]INT(FADD (x, x)) rather than an FP_TO_[U|S]INT(FMUL (x, y))
3284  if (Node->getOpcode() == ISD::FADD) {
3285  if (Node->getOperand(0) != Node->getOperand(1))
3286  return false;
3287  SDNodeFlags Flags = Node->getFlags();
3288  // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3289  // allowed in 16-bit unsigned floats.
3290  if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned)
3291  return false;
3292 
3293  unsigned Opcode;
3294  switch (ScalarBits) {
3295  case 16:
3296  Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3297  break;
3298  case 32:
3299  Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3300  break;
3301  }
3302  SmallVector<SDValue, 3> Ops{Node->getOperand(0),
3303  CurDAG->getConstant(1, dl, MVT::i32)};
3304  AddEmptyMVEPredicateToOps(Ops, dl, Type);
3305 
3306  ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops));
3307  return true;
3308  }
3309 
3310  if (Node->getOpcode() != ISD::FMUL)
3311  return false;
3312 
3313  return transformFixedFloatingPointConversion(N, Node, IsUnsigned, false);
3314 }
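// Note (editorial): FADD (x, x) is x * 2.0, i.e. a conversion with exactly
// one fractional bit, which is why the branch above emits the _fix VCVT with
// a constant shift of 1 directly. The switch appears to rely on ScalarBits
// being 16 or 32, which holds for the MVE float vector types (f16/f32
// elements) that reach instruction selection.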
3315 
3316 bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {
3317  // Transform a fixed-point to floating-point conversion to a VCVT
3318  if (!Subtarget->hasMVEFloatOps())
3319  return false;
3320  auto Type = N->getValueType(0);
3321  if (!Type.isVector())
3322  return false;
3323 
3324  auto LHS = N->getOperand(0);
3325  if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP)
3326  return false;
3327 
3328  return transformFixedFloatingPointConversion(
3329  N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true);
3330 }
3331 
3332 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
3333  if (!Subtarget->hasV6T2Ops())
3334  return false;
3335 
3336  unsigned Opc = isSigned
3337  ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
3338  : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
3339  SDLoc dl(N);
3340 
3341  // For unsigned extracts, check for a shift right and mask
3342  unsigned And_imm = 0;
3343  if (N->getOpcode() == ISD::AND) {
3344  if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
3345 
3346  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
3347  if (And_imm & (And_imm + 1))
3348  return false;
3349 
3350  unsigned Srl_imm = 0;
3351  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
3352  Srl_imm)) {
3353  assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3354 
3355  // Mask off the unnecessary bits of the AND immediate; normally
3356  // DAGCombine will do this, but that might not happen if
3357  // targetShrinkDemandedConstant chooses a different immediate.
3358  And_imm &= -1U >> Srl_imm;
3359 
3360  // Note: The width operand is encoded as width-1.
3361  unsigned Width = countTrailingOnes(And_imm) - 1;
3362  unsigned LSB = Srl_imm;
3363 
3364  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3365 
3366  if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
3367  // It's cheaper to use a right shift to extract the top bits.
3368  if (Subtarget->isThumb()) {
3369  Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
3370  SDValue Ops[] = { N->getOperand(0).getOperand(0),
3371  CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3372  getAL(CurDAG, dl), Reg0, Reg0 };
3373  CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3374  return true;
3375  }
3376 
3377  // ARM models shift instructions as MOVsi with shifter operand.
3378  ARM_AM::ShiftOpc ShOpcVal = getShiftOpcForNode(ISD::SRL);
3379  SDValue ShOpc =
3380  CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
3381  MVT::i32);
3382  SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
3383  getAL(CurDAG, dl), Reg0, Reg0 };
3384  CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
3385  return true;
3386  }
3387 
3388  assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3389  SDValue Ops[] = { N->getOperand(0).getOperand(0),
3390  CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3391  CurDAG->getTargetConstant(Width, dl, MVT::i32),
3392  getAL(CurDAG, dl), Reg0 };
3393  CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3394  return true;
3395  }
3396  }
3397  return false;
3398  }
3399 
3400  // Otherwise, we're looking for a shift of a shift
3401  unsigned Shl_imm = 0;
3402  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
3403  assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
3404  unsigned Srl_imm = 0;
3405  if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
3406  assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3407  // Note: The width operand is encoded as width-1.
3408  unsigned Width = 32 - Srl_imm - 1;
3409  int LSB = Srl_imm - Shl_imm;
3410  if (LSB < 0)
3411  return false;
3412  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3413  assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3414  SDValue Ops[] = { N->getOperand(0).getOperand(0),
3415  CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3416  CurDAG->getTargetConstant(Width, dl, MVT::i32),
3417  getAL(CurDAG, dl), Reg0 };
3418  CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3419  return true;
3420  }
3421  }
3422 
3423  // Or we are looking for a shift of an and, with a mask operand
3424  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
3425  isShiftedMask_32(And_imm)) {
3426  unsigned Srl_imm = 0;
3427  unsigned LSB = countTrailingZeros(And_imm);
3428  // The shift amount must be the same as the AND mask's LSB.
3429  if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
3430  assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3431  unsigned MSB = 31 - countLeadingZeros(And_imm);
3432  // Note: The width operand is encoded as width-1.
3433  unsigned Width = MSB - LSB;
3434  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3435  assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3436  SDValue Ops[] = { N->getOperand(0).getOperand(0),
3437  CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
3438  CurDAG->getTargetConstant(Width, dl, MVT::i32),
3439  getAL(CurDAG, dl), Reg0 };
3440  CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3441  return true;
3442  }
3443  }
3444 
3445  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
3446  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
3447  unsigned LSB = 0;
3448  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
3449  !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
3450  return false;
3451 
3452  if (LSB + Width > 32)
3453  return false;
3454 
3455  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3456  assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
3457  SDValue Ops[] = { N->getOperand(0).getOperand(0),
3458  CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3459  CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
3460  getAL(CurDAG, dl), Reg0 };
3461  CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3462  return true;
3463  }
3464 
3465  return false;
3466 }
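// Worked examples for the bitfield matcher above (illustrative only):
//   (and (srl x, 7), 0x1f)  -> UBFX, extracting 5 bits starting at bit 7
//   (srl (shl x, 20), 24)   -> UBFX, extracting 8 bits starting at bit 4
//   (and (srl x, 24), 0xff) -> a plain logical shift right by 24, since the
//                              field ends at bit 31 and a shift is cheaper.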
3467 
3468 /// Target-specific DAG combining for ISD::SUB.
3469 /// Target-independent combining lowers SELECT_CC nodes of the form
3470 /// select_cc setg[ge] X, 0, X, -X
3471 /// select_cc setgt X, -1, X, -X
3472 /// select_cc setl[te] X, 0, -X, X
3473 /// select_cc setlt X, 1, -X, X
3474 /// all of which represent integer ABS, into:
3475 /// Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
3476 /// ARM instruction selection detects the latter and matches it to
3477 /// ARM::ABS or ARM::t2ABS machine node.
3478 bool ARMDAGToDAGISel::tryABSOp(SDNode *N) {
3479  SDValue SUBSrc0 = N->getOperand(0);
3480  SDValue SUBSrc1 = N->getOperand(1);
3481  EVT VT = N->getValueType(0);
3482 
3483  if (Subtarget->isThumb1Only())
3484  return false;
3485 
3486  if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA)
3487  return false;
3488 
3489  SDValue XORSrc0 = SUBSrc0.getOperand(0);
3490  SDValue XORSrc1 = SUBSrc0.getOperand(1);
3491  SDValue SRASrc0 = SUBSrc1.getOperand(0);
3492  SDValue SRASrc1 = SUBSrc1.getOperand(1);
3493  ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
3494  EVT XType = SRASrc0.getValueType();
3495  unsigned Size = XType.getSizeInBits() - 1;
3496 
3497  if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() &&
3498  SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) {
3499  unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
3500  CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0);
3501  return true;
3502  }
3503 
3504  return false;
3505 }
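// Worked instance of the ABS pattern above (illustrative only): for i32 and
// X = -5, Y = sra(X, 31) = -1, xor(X, Y) = 4, and sub(4, -1) = 5, so the
// whole sra/xor/sub expression folds into a single ARM::ABS / ARM::t2ABS.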
3506 
3507 /// We've got special pseudo-instructions for these
3508 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3509  unsigned Opcode;
3510  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
3511  if (MemTy == MVT::i8)
3512  Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
3513  else if (MemTy == MVT::i16)
3514  Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
3515  else if (MemTy == MVT::i32)
3516  Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32;
3517  else
3518  llvm_unreachable("Unknown AtomicCmpSwap type");
3519 
3520  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
3521  N->getOperand(0)};
3522  SDNode *CmpSwap = CurDAG->getMachineNode(
3523  Opcode, SDLoc(N),
3524  CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
3525 
3526  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
3527  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
3528 
3529  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
3530  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
3531  CurDAG->RemoveDeadNode(N);
3532 }
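// Note (editorial): the CMP_SWAP_* pseudos are created with three results
// (loaded value, a scratch status register, chain); the scratch result 1 is
// unused, which is why the original node's two results map to results 0 and 2
// of CmpSwap above.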
3533 
3534 static std::optional<std::pair<unsigned, unsigned>>
3535 getContiguousRangeOfSetBits(const APInt &A) {
3536  unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
3537  unsigned LastOne = A.countTrailingZeros();
3538  if (A.countPopulation() != (FirstOne - LastOne + 1))
3539  return std::nullopt;
3540  return std::make_pair(FirstOne, LastOne);
3541 }
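// Behaviour of the helper above, illustrated on an 8-bit APInt (editorial):
//   A = 0b00111000 -> FirstOne = 5, LastOne = 3, popcount = 3 == 5 - 3 + 1,
//                     so it returns the pair (5, 3);
//   A = 0b00101000 -> popcount = 2 but 5 - 3 + 1 == 3, so it returns
//                     std::nullopt (the set bits are not contiguous).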
3542 
3543 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
3544  assert(N->getOpcode() == ARMISD::CMPZ);
3545  SwitchEQNEToPLMI = false;
3546 
3547  if (!Subtarget->isThumb())
3548  // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
3549  // LSR don't exist as standalone instructions - they need the barrel shifter.
3550  return;
3551 
3552  // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
3553  SDValue And = N->getOperand(0);
3554  if (!And->hasOneUse())
3555  return;
3556 
3557  SDValue Zero = N->getOperand(1);
3558  if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isZero() ||
3559  And->getOpcode() != ISD::AND)
3560  return;
3561  SDValue X = And.getOperand(0);
3562  auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
3563 
3564  if (!C)
3565  return;
3566  auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
3567  if (!Range)
3568  return;
3569 
3570  // There are several ways to lower this:
3571  SDNode *NewN;
3572  SDLoc dl(N);
3573 
3574  auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
3575  if (Subtarget->isThumb2()) {
3576  Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
3577  SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
3578  getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3579  CurDAG->getRegister(0, MVT::i32) };
3580  return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3581  } else {
3582  SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
3583  CurDAG->getTargetConstant(Imm, dl, MVT::i32),
3584  getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3585  return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3586  }
3587  };
3588 
3589  if (Range->second == 0) {
3590  // 1. Mask includes the LSB -> Simply shift the top N bits off
3591  NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3592  ReplaceNode(And.getNode(), NewN);
3593  } else if (Range->first == 31) {
3594  // 2. Mask includes the MSB -> Simply shift the bottom N bits off
3595  NewN = EmitShift(ARM::tLSRri, X, Range->second);
3596  ReplaceNode(And.getNode(), NewN);
3597  } else if (Range->first == Range->second) {
3598  // 3. Only one bit is set. We can shift this into the sign bit and use a
3599  // PL/MI comparison.
3600  NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3601  ReplaceNode(And.getNode(), NewN);
3602 
3603  SwitchEQNEToPLMI = true;
3604  } else if (!Subtarget->hasV6T2Ops()) {
3605  // 4. Do a double shift to clear bottom and top bits, but only in
3606  // thumb-1 mode as in thumb-2 we can use UBFX.
3607  NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3608  NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
3609  Range->second + (31 - Range->first));
3610  ReplaceNode(And.getNode(), NewN);
3611  }
3612 }
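// Worked examples for SelectCMPZ above (illustrative only):
//   (x & 0x000000ff) == 0 -> LSLS x, #24            (mask includes the LSB)
//   (x & 0xffff0000) == 0 -> LSRS x, #16            (mask includes the MSB)
//   (x & 0x00000010) == 0 -> LSLS x, #27, then EQ/NE becomes PL/MI
//   (x & 0x000007f8) == 0 -> LSLS x, #21; LSRS #24  (pre-v6t2 double shift)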
3613 
3614 static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3],
3615  unsigned Opc128[3]) {
3616  assert((VT.is64BitVector() || VT.is128BitVector()) &&
3617  "Unexpected vector shuffle length");
3618  switch (VT.getScalarSizeInBits()) {
3619  default:
3620  llvm_unreachable("Unexpected vector shuffle element size");
3621  case 8:
3622  return VT.is64BitVector() ? Opc64[0] : Opc128[0];
3623  case 16:
3624  return VT.is64BitVector() ? Opc64[1] : Opc128[1];
3625  case 32:
3626  return VT.is64BitVector() ? Opc64[2] : Opc128[2];
3627  }
3628 }
3629 
3629 
3630 void ARMDAGToDAGISel::Select(SDNode *N) {
3631  SDLoc dl(N);
3632 
3633  if (N->isMachineOpcode()) {
3634  N->setNodeId(-1);
3635  return; // Already selected.
3636  }
3637 
3638  switch (N->getOpcode()) {
3639  default: break;
3640  case ISD::STORE: {
3641  // For Thumb1, match an sp-relative store in C++. This is a little
3642  // unfortunate, but I don't think I can make the chain check work
3643  // otherwise. (The chain of the store has to be the same as the chain
3644  // of the CopyFromReg, or else we can't replace the CopyFromReg with
3645  // a direct reference to "SP".)
3646  //
3647  // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
3648  // a different addressing mode from other four-byte stores.
3649  //
3650  // This pattern usually comes up with call arguments.
3651  StoreSDNode *ST = cast<StoreSDNode>(N);
3652  SDValue Ptr = ST->getBasePtr();
3653  if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
3654  int RHSC = 0;
3655  if (Ptr.getOpcode() == ISD::ADD &&
3656  isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
3657  Ptr = Ptr.getOperand(0);
3658 
3659  if (Ptr.getOpcode() == ISD::CopyFromReg &&
3660  cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
3661  Ptr.getOperand(0) == ST->getChain()) {
3662  SDValue Ops[] = {ST->getValue(),
3663  CurDAG->getRegister(ARM::SP, MVT::i32),
3664  CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
3665  getAL(CurDAG, dl),
3666  CurDAG->getRegister(0, MVT::i32),
3667  ST->getChain()};
3668  MachineSDNode *ResNode =
3669  CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
3670  MachineMemOperand *MemOp = ST->getMemOperand();
3671  CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3672  ReplaceNode(N, ResNode);
3673  return;
3674  }
3675  }
3676  break;
3677  }
3678  case ISD::WRITE_REGISTER:
3679  if (tryWriteRegister(N))
3680  return;
3681  break;
3682  case ISD::READ_REGISTER:
3683  if (tryReadRegister(N))
3684  return;
3685  break;
3686  case ISD::INLINEASM:
3687  case ISD::INLINEASM_BR:
3688  if (tryInlineAsm(N))
3689  return;
3690  break;
3691  case ISD::SUB:
3692  // Select special operations if SUB node forms integer ABS pattern
3693  if (tryABSOp(N))
3694  return;
3695  // Other cases are autogenerated.
3696  break;
3697  case ISD::Constant: {
3698  unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
3699  // If we can't materialize the constant we need to use a literal pool
3700  if (ConstantMaterializationCost(Val, Subtarget) > 2) {
3701  SDValue CPIdx = CurDAG->getTargetConstantPool(
3702  ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
3703  TLI->getPointerTy(CurDAG->getDataLayout()));
3704 
3705  SDNode *ResNode;
3706  if (Subtarget->isThumb()) {
3707  SDValue Ops[] = {
3708  CPIdx,
3709  getAL(CurDAG, dl),
3710  CurDAG->getRegister(0, MVT::i32),
3711  CurDAG->getEntryNode()
3712  };
3713  ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
3714  Ops);
3715  } else {
3716  SDValue Ops[] = {
3717  CPIdx,
3718  CurDAG->getTargetConstant(0, dl, MVT::i32),
3719  getAL(CurDAG, dl),
3720  CurDAG->getRegister(0, MVT::i32),
3721  CurDAG->getEntryNode()
3722  };
3723  ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
3724  Ops);
3725  }
3726  // Annotate the Node with memory operand information so that MachineInstr
3727  // queries work properly. This e.g. gives the register allocation the
3728  // required information for rematerialization.
3729  MachineFunction& MF = CurDAG->getMachineFunction();
3730  MachineMemOperand *MemOp =
3731  MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3732  MachineMemOperand::MOLoad, 4, Align(4));
3733 
3734  CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3735 
3736  ReplaceNode(N, ResNode);
3737  return;
3738  }
3739 
3740  // Other cases are autogenerated.
3741  break;
3742  }
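// Illustrative example (editorial): on Thumb1, a constant such as 0x12345678
// cannot be built in two or fewer instructions (no MOVW/MOVT), so
// ConstantMaterializationCost exceeds 2 and the value is loaded from the
// constant pool with tLDRpci instead.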
3743  case ISD::FrameIndex: {
3744  // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
3745  int FI = cast<FrameIndexSDNode>(N)->getIndex();
3746  SDValue TFI = CurDAG->getTargetFrameIndex(
3747  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
3748  if (Subtarget->isThumb1Only()) {
3749  // Set the alignment of the frame object to 4, to avoid having to generate
3750  // more than one ADD
3751  MachineFrameInfo &MFI = MF->getFrameInfo();
3752  if (MFI.getObjectAlign(FI) < Align(4))
3753  MFI.setObjectAlignment(FI, Align(4));
3754  CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
3755  CurDAG->getTargetConstant(0, dl, MVT::i32));
3756  return;
3757  } else {
3758  unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
3759  ARM::t2ADDri : ARM::ADDri);
3760  SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
3761  getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3762  CurDAG->getRegister(0, MVT::i32) };
3763  CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3764  return;
3765  }
3766  }
3767  case ISD::INSERT_VECTOR_ELT: {
3768  if (tryInsertVectorElt(N))
3769  return;
3770  break;
3771  }
3772  case ISD::SRL:
3773  if (tryV6T2BitfieldExtractOp(N, false))
3774  return;
3775  break;
3777  case ISD::SRA:
3778  if (tryV6T2BitfieldExtractOp(N, true))
3779  return;
3780  break;
3781  case ISD::FP_TO_UINT:
3782  case ISD::FP_TO_SINT:
3783  case ISD::FP_TO_UINT_SAT:
3784  case ISD::FP_TO_SINT_SAT:
3785  if (tryFP_TO_INT(N, dl))
3786  return;
3787  break;
3788  case ISD::FMUL:
3789  if (tryFMULFixed(N, dl))
3790  return;
3791  break;
3792  case ISD::MUL:
3793  if (Subtarget->isThumb1Only())
3794  break;
3795  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
3796  unsigned RHSV = C->getZExtValue();
3797  if (!RHSV) break;
3798  if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
3799  unsigned ShImm = Log2_32(RHSV-1);
3800  if (ShImm >= 32)
3801  break;
3802  SDValue V = N->getOperand(0);
3803  ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3804  SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3805  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3806  if (Subtarget->isThumb()) {
3807  SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3808  CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
3809  return;
3810  } else {
3811  SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3812  Reg0 };
3813  CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
3814  return;
3815  }
3816  }
3817  if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
3818  unsigned ShImm = Log2_32(RHSV+1);
3819  if (ShImm >= 32)
3820  break;
3821  SDValue V = N->getOperand(0);
3822  ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3823  SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3824  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3825  if (Subtarget->isThumb()) {
3826  SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3827  CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
3828  return;
3829  } else {
3830  SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3831  Reg0 };
3832  CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
3833  return;
3834  }
3835  }
3836  }
3837  break;
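// Illustrative examples for the MUL lowering above (editorial):
//   x * 9 (RHSV - 1 == 8 == 2^3) -> add x, x, lsl #3   (ADDrsi / t2ADDrs)
//   x * 7 (RHSV + 1 == 8 == 2^3) -> rsb x, x, lsl #3   (RSBrsi / t2RSBrs)
// since rsb computes (x << 3) - x = 7 * x, no multiply is needed.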
3838  case ISD::AND: {
3839  // Check for unsigned bitfield extract
3840  if (tryV6T2BitfieldExtractOp(N, false))
3841  return;
3842 
3843  // If an immediate is used in an AND node, it is possible that the immediate
3844  // can be more optimally materialized when negated. If this is the case we
3845  // can negate the immediate and use a BIC instead.
3846  auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
3847  if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
3848  uint32_t Imm = (uint32_t) N1C->getZExtValue();
3849 
3850  // In Thumb2 mode, an AND can take a 12-bit immediate. If this
3851  // immediate can be negated and fit in the immediate operand of
3852  // a t2BIC, don't do any manual transform here as this can be
3853  // handled by the generic ISel machinery.
3854  bool PreferImmediateEncoding =
3855  Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
3856  if (!PreferImmediateEncoding &&
3857  ConstantMaterializationCost(Imm, Subtarget) >
3858  ConstantMaterializationCost(~Imm, Subtarget)) {
3859  // The current immediate costs more to materialize than a negated
3860  // immediate, so negate the immediate and use a BIC.
3861  SDValue NewImm =
3862  CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
3863  // If the new constant didn't exist before, reposition it in the topological
3864  // ordering so it is just before N. Otherwise, don't touch its location.
3865  if (NewImm->getNodeId() == -1)
3866  CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
3867 
3868  if (!Subtarget->hasThumb2()) {
3869  SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
3870  N->getOperand(0), NewImm, getAL(CurDAG, dl),
3871  CurDAG->getRegister(0, MVT::i32)};
3872  ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
3873  return;
3874  } else {
3875  SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
3876  CurDAG->getRegister(0, MVT::i32),
3877  CurDAG->getRegister(0, MVT::i32)};
3878  ReplaceNode(N,
3879  CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
3880  return;
3881  }
3882  }
3883  }
3884 
3885  // (and (or x, c2), c1), where the top 16 bits of c1 and c2 match, the low
3886  // 16 bits of c1 are 0xffff, and the low 16 bits of c2 are 0. That is, the
3887  // top 16 bits of the result are entirely contributed by c2 and the low 16
3888  // bits entirely by x. That's equal to (or (and x, 0xffff), (and c2, 0xffff0000)).
3889  // Select it to: "movt x, ((c2 & 0xffff0000) >> 16)".
3890  EVT VT = N->getValueType(0);
3891  if (VT != MVT::i32)
3892  break;
3893  unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
3894  ? ARM::t2MOVTi16
3895  : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
3896  if (!Opc)
3897  break;
3898  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3899  N1C = dyn_cast<ConstantSDNode>(N1);
3900  if (!N1C)
3901  break;
3902  if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
3903  SDValue N2 = N0.getOperand(1);
3904  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
3905  if (!N2C)
3906  break;
3907  unsigned N1CVal = N1C->getZExtValue();
3908  unsigned N2CVal = N2C->getZExtValue();
3909  if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
3910  (N1CVal & 0xffffU) == 0xffffU &&
3911  (N2CVal & 0xffffU) == 0x0U) {
3912  SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
3913  dl, MVT::i32);
3914  SDValue Ops[] = { N0.getOperand(0), Imm16,
3915  getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
3916  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
3917  return;
3918  }
3919  }
3920 
3921  break;
3922  }
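// Illustrative examples for the AND lowering above (editorial):
//   (and x, 0xffffff00) on Thumb1 -> bics x, #0xff, because ~imm == 0xff is
//   cheaper to materialize than the original immediate;
//   (and (or x, 0x12340000), 0x1234ffff) -> movt x, #0x1234, since the top
//   halves of c1 and c2 match, c1's low half is 0xffff and c2's low half is 0.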
3923  case ARMISD::UMAAL: {
3924  unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3925  SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3926  N->getOperand(2), N->getOperand(3),
3927  getAL(CurDAG, dl),
3928  CurDAG->getRegister(0, MVT::i32) };
3929  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
3930  return;
3931  }
3932  case ARMISD::UMLAL: {
3933  if (Subtarget->isThumb()) {
3934  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3935  N->getOperand(3), getAL(CurDAG, dl),
3936  CurDAG->getRegister(0, MVT::i32)};
3937  ReplaceNode(
3938  N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
3939  return;
3940  } else {
3941  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3942  N->getOperand(3), getAL(CurDAG, dl),
3943  CurDAG->getRegister(0, MVT::i32),
3944  CurDAG->getRegister(0, MVT::i32) };
3945  ReplaceNode(N, CurDAG->getMachineNode(
3946  Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3947  MVT::i32, MVT::i32, Ops));
3948  return;
3949  }
3950  }
3951  case ARMISD::SMLAL: {
3952  if (Subtarget->isThumb()) {
3953  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3954  N->getOperand(3), getAL(CurDAG, dl),
3955  CurDAG->getRegister(0, MVT::i32)};
3956  ReplaceNode(
3957  N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3958  return;
3959  } else {
3960  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3961  N->getOperand(3), getAL(CurDAG, dl),
3962  CurDAG->getRegister(0, MVT::i32),
3963  CurDAG->getRegister(0, MVT::i32) };
3964  ReplaceNode(N, CurDAG->getMachineNode(
3965  Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3966  MVT::i32, MVT::i32, Ops));
3967  return;
3968  }
3969  }
3970  case ARMISD::SUBE: {
3971  if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
3972  break;
3973  // Look for a pattern to match SMMLS
3974  // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
3975  if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
3976  N->getOperand(2).getOpcode() != ARMISD::SUBC ||
3977  !SDValue(N, 1).use_empty())
3978  break;
3979 
3980  if (Subtarget->isThumb())
3981  assert(Subtarget->hasThumb2() &&
3982  "This pattern should not be generated for Thumb");
3983 
3984  SDValue SmulLoHi = N->getOperand(1);
3985  SDValue Subc = N->getOperand(2);
3986  auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
3987 
3988  if (!Zero || Zero->getZExtValue() != 0 ||
3989  Subc.getOperand(1) != SmulLoHi.getValue(0) ||
3990  N->getOperand(1) != SmulLoHi.getValue(1) ||
3991  N->getOperand(2) != Subc.getValue(1))
3992  break;
3993 
3994  unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3995  SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
3996  N->getOperand(0), getAL(CurDAG, dl),
3997  CurDAG->getRegister(0, MVT::i32) };
3998  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
3999  return;
4000  }
4001  case ISD::LOAD: {
4002  if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
4003  return;
4004  if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
4005  if (tryT2IndexedLoad(N))
4006  return;
4007  } else if (Subtarget->isThumb()) {
4008  if (tryT1IndexedLoad(N))
4009  return;
4010  } else if (tryARMIndexedLoad(N))
4011  return;
4012  // Other cases are autogenerated.
4013  break;
4014  }
4015  case ISD::MLOAD:
4016  if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
4017  return;
4018  // Other cases are autogenerated.
4019  break;
4020  case ARMISD::WLSSETUP: {
4021  SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32,
4022  N->getOperand(0));
4023  ReplaceUses(N, New);
4024  CurDAG->RemoveDeadNode(N);
4025  return;
4026  }
4027  case ARMISD::WLS: {
4028  SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other,
4029  N->getOperand(1), N->getOperand(2),
4030  N->getOperand(0));
4031  ReplaceUses(N, New);
4032  CurDAG->RemoveDeadNode(N);
4033  return;
4034  }
4035  case ARMISD::LE: {
4036  SDValue Ops[] = { N->getOperand(1),
4037  N->getOperand(2),
4038  N->getOperand(0) };
4039  unsigned Opc = ARM::t2LoopEnd;
4040  SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
4041  ReplaceUses(N, New);
4042  CurDAG->RemoveDeadNode(N);
4043  return;
4044  }
4045  case ARMISD::LDRD: {
4046  if (Subtarget->isThumb2())
4047  break; // TableGen handles isel in this case.
4048  SDValue Base, RegOffset, ImmOffset;
4049  const SDValue &Chain = N->getOperand(0);
4050  const SDValue &Addr = N->getOperand(1);
4051  SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
4052  if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
4053  // The register-offset variant of LDRD mandates that the register
4054  // allocated to RegOffset is not reused in any of the remaining operands.
4055  // This restriction is currently not enforced. Therefore emitting this
4056  // variant is explicitly avoided.
4057  Base = Addr;
4058  RegOffset = CurDAG->getRegister(0, MVT::i32);
4059  }
4060  SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
4061  SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
4062  {MVT::Untyped, MVT::Other}, Ops);
4063  SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4064  SDValue(New, 0));
4065  SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4066  SDValue(New, 0));
4067  transferMemOperands(N, New);
4068  ReplaceUses(SDValue(N, 0), Lo);
4069  ReplaceUses(SDValue(N, 1), Hi);
4070  ReplaceUses(SDValue(N, 2), SDValue(New, 1));
4071  CurDAG->RemoveDeadNode(N);
4072  return;
4073  }
4074  case ARMISD::STRD: {
4075  if (Subtarget->isThumb2())
4076  break; // TableGen handles isel in this case.
4077  SDValue Base, RegOffset, ImmOffset;
4078  const SDValue &Chain = N->getOperand(0);
4079  const SDValue &Addr = N->getOperand(3);
4080  SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
4081  if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
4082  // The register-offset variant of STRD mandates that the register
4083  // allocated to RegOffset is not reused in any of the remaining operands.
4084  // This restriction is currently not enforced. Therefore emitting this
4085  // variant is explicitly avoided.
4086  Base = Addr;
4087  RegOffset = CurDAG->getRegister(0, MVT::i32);
4088  }
4089  SDNode *RegPair =
4090  createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2));
4091  SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
4092  SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops);
4093  transferMemOperands(N, New);
4094  ReplaceUses(SDValue(N, 0), SDValue(New, 0));
4095  CurDAG->RemoveDeadNode(N);
4096  return;
4097  }
4098  case ARMISD::LOOP_DEC: {
4099  SDValue Ops[] = { N->getOperand(1),
4100  N->getOperand(2),
4101  N->getOperand(0) };
4102  SDNode *Dec =
4103  CurDAG->getMachineNode(ARM::t2LoopDec, dl,
4104  CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
4105  ReplaceUses(N, Dec);
4106  CurDAG->RemoveDeadNode(N);
4107  return;
4108  }
4109  case ARMISD::BRCOND: {
4110  // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4111  // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4112  // Pattern complexity = 6 cost = 1 size = 0
4113 
4114  // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4115  // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
4116  // Pattern complexity = 6 cost = 1 size = 0
4117 
4118  // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4119  // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4120  // Pattern complexity = 6 cost = 1 size = 0
4121 
4122  unsigned Opc = Subtarget->isThumb() ?
4123  ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
4124  SDValue Chain = N->getOperand(0);
4125  SDValue N1 = N->getOperand(1);
4126  SDValue N2 = N->getOperand(2);
4127  SDValue N3 = N->getOperand(3);
4128  SDValue InFlag = N->getOperand(4);
4129  assert(N1.getOpcode() == ISD::BasicBlock);
4130  assert(N2.getOpcode() == ISD::Constant);
4131  assert(N3.getOpcode() == ISD::Register);
4132 
4133  unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
4134 
4135  if (InFlag.getOpcode() == ARMISD::CMPZ) {
4136  if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
4137  SDValue Int = InFlag.getOperand(0);
4138  uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
4139 
4140  // Handle low-overhead loops.
4141  if (ID == Intrinsic::loop_decrement_reg) {
4142  SDValue Elements = Int.getOperand(2);
4143  SDValue Size = CurDAG->getTargetConstant(
4144  cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
4145  MVT::i32);
4146 
4147  SDValue Args[] = { Elements, Size, Int.getOperand(0) };
4148  SDNode *LoopDec =
4149  CurDAG->getMachineNode(ARM::t2LoopDec, dl,
4150  CurDAG->getVTList(MVT::i32, MVT::Other),
4151  Args);
4152  ReplaceUses(Int.getNode(), LoopDec);
4153 
4154  SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
4155  SDNode *LoopEnd =
4156  CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
4157 
4158  ReplaceUses(N, LoopEnd);
4159  CurDAG->RemoveDeadNode(N);
4160  CurDAG->RemoveDeadNode(InFlag.getNode());
4161  CurDAG->RemoveDeadNode(Int.getNode());
4162  return;
4163  }
4164  }
4165 
4166  bool SwitchEQNEToPLMI;
4167  SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
4168  InFlag = N->getOperand(4);
4169 
4170  if (SwitchEQNEToPLMI) {
4171  switch ((ARMCC::CondCodes)CC) {
4172  default: llvm_unreachable("CMPZ must be either NE or EQ!");
4173  case ARMCC::NE:
4174  CC = (unsigned)ARMCC::MI;
4175  break;
4176  case ARMCC::EQ:
4177  CC = (unsigned)ARMCC::PL;
4178  break;
4179  }
4180  }
4181  }
4182 
4183  SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
4184  SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
4185  SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
4186  MVT::Glue, Ops);
4187  Chain = SDValue(ResNode, 0);
4188  if (N->getNumValues() == 2) {
4189  InFlag = SDValue(ResNode, 1);
4190  ReplaceUses(SDValue(N, 1), InFlag);
4191  }
4192  ReplaceUses(SDValue(N, 0),
4193  SDValue(Chain.getNode(), Chain.getResNo()));
4194  CurDAG->RemoveDeadNode(N);
4195  return;
4196  }
4197 
4198  case ARMISD::CMPZ: {
4199  // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
4200  // This allows us to avoid materializing the expensive negative constant.
4201  // The CMPZ #0 is useless and will be peepholed away but we need to keep it
4202  // for its glue output.
4203  SDValue X = N->getOperand(0);
4204  auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
4205  if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
4206  int64_t Addend = -C->getSExtValue();
4207 
4208  SDNode *Add = nullptr;
4209  // ADDS can be better than CMN if the immediate fits in a
4210  // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
4211  // Outside that range we can just use a CMN which is 32-bit but has a
4212  // 12-bit immediate range.
4213  if (Addend < 1<<8) {
4214  if (Subtarget->isThumb2()) {
4215  SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
4216  getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
4217  CurDAG->getRegister(0, MVT::i32) };
4218  Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
4219  } else {
4220  unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
4221  SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
4222  CurDAG->getTargetConstant(Addend, dl, MVT::i32),
4223  getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
4224  Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
4225  }
4226  }
4227  if (Add) {
4228  SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
4229  CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
4230  }
4231  }
4232  // Other cases are autogenerated.
4233  break;
4234  }
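// Illustrative example (editorial): for (CMPZ x, #-42) on Thumb, the code
// above emits an add of +42 (42 < 256 fits tADDi8 / t2ADDri) and morphs the
// node into (CMPZ sum, #0); the compare against zero is kept only for its
// glue output and is expected to be peepholed away, so -42 is never
// materialized.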
4235 
4236  case ARMISD::CMOV: {
4237  SDValue InFlag = N->getOperand(4);
4238 
4239  if (InFlag.getOpcode() == ARMISD::CMPZ) {
4240  bool SwitchEQNEToPLMI;
4241  SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
4242 
4243  if (SwitchEQNEToPLMI) {
4244  SDValue ARMcc = N->getOperand(2);
4245  ARMCC::CondCodes CC =
4246  (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
4247 
4248  switch (CC) {
4249  default: llvm_unreachable("CMPZ must be either NE or EQ!");
4250  case ARMCC::NE:
4251  CC = ARMCC::MI;
4252  break;
4253  case ARMCC::EQ:
4254  CC = ARMCC::PL;
4255  break;
4256  }
4257  SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
4258  SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
4259  N->getOperand(3), N->getOperand(4)};
4260  CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
4261  }
4262 
4263  }
4264  // Other cases are autogenerated.
4265  break;
4266  }
4267  case ARMISD::VZIP: {
4268  EVT VT = N->getValueType(0);
4269  // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4270  unsigned Opc64[] = {ARM::VZIPd8, ARM::VZIPd16, ARM::VTRNd32};
4271  unsigned Opc128[] = {ARM::VZIPq8, ARM::VZIPq16, ARM::VZIPq32};
4272  unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4273  SDValue Pred = getAL(CurDAG, dl);
4274  SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4275  SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4276  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4277  return;
4278  }
4279  case ARMISD::VUZP: {
4280  EVT VT = N->getValueType(0);
4281  // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4282  unsigned Opc64[] = {ARM::VUZPd8, ARM::VUZPd16, ARM::VTRNd32};
4283  unsigned Opc128[] = {ARM::VUZPq8, ARM::VUZPq16, ARM::VUZPq32};
4284  unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4285  SDValue Pred = getAL(CurDAG, dl);
4286  SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4287  SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4288  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4289  return;
4290  }
4291  case ARMISD::VTRN: {
4292  EVT VT = N->getValueType(0);
4293  unsigned Opc64[] = {ARM::VTRNd8, ARM::VTRNd16, ARM::VTRNd32};
4294  unsigned Opc128[] = {ARM::VTRNq8, ARM::VTRNq16, ARM::VTRNq32};
4295  unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4296  SDValue Pred = getAL(CurDAG, dl);
4297  SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4298  SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4299  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4300  return;
4301  }
4302  case ARMISD::BUILD_VECTOR: {
4303  EVT VecVT = N->getValueType(0);
4304  EVT EltVT = VecVT.getVectorElementType();
4305  unsigned NumElts = VecVT.getVectorNumElements();
4306  if (EltVT == MVT::f64) {
4307  assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
4308  ReplaceNode(
4309  N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
4310  return;
4311  }
4312  assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
4313  if (NumElts == 2) {
4314  ReplaceNode(
4315  N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
4316  return;
4317  }
4318  assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
4319  ReplaceNode(N,
4320  createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
4321  N->getOperand(2), N->getOperand(3)));
4322  return;
4323  }
4324 
4325  case ARMISD::VLD1DUP: {
4326  static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
4327  ARM::VLD1DUPd32 };
4328  static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
4329  ARM::VLD1DUPq32 };
4330  SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
4331  return;
4332  }
4333 
4334  case ARMISD::VLD2DUP: {
4335  static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4336  ARM::VLD2DUPd32 };
4337  SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
4338  return;
4339  }
4340 
4341  case ARMISD::VLD3DUP: {
4342  static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
4343  ARM::VLD3DUPd16Pseudo,
4344  ARM::VLD3DUPd32Pseudo };
4345  SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
4346  return;
4347  }
4348 
4349  case ARMISD::VLD4DUP: {
4350  static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
4351  ARM::VLD4DUPd16Pseudo,
4352  ARM::VLD4DUPd32Pseudo };
4353  SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
4354  return;
4355  }
4356 
4357  case ARMISD::VLD1DUP_UPD: {
4358  static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
4359  ARM::VLD1DUPd16wb_fixed,
4360  ARM::VLD1DUPd32wb_fixed };
4361  static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
4362  ARM::VLD1DUPq16wb_fixed,
4363  ARM::VLD1DUPq32wb_fixed };
4364  SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
4365  return;
4366  }
4367 
4368  case ARMISD::VLD2DUP_UPD: {
4369  static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed,
4370  ARM::VLD2DUPd16wb_fixed,
4371  ARM::VLD2DUPd32wb_fixed,
4372  ARM::VLD1q64wb_fixed };
4373  static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4374  ARM::VLD2DUPq16EvenPseudo,
4375  ARM::VLD2DUPq32EvenPseudo };
4376  static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed,
4377  ARM::VLD2DUPq16OddPseudoWB_fixed,
4378  ARM::VLD2DUPq32OddPseudoWB_fixed };
4379  SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0, QOpcodes1);
4380  return;
4381  }
4382 
4383  case ARMISD::VLD3DUP_UPD: {
4384  static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
4385  ARM::VLD3DUPd16Pseudo_UPD,
4386  ARM::VLD3DUPd32Pseudo_UPD,
4387  ARM::VLD1d64TPseudoWB_fixed };
4388  static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4389  ARM::VLD3DUPq16EvenPseudo,
4390  ARM::VLD3DUPq32EvenPseudo };
4391  static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD,
4392  ARM::VLD3DUPq16OddPseudo_UPD,
4393  ARM::VLD3DUPq32OddPseudo_UPD };
4394  SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4395  return;
4396  }
4397 
4398  case ARMISD::VLD4DUP_UPD: {
4399  static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
4400  ARM::VLD4DUPd16Pseudo_UPD,
4401  ARM::VLD4DUPd32Pseudo_UPD,
4402  ARM::VLD1d64QPseudoWB_fixed };
4403  static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4404  ARM::VLD4DUPq16EvenPseudo,
4405  ARM::VLD4DUPq32EvenPseudo };
4406  static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD,
4407  ARM::VLD4DUPq16OddPseudo_UPD,
4408  ARM::VLD4DUPq32OddPseudo_UPD };
4409  SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4410  return;
4411  }
4412 
4413  case ARMISD::VLD1_UPD: {
4414  static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
4415  ARM::VLD1d16wb_fixed,
4416  ARM::VLD1d32wb_fixed,
4417  ARM::VLD1d64wb_fixed };
4418  static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
4419  ARM::VLD1q16wb_fixed,
4420  ARM::VLD1q32wb_fixed,
4421  ARM::VLD1q64wb_fixed };
4422  SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
4423  return;
4424  }
4425 
4426  case ARMISD::VLD2_UPD: {
4427  if (Subtarget->hasNEON()) {
4428  static const uint16_t DOpcodes[] = {
4429  ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
4430  ARM::VLD1q64wb_fixed};
4431  static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
4432  ARM::VLD2q16PseudoWB_fixed,
4433  ARM::VLD2q32PseudoWB_fixed};
4434  SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
4435  } else {
4436  static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
4437  ARM::MVE_VLD21_8_wb};
4438  static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
4439  ARM::MVE_VLD21_16_wb};
4440  static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
4441  ARM::MVE_VLD21_32_wb};
4442  static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4443  SelectMVE_VLD(N, 2, Opcodes, true);
4444  }
4445  return;
4446  }
4447 
4448  case ARMISD::VLD3_UPD: {
4449  static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
4450  ARM::VLD3d16Pseudo_UPD,
4451  ARM::VLD3d32Pseudo_UPD,
4452  ARM::VLD1d64TPseudoWB_fixed};
4453  static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4454  ARM::VLD3q16Pseudo_UPD,
4455  ARM::VLD3q32Pseudo_UPD };
4456  static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
4457  ARM::VLD3q16oddPseudo_UPD,
4458  ARM::VLD3q32oddPseudo_UPD };
4459  SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4460  return;
4461  }
4462 
4463  case ARMISD::VLD4_UPD: {
4464  if (Subtarget->hasNEON()) {
4465  static const uint16_t DOpcodes[] = {
4466  ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD,
4467  ARM::VLD1d64QPseudoWB_fixed};
4468  static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
4469  ARM::VLD4q16Pseudo_UPD,
4470  ARM::VLD4q32Pseudo_UPD};
4471  static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
4472  ARM::VLD4q16oddPseudo_UPD,
4473  ARM::VLD4q32oddPseudo_UPD};
4474  SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4475  } else {
4476  static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
4477  ARM::MVE_VLD42_8,
4478  ARM::MVE_VLD43_8_wb};
4479  static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
4480  ARM::MVE_VLD42_16,
4481  ARM::MVE_VLD43_16_wb};
4482  static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
4483  ARM::MVE_VLD42_32,
4484  ARM::MVE_VLD43_32_wb};
4485  static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4486  SelectMVE_VLD(N, 4, Opcodes, true);
4487  }
4488  return;
4489  }
4490 
4491  case ARMISD::VLD1x2_UPD: {
4492  if (Subtarget->hasNEON()) {
4493  static const uint16_t DOpcodes[] = {
4494  ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed,
4495  ARM::VLD1q64wb_fixed};
4496  static const uint16_t QOpcodes[] = {
4497  ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4498  ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4499  SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
4500  return;
4501  }
4502  break;
4503  }
4504 
4505  case ARMISD::VLD1x3_UPD: {
4506  if (Subtarget->hasNEON()) {
4507  static const uint16_t DOpcodes[] = {
4508  ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed,
4509  ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed};
4510  static const uint16_t QOpcodes0[] = {
4511  ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,
4512  ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};
4513  static const uint16_t QOpcodes1[] = {
4514  ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD,
4515  ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD};
4516  SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4517  return;
4518  }
4519  break;
4520  }
4521 
4522  case ARMISD::VLD1x4_UPD: {
4523  if (Subtarget->hasNEON()) {
4524  static const uint16_t DOpcodes[] = {
4525  ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4526  ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4527  static const uint16_t QOpcodes0[] = {
4528  ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,
4529  ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};
4530  static const uint16_t QOpcodes1[] = {
4531  ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD,
4532  ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD};
4533  SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4534  return;
4535  }
4536  break;
4537  }
4538 
4539  case ARMISD::VLD2LN_UPD: {
4540  static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
4541  ARM::VLD2LNd16Pseudo_UPD,
4542  ARM::VLD2LNd32Pseudo_UPD };
4543  static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
4544  ARM::VLD2LNq32Pseudo_UPD };
4545  SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
4546  return;
4547  }
4548 
4549  case ARMISD::VLD3LN_UPD: {
4550  static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
4551  ARM::VLD3LNd16Pseudo_UPD,
4552  ARM::VLD3LNd32Pseudo_UPD };
4553  static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
4554  ARM::VLD3LNq32Pseudo_UPD };
4555  SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
4556  return;
4557  }
4558 
4559  case ARMISD::VLD4LN_UPD: {
4560  static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
4561  ARM::VLD4LNd16Pseudo_UPD,
4562  ARM::VLD4LNd32Pseudo_UPD };
4563  static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
4564  ARM::VLD4LNq32Pseudo_UPD };
4565  SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
4566  return;
4567  }
4568 
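  // The post-incrementing stores mirror the loads above: the same
  // element-size table layout and the same even/odd Q-register split, with
  // SelectVST(N, isUpdating, NumVecs, ...) doing the selection.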
4569  case ARMISD::VST1_UPD: {
4570  static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
4571  ARM::VST1d16wb_fixed,
4572  ARM::VST1d32wb_fixed,
4573  ARM::VST1d64wb_fixed };
4574  static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
4575  ARM::VST1q16wb_fixed,
4576  ARM::VST1q32wb_fixed,
4577  ARM::VST1q64wb_fixed };
4578  SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
4579  return;
4580  }
4581 
4582  case ARMISD::VST2_UPD: {
4583  if (Subtarget->hasNEON()) {
4584  static const uint16_t DOpcodes[] = {
4585  ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
4586  ARM::VST1q64wb_fixed};
4587  static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
4588  ARM::VST2q16PseudoWB_fixed,
4589  ARM::VST2q32PseudoWB_fixed};
4590  SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
4591  return;
4592  }
4593  break;
4594  }
4595 
4596  case ARMISD::VST3_UPD: {
4597  static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
4598  ARM::VST3d16Pseudo_UPD,
4599  ARM::VST3d32Pseudo_UPD,
4600  ARM::VST1d64TPseudoWB_fixed};
4601  static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
4602  ARM::VST3q16Pseudo_UPD,
4603  ARM::VST3q32Pseudo_UPD };
4604  static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
4605  ARM::VST3q16oddPseudo_UPD,
4606  ARM::VST3q32oddPseudo_UPD };
4607  SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4608  return;
4609  }
4610 
4611  case ARMISD::VST4_UPD: {
4612  if (Subtarget->hasNEON()) {
4613  static const uint16_t DOpcodes[] = {
4614  ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,
4615  ARM::VST1d64QPseudoWB_fixed};
4616  static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
4617  ARM::VST4q16Pseudo_UPD,
4618  ARM::VST4q32Pseudo_UPD};
4619  static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
4620  ARM::VST4q16oddPseudo_UPD,
4621  ARM::VST4q32oddPseudo_UPD};
4622  SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4623  return;
4624  }
4625  break;
4626  }
4627 
4628  case ARMISD::VST1x2_UPD: {
4629  if (Subtarget->hasNEON()) {
4630  static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed,
4631  ARM::VST1q16wb_fixed,
4632  ARM::VST1q32wb_fixed,
4633  ARM::VST1q64wb_fixed};
4634  static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4635  ARM::VST1d16QPseudoWB_fixed,
4636  ARM::VST1d32QPseudoWB_fixed,
4637  ARM::VST1d64QPseudoWB_fixed };
4638  SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
4639  return;
4640  }
4641  break;
4642  }
4643 
4644  case ARMISD::VST1x3_UPD: {
4645  if (Subtarget->hasNEON()) {
4646  static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed,
4647  ARM::VST1d16TPseudoWB_fixed,
4648  ARM::VST1d32TPseudoWB_fixed,
4649  ARM::VST1d64TPseudoWB_fixed };
4650  static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
4651  ARM::VST1q16LowTPseudo_UPD,
4652  ARM::VST1q32LowTPseudo_UPD,
4653  ARM::VST1q64LowTPseudo_UPD };
4654  static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD,
4655  ARM::VST1q16HighTPseudo_UPD,
4656  ARM::VST1q32HighTPseudo_UPD,
4657  ARM::VST1q64HighTPseudo_UPD };
4658  SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4659  return;
4660  }
4661  break;
4662  }
4663 
4664  case ARMISD::VST1x4_UPD: {
4665  if (Subtarget->hasNEON()) {
4666  static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4667  ARM::VST1d16QPseudoWB_fixed,
4668  ARM::VST1d32QPseudoWB_fixed,
4669  ARM::VST1d64QPseudoWB_fixed };
4670  static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
4671  ARM::VST1q16LowQPseudo_UPD,
4672  ARM::VST1q32LowQPseudo_UPD,
4673  ARM::VST1q64LowQPseudo_UPD };
4674  static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD,
4675  ARM::VST1q16HighQPseudo_UPD,
4676  ARM::VST1q32HighQPseudo_UPD,
4677  ARM::VST1q64HighQPseudo_UPD };
4678  SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4679  return;
4680  }
4681  break;
4682  }
4683  case ARMISD::VST2LN_UPD: {
4684  static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
4685  ARM::VST2LNd16Pseudo_UPD,
4686  ARM::VST2LNd32Pseudo_UPD };
4687  static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
4688  ARM::VST2LNq32Pseudo_UPD };
4689  SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
4690  return;
4691  }
4692 
4693  case ARMISD::VST3LN_UPD: {
4694  static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
4695  ARM::VST3LNd16Pseudo_UPD,
4696  ARM::VST3LNd32Pseudo_UPD };
4697  static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
4698  ARM::VST3LNq32Pseudo_UPD };
4699  SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
4700  return;
4701  }
4702 
4703  case ARMISD::VST4LN_UPD: {
4704  static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
4705  ARM::VST4LNd16Pseudo_UPD,
4706  ARM::VST4LNd32Pseudo_UPD };
4707  static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
4708  ARM::VST4LNq32Pseudo_UPD };
4709  SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
4710  return;
4711  }
4712 
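  // For chained intrinsic nodes, operand 0 is the chain and operand 1 the
  // intrinsic ID; the switch below dispatches on that ID.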
4713  case ISD::INTRINSIC_VOID:
4714  case ISD::INTRINSIC_W_CHAIN: {
4715  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
4716  switch (IntNo) {
4717  default:
4718  break;
4719 
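    // llvm.arm.mrrc/mrrc2 read two registers from a coprocessor. Roughly
    // (values illustrative):
    //   %pair = call { i32, i32 } @llvm.arm.mrrc(i32 15, i32 0, i32 14)
    // with operands (coproc, opc, CRm) becomes one MRRC/t2MRRC machine node
    // producing both i32 halves plus the chain.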
4720  case Intrinsic::arm_mrrc:
4721  case Intrinsic::arm_mrrc2: {
4722  SDLoc dl(N);
4723  SDValue Chain = N->getOperand(0);
4724  unsigned Opc;
4725 
4726  if (Subtarget->isThumb())
4727  Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
4728  else
4729  Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
4730 
4731  SmallVector<SDValue, 5> Ops;
4732  Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
4733  Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
4734  Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
4735 
4736  // The mrrc2 instruction in ARM doesn't allow predicates: the top 4 bits
4737  // of the encoded instruction are always '1111'. Assembly does accept AL
4738  // as a predicate on mrrc2, but it has no effect on the encoding.
4739  if (Opc != ARM::MRRC2) {
4740  Ops.push_back(getAL(CurDAG, dl));
4741  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4742  }
4743 
4744  Ops.push_back(Chain);
4745 
4746  // Writes to two registers.
4747  const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
4748 
4749  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
4750  return;
4751  }
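    // Load-exclusive pair. In Thumb2, t2LDREXD/t2LDAEXD produce the two i32
    // halves as separate results; in ARM mode the instruction defines a
    // single Untyped GPRPair, so uses of results 0 and 1 are rewritten
    // below as EXTRACT_SUBREGs of gsub_0 and gsub_1.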
4752  case Intrinsic::arm_ldaexd:
4753  case Intrinsic::arm_ldrexd: {
4754  SDLoc dl(N);
4755  SDValue Chain = N->getOperand(0);
4756  SDValue MemAddr = N->getOperand(2);
4757  bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
4758 
4759  bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
4760  unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
4761  : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
4762 
4763  // arm_ldrexd returns an i64 value in {i32, i32}
4764  std::vector<EVT> ResTys;
4765  if (isThumb) {
4766  ResTys.push_back(MVT::i32);
4767  ResTys.push_back(MVT::i32);
4768  } else
4769  ResTys.push_back(MVT::Untyped);
4770  ResTys.push_back(MVT::Other);
4771 
4772  // Place arguments in the right order.
4773  SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
4774  CurDAG->getRegister(0, MVT::i32), Chain};
4775  SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4776  // Transfer memoperands.
4777  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4778  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4779 
4780  // Remap uses.
4781  SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
4782  if (!SDValue(N, 0).use_empty()) {
4783  SDValue Result;
4784  if (isThumb)
4785  Result = SDValue(Ld, 0);
4786  else {
4787  SDValue SubRegIdx =
4788  CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
4789  SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4790  dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4791  Result = SDValue(ResNode, 0);
4792  }
4793  ReplaceUses(SDValue(N, 0), Result);
4794  }
4795  if (!SDValue(N, 1).use_empty()) {
4796  SDValue Result;
4797  if (isThumb)
4798  Result = SDValue(Ld, 1);
4799  else {
4800  SDValue SubRegIdx =
4801  CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
4802  SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4803  dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4804  Result = SDValue(ResNode, 0);
4805  }
4806  ReplaceUses(SDValue(N, 1), Result);
4807  }
4808  ReplaceUses(SDValue(N, 2), OutChain);
4809  CurDAG->RemoveDeadNode(N);
4810  return;
4811  }
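    // Store-exclusive pair: the mirror image of the load above. Thumb2
    // passes the two i32 halves as separate operands, while ARM mode first
    // packs them into an Untyped GPRPair with createGPRPairNode. Both forms
    // yield an i32 success/failure status and a chain.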
4812  case Intrinsic::arm_stlexd:
4813  case Intrinsic::arm_strexd: {
4814  SDLoc dl(N);
4815  SDValue Chain = N->getOperand(0);
4816  SDValue Val0 = N->getOperand(2);
4817  SDValue Val1 = N->getOperand(3);
4818  SDValue MemAddr = N->getOperand(4);
4819 
4820  // A store-exclusive double returns an i32 value, which is the status
4821  // of the issued store.
4822  const EVT ResTys[] = {MVT::i32, MVT::Other};
4823 
4824  bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
4825  // Place arguments in the right order.
4826  SmallVector<SDValue, 7> Ops;
4827  if (isThumb) {
4828  Ops.push_back(Val0);
4829  Ops.push_back(Val1);
4830  } else
4831  // arm_strexd uses GPRPair.
4832  Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
4833  Ops.push_back(MemAddr);
4834  Ops.push_back(getAL(CurDAG, dl));
4835  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4836  Ops.push_back(Chain);
4837 
4838  bool IsRelease = IntNo == Intrinsic::arm_stlexd;
4839  unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
4840  : (IsRelease ? ARM::STLEXD : ARM::STREXD);
4841 
4842  SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4843  // Transfer memoperands.
4844  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4845  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4846 
4847  ReplaceNode(N, St);
4848  return;
4849  }
4850 
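    // The plain (non-updating) NEON load intrinsics follow. They use the
    // same opcode-table scheme as the ARMISD::*_UPD cases above, but call
    // SelectVLD with isUpdating == false and non-writeback opcodes.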
4851  case Intrinsic::arm_neon_vld1: {
4852  static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
4853  ARM::VLD1d32, ARM::VLD1d64 };
4854  static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4855  ARM::VLD1q32, ARM::VLD1q64};
4856  SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
4857  return;
4858  }
4859 
4860  case Intrinsic::arm_neon_vld1x2: {
4861  static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4862  ARM::VLD1q32, ARM::VLD1q64 };
4863  static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
4864  ARM::VLD1d16QPseudo,
4865  ARM::VLD1d32QPseudo,
4866  ARM::VLD1d64QPseudo };
4867  SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
4868  return;
4869  }
4870 
4871  case Intrinsic::arm_neon_vld1x3: {
4872  static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
4873  ARM::VLD1d16TPseudo,
4874  ARM::VLD1d32TPseudo,
4875  ARM::VLD1d64TPseudo };
4876  static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
4877  ARM::VLD1q16LowTPseudo_UPD,
4878  ARM::VLD1q32LowTPseudo_UPD,
4879  ARM::VLD1q64LowTPseudo_UPD };
4880  static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
4881  ARM::VLD1q16HighTPseudo,
4882  ARM::VLD1q32HighTPseudo,
4883  ARM::VLD1q64HighTPseudo };
4884  SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4885  return;
4886  }
4887 
4888  case Intrinsic::arm_neon_vld1x4: {
4889  static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
4890  ARM::VLD1d16QPseudo,
4891  ARM::VLD1d32QPseudo,
4892  ARM::VLD1d64QPseudo };
4893  static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
4894  ARM::VLD1q16LowQPseudo_UPD,
4895  ARM::VLD1q32LowQPseudo_UPD,
4896  ARM::VLD1q64LowQPseudo_UPD };
4897  static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
4898  ARM::VLD1q16HighQPseudo,
4899  ARM::VLD1q32HighQPseudo,
4900  ARM::VLD1q64HighQPseudo };
4901  SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4902  return;
4903  }
4904 
4905  case Intrinsic::arm_neon_vld2: {
4906  static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
4907  ARM::VLD2d32, ARM::VLD1q64 };
4908  static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
4909  ARM::VLD2q32Pseudo };
4910  SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
4911  return;
4912  }
4913 
4914  case Intrinsic::arm_neon_vld3: {
4915  static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
4916  ARM::VLD3d16Pseudo,
4917  ARM::VLD3d32Pseudo,
4918  ARM::VLD1d64TPseudo };
4919  static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4920  ARM::VLD3q16Pseudo_UPD,
4921  ARM::VLD3q32Pseudo_UPD };
4922  static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
4923  ARM::VLD3q16oddPseudo,
4924  ARM::VLD3q32oddPseudo };
4925  SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4926  return;
4927  }
4928 
4929  case Intrinsic::arm_neon_vld4: {
4930  static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
4931  ARM::VLD4d16Pseudo,
4932  ARM::VLD4d32Pseudo,
4933  ARM::VLD1d64QPseudo };
4934  static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
4935  ARM::VLD4q16Pseudo_UPD,
4936  ARM::VLD4q32Pseudo_UPD };
4937  static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
4938  ARM::VLD4q16oddPseudo,
4939  ARM::VLD4q32oddPseudo };
4940  SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4941  return;
4942  }
4943 
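    // vldN_dup intrinsics load N consecutive elements and splat each one
    // across all lanes of its destination vector; the Even/Odd pseudo pairs
    // again split a Q-register destination into its two D-register halves.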
4944  case Intrinsic::arm_neon_vld2dup: {
4945  static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4946  ARM::VLD2DUPd32, ARM::VLD1q64 };
4947  static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4948  ARM::VLD2DUPq16EvenPseudo,
4949  ARM::VLD2DUPq32EvenPseudo };
4950  static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
4951  ARM::VLD2DUPq16OddPseudo,
4952  ARM::VLD2DUPq32OddPseudo };
4953  SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
4954  DOpcodes, QOpcodes0, QOpcodes1);
4955  return;
4956  }
4957 
4958  case Intrinsic::arm_neon_vld3dup: {
4959  static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
4960  ARM::VLD3DUPd16Pseudo,
4961  ARM::VLD3DUPd32Pseudo,
4962  ARM::VLD1d64TPseudo };
4963  static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4964  ARM::VLD3DUPq16EvenPseudo,
4965  ARM::VLD3DUPq32EvenPseudo };
4966  static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
4967  ARM::VLD3DUPq16OddPseudo,
4968  ARM::VLD3DUPq32OddPseudo };
4969  SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
4970  DOpcodes, QOpcodes0, QOpcodes1);
4971  return;
4972  }
4973 
4974  case Intrinsic::arm_neon_vld4dup: {
4975  static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
4976  ARM::VLD4DUPd16Pseudo,
4977  ARM::VLD4DUPd32Pseudo,
4978  ARM::VLD1d64QPseudo };
4979  static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4980  ARM::VLD4DUPq16EvenPseudo,
4981  ARM::VLD4DUPq32EvenPseudo };
4982  static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
4983  ARM::VLD4DUPq16OddPseudo,
4984  ARM::VLD4DUPq32OddPseudo };
4985  SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
4986  DOpcodes, QOpcodes0, QOpcodes1);
4987  return;