//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "arm-isel"

static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
                 cl::desc("Disable isel of shifter-op"),
                 cl::init(false));
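
// This flag is a debugging aid rather than a tuning knob; for instance
// (illustrative invocation, any ARM triple works), it can be used to compare
// code quality with and without folded shifter operands:
//   llc -mtriple=armv7-- -disable-shifter-op test.ll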

//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  StringRef getPassName() const override { return "ARM Instruction Selection"; }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B, SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                       SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
                                  unsigned Shift);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  template <int Min, int Max>
  bool SelectImmediateInRange(SDValue N, SDValue &OffImm);

  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }
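
  // For reference, an ARM "so_imm" is an 8-bit value rotated right by an even
  // amount: e.g. (illustrative values) is_so_imm(0xFF) and is_so_imm(0xFF00)
  // hold, while is_so_imm(0x101) does not. The *_not variants test the
  // bitwise complement instead, matching MVN/BIC-style encodings.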

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);
  bool tryMVEIndexedLoad(SDNode *N);

  /// SelectVLD - Select NEON load intrinsics. NumVecs should be
  /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics. NumVecs should
  /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
  /// be 2, 3 or 4. The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// Helper functions for setting up clusters of MVE predication operands.
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask);
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask, SDValue Inactive);

  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);

  /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
  void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);

  /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
  void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
                           bool HasSaturationOperand);

  /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
  void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                         uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);

  /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
  /// vector lanes.
  void SelectMVE_VSHLC(SDNode *N, bool Predicated);

  /// Select long MVE vector reductions with two vector operands.
  /// Stride is the number of vector element widths the instruction can operate
  /// on:
  /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
  /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
  /// Stride is used when addressing the OpcodesS array which contains multiple
  /// opcodes for each element width.
  /// TySize is the index into the list of element types listed above.
  void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                             const uint16_t *OpcodesS, const uint16_t *OpcodesU,
                             size_t Stride, size_t TySize);

  /// Select a 64-bit MVE vector reduction with two vector operands
  /// arm_mve_vmlldava_[predicated]
  void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                         const uint16_t *OpcodesU);
  /// Select a 72-bit MVE vector rounding reduction with two vector operands
  /// int_arm_mve_vrmlldavha[_predicated]
  void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                           const uint16_t *OpcodesU);

  /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
  /// should be 2 or 4. The opcode array specifies the instructions
  /// used for 8, 16 and 32-bit lane sizes respectively, and each
  /// pointer points to a set of NumVecs sub-opcodes used for the
  /// different stages (e.g. VLD20 versus VLD21) of each load family.
  void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                     const uint16_t *const *Opcodes, bool HasWriteback);

  /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
  /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
  void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                       bool Wrapping, bool Predicated);

  /// SelectCDE_CXxD - Select a CDE dual-GPR instruction (one of CX1D,
  /// CX1DA, CX2D, CX2DA, CX3D, CX3DA).
  /// \arg \c NumExtraOps number of extra operands besides the coprocessor,
  ///      the accumulator and the immediate operand, i.e. 0
  ///      for CX1*, 1 for CX2*, 2 for CX3*
  /// \arg \c HasAccum whether the instruction has an accumulator operand
  void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
                      bool HasAccum);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
  /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  bool tryInsertVectorElt(SDNode *N);

  // Select special operations if node forms integer ABS pattern
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
                              SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
                              SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
                              SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};
} // end anonymous namespace

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the value is a 32-bit
// constant operand. If so Imm will receive the 32 bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32 bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}
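
// A typical use, as a sketch (mask value illustrative): matching an AND with
// a known constant mask such as (and X, 255):
//   unsigned Imm;
//   if (isOpcWithIntImmediate(N, ISD::AND, Imm) && Imm == 0xFF) {
//     // N is (and X, 255); operand 0 is X.
//   }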

/// Check whether a particular node is a constant value representable as
/// (N * Scale) where N is in the range [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  ScaledConstant = (int) C->getZExtValue();
  if ((ScaledConstant % Scale) != 0)
    return false;

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}
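
// Worked example (illustrative numbers): for Node = 1016, Scale = 4,
// RangeMin = 0, RangeMax = 256: 1016 % 4 == 0, ScaledConstant becomes 254,
// and 0 <= 254 < 256, so the match succeeds. Node = 1018 would fail the
// divisibility check, and Node = 1024 (ScaledConstant = 256) the range check.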

void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
         E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    if (N->getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as a shifter
    // operand of 'add' and the 'and' and 'srl' would become a bit-extraction
    // node (UBFX).
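    //
    // Concretely (illustrative constants): with c2 = 1020 = 0b1111111100,
    // tz = 2 and c2 >> tz = 255, so (add X1, (and (srl X2, c1), 1020))
    // becomes (add X1, (shl (and (srl X2, c1), 255), 2)).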

    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    unsigned And_imm = 0;
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, a left shift of 1 or 2 as a shifter operand is free, but other
      // amounts are not. e.g.
      //   ubfx   r3, r1, #16, #8
      //   ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //   mov.w  r9, #1020
      //   and.w  r2, r9, r1, lsr #14
      //   ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(N, N0, N1);
  }
}

/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  if (OptLevel == CodeGenOpt::None)
    return true;

  if (!Subtarget->hasVMLxHazards())
    return true;

  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    //   vmla
    //   vmla (stall 8 cycles)
    //
    //   vmul (5 cycles)
    //   vadd (5 cycles)
    //   vmla
    // This adds up to about 18 - 19 cycles.
    //
    //   vmla
    //   vmul (stall 4 cycles)
    //   vadd
    // This adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}

bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
                                            unsigned ShAmt) {
  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    return true;
  if (Shift.hasOneUse())
    return true;
  // R << 2 is free.
  return ShOpcVal == ARM_AM::lsl &&
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}
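
// In other words (illustrative): on Cortex-A9-like cores and Swift, folding a
// shift that has other users into a shifter operand, as in
//   add r0, r1, r2, lsl #3
// is only considered profitable for lsl #2 (or lsl #1 on Swift); other
// amounts keep the shift as a separate instruction.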

bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned MaxShift,
                                             unsigned &PowerOfTwo,
                                             SDValue &NewMulConst) const {
  assert(N.getOpcode() == ISD::MUL);
  assert(MaxShift > 0);

  // If the multiply is used in more than one place then changing the constant
  // will make other uses incorrect, so don't.
  if (!N.hasOneUse()) return false;
  // Check if the multiply is by a constant
  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!MulConst) return false;
  // If the constant is used in more than one place then modifying it will mean
  // we need to materialize two constants instead of one, which is a bad idea.
  if (!MulConst->hasOneUse()) return false;
  unsigned MulConstVal = MulConst->getZExtValue();
  if (MulConstVal == 0) return false;

  // Find the largest power of 2 that MulConstVal is a multiple of
  PowerOfTwo = MaxShift;
  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    --PowerOfTwo;
    if (PowerOfTwo == 0) return false;
  }

  // Only optimise if the new cost is better
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
  unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
  return NewCost < OldCost;
}
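
// Worked example (illustrative constant): multiplying by 0x1FE0000 (255 << 17)
// yields PowerOfTwo = 17 and NewMulConstVal = 255. 255 is a single-instruction
// immediate while 0x1FE0000 typically needs two (e.g. movw+movt), so
// NewCost < OldCost and the shift is extracted into the shifter operand.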

void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}

bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}
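
// For instance (illustrative): in (or (shl X, 16), (and Y, 0xFFFF)) the two
// operands have no set bits in common, so the OR behaves exactly like an ADD
// and can reuse add-style addressing modes and shifter-operand folding.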

bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
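
// For example (illustrative values): (add r1, #1020) matches with Base = r1
// and OffImm = 1020, so a load selects as ldr r0, [r1, #1020]; offsets of
// magnitude 0x1000 (4096) or more fail the 12-bit check and fall back to the
// base-only form at the bottom.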

bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = -RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub : ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
          dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
            dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
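
// For instance (illustrative): a load whose address is (mul X, 5) is matched
// via X * 5 == X + (X << 2), giving Base = Offset = X with an lsl #2 shifter
// operand, i.e. something like ldr r0, [rX, rX, lsl #2].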

bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                                  SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base,
                                        SDValue &Offset, bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}

bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}

bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    unsigned MMOAlign = MemN->getAlignment();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics. For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlignment();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}

bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                                      SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

//===----------------------------------------------------------------------===//
// Thumb Addressing Modes
//===----------------------------------------------------------------------===//

static bool shouldUseZeroOffsetLdSt(SDValue N) {
  // Negative numbers are difficult to materialise in thumb1. If we are
  // selecting the add of a negative, instead try to select ri with a zero
  // offset, so create the add node directly which will become a sub.
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Look for an imm which is not legal for ld/st, but is legal for sub.
  if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
    return C->getSExtValue() < 0 && C->getSExtValue() >= -255;

  return false;
}
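
// For example (illustrative): for a Thumb1 access at (add r1, #-8) the
// callers below bail out, so isel materialises the address separately
// (e.g. subs r2, r1, #8) and selects the memory access with a zero offset,
// since Thumb1 ld/st cannot encode a negative immediate offset.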

bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
                                                SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
    if (!NC || !NC->isNullValue())
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}

bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
                                            SDValue &Offset) {
  if (shouldUseZeroOffsetLdSt(N))
    return false; // Select ri instead
  return SelectThumbAddrModeRRSext(N, Base, Offset);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}

bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlign(FI) < Align(4))
      MFI.setObjectAlignment(FI, Align(4));
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4))
          MFI.setObjectAlignment(FI, Align(4));
        if (MFI.getObjectAlign(FI) >= Align(4)) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
                                          SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
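
// For example (illustrative): with Shift = 2 this accepts byte offsets that
// are multiples of 4 in roughly [-508, 508]; (add rX, #288) gives RHSC = 72
// and OffImm = 288, while #290 fails the divisibility test and falls back to
// the base-only form.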

//===----------------------------------------------------------------------===//
// Thumb 2 Addressing Modes
//===----------------------------------------------------------------------===//

bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false; // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
        ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
        : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm,
                                                 unsigned Shift) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM;
  switch (Opcode) {
  case ISD::LOAD:
    AM = cast<LoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::STORE:
    AM = cast<StoreSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MLOAD:
    AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MSTORE:
    AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
    break;
  default:
    llvm_unreachable("Unexpected Opcode for Imm7Offset");
  }

  int RHSC;
  // 7 bit constant, shifted by Shift.
  if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
    OffImm =
        ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
            ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
            : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
                                        MVT::i32);
    return true;
  }
  return false;
}
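
// For instance (illustrative): a post-incrementing access whose offset node
// is the constant 32, with Shift = 2, yields RHSC = 8 and OffImm = +32; with
// a PRE_DEC/POST_DEC addressing mode the same constant gives OffImm = -32.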

template <int Min, int Max>
bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
  int Val;
  if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
    OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
    return true;
  }
  return false;
}

bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  OffImm = CurDAG->getTargetConstant(RHSC / 4, SDLoc(N), MVT::i32);
  return true;
}
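
// For example (illustrative): (add r1, #16) yields Base = r1 and OffImm = 4,
// since ldrex/strex encode the offset in words; #1024 exceeds the 1020-byte
// limit, so the whole add is kept as the base with a zero offset instead.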

//===--------------------------------------------------------------------===//

/// getAL - Returns an ARMCC::AL immediate node.
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}

void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}

bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
             SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
             SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;
  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
        ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
        : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[] = { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                        CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[] = { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                        CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}
1648 
1649 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1650  LoadSDNode *LD = cast<LoadSDNode>(N);
1651  EVT LoadedVT = LD->getMemoryVT();
1652  ISD::MemIndexedMode AM = LD->getAddressingMode();
1653  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1654  LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1655  return false;
1656 
1657  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1658  if (!COffs || COffs->getZExtValue() != 4)
1659  return false;
1660 
1661  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
 1662  // The encoding of LDM, however, is not what the rest of ISel expects a
 1663  // post-inc load to look like, so we use a pseudo here and switch it for a
 1664  // tLDMIA_UPD after ISel.
1665  SDValue Chain = LD->getChain();
1666  SDValue Base = LD->getBasePtr();
1667  SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1668  CurDAG->getRegister(0, MVT::i32), Chain };
1669  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1670  MVT::i32, MVT::Other, Ops);
1671  transferMemOperands(N, New);
1672  ReplaceNode(N, New);
1673  return true;
1674 }
1675 
1676 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1677  LoadSDNode *LD = cast<LoadSDNode>(N);
1678  ISD::MemIndexedMode AM = LD->getAddressingMode();
1679  if (AM == ISD::UNINDEXED)
1680  return false;
1681 
1682  EVT LoadedVT = LD->getMemoryVT();
1683  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1684  SDValue Offset;
1685  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1686  unsigned Opcode = 0;
1687  bool Match = false;
1688  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1689  switch (LoadedVT.getSimpleVT().SimpleTy) {
1690  case MVT::i32:
1691  Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1692  break;
1693  case MVT::i16:
1694  if (isSExtLd)
1695  Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1696  else
1697  Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1698  break;
1699  case MVT::i8:
1700  case MVT::i1:
1701  if (isSExtLd)
1702  Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1703  else
1704  Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1705  break;
1706  default:
1707  return false;
1708  }
1709  Match = true;
1710  }
1711 
1712  if (Match) {
1713  SDValue Chain = LD->getChain();
1714  SDValue Base = LD->getBasePtr();
1715  SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1716  CurDAG->getRegister(0, MVT::i32), Chain };
1717  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1718  MVT::Other, Ops);
1719  transferMemOperands(N, New);
1720  ReplaceNode(N, New);
1721  return true;
1722  }
1723 
1724  return false;
1725 }
1726 
1727 bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
1728  EVT LoadedVT;
1729  unsigned Opcode = 0;
1730  bool isSExtLd, isPre;
1731  Align Alignment;
1732  ARMVCC::VPTCodes Pred;
1733  SDValue PredReg;
1734  SDValue Chain, Base, Offset;
1735 
1736  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
1737  ISD::MemIndexedMode AM = LD->getAddressingMode();
1738  if (AM == ISD::UNINDEXED)
1739  return false;
1740  LoadedVT = LD->getMemoryVT();
1741  if (!LoadedVT.isVector())
1742  return false;
1743 
1744  Chain = LD->getChain();
1745  Base = LD->getBasePtr();
1746  Offset = LD->getOffset();
1747  Alignment = LD->getAlign();
1748  isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1749  isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1750  Pred = ARMVCC::None;
1751  PredReg = CurDAG->getRegister(0, MVT::i32);
1752  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
1753  ISD::MemIndexedMode AM = LD->getAddressingMode();
1754  if (AM == ISD::UNINDEXED)
1755  return false;
1756  LoadedVT = LD->getMemoryVT();
1757  if (!LoadedVT.isVector())
1758  return false;
1759 
1760  Chain = LD->getChain();
1761  Base = LD->getBasePtr();
1762  Offset = LD->getOffset();
1763  Alignment = LD->getAlign();
1764  isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1765  isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1766  Pred = ARMVCC::Then;
1767  PredReg = LD->getMask();
1768  } else
1769  llvm_unreachable("Expected a Load or a Masked Load!");
1770 
1771  // We allow LE non-masked loads to change the type (for example use a vldrb.8
1772  // as opposed to a vldrw.32). This can allow extra addressing modes or
1773  // alignments for what is otherwise an equivalent instruction.
1774  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);
1775 
1776  SDValue NewOffset;
1777  if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
1778  SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
1779  if (isSExtLd)
1780  Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
1781  else
1782  Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
1783  } else if (LoadedVT == MVT::v8i8 &&
1784  SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
1785  if (isSExtLd)
1786  Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
1787  else
1788  Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
1789  } else if (LoadedVT == MVT::v4i8 &&
1790  SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
1791  if (isSExtLd)
1792  Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
1793  else
1794  Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
1795  } else if (Alignment >= Align(4) &&
1796  (CanChangeType || LoadedVT == MVT::v4i32 ||
1797  LoadedVT == MVT::v4f32) &&
1798  SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
1799  Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
1800  else if (Alignment >= Align(2) &&
1801  (CanChangeType || LoadedVT == MVT::v8i16 ||
1802  LoadedVT == MVT::v8f16) &&
1803  SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
1804  Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
1805  else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
1806  SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
1807  Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
1808  else
1809  return false;
1810 
1811  SDValue Ops[] = {Base, NewOffset,
1812  CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), PredReg,
1813  Chain};
1814  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
1815  N->getValueType(0), MVT::Other, Ops);
1816  transferMemOperands(N, New);
1817  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
1818  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
1819  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
1820  CurDAG->RemoveDeadNode(N);
1821  return true;
1822 }
1823 
 1824 /// Form a GPRPair pseudo register from a pair of GPR regs.
 1825 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
 1826  SDLoc dl(V0.getNode());
1827  SDValue RegClass =
1828  CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1829  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1830  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1831  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1832  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1833 }
1834 
1835 /// Form a D register from a pair of S registers.
1836 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1837  SDLoc dl(V0.getNode());
1838  SDValue RegClass =
1839  CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1840  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1841  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1842  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1843  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1844 }
1845 
1846 /// Form a quad register from a pair of D registers.
1847 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1848  SDLoc dl(V0.getNode());
1849  SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1850  MVT::i32);
1851  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1852  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1853  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1854  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1855 }
1856 
1857 /// Form 4 consecutive D registers from a pair of Q registers.
1858 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1859  SDLoc dl(V0.getNode());
1860  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1861  MVT::i32);
1862  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1863  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1864  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1865  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1866 }
1867 
1868 /// Form 4 consecutive S registers.
1869 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1870  SDValue V2, SDValue V3) {
1871  SDLoc dl(V0.getNode());
1872  SDValue RegClass =
1873  CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1874  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1875  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1876  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1877  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1878  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1879  V2, SubReg2, V3, SubReg3 };
1880  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1881 }
1882 
1883 /// Form 4 consecutive D registers.
1884 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1885  SDValue V2, SDValue V3) {
1886  SDLoc dl(V0.getNode());
1887  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1888  MVT::i32);
1889  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1890  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1891  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1892  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1893  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1894  V2, SubReg2, V3, SubReg3 };
1895  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1896 }
1897 
1898 /// Form 4 consecutive Q registers.
1899 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1900  SDValue V2, SDValue V3) {
1901  SDLoc dl(V0.getNode());
1902  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1903  MVT::i32);
1904  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1905  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1906  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1907  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1908  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1909  V2, SubReg2, V3, SubReg3 };
1910  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1911 }
1912 
1913 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1914 /// of a NEON VLD or VST instruction. The supported values depend on the
1915 /// number of registers being loaded.
1916 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1917  unsigned NumVecs, bool is64BitVector) {
1918  unsigned NumRegs = NumVecs;
1919  if (!is64BitVector && NumVecs < 3)
1920  NumRegs *= 2;
1921 
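  // Clamp to the largest alignment the VLD/VST encoding supports for this
  // register count; e.g. a quad-register VLD2 (NumRegs == 4) encodes at most
  // 32-byte alignment, and anything below 8 bytes is emitted as 0 (unaligned).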
1922  unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1923  if (Alignment >= 32 && NumRegs == 4)
1924  Alignment = 32;
1925  else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1926  Alignment = 16;
1927  else if (Alignment >= 8)
1928  Alignment = 8;
1929  else
1930  Alignment = 0;
1931 
1932  return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1933 }
1934 
1935 static bool isVLDfixed(unsigned Opc)
1936 {
1937  switch (Opc) {
1938  default: return false;
1939  case ARM::VLD1d8wb_fixed : return true;
1940  case ARM::VLD1d16wb_fixed : return true;
1941  case ARM::VLD1d64Qwb_fixed : return true;
1942  case ARM::VLD1d32wb_fixed : return true;
1943  case ARM::VLD1d64wb_fixed : return true;
1944  case ARM::VLD1d64TPseudoWB_fixed : return true;
1945  case ARM::VLD1d64QPseudoWB_fixed : return true;
1946  case ARM::VLD1q8wb_fixed : return true;
1947  case ARM::VLD1q16wb_fixed : return true;
1948  case ARM::VLD1q32wb_fixed : return true;
1949  case ARM::VLD1q64wb_fixed : return true;
1950  case ARM::VLD1DUPd8wb_fixed : return true;
1951  case ARM::VLD1DUPd16wb_fixed : return true;
1952  case ARM::VLD1DUPd32wb_fixed : return true;
1953  case ARM::VLD1DUPq8wb_fixed : return true;
1954  case ARM::VLD1DUPq16wb_fixed : return true;
1955  case ARM::VLD1DUPq32wb_fixed : return true;
1956  case ARM::VLD2d8wb_fixed : return true;
1957  case ARM::VLD2d16wb_fixed : return true;
1958  case ARM::VLD2d32wb_fixed : return true;
1959  case ARM::VLD2q8PseudoWB_fixed : return true;
1960  case ARM::VLD2q16PseudoWB_fixed : return true;
1961  case ARM::VLD2q32PseudoWB_fixed : return true;
1962  case ARM::VLD2DUPd8wb_fixed : return true;
1963  case ARM::VLD2DUPd16wb_fixed : return true;
1964  case ARM::VLD2DUPd32wb_fixed : return true;
1965  }
1966 }
1967 
1968 static bool isVSTfixed(unsigned Opc)
1969 {
1970  switch (Opc) {
1971  default: return false;
1972  case ARM::VST1d8wb_fixed : return true;
1973  case ARM::VST1d16wb_fixed : return true;
1974  case ARM::VST1d32wb_fixed : return true;
1975  case ARM::VST1d64wb_fixed : return true;
1976  case ARM::VST1q8wb_fixed : return true;
1977  case ARM::VST1q16wb_fixed : return true;
1978  case ARM::VST1q32wb_fixed : return true;
1979  case ARM::VST1q64wb_fixed : return true;
1980  case ARM::VST1d64TPseudoWB_fixed : return true;
1981  case ARM::VST1d64QPseudoWB_fixed : return true;
1982  case ARM::VST2d8wb_fixed : return true;
1983  case ARM::VST2d16wb_fixed : return true;
1984  case ARM::VST2d32wb_fixed : return true;
1985  case ARM::VST2q8PseudoWB_fixed : return true;
1986  case ARM::VST2q16PseudoWB_fixed : return true;
1987  case ARM::VST2q32PseudoWB_fixed : return true;
1988  }
1989 }
1990 
1991 // Get the register stride update opcode of a VLD/VST instruction that
1992 // is otherwise equivalent to the given fixed stride updating instruction.
1993 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1994  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1995  && "Incorrect fixed stride updating instruction.");
1996  switch (Opc) {
1997  default: break;
1998  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1999  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
2000  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
2001  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
2002  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
2003  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
2004  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
2005  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
2006  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
2007  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
2008  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
2009  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
2010  case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
2011  case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
2012  case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
2013  case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
2014  case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
2015  case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
2016 
2017  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
2018  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
2019  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
2020  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
2021  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
2022  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
2023  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
2024  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
2025  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
2026  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
2027 
2028  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
2029  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
2030  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
2031  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
2032  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
2033  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
2034 
2035  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
2036  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
2037  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
2038  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
2039  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
2040  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
2041 
2042  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
2043  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
2044  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
2045  }
2046  return Opc; // If not one we handle, return it unchanged.
2047 }
2048 
2049 /// Returns true if the given increment is a Constant known to be equal to the
2050 /// access size performed by a NEON load/store. This means the "[rN]!" form can
2051 /// be used.
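/// For example, a VLD2 of two v4i16 vectors accesses 64/8 * 2 == 16 bytes, so
/// an increment constant of exactly 16 allows the post-incrementing form.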
2052 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
2053  auto C = dyn_cast<ConstantSDNode>(Inc);
2054  return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
2055 }
2056 
2057 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
2058  const uint16_t *DOpcodes,
2059  const uint16_t *QOpcodes0,
2060  const uint16_t *QOpcodes1) {
2061  assert(Subtarget->hasNEON());
2062  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
2063  SDLoc dl(N);
2064 
2065  SDValue MemAddr, Align;
2066  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2067  // nodes are not intrinsics.
2068  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2069  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2070  return;
2071 
2072  SDValue Chain = N->getOperand(0);
2073  EVT VT = N->getValueType(0);
2074  bool is64BitVector = VT.is64BitVector();
2075  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2076 
2077  unsigned OpcodeIndex;
2078  switch (VT.getSimpleVT().SimpleTy) {
2079  default: llvm_unreachable("unhandled vld type");
2080  // Double-register operations:
2081  case MVT::v8i8: OpcodeIndex = 0; break;
2082  case MVT::v4f16:
2083  case MVT::v4bf16:
2084  case MVT::v4i16: OpcodeIndex = 1; break;
2085  case MVT::v2f32:
2086  case MVT::v2i32: OpcodeIndex = 2; break;
2087  case MVT::v1i64: OpcodeIndex = 3; break;
2088  // Quad-register operations:
2089  case MVT::v16i8: OpcodeIndex = 0; break;
2090  case MVT::v8f16:
2091  case MVT::v8bf16:
2092  case MVT::v8i16: OpcodeIndex = 1; break;
2093  case MVT::v4f32:
2094  case MVT::v4i32: OpcodeIndex = 2; break;
2095  case MVT::v2f64:
2096  case MVT::v2i64: OpcodeIndex = 3; break;
2097  }
2098 
2099  EVT ResTy;
2100  if (NumVecs == 1)
2101  ResTy = VT;
2102  else {
2103  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2104  if (!is64BitVector)
2105  ResTyElts *= 2;
2106  ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
2107  }
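  // For example, a VLD3 of v8i8 vectors produces ResTy == v4i64: three D
  // registers of results, rounded up to a four-register super-register.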
2108  std::vector<EVT> ResTys;
2109  ResTys.push_back(ResTy);
2110  if (isUpdating)
2111  ResTys.push_back(MVT::i32);
2112  ResTys.push_back(MVT::Other);
2113 
2114  SDValue Pred = getAL(CurDAG, dl);
2115  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
 2116  SDNode *VLd;
 2117  SmallVector<SDValue, 7> Ops;
 2118 
2119  // Double registers and VLD1/VLD2 quad registers are directly supported.
2120  if (is64BitVector || NumVecs <= 2) {
2121  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2122  QOpcodes0[OpcodeIndex]);
2123  Ops.push_back(MemAddr);
2124  Ops.push_back(Align);
2125  if (isUpdating) {
2126  SDValue Inc = N->getOperand(AddrOpIdx + 1);
2127  bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2128  if (!IsImmUpdate) {
2129  // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
2130  // check for the opcode rather than the number of vector elements.
2131  if (isVLDfixed(Opc))
2132  Opc = getVLDSTRegisterUpdateOpcode(Opc);
2133  Ops.push_back(Inc);
 2134  // A fixed-increment VLD1/VLD2 opcode does not take Reg0, so only include
 2135  // it in the operands for the other opcodes.
2136  } else if (!isVLDfixed(Opc))
2137  Ops.push_back(Reg0);
2138  }
2139  Ops.push_back(Pred);
2140  Ops.push_back(Reg0);
2141  Ops.push_back(Chain);
2142  VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2143 
2144  } else {
2145  // Otherwise, quad registers are loaded with two separate instructions,
2146  // where one loads the even registers and the other loads the odd registers.
2147  EVT AddrTy = MemAddr.getValueType();
2148 
2149  // Load the even subregs. This is always an updating load, so that it
2150  // provides the address to the second load for the odd subregs.
2151  SDValue ImplDef =
2152  SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
2153  const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
2154  SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2155  ResTy, AddrTy, MVT::Other, OpsA);
2156  Chain = SDValue(VLdA, 2);
2157 
2158  // Load the odd subregs.
2159  Ops.push_back(SDValue(VLdA, 1));
2160  Ops.push_back(Align);
2161  if (isUpdating) {
2162  SDValue Inc = N->getOperand(AddrOpIdx + 1);
2163  assert(isa<ConstantSDNode>(Inc.getNode()) &&
2164  "only constant post-increment update allowed for VLD3/4");
2165  (void)Inc;
2166  Ops.push_back(Reg0);
2167  }
2168  Ops.push_back(SDValue(VLdA, 0));
2169  Ops.push_back(Pred);
2170  Ops.push_back(Reg0);
2171  Ops.push_back(Chain);
2172  VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
2173  }
2174 
2175  // Transfer memoperands.
2176  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2177  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});
2178 
2179  if (NumVecs == 1) {
2180  ReplaceNode(N, VLd);
2181  return;
2182  }
2183 
2184  // Extract out the subregisters.
2185  SDValue SuperReg = SDValue(VLd, 0);
2186  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2187  ARM::qsub_3 == ARM::qsub_0 + 3,
2188  "Unexpected subreg numbering");
2189  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
2190  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2191  ReplaceUses(SDValue(N, Vec),
2192  CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2193  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
2194  if (isUpdating)
2195  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
2196  CurDAG->RemoveDeadNode(N);
2197 }
2198 
2199 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
2200  const uint16_t *DOpcodes,
2201  const uint16_t *QOpcodes0,
2202  const uint16_t *QOpcodes1) {
2203  assert(Subtarget->hasNEON());
2204  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
2205  SDLoc dl(N);
2206 
2207  SDValue MemAddr, Align;
2208  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2209  // nodes are not intrinsics.
2210  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2211  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2212  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2213  return;
2214 
2215  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2216 
2217  SDValue Chain = N->getOperand(0);
2218  EVT VT = N->getOperand(Vec0Idx).getValueType();
2219  bool is64BitVector = VT.is64BitVector();
2220  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2221 
2222  unsigned OpcodeIndex;
2223  switch (VT.getSimpleVT().SimpleTy) {
2224  default: llvm_unreachable("unhandled vst type");
2225  // Double-register operations:
2226  case MVT::v8i8: OpcodeIndex = 0; break;
2227  case MVT::v4f16:
2228  case MVT::v4bf16:
2229  case MVT::v4i16: OpcodeIndex = 1; break;
2230  case MVT::v2f32:
2231  case MVT::v2i32: OpcodeIndex = 2; break;
2232  case MVT::v1i64: OpcodeIndex = 3; break;
2233  // Quad-register operations:
2234  case MVT::v16i8: OpcodeIndex = 0; break;
2235  case MVT::v8f16:
2236  case MVT::v8bf16:
2237  case MVT::v8i16: OpcodeIndex = 1; break;
2238  case MVT::v4f32:
2239  case MVT::v4i32: OpcodeIndex = 2; break;
2240  case MVT::v2f64:
2241  case MVT::v2i64: OpcodeIndex = 3; break;
2242  }
2243 
2244  std::vector<EVT> ResTys;
2245  if (isUpdating)
2246  ResTys.push_back(MVT::i32);
2247  ResTys.push_back(MVT::Other);
2248 
2249  SDValue Pred = getAL(CurDAG, dl);
 2250  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
 2251  SmallVector<SDValue, 7> Ops;
 2252 
2253  // Double registers and VST1/VST2 quad registers are directly supported.
2254  if (is64BitVector || NumVecs <= 2) {
2255  SDValue SrcReg;
2256  if (NumVecs == 1) {
2257  SrcReg = N->getOperand(Vec0Idx);
2258  } else if (is64BitVector) {
2259  // Form a REG_SEQUENCE to force register allocation.
2260  SDValue V0 = N->getOperand(Vec0Idx + 0);
2261  SDValue V1 = N->getOperand(Vec0Idx + 1);
2262  if (NumVecs == 2)
2263  SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2264  else {
2265  SDValue V2 = N->getOperand(Vec0Idx + 2);
2266  // If it's a vst3, form a quad D-register and leave the last part as
2267  // an undef.
2268  SDValue V3 = (NumVecs == 3)
2269  ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2270  : N->getOperand(Vec0Idx + 3);
2271  SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2272  }
2273  } else {
2274  // Form a QQ register.
2275  SDValue Q0 = N->getOperand(Vec0Idx);
2276  SDValue Q1 = N->getOperand(Vec0Idx + 1);
2277  SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2278  }
2279 
2280  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2281  QOpcodes0[OpcodeIndex]);
2282  Ops.push_back(MemAddr);
2283  Ops.push_back(Align);
2284  if (isUpdating) {
2285  SDValue Inc = N->getOperand(AddrOpIdx + 1);
2286  bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2287  if (!IsImmUpdate) {
2288  // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
2289  // check for the opcode rather than the number of vector elements.
2290  if (isVSTfixed(Opc))
2291  Opc = getVLDSTRegisterUpdateOpcode(Opc);
2292  Ops.push_back(Inc);
2293  }
 2294  // A fixed-increment VST1/VST2 opcode does not take Reg0, so only include
 2295  // it in the operands for the other opcodes.
2296  else if (!isVSTfixed(Opc))
2297  Ops.push_back(Reg0);
2298  }
2299  Ops.push_back(SrcReg);
2300  Ops.push_back(Pred);
2301  Ops.push_back(Reg0);
2302  Ops.push_back(Chain);
2303  SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2304 
2305  // Transfer memoperands.
2306  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});
2307 
2308  ReplaceNode(N, VSt);
2309  return;
2310  }
2311 
2312  // Otherwise, quad registers are stored with two separate instructions,
2313  // where one stores the even registers and the other stores the odd registers.
2314 
2315  // Form the QQQQ REG_SEQUENCE.
2316  SDValue V0 = N->getOperand(Vec0Idx + 0);
2317  SDValue V1 = N->getOperand(Vec0Idx + 1);
2318  SDValue V2 = N->getOperand(Vec0Idx + 2);
2319  SDValue V3 = (NumVecs == 3)
2320  ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2321  : N->getOperand(Vec0Idx + 3);
2322  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2323 
2324  // Store the even D registers. This is always an updating store, so that it
2325  // provides the address to the second store for the odd subregs.
2326  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2327  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2328  MemAddr.getValueType(),
2329  MVT::Other, OpsA);
2330  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
2331  Chain = SDValue(VStA, 1);
2332 
2333  // Store the odd D registers.
2334  Ops.push_back(SDValue(VStA, 0));
2335  Ops.push_back(Align);
2336  if (isUpdating) {
2337  SDValue Inc = N->getOperand(AddrOpIdx + 1);
2338  assert(isa<ConstantSDNode>(Inc.getNode()) &&
2339  "only constant post-increment update allowed for VST3/4");
2340  (void)Inc;
2341  Ops.push_back(Reg0);
2342  }
2343  Ops.push_back(RegSeq);
2344  Ops.push_back(Pred);
2345  Ops.push_back(Reg0);
2346  Ops.push_back(Chain);
2347  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2348  Ops);
2349  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
2350  ReplaceNode(N, VStB);
2351 }
2352 
2353 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2354  unsigned NumVecs,
2355  const uint16_t *DOpcodes,
2356  const uint16_t *QOpcodes) {
2357  assert(Subtarget->hasNEON());
2358  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2359  SDLoc dl(N);
2360 
2361  SDValue MemAddr, Align;
2362  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2363  // nodes are not intrinsics.
2364  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2365  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2366  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2367  return;
2368 
2369  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2370 
2371  SDValue Chain = N->getOperand(0);
2372  unsigned Lane =
2373  cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2374  EVT VT = N->getOperand(Vec0Idx).getValueType();
2375  bool is64BitVector = VT.is64BitVector();
2376 
2377  unsigned Alignment = 0;
2378  if (NumVecs != 3) {
2379  Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2380  unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2381  if (Alignment > NumBytes)
2382  Alignment = NumBytes;
2383  if (Alignment < 8 && Alignment < NumBytes)
2384  Alignment = 0;
2385  // Alignment must be a power of two; make sure of that.
2386  Alignment = (Alignment & -Alignment);
2387  if (Alignment == 1)
2388  Alignment = 0;
2389  }
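  // For example, a two-vector lane operation on v4i16 accesses NumBytes == 4,
  // so a 16-byte-aligned address is clamped to alignment 4, while anything
  // smaller than the access size is dropped to 0.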
2390  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2391 
2392  unsigned OpcodeIndex;
2393  switch (VT.getSimpleVT().SimpleTy) {
2394  default: llvm_unreachable("unhandled vld/vst lane type");
2395  // Double-register operations:
2396  case MVT::v8i8: OpcodeIndex = 0; break;
2397  case MVT::v4f16:
2398  case MVT::v4bf16:
2399  case MVT::v4i16: OpcodeIndex = 1; break;
2400  case MVT::v2f32:
2401  case MVT::v2i32: OpcodeIndex = 2; break;
2402  // Quad-register operations:
2403  case MVT::v8f16:
2404  case MVT::v8bf16:
2405  case MVT::v8i16: OpcodeIndex = 0; break;
2406  case MVT::v4f32:
2407  case MVT::v4i32: OpcodeIndex = 1; break;
2408  }
2409 
2410  std::vector<EVT> ResTys;
2411  if (IsLoad) {
2412  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2413  if (!is64BitVector)
2414  ResTyElts *= 2;
2415  ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2416  MVT::i64, ResTyElts));
2417  }
2418  if (isUpdating)
2419  ResTys.push_back(MVT::i32);
2420  ResTys.push_back(MVT::Other);
2421 
2422  SDValue Pred = getAL(CurDAG, dl);
2423  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
 2424 
 2425  SmallVector<SDValue, 8> Ops;
 2426  Ops.push_back(MemAddr);
2427  Ops.push_back(Align);
2428  if (isUpdating) {
2429  SDValue Inc = N->getOperand(AddrOpIdx + 1);
2430  bool IsImmUpdate =
2431  isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2432  Ops.push_back(IsImmUpdate ? Reg0 : Inc);
2433  }
2434 
2435  SDValue SuperReg;
2436  SDValue V0 = N->getOperand(Vec0Idx + 0);
2437  SDValue V1 = N->getOperand(Vec0Idx + 1);
2438  if (NumVecs == 2) {
2439  if (is64BitVector)
2440  SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2441  else
2442  SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2443  } else {
2444  SDValue V2 = N->getOperand(Vec0Idx + 2);
2445  SDValue V3 = (NumVecs == 3)
2446  ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2447  : N->getOperand(Vec0Idx + 3);
2448  if (is64BitVector)
2449  SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2450  else
2451  SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2452  }
2453  Ops.push_back(SuperReg);
2454  Ops.push_back(getI32Imm(Lane, dl));
2455  Ops.push_back(Pred);
2456  Ops.push_back(Reg0);
2457  Ops.push_back(Chain);
2458 
2459  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2460  QOpcodes[OpcodeIndex]);
2461  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2462  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
2463  if (!IsLoad) {
2464  ReplaceNode(N, VLdLn);
2465  return;
2466  }
2467 
2468  // Extract the subregisters.
2469  SuperReg = SDValue(VLdLn, 0);
2470  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2471  ARM::qsub_3 == ARM::qsub_0 + 3,
2472  "Unexpected subreg numbering");
2473  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2474  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2475  ReplaceUses(SDValue(N, Vec),
2476  CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2477  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2478  if (isUpdating)
2479  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2480  CurDAG->RemoveDeadNode(N);
2481 }
2482 
2483 template <typename SDValueVector>
2484 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2485  SDValue PredicateMask) {
2486  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2487  Ops.push_back(PredicateMask);
2488 }
2489 
2490 template <typename SDValueVector>
2491 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2492  SDValue PredicateMask,
2493  SDValue Inactive) {
2494  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2495  Ops.push_back(PredicateMask);
2496  Ops.push_back(Inactive);
2497 }
2498 
2499 template <typename SDValueVector>
2500 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
2501  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2502  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2503 }
2504 
2505 template <typename SDValueVector>
2506 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2507  EVT InactiveTy) {
2508  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2509  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2510  Ops.push_back(SDValue(
2511  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
2512 }
2513 
2514 void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
2515  bool Predicated) {
 2516  SDLoc Loc(N);
 2517  SmallVector<SDValue, 8> Ops;
 2518 
2519  uint16_t Opcode;
2520  switch (N->getValueType(1).getVectorElementType().getSizeInBits()) {
2521  case 32:
2522  Opcode = Opcodes[0];
2523  break;
2524  case 64:
2525  Opcode = Opcodes[1];
2526  break;
2527  default:
2528  llvm_unreachable("bad vector element size in SelectMVE_WB");
2529  }
2530 
2531  Ops.push_back(N->getOperand(2)); // vector of base addresses
2532 
2533  int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
2534  Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset
2535 
2536  if (Predicated)
2537  AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
2538  else
2539  AddEmptyMVEPredicateToOps(Ops, Loc);
2540 
2541  Ops.push_back(N->getOperand(0)); // chain
2542 
2543  SmallVector<EVT, 8> VTs;
2544  VTs.push_back(N->getValueType(1));
2545  VTs.push_back(N->getValueType(0));
2546  VTs.push_back(N->getValueType(2));
2547 
2548  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops);
2549  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
2550  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
2551  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
2552  transferMemOperands(N, New);
2553  CurDAG->RemoveDeadNode(N);
2554 }
2555 
2556 void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
2557  bool Immediate,
2558  bool HasSaturationOperand) {
 2559  SDLoc Loc(N);
 2560  SmallVector<SDValue, 6> Ops;
 2561 
2562  // Two 32-bit halves of the value to be shifted
2563  Ops.push_back(N->getOperand(1));
2564  Ops.push_back(N->getOperand(2));
2565 
2566  // The shift count
2567  if (Immediate) {
2568  int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
2569  Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
2570  } else {
2571  Ops.push_back(N->getOperand(3));
2572  }
2573 
2574  // The immediate saturation operand, if any
2575  if (HasSaturationOperand) {
2576  int32_t SatOp = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
2577  int SatBit = (SatOp == 64 ? 0 : 1);
2578  Ops.push_back(getI32Imm(SatBit, Loc));
2579  }
2580 
2581  // MVE scalar shifts are IT-predicable, so include the standard
2582  // predicate arguments.
2583  Ops.push_back(getAL(CurDAG, Loc));
2584  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2585 
2586  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
2587 }
2588 
2589 void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
2590  uint16_t OpcodeWithNoCarry,
2591  bool Add, bool Predicated) {
 2592  SDLoc Loc(N);
 2593  SmallVector<SDValue, 8> Ops;
 2594  uint16_t Opcode;
2595 
2596  unsigned FirstInputOp = Predicated ? 2 : 1;
2597 
2598  // Two input vectors and the input carry flag
2599  Ops.push_back(N->getOperand(FirstInputOp));
2600  Ops.push_back(N->getOperand(FirstInputOp + 1));
2601  SDValue CarryIn = N->getOperand(FirstInputOp + 2);
2602  ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn);
2603  uint32_t CarryMask = 1 << 29;
2604  uint32_t CarryExpected = Add ? 0 : CarryMask;
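  // Bit 29 is where the carry flag lives in the flags word. If the incoming
  // carry is a compile-time constant that already matches the operation's
  // identity (clear for add, set for subtract), the carry input can be
  // dropped and the no-carry opcode used instead.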
2605  if (CarryInConstant &&
2606  (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
2607  Opcode = OpcodeWithNoCarry;
2608  } else {
2609  Ops.push_back(CarryIn);
2610  Opcode = OpcodeWithCarry;
2611  }
2612 
2613  if (Predicated)
2614  AddMVEPredicateToOps(Ops, Loc,
2615  N->getOperand(FirstInputOp + 3), // predicate
2616  N->getOperand(FirstInputOp - 1)); // inactive
2617  else
2618  AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
2619 
2620  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
2621 }
2622 
2623 void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
 2624  SDLoc Loc(N);
 2625  SmallVector<SDValue, 4> Ops;
 2626 
2627  // One vector input, followed by a 32-bit word of bits to shift in
2628  // and then an immediate shift count
2629  Ops.push_back(N->getOperand(1));
2630  Ops.push_back(N->getOperand(2));
2631  int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
2632  Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
2633 
2634  if (Predicated)
2635  AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
2636  else
2637  AddEmptyMVEPredicateToOps(Ops, Loc);
2638 
2639  CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), makeArrayRef(Ops));
2640 }
2641 
2642 static bool SDValueToConstBool(SDValue SDVal) {
2643  assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
2644  ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
2645  uint64_t Value = SDValConstant->getZExtValue();
2646  assert((Value == 0 || Value == 1) && "expected value 0 or 1");
2647  return Value;
2648 }
2649 
2650 void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
2651  const uint16_t *OpcodesS,
2652  const uint16_t *OpcodesU,
2653  size_t Stride, size_t TySize) {
2654  assert(TySize < Stride && "Invalid TySize");
2655  bool IsUnsigned = SDValueToConstBool(N->getOperand(1));
2656  bool IsSub = SDValueToConstBool(N->getOperand(2));
2657  bool IsExchange = SDValueToConstBool(N->getOperand(3));
2658  if (IsUnsigned) {
2659  assert(!IsSub &&
2660  "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
2661  assert(!IsExchange &&
2662  "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
2663  }
2664 
2665  auto OpIsZero = [N](size_t OpNo) {
2666  if (ConstantSDNode *OpConst = dyn_cast<ConstantSDNode>(N->getOperand(OpNo)))
2667  if (OpConst->getZExtValue() == 0)
2668  return true;
2669  return false;
2670  };
2671 
 2672  // If the input accumulator value is not zero, select an instruction with an
 2673  // accumulator; otherwise select an instruction without one.
2674  bool IsAccum = !(OpIsZero(4) && OpIsZero(5));
2675 
2676  const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
2677  if (IsSub)
2678  Opcodes += 4 * Stride;
2679  if (IsExchange)
2680  Opcodes += 2 * Stride;
2681  if (IsAccum)
2682  Opcodes += Stride;
2683  uint16_t Opcode = Opcodes[TySize];
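  // The net effect is to index the table as
  // Opcodes[((IsSub * 4 + IsExchange * 2 + IsAccum) * Stride) + TySize].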
2684 
 2685  SDLoc Loc(N);
 2686  SmallVector<SDValue, 8> Ops;
 2687  // Push the accumulator operands, if they are used
2688  if (IsAccum) {
2689  Ops.push_back(N->getOperand(4));
2690  Ops.push_back(N->getOperand(5));
2691  }
2692  // Push the two vector operands
2693  Ops.push_back(N->getOperand(6));
2694  Ops.push_back(N->getOperand(7));
2695 
2696  if (Predicated)
2697  AddMVEPredicateToOps(Ops, Loc, N->getOperand(8));
2698  else
2699  AddEmptyMVEPredicateToOps(Ops, Loc);
2700 
2701  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
2702 }
2703 
2704 void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
2705  const uint16_t *OpcodesS,
2706  const uint16_t *OpcodesU) {
2707  EVT VecTy = N->getOperand(6).getValueType();
2708  size_t SizeIndex;
2709  switch (VecTy.getVectorElementType().getSizeInBits()) {
2710  case 16:
2711  SizeIndex = 0;
2712  break;
2713  case 32:
2714  SizeIndex = 1;
2715  break;
2716  default:
2717  llvm_unreachable("bad vector element size");
2718  }
2719 
2720  SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex);
2721 }
2722 
2723 void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
2724  const uint16_t *OpcodesS,
2725  const uint16_t *OpcodesU) {
2726  assert(
2727  N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
2728  32 &&
2729  "bad vector element size");
2730  SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);
2731 }
2732 
2733 void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
2734  const uint16_t *const *Opcodes,
2735  bool HasWriteback) {
2736  EVT VT = N->getValueType(0);
2737  SDLoc Loc(N);
2738 
2739  const uint16_t *OurOpcodes;
2740  switch (VT.getVectorElementType().getSizeInBits()) {
2741  case 8:
2742  OurOpcodes = Opcodes[0];
2743  break;
2744  case 16:
2745  OurOpcodes = Opcodes[1];
2746  break;
2747  case 32:
2748  OurOpcodes = Opcodes[2];
2749  break;
2750  default:
2751  llvm_unreachable("bad vector element size in SelectMVE_VLD");
2752  }
2753 
2754  EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
2755  SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
2756  unsigned PtrOperand = HasWriteback ? 1 : 2;
2757 
2758  auto Data = SDValue(
2759  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
2760  SDValue Chain = N->getOperand(0);
 2761  // Add an MVE_VLDn instruction for each Vec, except the last.
2762  for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
2763  SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
2764  auto LoadInst =
2765  CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
2766  Data = SDValue(LoadInst, 0);
2767  Chain = SDValue(LoadInst, 1);
2768  transferMemOperands(N, LoadInst);
2769  }
2770  // The last may need a writeback on it
2771  if (HasWriteback)
2772  ResultTys = {DataTy, MVT::i32, MVT::Other};
2773  SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
2774  auto LoadInst =
2775  CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops);
2776  transferMemOperands(N, LoadInst);
2777 
2778  unsigned i;
2779  for (i = 0; i < NumVecs; i++)
2780  ReplaceUses(SDValue(N, i),
2781  CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT,
2782  SDValue(LoadInst, 0)));
2783  if (HasWriteback)
2784  ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1));
2785  ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1));
2786  CurDAG->RemoveDeadNode(N);
2787 }
2788 
2789 void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
2790  bool Wrapping, bool Predicated) {
2791  EVT VT = N->getValueType(0);
2792  SDLoc Loc(N);
2793 
2794  uint16_t Opcode;
2795  switch (VT.getScalarSizeInBits()) {
2796  case 8:
2797  Opcode = Opcodes[0];
2798  break;
2799  case 16:
2800  Opcode = Opcodes[1];
2801  break;
2802  case 32:
2803  Opcode = Opcodes[2];
2804  break;
2805  default:
2806  llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
2807  }
 2808 
 2809  SmallVector<SDValue, 8> Ops;
 2810  unsigned OpIdx = 1;
2811 
2812  SDValue Inactive;
2813  if (Predicated)
2814  Inactive = N->getOperand(OpIdx++);
2815 
2816  Ops.push_back(N->getOperand(OpIdx++)); // base
2817  if (Wrapping)
2818  Ops.push_back(N->getOperand(OpIdx++)); // limit
2819 
2820  SDValue ImmOp = N->getOperand(OpIdx++); // step
2821  int ImmValue = cast<ConstantSDNode>(ImmOp)->getZExtValue();
2822  Ops.push_back(getI32Imm(ImmValue, Loc));
2823 
2824  if (Predicated)
2825  AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive);
2826  else
2827  AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
2828 
2829  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
2830 }
2831 
2832 void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
2833  size_t NumExtraOps, bool HasAccum) {
2834  bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
 2835  SDLoc Loc(N);
 2836  SmallVector<SDValue, 8> Ops;
 2837 
2838  unsigned OpIdx = 1;
2839 
2840  // Convert and append the immediate operand designating the coprocessor.
 2841  SDValue ImmCoproc = N->getOperand(OpIdx++);
 2842  uint32_t ImmCoprocVal = cast<ConstantSDNode>(ImmCoproc)->getZExtValue();
2843  Ops.push_back(getI32Imm(ImmCoprocVal, Loc));
2844 
 2845  // For accumulating variants, copy the low and high order parts of the
2846  // accumulator into a register pair and add it to the operand vector.
2847  if (HasAccum) {
2848  SDValue AccLo = N->getOperand(OpIdx++);
2849  SDValue AccHi = N->getOperand(OpIdx++);
2850  if (IsBigEndian)
2851  std::swap(AccLo, AccHi);
2852  Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));
2853  }
2854 
2855  // Copy extra operands as-is.
2856  for (size_t I = 0; I < NumExtraOps; I++)
2857  Ops.push_back(N->getOperand(OpIdx++));
2858 
2859  // Convert and append the immediate operand
2860  SDValue Imm = N->getOperand(OpIdx);
2861  uint32_t ImmVal = cast<ConstantSDNode>(Imm)->getZExtValue();
2862  Ops.push_back(getI32Imm(ImmVal, Loc));
2863 
2864  // Accumulating variants are IT-predicable, add predicate operands.
2865  if (HasAccum) {
2866  SDValue Pred = getAL(CurDAG, Loc);
2867  SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2868  Ops.push_back(Pred);
2869  Ops.push_back(PredReg);
2870  }
2871 
 2872  // Create the CDE instruction.
2873  SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);
2874  SDValue ResultPair = SDValue(InstrNode, 0);
2875 
2876  // The original intrinsic had two outputs, and the output of the dual-register
2877  // CDE instruction is a register pair. We need to extract the two subregisters
2878  // and replace all uses of the original outputs with the extracted
2879  // subregisters.
2880  uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
2881  if (IsBigEndian)
2882  std::swap(SubRegs[0], SubRegs[1]);
2883 
2884  for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
2885  if (SDValue(N, ResIdx).use_empty())
2886  continue;
2887  SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc,
2888  MVT::i32, ResultPair);
2889  ReplaceUses(SDValue(N, ResIdx), SubReg);
2890  }
2891 
2892  CurDAG->RemoveDeadNode(N);
2893 }
2894 
2895 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
2896  bool isUpdating, unsigned NumVecs,
2897  const uint16_t *DOpcodes,
2898  const uint16_t *QOpcodes0,
2899  const uint16_t *QOpcodes1) {
2900  assert(Subtarget->hasNEON());
2901  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2902  SDLoc dl(N);
2903 
2904  SDValue MemAddr, Align;
2905  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2906  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2907  return;
2908 
2909  SDValue Chain = N->getOperand(0);
2910  EVT VT = N->getValueType(0);
2911  bool is64BitVector = VT.is64BitVector();
2912 
2913  unsigned Alignment = 0;
2914  if (NumVecs != 3) {
2915  Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2916  unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2917  if (Alignment > NumBytes)
2918  Alignment = NumBytes;
2919  if (Alignment < 8 && Alignment < NumBytes)
2920  Alignment = 0;
2921  // Alignment must be a power of two; make sure of that.
2922  Alignment = (Alignment & -Alignment);
2923  if (Alignment == 1)
2924  Alignment = 0;
2925  }
2926  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2927 
2928  unsigned OpcodeIndex;
2929  switch (VT.getSimpleVT().SimpleTy) {
2930  default: llvm_unreachable("unhandled vld-dup type");
2931  case MVT::v8i8:
2932  case MVT::v16i8: OpcodeIndex = 0; break;
2933  case MVT::v4i16:
2934  case MVT::v8i16:
2935  case MVT::v4f16:
2936  case MVT::v8f16:
2937  case MVT::v4bf16:
2938  case MVT::v8bf16:
2939  OpcodeIndex = 1; break;
2940  case MVT::v2f32:
2941  case MVT::v2i32:
2942  case MVT::v4f32:
2943  case MVT::v4i32: OpcodeIndex = 2; break;
2944  case MVT::v1f64:
2945  case MVT::v1i64: OpcodeIndex = 3; break;
2946  }
2947 
2948  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2949  if (!is64BitVector)
2950  ResTyElts *= 2;
2951  EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
2952 
2953  std::vector<EVT> ResTys;
2954  ResTys.push_back(ResTy);
2955  if (isUpdating)
2956  ResTys.push_back(MVT::i32);
2957  ResTys.push_back(MVT::Other);
2958 
2959  SDValue Pred = getAL(CurDAG, dl);
2960  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2961 
2962  SDNode *VLdDup;
 2963  if (is64BitVector || NumVecs == 1) {
 2964  SmallVector<SDValue, 6> Ops;
 2965  Ops.push_back(MemAddr);
2966  Ops.push_back(Align);
2967  unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] :
2968  QOpcodes0[OpcodeIndex];
2969  if (isUpdating) {
 2970  // Fixed-stride update instructions don't have an explicit writeback
 2971  // operand. It's implicit in the opcode itself.
2972  SDValue Inc = N->getOperand(2);
2973  bool IsImmUpdate =
2974  isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2975  if (NumVecs <= 2 && !IsImmUpdate)
2976  Opc = getVLDSTRegisterUpdateOpcode(Opc);
2977  if (!IsImmUpdate)
2978  Ops.push_back(Inc);
2979  // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2980  else if (NumVecs > 2)
2981  Ops.push_back(Reg0);
2982  }
2983  Ops.push_back(Pred);
2984  Ops.push_back(Reg0);
2985  Ops.push_back(Chain);
2986  VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2987  } else if (NumVecs == 2) {
2988  const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
2989  SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2990  dl, ResTys, OpsA);
2991 
2992  Chain = SDValue(VLdA, 1);
2993  const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
2994  VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2995  } else {
2996  SDValue ImplDef =
2997  SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
2998  const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
2999  SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
3000  dl, ResTys, OpsA);
3001 
3002  SDValue SuperReg = SDValue(VLdA, 0);
3003  Chain = SDValue(VLdA, 1);
3004  const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
3005  VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
3006  }
3007 
3008  // Transfer memoperands.
3009  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3010  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});
3011 
3012  // Extract the subregisters.
3013  if (NumVecs == 1) {
3014  ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
3015  } else {
3016  SDValue SuperReg = SDValue(VLdDup, 0);
3017  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
3018  unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
3019  for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
3020  ReplaceUses(SDValue(N, Vec),
3021  CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
3022  }
3023  }
3024  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
3025  if (isUpdating)
3026  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
3027  CurDAG->RemoveDeadNode(N);
3028 }
3029 
3030 bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
3031  if (!Subtarget->hasMVEIntegerOps())
3032  return false;
3033 
3034  SDLoc dl(N);
3035 
 3036  // We are trying to use VMOV/VMOVX/VINS to more efficiently lower inserts
 3037  // and extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
 3038  // inserts of the correct type:
3039  SDValue Ins1 = SDValue(N, 0);
3040  SDValue Ins2 = N->getOperand(0);
3041  EVT VT = Ins1.getValueType();
3042  if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||
3043  !isa<ConstantSDNode>(Ins1.getOperand(2)) ||
3044  !isa<ConstantSDNode>(Ins2.getOperand(2)) ||
3045  (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))
3046  return false;
3047 
3048  unsigned Lane1 = Ins1.getConstantOperandVal(2);
3049  unsigned Lane2 = Ins2.getConstantOperandVal(2);
3050  if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)
3051  return false;
3052 
3053  // If the inserted values will be able to use T/B already, leave it to the
 3054  // existing tablegen patterns. For example, VCVTT/VCVTB.
3055  SDValue Val1 = Ins1.getOperand(1);
3056  SDValue Val2 = Ins2.getOperand(1);
3057  if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)
3058  return false;
3059 
3060  // Check if the inserted values are both extracts.
3061  if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
3062  Val1.getOpcode() == ARMISD::VGETLANEu) &&
3063  (Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
3064  Val2.getOpcode() == ARMISD::VGETLANEu) &&
3065  isa<ConstantSDNode>(Val1.getOperand(1)) &&
3066  isa<ConstantSDNode>(Val2.getOperand(1)) &&
3067  (Val1.getOperand(0).getValueType() == MVT::v8f16 ||
3068  Val1.getOperand(0).getValueType() == MVT::v8i16) &&
3069  (Val2.getOperand(0).getValueType() == MVT::v8f16 ||
3070  Val2.getOperand(0).getValueType() == MVT::v8i16)) {
3071  unsigned ExtractLane1 = Val1.getConstantOperandVal(1);
3072  unsigned ExtractLane2 = Val2.getConstantOperandVal(1);
3073 
3074  // If the two extracted lanes are from the same place and adjacent, this
3075  // simplifies into an f32 lane move.
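  // For example, extracting lanes {3,2} from one vector and inserting them at
  // lanes {3,2} of another reduces to moving a single 32-bit S-register lane.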
3076  if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 &&
3077  ExtractLane1 == ExtractLane2 + 1) {
3078  SDValue NewExt = CurDAG->getTargetExtractSubreg(
3079  ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0));
3080  SDValue NewIns = CurDAG->getTargetInsertSubreg(
3081  ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0),
3082  NewExt);
3083  ReplaceUses(Ins1, NewIns);
3084  return true;
3085  }
3086 
3087  // Otherwise, use the v8i16 pattern of an extract and an insert, with an
3088  // optional VMOVX for extracting odd lanes.
3089  if (VT == MVT::v8i16) {
3090  SDValue Inp1 = CurDAG->getTargetExtractSubreg(
3091  ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0));
3092  SDValue Inp2 = CurDAG->getTargetExtractSubreg(
3093  ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0));
3094  if (ExtractLane1 % 2 != 0)
3095  Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1), 0);
3096  if (ExtractLane2 % 2 != 0)
3097  Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2), 0);
3098  SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1);
3099  SDValue NewIns =
3100  CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
3101  Ins2.getOperand(0), SDValue(VINS, 0));
3102  ReplaceUses(Ins1, NewIns);
3103  return true;
3104  }
3105  }
3106 
3107  // The inserted values are not extracted - if they are f16 then insert them
3108  // directly using a VINS.
3109  if (VT == MVT::v8f16) {
3110  SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1);
3111  SDValue NewIns =
3112  CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
3113  Ins2.getOperand(0), SDValue(VINS, 0));
3114  ReplaceUses(Ins1, NewIns);
3115  return true;
3116  }
3117 
3118  return false;
3119 }
3120 
3121 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
3122  if (!Subtarget->hasV6T2Ops())
3123  return false;
3124 
3125  unsigned Opc = isSigned
3126  ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
3127  : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
3128  SDLoc dl(N);
3129 
3130  // For unsigned extracts, check for a shift right and mask
3131  unsigned And_imm = 0;
3132  if (N->getOpcode() == ISD::AND) {
3133  if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
3134 
3135  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
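  // For example, 0xff & (0xff + 1) == 0, so 0xff is accepted as a low-bit
  // mask, while 0xf0 & 0xf1 == 0xf0 != 0, so 0xf0 is rejected.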
3136  if (And_imm & (And_imm + 1))
3137  return false;
3138 
3139  unsigned Srl_imm = 0;
3140  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
3141  Srl_imm)) {
3142  assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3143 
3144  // Mask off the unnecessary bits of the AND immediate; normally
3145  // DAGCombine will do this, but that might not happen if
3146  // targetShrinkDemandedConstant chooses a different immediate.
3147  And_imm &= -1U >> Srl_imm;
3148 
3149  // Note: The width operand is encoded as width-1.
3150  unsigned Width = countTrailingOnes(And_imm) - 1;
3151  unsigned LSB = Srl_imm;
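  // For example, (x srl 7) & 0xff yields LSB = 7 and Width = 7, i.e. a UBFX
  // extracting eight bits starting at bit 7 (the width operand encoding 8-1).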
3152 
3153  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3154 
3155  if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
3156  // It's cheaper to use a right shift to extract the top bits.
3157  if (Subtarget->isThumb()) {
3158  Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
3159  SDValue Ops[] = { N->getOperand(0).getOperand(0),
3160  CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3161  getAL(CurDAG, dl), Reg0, Reg0 };
3162  CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3163  return true;
3164  }
3165 
3166  // ARM models shift instructions as MOVsi with shifter operand.
3167  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
3168  SDValue ShOpc =
3169  CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
3170  MVT::i32);
3171  SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
3172  getAL(CurDAG, dl), Reg0, Reg0 };
3173  CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
3174  return true;
3175  }
3176 
3177  assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3178  SDValue Ops[] = { N->getOperand(0).getOperand(0),
3179  CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3180  CurDAG->getTargetConstant(Width, dl, MVT::i32),
3181  getAL(CurDAG, dl), Reg0 };
3182  CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3183  return true;
3184  }
3185  }
3186  return false;
3187  }
3188 
3189  // Otherwise, we're looking for a shift of a shift
3190  unsigned Shl_imm = 0;
3191  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
3192  assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
3193  unsigned Srl_imm = 0;
3194  if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
3195  assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3196  // Note: The width operand is encoded as width-1.
3197  unsigned Width = 32 - Srl_imm - 1;
3198  int LSB = Srl_imm - Shl_imm;
3199  if (LSB < 0)
3200  return false;
3201  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3202  assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3203  SDValue Ops[] = { N->getOperand(0).getOperand(0),
3204  CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3205  CurDAG->getTargetConstant(Width, dl, MVT::i32),
3206  getAL(CurDAG, dl), Reg0 };
3207  CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3208  return true;
3209  }
3210  }
3211 
3212  // Or we are looking for a shift of an AND with a mask operand.
3213  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
3214  isShiftedMask_32(And_imm)) {
3215  unsigned Srl_imm = 0;
3216  unsigned LSB = countTrailingZeros(And_imm);
3217  // The shift amount must equal the AND mask's least significant set bit.
3218  if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
3219  assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3220  unsigned MSB = 31 - countLeadingZeros(And_imm);
3221  // Note: The width operand is encoded as width-1.
3222  unsigned Width = MSB - LSB;
3223  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3224  assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3225  SDValue Ops[] = { N->getOperand(0).getOperand(0),
3226  CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
3227  CurDAG->getTargetConstant(Width, dl, MVT::i32),
3228  getAL(CurDAG, dl), Reg0 };
3229  CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3230  return true;
3231  }
3232  }
3233 
3234  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
3235  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
3236  unsigned LSB = 0;
3237  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
3238  !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
3239  return false;
3240 
3241  if (LSB + Width > 32)
3242  return false;
3243 
3244  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3245  assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
3246  SDValue Ops[] = { N->getOperand(0).getOperand(0),
3247  CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3248  CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
3249  getAL(CurDAG, dl), Reg0 };
3250  CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3251  return true;
3252  }
3253 
3254  return false;
3255 }
3256 
3257 /// Target-specific DAG combining for ISD::XOR.
3258 /// Target-independent combining lowers SELECT_CC nodes of the form
3259 /// select_cc setg[te] X, 0, X, -X
3260 /// select_cc setgt X, -1, X, -X
3261 /// select_cc setl[te] X, 0, -X, X
3262 /// select_cc setlt X, 1, -X, X
3263 /// which represent integer ABS, into:
3264 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
3265 /// ARM instruction selection detects the latter and matches it to
3266 /// ARM::ABS or ARM::t2ABS machine node.
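/// For example, for X = -5: Y = sra(-5, 31) = -1, add(X, Y) = -6, and
/// xor(-6, -1) = 5 = |X|; for non-negative X, Y = 0 and X passes through
/// unchanged.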
3267 bool ARMDAGToDAGISel::tryABSOp(SDNode *N) {
3268  SDValue XORSrc0 = N->getOperand(0);
3269  SDValue XORSrc1 = N->getOperand(1);
3270  EVT VT = N->getValueType(0);
3271 
3272  if (Subtarget->isThumb1Only())
3273  return false;
3274 
3275  if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
3276  return false;
3277 
3278  SDValue ADDSrc0 = XORSrc0.getOperand(0);
3279  SDValue ADDSrc1 = XORSrc0.getOperand(1);
3280  SDValue SRASrc0 = XORSrc1.getOperand(0);
3281  SDValue SRASrc1 = XORSrc1.getOperand(1);
3282  ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
3283  EVT XType = SRASrc0.getValueType();
3284  unsigned Size = XType.getSizeInBits() - 1;
3285 
3286  if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
3287  XType.isInteger() && SRAConstant != nullptr &&
3288  Size == SRAConstant->getZExtValue()) {
3289  unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
3290  CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
3291  return true;
3292  }
3293 
3294  return false;
3295 }
3296 
3297 /// Atomic compare-and-swap has dedicated pseudo-instructions; select the one
3298 /// matching the memory width.
3298 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3299  unsigned Opcode;
3300  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
3301  if (MemTy == MVT::i8)
3302  Opcode = ARM::CMP_SWAP_8;
3303  else if (MemTy == MVT::i16)
3304  Opcode = ARM::CMP_SWAP_16;
3305  else if (MemTy == MVT::i32)
3306  Opcode = ARM::CMP_SWAP_32;
3307  else
3308  llvm_unreachable("Unknown AtomicCmpSwap type");
3309 
3310  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
3311  N->getOperand(0)};
3312  SDNode *CmpSwap = CurDAG->getMachineNode(
3313  Opcode, SDLoc(N),
3314  CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
3315 
3316  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
3317  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
3318 
3319  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
3320  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
3321  CurDAG->RemoveDeadNode(N);
3322 }
3323 
3324 static Optional<std::pair<unsigned, unsigned>>
3325 getContiguousRangeOfSetBits(const APInt &A) {
3326  unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
3327  unsigned LastOne = A.countTrailingZeros();
3328  if (A.countPopulation() != (FirstOne - LastOne + 1))
3329  return Optional<std::pair<unsigned, unsigned>>();
3330  return std::make_pair(FirstOne, LastOne);
3331 }
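// For example, 0x0ff0 yields the range (11, 4), while 0x0f0f has no single
// contiguous run of set bits and yields None.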
3332 
3333 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
3334  assert(N->getOpcode() == ARMISD::CMPZ);
3335  SwitchEQNEToPLMI = false;
3336 
3337  if (!Subtarget->isThumb())
3338  // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
3339  // LSR don't exist as standalone instructions - they need the barrel shifter.
3340  return;
3341 
3342  // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
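  // For example, (X & 0xff000000) == 0 becomes LSRS X, #24; (X & 0xff) == 0
  // becomes LSLS X, #24; a single-bit mask such as 0x80 is shifted into the
  // sign bit so the EQ/NE test becomes a PL/MI test.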
3343  SDValue And = N->getOperand(0);
3344  if (!And->hasOneUse())
3345  return;
3346 
3347  SDValue Zero = N->getOperand(1);
3348  if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
3349  And->getOpcode() != ISD::AND)
3350  return;
3351  SDValue X = And.getOperand(0);
3352  auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
3353 
3354  if (!C)
3355  return;
3356  auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
3357  if (!Range)
3358  return;
3359 
3360  // There are several ways to lower this:
3361  SDNode *NewN;
3362  SDLoc dl(N);
3363 
3364  auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
3365  if (Subtarget->isThumb2()) {
3366  Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
3367  SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
3368  getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3369  CurDAG->getRegister(0, MVT::i32) };
3370  return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3371  } else {
3372  SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
3373  CurDAG->getTargetConstant(Imm, dl, MVT::i32),
3374  getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3375  return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3376  }
3377  };
3378 
3379  if (Range->second == 0) {
3380  // 1. Mask includes the LSB -> Simply shift the top N bits off
3381  NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3382  ReplaceNode(And.getNode(), NewN);
3383  } else if (Range->first == 31) {
3384  // 2. Mask includes the MSB -> Simply shift the bottom N bits off
3385  NewN = EmitShift(ARM::tLSRri, X, Range->second);
3386  ReplaceNode(And.getNode(), NewN);
3387  } else if (Range->first == Range->second) {
3388  // 3. Only one bit is set. We can shift this into the sign bit and use a
3389  // PL/MI comparison.
3390  NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3391  ReplaceNode(And.getNode(), NewN);
3392 
3393  SwitchEQNEToPLMI = true;
3394  } else if (!Subtarget->hasV6T2Ops()) {
3395  // 4. Do a double shift to clear bottom and top bits, but only in
3396  // thumb-1 mode as in thumb-2 we can use UBFX.
3397  NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3398  NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
3399  Range->second + (31 - Range->first));
3400  ReplaceNode(And.getNode(), NewN);
3401  }
3402 
3403 }
3404 
3405 void ARMDAGToDAGISel::Select(SDNode *N) {
3406  SDLoc dl(N);
3407 
3408  if (N->isMachineOpcode()) {
3409  N->setNodeId(-1);
3410  return; // Already selected.
3411  }
3412 
3413  switch (N->getOpcode()) {
3414  default: break;
3415  case ISD::STORE: {
3416  // For Thumb1, match an sp-relative store in C++. This is a little
3417  // unfortunate, but I don't think I can make the chain check work
3418  // otherwise. (The chain of the store has to be the same as the chain
3419  // of the CopyFromReg, or else we can't replace the CopyFromReg with
3420  // a direct reference to "SP".)
3421  //
3422  // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
3423  // a different addressing mode from other four-byte stores.
3424  //
3425  // This pattern usually comes up with call arguments.
3426  StoreSDNode *ST = cast<StoreSDNode>(N);
3427  SDValue Ptr = ST->getBasePtr();
3428  if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
3429  int RHSC = 0;
3430  if (Ptr.getOpcode() == ISD::ADD &&
3431  isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
3432  Ptr = Ptr.getOperand(0);
3433 
3434  if (Ptr.getOpcode() == ISD::CopyFromReg &&
3435  cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
3436  Ptr.getOperand(0) == ST->getChain()) {
3437  SDValue Ops[] = {ST->getValue(),
3438  CurDAG->getRegister(ARM::SP, MVT::i32),
3439  CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
3440  getAL(CurDAG, dl),
3441  CurDAG->getRegister(0, MVT::i32),
3442  ST->getChain()};
3443  MachineSDNode *ResNode =
3444  CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
3445  MachineMemOperand *MemOp = ST->getMemOperand();
3446  CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3447  ReplaceNode(N, ResNode);
3448  return;
3449  }
3450  }
3451  break;
3452  }
3453  case ISD::WRITE_REGISTER:
3454  if (tryWriteRegister(N))
3455  return;
3456  break;
3457  case ISD::READ_REGISTER:
3458  if (tryReadRegister(N))
3459  return;
3460  break;
3461  case ISD::INLINEASM:
3462  case ISD::INLINEASM_BR:
3463  if (tryInlineAsm(N))
3464  return;
3465  break;
3466  case ISD::XOR:
3467  // Select special operations if XOR node forms integer ABS pattern
3468  if (tryABSOp(N))
3469  return;
3470  // Other cases are autogenerated.
3471  break;
3472  case ISD::Constant: {
3473  unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
3474  // If we can't materialize the constant we need to use a literal pool
3475  if (ConstantMaterializationCost(Val, Subtarget) > 2) {
3476  SDValue CPIdx = CurDAG->getTargetConstantPool(
3477  ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
3478  TLI->getPointerTy(CurDAG->getDataLayout()));
3479 
3480  SDNode *ResNode;
3481  if (Subtarget->isThumb()) {
3482  SDValue Ops[] = {
3483  CPIdx,
3484  getAL(CurDAG, dl),
3485  CurDAG->getRegister(0, MVT::i32),
3486  CurDAG->getEntryNode()
3487  };
3488  ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
3489  Ops);
3490  } else {
3491  SDValue Ops[] = {
3492  CPIdx,
3493  CurDAG->getTargetConstant(0, dl, MVT::i32),
3494  getAL(CurDAG, dl),
3495  CurDAG->getRegister(0, MVT::i32),
3496  CurDAG->getEntryNode()
3497  };
3498  ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
3499  Ops);
3500  }
3501  // Annotate the Node with memory operand information so that MachineInstr
3502  // queries work properly. This e.g. gives the register allocation the
3503  // required information for rematerialization.
3504  MachineFunction& MF = CurDAG->getMachineFunction();
3505  MachineMemOperand *MemOp =
3506  MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3507  MachineMemOperand::MOLoad, 4, Align(4));
3508 
3509  CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3510 
3511  ReplaceNode(N, ResNode);
3512  return;
3513  }
3514 
3515  // Other cases are autogenerated.
3516  break;
3517  }
3518  case ISD::FrameIndex: {
3519  // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
3520  int FI = cast<FrameIndexSDNode>(N)->getIndex();
3521  SDValue TFI = CurDAG->getTargetFrameIndex(
3522  FI, TLI->getPointerTy(CurDAG->getDataLayout()));
3523  if (Subtarget->isThumb1Only()) {
3524  // Set the alignment of the frame object to 4, to avoid having to generate
3525  // more than one ADD
3526  MachineFrameInfo &MFI = MF->getFrameInfo();
3527  if (MFI.getObjectAlign(FI) < Align(4))
3528  MFI.setObjectAlignment(FI, Align(4));
3529  CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
3530  CurDAG->getTargetConstant(0, dl, MVT::i32));
3531  return;
3532  } else {
3533  unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
3534  ARM::t2ADDri : ARM::ADDri);
3535  SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
3536  getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3537  CurDAG->getRegister(0, MVT::i32) };
3538  CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3539  return;
3540  }
3541  }
3542  case ISD::INSERT_VECTOR_ELT: {
3543  if (tryInsertVectorElt(N))
3544  return;
3545  break;
3546  }
3547  case ISD::SRL:
3548  if (tryV6T2BitfieldExtractOp(N, false))
3549  return;
3550  break;
3551  case ISD::SIGN_EXTEND_INREG:
3552  case ISD::SRA:
3553  if (tryV6T2BitfieldExtractOp(N, true))
3554  return;
3555  break;
3556  case ISD::MUL:
3557  if (Subtarget->isThumb1Only())
3558  break;
3559  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
3560  unsigned RHSV = C->getZExtValue();
3561  if (!RHSV) break;
3562  if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
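  // For example, x * 9 = x + (x << 3), selected as an ADD with an LSL #3
  // shifter operand.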
3563  unsigned ShImm = Log2_32(RHSV-1);
3564  if (ShImm >= 32)
3565  break;
3566  SDValue V = N->getOperand(0);
3567  ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3568  SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3569  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3570  if (Subtarget->isThumb()) {
3571  SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3572  CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
3573  return;
3574  } else {
3575  SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3576  Reg0 };
3577  CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
3578  return;
3579  }
3580  }
3581  if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
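  // For example, x * 7 = (x << 3) - x, selected as a reverse subtract (RSB)
  // with an LSL #3 shifter operand.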
3582  unsigned ShImm = Log2_32(RHSV+1);
3583  if (ShImm >= 32)
3584  break;
3585  SDValue V = N->getOperand(0);
3586  ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3587  SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3588  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3589  if (Subtarget->isThumb()) {
3590  SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3591  CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
3592  return;
3593  } else {
3594  SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3595  Reg0 };
3596  CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
3597  return;
3598  }
3599  }
3600  }
3601  break;
3602  case ISD::AND: {
3603  // Check for unsigned bitfield extract
3604  if (tryV6T2BitfieldExtractOp(N, false))
3605  return;
3606 
3607  // If an immediate is used in an AND node, it is possible that the immediate
3608  // can be more optimally materialized when negated. If this is the case we
3609  // can negate the immediate and use a BIC instead.
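  // For example, x & 0xffffff00 == x & ~0xff, so a BIC with the cheap
  // constant 0xff can replace materializing 0xffffff00.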
3610  auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
3611  if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
3612  uint32_t Imm = (uint32_t) N1C->getZExtValue();
3613 
3614  // In Thumb2 mode, an AND can take a 12-bit immediate. If this
3615  // immediate can be negated and fit in the immediate operand of
3616  // a t2BIC, don't do any manual transform here as this can be
3617  // handled by the generic ISel machinery.
3618  bool PreferImmediateEncoding =
3619  Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
3620  if (!PreferImmediateEncoding &&
3621  ConstantMaterializationCost(Imm, Subtarget) >
3622  ConstantMaterializationCost(~Imm, Subtarget)) {
3623  // The current immediate costs more to materialize than a negated
3624  // immediate, so negate the immediate and use a BIC.
3625  SDValue NewImm =
3626  CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
3627  // If the new constant didn't exist before, reposition it in the topological
3628  // ordering so it is just before N. Otherwise, don't touch its location.
3629  if (NewImm->getNodeId() == -1)
3630  CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
3631 
3632  if (!Subtarget->hasThumb2()) {
3633  SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
3634  N->getOperand(0), NewImm, getAL(CurDAG, dl),
3635  CurDAG->getRegister(0, MVT::i32)};
3636  ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
3637  return;
3638  } else {
3639  SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
3640  CurDAG->getRegister(0, MVT::i32),
3641  CurDAG->getRegister(0, MVT::i32)};
3642  ReplaceNode(N,
3643  CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
3644  return;
3645  }
3646  }
3647  }
3648 
3649  // (and (or x, c2), c1): if the top 16 bits of c1 and c2 match, the lower
3650  // 16 bits of c1 are 0xffff, and the lower 16 bits of c2 are 0, then the top
3651  // 16 bits are entirely contributed by c2 and the lower 16 bits entirely
3652  // by x. That is equal to (or (and x, 0xffff), (and c2, 0xffff0000)).
3653  // Select it to: movt x, ((c2 & 0xffff0000) >> 16)
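  // For example, c1 = 0x1234ffff and c2 = 0x12340000 select MOVT x, #0x1234,
  // which keeps the low 16 bits of x and writes 0x1234 to the high half.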
3654  EVT VT = N->getValueType(0);
3655  if (VT != MVT::i32)
3656  break;
3657  unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
3658  ? ARM::t2MOVTi16
3659  : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
3660  if (!Opc)
3661  break;
3662  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3663  N1C = dyn_cast<ConstantSDNode>(N1);
3664  if (!N1C)
3665  break;
3666  if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
3667  SDValue N2 = N0.getOperand(1);
3668  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
3669  if (!N2C)
3670  break;
3671  unsigned N1CVal = N1C->getZExtValue();
3672  unsigned N2CVal = N2C->getZExtValue();
3673  if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
3674  (N1CVal & 0xffffU) == 0xffffU &&
3675  (N2CVal & 0xffffU) == 0x0U) {
3676  SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
3677  dl, MVT::i32);
3678  SDValue Ops[] = { N0.getOperand(0), Imm16,
3679  getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
3680  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
3681  return;
3682  }
3683  }
3684 
3685  break;
3686  }
3687  case ARMISD::UMAAL: {
3688  unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3689  SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3690  N->getOperand(2), N->getOperand(3),
3691  getAL(CurDAG, dl),
3692  CurDAG->getRegister(0, MVT::i32) };
3693  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
3694  return;
3695  }
3696  case ARMISD::UMLAL:{
3697  if (Subtarget->isThumb()) {
3698  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3699  N->getOperand(3), getAL(CurDAG, dl),
3700  CurDAG->getRegister(0, MVT::i32)};
3701  ReplaceNode(
3702  N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
3703  return;
3704  } else {
3705  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3706  N->getOperand(3), getAL(CurDAG, dl),
3707  CurDAG->getRegister(0, MVT::i32),
3708  CurDAG->getRegister(0, MVT::i32) };
3709  ReplaceNode(N, CurDAG->getMachineNode(
3710  Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3711  MVT::i32, MVT::i32, Ops));
3712  return;
3713  }
3714  }
3715  case ARMISD::SMLAL:{
3716  if (Subtarget->isThumb()) {
3717  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3718  N->getOperand(3), getAL(CurDAG, dl),
3719  CurDAG->getRegister(0, MVT::i32)};
3720  ReplaceNode(
3721  N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3722  return;
3723  } else {
3724  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3725  N->getOperand(3), getAL(CurDAG, dl),
3726  CurDAG->getRegister(0, MVT::i32),
3727  CurDAG->getRegister(0, MVT::i32) };
3728  ReplaceNode(N, CurDAG->getMachineNode(
3729  Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3730  MVT::i32, MVT::i32, Ops));
3731  return;
3732  }
3733  }
3734  case ARMISD::SUBE: {
3735  if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
3736  break;
3737  // Look for a pattern to match SMMLS
3738  // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
3739  if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
3740  N->getOperand(2).getOpcode() != ARMISD::SUBC ||
3741  !SDValue(N, 1).use_empty())
3742  break;
3743 
3744  if (Subtarget->isThumb())
3745  assert(Subtarget->hasThumb2() &&
3746  "This pattern should not be generated for Thumb");
3747 
3748  SDValue SmulLoHi = N->getOperand(1);
3749  SDValue Subc = N->getOperand(2);
3750  auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
3751 
3752  if (!Zero || Zero->getZExtValue() != 0 ||
3753  Subc.getOperand(1) != SmulLoHi.getValue(0) ||
3754  N->getOperand(1) != SmulLoHi.getValue(1) ||
3755  N->getOperand(2) != Subc.getValue(1))
3756  break;
3757 
3758  unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3759  SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
3760  N->getOperand(0), getAL(CurDAG, dl),
3761  CurDAG->getRegister(0, MVT::i32) };
3762  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
3763  return;
3764  }
3765  case ISD::LOAD: {
3766  if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3767  return;
3768  if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
3769  if (tryT2IndexedLoad(N))
3770  return;
3771  } else if (Subtarget->isThumb()) {
3772  if (tryT1IndexedLoad(N))
3773  return;
3774  } else if (tryARMIndexedLoad(N))
3775  return;
3776  // Other cases are autogenerated.
3777  break;
3778  }
3779  case ISD::MLOAD:
3780  if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3781  return;
3782  // Other cases are autogenerated.
3783  break;
3784  case ARMISD::WLSSETUP: {
3785  SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32,
3786  N->getOperand(0));
3787  ReplaceUses(N, New);
3788  CurDAG->RemoveDeadNode(N);
3789  return;
3790  }
3791  case ARMISD::WLS: {
3792  SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other,
3793  N->getOperand(1), N->getOperand(2),
3794  N->getOperand(0));
3795  ReplaceUses(N, New);
3796  CurDAG->RemoveDeadNode(N);
3797  return;
3798  }
3799  case ARMISD::LE: {
3800  SDValue Ops[] = { N->getOperand(1),
3801  N->getOperand(2),
3802  N->getOperand(0) };
3803  unsigned Opc = ARM::t2LoopEnd;
3804  SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
3805  ReplaceUses(N, New);
3806  CurDAG->RemoveDeadNode(N);
3807  return;
3808  }
3809  case ARMISD::LDRD: {
3810  if (Subtarget->isThumb2())
3811  break; // TableGen handles isel in this case.
3812  SDValue Base, RegOffset, ImmOffset;
3813  const SDValue &Chain = N->getOperand(0);
3814  const SDValue &Addr = N->getOperand(1);
3815  SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
3816  if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
3817  // The register-offset variant of LDRD mandates that the register
3818  // allocated to RegOffset is not reused in any of the remaining operands.
3819  // This restriction is currently not enforced. Therefore emitting this
3820  // variant is explicitly avoided.
3821  Base = Addr;
3822  RegOffset = CurDAG->getRegister(0, MVT::i32);
3823  }
3824  SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
3825  SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
3826  {MVT::Untyped, MVT::Other}, Ops);
3827  SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
3828  SDValue(New, 0));
3829  SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
3830  SDValue(New, 0));
3831  transferMemOperands(N, New);
3832  ReplaceUses(SDValue(N, 0), Lo);
3833  ReplaceUses(SDValue(N, 1), Hi);
3834  ReplaceUses(SDValue(N, 2), SDValue(New, 1));
3835  CurDAG->RemoveDeadNode(N);
3836  return;
3837  }
3838  case ARMISD::STRD: {
3839  if (Subtarget->isThumb2())
3840  break; // TableGen handles isel in this case.
3841  SDValue Base, RegOffset, ImmOffset;
3842  const SDValue &Chain = N->getOperand(0);
3843  const SDValue &Addr = N->getOperand(3);
3844  SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
3845  if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
3846  // The register-offset variant of STRD mandates that the register
3847  // allocated to RegOffset is not reused in any of the remaining operands.
3848  // This restriction is currently not enforced. Therefore emitting this
3849  // variant is explicitly avoided.
3850  Base = Addr;
3851  RegOffset = CurDAG->getRegister(0, MVT::i32);
3852  }
3853  SDNode *RegPair =
3854  createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2));
3855  SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
3856  SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops);
3857  transferMemOperands(N, New);
3858  ReplaceUses(SDValue(N, 0), SDValue(New, 0));
3859  CurDAG->RemoveDeadNode(N);
3860  return;
3861  }
3862  case ARMISD::LOOP_DEC: {
3863  SDValue Ops[] = { N->getOperand(1),
3864  N->getOperand(2),
3865  N->getOperand(0) };
3866  SDNode *Dec =
3867  CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3868  CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
3869  ReplaceUses(N, Dec);
3870  CurDAG->RemoveDeadNode(N);
3871  return;
3872  }
3873  case ARMISD::BRCOND: {
3874  // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3875  // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3876  // Pattern complexity = 6 cost = 1 size = 0
3877 
3878  // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3879  // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
3880  // Pattern complexity = 6 cost = 1 size = 0
3881 
3882  // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3883  // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3884  // Pattern complexity = 6 cost = 1 size = 0
3885 
3886  unsigned Opc = Subtarget->isThumb() ?
3887  ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
3888  SDValue Chain = N->getOperand(0);
3889  SDValue N1 = N->getOperand(1);
3890  SDValue N2 = N->getOperand(2);
3891  SDValue N3 = N->getOperand(3);
3892  SDValue InFlag = N->getOperand(4);
3893  assert(N1.getOpcode() == ISD::BasicBlock);
3894  assert(N2.getOpcode() == ISD::Constant);
3895  assert(N3.getOpcode() == ISD::Register);
3896 
3897  unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
3898 
3899  if (InFlag.getOpcode() == ARMISD::CMPZ) {
3900  if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
3901  SDValue Int = InFlag.getOperand(0);
3902  uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
3903 
3904  // Handle low-overhead loops.
3905  if (ID == Intrinsic::loop_decrement_reg) {
3906  SDValue Elements = Int.getOperand(2);
3907  SDValue Size = CurDAG->getTargetConstant(
3908  cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
3909  MVT::i32);
3910 
3911  SDValue Args[] = { Elements, Size, Int.getOperand(0) };
3912  SDNode *LoopDec =
3913  CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3914  CurDAG->getVTList(MVT::i32, MVT::Other),
3915  Args);
3916  ReplaceUses(Int.getNode(), LoopDec);
3917 
3918  SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
3919  SDNode *LoopEnd =
3920  CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
3921 
3922  ReplaceUses(N, LoopEnd);
3923  CurDAG->RemoveDeadNode(N);
3924  CurDAG->RemoveDeadNode(InFlag.getNode());
3925  CurDAG->RemoveDeadNode(Int.getNode());
3926  return;
3927  }
3928  }
3929 
3930  bool SwitchEQNEToPLMI;
3931  SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3932  InFlag = N->getOperand(4);
3933 
3934  if (SwitchEQNEToPLMI) {
3935  switch ((ARMCC::CondCodes)CC) {
3936  default: llvm_unreachable("CMPZ must be either NE or EQ!");
3937  case ARMCC::NE:
3938  CC = (unsigned)ARMCC::MI;
3939  break;
3940  case ARMCC::EQ:
3941  CC = (unsigned)ARMCC::PL;
3942  break;
3943  }
3944  }
3945  }
3946 
3947  SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
3948  SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
3949  SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3950  MVT::Glue, Ops);
3951  Chain = SDValue(ResNode, 0);
3952  if (N->getNumValues() == 2) {
3953  InFlag = SDValue(ResNode, 1);
3954  ReplaceUses(SDValue(N, 1), InFlag);
3955  }
3956  ReplaceUses(SDValue(N, 0),
3957  SDValue(Chain.getNode(), Chain.getResNo()));
3958  CurDAG->RemoveDeadNode(N);
3959  return;
3960  }
3961 
3962  case ARMISD::CMPZ: {
3963  // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
3964  // This allows us to avoid materializing the expensive negative constant.
3965  // The CMPZ #0 is useless and will be peepholed away but we need to keep it
3966  // for its glue output.
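  // For example, X == -2 is rewritten as (X + 2) == 0, so an ADDS with the
  // small immediate 2 replaces materializing the constant -2.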
3967  SDValue X = N->getOperand(0);
3968  auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
3969  if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
3970  int64_t Addend = -C->getSExtValue();
3971 
3972  SDNode *Add = nullptr;
3973  // ADDS can be better than CMN if the immediate fits in a
3974  // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
3975  // Outside that range we can just use a CMN which is 32-bit but has a
3976  // 12-bit immediate range.
3977  if (Addend < 1<<8) {
3978  if (Subtarget->isThumb2()) {
3979  SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3980  getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3981  CurDAG->getRegister(0, MVT::i32) };
3982  Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
3983  } else {
3984  unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
3985  SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3986  CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3987  getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3988  Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3989  }
3990  }
3991  if (Add) {
3992  SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
3993  CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
3994  }
3995  }
3996  // Other cases are autogenerated.
3997  break;
3998  }
3999 
4000  case ARMISD::CMOV: {
4001  SDValue InFlag = N->getOperand(4);
4002 
4003  if (InFlag.getOpcode() == ARMISD::CMPZ) {
4004  bool SwitchEQNEToPLMI;
4005  SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
4006 
4007  if (SwitchEQNEToPLMI) {
4008  SDValue ARMcc = N->getOperand(2);
4009  ARMCC::CondCodes CC =
4010  (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
4011 
4012  switch (CC) {
4013  default: llvm_unreachable("CMPZ must be either NE or EQ!");
4014  case ARMCC::NE:
4015  CC = ARMCC::MI;
4016  break;
4017  case ARMCC::EQ:
4018  CC = ARMCC::PL;
4019  break;
4020  }
4021  SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
4022  SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
4023  N->getOperand(3), N->getOperand(4)};
4024  CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
4025  }
4026 
4027  }
4028  // Other cases are autogenerated.
4029  break;
4030  }
4031 
4032  case ARMISD::VZIP: {
4033  unsigned Opc = 0;
4034  EVT VT = N->getValueType(0);
4035  switch (VT.getSimpleVT().SimpleTy) {
4036  default: return;
4037  case MVT::v8i8: Opc = ARM::VZIPd8; break;
4038  case MVT::v4f16:
4039  case MVT::v4i16: Opc = ARM::VZIPd16; break;
4040  case MVT::v2f32:
4041  // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4042  case MVT::v2i32: Opc = ARM::VTRNd32; break;
4043  case MVT::v16i8: Opc = ARM::VZIPq8; break;
4044  case MVT::v8f16:
4045  case MVT::v8i16: Opc = ARM::VZIPq16; break;
4046  case MVT::v4f32:
4047  case MVT::v4i32: Opc = ARM::VZIPq32; break;
4048  }
4049  SDValue Pred = getAL(CurDAG, dl);
4050  SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4051  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
4052  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4053  return;
4054  }
4055  case ARMISD::VUZP: {
4056  unsigned Opc = 0;
4057  EVT VT = N->getValueType(0);
4058  switch (VT.getSimpleVT().SimpleTy) {
4059  default: return;
4060  case MVT::v8i8: Opc = ARM::VUZPd8; break;
4061  case MVT::v4f16:
4062  case MVT::v4i16: Opc = ARM::VUZPd16; break;
4063  case MVT::v2f32:
4064  // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4065  case MVT::v2i32: Opc = ARM::VTRNd32; break;
4066  case MVT::v16i8: Opc = ARM::VUZPq8; break;
4067  case MVT::v8f16:
4068  case MVT::v8i16: Opc = ARM::VUZPq16; break;
4069  case MVT::v4f32:
4070  case MVT::v4i32: Opc = ARM::VUZPq32; break;
4071  }
4072  SDValue Pred = getAL(CurDAG, dl);
4073  SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4074  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
4075  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4076  return;
4077  }
4078  case ARMISD::VTRN: {
4079  unsigned Opc = 0;
4080  EVT VT = N->getValueType(0);
4081  switch (VT.getSimpleVT().SimpleTy) {
4082  default: return;
4083  case MVT::v8i8: Opc = ARM::VTRNd8; break;
4084  case MVT::v4f16:
4085  case MVT::v4i16: Opc = ARM::VTRNd16; break;
4086  case MVT::v2f32:
4087  case MVT::v2i32: Opc = ARM::VTRNd32; break;
4088  case MVT::v16i8: Opc = ARM::VTRNq8; break;
4089  case MVT::v8f16:
4090  case MVT::v8i16: Opc = ARM::VTRNq16; break;
4091  case MVT::v4f32:
4092  case MVT::v4i32: Opc = ARM::VTRNq32; break;
4093  }
4094  SDValue Pred = getAL(CurDAG, dl);
4095  SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4096  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
4097  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4098  return;
4099  }
4100  case ARMISD::BUILD_VECTOR: {
4101  EVT VecVT = N->getValueType(0);
4102  EVT EltVT = VecVT.getVectorElementType();
4103  unsigned NumElts = VecVT.getVectorNumElements();
4104  if (EltVT == MVT::f64) {
4105  assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
4106  ReplaceNode(
4107  N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
4108  return;
4109  }
4110  assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
4111  if (NumElts == 2) {
4112  ReplaceNode(
4113  N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
4114  return;
4115  }
4116  assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
4117  ReplaceNode(N,
4118  createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
4119  N->getOperand(2), N->getOperand(3)));
4120  return;
4121  }
4122 
4123  case ARMISD::VLD1DUP: {
4124  static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
4125  ARM::VLD1DUPd32 };
4126  static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
4127  ARM::VLD1DUPq32 };
4128  SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
4129  return;
4130  }
4131 
4132  case ARMISD::VLD2DUP: {
4133  static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4134  ARM::VLD2DUPd32 };
4135  SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
4136  return;
4137  }
4138 
4139  case ARMISD::VLD3DUP: {
4140  static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
4141  ARM::VLD3DUPd16Pseudo,
4142  ARM::VLD3DUPd32Pseudo };
4143  SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
4144  return;
4145  }
4146 
4147  case ARMISD::VLD4DUP: {
4148  static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
4149  ARM::VLD4DUPd16Pseudo,
4150  ARM::VLD4DUPd32Pseudo };
4151  SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
4152  return;
4153  }
4154 
4155  case ARMISD::VLD1DUP_UPD: {
4156  static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
4157  ARM::VLD1DUPd16wb_fixed,
4158  ARM::VLD1DUPd32wb_fixed };
4159  static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
4160  ARM::VLD1DUPq16wb_fixed,
4161  ARM::VLD1DUPq32wb_fixed };
4162  SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
4163  return;
4164  }
4165 
4166  case ARMISD::VLD2DUP_UPD: {
4167  static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
4168  ARM::VLD2DUPd16wb_fixed,
4169  ARM::VLD2DUPd32wb_fixed };
4170  SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes);
4171  return;
4172  }
4173 
4174  case ARMISD::VLD3DUP_UPD: {
4175  static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
4176  ARM::VLD3DUPd16Pseudo_UPD,
4177  ARM::VLD3DUPd32Pseudo_UPD };
4178  SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes);
4179  return;
4180  }
4181 
4182  case ARMISD::VLD4DUP_UPD: {
4183  static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
4184  ARM::VLD4DUPd16Pseudo_UPD,
4185  ARM::VLD4DUPd32Pseudo_UPD };
4186  SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes);
4187  return;
4188  }
4189 
4190  case ARMISD::VLD1_UPD: {
4191  static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
4192  ARM::VLD1d16wb_fixed,
4193  ARM::VLD1d32wb_fixed,
4194  ARM::VLD1d64wb_fixed };
4195  static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
4196  ARM::VLD1q16wb_fixed,
4197  ARM::VLD1q32wb_fixed,
4198  ARM::VLD1q64wb_fixed };
4199  SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
4200  return;
4201  }
4202 
4203  case ARMISD::VLD2_UPD: {
4204  if (Subtarget->hasNEON()) {
4205  static const uint16_t DOpcodes[] = {
4206  ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
4207  ARM::VLD1q64wb_fixed};
4208  static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
4209  ARM::VLD2q16PseudoWB_fixed,
4210  ARM::VLD2q32PseudoWB_fixed};
4211  SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
4212  } else {
4213  static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
4214  ARM::MVE_VLD21_8_wb};
4215  static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
4216  ARM::MVE_VLD21_16_wb};
4217  static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
4218  ARM::MVE_VLD21_32_wb};
4219  static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4220  SelectMVE_VLD(N, 2, Opcodes, true);
4221  }
4222  return;
4223  }
4224 
4225  case ARMISD::VLD3_UPD: {
4226  static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
4227  ARM::VLD3d16Pseudo_UPD,
4228  ARM::VLD3d32Pseudo_UPD,
4229  ARM::VLD1d64TPseudoWB_fixed};
4230  static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4231  ARM::VLD3q16Pseudo_UPD,
4232  ARM::VLD3q32Pseudo_UPD };
4233  static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
4234  ARM::VLD3q16oddPseudo_UPD,
4235  ARM::VLD3q32oddPseudo_UPD };
4236  SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4237  return;
4238  }
4239 
4240  case ARMISD::VLD4_UPD: {
4241  if (Subtarget->hasNEON()) {
4242  static const uint16_t DOpcodes[] = {
4243  ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD,
4244  ARM::VLD1d64QPseudoWB_fixed};
4245  static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
4246  ARM::VLD4q16Pseudo_UPD,
4247  ARM::VLD4q32Pseudo_UPD};
4248  static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
4249  ARM::VLD4q16oddPseudo_UPD,
4250  ARM::VLD4q32oddPseudo_UPD};
4251  SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4252  } else {
4253  static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
4254  ARM::MVE_VLD42_8,
4255  ARM::MVE_VLD43_8_wb};
4256  static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
4257  ARM::MVE_VLD42_16,
4258  ARM::MVE_VLD43_16_wb};
4259  static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
4260  ARM::MVE_VLD42_32,
4261  ARM::MVE_VLD43_32_wb};
4262  static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4263  SelectMVE_VLD(N, 4, Opcodes, true);
4264  }
4265  return;
4266  }
4267 
4268  case ARMISD::VLD2LN_UPD: {
4269  static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
4270  ARM::VLD2LNd16Pseudo_UPD,
4271  ARM::VLD2LNd32Pseudo_UPD };
4272  static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
4273  ARM::VLD2LNq32Pseudo_UPD };
4274  SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
4275  return;
4276  }
4277 
4278  case ARMISD::VLD3LN_UPD: {
4279  static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
4280  ARM::VLD3LNd16Pseudo_UPD,
4281  ARM::VLD3LNd32Pseudo_UPD };
4282  static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
4283  ARM::VLD3LNq32Pseudo_UPD };
4284  SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
4285  return;
4286  }
4287 
4288  case ARMISD::VLD4LN_UPD: {
4289  static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
4290  ARM::VLD4LNd16Pseudo_UPD,
4291  ARM::VLD4LNd32Pseudo_UPD };
4292  static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
4293  ARM::VLD4LNq32Pseudo_UPD };
4294  SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
4295  return;
4296  }
4297 
4298  case ARMISD::VST1_UPD: {
4299  static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
4300  ARM::VST1d16wb_fixed,
4301  ARM::VST1d32wb_fixed,
4302  ARM::VST1d64wb_fixed };
4303  static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
4304  ARM::VST1q16wb_fixed,
4305  ARM::VST1q32wb_fixed,
4306  ARM::VST1q64wb_fixed };
4307  SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
4308  return;
4309  }
4310 
4311  case ARMISD::VST2_UPD: {
4312  if (Subtarget->hasNEON()) {
4313  static const uint16_t DOpcodes[] = {
4314  ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
4315  ARM::VST1q64wb_fixed};
4316  static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
4317  ARM::VST2q16PseudoWB_fixed,
4318  ARM::VST2q32PseudoWB_fixed};
4319  SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
4320  return;
4321  }
4322  break;
4323  }
4324 
4325  case ARMISD::VST3_UPD: {
4326  static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
4327  ARM::VST3d16Pseudo_UPD,
4328  ARM::VST3d32Pseudo_UPD,
4329  ARM::VST1d64TPseudoWB_fixed};
4330  static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
4331  ARM::VST3q16Pseudo_UPD,
4332  ARM::VST3q32Pseudo_UPD };
4333  static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
4334  ARM::VST3q16oddPseudo_UPD,
4335  ARM::VST3q32oddPseudo_UPD };
4336  SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4337  return;
4338  }
4339 
4340  case ARMISD::VST4_UPD: {
4341  if (Subtarget->hasNEON()) {
4342  static const uint16_t DOpcodes[] = {
4343  ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,
4344  ARM::VST1d64QPseudoWB_fixed};
4345  static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
4346  ARM::VST4q16Pseudo_UPD,
4347  ARM::VST4q32Pseudo_UPD};
4348  static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
4349  ARM::VST4q16oddPseudo_UPD,
4350  ARM::VST4q32oddPseudo_UPD};
4351  SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4352  return;
4353  }
4354  break;
4355  }
4356 
4357  case ARMISD::VST2LN_UPD: {
4358  static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
4359  ARM::VST2LNd16Pseudo_UPD,
4360  ARM::VST2LNd32Pseudo_UPD };
4361  static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
4362  ARM::VST2LNq32Pseudo_UPD };
4363  SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
4364  return;
4365  }
4366 
4367  case ARMISD::VST3LN_UPD: {
4368  static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
4369  ARM::VST3LNd16Pseudo_UPD,
4370  ARM::VST3LNd32Pseudo_UPD };
4371  static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
4372  ARM::VST3LNq32Pseudo_UPD };
4373  SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
4374  return;
4375  }
4376 
4377  case ARMISD::VST4LN_UPD: {
4378  static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
4379  ARM::VST4LNd16Pseudo_UPD,
4380  ARM::VST4LNd32Pseudo_UPD };
4381  static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
4382  ARM::VST4LNq32Pseudo_UPD };
4383  SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
4384  return;
4385  }
4386 
4387  case ISD::INTRINSIC_VOID:
4388  case ISD::INTRINSIC_W_CHAIN: {
4389  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
4390  switch (IntNo) {
4391  default:
4392  break;
4393 
4394  case Intrinsic::arm_mrrc:
4395  case Intrinsic::arm_mrrc2: {
4396  SDLoc dl(N);
4397  SDValue Chain = N->getOperand(0);
4398  unsigned Opc;
4399 
4400  if (Subtarget->isThumb())
4401  Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
4402  else
4403  Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
4404 
4405  SmallVector<SDValue, 5> Ops;
4406  Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
4407  Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
4408  Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
4409 
4410  // The ARM mrrc2 instruction does not allow predicates; the top 4 bits of the
4411  // encoded instruction are always '1111'. Assembly language permits writing AL
4412  // as a predicate on mrrc2, but it makes no difference to the encoding.
4413  if (Opc != ARM::MRRC2) {
4414  Ops.push_back(getAL(CurDAG, dl));
4415  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4416  }
4417 
4418  Ops.push_back(Chain);
4419 
4420  // Writes to two registers.
4421  const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
4422 
4423  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
4424  return;
4425  }
4426  case Intrinsic::arm_ldaexd:
4427  case Intrinsic::arm_ldrexd: {
4428  SDLoc dl(N);
4429  SDValue Chain = N->getOperand(0);
4430  SDValue MemAddr = N->getOperand(2);
4431  bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
4432 
4433  bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
4434  unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
4435  : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
4436 
4437  // arm_ldrexd returns an i64 value in {i32, i32}.
4438  std::vector<EVT> ResTys;
4439  if (isThumb) {
4440  ResTys.push_back(MVT::i32);
4441  ResTys.push_back(MVT::i32);
4442  } else
4443  ResTys.push_back(MVT::Untyped);
4444  ResTys.push_back(MVT::Other);
4445 
4446  // Place arguments in the right order.
4447  SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
4448  CurDAG->getRegister(0, MVT::i32), Chain};
4449  SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4450  // Transfer memoperands.
4451  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4452  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4453 
4454  // Remap uses.
4455  SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
4456  if (!SDValue(N, 0).use_empty()) {
4457  SDValue Result;
4458  if (isThumb)
4459  Result = SDValue(Ld, 0);
4460  else {
4461  SDValue SubRegIdx =
4462  CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
4463  SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4464  dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4465  Result = SDValue(ResNode,0);
4466  }
4467  ReplaceUses(SDValue(N, 0), Result);
4468  }
4469  if (!SDValue(N, 1).use_empty()) {
4470  SDValue Result;
4471  if (isThumb)
4472  Result = SDValue(Ld, 1);
4473  else {
4474  SDValue SubRegIdx =
4475  CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
4476  SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4477  dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4478  Result = SDValue(ResNode,0);
4479  }
4480  ReplaceUses(SDValue(N, 1), Result);
4481  }
4482  ReplaceUses(SDValue(N, 2), OutChain);
4483  CurDAG->RemoveDeadNode(N);
4484  return;
4485  }
4486  case Intrinsic::arm_stlexd:
4487  case Intrinsic::arm_strexd: {
4488  SDLoc dl(N);
4489  SDValue Chain = N->getOperand(0);
4490  SDValue Val0 = N->getOperand(2);
4491  SDValue Val1 = N->getOperand(3);
4492  SDValue MemAddr = N->getOperand(4);
4493 
4494  // Store-exclusive double returns an i32 value, the status of the
4495  // issued store.
4496  const EVT ResTys[] = {MVT::i32, MVT::Other};
4497 
4498  bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
4499  // Place arguments in the right order.
4500  SmallVector<SDValue, 7> Ops;
4501  if (isThumb) {
4502  Ops.push_back(Val0);
4503  Ops.push_back(Val1);
4504  } else
4505  // arm_strexd uses GPRPair.
4506  Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
4507  Ops.push_back(MemAddr);
4508  Ops.push_back(getAL(CurDAG, dl));
4509  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4510  Ops.push_back(Chain);
4511 
4512  bool IsRelease = IntNo == Intrinsic::arm_stlexd;
4513  unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
4514  : (IsRelease ? ARM::STLEXD : ARM::STREXD);
4515 
4516  SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4517  // Transfer memoperands.
4518  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4519  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4520 
4521  ReplaceNode(N, St);
4522  return;
4523  }
4524 
4525  case Intrinsic::arm_neon_vld1: {
4526  static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
4527  ARM::VLD1d32, ARM::VLD1d64 };
4528  static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4529  ARM::VLD1q32, ARM::VLD1q64};
4530  SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
4531  return;
4532  }
4533 
4534  case Intrinsic::arm_neon_vld1x2: {
4535  static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4536  ARM::VLD1q32, ARM::VLD1q64 };
4537  static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
4538  ARM::VLD1d16QPseudo,
4539  ARM::VLD1d32QPseudo,
4540  ARM::VLD1d64QPseudo };
4541  SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
4542  return;
4543  }
4544 
4545  case Intrinsic::arm_neon_vld1x3: {
4546  static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
4547  ARM::VLD1d16TPseudo,
4548  ARM::VLD1d32TPseudo,
4549  ARM::VLD1d64TPseudo };
4550  static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
4551  ARM::VLD1q16LowTPseudo_UPD,
4552  ARM::VLD1q32LowTPseudo_UPD,
4553  ARM::VLD1q64LowTPseudo_UPD };
4554  static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
4555  ARM::VLD1q16HighTPseudo,
4556  ARM::VLD1q32HighTPseudo,
4557  ARM::VLD1q64HighTPseudo };
4558  SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4559  return;
4560  }
4561 
4562  case Intrinsic::arm_neon_vld1x4: {
4563  static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
4564  ARM::VLD1d16QPseudo,
4565  ARM::VLD1d32QPseudo,
4566  ARM::VLD1d64QPseudo };
4567  static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
4568  ARM::VLD1q16LowQPseudo_UPD,
4569  ARM::VLD1q32LowQPseudo_UPD,
4570  ARM::VLD1q64LowQPseudo_UPD };
4571  static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
4572  ARM::VLD1q16HighQPseudo,
4573  ARM::VLD1q32HighQPseudo,
4574  ARM::VLD1q64HighQPseudo };
4575  SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4576  return;
4577  }
4578 
4579  case Intrinsic::arm_neon_vld2: {
4580  static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
4581  ARM::VLD2d32, ARM::VLD1q64 };
4582  static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
4583  ARM::VLD2q32Pseudo };
4584  SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
4585  return;
4586  }
4587 
4588  case Intrinsic::arm_neon_vld3: {
4589  static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
4590  ARM::VLD3d16Pseudo,
4591  ARM::VLD3d32Pseudo,
4592  ARM::VLD1d64TPseudo };
4593  static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4594  ARM::VLD3q16Pseudo_UPD,
4595  ARM::VLD3q32Pseudo_UPD };
4596  static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
4597  ARM::VLD3q16oddPseudo,
4598  ARM::VLD3q32oddPseudo };
4599  SelectVLD(N, /*isUpdating=*/false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4600  return;
4601  }
4602 
4603  case Intrinsic::arm_neon_vld4: {
4604  static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
4605  ARM::VLD4d16Pseudo,
4606  ARM::VLD4d32Pseudo,
4607  ARM::VLD1d64QPseudo };
4608  static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
4609  ARM::VLD4q16Pseudo_UPD,
4610  ARM::VLD4q32Pseudo_UPD };
4611  static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
4612  ARM::VLD4q16oddPseudo,
4613  ARM::VLD4q32oddPseudo };
4614  SelectVLD(N, /*isUpdating=*/false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4615  return;
4616  }
4617 
4618  case Intrinsic::arm_neon_vld2dup: {
4619  static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4620  ARM::VLD2DUPd32, ARM::VLD1q64 };
4621  static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4622  ARM::VLD2DUPq16EvenPseudo,
4623  ARM::VLD2DUPq32EvenPseudo };
4624  static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
4625  ARM::VLD2DUPq16OddPseudo,
4626  ARM::VLD2DUPq32OddPseudo };
4627  SelectVLDDup(N, /* IsIntrinsic= */ true, /* isUpdating= */ false, 2,
4628  DOpcodes, QOpcodes0, QOpcodes1);
4629  return;
4630  }
4631 
4632  case Intrinsic::arm_neon_vld3dup: {
4633  static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
4634  ARM::VLD3DUPd16Pseudo,
4635  ARM::VLD3DUPd32Pseudo,
4636  ARM::VLD1d64TPseudo };
4637  static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4638  ARM::VLD3DUPq16EvenPseudo,
4639  ARM::VLD3DUPq32EvenPseudo };
4640  static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
4641  ARM::VLD3DUPq16OddPseudo,
4642  ARM::VLD3DUPq32OddPseudo };
4643  SelectVLDDup(N, /* IsIntrinsic= */ true, /* isUpdating= */ false, 3,
4644  DOpcodes, QOpcodes0, QOpcodes1);
4645  return;
4646  }
4647 
4648  case Intrinsic::arm_neon_vld4dup: {
4649  static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
4650  ARM::VLD4DUPd16Pseudo,
4651  ARM::VLD4DUPd32Pseudo,
4652  ARM::VLD1d64QPseudo };
4653  static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4654  ARM::VLD4DUPq16EvenPseudo,
4655  ARM::VLD4DUPq32EvenPseudo };
4656  static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
4657  ARM::VLD4DUPq16OddPseudo,
4658  ARM::VLD4DUPq32OddPseudo };
4659  SelectVLDDup(N, /* IsIntrinsic= */ true, /* isUpdating= */ false, 4,
4660  DOpcodes, QOpcodes0, QOpcodes1);
4661  return;
4662  }
4663 
4664  case Intrinsic::arm_neon_vld2lane: {
4665  static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
4666  ARM::VLD2LNd16Pseudo,
4667  ARM::VLD2LNd32Pseudo };
4668  static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
4669  ARM::VLD2LNq32Pseudo };
4670  SelectVLDSTLane(N, /*IsLoad=*/true, /*isUpdating=*/false, 2, DOpcodes, QOpcodes);
4671  return;
4672  }
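  // The Q lane tables start at 16 bits: NEON provides no q-register lane
  // form for 8-bit elements (there is no vld2q_lane for bytes), so byte
  // lanes are always handled through the D tables. The same holds for the
  // vld3/vld4 and vst lane cases below.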
4673 
4674  case Intrinsic::arm_neon_vld3lane: {
4675  static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
4676  ARM::VLD3LNd16Pseudo,
4677  ARM::VLD3LNd32Pseudo };
4678  static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
4679  ARM::VLD3LNq32Pseudo };
4680  SelectVLDSTLane(N, /*IsLoad=*/true, /*isUpdating=*/false, 3, DOpcodes, QOpcodes);
4681  return;
4682  }
4683 
4684  case Intrinsic::arm_neon_vld4lane: {
4685  static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
4686  ARM::VLD4LNd16Pseudo,
4687  ARM::VLD4LNd32Pseudo };
4688  static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
4689  ARM::VLD4LNq32Pseudo };
4690  SelectVLDSTLane(N, /*IsLoad=*/true, /*isUpdating=*/false, 4, DOpcodes, QOpcodes);
4691  return;
4692  }
4693 
4694  case Intrinsic::arm_neon_vst1: {
4695  static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
4696  ARM::VST1d32, ARM::VST1d64 };
4697  static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
4698  ARM::VST1q32, ARM::VST1q64 };
4699  SelectVST(N, /*isUpdating=*/false, 1, DOpcodes, QOpcodes, nullptr);
4700  return;
4701  }
4702 
4703  case Intrinsic::arm_neon_vst1x2: {
4704  static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
4705  ARM::VST1q32, ARM::VST1q64 };
4706  static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
4707  ARM::VST1d16QPseudo,
4708  ARM::VST1d32QPseudo,
4709  ARM::VST1d64QPseudo };
4710  SelectVST(N, /*isUpdating=*/false, 2, DOpcodes, QOpcodes, nullptr);
4711  return;
4712  }
4713 
4714  case Intrinsic::arm_neon_vst1x3: {
4715  static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
4716  ARM::VST1d16TPseudo,
4717  ARM::VST1d32TPseudo,
4718  ARM::VST1d64TPseudo };
4719  static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
4720  ARM::VST1q16LowTPseudo_UPD,
4721  ARM::VST1q32LowTPseudo_UPD,
4722  ARM::VST1q64LowTPseudo_UPD };
4723  static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
4724  ARM::VST1q16HighTPseudo,
4725  ARM::VST1q32HighTPseudo,
4726  ARM::VST1q64HighTPseudo };
4727  SelectVST(N, /*isUpdating=*/false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4728  return;
4729  }
4730 
4731  case Intrinsic::arm_neon_vst1x4: {
4732  static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
4733  ARM::VST1d16QPseudo,
4734  ARM::VST1d32QPseudo,
4735  ARM::VST1d64QPseudo };
4736  static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
4737  ARM::VST1q16LowQPseudo_UPD,
4738  ARM::VST1q32LowQPseudo_UPD,
4739  ARM::VST1q64LowQPseudo_UPD };
4740  static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
4741  ARM::VST1q16HighQPseudo,
4742  ARM::VST1q32HighQPseudo,
4743  ARM::VST1q64HighQPseudo };
4744  SelectVST(N, /*isUpdating=*/false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4745  return;
4746  }
4747 
4748  case Intrinsic::arm_neon_vst2: {
4749  static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
4750  ARM::VST2d32, ARM::VST1q64 };
4751  static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
4752  ARM::VST2q32Pseudo };
4753  SelectVST(N, /*isUpdating=*/false, 2, DOpcodes, QOpcodes, nullptr);
4754  return;
4755  }
4756 
4757  case Intrinsic::arm_neon_vst3: {
4758  static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
4759  ARM::VST3d16Pseudo,
4760  ARM::VST3d32Pseudo,
4761  ARM::VST1d64TPseudo };
4762  static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
4763  ARM::VST3q16Pseudo_UPD,
4764  ARM::VST3q32Pseudo_UPD };
4765  static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
4766  ARM::VST3q16oddPseudo,
4767  ARM::VST3q32oddPseudo };
4768  SelectVST(N, /*isUpdating=*/false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4769  return;
4770  }
4771 
4772  case Intrinsic::arm_neon_vst4: {
4773  static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
4774  ARM::VST4d16Pseudo,
4775  ARM::VST4d32Pseudo,
4776  ARM::VST1d64QPseudo };
4777  static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
4778  ARM::VST4q16Pseudo_UPD,
4779  ARM::VST4q32Pseudo_UPD };
4780  static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
4781  ARM::VST4q16oddPseudo,
4782  ARM::VST4q32oddPseudo };
4783  SelectVST(N, /*isUpdating=*/false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4784  return;
4785  }
4786 
4787  case Intrinsic::arm_neon_vst2lane: {
4788  static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
4789  ARM::VST2LNd16Pseudo,
4790  ARM::VST2LNd32Pseudo };
4791  static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
4792  ARM::VST2LNq32Pseudo };
4793  SelectVLDSTLane(N, /*IsLoad=*/false, /*isUpdating=*/false, 2, DOpcodes, QOpcodes);
4794  return;
4795  }
4796 
4797  case Intrinsic::arm_neon_vst3lane: {
4798  static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
4799  ARM::VST3LNd16Pseudo,
4800  ARM::VST3LNd32Pseudo };
4801  static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
4802  ARM::VST3LNq32Pseudo };
4803  SelectVLDSTLane(N, /*IsLoad=*/false, /*isUpdating=*/false, 3, DOpcodes, QOpcodes);
4804  return;
4805  }
4806 
4807  case Intrinsic::arm_neon_vst4lane: {
4808  static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
4809  ARM::VST4LNd16Pseudo,
4810  ARM::VST4LNd32Pseudo };
4811  static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
4812  ARM::VST4LNq32Pseudo };
4813  SelectVLDSTLane(N, /*IsLoad=*/false, /*isUpdating=*/false, 4, DOpcodes, QOpcodes);
4814  return;
4815  }
4816 
4817  case Intrinsic::arm_mve_vldr_gather_base_wb:
4818  case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
4819  static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
4820  ARM::MVE_VLDRDU64_qi_pre};
4821  SelectMVE_WB(N, Opcodes,
4822  IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
4823  return;
4824  }
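  // For these gathers the base is a vector of addresses held in a Q
  // register, which is updated in place; hence the _pre writeback opcodes.
  // The node therefore produces both the loaded vector and the new base,
  // with 32-bit and 64-bit element variants, and the _predicated form
  // passes an additional mask operand.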
4825 
4826  case Intrinsic::arm_mve_vld2q: {
4827  static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
4828  static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
4829  ARM::MVE_VLD21_16};
4830  static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
4831  ARM::MVE_VLD21_32};
4832  static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4833  SelectMVE_VLD(N, 2, Opcodes, /*HasWriteback=*/false);
4834  return;
4835  }
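  // MVE vld2 is executed as a sequence of stage instructions (VLD20 then
  // VLD21), each filling part of the de-interleaved destination registers;
  // SelectMVE_VLD emits one machine node per stage.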
4836 
4837  case Intrinsic::arm_mve_vld4q: {
4838  static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
4839  ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
4840  static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
4841  ARM::MVE_VLD42_16,
4842  ARM::MVE_VLD43_16};
4843  static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
4844  ARM::MVE_VLD42_32,
4845  ARM::MVE_VLD43_32};
4846  static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4847  SelectMVE_VLD(N, 4, Opcodes, /*HasWriteback=*/false);
4848  return;
4849  }
4850  }
4851  break;
4852  }
4853 
4854  case ISD::INTRINSIC_WO_CHAIN: {
4855  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
4856  switch (IntNo) {
4857  default:
4858  break;
4859 
4860  // Scalar f32 -> bf16
4861  case Intrinsic::arm_neon_vcvtbfp2bf: {
4862  SDLoc dl(N);
4863  const SDValue &Src = N->getOperand(1);
4864  llvm::EVT DestTy = N->getValueType(0);
4865  SDValue Pred = getAL(CurDAG, dl);
4866  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
4867  SDValue Ops[] = { Src, Src, Pred, Reg0 };
4868  CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops);
4869  return;
4870  }
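  // Src appears twice above. A plausible reading (an assumption, not stated
  // in the original source) is that BF16_VCVTB writes only the bottom half
  // of its destination, so the first operand acts as the tied input that
  // supplies the untouched lanes.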
4871 
4872  // Vector v4f32 -> v4bf16
4873  case Intrinsic::arm_neon_vcvtfp2bf: {
4874  SDLoc dl(N);
4875  const SDValue &Src = N->getOperand(1);
4876  SDValue Pred = getAL(CurDAG, dl);
4877  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
4878  SDValue Ops[] = { Src, Pred, Reg0 };
4879  CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops);
4880  return;
4881  }
4882 
4883  case Intrinsic::arm_mve_urshrl:
4884  SelectMVE_LongShift(N, ARM::MVE_URSHRL, /*Immediate=*/true, /*HasSaturationOperand=*/false);
4885  return;
4886  case Intrinsic::arm_mve_uqshll:
4887  SelectMVE_LongShift(N, ARM::MVE_UQSHLL, /*Immediate=*/true, /*HasSaturationOperand=*/false);
4888  return;
4889  case Intrinsic::arm_mve_srshrl:
4890  SelectMVE_LongShift(N, ARM::MVE_SRSHRL, /*Immediate=*/true, /*HasSaturationOperand=*/false);
4891  return;
4892  case Intrinsic::arm_mve_sqshll:
4893  SelectMVE_LongShift(N, ARM::MVE_SQSHLL, /*Immediate=*/true, /*HasSaturationOperand=*/false);
4894  return;
4895  case Intrinsic::arm_mve_uqrshll:
4896  SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, /*Immediate=*/false, /*HasSaturationOperand=*/true);
4897  return;
4898  case Intrinsic::arm_mve_sqrshrl:
4899  SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, /*Immediate=*/false, /*HasSaturationOperand=*/true);
4900  return;
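  // These long shifts operate on a 64-bit value split across two GPRs.
  // Per the annotations above, the plain rounding/saturating forms take an
  // immediate shift amount, while UQRSHLL/SQRSHRL take the amount in a
  // register and carry an extra operand selecting the saturation width.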
4901 
4902  case Intrinsic::arm_mve_vadc:
4903  case Intrinsic::arm_mve_vadc_predicated:
4904  SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
4905  IntNo == Intrinsic::arm_mve_vadc_predicated);
4906  return;
4907  case Intrinsic::arm_mve_vsbc:
4908  case Intrinsic::arm_mve_vsbc_predicated:
4909  SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true,
4910  IntNo == Intrinsic::arm_mve_vsbc_predicated);
4911  return;
4912  case Intrinsic::arm_mve_vshlc:
4913  case Intrinsic::arm_mve_vshlc_predicated:
4914  SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated);
4915  return;
4916 
4917  case Intrinsic::arm_mve_vmlldava:
4918  case Intrinsic::arm_mve_vmlldava_predicated: {
4919  static const uint16_t OpcodesU[] = {
4920  ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,
4921  ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,
4922  };
4923  static const uint16_t OpcodesS[] = {
4924  ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,
4925  ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,
4926  ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,
4927  ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
4928  ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
4929  ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
4930  ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
4931  ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
4932  };
4933  SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
4934  OpcodesS, OpcodesU);
4935  return;
4936  }
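  // OpcodesS above is laid out in 16/32-bit pairs, stepping through the
  // plain, accumulating (a), exchanging (x) and accumulating-exchanging
  // (ax) forms of VMLALDAV and then VMLSLDAV; the unsigned table is shorter
  // because the exchanging and subtracting forms exist only for signed
  // inputs.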
4937 
4938  case Intrinsic::arm_mve_vrmlldavha:
4939  case Intrinsic::arm_mve_vrmlldavha_predicated: {
4940  static const uint16_t OpcodesU[] = {
4941  ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
4942  };
4943  static const uint16_t OpcodesS[] = {
4944  ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
4945  ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
4946  ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
4947  ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
4948  };
4949  SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
4950  OpcodesS, OpcodesU);
4951  return;
4952  }
4953 
4954  case Intrinsic::arm_mve_vidup:
4955  case Intrinsic::arm_mve_vidup_predicated: {
4956  static const uint16_t Opcodes[] = {
4957  ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
4958  };
4959  SelectMVE_VxDUP(N, Opcodes, false,
4960  IntNo == Intrinsic::arm_mve_vidup_predicated);
4961  return;
4962  }
4963 
4964  case Intrinsic::arm_mve_vddup:
4965  case Intrinsic::arm_mve_vddup_predicated: {
4966  static const uint16_t Opcodes[] = {
4967  ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
4968  };
4969  SelectMVE_VxDUP(N, Opcodes, false,
4970  IntNo == Intrinsic::arm_mve_vddup_predicated);
4971  return;
4972  }
4973 
4974  case Intrinsic::arm_mve_viwdup:
4975  case Intrinsic::arm_mve_viwdup_predicated: {
4976  static const uint16_t Opcodes[] = {
4977  ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
4978  };
4979  SelectMVE_VxDUP(N, Opcodes, true,
4980  IntNo == Intrinsic::arm_mve_viwdup_predicated);
4981  return;
4982  }
4983 
4984  case Intrinsic::arm_mve_vdwdup:
4985  case Intrinsic::arm_mve_vdwdup_predicated: {
4986  static const uint16_t Opcodes[] = {
4987  ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
4988  };
4989  SelectMVE_VxDUP(N, Opcodes, true,
4990  IntNo == Intrinsic::arm_mve_vdwdup_predicated);
4991  return;
4992  }
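  // VIDUP/VDDUP produce a vector counting up/down from a base by an
  // immediate step; the VIWDUP/VDWDUP variants above additionally wrap at a
  // limit held in a GPR, which is what the 'true' wrapping flag passed to
  // SelectMVE_VxDUP requests.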
4993 
4994  case Intrinsic::arm_cde_cx1d:
4995  case Intrinsic::arm_cde_cx1da:
4996  case Intrinsic::arm_cde_cx2d:
4997  case Intrinsic::arm_cde_cx2da:
4998  case Intrinsic::arm_cde_cx3d:
4999  case Intrinsic::arm_cde_cx3da: {
5000  bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||
5001  IntNo == Intrinsic::arm_cde_cx2da ||
5002  IntNo == Intrinsic::arm_cde_cx3da;
5003  size_t NumExtraOps;
5004  uint16_t Opcode;
5005  switch (IntNo) {
5006  case Intrinsic::arm_cde_cx1d:
5007  case Intrinsic::arm_cde_cx1da:
5008  NumExtraOps = 0;
5009  Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
5010  break;
5011  case Intrinsic::arm_cde_cx2d:
5012  case Intrinsic::arm_cde_cx2da:
5013  NumExtraOps = 1;
5014  Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
5015  break;
5016  case Intrinsic::arm_cde_cx3d:
5017  case Intrinsic::arm_cde_cx3da:
5018  NumExtraOps = 2;
5019  Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
5020  break;
5021  default:
5022  llvm_unreachable("Unexpected opcode");
5023  }
5024  SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
5025  return;
5026  }
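  // For the dual-register CDE instructions, NumExtraOps counts the source
  // registers beyond the coprocessor id and immediate: none for CX1, one
  // for CX2, two for CX3. HasAccum marks the *a forms, which also read the
  // destination register pair as an accumulator.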
5027  }
5028  break;
5029  }
5030 
5031  case ISD::ATOMIC_CMP_SWAP:
5032  SelectCMP_SWAP(N);
5033  return;
5034  }
5035 
5036  SelectCode(N);
5037 }
5038 
5039 // Inspect a register string of the form
5040 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32-bit) or
5041 // cp<coprocessor>:<opc1>:c<CRm> (64-bit), extract the integer operands
5042 // from its fields, and append these operands to the
5043 // provided vector.
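// For example (illustrative, not from the original source): the 32-bit
// string "cp15:0:c13:c0:3" yields the operand sequence {15, 0, 13, 0, 3}.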
5044 static void getIntOperandsFromRegisterString(StringRef RegString,
5045  SelectionDAG *CurDAG,
5046  const SDLoc &DL,
5047  std::vector<SDValue> &Ops) {
5048  SmallVector<StringRef, 5> Fields;
5049  RegString.split(Fields, ':');
5050 
5051  if (Fields.size() > 1) {
5052  bool AllIntFields = true;
5053 
5054  for (StringRef Field : Fields) {
5055  // Need to trim out leading 'cp' characters and get the integer field.
5056  unsigned IntField;
5057  AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
5058  Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
5059  }
5060 
5061  assert(AllIntFields &&
5062  "Unexpected non-integer value in special register string.");
5063  }
5064 }
5065 
5066 // Maps a Banked Register string to its mask value. The mask value returned is
5067 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
5068 // mask operand, which expresses which register is to be used, e.g. r8, and in
5069 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
5070 // was invalid.
5071 static inline int getBankedRegisterMask(StringRef RegString) {
5072  auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
5073  if (!TheReg)
5074  return -1;
5075  return TheReg->Encoding;
5076 }
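// For example (illustrative): a banked-register name such as "r8_usr" or
// "lr_irq" looks up successfully and returns its encoding, while an
// unrecognized string returns -1.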
5077 
5078 // The flags here are common to those allowed for apsr in the A class cores and
5079 // those allowed for the special registers in the M class cores. Returns a
5080 // value representing which flags were present, -1 if invalid.
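// For example (illustrative): "g", "nzcvq" and "nzcvqg" are the accepted
// flag suffixes, as used in register names like apsr_nzcvq or apsr_g.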
5081 static inline int getMClassFlagsMask(StringRef Flags) {
5082  return StringSwitch<int>(Flags)
5083  .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is