LLVM 19.0.0git
ARMISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the ARM target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ARM.h"
14#include "ARMBaseInstrInfo.h"
15#include "ARMTargetMachine.h"
17#include "Utils/ARMBaseInfo.h"
18#include "llvm/ADT/APSInt.h"
27#include "llvm/IR/CallingConv.h"
28#include "llvm/IR/Constants.h"
30#include "llvm/IR/Function.h"
31#include "llvm/IR/Intrinsics.h"
32#include "llvm/IR/IntrinsicsARM.h"
33#include "llvm/IR/LLVMContext.h"
35#include "llvm/Support/Debug.h"
38#include <optional>
39
40using namespace llvm;
41
42#define DEBUG_TYPE "arm-isel"
43#define PASS_NAME "ARM Instruction Selection"
44
45static cl::opt<bool>
46DisableShifterOp("disable-shifter-op", cl::Hidden,
47 cl::desc("Disable isel of shifter-op"),
48 cl::init(false));
49
50//===--------------------------------------------------------------------===//
51/// ARMDAGToDAGISel - ARM specific code to select ARM machine
52/// instructions for SelectionDAG operations.
53///
54namespace {
55
56class ARMDAGToDAGISel : public SelectionDAGISel {
57 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
58 /// make the right decision when generating code for different targets.
59 const ARMSubtarget *Subtarget;
60
61public:
62 static char ID;
63
64 ARMDAGToDAGISel() = delete;
65
66 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
67 : SelectionDAGISel(ID, tm, OptLevel) {}
68
69 bool runOnMachineFunction(MachineFunction &MF) override {
70 // Reset the subtarget each time through.
71 Subtarget = &MF.getSubtarget<ARMSubtarget>();
73 return true;
74 }
75
76 void PreprocessISelDAG() override;
77
78 /// getI32Imm - Return a target constant of type i32 with the specified
79 /// value.
80 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
81 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
82 }
83
84 void Select(SDNode *N) override;
85
86 /// Return true as some complex patterns, like those that call
87 /// canExtractShiftFromMul can modify the DAG inplace.
88 bool ComplexPatternFuncMutatesDAG() const override { return true; }
89
90 bool hasNoVMLxHazardUse(SDNode *N) const;
91 bool isShifterOpProfitable(const SDValue &Shift,
92 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
93 bool SelectRegShifterOperand(SDValue N, SDValue &A,
94 SDValue &B, SDValue &C,
95 bool CheckProfitability = true);
96 bool SelectImmShifterOperand(SDValue N, SDValue &A,
97 SDValue &B, bool CheckProfitability = true);
98 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
99 SDValue &C) {
100 // Don't apply the profitability check
101 return SelectRegShifterOperand(N, A, B, C, false);
102 }
103 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
104 // Don't apply the profitability check
105 return SelectImmShifterOperand(N, A, B, false);
106 }
107 bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
108 if (!N.hasOneUse())
109 return false;
110 return SelectImmShifterOperand(N, A, B, false);
111 }
112
113 bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);
114
115 bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
116 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
117
118 bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
119 const ConstantSDNode *CN = cast<ConstantSDNode>(N);
120 Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
121 Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
122 return true;
123 }
124
125 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
126 SDValue &Offset, SDValue &Opc);
127 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
128 SDValue &Offset, SDValue &Opc);
129 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
130 SDValue &Offset, SDValue &Opc);
131 bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
132 bool SelectAddrMode3(SDValue N, SDValue &Base,
133 SDValue &Offset, SDValue &Opc);
134 bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
135 SDValue &Offset, SDValue &Opc);
136 bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
137 bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
138 bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
139 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
140 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
141
142 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
143
144 // Thumb Addressing Modes:
145 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
146 bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
147 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
148 SDValue &OffImm);
149 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
150 SDValue &OffImm);
151 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
152 SDValue &OffImm);
153 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
154 SDValue &OffImm);
155 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
156 template <unsigned Shift>
157 bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
158
159 // Thumb 2 Addressing Modes:
160 bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
161 template <unsigned Shift>
162 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
163 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
164 SDValue &OffImm);
165 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
166 SDValue &OffImm);
167 template <unsigned Shift>
168 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
169 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
170 unsigned Shift);
171 template <unsigned Shift>
172 bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
173 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
174 SDValue &OffReg, SDValue &ShImm);
175 bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
176
177 template<int Min, int Max>
178 bool SelectImmediateInRange(SDValue N, SDValue &OffImm);
179
180 inline bool is_so_imm(unsigned Imm) const {
181 return ARM_AM::getSOImmVal(Imm) != -1;
182 }
183
184 inline bool is_so_imm_not(unsigned Imm) const {
185 return ARM_AM::getSOImmVal(~Imm) != -1;
186 }
187
188 inline bool is_t2_so_imm(unsigned Imm) const {
189 return ARM_AM::getT2SOImmVal(Imm) != -1;
190 }
191
192 inline bool is_t2_so_imm_not(unsigned Imm) const {
193 return ARM_AM::getT2SOImmVal(~Imm) != -1;
194 }
195
196 // Include the pieces autogenerated from the target description.
197#include "ARMGenDAGISel.inc"
198
199private:
200 void transferMemOperands(SDNode *Src, SDNode *Dst);
201
202 /// Indexed (pre/post inc/dec) load matching code for ARM.
203 bool tryARMIndexedLoad(SDNode *N);
204 bool tryT1IndexedLoad(SDNode *N);
205 bool tryT2IndexedLoad(SDNode *N);
206 bool tryMVEIndexedLoad(SDNode *N);
207 bool tryFMULFixed(SDNode *N, SDLoc dl);
208 bool tryFP_TO_INT(SDNode *N, SDLoc dl);
209 bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul,
210 bool IsUnsigned,
211 bool FixedToFloat);
212
213 /// SelectVLD - Select NEON load intrinsics. NumVecs should be
214 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
215 /// loads of D registers and even subregs and odd subregs of Q registers.
216 /// For NumVecs <= 2, QOpcodes1 is not used.
217 void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
218 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
219 const uint16_t *QOpcodes1);
220
221 /// SelectVST - Select NEON store intrinsics. NumVecs should
222 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
223 /// stores of D registers and even subregs and odd subregs of Q registers.
224 /// For NumVecs <= 2, QOpcodes1 is not used.
225 void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
226 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
227 const uint16_t *QOpcodes1);
228
229 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
230 /// be 2, 3 or 4. The opcode arrays specify the instructions used for
231 /// load/store of D registers and Q registers.
232 void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
233 unsigned NumVecs, const uint16_t *DOpcodes,
234 const uint16_t *QOpcodes);
235
236 /// Helper functions for setting up clusters of MVE predication operands.
237 template <typename SDValueVector>
238 void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
239 SDValue PredicateMask);
240 template <typename SDValueVector>
241 void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
242 SDValue PredicateMask, SDValue Inactive);
243
244 template <typename SDValueVector>
245 void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
246 template <typename SDValueVector>
247 void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);
248
249 /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
250 void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);
251
252 /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
253 void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
254 bool HasSaturationOperand);
255
256 /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
257 void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
258 uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);
259
260 /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
261 /// vector lanes.
262 void SelectMVE_VSHLC(SDNode *N, bool Predicated);
263
264 /// Select long MVE vector reductions with two vector operands
265 /// Stride is the number of vector element widths the instruction can operate
266 /// on:
267 /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
268 /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
269 /// Stride is used when addressing the OpcodesS array which contains multiple
270 /// opcodes for each element width.
271 /// TySize is the index into the list of element types listed above
272 void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
273 const uint16_t *OpcodesS, const uint16_t *OpcodesU,
274 size_t Stride, size_t TySize);
275
276 /// Select a 64-bit MVE vector reduction with two vector operands
277 /// arm_mve_vmlldava_[predicated]
278 void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
279 const uint16_t *OpcodesU);
280 /// Select a 72-bit MVE vector rounding reduction with two vector operands
281 /// int_arm_mve_vrmlldavha[_predicated]
282 void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
283 const uint16_t *OpcodesU);
284
285 /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
286 /// should be 2 or 4. The opcode array specifies the instructions
287 /// used for 8, 16 and 32-bit lane sizes respectively, and each
288 /// pointer points to a set of NumVecs sub-opcodes used for the
289 /// different stages (e.g. VLD20 versus VLD21) of each load family.
290 void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
291 const uint16_t *const *Opcodes, bool HasWriteback);
292
293 /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
294 /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
295 void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
296 bool Wrapping, bool Predicated);
297
298 /// Select SelectCDE_CXxD - Select CDE dual-GPR instruction (one of CX1D,
299 /// CX1DA, CX2D, CX2DA, CX3, CX3DA).
300 /// \arg \c NumExtraOps number of extra operands besides the coprocossor,
301 /// the accumulator and the immediate operand, i.e. 0
302 /// for CX1*, 1 for CX2*, 2 for CX3*
303 /// \arg \c HasAccum whether the instruction has an accumulator operand
304 void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
305 bool HasAccum);
306
307 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
308 /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
309 /// for loading D registers.
310 void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
311 unsigned NumVecs, const uint16_t *DOpcodes,
312 const uint16_t *QOpcodes0 = nullptr,
313 const uint16_t *QOpcodes1 = nullptr);
314
315 /// Try to select SBFX/UBFX instructions for ARM.
316 bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
317
318 bool tryInsertVectorElt(SDNode *N);
319
320 // Select special operations if node forms integer ABS pattern
321 bool tryABSOp(SDNode *N);
322
323 bool tryReadRegister(SDNode *N);
324 bool tryWriteRegister(SDNode *N);
325
326 bool tryInlineAsm(SDNode *N);
327
328 void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
329
330 void SelectCMP_SWAP(SDNode *N);
331
332 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
333 /// inline asm expressions.
335 InlineAsm::ConstraintCode ConstraintID,
336 std::vector<SDValue> &OutOps) override;
337
338 // Form pairs of consecutive R, S, D, or Q registers.
340 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
341 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
342 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
343
344 // Form sequences of 4 consecutive S, D, or Q registers.
345 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
346 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
347 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
348
349 // Get the alignment operand for a NEON VLD or VST instruction.
350 SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
351 bool is64BitVector);
352
353 /// Checks if N is a multiplication by a constant where we can extract out a
354 /// power of two from the constant so that it can be used in a shift, but only
355 /// if it simplifies the materialization of the constant. Returns true if it
356 /// is, and assigns to PowerOfTwo the power of two that should be extracted
357 /// out and to NewMulConst the new constant to be multiplied by.
358 bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
359 unsigned &PowerOfTwo, SDValue &NewMulConst) const;
360
361 /// Replace N with M in CurDAG, in a way that also ensures that M gets
362 /// selected when N would have been selected.
363 void replaceDAGValue(const SDValue &N, SDValue M);
364};
365}
366
367char ARMDAGToDAGISel::ID = 0;
368
369INITIALIZE_PASS(ARMDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)
370
371/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
372/// operand. If so Imm will receive the 32-bit value.
373static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
374 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
375 Imm = N->getAsZExtVal();
376 return true;
377 }
378 return false;
379}
380
381// isInt32Immediate - This method tests to see if a constant operand.
382// If so Imm will receive the 32 bit value.
383static bool isInt32Immediate(SDValue N, unsigned &Imm) {
384 return isInt32Immediate(N.getNode(), Imm);
385}
386
387// isOpcWithIntImmediate - This method tests to see if the node is a specific
388// opcode and that it has a immediate integer right operand.
389// If so Imm will receive the 32 bit value.
390static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
391 return N->getOpcode() == Opc &&
392 isInt32Immediate(N->getOperand(1).getNode(), Imm);
393}
394
395/// Check whether a particular node is a constant value representable as
396/// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
397///
398/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
399static bool isScaledConstantInRange(SDValue Node, int Scale,
400 int RangeMin, int RangeMax,
401 int &ScaledConstant) {
402 assert(Scale > 0 && "Invalid scale!");
403
404 // Check that this is a constant.
405 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
406 if (!C)
407 return false;
408
409 ScaledConstant = (int) C->getZExtValue();
410 if ((ScaledConstant % Scale) != 0)
411 return false;
412
413 ScaledConstant /= Scale;
414 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
415}
416
417void ARMDAGToDAGISel::PreprocessISelDAG() {
418 if (!Subtarget->hasV6T2Ops())
419 return;
420
421 bool isThumb2 = Subtarget->isThumb();
422 // We use make_early_inc_range to avoid invalidation issues.
423 for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
424 if (N.getOpcode() != ISD::ADD)
425 continue;
426
427 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
428 // leading zeros, followed by consecutive set bits, followed by 1 or 2
429 // trailing zeros, e.g. 1020.
430 // Transform the expression to
431 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
432 // of trailing zeros of c2. The left shift would be folded as an shifter
433 // operand of 'add' and the 'and' and 'srl' would become a bits extraction
434 // node (UBFX).
435
436 SDValue N0 = N.getOperand(0);
437 SDValue N1 = N.getOperand(1);
438 unsigned And_imm = 0;
439 if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
440 if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
441 std::swap(N0, N1);
442 }
443 if (!And_imm)
444 continue;
445
446 // Check if the AND mask is an immediate of the form: 000.....1111111100
447 unsigned TZ = llvm::countr_zero(And_imm);
448 if (TZ != 1 && TZ != 2)
449 // Be conservative here. Shifter operands aren't always free. e.g. On
450 // Swift, left shifter operand of 1 / 2 for free but others are not.
451 // e.g.
452 // ubfx r3, r1, #16, #8
453 // ldr.w r3, [r0, r3, lsl #2]
454 // vs.
455 // mov.w r9, #1020
456 // and.w r2, r9, r1, lsr #14
457 // ldr r2, [r0, r2]
458 continue;
459 And_imm >>= TZ;
460 if (And_imm & (And_imm + 1))
461 continue;
462
463 // Look for (and (srl X, c1), c2).
464 SDValue Srl = N1.getOperand(0);
465 unsigned Srl_imm = 0;
466 if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
467 (Srl_imm <= 2))
468 continue;
469
470 // Make sure first operand is not a shifter operand which would prevent
471 // folding of the left shift.
472 SDValue CPTmp0;
473 SDValue CPTmp1;
474 SDValue CPTmp2;
475 if (isThumb2) {
476 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
477 continue;
478 } else {
479 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
480 SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
481 continue;
482 }
483
484 // Now make the transformation.
485 Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
486 Srl.getOperand(0),
487 CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
488 MVT::i32));
489 N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
490 Srl,
491 CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
492 N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
493 N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
494 CurDAG->UpdateNodeOperands(&N, N0, N1);
495 }
496}
497
498/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
499/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
500/// least on current ARM implementations) which should be avoidded.
501bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
502 if (OptLevel == CodeGenOptLevel::None)
503 return true;
504
505 if (!Subtarget->hasVMLxHazards())
506 return true;
507
508 if (!N->hasOneUse())
509 return false;
510
511 SDNode *Use = *N->use_begin();
512 if (Use->getOpcode() == ISD::CopyToReg)
513 return true;
514 if (Use->isMachineOpcode()) {
515 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
516 CurDAG->getSubtarget().getInstrInfo());
517
518 const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
519 if (MCID.mayStore())
520 return true;
521 unsigned Opcode = MCID.getOpcode();
522 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
523 return true;
524 // vmlx feeding into another vmlx. We actually want to unfold
525 // the use later in the MLxExpansion pass. e.g.
526 // vmla
527 // vmla (stall 8 cycles)
528 //
529 // vmul (5 cycles)
530 // vadd (5 cycles)
531 // vmla
532 // This adds up to about 18 - 19 cycles.
533 //
534 // vmla
535 // vmul (stall 4 cycles)
536 // vadd adds up to about 14 cycles.
537 return TII->isFpMLxInstruction(Opcode);
538 }
539
540 return false;
541}
542
543bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
544 ARM_AM::ShiftOpc ShOpcVal,
545 unsigned ShAmt) {
546 if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
547 return true;
548 if (Shift.hasOneUse())
549 return true;
550 // R << 2 is free.
551 return ShOpcVal == ARM_AM::lsl &&
552 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
553}
554
555bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
556 unsigned MaxShift,
557 unsigned &PowerOfTwo,
558 SDValue &NewMulConst) const {
559 assert(N.getOpcode() == ISD::MUL);
560 assert(MaxShift > 0);
561
562 // If the multiply is used in more than one place then changing the constant
563 // will make other uses incorrect, so don't.
564 if (!N.hasOneUse()) return false;
565 // Check if the multiply is by a constant
566 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
567 if (!MulConst) return false;
568 // If the constant is used in more than one place then modifying it will mean
569 // we need to materialize two constants instead of one, which is a bad idea.
570 if (!MulConst->hasOneUse()) return false;
571 unsigned MulConstVal = MulConst->getZExtValue();
572 if (MulConstVal == 0) return false;
573
574 // Find the largest power of 2 that MulConstVal is a multiple of
575 PowerOfTwo = MaxShift;
576 while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
577 --PowerOfTwo;
578 if (PowerOfTwo == 0) return false;
579 }
580
581 // Only optimise if the new cost is better
582 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
583 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
584 unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
585 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
586 return NewCost < OldCost;
587}
588
589void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
590 CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
591 ReplaceUses(N, M);
592}
593
594bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
595 SDValue &BaseReg,
596 SDValue &Opc,
597 bool CheckProfitability) {
599 return false;
600
601 // If N is a multiply-by-constant and it's profitable to extract a shift and
602 // use it in a shifted operand do so.
603 if (N.getOpcode() == ISD::MUL) {
604 unsigned PowerOfTwo = 0;
605 SDValue NewMulConst;
606 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
607 HandleSDNode Handle(N);
608 SDLoc Loc(N);
609 replaceDAGValue(N.getOperand(1), NewMulConst);
610 BaseReg = Handle.getValue();
611 Opc = CurDAG->getTargetConstant(
612 ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
613 return true;
614 }
615 }
616
617 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
618
619 // Don't match base register only case. That is matched to a separate
620 // lower complexity pattern with explicit register operand.
621 if (ShOpcVal == ARM_AM::no_shift) return false;
622
623 BaseReg = N.getOperand(0);
624 unsigned ShImmVal = 0;
625 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
626 if (!RHS) return false;
627 ShImmVal = RHS->getZExtValue() & 31;
628 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
629 SDLoc(N), MVT::i32);
630 return true;
631}
632
633bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
634 SDValue &BaseReg,
635 SDValue &ShReg,
636 SDValue &Opc,
637 bool CheckProfitability) {
639 return false;
640
641 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
642
643 // Don't match base register only case. That is matched to a separate
644 // lower complexity pattern with explicit register operand.
645 if (ShOpcVal == ARM_AM::no_shift) return false;
646
647 BaseReg = N.getOperand(0);
648 unsigned ShImmVal = 0;
649 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
650 if (RHS) return false;
651
652 ShReg = N.getOperand(1);
653 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
654 return false;
655 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
656 SDLoc(N), MVT::i32);
657 return true;
658}
659
660// Determine whether an ISD::OR's operands are suitable to turn the operation
661// into an addition, which often has more compact encodings.
662bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
663 assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
664 Out = N;
665 return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
666}
667
668
669bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
670 SDValue &Base,
671 SDValue &OffImm) {
672 // Match simple R + imm12 operands.
673
674 // Base only.
675 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
676 !CurDAG->isBaseWithConstantOffset(N)) {
677 if (N.getOpcode() == ISD::FrameIndex) {
678 // Match frame index.
679 int FI = cast<FrameIndexSDNode>(N)->getIndex();
680 Base = CurDAG->getTargetFrameIndex(
681 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
682 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
683 return true;
684 }
685
686 if (N.getOpcode() == ARMISD::Wrapper &&
687 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
688 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
689 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
690 Base = N.getOperand(0);
691 } else
692 Base = N;
693 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
694 return true;
695 }
696
697 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
698 int RHSC = (int)RHS->getSExtValue();
699 if (N.getOpcode() == ISD::SUB)
700 RHSC = -RHSC;
701
702 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
703 Base = N.getOperand(0);
704 if (Base.getOpcode() == ISD::FrameIndex) {
705 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
706 Base = CurDAG->getTargetFrameIndex(
707 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
708 }
709 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
710 return true;
711 }
712 }
713
714 // Base only.
715 Base = N;
716 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
717 return true;
718}
719
720
721
722bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
723 SDValue &Opc) {
724 if (N.getOpcode() == ISD::MUL &&
725 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
726 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
727 // X * [3,5,9] -> X + X * [2,4,8] etc.
728 int RHSC = (int)RHS->getZExtValue();
729 if (RHSC & 1) {
730 RHSC = RHSC & ~1;
732 if (RHSC < 0) {
734 RHSC = - RHSC;
735 }
736 if (isPowerOf2_32(RHSC)) {
737 unsigned ShAmt = Log2_32(RHSC);
738 Base = Offset = N.getOperand(0);
739 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
741 SDLoc(N), MVT::i32);
742 return true;
743 }
744 }
745 }
746 }
747
748 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
749 // ISD::OR that is equivalent to an ISD::ADD.
750 !CurDAG->isBaseWithConstantOffset(N))
751 return false;
752
753 // Leave simple R +/- imm12 operands for LDRi12
754 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
755 int RHSC;
756 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
757 -0x1000+1, 0x1000, RHSC)) // 12 bits.
758 return false;
759 }
760
761 // Otherwise this is R +/- [possibly shifted] R.
763 ARM_AM::ShiftOpc ShOpcVal =
764 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
765 unsigned ShAmt = 0;
766
767 Base = N.getOperand(0);
768 Offset = N.getOperand(1);
769
770 if (ShOpcVal != ARM_AM::no_shift) {
771 // Check to see if the RHS of the shift is a constant, if not, we can't fold
772 // it.
773 if (ConstantSDNode *Sh =
774 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
775 ShAmt = Sh->getZExtValue();
776 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
777 Offset = N.getOperand(1).getOperand(0);
778 else {
779 ShAmt = 0;
780 ShOpcVal = ARM_AM::no_shift;
781 }
782 } else {
783 ShOpcVal = ARM_AM::no_shift;
784 }
785 }
786
787 // Try matching (R shl C) + (R).
788 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
789 !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
790 N.getOperand(0).hasOneUse())) {
791 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
792 if (ShOpcVal != ARM_AM::no_shift) {
793 // Check to see if the RHS of the shift is a constant, if not, we can't
794 // fold it.
795 if (ConstantSDNode *Sh =
796 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
797 ShAmt = Sh->getZExtValue();
798 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
799 Offset = N.getOperand(0).getOperand(0);
800 Base = N.getOperand(1);
801 } else {
802 ShAmt = 0;
803 ShOpcVal = ARM_AM::no_shift;
804 }
805 } else {
806 ShOpcVal = ARM_AM::no_shift;
807 }
808 }
809 }
810
811 // If Offset is a multiply-by-constant and it's profitable to extract a shift
812 // and use it in a shifted operand do so.
813 if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
814 unsigned PowerOfTwo = 0;
815 SDValue NewMulConst;
816 if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
817 HandleSDNode Handle(Offset);
818 replaceDAGValue(Offset.getOperand(1), NewMulConst);
819 Offset = Handle.getValue();
820 ShAmt = PowerOfTwo;
821 ShOpcVal = ARM_AM::lsl;
822 }
823 }
824
825 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
826 SDLoc(N), MVT::i32);
827 return true;
828}
829
830bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
831 SDValue &Offset, SDValue &Opc) {
832 unsigned Opcode = Op->getOpcode();
833 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
834 ? cast<LoadSDNode>(Op)->getAddressingMode()
835 : cast<StoreSDNode>(Op)->getAddressingMode();
838 int Val;
839 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
840 return false;
841
842 Offset = N;
843 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
844 unsigned ShAmt = 0;
845 if (ShOpcVal != ARM_AM::no_shift) {
846 // Check to see if the RHS of the shift is a constant, if not, we can't fold
847 // it.
848 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
849 ShAmt = Sh->getZExtValue();
850 if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
851 Offset = N.getOperand(0);
852 else {
853 ShAmt = 0;
854 ShOpcVal = ARM_AM::no_shift;
855 }
856 } else {
857 ShOpcVal = ARM_AM::no_shift;
858 }
859 }
860
861 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
862 SDLoc(N), MVT::i32);
863 return true;
864}
865
866bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
867 SDValue &Offset, SDValue &Opc) {
868 unsigned Opcode = Op->getOpcode();
869 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
870 ? cast<LoadSDNode>(Op)->getAddressingMode()
871 : cast<StoreSDNode>(Op)->getAddressingMode();
874 int Val;
875 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
876 if (AddSub == ARM_AM::sub) Val *= -1;
877 Offset = CurDAG->getRegister(0, MVT::i32);
878 Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
879 return true;
880 }
881
882 return false;
883}
884
885
886bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
887 SDValue &Offset, SDValue &Opc) {
888 unsigned Opcode = Op->getOpcode();
889 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
890 ? cast<LoadSDNode>(Op)->getAddressingMode()
891 : cast<StoreSDNode>(Op)->getAddressingMode();
894 int Val;
895 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
896 Offset = CurDAG->getRegister(0, MVT::i32);
897 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
899 SDLoc(Op), MVT::i32);
900 return true;
901 }
902
903 return false;
904}
905
906bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
907 Base = N;
908 return true;
909}
910
911bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
913 SDValue &Opc) {
914 if (N.getOpcode() == ISD::SUB) {
915 // X - C is canonicalize to X + -C, no need to handle it here.
916 Base = N.getOperand(0);
917 Offset = N.getOperand(1);
918 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
919 MVT::i32);
920 return true;
921 }
922
923 if (!CurDAG->isBaseWithConstantOffset(N)) {
924 Base = N;
925 if (N.getOpcode() == ISD::FrameIndex) {
926 int FI = cast<FrameIndexSDNode>(N)->getIndex();
927 Base = CurDAG->getTargetFrameIndex(
928 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
929 }
930 Offset = CurDAG->getRegister(0, MVT::i32);
931 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
932 MVT::i32);
933 return true;
934 }
935
936 // If the RHS is +/- imm8, fold into addr mode.
937 int RHSC;
938 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
939 -256 + 1, 256, RHSC)) { // 8 bits.
940 Base = N.getOperand(0);
941 if (Base.getOpcode() == ISD::FrameIndex) {
942 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
943 Base = CurDAG->getTargetFrameIndex(
944 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
945 }
946 Offset = CurDAG->getRegister(0, MVT::i32);
947
949 if (RHSC < 0) {
951 RHSC = -RHSC;
952 }
953 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
954 MVT::i32);
955 return true;
956 }
957
958 Base = N.getOperand(0);
959 Offset = N.getOperand(1);
960 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
961 MVT::i32);
962 return true;
963}
964
965bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
966 SDValue &Offset, SDValue &Opc) {
967 unsigned Opcode = Op->getOpcode();
968 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
969 ? cast<LoadSDNode>(Op)->getAddressingMode()
970 : cast<StoreSDNode>(Op)->getAddressingMode();
973 int Val;
974 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
975 Offset = CurDAG->getRegister(0, MVT::i32);
976 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
977 MVT::i32);
978 return true;
979 }
980
981 Offset = N;
982 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
983 MVT::i32);
984 return true;
985}
986
987bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
988 bool FP16) {
989 if (!CurDAG->isBaseWithConstantOffset(N)) {
990 Base = N;
991 if (N.getOpcode() == ISD::FrameIndex) {
992 int FI = cast<FrameIndexSDNode>(N)->getIndex();
993 Base = CurDAG->getTargetFrameIndex(
994 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
995 } else if (N.getOpcode() == ARMISD::Wrapper &&
996 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
997 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
998 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
999 Base = N.getOperand(0);
1000 }
1001 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1002 SDLoc(N), MVT::i32);
1003 return true;
1004 }
1005
1006 // If the RHS is +/- imm8, fold into addr mode.
1007 int RHSC;
1008 const int Scale = FP16 ? 2 : 4;
1009
1010 if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
1011 Base = N.getOperand(0);
1012 if (Base.getOpcode() == ISD::FrameIndex) {
1013 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1014 Base = CurDAG->getTargetFrameIndex(
1015 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1016 }
1017
1019 if (RHSC < 0) {
1021 RHSC = -RHSC;
1022 }
1023
1024 if (FP16)
1025 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
1026 SDLoc(N), MVT::i32);
1027 else
1028 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
1029 SDLoc(N), MVT::i32);
1030
1031 return true;
1032 }
1033
1034 Base = N;
1035
1036 if (FP16)
1037 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
1038 SDLoc(N), MVT::i32);
1039 else
1040 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1041 SDLoc(N), MVT::i32);
1042
1043 return true;
1044}
1045
1046bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
1048 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
1049}
1050
1051bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
1053 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
1054}
1055
1056bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
1057 SDValue &Align) {
1058 Addr = N;
1059
1060 unsigned Alignment = 0;
1061
1062 MemSDNode *MemN = cast<MemSDNode>(Parent);
1063
1064 if (isa<LSBaseSDNode>(MemN) ||
1065 ((MemN->getOpcode() == ARMISD::VST1_UPD ||
1066 MemN->getOpcode() == ARMISD::VLD1_UPD) &&
1067 MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
1068 // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1069 // The maximum alignment is equal to the memory size being referenced.
1070 llvm::Align MMOAlign = MemN->getAlign();
1071 unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
1072 if (MMOAlign.value() >= MemSize && MemSize > 1)
1073 Alignment = MemSize;
1074 } else {
1075 // All other uses of addrmode6 are for intrinsics. For now just record
1076 // the raw alignment value; it will be refined later based on the legal
1077 // alignment operands for the intrinsic.
1078 Alignment = MemN->getAlign().value();
1079 }
1080
1081 Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1082 return true;
1083}
1084
1085bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1086 SDValue &Offset) {
1087 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1089 if (AM != ISD::POST_INC)
1090 return false;
1091 Offset = N;
1092 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1093 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1094 Offset = CurDAG->getRegister(0, MVT::i32);
1095 }
1096 return true;
1097}
1098
1099bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1100 SDValue &Offset, SDValue &Label) {
1101 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1102 Offset = N.getOperand(0);
1103 SDValue N1 = N.getOperand(1);
1104 Label = CurDAG->getTargetConstant(N1->getAsZExtVal(), SDLoc(N), MVT::i32);
1105 return true;
1106 }
1107
1108 return false;
1109}
1110
1111
1112//===----------------------------------------------------------------------===//
1113// Thumb Addressing Modes
1114//===----------------------------------------------------------------------===//
1115
1117 // Negative numbers are difficult to materialise in thumb1. If we are
1118 // selecting the add of a negative, instead try to select ri with a zero
1119 // offset, so create the add node directly which will become a sub.
1120 if (N.getOpcode() != ISD::ADD)
1121 return false;
1122
1123 // Look for an imm which is not legal for ld/st, but is legal for sub.
1124 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1125 return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
1126
1127 return false;
1128}
1129
1130bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
1131 SDValue &Offset) {
1132 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1133 if (!isNullConstant(N))
1134 return false;
1135
1136 Base = Offset = N;
1137 return true;
1138 }
1139
1140 Base = N.getOperand(0);
1141 Offset = N.getOperand(1);
1142 return true;
1143}
1144
1145bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
1146 SDValue &Offset) {
1148 return false; // Select ri instead
1149 return SelectThumbAddrModeRRSext(N, Base, Offset);
1150}
1151
1152bool
1153ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1154 SDValue &Base, SDValue &OffImm) {
1156 Base = N;
1157 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1158 return true;
1159 }
1160
1161 if (!CurDAG->isBaseWithConstantOffset(N)) {
1162 if (N.getOpcode() == ISD::ADD) {
1163 return false; // We want to select register offset instead
1164 } else if (N.getOpcode() == ARMISD::Wrapper &&
1165 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1166 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1167 N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
1168 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1169 Base = N.getOperand(0);
1170 } else {
1171 Base = N;
1172 }
1173
1174 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1175 return true;
1176 }
1177
1178 // If the RHS is + imm5 * scale, fold into addr mode.
1179 int RHSC;
1180 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1181 Base = N.getOperand(0);
1182 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1183 return true;
1184 }
1185
1186 // Offset is too large, so use register offset instead.
1187 return false;
1188}
1189
1190bool
1191ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1192 SDValue &OffImm) {
1193 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1194}
1195
1196bool
1197ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1198 SDValue &OffImm) {
1199 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1200}
1201
1202bool
1203ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1204 SDValue &OffImm) {
1205 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1206}
1207
1208bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1209 SDValue &Base, SDValue &OffImm) {
1210 if (N.getOpcode() == ISD::FrameIndex) {
1211 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1212 // Only multiples of 4 are allowed for the offset, so the frame object
1213 // alignment must be at least 4.
1214 MachineFrameInfo &MFI = MF->getFrameInfo();
1215 if (MFI.getObjectAlign(FI) < Align(4))
1216 MFI.setObjectAlignment(FI, Align(4));
1217 Base = CurDAG->getTargetFrameIndex(
1218 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1219 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1220 return true;
1221 }
1222
1223 if (!CurDAG->isBaseWithConstantOffset(N))
1224 return false;
1225
1226 if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
1227 // If the RHS is + imm8 * scale, fold into addr mode.
1228 int RHSC;
1229 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1230 Base = N.getOperand(0);
1231 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1232 // Make sure the offset is inside the object, or we might fail to
1233 // allocate an emergency spill slot. (An out-of-range access is UB, but
1234 // it could show up anyway.)
1235 MachineFrameInfo &MFI = MF->getFrameInfo();
1236 if (RHSC * 4 < MFI.getObjectSize(FI)) {
1237 // For LHS+RHS to result in an offset that's a multiple of 4 the object
1238 // indexed by the LHS must be 4-byte aligned.
1239 if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4))
1240 MFI.setObjectAlignment(FI, Align(4));
1241 if (MFI.getObjectAlign(FI) >= Align(4)) {
1242 Base = CurDAG->getTargetFrameIndex(
1243 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1244 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1245 return true;
1246 }
1247 }
1248 }
1249 }
1250
1251 return false;
1252}
1253
1254template <unsigned Shift>
1255bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
1256 SDValue &OffImm) {
1257 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1258 int RHSC;
1259 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
1260 RHSC)) {
1261 Base = N.getOperand(0);
1262 if (N.getOpcode() == ISD::SUB)
1263 RHSC = -RHSC;
1264 OffImm =
1265 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
1266 return true;
1267 }
1268 }
1269
1270 // Base only.
1271 Base = N;
1272 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1273 return true;
1274}
1275
1276
1277//===----------------------------------------------------------------------===//
1278// Thumb 2 Addressing Modes
1279//===----------------------------------------------------------------------===//
1280
1281
1282bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1283 SDValue &Base, SDValue &OffImm) {
1284 // Match simple R + imm12 operands.
1285
1286 // Base only.
1287 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1288 !CurDAG->isBaseWithConstantOffset(N)) {
1289 if (N.getOpcode() == ISD::FrameIndex) {
1290 // Match frame index.
1291 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1292 Base = CurDAG->getTargetFrameIndex(
1293 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1294 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1295 return true;
1296 }
1297
1298 if (N.getOpcode() == ARMISD::Wrapper &&
1299 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1300 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1301 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1302 Base = N.getOperand(0);
1303 if (Base.getOpcode() == ISD::TargetConstantPool)
1304 return false; // We want to select t2LDRpci instead.
1305 } else
1306 Base = N;
1307 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1308 return true;
1309 }
1310
1311 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1312 if (SelectT2AddrModeImm8(N, Base, OffImm))
1313 // Let t2LDRi8 handle (R - imm8).
1314 return false;
1315
1316 int RHSC = (int)RHS->getZExtValue();
1317 if (N.getOpcode() == ISD::SUB)
1318 RHSC = -RHSC;
1319
1320 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1321 Base = N.getOperand(0);
1322 if (Base.getOpcode() == ISD::FrameIndex) {
1323 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1324 Base = CurDAG->getTargetFrameIndex(
1325 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1326 }
1327 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1328 return true;
1329 }
1330 }
1331
1332 // Base only.
1333 Base = N;
1334 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1335 return true;
1336}
1337
1338template <unsigned Shift>
1339bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
1340 SDValue &OffImm) {
1341 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1342 int RHSC;
1343 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
1344 Base = N.getOperand(0);
1345 if (Base.getOpcode() == ISD::FrameIndex) {
1346 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1347 Base = CurDAG->getTargetFrameIndex(
1348 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1349 }
1350
1351 if (N.getOpcode() == ISD::SUB)
1352 RHSC = -RHSC;
1353 OffImm =
1354 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
1355 return true;
1356 }
1357 }
1358
1359 // Base only.
1360 Base = N;
1361 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1362 return true;
1363}
1364
1365bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1366 SDValue &Base, SDValue &OffImm) {
1367 // Match simple R - imm8 operands.
1368 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1369 !CurDAG->isBaseWithConstantOffset(N))
1370 return false;
1371
1372 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1373 int RHSC = (int)RHS->getSExtValue();
1374 if (N.getOpcode() == ISD::SUB)
1375 RHSC = -RHSC;
1376
1377 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1378 Base = N.getOperand(0);
1379 if (Base.getOpcode() == ISD::FrameIndex) {
1380 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1381 Base = CurDAG->getTargetFrameIndex(
1382 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1383 }
1384 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1385 return true;
1386 }
1387 }
1388
1389 return false;
1390}
1391
1392bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1393 SDValue &OffImm){
1394 unsigned Opcode = Op->getOpcode();
1395 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1396 ? cast<LoadSDNode>(Op)->getAddressingMode()
1397 : cast<StoreSDNode>(Op)->getAddressingMode();
1398 int RHSC;
1399 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1400 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1401 ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1402 : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1403 return true;
1404 }
1405
1406 return false;
1407}
1408
1409template <unsigned Shift>
1410bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
1411 SDValue &OffImm) {
1412 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1413 int RHSC;
1414 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
1415 RHSC)) {
1416 Base = N.getOperand(0);
1417 if (Base.getOpcode() == ISD::FrameIndex) {
1418 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1419 Base = CurDAG->getTargetFrameIndex(
1420 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1421 }
1422
1423 if (N.getOpcode() == ISD::SUB)
1424 RHSC = -RHSC;
1425 OffImm =
1426 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
1427 return true;
1428 }
1429 }
1430
1431 // Base only.
1432 Base = N;
1433 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1434 return true;
1435}
1436
1437template <unsigned Shift>
1438bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1439 SDValue &OffImm) {
1440 return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
1441}
1442
1443bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1444 SDValue &OffImm,
1445 unsigned Shift) {
1446 unsigned Opcode = Op->getOpcode();
1448 switch (Opcode) {
1449 case ISD::LOAD:
1450 AM = cast<LoadSDNode>(Op)->getAddressingMode();
1451 break;
1452 case ISD::STORE:
1453 AM = cast<StoreSDNode>(Op)->getAddressingMode();
1454 break;
1455 case ISD::MLOAD:
1456 AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
1457 break;
1458 case ISD::MSTORE:
1459 AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
1460 break;
1461 default:
1462 llvm_unreachable("Unexpected Opcode for Imm7Offset");
1463 }
1464
1465 int RHSC;
1466 // 7 bit constant, shifted by Shift.
1467 if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
1468 OffImm =
1469 ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1470 ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
1471 : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
1472 MVT::i32);
1473 return true;
1474 }
1475 return false;
1476}
1477
1478template <int Min, int Max>
1479bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
1480 int Val;
1481 if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
1482 OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
1483 return true;
1484 }
1485 return false;
1486}
1487
1488bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1489 SDValue &Base,
1490 SDValue &OffReg, SDValue &ShImm) {
1491 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1492 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1493 return false;
1494
1495 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1496 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1497 int RHSC = (int)RHS->getZExtValue();
1498 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1499 return false;
1500 else if (RHSC < 0 && RHSC >= -255) // 8 bits
1501 return false;
1502 }
1503
1504 // Look for (R + R) or (R + (R << [1,2,3])).
1505 unsigned ShAmt = 0;
1506 Base = N.getOperand(0);
1507 OffReg = N.getOperand(1);
1508
1509 // Swap if it is ((R << c) + R).
1511 if (ShOpcVal != ARM_AM::lsl) {
1512 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1513 if (ShOpcVal == ARM_AM::lsl)
1514 std::swap(Base, OffReg);
1515 }
1516
1517 if (ShOpcVal == ARM_AM::lsl) {
1518 // Check to see if the RHS of the shift is a constant, if not, we can't fold
1519 // it.
1520 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1521 ShAmt = Sh->getZExtValue();
1522 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1523 OffReg = OffReg.getOperand(0);
1524 else {
1525 ShAmt = 0;
1526 }
1527 }
1528 }
1529
1530 // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1531 // and use it in a shifted operand do so.
1532 if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1533 unsigned PowerOfTwo = 0;
1534 SDValue NewMulConst;
1535 if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
1536 HandleSDNode Handle(OffReg);
1537 replaceDAGValue(OffReg.getOperand(1), NewMulConst);
1538 OffReg = Handle.getValue();
1539 ShAmt = PowerOfTwo;
1540 }
1541 }
1542
1543 ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1544
1545 return true;
1546}
1547
1548bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1549 SDValue &OffImm) {
1550 // This *must* succeed since it's used for the irreplaceable ldrex and strex
1551 // instructions.
1552 Base = N;
1553 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1554
1555 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1556 return true;
1557
1558 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1559 if (!RHS)
1560 return true;
1561
1562 uint32_t RHSC = (int)RHS->getZExtValue();
1563 if (RHSC > 1020 || RHSC % 4 != 0)
1564 return true;
1565
1566 Base = N.getOperand(0);
1567 if (Base.getOpcode() == ISD::FrameIndex) {
1568 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1569 Base = CurDAG->getTargetFrameIndex(
1570 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1571 }
1572
1573 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1574 return true;
1575}
1576
1577//===--------------------------------------------------------------------===//
1578
1579/// getAL - Returns a ARMCC::AL immediate node.
1580static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1581 return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1582}
1583
1584void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1585 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1586 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
1587}
1588
1589bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1590 LoadSDNode *LD = cast<LoadSDNode>(N);
1591 ISD::MemIndexedMode AM = LD->getAddressingMode();
1592 if (AM == ISD::UNINDEXED)
1593 return false;
1594
1595 EVT LoadedVT = LD->getMemoryVT();
1596 SDValue Offset, AMOpc;
1597 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1598 unsigned Opcode = 0;
1599 bool Match = false;
1600 if (LoadedVT == MVT::i32 && isPre &&
1601 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1602 Opcode = ARM::LDR_PRE_IMM;
1603 Match = true;
1604 } else if (LoadedVT == MVT::i32 && !isPre &&
1605 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1606 Opcode = ARM::LDR_POST_IMM;
1607 Match = true;
1608 } else if (LoadedVT == MVT::i32 &&
1609 SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1610 Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1611 Match = true;
1612
1613 } else if (LoadedVT == MVT::i16 &&
1614 SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1615 Match = true;
1616 Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1617 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1618 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1619 } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1620 if (LD->getExtensionType() == ISD::SEXTLOAD) {
1621 if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1622 Match = true;
1623 Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1624 }
1625 } else {
1626 if (isPre &&
1627 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1628 Match = true;
1629 Opcode = ARM::LDRB_PRE_IMM;
1630 } else if (!isPre &&
1631 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1632 Match = true;
1633 Opcode = ARM::LDRB_POST_IMM;
1634 } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1635 Match = true;
1636 Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1637 }
1638 }
1639 }
1640
1641 if (Match) {
1642 if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1643 SDValue Chain = LD->getChain();
1644 SDValue Base = LD->getBasePtr();
1645 SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1646 CurDAG->getRegister(0, MVT::i32), Chain };
1647 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1648 MVT::Other, Ops);
1649 transferMemOperands(N, New);
1650 ReplaceNode(N, New);
1651 return true;
1652 } else {
1653 SDValue Chain = LD->getChain();
1654 SDValue Base = LD->getBasePtr();
1655 SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1656 CurDAG->getRegister(0, MVT::i32), Chain };
1657 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1658 MVT::Other, Ops);
1659 transferMemOperands(N, New);
1660 ReplaceNode(N, New);
1661 return true;
1662 }
1663 }
1664
1665 return false;
1666}
1667
1668bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1669 LoadSDNode *LD = cast<LoadSDNode>(N);
1670 EVT LoadedVT = LD->getMemoryVT();
1671 ISD::MemIndexedMode AM = LD->getAddressingMode();
1672 if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1673 LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1674 return false;
1675
1676 auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1677 if (!COffs || COffs->getZExtValue() != 4)
1678 return false;
1679
1680 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1681 // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1682 // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1683 // ISel.
1684 SDValue Chain = LD->getChain();
1685 SDValue Base = LD->getBasePtr();
1686 SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1687 CurDAG->getRegister(0, MVT::i32), Chain };
1688 SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1689 MVT::i32, MVT::Other, Ops);
1690 transferMemOperands(N, New);
1691 ReplaceNode(N, New);
1692 return true;
1693}
1694
1695bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1696 LoadSDNode *LD = cast<LoadSDNode>(N);
1697 ISD::MemIndexedMode AM = LD->getAddressingMode();
1698 if (AM == ISD::UNINDEXED)
1699 return false;
1700
1701 EVT LoadedVT = LD->getMemoryVT();
1702 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1704 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1705 unsigned Opcode = 0;
1706 bool Match = false;
1707 if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1708 switch (LoadedVT.getSimpleVT().SimpleTy) {
1709 case MVT::i32:
1710 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1711 break;
1712 case MVT::i16:
1713 if (isSExtLd)
1714 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1715 else
1716 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1717 break;
1718 case MVT::i8:
1719 case MVT::i1:
1720 if (isSExtLd)
1721 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1722 else
1723 Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1724 break;
1725 default:
1726 return false;
1727 }
1728 Match = true;
1729 }
1730
1731 if (Match) {
1732 SDValue Chain = LD->getChain();
1733 SDValue Base = LD->getBasePtr();
1734 SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1735 CurDAG->getRegister(0, MVT::i32), Chain };
1736 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1737 MVT::Other, Ops);
1738 transferMemOperands(N, New);
1739 ReplaceNode(N, New);
1740 return true;
1741 }
1742
1743 return false;
1744}
1745
1746bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
1747 EVT LoadedVT;
1748 unsigned Opcode = 0;
1749 bool isSExtLd, isPre;
1750 Align Alignment;
1751 ARMVCC::VPTCodes Pred;
1752 SDValue PredReg;
1753 SDValue Chain, Base, Offset;
1754
1755 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
1756 ISD::MemIndexedMode AM = LD->getAddressingMode();
1757 if (AM == ISD::UNINDEXED)
1758 return false;
1759 LoadedVT = LD->getMemoryVT();
1760 if (!LoadedVT.isVector())
1761 return false;
1762
1763 Chain = LD->getChain();
1764 Base = LD->getBasePtr();
1765 Offset = LD->getOffset();
1766 Alignment = LD->getAlign();
1767 isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1768 isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1769 Pred = ARMVCC::None;
1770 PredReg = CurDAG->getRegister(0, MVT::i32);
1771 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
1772 ISD::MemIndexedMode AM = LD->getAddressingMode();
1773 if (AM == ISD::UNINDEXED)
1774 return false;
1775 LoadedVT = LD->getMemoryVT();
1776 if (!LoadedVT.isVector())
1777 return false;
1778
1779 Chain = LD->getChain();
1780 Base = LD->getBasePtr();
1781 Offset = LD->getOffset();
1782 Alignment = LD->getAlign();
1783 isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1784 isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1785 Pred = ARMVCC::Then;
1786 PredReg = LD->getMask();
1787 } else
1788 llvm_unreachable("Expected a Load or a Masked Load!");
1789
1790 // We allow LE non-masked loads to change the type (for example use a vldrb.8
1791 // as opposed to a vldrw.32). This can allow extra addressing modes or
1792 // alignments for what is otherwise an equivalent instruction.
1793 bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);
1794
1795 SDValue NewOffset;
1796 if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
1797 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
1798 if (isSExtLd)
1799 Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
1800 else
1801 Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
1802 } else if (LoadedVT == MVT::v8i8 &&
1803 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
1804 if (isSExtLd)
1805 Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
1806 else
1807 Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
1808 } else if (LoadedVT == MVT::v4i8 &&
1809 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
1810 if (isSExtLd)
1811 Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
1812 else
1813 Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
1814 } else if (Alignment >= Align(4) &&
1815 (CanChangeType || LoadedVT == MVT::v4i32 ||
1816 LoadedVT == MVT::v4f32) &&
1817 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
1818 Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
1819 else if (Alignment >= Align(2) &&
1820 (CanChangeType || LoadedVT == MVT::v8i16 ||
1821 LoadedVT == MVT::v8f16) &&
1822 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
1823 Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
1824 else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
1825 SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
1826 Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
1827 else
1828 return false;
1829
1830 SDValue Ops[] = {Base,
1831 NewOffset,
1832 CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32),
1833 PredReg,
1834 CurDAG->getRegister(0, MVT::i32), // tp_reg
1835 Chain};
1836 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
1837 N->getValueType(0), MVT::Other, Ops);
1838 transferMemOperands(N, New);
1839 ReplaceUses(SDValue(N, 0), SDValue(New, 1));
1840 ReplaceUses(SDValue(N, 1), SDValue(New, 0));
1841 ReplaceUses(SDValue(N, 2), SDValue(New, 2));
1842 CurDAG->RemoveDeadNode(N);
1843 return true;
1844}
1845
1846/// Form a GPRPair pseudo register from a pair of GPR regs.
1847SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1848 SDLoc dl(V0.getNode());
1849 SDValue RegClass =
1850 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1851 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1852 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1853 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1854 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1855}
1856
1857/// Form a D register from a pair of S registers.
1858SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1859 SDLoc dl(V0.getNode());
1860 SDValue RegClass =
1861 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1862 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1863 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1864 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1865 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1866}
1867
1868/// Form a quad register from a pair of D registers.
1869SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1870 SDLoc dl(V0.getNode());
1871 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1872 MVT::i32);
1873 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1874 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1875 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1876 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1877}
1878
1879/// Form 4 consecutive D registers from a pair of Q registers.
1880SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1881 SDLoc dl(V0.getNode());
1882 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1883 MVT::i32);
1884 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1885 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1886 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1887 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1888}
1889
1890/// Form 4 consecutive S registers.
1891SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1892 SDValue V2, SDValue V3) {
1893 SDLoc dl(V0.getNode());
1894 SDValue RegClass =
1895 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1896 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1897 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1898 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1899 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1900 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1901 V2, SubReg2, V3, SubReg3 };
1902 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1903}
1904
1905/// Form 4 consecutive D registers.
1906SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1907 SDValue V2, SDValue V3) {
1908 SDLoc dl(V0.getNode());
1909 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1910 MVT::i32);
1911 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1912 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1913 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1914 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1915 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1916 V2, SubReg2, V3, SubReg3 };
1917 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1918}
1919
1920/// Form 4 consecutive Q registers.
1921SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1922 SDValue V2, SDValue V3) {
1923 SDLoc dl(V0.getNode());
1924 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1925 MVT::i32);
1926 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1927 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1928 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1929 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1930 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1931 V2, SubReg2, V3, SubReg3 };
1932 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1933}
1934
1935/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1936/// of a NEON VLD or VST instruction. The supported values depend on the
1937/// number of registers being loaded.
1938SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1939 unsigned NumVecs, bool is64BitVector) {
1940 unsigned NumRegs = NumVecs;
1941 if (!is64BitVector && NumVecs < 3)
1942 NumRegs *= 2;
1943
1944 unsigned Alignment = Align->getAsZExtVal();
1945 if (Alignment >= 32 && NumRegs == 4)
1946 Alignment = 32;
1947 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1948 Alignment = 16;
1949 else if (Alignment >= 8)
1950 Alignment = 8;
1951 else
1952 Alignment = 0;
1953
1954 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1955}
1956
1957static bool isVLDfixed(unsigned Opc)
1958{
1959 switch (Opc) {
1960 default: return false;
1961 case ARM::VLD1d8wb_fixed : return true;
1962 case ARM::VLD1d16wb_fixed : return true;
1963 case ARM::VLD1d64Qwb_fixed : return true;
1964 case ARM::VLD1d32wb_fixed : return true;
1965 case ARM::VLD1d64wb_fixed : return true;
1966 case ARM::VLD1d8TPseudoWB_fixed : return true;
1967 case ARM::VLD1d16TPseudoWB_fixed : return true;
1968 case ARM::VLD1d32TPseudoWB_fixed : return true;
1969 case ARM::VLD1d64TPseudoWB_fixed : return true;
1970 case ARM::VLD1d8QPseudoWB_fixed : return true;
1971 case ARM::VLD1d16QPseudoWB_fixed : return true;
1972 case ARM::VLD1d32QPseudoWB_fixed : return true;
1973 case ARM::VLD1d64QPseudoWB_fixed : return true;
1974 case ARM::VLD1q8wb_fixed : return true;
1975 case ARM::VLD1q16wb_fixed : return true;
1976 case ARM::VLD1q32wb_fixed : return true;
1977 case ARM::VLD1q64wb_fixed : return true;
1978 case ARM::VLD1DUPd8wb_fixed : return true;
1979 case ARM::VLD1DUPd16wb_fixed : return true;
1980 case ARM::VLD1DUPd32wb_fixed : return true;
1981 case ARM::VLD1DUPq8wb_fixed : return true;
1982 case ARM::VLD1DUPq16wb_fixed : return true;
1983 case ARM::VLD1DUPq32wb_fixed : return true;
1984 case ARM::VLD2d8wb_fixed : return true;
1985 case ARM::VLD2d16wb_fixed : return true;
1986 case ARM::VLD2d32wb_fixed : return true;
1987 case ARM::VLD2q8PseudoWB_fixed : return true;
1988 case ARM::VLD2q16PseudoWB_fixed : return true;
1989 case ARM::VLD2q32PseudoWB_fixed : return true;
1990 case ARM::VLD2DUPd8wb_fixed : return true;
1991 case ARM::VLD2DUPd16wb_fixed : return true;
1992 case ARM::VLD2DUPd32wb_fixed : return true;
1993 case ARM::VLD2DUPq8OddPseudoWB_fixed: return true;
1994 case ARM::VLD2DUPq16OddPseudoWB_fixed: return true;
1995 case ARM::VLD2DUPq32OddPseudoWB_fixed: return true;
1996 }
1997}
1998
1999static bool isVSTfixed(unsigned Opc)
2000{
2001 switch (Opc) {
2002 default: return false;
2003 case ARM::VST1d8wb_fixed : return true;
2004 case ARM::VST1d16wb_fixed : return true;
2005 case ARM::VST1d32wb_fixed : return true;
2006 case ARM::VST1d64wb_fixed : return true;
2007 case ARM::VST1q8wb_fixed : return true;
2008 case ARM::VST1q16wb_fixed : return true;
2009 case ARM::VST1q32wb_fixed : return true;
2010 case ARM::VST1q64wb_fixed : return true;
2011 case ARM::VST1d8TPseudoWB_fixed : return true;
2012 case ARM::VST1d16TPseudoWB_fixed : return true;
2013 case ARM::VST1d32TPseudoWB_fixed : return true;
2014 case ARM::VST1d64TPseudoWB_fixed : return true;
2015 case ARM::VST1d8QPseudoWB_fixed : return true;
2016 case ARM::VST1d16QPseudoWB_fixed : return true;
2017 case ARM::VST1d32QPseudoWB_fixed : return true;
2018 case ARM::VST1d64QPseudoWB_fixed : return true;
2019 case ARM::VST2d8wb_fixed : return true;
2020 case ARM::VST2d16wb_fixed : return true;
2021 case ARM::VST2d32wb_fixed : return true;
2022 case ARM::VST2q8PseudoWB_fixed : return true;
2023 case ARM::VST2q16PseudoWB_fixed : return true;
2024 case ARM::VST2q32PseudoWB_fixed : return true;
2025 }
2026}
2027
2028// Get the register stride update opcode of a VLD/VST instruction that
2029// is otherwise equivalent to the given fixed stride updating instruction.
2030static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
2031 assert((isVLDfixed(Opc) || isVSTfixed(Opc))
2032 && "Incorrect fixed stride updating instruction.");
2033 switch (Opc) {
2034 default: break;
2035 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
2036 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
2037 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
2038 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
2039 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
2040 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
2041 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
2042 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
2043 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
2044 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
2045 case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
2046 case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
2047 case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
2048 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
2049 case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
2050 case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
2051 case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
2052 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
2053 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
2054 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
2055 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
2056 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
2057 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
2058 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
2059 case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;
2060 case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;
2061 case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;
2062
2063 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
2064 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
2065 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
2066 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
2067 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
2068 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
2069 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
2070 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
2071 case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;
2072 case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;
2073 case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;
2074 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
2075 case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;
2076 case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;
2077 case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;
2078 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
2079
2080 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
2081 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
2082 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
2083 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
2084 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
2085 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
2086
2087 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
2088 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
2089 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
2090 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
2091 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
2092 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
2093
2094 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
2095 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
2096 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
2097 }
2098 return Opc; // If not one we handle, return it unchanged.
2099}
2100
2101/// Returns true if the given increment is a Constant known to be equal to the
2102/// access size performed by a NEON load/store. This means the "[rN]!" form can
2103/// be used.
2104static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
2105 auto C = dyn_cast<ConstantSDNode>(Inc);
2106 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
2107}
2108
2109void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
2110 const uint16_t *DOpcodes,
2111 const uint16_t *QOpcodes0,
2112 const uint16_t *QOpcodes1) {
2113 assert(Subtarget->hasNEON());
2114 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
2115 SDLoc dl(N);
2116
2117 SDValue MemAddr, Align;
2118 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2119 // nodes are not intrinsics.
2120 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2121 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2122 return;
2123
2124 SDValue Chain = N->getOperand(0);
2125 EVT VT = N->getValueType(0);
2126 bool is64BitVector = VT.is64BitVector();
2127 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2128
2129 unsigned OpcodeIndex;
2130 switch (VT.getSimpleVT().SimpleTy) {
2131 default: llvm_unreachable("unhandled vld type");
2132 // Double-register operations:
2133 case MVT::v8i8: OpcodeIndex = 0; break;
2134 case MVT::v4f16:
2135 case MVT::v4bf16:
2136 case MVT::v4i16: OpcodeIndex = 1; break;
2137 case MVT::v2f32:
2138 case MVT::v2i32: OpcodeIndex = 2; break;
2139 case MVT::v1i64: OpcodeIndex = 3; break;
2140 // Quad-register operations:
2141 case MVT::v16i8: OpcodeIndex = 0; break;
2142 case MVT::v8f16:
2143 case MVT::v8bf16:
2144 case MVT::v8i16: OpcodeIndex = 1; break;
2145 case MVT::v4f32:
2146 case MVT::v4i32: OpcodeIndex = 2; break;
2147 case MVT::v2f64:
2148 case MVT::v2i64: OpcodeIndex = 3; break;
2149 }
2150
2151 EVT ResTy;
2152 if (NumVecs == 1)
2153 ResTy = VT;
2154 else {
2155 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2156 if (!is64BitVector)
2157 ResTyElts *= 2;
2158 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
2159 }
2160 std::vector<EVT> ResTys;
2161 ResTys.push_back(ResTy);
2162 if (isUpdating)
2163 ResTys.push_back(MVT::i32);
2164 ResTys.push_back(MVT::Other);
2165
2166 SDValue Pred = getAL(CurDAG, dl);
2167 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2168 SDNode *VLd;
2170
2171 // Double registers and VLD1/VLD2 quad registers are directly supported.
2172 if (is64BitVector || NumVecs <= 2) {
2173 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2174 QOpcodes0[OpcodeIndex]);
2175 Ops.push_back(MemAddr);
2176 Ops.push_back(Align);
2177 if (isUpdating) {
2178 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2179 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2180 if (!IsImmUpdate) {
2181 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
2182 // check for the opcode rather than the number of vector elements.
2183 if (isVLDfixed(Opc))
2185 Ops.push_back(Inc);
2186 // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
2187 // the operands if not such an opcode.
2188 } else if (!isVLDfixed(Opc))
2189 Ops.push_back(Reg0);
2190 }
2191 Ops.push_back(Pred);
2192 Ops.push_back(Reg0);
2193 Ops.push_back(Chain);
2194 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2195
2196 } else {
2197 // Otherwise, quad registers are loaded with two separate instructions,
2198 // where one loads the even registers and the other loads the odd registers.
2199 EVT AddrTy = MemAddr.getValueType();
2200
2201 // Load the even subregs. This is always an updating load, so that it
2202 // provides the address to the second load for the odd subregs.
2203 SDValue ImplDef =
2204 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
2205 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
2206 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2207 ResTy, AddrTy, MVT::Other, OpsA);
2208 Chain = SDValue(VLdA, 2);
2209
2210 // Load the odd subregs.
2211 Ops.push_back(SDValue(VLdA, 1));
2212 Ops.push_back(Align);
2213 if (isUpdating) {
2214 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2215 assert(isa<ConstantSDNode>(Inc.getNode()) &&
2216 "only constant post-increment update allowed for VLD3/4");
2217 (void)Inc;
2218 Ops.push_back(Reg0);
2219 }
2220 Ops.push_back(SDValue(VLdA, 0));
2221 Ops.push_back(Pred);
2222 Ops.push_back(Reg0);
2223 Ops.push_back(Chain);
2224 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
2225 }
2226
2227 // Transfer memoperands.
2228 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2229 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});
2230
2231 if (NumVecs == 1) {
2232 ReplaceNode(N, VLd);
2233 return;
2234 }
2235
2236 // Extract out the subregisters.
2237 SDValue SuperReg = SDValue(VLd, 0);
2238 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2239 ARM::qsub_3 == ARM::qsub_0 + 3,
2240 "Unexpected subreg numbering");
2241 unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
2242 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2243 ReplaceUses(SDValue(N, Vec),
2244 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2245 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
2246 if (isUpdating)
2247 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
2248 CurDAG->RemoveDeadNode(N);
2249}
2250
2251void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
2252 const uint16_t *DOpcodes,
2253 const uint16_t *QOpcodes0,
2254 const uint16_t *QOpcodes1) {
2255 assert(Subtarget->hasNEON());
2256 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
2257 SDLoc dl(N);
2258
2259 SDValue MemAddr, Align;
2260 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2261 // nodes are not intrinsics.
2262 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2263 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2264 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2265 return;
2266
2267 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2268
2269 SDValue Chain = N->getOperand(0);
2270 EVT VT = N->getOperand(Vec0Idx).getValueType();
2271 bool is64BitVector = VT.is64BitVector();
2272 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2273
2274 unsigned OpcodeIndex;
2275 switch (VT.getSimpleVT().SimpleTy) {
2276 default: llvm_unreachable("unhandled vst type");
2277 // Double-register operations:
2278 case MVT::v8i8: OpcodeIndex = 0; break;
2279 case MVT::v4f16:
2280 case MVT::v4bf16:
2281 case MVT::v4i16: OpcodeIndex = 1; break;
2282 case MVT::v2f32:
2283 case MVT::v2i32: OpcodeIndex = 2; break;
2284 case MVT::v1i64: OpcodeIndex = 3; break;
2285 // Quad-register operations:
2286 case MVT::v16i8: OpcodeIndex = 0; break;
2287 case MVT::v8f16:
2288 case MVT::v8bf16:
2289 case MVT::v8i16: OpcodeIndex = 1; break;
2290 case MVT::v4f32:
2291 case MVT::v4i32: OpcodeIndex = 2; break;
2292 case MVT::v2f64:
2293 case MVT::v2i64: OpcodeIndex = 3; break;
2294 }
2295
2296 std::vector<EVT> ResTys;
2297 if (isUpdating)
2298 ResTys.push_back(MVT::i32);
2299 ResTys.push_back(MVT::Other);
2300
2301 SDValue Pred = getAL(CurDAG, dl);
2302 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2304
2305 // Double registers and VST1/VST2 quad registers are directly supported.
2306 if (is64BitVector || NumVecs <= 2) {
2307 SDValue SrcReg;
2308 if (NumVecs == 1) {
2309 SrcReg = N->getOperand(Vec0Idx);
2310 } else if (is64BitVector) {
2311 // Form a REG_SEQUENCE to force register allocation.
2312 SDValue V0 = N->getOperand(Vec0Idx + 0);
2313 SDValue V1 = N->getOperand(Vec0Idx + 1);
2314 if (NumVecs == 2)
2315 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2316 else {
2317 SDValue V2 = N->getOperand(Vec0Idx + 2);
2318 // If it's a vst3, form a quad D-register and leave the last part as
2319 // an undef.
2320 SDValue V3 = (NumVecs == 3)
2321 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2322 : N->getOperand(Vec0Idx + 3);
2323 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2324 }
2325 } else {
2326 // Form a QQ register.
2327 SDValue Q0 = N->getOperand(Vec0Idx);
2328 SDValue Q1 = N->getOperand(Vec0Idx + 1);
2329 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2330 }
2331
2332 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2333 QOpcodes0[OpcodeIndex]);
2334 Ops.push_back(MemAddr);
2335 Ops.push_back(Align);
2336 if (isUpdating) {
2337 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2338 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2339 if (!IsImmUpdate) {
2340 // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
2341 // check for the opcode rather than the number of vector elements.
2342 if (isVSTfixed(Opc))
2344 Ops.push_back(Inc);
2345 }
2346 // VST1/VST2 fixed increment does not need Reg0 so only include it in
2347 // the operands if not such an opcode.
2348 else if (!isVSTfixed(Opc))
2349 Ops.push_back(Reg0);
2350 }
2351 Ops.push_back(SrcReg);
2352 Ops.push_back(Pred);
2353 Ops.push_back(Reg0);
2354 Ops.push_back(Chain);
2355 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2356
2357 // Transfer memoperands.
2358 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});
2359
2360 ReplaceNode(N, VSt);
2361 return;
2362 }
2363
2364 // Otherwise, quad registers are stored with two separate instructions,
2365 // where one stores the even registers and the other stores the odd registers.
2366
2367 // Form the QQQQ REG_SEQUENCE.
2368 SDValue V0 = N->getOperand(Vec0Idx + 0);
2369 SDValue V1 = N->getOperand(Vec0Idx + 1);
2370 SDValue V2 = N->getOperand(Vec0Idx + 2);
2371 SDValue V3 = (NumVecs == 3)
2372 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2373 : N->getOperand(Vec0Idx + 3);
2374 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2375
2376 // Store the even D registers. This is always an updating store, so that it
2377 // provides the address to the second store for the odd subregs.
2378 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2379 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2380 MemAddr.getValueType(),
2381 MVT::Other, OpsA);
2382 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
2383 Chain = SDValue(VStA, 1);
2384
2385 // Store the odd D registers.
2386 Ops.push_back(SDValue(VStA, 0));
2387 Ops.push_back(Align);
2388 if (isUpdating) {
2389 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2390 assert(isa<ConstantSDNode>(Inc.getNode()) &&
2391 "only constant post-increment update allowed for VST3/4");
2392 (void)Inc;
2393 Ops.push_back(Reg0);
2394 }
2395 Ops.push_back(RegSeq);
2396 Ops.push_back(Pred);
2397 Ops.push_back(Reg0);
2398 Ops.push_back(Chain);
2399 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2400 Ops);
2401 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
2402 ReplaceNode(N, VStB);
2403}
2404
2405void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2406 unsigned NumVecs,
2407 const uint16_t *DOpcodes,
2408 const uint16_t *QOpcodes) {
2409 assert(Subtarget->hasNEON());
2410 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2411 SDLoc dl(N);
2412
2413 SDValue MemAddr, Align;
2414 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2415 // nodes are not intrinsics.
2416 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2417 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2418 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2419 return;
2420
2421 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2422
2423 SDValue Chain = N->getOperand(0);
2424 unsigned Lane = N->getConstantOperandVal(Vec0Idx + NumVecs);
2425 EVT VT = N->getOperand(Vec0Idx).getValueType();
2426 bool is64BitVector = VT.is64BitVector();
2427
2428 unsigned Alignment = 0;
2429 if (NumVecs != 3) {
2430 Alignment = Align->getAsZExtVal();
2431 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2432 if (Alignment > NumBytes)
2433 Alignment = NumBytes;
2434 if (Alignment < 8 && Alignment < NumBytes)
2435 Alignment = 0;
2436 // Alignment must be a power of two; make sure of that.
2437 Alignment = (Alignment & -Alignment);
2438 if (Alignment == 1)
2439 Alignment = 0;
2440 }
2441 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2442
2443 unsigned OpcodeIndex;
2444 switch (VT.getSimpleVT().SimpleTy) {
2445 default: llvm_unreachable("unhandled vld/vst lane type");
2446 // Double-register operations:
2447 case MVT::v8i8: OpcodeIndex = 0; break;
2448 case MVT::v4f16:
2449 case MVT::v4bf16:
2450 case MVT::v4i16: OpcodeIndex = 1; break;
2451 case MVT::v2f32:
2452 case MVT::v2i32: OpcodeIndex = 2; break;
2453 // Quad-register operations:
2454 case MVT::v8f16:
2455 case MVT::v8bf16:
2456 case MVT::v8i16: OpcodeIndex = 0; break;
2457 case MVT::v4f32:
2458 case MVT::v4i32: OpcodeIndex = 1; break;
2459 }
2460
2461 std::vector<EVT> ResTys;
2462 if (IsLoad) {
2463 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2464 if (!is64BitVector)
2465 ResTyElts *= 2;
2466 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2467 MVT::i64, ResTyElts));
2468 }
2469 if (isUpdating)
2470 ResTys.push_back(MVT::i32);
2471 ResTys.push_back(MVT::Other);
2472
2473 SDValue Pred = getAL(CurDAG, dl);
2474 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2475
2477 Ops.push_back(MemAddr);
2478 Ops.push_back(Align);
2479 if (isUpdating) {
2480 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2481 bool IsImmUpdate =
2482 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2483 Ops.push_back(IsImmUpdate ? Reg0 : Inc);
2484 }
2485
2486 SDValue SuperReg;
2487 SDValue V0 = N->getOperand(Vec0Idx + 0);
2488 SDValue V1 = N->getOperand(Vec0Idx + 1);
2489 if (NumVecs == 2) {
2490 if (is64BitVector)
2491 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2492 else
2493 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2494 } else {
2495 SDValue V2 = N->getOperand(Vec0Idx + 2);
2496 SDValue V3 = (NumVecs == 3)
2497 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2498 : N->getOperand(Vec0Idx + 3);
2499 if (is64BitVector)
2500 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2501 else
2502 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2503 }
2504 Ops.push_back(SuperReg);
2505 Ops.push_back(getI32Imm(Lane, dl));
2506 Ops.push_back(Pred);
2507 Ops.push_back(Reg0);
2508 Ops.push_back(Chain);
2509
2510 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2511 QOpcodes[OpcodeIndex]);
2512 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2513 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
2514 if (!IsLoad) {
2515 ReplaceNode(N, VLdLn);
2516 return;
2517 }
2518
2519 // Extract the subregisters.
2520 SuperReg = SDValue(VLdLn, 0);
2521 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2522 ARM::qsub_3 == ARM::qsub_0 + 3,
2523 "Unexpected subreg numbering");
2524 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2525 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2526 ReplaceUses(SDValue(N, Vec),
2527 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2528 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2529 if (isUpdating)
2530 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2531 CurDAG->RemoveDeadNode(N);
2532}
2533
2534template <typename SDValueVector>
2535void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2536 SDValue PredicateMask) {
2537 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2538 Ops.push_back(PredicateMask);
2539 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2540}
2541
2542template <typename SDValueVector>
2543void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2544 SDValue PredicateMask,
2545 SDValue Inactive) {
2546 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2547 Ops.push_back(PredicateMask);
2548 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2549 Ops.push_back(Inactive);
2550}
2551
2552template <typename SDValueVector>
2553void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
2554 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2555 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2556 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2557}
2558
2559template <typename SDValueVector>
2560void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2561 EVT InactiveTy) {
2562 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2563 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2564 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2565 Ops.push_back(SDValue(
2566 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
2567}
2568
2569void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
2570 bool Predicated) {
2571 SDLoc Loc(N);
2573
2574 uint16_t Opcode;
2575 switch (N->getValueType(1).getVectorElementType().getSizeInBits()) {
2576 case 32:
2577 Opcode = Opcodes[0];
2578 break;
2579 case 64:
2580 Opcode = Opcodes[1];
2581 break;
2582 default:
2583 llvm_unreachable("bad vector element size in SelectMVE_WB");
2584 }
2585
2586 Ops.push_back(N->getOperand(2)); // vector of base addresses
2587
2588 int32_t ImmValue = N->getConstantOperandVal(3);
2589 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset
2590
2591 if (Predicated)
2592 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
2593 else
2594 AddEmptyMVEPredicateToOps(Ops, Loc);
2595
2596 Ops.push_back(N->getOperand(0)); // chain
2597
2599 VTs.push_back(N->getValueType(1));
2600 VTs.push_back(N->getValueType(0));
2601 VTs.push_back(N->getValueType(2));
2602
2603 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops);
2604 ReplaceUses(SDValue(N, 0), SDValue(New, 1));
2605 ReplaceUses(SDValue(N, 1), SDValue(New, 0));
2606 ReplaceUses(SDValue(N, 2), SDValue(New, 2));
2607 transferMemOperands(N, New);
2608 CurDAG->RemoveDeadNode(N);
2609}
2610
2611void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
2612 bool Immediate,
2613 bool HasSaturationOperand) {
2614 SDLoc Loc(N);
2616
2617 // Two 32-bit halves of the value to be shifted
2618 Ops.push_back(N->getOperand(1));
2619 Ops.push_back(N->getOperand(2));
2620
2621 // The shift count
2622 if (Immediate) {
2623 int32_t ImmValue = N->getConstantOperandVal(3);
2624 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
2625 } else {
2626 Ops.push_back(N->getOperand(3));
2627 }
2628
2629 // The immediate saturation operand, if any
2630 if (HasSaturationOperand) {
2631 int32_t SatOp = N->getConstantOperandVal(4);
2632 int SatBit = (SatOp == 64 ? 0 : 1);
2633 Ops.push_back(getI32Imm(SatBit, Loc));
2634 }
2635
2636 // MVE scalar shifts are IT-predicable, so include the standard
2637 // predicate arguments.
2638 Ops.push_back(getAL(CurDAG, Loc));
2639 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2640
2641 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2642}
2643
2644void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
2645 uint16_t OpcodeWithNoCarry,
2646 bool Add, bool Predicated) {
2647 SDLoc Loc(N);
2649 uint16_t Opcode;
2650
2651 unsigned FirstInputOp = Predicated ? 2 : 1;
2652
2653 // Two input vectors and the input carry flag
2654 Ops.push_back(N->getOperand(FirstInputOp));
2655 Ops.push_back(N->getOperand(FirstInputOp + 1));
2656 SDValue CarryIn = N->getOperand(FirstInputOp + 2);
2657 ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn);
2658 uint32_t CarryMask = 1 << 29;
2659 uint32_t CarryExpected = Add ? 0 : CarryMask;
2660 if (CarryInConstant &&
2661 (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
2662 Opcode = OpcodeWithNoCarry;
2663 } else {
2664 Ops.push_back(CarryIn);
2665 Opcode = OpcodeWithCarry;
2666 }
2667
2668 if (Predicated)
2669 AddMVEPredicateToOps(Ops, Loc,
2670 N->getOperand(FirstInputOp + 3), // predicate
2671 N->getOperand(FirstInputOp - 1)); // inactive
2672 else
2673 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
2674
2675 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2676}
2677
2678void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
2679 SDLoc Loc(N);
2681
2682 // One vector input, followed by a 32-bit word of bits to shift in
2683 // and then an immediate shift count
2684 Ops.push_back(N->getOperand(1));
2685 Ops.push_back(N->getOperand(2));
2686 int32_t ImmValue = N->getConstantOperandVal(3);
2687 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
2688
2689 if (Predicated)
2690 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
2691 else
2692 AddEmptyMVEPredicateToOps(Ops, Loc);
2693
2694 CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), ArrayRef(Ops));
2695}
2696
2697static bool SDValueToConstBool(SDValue SDVal) {
2698 assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
2699 ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
2700 uint64_t Value = SDValConstant->getZExtValue();
2701 assert((Value == 0 || Value == 1) && "expected value 0 or 1");
2702 return Value;
2703}
2704
2705void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
2706 const uint16_t *OpcodesS,
2707 const uint16_t *OpcodesU,
2708 size_t Stride, size_t TySize) {
2709 assert(TySize < Stride && "Invalid TySize");
2710 bool IsUnsigned = SDValueToConstBool(N->getOperand(1));
2711 bool IsSub = SDValueToConstBool(N->getOperand(2));
2712 bool IsExchange = SDValueToConstBool(N->getOperand(3));
2713 if (IsUnsigned) {
2714 assert(!IsSub &&
2715 "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
2716 assert(!IsExchange &&
2717 "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
2718 }
2719
2720 auto OpIsZero = [N](size_t OpNo) {
2721 return isNullConstant(N->getOperand(OpNo));
2722 };
2723
2724 // If the input accumulator value is not zero, select an instruction with
2725 // accumulator, otherwise select an instruction without accumulator
2726 bool IsAccum = !(OpIsZero(4) && OpIsZero(5));
2727
2728 const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
2729 if (IsSub)
2730 Opcodes += 4 * Stride;
2731 if (IsExchange)
2732 Opcodes += 2 * Stride;
2733 if (IsAccum)
2734 Opcodes += Stride;
2735 uint16_t Opcode = Opcodes[TySize];
2736
2737 SDLoc Loc(N);
2739 // Push the accumulator operands, if they are used
2740 if (IsAccum) {
2741 Ops.push_back(N->getOperand(4));
2742 Ops.push_back(N->getOperand(5));
2743 }
2744 // Push the two vector operands
2745 Ops.push_back(N->getOperand(6));
2746 Ops.push_back(N->getOperand(7));
2747
2748 if (Predicated)
2749 AddMVEPredicateToOps(Ops, Loc, N->getOperand(8));
2750 else
2751 AddEmptyMVEPredicateToOps(Ops, Loc);
2752
2753 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2754}
2755
2756void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
2757 const uint16_t *OpcodesS,
2758 const uint16_t *OpcodesU) {
2759 EVT VecTy = N->getOperand(6).getValueType();
2760 size_t SizeIndex;
2761 switch (VecTy.getVectorElementType().getSizeInBits()) {
2762 case 16:
2763 SizeIndex = 0;
2764 break;
2765 case 32:
2766 SizeIndex = 1;
2767 break;
2768 default:
2769 llvm_unreachable("bad vector element size");
2770 }
2771
2772 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex);
2773}
2774
2775void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
2776 const uint16_t *OpcodesS,
2777 const uint16_t *OpcodesU) {
2778 assert(
2779 N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
2780 32 &&
2781 "bad vector element size");
2782 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);
2783}
2784
2785void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
2786 const uint16_t *const *Opcodes,
2787 bool HasWriteback) {
2788 EVT VT = N->getValueType(0);
2789 SDLoc Loc(N);
2790
2791 const uint16_t *OurOpcodes;
2792 switch (VT.getVectorElementType().getSizeInBits()) {
2793 case 8:
2794 OurOpcodes = Opcodes[0];
2795 break;
2796 case 16:
2797 OurOpcodes = Opcodes[1];
2798 break;
2799 case 32:
2800 OurOpcodes = Opcodes[2];
2801 break;
2802 default:
2803 llvm_unreachable("bad vector element size in SelectMVE_VLD");
2804 }
2805
2806 EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
2807 SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
2808 unsigned PtrOperand = HasWriteback ? 1 : 2;
2809
2810 auto Data = SDValue(
2811 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
2812 SDValue Chain = N->getOperand(0);
2813 // Add a MVE_VLDn instruction for each Vec, except the last
2814 for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
2815 SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
2816 auto LoadInst =
2817 CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
2818 Data = SDValue(LoadInst, 0);
2819 Chain = SDValue(LoadInst, 1);
2820 transferMemOperands(N, LoadInst);
2821 }
2822 // The last may need a writeback on it
2823 if (HasWriteback)
2824 ResultTys = {DataTy, MVT::i32, MVT::Other};
2825 SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
2826 auto LoadInst =
2827 CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops);
2828 transferMemOperands(N, LoadInst);
2829
2830 unsigned i;
2831 for (i = 0; i < NumVecs; i++)
2832 ReplaceUses(SDValue(N, i),
2833 CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT,
2834 SDValue(LoadInst, 0)));
2835 if (HasWriteback)
2836 ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1));
2837 ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1));
2838 CurDAG->RemoveDeadNode(N);
2839}
2840
2841void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
2842 bool Wrapping, bool Predicated) {
2843 EVT VT = N->getValueType(0);
2844 SDLoc Loc(N);
2845
2846 uint16_t Opcode;
2847 switch (VT.getScalarSizeInBits()) {
2848 case 8:
2849 Opcode = Opcodes[0];
2850 break;
2851 case 16:
2852 Opcode = Opcodes[1];
2853 break;
2854 case 32:
2855 Opcode = Opcodes[2];
2856 break;
2857 default:
2858 llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
2859 }
2860
2862 unsigned OpIdx = 1;
2863
2864 SDValue Inactive;
2865 if (Predicated)
2866 Inactive = N->getOperand(OpIdx++);
2867
2868 Ops.push_back(N->getOperand(OpIdx++)); // base
2869 if (Wrapping)
2870 Ops.push_back(N->getOperand(OpIdx++)); // limit
2871
2872 SDValue ImmOp = N->getOperand(OpIdx++); // step
2873 int ImmValue = ImmOp->getAsZExtVal();
2874 Ops.push_back(getI32Imm(ImmValue, Loc));
2875
2876 if (Predicated)
2877 AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive);
2878 else
2879 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
2880
2881 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2882}
2883
2884void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
2885 size_t NumExtraOps, bool HasAccum) {
2886 bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
2887 SDLoc Loc(N);
2889
2890 unsigned OpIdx = 1;
2891
2892 // Convert and append the immediate operand designating the coprocessor.
2893 SDValue ImmCorpoc = N->getOperand(OpIdx++);
2894 uint32_t ImmCoprocVal = ImmCorpoc->getAsZExtVal();
2895 Ops.push_back(getI32Imm(ImmCoprocVal, Loc));
2896
2897 // For accumulating variants copy the low and high order parts of the
2898 // accumulator into a register pair and add it to the operand vector.
2899 if (HasAccum) {
2900 SDValue AccLo = N->getOperand(OpIdx++);
2901 SDValue AccHi = N->getOperand(OpIdx++);
2902 if (IsBigEndian)
2903 std::swap(AccLo, AccHi);
2904 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));
2905 }
2906
2907 // Copy extra operands as-is.
2908 for (size_t I = 0; I < NumExtraOps; I++)
2909 Ops.push_back(N->getOperand(OpIdx++));
2910
2911 // Convert and append the immediate operand
2912 SDValue Imm = N->getOperand(OpIdx);
2913 uint32_t ImmVal = Imm->getAsZExtVal();
2914 Ops.push_back(getI32Imm(ImmVal, Loc));
2915
2916 // Accumulating variants are IT-predicable, add predicate operands.
2917 if (HasAccum) {
2918 SDValue Pred = getAL(CurDAG, Loc);
2919 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2920 Ops.push_back(Pred);
2921 Ops.push_back(PredReg);
2922 }
2923
2924 // Create the CDE intruction
2925 SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);
2926 SDValue ResultPair = SDValue(InstrNode, 0);
2927
2928 // The original intrinsic had two outputs, and the output of the dual-register
2929 // CDE instruction is a register pair. We need to extract the two subregisters
2930 // and replace all uses of the original outputs with the extracted
2931 // subregisters.
2932 uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
2933 if (IsBigEndian)
2934 std::swap(SubRegs[0], SubRegs[1]);
2935
2936 for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
2937 if (SDValue(N, ResIdx).use_empty())
2938 continue;
2939 SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc,
2940 MVT::i32, ResultPair);
2941 ReplaceUses(SDValue(N, ResIdx), SubReg);
2942 }
2943
2944 CurDAG->RemoveDeadNode(N);
2945}
2946
2947void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
2948 bool isUpdating, unsigned NumVecs,
2949 const uint16_t *DOpcodes,
2950 const uint16_t *QOpcodes0,
2951 const uint16_t *QOpcodes1) {
2952 assert(Subtarget->hasNEON());
2953 assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2954 SDLoc dl(N);
2955
2956 SDValue MemAddr, Align;
2957 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2958 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2959 return;
2960
2961 SDValue Chain = N->getOperand(0);
2962 EVT VT = N->getValueType(0);
2963 bool is64BitVector = VT.is64BitVector();
2964
2965 unsigned Alignment = 0;
2966 if (NumVecs != 3) {
2967 Alignment = Align->getAsZExtVal();
2968 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2969 if (Alignment > NumBytes)
2970 Alignment = NumBytes;
2971 if (Alignment < 8 && Alignment < NumBytes)
2972 Alignment = 0;
2973 // Alignment must be a power of two; make sure of that.
2974 Alignment = (Alignment & -Alignment);
2975 if (Alignment == 1)
2976 Alignment = 0;
2977 }
2978 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2979
2980 unsigned OpcodeIndex;
2981 switch (VT.getSimpleVT().SimpleTy) {
2982 default: llvm_unreachable("unhandled vld-dup type");
2983 case MVT::v8i8:
2984 case MVT::v16i8: OpcodeIndex = 0; break;
2985 case MVT::v4i16:
2986 case MVT::v8i16:
2987 case MVT::v4f16:
2988 case MVT::v8f16:
2989 case MVT::v4bf16:
2990 case MVT::v8bf16:
2991 OpcodeIndex = 1; break;
2992 case MVT::v2f32:
2993 case MVT::v2i32:
2994 case MVT::v4f32:
2995 case MVT::v4i32: OpcodeIndex = 2; break;
2996 case MVT::v1f64:
2997 case MVT::v1i64: OpcodeIndex = 3; break;
2998 }
2999
3000 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
3001 if (!is64BitVector)
3002 ResTyElts *= 2;
3003 EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
3004
3005 std::vector<EVT> ResTys;
3006 ResTys.push_back(ResTy);
3007 if (isUpdating)
3008 ResTys.push_back(MVT::i32);
3009 ResTys.push_back(MVT::Other);
3010
3011 SDValue Pred = getAL(CurDAG, dl);
3012 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3013
3015 Ops.push_back(MemAddr);
3016 Ops.push_back(Align);
3017 unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex]
3018 : (NumVecs == 1) ? QOpcodes0[OpcodeIndex]
3019 : QOpcodes1[OpcodeIndex];
3020 if (isUpdating) {
3021 SDValue Inc = N->getOperand(2);
3022 bool IsImmUpdate =
3023 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
3024 if (IsImmUpdate) {
3025 if (!isVLDfixed(Opc))
3026 Ops.push_back(Reg0);
3027 } else {
3028 if (isVLDfixed(Opc))
3030 Ops.push_back(Inc);
3031 }
3032 }
3033 if (is64BitVector || NumVecs == 1) {
3034 // Double registers and VLD1 quad registers are directly supported.
3035 } else {
3036 SDValue ImplDef = SDValue(
3037 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
3038 const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};
3039 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
3040 MVT::Other, OpsA);
3041 Ops.push_back(SDValue(VLdA, 0));
3042 Chain = SDValue(VLdA, 1);
3043 }
3044
3045 Ops.push_back(Pred);
3046 Ops.push_back(Reg0);
3047 Ops.push_back(Chain);
3048
3049 SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
3050
3051 // Transfer memoperands.
3052 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3053 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});
3054
3055 // Extract the subregisters.
3056 if (NumVecs == 1) {
3057 ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
3058 } else {
3059 SDValue SuperReg = SDValue(VLdDup, 0);
3060 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
3061 unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
3062 for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
3063 ReplaceUses(SDValue(N, Vec),
3064 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
3065 }
3066 }
3067 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
3068 if (isUpdating)
3069 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
3070 CurDAG->RemoveDeadNode(N);
3071}
3072
3073bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
3074 if (!Subtarget->hasMVEIntegerOps())
3075 return false;
3076
3077 SDLoc dl(N);
3078
3079 // We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and
3080 // extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
3081 // inserts of the correct type:
3082 SDValue Ins1 = SDValue(N, 0);
3083 SDValue Ins2 = N->getOperand(0);
3084 EVT VT = Ins1.getValueType();
3085 if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||
3086 !isa<ConstantSDNode>(Ins1.getOperand(2)) ||
3087 !isa<ConstantSDNode>(Ins2.getOperand(2)) ||
3088 (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))
3089 return false;
3090
3091 unsigned Lane1 = Ins1.getConstantOperandVal(2);
3092 unsigned Lane2 = Ins2.getConstantOperandVal(2);
3093 if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)
3094 return false;
3095
3096 // If the inserted values will be able to use T/B already, leave it to the
3097 // existing tablegen patterns. For example VCVTT/VCVTB.
3098 SDValue Val1 = Ins1.getOperand(1);
3099 SDValue Val2 = Ins2.getOperand(1);
3100 if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)
3101 return false;
3102
3103 // Check if the inserted values are both extracts.
3104 if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
3105 Val1.getOpcode() == ARMISD::VGETLANEu) &&
3107 Val2.getOpcode() == ARMISD::VGETLANEu) &&
3108 isa<ConstantSDNode>(Val1.getOperand(1)) &&
3109 isa<ConstantSDNode>(Val2.getOperand(1)) &&
3110 (Val1.getOperand(0).getValueType() == MVT::v8f16 ||
3111 Val1.getOperand(0).getValueType() == MVT::v8i16) &&
3112 (Val2.getOperand(0).getValueType() == MVT::v8f16 ||
3113 Val2.getOperand(0).getValueType() == MVT::v8i16)) {
3114 unsigned ExtractLane1 = Val1.getConstantOperandVal(1);
3115 unsigned ExtractLane2 = Val2.getConstantOperandVal(1);
3116
3117 // If the two extracted lanes are from the same place and adjacent, this
3118 // simplifies into a f32 lane move.
3119 if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 &&
3120 ExtractLane1 == ExtractLane2 + 1) {
3121 SDValue NewExt = CurDAG->getTargetExtractSubreg(
3122 ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0));
3123 SDValue NewIns = CurDAG->getTargetInsertSubreg(
3124 ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0),
3125 NewExt);
3126 ReplaceUses(Ins1, NewIns);
3127 return true;
3128 }
3129
3130 // Else v8i16 pattern of an extract and an insert, with a optional vmovx for
3131 // extracting odd lanes.
3132 if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) {
3133 SDValue Inp1 = CurDAG->getTargetExtractSubreg(
3134 ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0));
3135 SDValue Inp2 = CurDAG->getTargetExtractSubreg(
3136 ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0));
3137 if (ExtractLane1 % 2 != 0)
3138 Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1), 0);
3139 if (ExtractLane2 % 2 != 0)
3140 Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2), 0);
3141 SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1);
3142 SDValue NewIns =
3143 CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
3144 Ins2.getOperand(0), SDValue(VINS, 0));
3145 ReplaceUses(Ins1, NewIns);
3146 return true;
3147 }
3148 }
3149
3150 // The inserted values are not extracted - if they are f16 then insert them
3151 // directly using a VINS.
3152 if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) {
3153 SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1);
3154 SDValue NewIns =
3155 CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
3156 Ins2.getOperand(0), SDValue(VINS, 0));
3157 ReplaceUses(Ins1, NewIns);
3158 return true;
3159 }
3160
3161 return false;
3162}
3163
3164bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N,
3165 SDNode *FMul,
3166 bool IsUnsigned,
3167 bool FixedToFloat) {
3168 auto Type = N->getValueType(0);
3169 unsigned ScalarBits = Type.getScalarSizeInBits();
3170 if (ScalarBits > 32)
3171 return false;
3172
3173 SDNodeFlags FMulFlags = FMul->getFlags();
3174 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3175 // allowed in 16 bit unsigned floats
3176 if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned)
3177 return false;
3178
3179 SDValue ImmNode = FMul->getOperand(1);
3180 SDValue VecVal = FMul->getOperand(0);
3181 if (VecVal->getOpcode() == ISD::UINT_TO_FP ||
3182 VecVal->getOpcode() == ISD::SINT_TO_FP)
3183 VecVal = VecVal->getOperand(0);
3184
3185 if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits)
3186 return false;
3187
3188 if (ImmNode.getOpcode() == ISD::BITCAST) {
3189 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3190 return false;
3191 ImmNode = ImmNode.getOperand(0);
3192 }
3193
3194 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3195 return false;
3196
3197 APFloat ImmAPF(0.0f);
3198 switch (ImmNode.getOpcode()) {
3199 case ARMISD::VMOVIMM:
3200 case ARMISD::VDUP: {
3201 if (!isa<ConstantSDNode>(ImmNode.getOperand(0)))
3202 return false;
3203 unsigned Imm = ImmNode.getConstantOperandVal(0);
3204 if (ImmNode.getOpcode() == ARMISD::VMOVIMM)
3205 Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits);
3206 ImmAPF =
3207 APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(),
3208 APInt(ScalarBits, Imm));
3209 break;
3210 }
3211 case ARMISD::VMOVFPIMM: {
3213 break;
3214 }
3215 default:
3216 return false;
3217 }
3218
3219 // Where n is the number of fractional bits, multiplying by 2^n will convert
3220 // from float to fixed and multiplying by 2^-n will convert from fixed to
3221 // float. Taking log2 of the factor (after taking the inverse in the case of
3222 // float to fixed) will give n.
3223 APFloat ToConvert = ImmAPF;
3224 if (FixedToFloat) {
3225 if (!ImmAPF.getExactInverse(&ToConvert))
3226 return false;
3227 }
3228 APSInt Converted(64, false);
3229 bool IsExact;
3231 &IsExact);
3232 if (!IsExact || !Converted.isPowerOf2())
3233 return false;
3234
3235 unsigned FracBits = Converted.logBase2();
3236 if (FracBits > ScalarBits)
3237 return false;
3238
3240 VecVal, CurDAG->getConstant(FracBits, SDLoc(N), MVT::i32)};
3241 AddEmptyMVEPredicateToOps(Ops, SDLoc(N), Type);
3242
3243 unsigned int Opcode;
3244 switch (ScalarBits) {
3245 case 16:
3246 if (FixedToFloat)
3247 Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix;
3248 else
3249 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3250 break;
3251 case 32:
3252 if (FixedToFloat)
3253 Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix;
3254 else
3255 Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3256 break;
3257 default:
3258 llvm_unreachable("unexpected number of scalar bits");
3259 break;
3260 }
3261
3262 ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), Type, Ops));
3263 return true;
3264}
3265
3266bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {
3267 // Transform a floating-point to fixed-point conversion to a VCVT
3268 if (!Subtarget->hasMVEFloatOps())
3269 return false;
3270 EVT Type = N->getValueType(0);
3271 if (!Type.isVector())
3272 return false;
3273 unsigned int ScalarBits = Type.getScalarSizeInBits();
3274
3275 bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT ||
3276 N->getOpcode() == ISD::FP_TO_UINT_SAT;
3277 SDNode *Node = N->getOperand(0).getNode();
3278
3279 // floating-point to fixed-point with one fractional bit gets turned into an
3280 // FP_TO_[U|S]INT(FADD (x, x)) rather than an FP_TO_[U|S]INT(FMUL (x, y))
3281 if (Node->getOpcode() == ISD::FADD) {
3282 if (Node->getOperand(0) != Node->getOperand(1))
3283 return false;
3284 SDNodeFlags Flags = Node->getFlags();
3285 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3286 // allowed in 16 bit unsigned floats
3287 if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned)
3288 return false;
3289
3290 unsigned Opcode;
3291 switch (ScalarBits) {
3292 case 16:
3293 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3294 break;
3295 case 32:
3296 Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3297 break;
3298 }
3299 SmallVector<SDValue, 3> Ops{Node->getOperand(0),
3300 CurDAG->getConstant(1, dl, MVT::i32)};
3301 AddEmptyMVEPredicateToOps(Ops, dl, Type);
3302
3303 ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops));
3304 return true;
3305 }
3306
3307 if (Node->getOpcode() != ISD::FMUL)
3308 return false;
3309
3310 return transformFixedFloatingPointConversion(N, Node, IsUnsigned, false);
3311}
3312
3313bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {
3314 // Transform a fixed-point to floating-point conversion to a VCVT
3315 if (!Subtarget->hasMVEFloatOps())
3316 return false;
3317 auto Type = N->getValueType(0);
3318 if (!Type.isVector())
3319 return false;
3320
3321 auto LHS = N->getOperand(0);
3322 if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP)
3323 return false;
3324
3325 return transformFixedFloatingPointConversion(
3326 N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true);
3327}
3328
3329bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
3330 if (!Subtarget->hasV6T2Ops())
3331 return false;
3332
3333 unsigned Opc = isSigned
3334 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
3335 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
3336 SDLoc dl(N);
3337
3338 // For unsigned extracts, check for a shift right and mask
3339 unsigned And_imm = 0;
3340 if (N->getOpcode() == ISD::AND) {
3341 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
3342
3343 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
3344 if (And_imm & (And_imm + 1))
3345 return false;
3346
3347 unsigned Srl_imm = 0;
3348 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
3349 Srl_imm)) {
3350 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3351
3352 // Mask off the unnecessary bits of the AND immediate; normally
3353 // DAGCombine will do this, but that might not happen if
3354 // targetShrinkDemandedConstant chooses a different immediate.
3355 And_imm &= -1U >> Srl_imm;
3356
3357 // Note: The width operand is encoded as width-1.
3358 unsigned Width = llvm::countr_one(And_imm) - 1;
3359 unsigned LSB = Srl_imm;
3360
3361 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3362
3363 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
3364 // It's cheaper to use a right shift to extract the top bits.
3365 if (Subtarget->isThumb()) {
3366 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
3367 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3368 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3369 getAL(CurDAG, dl), Reg0, Reg0 };
3370 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3371 return true;
3372 }
3373
3374 // ARM models shift instructions as MOVsi with shifter operand.
3376 SDValue ShOpc =
3377 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
3378 MVT::i32);
3379 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
3380 getAL(CurDAG, dl), Reg0, Reg0 };
3381 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
3382 return true;
3383 }
3384
3385 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3386 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3387 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3388 CurDAG->getTargetConstant(Width, dl, MVT::i32),
3389 getAL(CurDAG, dl), Reg0 };
3390 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3391 return true;
3392 }
3393 }
3394 return false;
3395 }
3396
3397 // Otherwise, we're looking for a shift of a shift
3398 unsigned Shl_imm = 0;
3399 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
3400 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
3401 unsigned Srl_imm = 0;
3402 if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
3403 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3404 // Note: The width operand is encoded as width-1.
3405 unsigned Width = 32 - Srl_imm - 1;
3406 int LSB = Srl_imm - Shl_imm;
3407 if (LSB < 0)
3408 return false;
3409 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3410 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3411 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3412 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3413 CurDAG->getTargetConstant(Width, dl, MVT::i32),
3414 getAL(CurDAG, dl), Reg0 };
3415 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3416 return true;
3417 }
3418 }
3419
3420 // Or we are looking for a shift of an and, with a mask operand
3421 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
3422 isShiftedMask_32(And_imm)) {
3423 unsigned Srl_imm = 0;
3424 unsigned LSB = llvm::countr_zero(And_imm);
3425 // Shift must be the same as the ands lsb
3426 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
3427 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3428 unsigned MSB = llvm::Log2_32(And_imm);
3429 // Note: The width operand is encoded as width-1.
3430 unsigned Width = MSB - LSB;
3431 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3432 assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3433 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3434 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
3435 CurDAG->getTargetConstant(Width, dl, MVT::i32),
3436 getAL(CurDAG, dl), Reg0 };
3437 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3438 return true;
3439 }
3440 }
3441
3442 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
3443 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
3444 unsigned LSB = 0;
3445 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
3446 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
3447 return false;
3448
3449 if (LSB + Width > 32)
3450 return false;
3451
3452 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3453 assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
3454 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3455 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3456 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
3457 getAL(CurDAG, dl), Reg0 };
3458 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3459 return true;
3460 }
3461
3462 return false;
3463}
3464
3465/// Target-specific DAG combining for ISD::SUB.
3466/// Target-independent combining lowers SELECT_CC nodes of the form
3467/// select_cc setg[ge] X, 0, X, -X
3468/// select_cc setgt X, -1, X, -X
3469/// select_cc setl[te] X, 0, -X, X
3470/// select_cc setlt X, 1, -X, X
3471/// which represent Integer ABS into:
3472/// Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
3473/// ARM instruction selection detects the latter and matches it to
3474/// ARM::ABS or ARM::t2ABS machine node.
3475bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
3476 SDValue SUBSrc0 = N->getOperand(0);
3477 SDValue SUBSrc1 = N->getOperand(1);
3478 EVT VT = N->getValueType(0);
3479
3480 if (Subtarget->isThumb1Only())
3481 return false;
3482
3483 if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA)
3484 return false;
3485
3486 SDValue XORSrc0 = SUBSrc0.getOperand(0);
3487 SDValue XORSrc1 = SUBSrc0.getOperand(1);
3488 SDValue SRASrc0 = SUBSrc1.getOperand(0);
3489 SDValue SRASrc1 = SUBSrc1.getOperand(1);
3490 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
3491 EVT XType = SRASrc0.getValueType();
3492 unsigned Size = XType.getSizeInBits() - 1;
3493
3494 if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() &&
3495 SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) {
3496 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
3497 CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0);
3498 return true;
3499 }
3500
3501 return false;
3502}
3503
3504/// We've got special pseudo-instructions for these
3505void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3506 unsigned Opcode;
3507 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
3508 if (MemTy == MVT::i8)
3509 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
3510 else if (MemTy == MVT::i16)
3511 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
3512 else if (MemTy == MVT::i32)
3513 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32;
3514 else
3515 llvm_unreachable("Unknown AtomicCmpSwap type");
3516
3517 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
3518 N->getOperand(0)};
3519 SDNode *CmpSwap = CurDAG->getMachineNode(
3520 Opcode, SDLoc(N),
3521 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
3522
3523 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
3524 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
3525
3526 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
3527 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
3528 CurDAG->RemoveDeadNode(N);
3529}
3530
3531static std::optional<std::pair<unsigned, unsigned>>
3533 unsigned FirstOne = A.getBitWidth() - A.countl_zero() - 1;
3534 unsigned LastOne = A.countr_zero();
3535 if (A.popcount() != (FirstOne - LastOne + 1))
3536 return std::nullopt;
3537 return std::make_pair(FirstOne, LastOne);
3538}
3539
3540void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
3541 assert(N->getOpcode() == ARMISD::CMPZ);
3542 SwitchEQNEToPLMI = false;
3543
3544 if (!Subtarget->isThumb())
3545 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
3546 // LSR don't exist as standalone instructions - they need the barrel shifter.
3547 return;
3548
3549 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
3550 SDValue And = N->getOperand(0);
3551 if (!And->hasOneUse())
3552 return;
3553
3554 SDValue Zero = N->getOperand(1);
3555 if (!isNullConstant(Zero) || And->getOpcode() != ISD::AND)
3556 return;
3557 SDValue X = And.getOperand(0);
3558 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
3559
3560 if (!C)
3561 return;
3562 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
3563 if (!Range)
3564 return;
3565
3566 // There are several ways to lower this:
3567 SDNode *NewN;
3568 SDLoc dl(N);
3569
3570 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
3571 if (Subtarget->isThumb2()) {
3572 Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
3573 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
3574 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3575 CurDAG->getRegister(0, MVT::i32) };
3576 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3577 } else {
3578 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
3579 CurDAG->getTargetConstant(Imm, dl, MVT::i32),
3580 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3581 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3582 }
3583 };
3584
3585 if (Range->second == 0) {
3586 // 1. Mask includes the LSB -> Simply shift the top N bits off
3587 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3588 ReplaceNode(And.getNode(), NewN);
3589 } else if (Range->first == 31) {
3590 // 2. Mask includes the MSB -> Simply shift the bottom N bits off
3591 NewN = EmitShift(ARM::tLSRri, X, Range->second);
3592 ReplaceNode(And.getNode(), NewN);
3593 } else if (Range->first == Range->second) {
3594 // 3. Only one bit is set. We can shift this into the sign bit and use a
3595 // PL/MI comparison.
3596 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3597 ReplaceNode(And.getNode(), NewN);
3598
3599 SwitchEQNEToPLMI = true;
3600 } else if (!Subtarget->hasV6T2Ops()) {
3601 // 4. Do a double shift to clear bottom and top bits, but only in
3602 // thumb-1 mode as in thumb-2 we can use UBFX.
3603 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3604 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
3605 Range->second + (31 - Range->first));
3606 ReplaceNode(And.getNode(), NewN);
3607 }
3608}
3609
3610static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3],
3611 unsigned Opc128[3]) {
3612 assert((VT.is64BitVector() || VT.is128BitVector()) &&
3613 "Unexpected vector shuffle length");
3614 switch (VT.getScalarSizeInBits()) {
3615 default:
3616 llvm_unreachable("Unexpected vector shuffle element size");
3617 case 8:
3618 return VT.is64BitVector() ? Opc64[0] : Opc128[0];
3619 case 16:
3620 return VT.is64BitVector() ? Opc64[1] : Opc128[1];
3621 case 32:
3622 return VT.is64BitVector() ? Opc64[2] : Opc128[2];
3623 }
3624}
3625
3626void ARMDAGToDAGISel::Select(SDNode *N) {
3627 SDLoc dl(N);
3628
3629 if (N->isMachineOpcode()) {
3630 N->setNodeId(-1);
3631 return; // Already selected.
3632 }
3633
3634 switch (N->getOpcode()) {
3635 default: break;
3636 case ISD::STORE: {
3637 // For Thumb1, match an sp-relative store in C++. This is a little
3638 // unfortunate, but I don't think I can make the chain check work
3639 // otherwise. (The chain of the store has to be the same as the chain
3640 // of the CopyFromReg, or else we can't replace the CopyFromReg with
3641 // a direct reference to "SP".)
3642 //
3643 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
3644 // a different addressing mode from other four-byte stores.
3645 //
3646 // This pattern usually comes up with call arguments.
3647 StoreSDNode *ST = cast<StoreSDNode>(N);
3648 SDValue Ptr = ST->getBasePtr();
3649 if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
3650 int RHSC = 0;
3651 if (Ptr.getOpcode() == ISD::ADD &&
3652 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
3653 Ptr = Ptr.getOperand(0);
3654
3655 if (Ptr.getOpcode() == ISD::CopyFromReg &&
3656 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
3657 Ptr.getOperand(0) == ST->getChain()) {
3658 SDValue Ops[] = {ST->getValue(),
3659 CurDAG->getRegister(ARM::SP, MVT::i32),
3660 CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
3661 getAL(CurDAG, dl),
3662 CurDAG->getRegister(0, MVT::i32),
3663 ST->getChain()};
3664 MachineSDNode *ResNode =
3665 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
3666 MachineMemOperand *MemOp = ST->getMemOperand();
3667 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3668 ReplaceNode(N, ResNode);
3669 return;
3670 }
3671 }
3672 break;
3673 }
3675 if (tryWriteRegister(N))
3676 return;
3677 break;
3678 case ISD::READ_REGISTER:
3679 if (tryReadRegister(N))
3680 return;
3681 break;
3682 case ISD::INLINEASM:
3683 case ISD::INLINEASM_BR:
3684 if (tryInlineAsm(N))
3685 return;
3686 break;
3687 case ISD::SUB:
3688 // Select special operations if SUB node forms integer ABS pattern
3689 if (tryABSOp(N))
3690 return;
3691 // Other cases are autogenerated.
3692 break;
3693 case ISD::Constant: {
3694 unsigned Val = N->getAsZExtVal();
3695 // If we can't materialize the constant we need to use a literal pool
3696 if (ConstantMaterializationCost(Val, Subtarget) > 2 &&
3697 !Subtarget->genExecuteOnly()) {
3698 SDValue CPIdx = CurDAG->getTargetConstantPool(
3699 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
3700 TLI->getPointerTy(CurDAG->getDataLayout()));
3701
3702 SDNode *ResNode;
3703 if (Subtarget->isThumb()) {
3704 SDValue Ops[] = {
3705 CPIdx,
3706 getAL(CurDAG, dl),
3707 CurDAG->getRegister(0, MVT::i32),
3708 CurDAG->getEntryNode()
3709 };
3710 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
3711 Ops);
3712 } else {
3713 SDValue Ops[] = {
3714 CPIdx,
3715 CurDAG->getTargetConstant(0, dl, MVT::i32),
3716 getAL(CurDAG, dl),
3717 CurDAG->getRegister(0, MVT::i32),
3718 CurDAG->getEntryNode()
3719 };
3720 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
3721 Ops);
3722 }
3723 // Annotate the Node with memory operand information so that MachineInstr
3724 // queries work properly. This e.g. gives the register allocation the
3725 // required information for rematerialization.
3726 MachineFunction& MF = CurDAG->getMachineFunction();
3730
3731 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3732
3733 ReplaceNode(N, ResNode);
3734 return;
3735 }
3736
3737 // Other cases are autogenerated.
3738 break;
3739 }
3740 case ISD::FrameIndex: {
3741 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
3742 int FI = cast<FrameIndexSDNode>(N)->getIndex();
3743 SDValue TFI = CurDAG->getTargetFrameIndex(
3744 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
3745 if (Subtarget->isThumb1Only()) {
3746 // Set the alignment of the frame object to 4, to avoid having to generate
3747 // more than one ADD
3748 MachineFrameInfo &MFI = MF->getFrameInfo();
3749 if (MFI.getObjectAlign(FI) < Align(4))
3750 MFI.setObjectAlignment(FI, Align(4));
3751 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
3752 CurDAG->getTargetConstant(0, dl, MVT::i32));
3753 return;
3754 } else {
3755 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
3756 ARM::t2ADDri : ARM::ADDri);
3757 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
3758 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3759 CurDAG->getRegister(0, MVT::i32) };
3760 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3761 return;
3762 }
3763 }
3765 if (tryInsertVectorElt(N))
3766 return;
3767 break;
3768 }
3769 case ISD::SRL:
3770 if (tryV6T2BitfieldExtractOp(N, false))
3771 return;
3772 break;
3774 case ISD::SRA:
3775 if (tryV6T2BitfieldExtractOp(N, true))
3776 return;
3777 break;
3778 case ISD::FP_TO_UINT:
3779 case ISD::FP_TO_SINT:
3782 if (tryFP_TO_INT(N, dl))
3783 return;
3784 break;
3785 case ISD::FMUL:
3786 if (tryFMULFixed(N, dl))
3787 return;
3788 break;
3789 case ISD::MUL:
3790 if (Subtarget->isThumb1Only())
3791 break;
3792 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
3793 unsigned RHSV = C->getZExtValue();
3794 if (!RHSV) break;
3795 if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
3796 unsigned ShImm = Log2_32(RHSV-1);
3797 if (ShImm >= 32)
3798 break;
3799 SDValue V = N->getOperand(0);
3800 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3801 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3802 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3803 if (Subtarget->isThumb()) {
3804 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3805 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
3806 return;
3807 } else {
3808 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3809 Reg0 };
3810 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
3811 return;
3812 }
3813 }
3814 if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
3815 unsigned ShImm = Log2_32(RHSV+1);
3816 if (ShImm >= 32)
3817 break;
3818 SDValue V = N->getOperand(0);
3819 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3820 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3821 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3822 if (Subtarget->isThumb()) {
3823 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3824 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
3825 return;
3826 } else {
3827 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3828 Reg0 };
3829 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
3830 return;
3831 }
3832 }
3833 }
3834 break;
3835 case ISD::AND: {
3836 // Check for unsigned bitfield extract
3837 if (tryV6T2BitfieldExtractOp(N, false))
3838 return;
3839
3840 // If an immediate is used in an AND node, it is possible that the immediate
3841 // can be more optimally materialized when negated. If this is the case we
3842 // can negate the immediate and use a BIC instead.
3843 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
3844 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
3845 uint32_t Imm = (uint32_t) N1C->getZExtValue();
3846
3847 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
3848 // immediate can be negated and fit in the immediate operand of
3849 // a t2BIC, don't do any manual transform here as this can be
3850 // handled by the generic ISel machinery.
3851 bool PreferImmediateEncoding =
3852 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
3853 if (!PreferImmediateEncoding &&
3854 ConstantMaterializationCost(Imm, Subtarget) >
3855 ConstantMaterializationCost(~Imm, Subtarget)) {
3856 // The current immediate costs more to materialize than a negated
3857 // immediate, so negate the immediate and use a BIC.
3858 SDValue NewImm =
3859 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
3860 // If the new constant didn't exist before, reposition it in the topological
3861 // ordering so it is just before N. Otherwise, don't touch its location.
3862 if (NewImm->getNodeId() == -1)
3863 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
3864
3865 if (!Subtarget->hasThumb2()) {
3866 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
3867 N->getOperand(0), NewImm, getAL(CurDAG, dl),
3868 CurDAG->getRegister(0, MVT::i32)};
3869 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
3870 return;
3871 } else {
3872 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
3873 CurDAG->getRegister(0, MVT::i32),
3874 CurDAG->getRegister(0, MVT::i32)};
3875 ReplaceNode(N,
3876 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
3877 return;
3878 }
3879 }
3880 }
3881
3882 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
3883 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
3884 // are entirely contributed by c2 and lower 16-bits are entirely contributed
3885 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
3886 // Select it to: "movt x, ((c1 & 0xffff) >> 16)
3887 EVT VT = N->getValueType(0);
3888 if (VT != MVT::i32)
3889 break;
3890 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
3891 ? ARM::t2MOVTi16
3892 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
3893 if (!Opc)
3894 break;
3895 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3896 N1C = dyn_cast<ConstantSDNode>(N1);
3897 if (!N1C)
3898 break;
3899 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
3900 SDValue N2 = N0.getOperand(1);
3901 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
3902 if (!N2C)
3903 break;
3904 unsigned N1CVal = N1C->getZExtValue();
3905 unsigned N2CVal = N2C->getZExtValue();
3906 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
3907 (N1CVal & 0xffffU) == 0xffffU &&
3908 (N2CVal & 0xffffU) == 0x0U) {
3909 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
3910 dl, MVT::i32);
3911 SDValue Ops[] = { N0.getOperand(0), Imm16,
3912 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
3913 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
3914 return;
3915 }
3916 }
3917
3918 break;
3919 }
3920 case ARMISD::UMAAL: {
3921 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3922 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3923 N->getOperand(2), N->getOperand(3),
3924 getAL(CurDAG, dl),
3925 CurDAG->getRegister(0, MVT::i32) };
3926 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
3927 return;
3928 }
3929 case ARMISD::UMLAL:{
3930 if (Subtarget->isThumb()) {
3931 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3932 N->getOperand(3), getAL(CurDAG, dl),
3933 CurDAG->getRegister(0, MVT::i32)};
3934 ReplaceNode(
3935 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
3936 return;
3937 }else{
3938 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3939 N->getOperand(3), getAL(CurDAG, dl),
3940 CurDAG->getRegister(0, MVT::i32),
3941 CurDAG->getRegister(0, MVT::i32) };
3942 ReplaceNode(N, CurDAG->getMachineNode(
3943 Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3944 MVT::i32, MVT::i32, Ops));
3945 return;
3946 }
3947 }
3948 case ARMISD::SMLAL:{
3949 if (Subtarget->isThumb()) {
3950 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3951 N->getOperand(3), getAL(CurDAG, dl),
3952 CurDAG->getRegister(0, MVT::i32)};
3953 ReplaceNode(
3954 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3955 return;
3956 }else{
3957 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3958 N->getOperand(3), getAL(CurDAG, dl),
3959 CurDAG->getRegister(0, MVT::i32),
3960 CurDAG->getRegister(0, MVT::i32) };
3961 ReplaceNode(N, CurDAG->getMachineNode(
3962 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3963 MVT::i32, MVT::i32, Ops));
3964 return;
3965 }
3966 }
3967 case ARMISD::SUBE: {
3968 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
3969 break;
3970 // Look for a pattern to match SMMLS
3971 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
3972 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
3973 N->getOperand(2).getOpcode() != ARMISD::SUBC ||
3974 !SDValue(N, 1).use_empty())
3975 break;
3976
3977 if (Subtarget->isThumb())
3978 assert(Subtarget->hasThumb2() &&
3979 "This pattern should not be generated for Thumb");
3980
3981 SDValue SmulLoHi = N->getOperand(1);
3982 SDValue Subc = N->getOperand(2);
3983 SDValue Zero = Subc.getOperand(0);
3984
3985 if (!isNullConstant(Zero) || Subc.getOperand(1) != SmulLoHi.getValue(0) ||
3986 N->getOperand(1) != SmulLoHi.getValue(1) ||
3987 N->getOperand(2) != Subc.getValue(1))
3988 break;
3989
3990 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3991 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
3992 N->getOperand(0), getAL(CurDAG, dl),
3993 CurDAG->getRegister(0, MVT::i32) };
3994 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
3995 return;
3996 }
3997 case ISD::LOAD: {
3998 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3999 return;
4000 if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
4001 if (tryT2IndexedLoad(N))
4002 return;
4003 } else if (Subtarget->isThumb()) {
4004 if (tryT1IndexedLoad(N))
4005 return;
4006 } else if (tryARMIndexedLoad(N))
4007 return;
4008 // Other cases are autogenerated.
4009 break;
4010 }
4011 case ISD::MLOAD:
4012 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
4013 return;
4014 // Other cases are autogenerated.
4015 break;
4016 case ARMISD::WLSSETUP: {
4017 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32,
4018 N->getOperand(0));
4019 ReplaceUses(N, New);
4020 CurDAG->RemoveDeadNode(N);
4021 return;
4022 }
4023 case ARMISD::WLS: {
4024 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other,
4025 N->getOperand(1), N->getOperand(2),
4026 N->getOperand(0));
4027 ReplaceUses(N, New);
4028 CurDAG->RemoveDeadNode(N);
4029 return;
4030 }
4031 case ARMISD::LE: {
4032 SDValue Ops[] = { N->getOperand(1),
4033 N->getOperand(2),
4034 N->getOperand(0) };
4035 unsigned Opc = ARM::t2LoopEnd;
4036 SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
4037 ReplaceUses(N, New);
4038 CurDAG->RemoveDeadNode(N);
4039 return;
4040 }
4041 case ARMISD::LDRD: {
4042 if (Subtarget->isThumb2())
4043 break; // TableGen handles isel in this case.
4044 SDValue Base, RegOffset, ImmOffset;
4045 const SDValue &Chain = N->getOperand(0);
4046 const SDValue &Addr = N->getOperand(1);
4047 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
4048 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
4049 // The register-offset variant of LDRD mandates that the register
4050 // allocated to RegOffset is not reused in any of the remaining operands.
4051 // This restriction is currently not enforced. Therefore emitting this
4052 // variant is explicitly avoided.
4053 Base = Addr;
4054 RegOffset = CurDAG->getRegister(0, MVT::i32);
4055 }
4056 SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
4057 SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
4058 {MVT::Untyped, MVT::Other}, Ops);
4059 SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4060 SDValue(New, 0));
4061 SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4062 SDValue(New, 0));
4063 transferMemOperands(N, New);
4064 ReplaceUses(SDValue(N, 0), Lo);
4065 ReplaceUses(SDValue(N, 1), Hi);
4066 ReplaceUses(SDValue(N, 2), SDValue(New, 1));
4067 CurDAG->RemoveDeadNode(N);
4068 return;
4069 }
4070 case ARMISD::STRD: {
4071 if (Subtarget->isThumb2())
4072 break; // TableGen handles isel in this case.
4073 SDValue Base, RegOffset, ImmOffset;
4074 const SDValue &Chain = N->getOperand(0);
4075 const SDValue &Addr = N->getOperand(3);
4076 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
4077 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
4078 // The register-offset variant of STRD mandates that the register
4079 // allocated to RegOffset is not reused in any of the remaining operands.
4080 // This restriction is currently not enforced. Therefore emitting this
4081 // variant is explicitly avoided.
4082 Base = Addr;
4083 RegOffset = CurDAG->getRegister(0, MVT::i32);
4084 }
4085 SDNode *RegPair =
4086 createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2));
4087 SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
4088 SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops);
4089 transferMemOperands(N, New);
4090 ReplaceUses(SDValue(N, 0), SDValue(New, 0));
4091 CurDAG->RemoveDeadNode(N);
4092 return;
4093 }
4094 case ARMISD::LOOP_DEC: {
4095 SDValue Ops[] = { N->getOperand(1),
4096 N->getOperand(2),
4097 N->getOperand(0) };
4098 SDNode *Dec =
4099 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
4100 CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
4101 ReplaceUses(N, Dec);
4102 CurDAG->RemoveDeadNode(N);
4103 return;
4104 }
4105 case ARMISD::BRCOND: {
4106 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4107 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4108 // Pattern complexity = 6 cost = 1 size = 0
4109
4110 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4111 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
4112 // Pattern complexity = 6 cost = 1 size = 0
4113
4114 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4115 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4116 // Pattern complexity = 6 cost = 1 size = 0
4117
4118 unsigned Opc = Subtarget->isThumb() ?
4119 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
4120 SDValue Chain = N->getOperand(0);
4121 SDValue N1 = N->getOperand(1);
4122 SDValue N2 = N->getOperand(2);
4123 SDValue N3 = N->getOperand(3);
4124 SDValue InGlue = N->getOperand(4);
4128
4129 unsigned CC = (unsigned)N2->getAsZExtVal();
4130
4131 if (InGlue.getOpcode() == ARMISD::CMPZ) {
4132 if (InGlue.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
4133 SDValue Int = InGlue.getOperand(0);
4134 uint64_t ID = Int->getConstantOperandVal(1);
4135
4136 // Handle low-overhead loops.
4137 if (ID == Intrinsic::loop_decrement_reg) {
4138 SDValue Elements = Int.getOperand(2);
4139 SDValue Size = CurDAG->getTargetConstant(Int.getConstantOperandVal(3),
4140 dl, MVT::i32);
4141
4142 SDValue Args[] = { Elements, Size, Int.getOperand(0) };
4143 SDNode *LoopDec =
4144 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
4145 CurDAG->getVTList(MVT::i32, MVT::Other),
4146 Args);
4147 ReplaceUses(Int.getNode(), LoopDec);
4148
4149 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
4150 SDNode *LoopEnd =
4151 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
4152
4153 ReplaceUses(N, LoopEnd);
4154 CurDAG->RemoveDeadNode(N);
4155 CurDAG->RemoveDeadNode(InGlue.getNode());
4156 CurDAG->RemoveDeadNode(Int.getNode());
4157 return;
4158 }
4159 }
4160
4161 bool SwitchEQNEToPLMI;
4162 SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI);
4163 InGlue = N->getOperand(4);
4164
4165 if (SwitchEQNEToPLMI) {
4166 switch ((ARMCC::CondCodes)CC) {
4167 default: llvm_unreachable("CMPZ must be either NE or EQ!");
4168 case ARMCC::NE:
4170 break;
4171 case ARMCC::EQ:
4173 break;
4174 }
4175 }
4176 }
4177
4178 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
4179 SDValue Ops[] = { N1, Tmp2, N3, Chain, InGlue };
4180 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
4181 MVT::Glue, Ops);
4182 Chain = SDValue(ResNode, 0);
4183 if (N->getNumValues() == 2) {
4184 InGlue = SDValue(ResNode, 1);
4185 ReplaceUses(SDValue(N, 1), InGlue);
4186 }
4187 ReplaceUses(SDValue(N, 0),
4188 SDValue(Chain.getNode(), Chain.getResNo()));
4189 CurDAG->RemoveDeadNode(N);
4190 return;
4191 }
4192
4193 case ARMISD::CMPZ: {
4194 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
4195 // This allows us to avoid materializing the expensive negative constant.
4196 // The CMPZ #0 is useless and will be peepholed away but we need to keep it
4197 // for its glue output.
4198 SDValue X = N->getOperand(0);
4199 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
4200 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
4201 int64_t Addend = -C->getSExtValue();
4202
4203 SDNode *Add = nullptr;
4204 // ADDS can be better than CMN if the immediate fits in a
4205 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
4206 // Outside that range we can just use a CMN which is 32-bit but has a
4207 // 12-bit immediate range.
4208 if (Addend < 1<<8) {
4209 if (Subtarget->isThumb2()) {
4210 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
4211 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
4212 CurDAG->getRegister(0, MVT::i32) };
4213 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
4214 } else {
4215 unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
4216 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
4217 CurDAG->getTargetConstant(Addend, dl, MVT::i32),
4218 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
4219 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
4220 }
4221 }
4222 if (Add) {
4223 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
4224 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
4225 }
4226 }
4227 // Other cases are autogenerated.
4228 break;
4229 }
4230
4231 case ARMISD::CMOV: {
4232 SDValue InGlue = N->getOperand(4);
4233
4234 if (InGlue.getOpcode() == ARMISD::CMPZ) {
4235 bool SwitchEQNEToPLMI;
4236 SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI);
4237
4238 if (SwitchEQNEToPLMI) {
4239 SDValue ARMcc = N->getOperand(2);
4241
4242 switch (CC) {
4243 default: llvm_unreachable("CMPZ must be either NE or EQ!");
4244 case ARMCC::NE:
4245 CC = ARMCC::MI;
4246 break;
4247 case ARMCC::EQ:
4248 CC = ARMCC::PL;
4249 break;
4250 }
4251 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
4252 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
4253 N->getOperand(3), N->getOperand(4)};
4254 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
4255 }
4256
4257 }
4258 // Other cases are autogenerated.
4259 break;
4260 }
4261 case ARMISD::VZIP: {
4262 EVT VT = N->getValueType(0);
4263 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4264 unsigned Opc64[] = {ARM::VZIPd8, ARM::VZIPd16, ARM::VTRNd32};
4265 unsigned Opc128[] = {ARM::VZIPq8, ARM::VZIPq16, ARM::VZIPq32};
4266 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4267 SDValue Pred = getAL(CurDAG, dl);
4268 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4269 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4270 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4271 return;
4272 }
4273 case ARMISD::VUZP: {
4274 EVT VT = N->getValueType(0);
4275 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4276 unsigned Opc64[] = {ARM::VUZPd8, ARM::VUZPd16, ARM::VTRNd32};
4277 unsigned Opc128[] = {ARM::VUZPq8, ARM::VUZPq16, ARM::VUZPq32};
4278 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4279 SDValue Pred = getAL(CurDAG, dl);
4280 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4281 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4282 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4283 return;
4284 }
4285 case ARMISD::VTRN: {
4286 EVT VT = N->getValueType(0);
4287 unsigned Opc64[] = {ARM::VTRNd8, ARM::VTRNd16, ARM::VTRNd32};
4288 unsigned Opc128[] = {ARM::VTRNq8, ARM::VTRNq16, ARM::VTRNq32};
4289 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4290 SDValue Pred = getAL(CurDAG, dl);
4291 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4292 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4293 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4294 return;
4295 }
4296 case ARMISD::BUILD_VECTOR: {
4297 EVT VecVT = N->getValueType(0);
4298 EVT EltVT = VecVT.getVectorElementType();
4299 unsigned NumElts = VecVT.getVectorNumElements();
4300 if (EltVT == MVT::f64) {
4301 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
4302 ReplaceNode(
4303 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
4304 return;
4305 }
4306 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
4307 if (NumElts == 2) {
4308 ReplaceNode(
4309 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
4310 return;
4311 }
4312 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
4313 ReplaceNode(N,
4314 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
4315 N->getOperand(2), N->getOperand(3)));
4316 return;
4317 }
4318
4319 case ARMISD::VLD1DUP: {
4320 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
4321 ARM::VLD1DUPd32 };
4322 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
4323 ARM::VLD1DUPq32 };
4324 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
4325 return;
4326 }
4327
4328 case ARMISD::VLD2DUP: {
4329 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4330 ARM::VLD2DUPd32 };
4331 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
4332 return;
4333 }
4334
4335 case ARMISD::VLD3DUP: {
4336 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
4337 ARM::VLD3DUPd16Pseudo,
4338 ARM::VLD3DUPd32Pseudo };
4339 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
4340 return;
4341 }
4342
4343 case ARMISD::VLD4DUP: {
4344 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
4345 ARM::VLD4DUPd16Pseudo,
4346 ARM::VLD4DUPd32Pseudo };
4347 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
4348 return;
4349 }
4350
4351 case ARMISD::VLD1DUP_UPD: {
4352 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
4353 ARM::VLD1DUPd16wb_fixed,
4354 ARM::VLD1DUPd32wb_fixed };
4355 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
4356 ARM::VLD1DUPq16wb_fixed,
4357 ARM::VLD1DUPq32wb_fixed };
4358 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
4359 return;
4360 }
4361
4362 case ARMISD::VLD2DUP_UPD: {
4363 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed,
4364 ARM::VLD2DUPd16wb_fixed,
4365 ARM::VLD2DUPd32wb_fixed,
4366 ARM::VLD1q64wb_fixed };
4367 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4368 ARM::VLD2DUPq16EvenPseudo,
4369 ARM::VLD2DUPq32EvenPseudo };
4370 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed,
4371 ARM::VLD2DUPq16OddPseudoWB_fixed,
4372 ARM::VLD2DUPq32OddPseudoWB_fixed };
4373 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0, QOpcodes1);
4374 return;
4375 }
4376
4377 case ARMISD::VLD3DUP_UPD: {
4378 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
4379 ARM::VLD3DUPd16Pseudo_UPD,
4380 ARM::VLD3DUPd32Pseudo_UPD,
4381 ARM::VLD1d64TPseudoWB_fixed };
4382 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4383 ARM::VLD3DUPq16EvenPseudo,
4384 ARM::VLD3DUPq32EvenPseudo };
4385 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD,
4386 ARM::VLD3DUPq16OddPseudo_UPD,
4387 ARM::VLD3DUPq32OddPseudo_UPD };
4388 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4389 return;
4390 }
4391
4392 case ARMISD::VLD4DUP_UPD: {
4393 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
4394 ARM::VLD4DUPd16Pseudo_UPD,
4395 ARM::VLD4DUPd32Pseudo_UPD,
4396 ARM::VLD1d64QPseudoWB_fixed };
4397 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4398 ARM::VLD4DUPq16EvenPseudo,
4399 ARM::VLD4DUPq32EvenPseudo };
4400 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD,
4401 ARM::VLD4DUPq16OddPseudo_UPD,
4402 ARM::VLD4DUPq32OddPseudo_UPD };
4403 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4404 return;
4405 }
4406
4407 case ARMISD::VLD1_UPD: {
4408 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
4409 ARM::VLD1d16wb_fixed,
4410 ARM::VLD1d32wb_fixed,
4411 ARM::VLD1d64wb_fixed };
4412 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
4413 ARM::VLD1q16wb_fixed,
4414 ARM::VLD1q32wb_fixed,
4415 ARM::VLD1q64wb_fixed };
4416 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
4417 return;
4418 }
4419
4420 case ARMISD::VLD2_UPD: {
4421 if (Subtarget->hasNEON()) {
4422 static const uint16_t DOpcodes[] = {
4423 ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
4424 ARM::VLD1q64wb_fixed};
4425 static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
4426 ARM::VLD2q16PseudoWB_fixed,
4427 ARM::VLD2q32PseudoWB_fixed};
4428 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
4429 } else {
4430 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
4431 ARM::MVE_VLD21_8_wb};
4432 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
4433 ARM::MVE_VLD21_16_wb};
4434 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
4435 ARM::MVE_VLD21_32_wb};
4436 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4437 SelectMVE_VLD(N, 2, Opcodes, true);
4438 }
4439 return;
4440 }
4441
4442 case ARMISD::VLD3_UPD: {
4443 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
4444 ARM::VLD3d16Pseudo_UPD,
4445 ARM::VLD3d32Pseudo_UPD,
4446 ARM::VLD1d64TPseudoWB_fixed};
4447 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4448 ARM::VLD3q16Pseudo_UPD,
4449 ARM::VLD3q32Pseudo_UPD };
4450 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
4451 ARM::VLD3q16oddPseudo_UPD,
4452 ARM::VLD3q32oddPseudo_UPD };
4453 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4454 return;
4455 }
4456
4457 case ARMISD::VLD4_UPD: {
4458 if (Subtarget->hasNEON()) {
4459 static const uint16_t DOpcodes[] = {
4460 ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD,
4461 ARM::VLD1d64QPseudoWB_fixed};
4462 static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
4463 ARM::VLD4q16Pseudo_UPD,
4464 ARM::VLD4q32Pseudo_UPD};
4465 static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
4466 ARM::VLD4q16oddPseudo_UPD,
4467 ARM::VLD4q32oddPseudo_UPD};
4468 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4469 } else {
4470 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
4471 ARM::MVE_VLD42_8,
4472 ARM::MVE_VLD43_8_wb};
4473 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
4474 ARM::MVE_VLD42_16,
4475 ARM::MVE_VLD43_16_wb};
4476 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
4477 ARM::MVE_VLD42_32,
4478 ARM::MVE_VLD43_32_wb};
4479 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4480 SelectMVE_VLD(N, 4, Opcodes, true);
4481 }
4482 return;
4483 }
4484
4485 case ARMISD::VLD1x2_UPD: {
4486 if (Subtarget->hasNEON()) {
4487 static const uint16_t DOpcodes[] = {
4488 ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed,
4489 ARM::VLD1q64wb_fixed};
4490 static const uint16_t QOpcodes[] = {
4491 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4492 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4493 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
4494 return;
4495 }
4496 break;
4497 }
4498
4499 case ARMISD::VLD1x3_UPD: {
4500 if (Subtarget->hasNEON()) {
4501 static const uint16_t DOpcodes[] = {
4502 ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed,
4503 ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed};
4504 static const uint16_t QOpcodes0[] = {
4505 ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,
4506 ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};
4507 static const uint16_t QOpcodes1[] = {
4508 ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD,
4509 ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD};
4510 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4511 return;
4512 }
4513 break;
4514 }
4515
4516 case ARMISD::VLD1x4_UPD: {
4517 if (Subtarget->hasNEON()) {
4518 static const uint16_t DOpcodes[] = {
4519 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4520 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4521 static const uint16_t QOpcodes0[] = {
4522 ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,
4523 ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};
4524 static const uint16_t QOpcodes1[] = {
4525 ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD,
4526 ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD};
4527 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4528 return;
4529 }
4530 break;
4531 }
4532
4533 case ARMISD::VLD2LN_UPD: {
4534 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
4535 ARM::VLD2LNd16Pseudo_UPD,
4536 ARM::VLD2LNd32Pseudo_UPD };
4537 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
4538 ARM::VLD2LNq32Pseudo_UPD };
4539 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
4540 return;
4541 }
4542
4543 case ARMISD::VLD3LN_UPD: {
4544 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
4545 ARM::VLD3LNd16Pseudo_UPD,
4546 ARM::VLD3LNd32Pseudo_UPD };
4547 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
4548 ARM::VLD3LNq32Pseudo_UPD };
4549 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
4550 return;
4551 }
4552
4553 case ARMISD::VLD4LN_UPD: {
4554 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
4555 ARM::VLD4LNd16Pseudo_UPD,
4556 ARM::VLD4LNd32Pseudo_UPD };
4557 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
4558 ARM::VLD4LNq32Pseudo_UPD };
4559 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
4560 return;
4561 }
4562
4563 case ARMISD::VST1_UPD: {
4564 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
4565 ARM::VST1d16wb_fixed,
4566 ARM::VST1d32wb_fixed,
4567 ARM::VST1d64wb_fixed };
4568 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
4569 ARM::VST1q16wb_fixed,
4570 ARM::VST1q32wb_fixed,
4571 ARM::VST1q64wb_fixed };
4572 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
4573 return;
4574 }
4575
4576 case ARMISD::VST2_UPD: {
4577 if (Subtarget->hasNEON()) {
4578 static const uint16_t DOpcodes[] = {
4579 ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
4580 ARM::VST1q64wb_fixed};
4581 static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
4582 ARM::VST2q16PseudoWB_fixed,
4583 ARM::VST2q32PseudoWB_fixed};
4584 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
4585 return;
4586 }
4587 break;
4588 }
4589
4590 case ARMISD::VST3_UPD: {
4591 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
4592 ARM::VST3d16Pseudo_UPD,
4593 ARM::VST3d32Pseudo_UPD,
4594 ARM::VST1d64TPseudoWB_fixed};
4595 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
4596 ARM::VST3q16Pseudo_UPD,
4597 ARM::VST3q32Pseudo_UPD };
4598 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
4599 ARM::VST3q16oddPseudo_UPD,
4600 ARM::VST3q32oddPseudo_UPD };
4601 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4602 return;
4603 }
4604
4605 case ARMISD::VST4_UPD: {
4606 if (Subtarget->hasNEON()) {
4607 static const uint16_t DOpcodes[] = {
4608 ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,
4609 ARM::VST1d64QPseudoWB_fixed};
4610 static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
4611 ARM::VST4q16Pseudo_UPD,
4612 ARM::VST4q32Pseudo_UPD};
4613 static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
4614 ARM::VST4q16oddPseudo_UPD,
4615 ARM::VST4q32oddPseudo_UPD};
4616 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4617 return;
4618 }
4619 break;
4620 }
4621
4622 case ARMISD::VST1x2_UPD: {
4623 if (Subtarget->hasNEON()) {
4624 static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed,
4625 ARM::VST1q16wb_fixed,
4626 ARM::VST1q32wb_fixed,
4627 ARM::VST1q64wb_fixed};
4628 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4629 ARM::VST1d16QPseudoWB_fixed,
4630 ARM::VST1d32QPseudoWB_fixed,
4631 ARM::VST1d64QPseudoWB_fixed };
4632 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
4633 return;
4634 }
4635 break;
4636 }
4637
4638 case ARMISD::VST1x3_UPD: {
4639 if (Subtarget->hasNEON()) {
4640 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed,
4641 ARM::VST1d16TPseudoWB_fixed,
4642 ARM::VST1d32TPseudoWB_fixed,
4643 ARM::VST1d64TPseudoWB_fixed };
4644 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
4645 ARM::VST1q16LowTPseudo_UPD,
4646 ARM::VST1q32LowTPseudo_UPD,
4647 ARM::VST1q64LowTPseudo_UPD };
4648 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD,
4649 ARM::VST1q16HighTPseudo_UPD,
4650 ARM::VST1q32HighTPseudo_UPD,
4651 ARM::VST1q64HighTPseudo_UPD };
4652 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4653 return;
4654 }
4655 break;
4656 }
4657
4658 case ARMISD::VST1x4_UPD: {
4659 if (Subtarget->hasNEON()) {
4660 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4661 ARM::VST1d16QPseudoWB_fixed,
4662 ARM::VST1d32QPseudoWB_fixed,
4663 ARM::VST1d64QPseudoWB_fixed };
4664 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
4665 ARM::VST1q16LowQPseudo_UPD,
4666 ARM::VST1q32LowQPseudo_UPD,
4667 ARM::VST1q64LowQPseudo_UPD };
4668 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD,
4669 ARM::VST1q16HighQPseudo_UPD,
4670 ARM::VST1q32HighQPseudo_UPD,
4671 ARM::VST1q64HighQPseudo_UPD };
4672 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4673 return;
4674 }
4675 break;
4676 }
4677 case ARMISD::VST2LN_UPD: {
4678 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
4679 ARM::VST2LNd16Pseudo_UPD,
4680 ARM::VST2LNd32Pseudo_UPD };
4681 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
4682 ARM::VST2LNq32Pseudo_UPD };
4683 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
4684 return;
4685 }
4686
4687 case ARMISD::VST3LN_UPD: {
4688 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
4689 ARM::VST3LNd16Pseudo_UPD,
4690 ARM::VST3LNd32Pseudo_UPD };
4691 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
4692 ARM::VST3LNq32Pseudo_UPD };
4693 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
4694 return;
4695 }
4696
4697 case ARMISD::VST4LN_UPD: {
4698 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
4699 ARM::VST4LNd16Pseudo_UPD,
4700 ARM::VST4LNd32Pseudo_UPD };
4701 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
4702 ARM::VST4LNq32Pseudo_UPD };
4703 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
4704 return;
4705 }
4706
4709 unsigned IntNo = N->getConstantOperandVal(1);
4710 switch (IntNo) {
4711 default:
4712 break;
4713
4714 case Intrinsic::arm_mrrc:
4715 case Intrinsic::arm_mrrc2: {
4716 SDLoc dl(N);
4717 SDValue Chain = N->getOperand(0);
4718 unsigned Opc;
4719
4720 if (Subtarget->isThumb())
4721 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
4722 else
4723 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
4724
4726 Ops.push_back(getI32Imm(N->getConstantOperandVal(2), dl)); /* coproc */
4727 Ops.push_back(getI32Imm(N->getConstantOperandVal(3), dl)); /* opc */
4728 Ops.push_back(getI32Imm(N->getConstantOperandVal(4), dl)); /* CRm */
4729
4730 // The ARM mrrc2 instruction cannot be predicated: the top 4 bits of its encoding
4731 // are always '1111'. Assembly language does allow AL to be written as a predicate
4732 // on mrrc2, but that makes no difference to the encoded instruction.
4733 if (Opc != ARM::MRRC2) {
4734 Ops.push_back(getAL(CurDAG, dl));
4735 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4736 }
4737
4738 Ops.push_back(Chain);
4739
4740 // Writes to two registers.
4741 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
4742
4743 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
4744 return;
4745 }
4746 case Intrinsic::arm_ldaexd:
4747 case Intrinsic::arm_ldrexd: {
4748 SDLoc dl(N);
4749 SDValue Chain = N->getOperand(0);
4750 SDValue MemAddr = N->getOperand(2);
4751 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
4752
4753 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
4754 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
4755 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
4756
4757 // arm_ldrexd returns an i64 value in {i32, i32}
4758 std::vector<EVT> ResTys;
4759 if (isThumb) {
4760 ResTys.push_back(MVT::i32);
4761 ResTys.push_back(MVT::i32);
4762 } else
4763 ResTys.push_back(MVT::Untyped);
4764 ResTys.push_back(MVT::Other);
4765
4766 // Place arguments in the right order.
4767 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
4768 CurDAG->getRegister(0, MVT::i32), Chain};
4769 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4770 // Transfer memoperands.
4771 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4772 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4773
4774 // Remap uses.
4775 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
4776 if (!SDValue(N, 0).use_empty()) {
4778 if (isThumb)
4779 Result = SDValue(Ld, 0);
4780 else {
4781 SDValue SubRegIdx =
4782 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
4783 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4784 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4785 Result = SDValue(ResNode,0);
4786 }
4787 ReplaceUses(SDValue(N, 0), Result);
4788 }
4789 if (!SDValue(N, 1).use_empty()) {