LLVM 23.0.0git
ARMISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the ARM target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ARM.h"
14#include "ARMBaseInstrInfo.h"
15#include "ARMTargetMachine.h"
17#include "Utils/ARMBaseInfo.h"
18#include "llvm/ADT/APSInt.h"
27#include "llvm/IR/Constants.h"
29#include "llvm/IR/Function.h"
30#include "llvm/IR/Intrinsics.h"
31#include "llvm/IR/IntrinsicsARM.h"
32#include "llvm/IR/LLVMContext.h"
36#include <optional>
37
38using namespace llvm;
39
40#define DEBUG_TYPE "arm-isel"
41#define PASS_NAME "ARM Instruction Selection"
42
43static cl::opt<bool>
44DisableShifterOp("disable-shifter-op", cl::Hidden,
45 cl::desc("Disable isel of shifter-op"),
46 cl::init(false));
47
48//===--------------------------------------------------------------------===//
49/// ARMDAGToDAGISel - ARM specific code to select ARM machine
50/// instructions for SelectionDAG operations.
51///
52namespace {
53
54class ARMDAGToDAGISel : public SelectionDAGISel {
55 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
56 /// make the right decision when generating code for different targets.
57 const ARMSubtarget *Subtarget;
58
59public:
60 ARMDAGToDAGISel() = delete;
61
62 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
63 : SelectionDAGISel(tm, OptLevel) {}
64
65 bool runOnMachineFunction(MachineFunction &MF) override {
66 // Reset the subtarget each time through.
67 Subtarget = &MF.getSubtarget<ARMSubtarget>();
69 return true;
70 }
71
72 void PreprocessISelDAG() override;
73
74 /// getI32Imm - Return a target constant of type i32 with the specified
75 /// value.
76 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
77 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
78 }
79
80 void Select(SDNode *N) override;
81
82 /// Return true as some complex patterns, like those that call
83 /// canExtractShiftFromMul can modify the DAG inplace.
84 bool ComplexPatternFuncMutatesDAG() const override { return true; }
85
86 bool hasNoVMLxHazardUse(SDNode *N) const;
87 bool isShifterOpProfitable(const SDValue &Shift,
88 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
89 bool SelectRegShifterOperand(SDValue N, SDValue &A,
90 SDValue &B, SDValue &C,
91 bool CheckProfitability = true);
92 bool SelectImmShifterOperand(SDValue N, SDValue &A,
93 SDValue &B, bool CheckProfitability = true);
94 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
95 SDValue &C) {
96 // Don't apply the profitability check
97 return SelectRegShifterOperand(N, A, B, C, false);
98 }
99 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
100 // Don't apply the profitability check
101 return SelectImmShifterOperand(N, A, B, false);
102 }
103 bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
104 if (!N.hasOneUse())
105 return false;
106 return SelectImmShifterOperand(N, A, B, false);
107 }
108
109 bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);
110
111 bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
112 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
113
114 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
116 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
118 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
120 bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
121 bool SelectAddrMode3(SDValue N, SDValue &Base,
123 bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
125 bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
126 bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
127 bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
128 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
129 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
130
131 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
132
133 // Thumb Addressing Modes:
134 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
135 bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
136 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
137 SDValue &OffImm);
138 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
139 SDValue &OffImm);
140 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
141 SDValue &OffImm);
142 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
143 SDValue &OffImm);
144 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
145 template <unsigned Shift>
146 bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
147
148 // Thumb 2 Addressing Modes:
149 bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
150 template <unsigned Shift>
151 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
152 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
153 SDValue &OffImm);
154 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
155 SDValue &OffImm);
156 template <unsigned Shift>
157 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
158 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
159 unsigned Shift);
160 template <unsigned Shift>
161 bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
162 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
163 SDValue &OffReg, SDValue &ShImm);
164 bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
165
166 template<int Min, int Max>
167 bool SelectImmediateInRange(SDValue N, SDValue &OffImm);
168
169 inline bool is_so_imm(unsigned Imm) const {
170 return ARM_AM::getSOImmVal(Imm) != -1;
171 }
172
173 inline bool is_so_imm_not(unsigned Imm) const {
174 return ARM_AM::getSOImmVal(~Imm) != -1;
175 }
176
177 inline bool is_t2_so_imm(unsigned Imm) const {
178 return ARM_AM::getT2SOImmVal(Imm) != -1;
179 }
180
181 inline bool is_t2_so_imm_not(unsigned Imm) const {
182 return ARM_AM::getT2SOImmVal(~Imm) != -1;
183 }
184
185 // Include the pieces autogenerated from the target description.
186#include "ARMGenDAGISel.inc"
187
188private:
189 void transferMemOperands(SDNode *Src, SDNode *Dst);
190
191 /// Indexed (pre/post inc/dec) load matching code for ARM.
192 bool tryARMIndexedLoad(SDNode *N);
193 bool tryT1IndexedLoad(SDNode *N);
194 bool tryT2IndexedLoad(SDNode *N);
195 bool tryMVEIndexedLoad(SDNode *N);
196 bool tryFMULFixed(SDNode *N, SDLoc dl);
197 bool tryFP_TO_INT(SDNode *N, SDLoc dl);
198 bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul,
199 bool IsUnsigned,
200 bool FixedToFloat);
201
202 /// SelectVLD - Select NEON load intrinsics. NumVecs should be
203 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
204 /// loads of D registers and even subregs and odd subregs of Q registers.
205 /// For NumVecs <= 2, QOpcodes1 is not used.
206 void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
207 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
208 const uint16_t *QOpcodes1);
209
210 /// SelectVST - Select NEON store intrinsics. NumVecs should
211 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
212 /// stores of D registers and even subregs and odd subregs of Q registers.
213 /// For NumVecs <= 2, QOpcodes1 is not used.
214 void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
215 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
216 const uint16_t *QOpcodes1);
217
218 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
219 /// be 2, 3 or 4. The opcode arrays specify the instructions used for
220 /// load/store of D registers and Q registers.
221 void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
222 unsigned NumVecs, const uint16_t *DOpcodes,
223 const uint16_t *QOpcodes);
224
225 /// Helper functions for setting up clusters of MVE predication operands.
226 template <typename SDValueVector>
227 void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
228 SDValue PredicateMask);
229 template <typename SDValueVector>
230 void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
231 SDValue PredicateMask, SDValue Inactive);
232
233 template <typename SDValueVector>
234 void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
235 template <typename SDValueVector>
236 void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);
237
238 /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
239 void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);
240
241 /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
242 void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
243 bool HasSaturationOperand);
244
245 /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
246 void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
247 uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);
248
249 /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
250 /// vector lanes.
251 void SelectMVE_VSHLC(SDNode *N, bool Predicated);
252
253 /// Select long MVE vector reductions with two vector operands
254 /// Stride is the number of vector element widths the instruction can operate
255 /// on:
256 /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
257 /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
258 /// Stride is used when addressing the OpcodesS array which contains multiple
259 /// opcodes for each element width.
260 /// TySize is the index into the list of element types listed above
261 void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
262 const uint16_t *OpcodesS, const uint16_t *OpcodesU,
263 size_t Stride, size_t TySize);
264
265 /// Select a 64-bit MVE vector reduction with two vector operands
266 /// arm_mve_vmlldava_[predicated]
267 void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
268 const uint16_t *OpcodesU);
269 /// Select a 72-bit MVE vector rounding reduction with two vector operands
270 /// int_arm_mve_vrmlldavha[_predicated]
271 void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
272 const uint16_t *OpcodesU);
273
274 /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
275 /// should be 2 or 4. The opcode array specifies the instructions
276 /// used for 8, 16 and 32-bit lane sizes respectively, and each
277 /// pointer points to a set of NumVecs sub-opcodes used for the
278 /// different stages (e.g. VLD20 versus VLD21) of each load family.
279 void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
280 const uint16_t *const *Opcodes, bool HasWriteback);
281
282 /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
283 /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
284 void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
285 bool Wrapping, bool Predicated);
286
287 /// Select SelectCDE_CXxD - Select CDE dual-GPR instruction (one of CX1D,
288 /// CX1DA, CX2D, CX2DA, CX3, CX3DA).
289 /// \arg \c NumExtraOps number of extra operands besides the coprocossor,
290 /// the accumulator and the immediate operand, i.e. 0
291 /// for CX1*, 1 for CX2*, 2 for CX3*
292 /// \arg \c HasAccum whether the instruction has an accumulator operand
293 void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
294 bool HasAccum);
295
296 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
297 /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
298 /// for loading D registers.
299 void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
300 unsigned NumVecs, const uint16_t *DOpcodes,
301 const uint16_t *QOpcodes0 = nullptr,
302 const uint16_t *QOpcodes1 = nullptr);
303
304 /// Try to select SBFX/UBFX instructions for ARM.
305 bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
306
307 bool tryInsertVectorElt(SDNode *N);
308
309 bool tryShiftAmountMod(SDNode *N);
310
311 bool tryReadRegister(SDNode *N);
312 bool tryWriteRegister(SDNode *N);
313
314 bool tryInlineAsm(SDNode *N);
315
316 void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
317
318 void SelectCMP_SWAP(SDNode *N);
319
320 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
321 /// inline asm expressions.
322 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
323 InlineAsm::ConstraintCode ConstraintID,
324 std::vector<SDValue> &OutOps) override;
325
326 // Form pairs of consecutive R, S, D, or Q registers.
327 SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
328 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
329 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
330 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
331
332 // Form sequences of 4 consecutive S, D, or Q registers.
333 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
334 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
335 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
336
337 // Get the alignment operand for a NEON VLD or VST instruction.
338 SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
339 bool is64BitVector);
340
341 /// Checks if N is a multiplication by a constant where we can extract out a
342 /// power of two from the constant so that it can be used in a shift, but only
343 /// if it simplifies the materialization of the constant. Returns true if it
344 /// is, and assigns to PowerOfTwo the power of two that should be extracted
345 /// out and to NewMulConst the new constant to be multiplied by.
346 bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
347 unsigned &PowerOfTwo, SDValue &NewMulConst) const;
348
349 /// Replace N with M in CurDAG, in a way that also ensures that M gets
350 /// selected when N would have been selected.
351 void replaceDAGValue(const SDValue &N, SDValue M);
352};
353
354class ARMDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
355public:
356 static char ID;
357 ARMDAGToDAGISelLegacy(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
358 : SelectionDAGISelLegacy(
359 ID, std::make_unique<ARMDAGToDAGISel>(tm, OptLevel)) {}
360};
361}
362
// Pass identification: the address of ID uniquely identifies the legacy pass.
char ARMDAGToDAGISelLegacy::ID = 0;

// Register the legacy pass under DEBUG_TYPE/PASS_NAME defined above.
INITIALIZE_PASS(ARMDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
366
367/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
368/// operand. If so Imm will receive the 32-bit value.
369static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
370 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
371 Imm = N->getAsZExtVal();
372 return true;
373 }
374 return false;
375}
376
377// isInt32Immediate - This method tests to see if a constant operand.
378// If so Imm will receive the 32 bit value.
379static bool isInt32Immediate(SDValue N, unsigned &Imm) {
380 return isInt32Immediate(N.getNode(), Imm);
381}
382
383// isOpcWithIntImmediate - This method tests to see if the node is a specific
384// opcode and that it has a immediate integer right operand.
385// If so Imm will receive the 32 bit value.
386static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
387 return N->getOpcode() == Opc &&
388 isInt32Immediate(N->getOperand(1).getNode(), Imm);
389}
390
391/// Check whether a particular node is a constant value representable as
392/// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
393///
394/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
395static bool isScaledConstantInRange(SDValue Node, int Scale,
396 int RangeMin, int RangeMax,
397 int &ScaledConstant) {
398 assert(Scale > 0 && "Invalid scale!");
399
400 // Check that this is a constant.
402 if (!C)
403 return false;
404
405 ScaledConstant = (int) C->getZExtValue();
406 if ((ScaledConstant % Scale) != 0)
407 return false;
408
409 ScaledConstant /= Scale;
410 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
411}
412
/// Pre-isel DAG rewrite: reassociate (add X1, (and (srl X2, c1), c2)) so the
/// shift can be folded as a shifter operand of the add and the and/srl pair
/// becomes a UBFX. Only runs when V6T2 ops (UBFX) are available.
void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  // We use make_early_inc_range to avoid invalidation issues.
  for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
    if (N.getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as an shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).

    SDValue N0 = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    unsigned And_imm = 0;
    // Canonicalize so the AND-with-immediate (if any) ends up in N1.
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = llvm::countr_zero(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, left shifter operand of 1 / 2 for free but others are not.
      // e.g.
      //  ubfx   r3, r1, #16, #8
      //  ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //  mov.w  r9, #1020
      //  and.w  r2, r9, r1, lsr #14
      //  ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    // After stripping trailing zeros the mask must be contiguous ones.
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    // New srl shifts by (c1 + tz); the mask is pre-shifted right by tz and the
    // result shifted back left by tz so the shl can fold into the add.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(&N, N0, N1);
  }
}
493
/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  // At -O0 we don't try to model the hazard at all.
  if (OptLevel == CodeGenOptLevel::None)
    return true;

  // Targets without the hazard can always use VMLx.
  if (!Subtarget->hasVMLxHazards())
    return true;

  // With multiple users we cannot reason about the single consumer below.
  if (!N->hasOneUse())
    return false;

  SDNode *User = *N->user_begin();
  if (User->getOpcode() == ISD::CopyToReg)
    return true;
  if (User->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
    // Stores and GPR transfers of the result do not trigger the hazard.
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    // vmla
    // vmla (stall 8 cycles)
    //
    // vmul (5 cycles)
    // vadd (5 cycles)
    // vmla
    // This adds up to about 18 - 19 cycles.
    //
    // vmla
    // vmul (stall 4 cycles)
    // vadd adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}
538
539bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
540 ARM_AM::ShiftOpc ShOpcVal,
541 unsigned ShAmt) {
542 if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
543 return true;
544 if (Shift.hasOneUse())
545 return true;
546 // R << 2 is free.
547 return ShOpcVal == ARM_AM::lsl &&
548 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
549}
550
551bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
552 unsigned MaxShift,
553 unsigned &PowerOfTwo,
554 SDValue &NewMulConst) const {
555 assert(N.getOpcode() == ISD::MUL);
556 assert(MaxShift > 0);
557
558 // If the multiply is used in more than one place then changing the constant
559 // will make other uses incorrect, so don't.
560 if (!N.hasOneUse()) return false;
561 // Check if the multiply is by a constant
562 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
563 if (!MulConst) return false;
564 // If the constant is used in more than one place then modifying it will mean
565 // we need to materialize two constants instead of one, which is a bad idea.
566 if (!MulConst->hasOneUse()) return false;
567 unsigned MulConstVal = MulConst->getZExtValue();
568 if (MulConstVal == 0) return false;
569
570 // Find the largest power of 2 that MulConstVal is a multiple of
571 PowerOfTwo = MaxShift;
572 while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
573 --PowerOfTwo;
574 if (PowerOfTwo == 0) return false;
575 }
576
577 // Only optimise if the new cost is better
578 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
579 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
580 unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
581 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
582 return NewCost < OldCost;
583}
584
585void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
586 CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
587 ReplaceUses(N, M);
588}
589
590bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
591 SDValue &BaseReg,
592 SDValue &Opc,
593 bool CheckProfitability) {
595 return false;
596
597 // If N is a multiply-by-constant and it's profitable to extract a shift and
598 // use it in a shifted operand do so.
599 if (N.getOpcode() == ISD::MUL) {
600 unsigned PowerOfTwo = 0;
601 SDValue NewMulConst;
602 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
603 HandleSDNode Handle(N);
604 SDLoc Loc(N);
605 replaceDAGValue(N.getOperand(1), NewMulConst);
606 BaseReg = Handle.getValue();
607 Opc = CurDAG->getTargetConstant(
608 ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
609 return true;
610 }
611 }
612
613 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
614
615 // Don't match base register only case. That is matched to a separate
616 // lower complexity pattern with explicit register operand.
617 if (ShOpcVal == ARM_AM::no_shift) return false;
618
619 BaseReg = N.getOperand(0);
620 unsigned ShImmVal = 0;
621 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
622 if (!RHS) return false;
623 ShImmVal = RHS->getZExtValue() & 31;
624 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
625 SDLoc(N), MVT::i32);
626 return true;
627}
628
629bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
630 SDValue &BaseReg,
631 SDValue &ShReg,
632 SDValue &Opc,
633 bool CheckProfitability) {
635 return false;
636
637 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
638
639 // Don't match base register only case. That is matched to a separate
640 // lower complexity pattern with explicit register operand.
641 if (ShOpcVal == ARM_AM::no_shift) return false;
642
643 BaseReg = N.getOperand(0);
644 unsigned ShImmVal = 0;
645 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
646 if (RHS) return false;
647
648 ShReg = N.getOperand(1);
649 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
650 return false;
651 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
652 SDLoc(N), MVT::i32);
653 return true;
654}
655
656// Determine whether an ISD::OR's operands are suitable to turn the operation
657// into an addition, which often has more compact encodings.
658bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
659 assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
660 Out = N;
661 return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
662}
663
664
665bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
666 SDValue &Base,
667 SDValue &OffImm) {
668 // Match simple R + imm12 operands.
669
670 // Base only.
671 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
672 !CurDAG->isBaseWithConstantOffset(N)) {
673 if (N.getOpcode() == ISD::FrameIndex) {
674 // Match frame index.
675 int FI = cast<FrameIndexSDNode>(N)->getIndex();
676 Base = CurDAG->getTargetFrameIndex(
677 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
678 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
679 return true;
680 }
681
682 if (N.getOpcode() == ARMISD::Wrapper &&
683 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
684 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
685 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
686 Base = N.getOperand(0);
687 } else
688 Base = N;
689 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
690 return true;
691 }
692
693 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
694 int RHSC = (int)RHS->getSExtValue();
695 if (N.getOpcode() == ISD::SUB)
696 RHSC = -RHSC;
697
698 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
699 Base = N.getOperand(0);
700 if (Base.getOpcode() == ISD::FrameIndex) {
701 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
702 Base = CurDAG->getTargetFrameIndex(
703 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
704 }
705 OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
706 return true;
707 }
708 }
709
710 // Base only.
711 Base = N;
712 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
713 return true;
714}
715
716
717
718bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
719 SDValue &Opc) {
720 if (N.getOpcode() == ISD::MUL &&
721 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
722 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
723 // X * [3,5,9] -> X + X * [2,4,8] etc.
724 int RHSC = (int)RHS->getZExtValue();
725 if (RHSC & 1) {
726 RHSC = RHSC & ~1;
728 if (RHSC < 0) {
730 RHSC = - RHSC;
731 }
732 if (isPowerOf2_32(RHSC)) {
733 unsigned ShAmt = Log2_32(RHSC);
734 Base = Offset = N.getOperand(0);
735 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
737 SDLoc(N), MVT::i32);
738 return true;
739 }
740 }
741 }
742 }
743
744 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
745 // ISD::OR that is equivalent to an ISD::ADD.
746 !CurDAG->isBaseWithConstantOffset(N))
747 return false;
748
749 // Leave simple R +/- imm12 operands for LDRi12
750 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
751 int RHSC;
752 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
753 -0x1000+1, 0x1000, RHSC)) // 12 bits.
754 return false;
755 }
756
757 // Otherwise this is R +/- [possibly shifted] R.
759 ARM_AM::ShiftOpc ShOpcVal =
760 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
761 unsigned ShAmt = 0;
762
763 Base = N.getOperand(0);
764 Offset = N.getOperand(1);
765
766 if (ShOpcVal != ARM_AM::no_shift) {
767 // Check to see if the RHS of the shift is a constant, if not, we can't fold
768 // it.
769 if (ConstantSDNode *Sh =
770 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
771 ShAmt = Sh->getZExtValue();
772 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
773 Offset = N.getOperand(1).getOperand(0);
774 else {
775 ShAmt = 0;
776 ShOpcVal = ARM_AM::no_shift;
777 }
778 } else {
779 ShOpcVal = ARM_AM::no_shift;
780 }
781 }
782
783 // Try matching (R shl C) + (R).
784 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
785 !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
786 N.getOperand(0).hasOneUse())) {
787 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
788 if (ShOpcVal != ARM_AM::no_shift) {
789 // Check to see if the RHS of the shift is a constant, if not, we can't
790 // fold it.
791 if (ConstantSDNode *Sh =
792 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
793 ShAmt = Sh->getZExtValue();
794 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
795 Offset = N.getOperand(0).getOperand(0);
796 Base = N.getOperand(1);
797 } else {
798 ShAmt = 0;
799 ShOpcVal = ARM_AM::no_shift;
800 }
801 } else {
802 ShOpcVal = ARM_AM::no_shift;
803 }
804 }
805 }
806
807 // If Offset is a multiply-by-constant and it's profitable to extract a shift
808 // and use it in a shifted operand do so.
809 if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
810 unsigned PowerOfTwo = 0;
811 SDValue NewMulConst;
812 if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
813 HandleSDNode Handle(Offset);
814 replaceDAGValue(Offset.getOperand(1), NewMulConst);
815 Offset = Handle.getValue();
816 ShAmt = PowerOfTwo;
817 ShOpcVal = ARM_AM::lsl;
818 }
819 }
820
821 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
822 SDLoc(N), MVT::i32);
823 return true;
824}
825
826bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
828 unsigned Opcode = Op->getOpcode();
829 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
830 ? cast<LoadSDNode>(Op)->getAddressingMode()
831 : cast<StoreSDNode>(Op)->getAddressingMode();
833 ? ARM_AM::add : ARM_AM::sub;
834 int Val;
835 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
836 return false;
837
838 Offset = N;
839 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
840 unsigned ShAmt = 0;
841 if (ShOpcVal != ARM_AM::no_shift) {
842 // Check to see if the RHS of the shift is a constant, if not, we can't fold
843 // it.
844 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
845 ShAmt = Sh->getZExtValue();
846 if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
847 Offset = N.getOperand(0);
848 else {
849 ShAmt = 0;
850 ShOpcVal = ARM_AM::no_shift;
851 }
852 } else {
853 ShOpcVal = ARM_AM::no_shift;
854 }
855 }
856
857 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
858 SDLoc(N), MVT::i32);
859 return true;
860}
861
862bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
864 unsigned Opcode = Op->getOpcode();
865 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
866 ? cast<LoadSDNode>(Op)->getAddressingMode()
867 : cast<StoreSDNode>(Op)->getAddressingMode();
869 ? ARM_AM::add : ARM_AM::sub;
870 int Val;
871 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
872 if (AddSub == ARM_AM::sub) Val *= -1;
873 Offset = CurDAG->getRegister(0, MVT::i32);
874 Opc = CurDAG->getSignedTargetConstant(Val, SDLoc(Op), MVT::i32);
875 return true;
876 }
877
878 return false;
879}
880
881
882bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
884 unsigned Opcode = Op->getOpcode();
885 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
886 ? cast<LoadSDNode>(Op)->getAddressingMode()
887 : cast<StoreSDNode>(Op)->getAddressingMode();
889 ? ARM_AM::add : ARM_AM::sub;
890 int Val;
891 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
892 Offset = CurDAG->getRegister(0, MVT::i32);
893 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
895 SDLoc(Op), MVT::i32);
896 return true;
897 }
898
899 return false;
900}
901
// Match any address as a bare base register with no offset operand.
// Trivially succeeds for every input.
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}
906
907bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
909 SDValue &Opc) {
910 if (N.getOpcode() == ISD::SUB) {
911 // X - C is canonicalize to X + -C, no need to handle it here.
912 Base = N.getOperand(0);
913 Offset = N.getOperand(1);
914 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
915 MVT::i32);
916 return true;
917 }
918
919 if (!CurDAG->isBaseWithConstantOffset(N)) {
920 Base = N;
921 if (N.getOpcode() == ISD::FrameIndex) {
922 int FI = cast<FrameIndexSDNode>(N)->getIndex();
923 Base = CurDAG->getTargetFrameIndex(
924 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
925 }
926 Offset = CurDAG->getRegister(0, MVT::i32);
927 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
928 MVT::i32);
929 return true;
930 }
931
932 // If the RHS is +/- imm8, fold into addr mode.
933 int RHSC;
934 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
935 -256 + 1, 256, RHSC)) { // 8 bits.
936 Base = N.getOperand(0);
937 if (Base.getOpcode() == ISD::FrameIndex) {
938 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
939 Base = CurDAG->getTargetFrameIndex(
940 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
941 }
942 Offset = CurDAG->getRegister(0, MVT::i32);
943
945 if (RHSC < 0) {
947 RHSC = -RHSC;
948 }
949 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
950 MVT::i32);
951 return true;
952 }
953
954 Base = N.getOperand(0);
955 Offset = N.getOperand(1);
956 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
957 MVT::i32);
958 return true;
959}
960
961bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
963 unsigned Opcode = Op->getOpcode();
964 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
965 ? cast<LoadSDNode>(Op)->getAddressingMode()
966 : cast<StoreSDNode>(Op)->getAddressingMode();
968 ? ARM_AM::add : ARM_AM::sub;
969 int Val;
970 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
971 Offset = CurDAG->getRegister(0, MVT::i32);
972 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
973 MVT::i32);
974 return true;
975 }
976
977 Offset = N;
978 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
979 MVT::i32);
980 return true;
981}
982
983bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
984 bool FP16) {
985 if (!CurDAG->isBaseWithConstantOffset(N)) {
986 Base = N;
987 if (N.getOpcode() == ISD::FrameIndex) {
988 int FI = cast<FrameIndexSDNode>(N)->getIndex();
989 Base = CurDAG->getTargetFrameIndex(
990 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
991 } else if (N.getOpcode() == ARMISD::Wrapper &&
992 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
993 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
994 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
995 Base = N.getOperand(0);
996 }
997 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
998 SDLoc(N), MVT::i32);
999 return true;
1000 }
1001
1002 // If the RHS is +/- imm8, fold into addr mode.
1003 int RHSC;
1004 const int Scale = FP16 ? 2 : 4;
1005
1006 if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
1007 Base = N.getOperand(0);
1008 if (Base.getOpcode() == ISD::FrameIndex) {
1009 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1010 Base = CurDAG->getTargetFrameIndex(
1011 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1012 }
1013
1015 if (RHSC < 0) {
1017 RHSC = -RHSC;
1018 }
1019
1020 if (FP16)
1021 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
1022 SDLoc(N), MVT::i32);
1023 else
1024 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
1025 SDLoc(N), MVT::i32);
1026
1027 return true;
1028 }
1029
1030 Base = N;
1031
1032 if (FP16)
1033 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
1034 SDLoc(N), MVT::i32);
1035 else
1036 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1037 SDLoc(N), MVT::i32);
1038
1039 return true;
1040}
1041
1042bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
1044 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
1045}
1046
1047bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
1049 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
1050}
1051
// Match an addrmode6 (NEON) operand: the address is used as-is and an
// alignment operand is derived from the parent memory node.  Always succeeds.
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    llvm::Align MMOAlign = MemN->getAlign();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    // Only encode an alignment when the MMO guarantees at least the access
    // size; single-byte accesses never need one.
    if (MMOAlign.value() >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics. For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlign().value();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}
1080
1081bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1082 SDValue &Offset) {
1083 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1085 if (AM != ISD::POST_INC)
1086 return false;
1087 Offset = N;
1088 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1089 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1090 Offset = CurDAG->getRegister(0, MVT::i32);
1091 }
1092 return true;
1093}
1094
1095bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1096 SDValue &Offset, SDValue &Label) {
1097 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1098 Offset = N.getOperand(0);
1099 SDValue N1 = N.getOperand(1);
1100 Label = CurDAG->getTargetConstant(N1->getAsZExtVal(), SDLoc(N), MVT::i32);
1101 return true;
1102 }
1103
1104 return false;
1105}
1106
1107
1108//===----------------------------------------------------------------------===//
1109// Thumb Addressing Modes
1110//===----------------------------------------------------------------------===//
1111
1113 // Negative numbers are difficult to materialise in thumb1. If we are
1114 // selecting the add of a negative, instead try to select ri with a zero
1115 // offset, so create the add node directly which will become a sub.
1116 if (N.getOpcode() != ISD::ADD)
1117 return false;
1118
1119 // Look for an imm which is not legal for ld/st, but is legal for sub.
1120 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1121 return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
1122
1123 return false;
1124}
1125
1126bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
1127 SDValue &Offset) {
1128 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1129 if (!isNullConstant(N))
1130 return false;
1131
1132 Base = Offset = N;
1133 return true;
1134 }
1135
1136 Base = N.getOperand(0);
1137 Offset = N.getOperand(1);
1138 return true;
1139}
1140
1141bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
1142 SDValue &Offset) {
1144 return false; // Select ri instead
1145 return SelectThumbAddrModeRRSext(N, Base, Offset);
1146}
1147
1148bool
1149ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1150 SDValue &Base, SDValue &OffImm) {
1152 Base = N;
1153 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1154 return true;
1155 }
1156
1157 if (!CurDAG->isBaseWithConstantOffset(N)) {
1158 if (N.getOpcode() == ISD::ADD) {
1159 return false; // We want to select register offset instead
1160 } else if (N.getOpcode() == ARMISD::Wrapper &&
1161 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1162 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1163 N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
1164 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1165 Base = N.getOperand(0);
1166 } else {
1167 Base = N;
1168 }
1169
1170 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1171 return true;
1172 }
1173
1174 // If the RHS is + imm5 * scale, fold into addr mode.
1175 int RHSC;
1176 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1177 Base = N.getOperand(0);
1178 OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
1179 return true;
1180 }
1181
1182 // Offset is too large, so use register offset instead.
1183 return false;
1184}
1185
// Thumb1 word access: base + imm5 scaled by 4.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}
1191
// Thumb1 halfword access: base + imm5 scaled by 2.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}
1197
// Thumb1 byte access: base + unscaled imm5.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}
1203
// Match an SP-relative Thumb1 address: a frame index alone, or frame index
// plus an imm8 scaled by 4.  May raise the frame object's alignment to 4 so
// the scaled offset stays encodable.
bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlign(FI) < Align(4))
      MFI.setObjectAlignment(FI, Align(4));
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4))
          MFI.setObjectAlignment(FI, Align(4));
        // Fixed objects cannot be realigned, so the fold only applies when
        // the alignment is already sufficient.
        if (MFI.getObjectAlign(FI) >= Align(4)) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}
1249
1250template <unsigned Shift>
1251bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
1252 SDValue &OffImm) {
1253 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1254 int RHSC;
1255 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
1256 RHSC)) {
1257 Base = N.getOperand(0);
1258 if (N.getOpcode() == ISD::SUB)
1259 RHSC = -RHSC;
1260 OffImm = CurDAG->getSignedTargetConstant(RHSC * (1 << Shift), SDLoc(N),
1261 MVT::i32);
1262 return true;
1263 }
1264 }
1265
1266 // Base only.
1267 Base = N;
1268 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1269 return true;
1270}
1271
1272
1273//===----------------------------------------------------------------------===//
1274// Thumb 2 Addressing Modes
1275//===----------------------------------------------------------------------===//
1276
1277
// Match a Thumb2 base + imm12 address (t2LDRi12 form).  Nearly always
// succeeds; returns false only when another selection is preferable
// (t2LDRpci for constant pools, t2LDRi8 for negative offsets).
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false; // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    // A SUB folds the constant in negated.
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
1333
1334template <unsigned Shift>
1335bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
1336 SDValue &OffImm) {
1337 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1338 int RHSC;
1339 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
1340 Base = N.getOperand(0);
1341 if (Base.getOpcode() == ISD::FrameIndex) {
1342 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1343 Base = CurDAG->getTargetFrameIndex(
1344 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1345 }
1346
1347 if (N.getOpcode() == ISD::SUB)
1348 RHSC = -RHSC;
1349 OffImm = CurDAG->getSignedTargetConstant(RHSC * (1 << Shift), SDLoc(N),
1350 MVT::i32);
1351 return true;
1352 }
1353 }
1354
1355 // Base only.
1356 Base = N;
1357 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1358 return true;
1359}
1360
1361bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1362 SDValue &Base, SDValue &OffImm) {
1363 // Match simple R - imm8 operands.
1364 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1365 !CurDAG->isBaseWithConstantOffset(N))
1366 return false;
1367
1368 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1369 int RHSC = (int)RHS->getSExtValue();
1370 if (N.getOpcode() == ISD::SUB)
1371 RHSC = -RHSC;
1372
1373 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1374 Base = N.getOperand(0);
1375 if (Base.getOpcode() == ISD::FrameIndex) {
1376 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1377 Base = CurDAG->getTargetFrameIndex(
1378 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1379 }
1380 OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
1381 return true;
1382 }
1383 }
1384
1385 return false;
1386}
1387
1388bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1389 SDValue &OffImm){
1390 unsigned Opcode = Op->getOpcode();
1391 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1392 ? cast<LoadSDNode>(Op)->getAddressingMode()
1393 : cast<StoreSDNode>(Op)->getAddressingMode();
1394 int RHSC;
1395 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1396 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1397 ? CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32)
1398 : CurDAG->getSignedTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1399 return true;
1400 }
1401
1402 return false;
1403}
1404
1405template <unsigned Shift>
1406bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
1407 SDValue &OffImm) {
1408 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1409 int RHSC;
1410 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
1411 RHSC)) {
1412 Base = N.getOperand(0);
1413 if (Base.getOpcode() == ISD::FrameIndex) {
1414 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1415 Base = CurDAG->getTargetFrameIndex(
1416 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1417 }
1418
1419 if (N.getOpcode() == ISD::SUB)
1420 RHSC = -RHSC;
1421 OffImm = CurDAG->getSignedTargetConstant(RHSC * (1 << Shift), SDLoc(N),
1422 MVT::i32);
1423 return true;
1424 }
1425 }
1426
1427 // Base only.
1428 Base = N;
1429 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1430 return true;
1431}
1432
// Fixed-shift wrapper: forwards to the runtime-Shift overload below with the
// scale bound at compile time.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
}
1438
1439bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1440 SDValue &OffImm,
1441 unsigned Shift) {
1442 unsigned Opcode = Op->getOpcode();
1444 switch (Opcode) {
1445 case ISD::LOAD:
1446 AM = cast<LoadSDNode>(Op)->getAddressingMode();
1447 break;
1448 case ISD::STORE:
1449 AM = cast<StoreSDNode>(Op)->getAddressingMode();
1450 break;
1451 case ISD::MLOAD:
1452 AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
1453 break;
1454 case ISD::MSTORE:
1455 AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
1456 break;
1457 default:
1458 llvm_unreachable("Unexpected Opcode for Imm7Offset");
1459 }
1460
1461 int RHSC;
1462 // 7 bit constant, shifted by Shift.
1463 if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
1464 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1465 ? CurDAG->getSignedTargetConstant(RHSC * (1 << Shift),
1466 SDLoc(N), MVT::i32)
1467 : CurDAG->getSignedTargetConstant(-RHSC * (1 << Shift),
1468 SDLoc(N), MVT::i32);
1469 return true;
1470 }
1471 return false;
1472}
1473
1474template <int Min, int Max>
1475bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
1476 int Val;
1477 if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
1478 OffImm = CurDAG->getSignedTargetConstant(Val, SDLoc(N), MVT::i32);
1479 return true;
1480 }
1481 return false;
1482}
1483
1484bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1485 SDValue &Base,
1486 SDValue &OffReg, SDValue &ShImm) {
1487 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1488 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1489 return false;
1490
1491 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1492 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1493 int RHSC = (int)RHS->getZExtValue();
1494 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1495 return false;
1496 else if (RHSC < 0 && RHSC >= -255) // 8 bits
1497 return false;
1498 }
1499
1500 // Look for (R + R) or (R + (R << [1,2,3])).
1501 unsigned ShAmt = 0;
1502 Base = N.getOperand(0);
1503 OffReg = N.getOperand(1);
1504
1505 // Swap if it is ((R << c) + R).
1507 if (ShOpcVal != ARM_AM::lsl) {
1508 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1509 if (ShOpcVal == ARM_AM::lsl)
1510 std::swap(Base, OffReg);
1511 }
1512
1513 if (ShOpcVal == ARM_AM::lsl) {
1514 // Check to see if the RHS of the shift is a constant, if not, we can't fold
1515 // it.
1516 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1517 ShAmt = Sh->getZExtValue();
1518 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1519 OffReg = OffReg.getOperand(0);
1520 else {
1521 ShAmt = 0;
1522 }
1523 }
1524 }
1525
1526 // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1527 // and use it in a shifted operand do so.
1528 if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1529 unsigned PowerOfTwo = 0;
1530 SDValue NewMulConst;
1531 if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
1532 HandleSDNode Handle(OffReg);
1533 replaceDAGValue(OffReg.getOperand(1), NewMulConst);
1534 OffReg = Handle.getValue();
1535 ShAmt = PowerOfTwo;
1536 }
1537 }
1538
1539 ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1540
1541 return true;
1542}
1543
1544bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1545 SDValue &OffImm) {
1546 // This *must* succeed since it's used for the irreplaceable ldrex and strex
1547 // instructions.
1548 Base = N;
1549 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1550
1551 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1552 return true;
1553
1554 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1555 if (!RHS)
1556 return true;
1557
1558 uint32_t RHSC = (int)RHS->getZExtValue();
1559 if (RHSC > 1020 || RHSC % 4 != 0)
1560 return true;
1561
1562 Base = N.getOperand(0);
1563 if (Base.getOpcode() == ISD::FrameIndex) {
1564 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1565 Base = CurDAG->getTargetFrameIndex(
1566 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1567 }
1568
1569 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1570 return true;
1571}
1572
1573//===--------------------------------------------------------------------===//
1574
/// getAL - Returns a ARMCC::AL ("always") condition-code immediate node,
/// used as the predicate operand on unconditionally-executed instructions.
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}
1579
/// Copy the MachineMemOperand from memory node N onto the newly created
/// machine node Result.
void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}
1584
// Try to select a pre/post-indexed ARM-mode load.  The opcode is chosen from
// the loaded type, the extension kind, and whether the update is
// pre-increment; the addressing-mode matchers fill in Offset/AMOpc.  The
// matcher order matters: immediate forms are preferred over register forms.
// Returns true and replaces N on success.
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
             SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
             SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    // Halfword loads use addrmode3; sign/zero extension picks LDRSH vs LDRH.
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
      ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
      : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      // Signed byte loads only exist in addrmode3.
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      // Unsigned byte loads use addrmode2, same form preference as i32.
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      // The pre-indexed immediate forms carry the offset inside AMOpc and
      // take no separate offset operand.
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}
1663
1664bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1665 LoadSDNode *LD = cast<LoadSDNode>(N);
1666 EVT LoadedVT = LD->getMemoryVT();
1667 ISD::MemIndexedMode AM = LD->getAddressingMode();
1668 if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1669 LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1670 return false;
1671
1672 auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1673 if (!COffs || COffs->getZExtValue() != 4)
1674 return false;
1675
1676 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1677 // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1678 // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1679 // ISel.
1680 SDValue Chain = LD->getChain();
1681 SDValue Base = LD->getBasePtr();
1682 SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1683 CurDAG->getRegister(0, MVT::i32), Chain };
1684 SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1685 MVT::i32, MVT::Other, Ops);
1686 transferMemOperands(N, New);
1687 ReplaceNode(N, New);
1688 return true;
1689}
1690
1691bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1692 LoadSDNode *LD = cast<LoadSDNode>(N);
1693 ISD::MemIndexedMode AM = LD->getAddressingMode();
1694 if (AM == ISD::UNINDEXED)
1695 return false;
1696
1697 EVT LoadedVT = LD->getMemoryVT();
1698 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1700 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1701 unsigned Opcode = 0;
1702 bool Match = false;
1703 if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1704 switch (LoadedVT.getSimpleVT().SimpleTy) {
1705 case MVT::i32:
1706 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1707 break;
1708 case MVT::i16:
1709 if (isSExtLd)
1710 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1711 else
1712 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1713 break;
1714 case MVT::i8:
1715 case MVT::i1:
1716 if (isSExtLd)
1717 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1718 else
1719 Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1720 break;
1721 default:
1722 return false;
1723 }
1724 Match = true;
1725 }
1726
1727 if (Match) {
1728 SDValue Chain = LD->getChain();
1729 SDValue Base = LD->getBasePtr();
1730 SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1731 CurDAG->getRegister(0, MVT::i32), Chain };
1732 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1733 MVT::Other, Ops);
1734 transferMemOperands(N, New);
1735 ReplaceNode(N, New);
1736 return true;
1737 }
1738
1739 return false;
1740}
1741
// Try to select a pre/post-indexed MVE vector load (plain or masked).  A
// VLDR{B,H,W} variant is chosen from the memory type, alignment, and
// extension kind; on success the value/writeback/chain uses of N are rewired
// to the new machine node.
bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
  EVT LoadedVT;
  unsigned Opcode = 0;
  bool isSExtLd, isPre;
  Align Alignment;
  ARMVCC::VPTCodes Pred;
  SDValue PredReg;
  SDValue Chain, Base, Offset;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    // Plain load: no predication (ARMVCC::None) and a dummy predicate reg.
    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::None;
    PredReg = CurDAG->getRegister(0, MVT::i32);
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    // Masked load: predicate on the mask with the "Then" VPT encoding.
    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::Then;
    PredReg = LD->getMask();
  } else
    llvm_unreachable("Expected a Load or a Masked Load!");

  // We allow LE non-masked loads to change the type (for example use a vldrb.8
  // as opposed to a vldrw.32). This can allow extra addressing modes or
  // alignments for what is otherwise an equivalent instruction.
  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);

  // Each case pairs a (type, alignment) requirement with the imm7 offset
  // matcher at the matching scale; extending loads pick S/U variants.
  SDValue NewOffset;
  if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
      SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
  } else if (LoadedVT == MVT::v8i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
  } else if (LoadedVT == MVT::v4i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
  } else if (Alignment >= Align(4) &&
             (CanChangeType || LoadedVT == MVT::v4i32 ||
              LoadedVT == MVT::v4f32) &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
    Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
  else if (Alignment >= Align(2) &&
           (CanChangeType || LoadedVT == MVT::v8i16 ||
            LoadedVT == MVT::v8f16) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
    Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
  else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
    Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
  else
    return false;

  SDValue Ops[] = {Base,
                   NewOffset,
                   CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32),
                   PredReg,
                   CurDAG->getRegister(0, MVT::i32), // tp_reg
                   Chain};
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                       N->getValueType(0), MVT::Other, Ops);
  transferMemOperands(N, New);
  // The machine node's results are (writeback:i32, value, chain) while N's
  // are (value, writeback, chain) — remap each use accordingly.
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}
1841
1842/// Form a GPRPair pseudo register from a pair of GPR regs.
1843SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1844 SDLoc dl(V0.getNode());
1845 SDValue RegClass =
1846 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1847 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1848 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1849 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1850 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1851}
1852
1853/// Form a D register from a pair of S registers.
1854SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1855 SDLoc dl(V0.getNode());
1856 SDValue RegClass =
1857 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1858 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1859 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1860 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1861 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1862}
1863
1864/// Form a quad register from a pair of D registers.
1865SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1866 SDLoc dl(V0.getNode());
1867 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1868 MVT::i32);
1869 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1870 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1871 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1872 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1873}
1874
1875/// Form 4 consecutive D registers from a pair of Q registers.
1876SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1877 SDLoc dl(V0.getNode());
1878 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1879 MVT::i32);
1880 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1881 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1882 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1883 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1884}
1885
1886/// Form 4 consecutive S registers.
1887SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1888 SDValue V2, SDValue V3) {
1889 SDLoc dl(V0.getNode());
1890 SDValue RegClass =
1891 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1892 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1893 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1894 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1895 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1896 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1897 V2, SubReg2, V3, SubReg3 };
1898 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1899}
1900
1901/// Form 4 consecutive D registers.
1902SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1903 SDValue V2, SDValue V3) {
1904 SDLoc dl(V0.getNode());
1905 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1906 MVT::i32);
1907 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1908 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1909 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1910 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1911 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1912 V2, SubReg2, V3, SubReg3 };
1913 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1914}
1915
1916/// Form 4 consecutive Q registers.
1917SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1918 SDValue V2, SDValue V3) {
1919 SDLoc dl(V0.getNode());
1920 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1921 MVT::i32);
1922 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1923 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1924 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1925 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1926 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1927 V2, SubReg2, V3, SubReg3 };
1928 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1929}
1930
1931/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1932/// of a NEON VLD or VST instruction. The supported values depend on the
1933/// number of registers being loaded.
1934SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1935 unsigned NumVecs, bool is64BitVector) {
1936 unsigned NumRegs = NumVecs;
1937 if (!is64BitVector && NumVecs < 3)
1938 NumRegs *= 2;
1939
1940 unsigned Alignment = Align->getAsZExtVal();
1941 if (Alignment >= 32 && NumRegs == 4)
1942 Alignment = 32;
1943 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1944 Alignment = 16;
1945 else if (Alignment >= 8)
1946 Alignment = 8;
1947 else
1948 Alignment = 0;
1949
1950 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1951}
1952
1953static bool isVLDfixed(unsigned Opc)
1954{
1955 switch (Opc) {
1956 default: return false;
1957 case ARM::VLD1d8wb_fixed : return true;
1958 case ARM::VLD1d16wb_fixed : return true;
1959 case ARM::VLD1d64Qwb_fixed : return true;
1960 case ARM::VLD1d32wb_fixed : return true;
1961 case ARM::VLD1d64wb_fixed : return true;
1962 case ARM::VLD1d8TPseudoWB_fixed : return true;
1963 case ARM::VLD1d16TPseudoWB_fixed : return true;
1964 case ARM::VLD1d32TPseudoWB_fixed : return true;
1965 case ARM::VLD1d64TPseudoWB_fixed : return true;
1966 case ARM::VLD1d8QPseudoWB_fixed : return true;
1967 case ARM::VLD1d16QPseudoWB_fixed : return true;
1968 case ARM::VLD1d32QPseudoWB_fixed : return true;
1969 case ARM::VLD1d64QPseudoWB_fixed : return true;
1970 case ARM::VLD1q8wb_fixed : return true;
1971 case ARM::VLD1q16wb_fixed : return true;
1972 case ARM::VLD1q32wb_fixed : return true;
1973 case ARM::VLD1q64wb_fixed : return true;
1974 case ARM::VLD1DUPd8wb_fixed : return true;
1975 case ARM::VLD1DUPd16wb_fixed : return true;
1976 case ARM::VLD1DUPd32wb_fixed : return true;
1977 case ARM::VLD1DUPq8wb_fixed : return true;
1978 case ARM::VLD1DUPq16wb_fixed : return true;
1979 case ARM::VLD1DUPq32wb_fixed : return true;
1980 case ARM::VLD2d8wb_fixed : return true;
1981 case ARM::VLD2d16wb_fixed : return true;
1982 case ARM::VLD2d32wb_fixed : return true;
1983 case ARM::VLD2q8PseudoWB_fixed : return true;
1984 case ARM::VLD2q16PseudoWB_fixed : return true;
1985 case ARM::VLD2q32PseudoWB_fixed : return true;
1986 case ARM::VLD2DUPd8wb_fixed : return true;
1987 case ARM::VLD2DUPd16wb_fixed : return true;
1988 case ARM::VLD2DUPd32wb_fixed : return true;
1989 case ARM::VLD2DUPq8OddPseudoWB_fixed: return true;
1990 case ARM::VLD2DUPq16OddPseudoWB_fixed: return true;
1991 case ARM::VLD2DUPq32OddPseudoWB_fixed: return true;
1992 }
1993}
1994
1995static bool isVSTfixed(unsigned Opc)
1996{
1997 switch (Opc) {
1998 default: return false;
1999 case ARM::VST1d8wb_fixed : return true;
2000 case ARM::VST1d16wb_fixed : return true;
2001 case ARM::VST1d32wb_fixed : return true;
2002 case ARM::VST1d64wb_fixed : return true;
2003 case ARM::VST1q8wb_fixed : return true;
2004 case ARM::VST1q16wb_fixed : return true;
2005 case ARM::VST1q32wb_fixed : return true;
2006 case ARM::VST1q64wb_fixed : return true;
2007 case ARM::VST1d8TPseudoWB_fixed : return true;
2008 case ARM::VST1d16TPseudoWB_fixed : return true;
2009 case ARM::VST1d32TPseudoWB_fixed : return true;
2010 case ARM::VST1d64TPseudoWB_fixed : return true;
2011 case ARM::VST1d8QPseudoWB_fixed : return true;
2012 case ARM::VST1d16QPseudoWB_fixed : return true;
2013 case ARM::VST1d32QPseudoWB_fixed : return true;
2014 case ARM::VST1d64QPseudoWB_fixed : return true;
2015 case ARM::VST2d8wb_fixed : return true;
2016 case ARM::VST2d16wb_fixed : return true;
2017 case ARM::VST2d32wb_fixed : return true;
2018 case ARM::VST2q8PseudoWB_fixed : return true;
2019 case ARM::VST2q16PseudoWB_fixed : return true;
2020 case ARM::VST2q32PseudoWB_fixed : return true;
2021 }
2022}
2023
2024 // Get the register stride update opcode of a VLD/VST instruction that
2025 // is otherwise equivalent to the given fixed stride updating instruction.
// Each "..wb_fixed" opcode (post-increment by the access size) maps to its
// "..wb_register" twin, which takes the increment amount in a register.
// Opcodes without a register-stride twin fall through and are returned
// unchanged.
2026 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
2028 && "Incorrect fixed stride updating instruction.");
2029 switch (Opc) {
2030 default: break;
// VLD1 single/pair/triple/quad forms.
2031 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
2032 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
2033 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
2034 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
2035 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
2036 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
2037 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
2038 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
2039 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
2040 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
2041 case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
2042 case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
2043 case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
2044 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
2045 case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
2046 case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
2047 case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
2048 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
// VLD1DUP / VLD2DUP (load-and-duplicate) forms.
2049 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
2050 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
2051 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
2052 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
2053 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
2054 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
2055 case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;
2056 case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;
2057 case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;
2058
// VST1 forms.
2059 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
2060 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
2061 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
2062 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
2063 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
2064 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
2065 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
2066 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
2067 case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;
2068 case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;
2069 case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;
2070 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
2071 case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;
2072 case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;
2073 case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;
2074 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
2075
// VLD2 forms.
2076 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
2077 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
2078 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
2079 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
2080 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
2081 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
2082
// VST2 forms.
2083 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
2084 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
2085 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
2086 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
2087 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
2088 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
2089
// VLD2DUP double-register forms.
2090 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
2091 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
2092 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
2093 }
2094 return Opc; // If not one we handle, return it unchanged.
2095 }
2096
2097/// Returns true if the given increment is a Constant known to be equal to the
2098/// access size performed by a NEON load/store. This means the "[rN]!" form can
2099/// be used.
2100static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
2101 auto C = dyn_cast<ConstantSDNode>(Inc);
2102 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
2103}
2104
/// Select a NEON vld1/2/3/4 (optionally with address writeback) for node N.
/// The opcode tables are indexed by element size (8/16/32/64 bits):
/// DOpcodes for 64-bit vectors, QOpcodes0 for 128-bit vectors (and for the
/// even-register half of a quad-register vld3/4), QOpcodes1 for the
/// odd-register half of a quad-register vld3/4.
2105 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
2106 const uint16_t *DOpcodes,
2107 const uint16_t *QOpcodes0,
2108 const uint16_t *QOpcodes1) {
2109 assert(Subtarget->hasNEON());
2110 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
2111 SDLoc dl(N);
2112
2113 SDValue MemAddr, Align;
2114 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2115 // nodes are not intrinsics.
2116 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2117 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2118 return;
2119
2120 SDValue Chain = N->getOperand(0);
2121 EVT VT = N->getValueType(0);
2122 bool is64BitVector = VT.is64BitVector();
2123 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2124
// Map the vector type to the element-size index used by the opcode tables.
2125 unsigned OpcodeIndex;
2126 switch (VT.getSimpleVT().SimpleTy) {
2127 default: llvm_unreachable("unhandled vld type");
2128 // Double-register operations:
2129 case MVT::v8i8: OpcodeIndex = 0; break;
2130 case MVT::v4f16:
2131 case MVT::v4bf16:
2132 case MVT::v4i16: OpcodeIndex = 1; break;
2133 case MVT::v2f32:
2134 case MVT::v2i32: OpcodeIndex = 2; break;
2135 case MVT::v1i64: OpcodeIndex = 3; break;
2136 // Quad-register operations:
2137 case MVT::v16i8: OpcodeIndex = 0; break;
2138 case MVT::v8f16:
2139 case MVT::v8bf16:
2140 case MVT::v8i16: OpcodeIndex = 1; break;
2141 case MVT::v4f32:
2142 case MVT::v4i32: OpcodeIndex = 2; break;
2143 case MVT::v2f64:
2144 case MVT::v2i64: OpcodeIndex = 3; break;
2145 }
2146
// Result type of the wide load: one i64 lane per D register in the
// super-register (vld3 rounds up to 4; 128-bit vectors double the count).
2147 EVT ResTy;
2148 if (NumVecs == 1)
2149 ResTy = VT;
2150 else {
2151 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2152 if (!is64BitVector)
2153 ResTyElts *= 2;
2154 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
2155 }
2156 std::vector<EVT> ResTys;
2157 ResTys.push_back(ResTy);
2158 if (isUpdating)
2159 ResTys.push_back(MVT::i32);
2160 ResTys.push_back(MVT::Other);
2161
2162 SDValue Pred = getAL(CurDAG, dl);
2163 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2164 SDNode *VLd;
2166
2167 // Double registers and VLD1/VLD2 quad registers are directly supported.
2168 if (is64BitVector || NumVecs <= 2) {
2169 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2170 QOpcodes0[OpcodeIndex]);
2171 Ops.push_back(MemAddr);
2172 Ops.push_back(Align);
2173 if (isUpdating) {
2174 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2175 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2176 if (!IsImmUpdate) {
2177 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
2178 // check for the opcode rather than the number of vector elements.
2179 if (isVLDfixed(Opc))
2181 Ops.push_back(Inc);
2182 // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
2183 // the operands if not such an opcode.
2184 } else if (!isVLDfixed(Opc))
2185 Ops.push_back(Reg0);
2186 }
2187 Ops.push_back(Pred);
2188 Ops.push_back(Reg0);
2189 Ops.push_back(Chain);
2190 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2191
2192 } else {
2193 // Otherwise, quad registers are loaded with two separate instructions,
2194 // where one loads the even registers and the other loads the odd registers.
2195 EVT AddrTy = MemAddr.getValueType();
2196
2197 // Load the even subregs. This is always an updating load, so that it
2198 // provides the address to the second load for the odd subregs.
2199 SDValue ImplDef =
2200 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
2201 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
2202 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2203 ResTy, AddrTy, MVT::Other, OpsA);
2204 Chain = SDValue(VLdA, 2);
2205
2206 // Load the odd subregs.
2207 Ops.push_back(SDValue(VLdA, 1));
2208 Ops.push_back(Align);
2209 if (isUpdating) {
2210 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2212 "only constant post-increment update allowed for VLD3/4");
2213 (void)Inc;
2214 Ops.push_back(Reg0);
2215 }
2216 Ops.push_back(SDValue(VLdA, 0));
2217 Ops.push_back(Pred);
2218 Ops.push_back(Reg0);
2219 Ops.push_back(Chain);
2220 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
2221 }
2222
2223 // Transfer memoperands.
2224 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2225 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});
2226
2227 if (NumVecs == 1) {
2228 ReplaceNode(N, VLd);
2229 return;
2230 }
2231
2232 // Extract out the subregisters.
2233 SDValue SuperReg = SDValue(VLd, 0);
2234 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2235 ARM::qsub_3 == ARM::qsub_0 + 3,
2236 "Unexpected subreg numbering");
2237 unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
2238 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2239 ReplaceUses(SDValue(N, Vec),
2240 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2241 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
2242 if (isUpdating)
2243 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
2244 CurDAG->RemoveDeadNode(N);
2245 }
2246
/// Select a NEON vst1/2/3/4 (optionally with address writeback) for node N.
/// The opcode tables are indexed by element size (8/16/32/64 bits):
/// DOpcodes for 64-bit vectors, QOpcodes0 for 128-bit vectors (and for the
/// even-register half of a quad-register vst3/4), QOpcodes1 for the
/// odd-register half of a quad-register vst3/4.
2247 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
2248 const uint16_t *DOpcodes,
2249 const uint16_t *QOpcodes0,
2250 const uint16_t *QOpcodes1) {
2251 assert(Subtarget->hasNEON());
2252 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
2253 SDLoc dl(N);
2254
2255 SDValue MemAddr, Align;
2256 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2257 // nodes are not intrinsics.
2258 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2259 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2260 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2261 return;
2262
2263 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2264
2265 SDValue Chain = N->getOperand(0);
2266 EVT VT = N->getOperand(Vec0Idx).getValueType();
2267 bool is64BitVector = VT.is64BitVector();
2268 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2269
// Map the vector type to the element-size index used by the opcode tables.
2270 unsigned OpcodeIndex;
2271 switch (VT.getSimpleVT().SimpleTy) {
2272 default: llvm_unreachable("unhandled vst type");
2273 // Double-register operations:
2274 case MVT::v8i8: OpcodeIndex = 0; break;
2275 case MVT::v4f16:
2276 case MVT::v4bf16:
2277 case MVT::v4i16: OpcodeIndex = 1; break;
2278 case MVT::v2f32:
2279 case MVT::v2i32: OpcodeIndex = 2; break;
2280 case MVT::v1i64: OpcodeIndex = 3; break;
2281 // Quad-register operations:
2282 case MVT::v16i8: OpcodeIndex = 0; break;
2283 case MVT::v8f16:
2284 case MVT::v8bf16:
2285 case MVT::v8i16: OpcodeIndex = 1; break;
2286 case MVT::v4f32:
2287 case MVT::v4i32: OpcodeIndex = 2; break;
2288 case MVT::v2f64:
2289 case MVT::v2i64: OpcodeIndex = 3; break;
2290 }
2291
2292 std::vector<EVT> ResTys;
2293 if (isUpdating)
2294 ResTys.push_back(MVT::i32);
2295 ResTys.push_back(MVT::Other);
2296
2297 SDValue Pred = getAL(CurDAG, dl);
2298 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2300
2301 // Double registers and VST1/VST2 quad registers are directly supported.
2302 if (is64BitVector || NumVecs <= 2) {
2303 SDValue SrcReg;
2304 if (NumVecs == 1) {
2305 SrcReg = N->getOperand(Vec0Idx);
2306 } else if (is64BitVector) {
2307 // Form a REG_SEQUENCE to force register allocation.
2308 SDValue V0 = N->getOperand(Vec0Idx + 0);
2309 SDValue V1 = N->getOperand(Vec0Idx + 1);
2310 if (NumVecs == 2)
2311 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2312 else {
2313 SDValue V2 = N->getOperand(Vec0Idx + 2);
2314 // If it's a vst3, form a quad D-register and leave the last part as
2315 // an undef.
2316 SDValue V3 = (NumVecs == 3)
2317 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2318 : N->getOperand(Vec0Idx + 3);
2319 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2320 }
2321 } else {
2322 // Form a QQ register.
2323 SDValue Q0 = N->getOperand(Vec0Idx);
2324 SDValue Q1 = N->getOperand(Vec0Idx + 1);
2325 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2326 }
2327
2328 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2329 QOpcodes0[OpcodeIndex]);
2330 Ops.push_back(MemAddr);
2331 Ops.push_back(Align);
2332 if (isUpdating) {
2333 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2334 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2335 if (!IsImmUpdate) {
2336 // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
2337 // check for the opcode rather than the number of vector elements.
2338 if (isVSTfixed(Opc))
2340 Ops.push_back(Inc);
2341 }
2342 // VST1/VST2 fixed increment does not need Reg0 so only include it in
2343 // the operands if not such an opcode.
2344 else if (!isVSTfixed(Opc))
2345 Ops.push_back(Reg0);
2346 }
2347 Ops.push_back(SrcReg);
2348 Ops.push_back(Pred);
2349 Ops.push_back(Reg0);
2350 Ops.push_back(Chain);
2351 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2352
2353 // Transfer memoperands.
2354 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});
2355
2356 ReplaceNode(N, VSt);
2357 return;
2358 }
2359
2360 // Otherwise, quad registers are stored with two separate instructions,
2361 // where one stores the even registers and the other stores the odd registers.
2362
2363 // Form the QQQQ REG_SEQUENCE.
2364 SDValue V0 = N->getOperand(Vec0Idx + 0);
2365 SDValue V1 = N->getOperand(Vec0Idx + 1);
2366 SDValue V2 = N->getOperand(Vec0Idx + 2);
2367 SDValue V3 = (NumVecs == 3)
2368 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2369 : N->getOperand(Vec0Idx + 3);
2370 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2371
2372 // Store the even D registers. This is always an updating store, so that it
2373 // provides the address to the second store for the odd subregs.
2374 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2375 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2376 MemAddr.getValueType(),
2377 MVT::Other, OpsA);
2378 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
2379 Chain = SDValue(VStA, 1);
2380
2381 // Store the odd D registers.
2382 Ops.push_back(SDValue(VStA, 0));
2383 Ops.push_back(Align);
2384 if (isUpdating) {
2385 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2387 "only constant post-increment update allowed for VST3/4");
2388 (void)Inc;
2389 Ops.push_back(Reg0);
2390 }
2391 Ops.push_back(RegSeq);
2392 Ops.push_back(Pred);
2393 Ops.push_back(Reg0);
2394 Ops.push_back(Chain);
2395 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2396 Ops);
2397 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
2398 ReplaceNode(N, VStB);
2399 }
2400
/// Select a NEON vld2/3/4-lane or vst2/3/4-lane operation (IsLoad selects
/// which). The input vectors are packed into a REG_SEQUENCE super-register,
/// the lane index is appended as an immediate operand, and for loads the
/// per-vector results are extracted back out of the wide result register.
2401 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2402 unsigned NumVecs,
2403 const uint16_t *DOpcodes,
2404 const uint16_t *QOpcodes) {
2405 assert(Subtarget->hasNEON());
2406 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2407 SDLoc dl(N);
2408
2409 SDValue MemAddr, Align;
2410 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2411 // nodes are not intrinsics.
2412 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2413 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2414 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2415 return;
2416
2417 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2418
2419 SDValue Chain = N->getOperand(0);
2420 unsigned Lane = N->getConstantOperandVal(Vec0Idx + NumVecs);
2421 EVT VT = N->getOperand(Vec0Idx).getValueType();
2422 bool is64BitVector = VT.is64BitVector();
2423
// Clamp the alignment operand to what the lane access actually covers;
// NumVecs == 3 never encodes an alignment.
2424 unsigned Alignment = 0;
2425 if (NumVecs != 3) {
2426 Alignment = Align->getAsZExtVal();
2427 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2428 if (Alignment > NumBytes)
2429 Alignment = NumBytes;
2430 if (Alignment < 8 && Alignment < NumBytes)
2431 Alignment = 0;
2432 // Alignment must be a power of two; make sure of that.
2433 Alignment = (Alignment & -Alignment);
2434 if (Alignment == 1)
2435 Alignment = 0;
2436 }
2437 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2438
// Map the vector type to the element-size index used by the opcode tables.
2439 unsigned OpcodeIndex;
2440 switch (VT.getSimpleVT().SimpleTy) {
2441 default: llvm_unreachable("unhandled vld/vst lane type");
2442 // Double-register operations:
2443 case MVT::v8i8: OpcodeIndex = 0; break;
2444 case MVT::v4f16:
2445 case MVT::v4bf16:
2446 case MVT::v4i16: OpcodeIndex = 1; break;
2447 case MVT::v2f32:
2448 case MVT::v2i32: OpcodeIndex = 2; break;
2449 // Quad-register operations:
2450 case MVT::v8f16:
2451 case MVT::v8bf16:
2452 case MVT::v8i16: OpcodeIndex = 0; break;
2453 case MVT::v4f32:
2454 case MVT::v4i32: OpcodeIndex = 1; break;
2455 }
2456
2457 std::vector<EVT> ResTys;
2458 if (IsLoad) {
2459 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2460 if (!is64BitVector)
2461 ResTyElts *= 2;
2462 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2463 MVT::i64, ResTyElts));
2464 }
2465 if (isUpdating)
2466 ResTys.push_back(MVT::i32);
2467 ResTys.push_back(MVT::Other);
2468
2469 SDValue Pred = getAL(CurDAG, dl);
2470 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2471
2473 Ops.push_back(MemAddr);
2474 Ops.push_back(Align);
2475 if (isUpdating) {
2476 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2477 bool IsImmUpdate =
2478 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2479 Ops.push_back(IsImmUpdate ? Reg0 : Inc);
2480 }
2481
// Pack the input vectors into a single super-register via REG_SEQUENCE;
// vld3/vst3-lane pads the fourth slot with an IMPLICIT_DEF.
2482 SDValue SuperReg;
2483 SDValue V0 = N->getOperand(Vec0Idx + 0);
2484 SDValue V1 = N->getOperand(Vec0Idx + 1);
2485 if (NumVecs == 2) {
2486 if (is64BitVector)
2487 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2488 else
2489 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2490 } else {
2491 SDValue V2 = N->getOperand(Vec0Idx + 2);
2492 SDValue V3 = (NumVecs == 3)
2493 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2494 : N->getOperand(Vec0Idx + 3);
2495 if (is64BitVector)
2496 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2497 else
2498 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2499 }
2500 Ops.push_back(SuperReg);
2501 Ops.push_back(getI32Imm(Lane, dl));
2502 Ops.push_back(Pred);
2503 Ops.push_back(Reg0);
2504 Ops.push_back(Chain);
2505
2506 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2507 QOpcodes[OpcodeIndex]);
2508 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2509 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
2510 if (!IsLoad) {
2511 ReplaceNode(N, VLdLn);
2512 return;
2513 }
2514
2515 // Extract the subregisters.
2516 SuperReg = SDValue(VLdLn, 0);
2517 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2518 ARM::qsub_3 == ARM::qsub_0 + 3,
2519 "Unexpected subreg numbering");
2520 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2521 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2522 ReplaceUses(SDValue(N, Vec),
2523 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2524 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2525 if (isUpdating)
2526 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2527 CurDAG->RemoveDeadNode(N);
2528 }
2529
2530template <typename SDValueVector>
2531void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2532 SDValue PredicateMask) {
2533 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2534 Ops.push_back(PredicateMask);
2535 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2536}
2537
2538template <typename SDValueVector>
2539void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2540 SDValue PredicateMask,
2541 SDValue Inactive) {
2542 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2543 Ops.push_back(PredicateMask);
2544 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2545 Ops.push_back(Inactive);
2546}
2547
2548template <typename SDValueVector>
2549void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
2550 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2551 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2552 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2553}
2554
2555template <typename SDValueVector>
2556void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2557 EVT InactiveTy) {
2558 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2559 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2560 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2561 Ops.push_back(SDValue(
2562 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
2563}
2564
/// Select an MVE load/store over a vector of base addresses with writeback.
/// Opcodes[0]/Opcodes[1] are the 32-/64-bit-element variants; the immediate
/// offset is operand 3 and (when Predicated) the mask is operand 4. The new
/// machine node's results are permuted back onto N's result numbers.
2565 void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
2566 bool Predicated) {
2567 SDLoc Loc(N);
2569
2570 uint16_t Opcode;
2571 switch (N->getValueType(1).getVectorElementType().getSizeInBits()) {
2572 case 32:
2573 Opcode = Opcodes[0];
2574 break;
2575 case 64:
2576 Opcode = Opcodes[1];
2577 break;
2578 default:
2579 llvm_unreachable("bad vector element size in SelectMVE_WB");
2580 }
2581
2582 Ops.push_back(N->getOperand(2)); // vector of base addresses
2583
2584 int32_t ImmValue = N->getConstantOperandVal(3);
2585 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset
2586
2587 if (Predicated)
2588 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
2589 else
2590 AddEmptyMVEPredicateToOps(Ops, Loc);
2591
2592 Ops.push_back(N->getOperand(0)); // chain
2593
// The machine node produces (writeback, value, chain) while N produces
// (value, writeback, chain) - hence the swapped ReplaceUses below.
2595 VTs.push_back(N->getValueType(1));
2596 VTs.push_back(N->getValueType(0));
2597 VTs.push_back(N->getValueType(2));
2598
2599 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops);
2600 ReplaceUses(SDValue(N, 0), SDValue(New, 1));
2601 ReplaceUses(SDValue(N, 1), SDValue(New, 0));
2602 ReplaceUses(SDValue(N, 2), SDValue(New, 2));
2603 transferMemOperands(N, New);
2604 CurDAG->RemoveDeadNode(N);
2605 }
2606
/// Select an MVE scalar long shift, which operates on a 64-bit value split
/// across two 32-bit operands. The shift count is either an immediate
/// (operand 3 as a constant) or a register, optionally followed by a
/// saturation selector; standard IT predication operands are appended.
2607 void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
2608 bool Immediate,
2609 bool HasSaturationOperand) {
2610 SDLoc Loc(N);
2612
2613 // Two 32-bit halves of the value to be shifted
2614 Ops.push_back(N->getOperand(1));
2615 Ops.push_back(N->getOperand(2));
2616
2617 // The shift count
2618 if (Immediate) {
2619 int32_t ImmValue = N->getConstantOperandVal(3);
2620 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
2621 } else {
2622 Ops.push_back(N->getOperand(3));
2623 }
2624
2625 // The immediate saturation operand, if any
// Encoded as a single bit: 0 selects 64-bit saturation, 1 otherwise.
2626 if (HasSaturationOperand) {
2627 int32_t SatOp = N->getConstantOperandVal(4);
2628 int SatBit = (SatOp == 64 ? 0 : 1);
2629 Ops.push_back(getI32Imm(SatBit, Loc));
2630 }
2631
2632 // MVE scalar shifts are IT-predicable, so include the standard
2633 // predicate arguments.
2634 Ops.push_back(getAL(CurDAG, Loc));
2635 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2636
2637 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2638 }
2639
/// Select an MVE VADC/VSBC (add/subtract with carry across a vector). When
/// the incoming carry is a compile-time constant whose carry bit already
/// matches what the carry-less opcode implies (clear for add, set for
/// subtract), the simpler no-carry-input opcode is selected instead.
void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                                        uint16_t OpcodeWithNoCarry,
                                        bool Add, bool Predicated) {
  SDLoc Loc(N);
  // NOTE(review): the declaration of Ops is not visible in this excerpt;
  // confirm it precedes the first push_back in the full source.
  uint16_t Opcode;

  // Predicated intrinsics carry an extra leading "inactive" operand.
  unsigned FirstInputOp = Predicated ? 2 : 1;

  // Two input vectors and the input carry flag
  Ops.push_back(N->getOperand(FirstInputOp));
  Ops.push_back(N->getOperand(FirstInputOp + 1));
  SDValue CarryIn = N->getOperand(FirstInputOp + 2);
  ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn);
  // Bit 29 of the carry operand is the carry flag position (presumably the
  // FPSCR.C bit -- confirm against the intrinsic's documentation).
  uint32_t CarryMask = 1 << 29;
  uint32_t CarryExpected = Add ? 0 : CarryMask;
  if (CarryInConstant &&
      (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
    // The constant carry matches the value the carry-less variant assumes,
    // so the carry input can be dropped entirely.
    Opcode = OpcodeWithNoCarry;
  } else {
    Ops.push_back(CarryIn);
    Opcode = OpcodeWithCarry;
  }

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc,
                         N->getOperand(FirstInputOp + 3), // predicate
                         N->getOperand(FirstInputOp - 1)); // inactive
  else
    AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
}
2673
/// Select the MVE VSHLC intrinsic: shift a whole vector by an immediate
/// count, shifting in bits from a 32-bit scalar operand.
void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
  SDLoc Loc(N);
  // NOTE(review): the declaration of Ops is not visible in this excerpt;
  // confirm it precedes the first push_back in the full source.

  // One vector input, followed by a 32-bit word of bits to shift in
  // and then an immediate shift count
  Ops.push_back(N->getOperand(1));
  Ops.push_back(N->getOperand(2));
  int32_t ImmValue = N->getConstantOperandVal(3);
  Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), ArrayRef(Ops));
}
2692
2693static bool SDValueToConstBool(SDValue SDVal) {
2694 assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
2695 ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
2696 uint64_t Value = SDValConstant->getZExtValue();
2697 assert((Value == 0 || Value == 1) && "expected value 0 or 1");
2698 return Value;
2699}
2700
/// Shared selection logic for the MVE vmlaldav/vmlsldav/vrmlaldavh-style
/// multiply-accumulate-across-vector intrinsics. The opcode tables are laid
/// out so the final opcode is
///   Opcodes[(IsSub*4 + IsExchange*2 + IsAccum) * Stride + TySize]
/// where Opcodes is the signed or unsigned table as appropriate.
void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                                            const uint16_t *OpcodesS,
                                            const uint16_t *OpcodesU,
                                            size_t Stride, size_t TySize) {
  assert(TySize < Stride && "Invalid TySize");
  // Operands 1-3 are compile-time booleans selecting the variant.
  bool IsUnsigned = SDValueToConstBool(N->getOperand(1));
  bool IsSub = SDValueToConstBool(N->getOperand(2));
  bool IsExchange = SDValueToConstBool(N->getOperand(3));
  if (IsUnsigned) {
    assert(!IsSub &&
           "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
    assert(!IsExchange &&
           "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
  }

  auto OpIsZero = [N](size_t OpNo) {
    return isNullConstant(N->getOperand(OpNo));
  };

  // If the input accumulator value is not zero, select an instruction with
  // accumulator, otherwise select an instruction without accumulator
  bool IsAccum = !(OpIsZero(4) && OpIsZero(5));

  // Walk the opcode table per the layout described above.
  const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
  if (IsSub)
    Opcodes += 4 * Stride;
  if (IsExchange)
    Opcodes += 2 * Stride;
  if (IsAccum)
    Opcodes += Stride;
  uint16_t Opcode = Opcodes[TySize];

  SDLoc Loc(N);
  // NOTE(review): the declaration of Ops is not visible in this excerpt;
  // confirm it precedes the push_backs in the full source.
  // Push the accumulator operands, if they are used
  if (IsAccum) {
    Ops.push_back(N->getOperand(4));
    Ops.push_back(N->getOperand(5));
  }
  // Push the two vector operands
  Ops.push_back(N->getOperand(6));
  Ops.push_back(N->getOperand(7));

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(8));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
}
2751
2752void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
2753 const uint16_t *OpcodesS,
2754 const uint16_t *OpcodesU) {
2755 EVT VecTy = N->getOperand(6).getValueType();
2756 size_t SizeIndex;
2757 switch (VecTy.getVectorElementType().getSizeInBits()) {
2758 case 16:
2759 SizeIndex = 0;
2760 break;
2761 case 32:
2762 SizeIndex = 1;
2763 break;
2764 default:
2765 llvm_unreachable("bad vector element size");
2766 }
2767
2768 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex);
2769}
2770
2771void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
2772 const uint16_t *OpcodesS,
2773 const uint16_t *OpcodesU) {
2774 assert(
2775 N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
2776 32 &&
2777 "bad vector element size");
2778 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);
2779}
2780
/// Select an MVE structured (de-interleaving) load. The load is emitted as a
/// sequence of NumVecs machine instructions, each filling in part of a single
/// wide tuple register; the partially-filled tuple (Data) and the chain are
/// threaded through the sequence, and the final instruction optionally
/// produces a written-back pointer.
void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                                    const uint16_t *const *Opcodes,
                                    bool HasWriteback) {
  EVT VT = N->getValueType(0);
  SDLoc Loc(N);

  // Pick the opcode row for this element width.
  const uint16_t *OurOpcodes;
  switch (VT.getVectorElementType().getSizeInBits()) {
  case 8:
    OurOpcodes = Opcodes[0];
    break;
  case 16:
    OurOpcodes = Opcodes[1];
    break;
  case 32:
    OurOpcodes = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VLD");
  }

  // Model the whole destination tuple as one v(NumVecs*2)i64 value.
  EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
  SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
  unsigned PtrOperand = HasWriteback ? 1 : 2;

  // Start from an undefined tuple; each stage fills in its portion.
  auto Data = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
  SDValue Chain = N->getOperand(0);
  // Add a MVE_VLDn instruction for each Vec, except the last
  for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
    SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
    auto LoadInst =
        CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
    Data = SDValue(LoadInst, 0);
    Chain = SDValue(LoadInst, 1);
    transferMemOperands(N, LoadInst);
  }
  // The last may need a writeback on it
  if (HasWriteback)
    ResultTys = {DataTy, MVT::i32, MVT::Other};
  SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
  auto LoadInst =
      CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops);
  transferMemOperands(N, LoadInst);

  // Replace the original node's results: NumVecs vectors extracted from the
  // tuple register, then (if present) the written-back pointer, then the
  // chain. Note `i` deliberately keeps counting across the three steps.
  unsigned i;
  for (i = 0; i < NumVecs; i++)
    ReplaceUses(SDValue(N, i),
                CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT,
                                               SDValue(LoadInst, 0)));
  if (HasWriteback)
    ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1));
  ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1));
  CurDAG->RemoveDeadNode(N);
}
2836
/// Select an MVE vidup/vddup-family intrinsic: generate a vector from a base
/// value and an immediate step, with an extra "limit" operand for the
/// wrapping (viwdup/vdwdup) forms.
void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                                      bool Wrapping, bool Predicated) {
  EVT VT = N->getValueType(0);
  SDLoc Loc(N);

  // One opcode per element width.
  uint16_t Opcode;
  switch (VT.getScalarSizeInBits()) {
  case 8:
    Opcode = Opcodes[0];
    break;
  case 16:
    Opcode = Opcodes[1];
    break;
  case 32:
    Opcode = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
  }

  // NOTE(review): the declaration of Ops is not visible in this excerpt;
  // confirm it precedes the push_backs in the full source.
  unsigned OpIdx = 1;

  // For the predicated form, the first operand is the fallback ("inactive")
  // vector used for disabled lanes.
  SDValue Inactive;
  if (Predicated)
    Inactive = N->getOperand(OpIdx++);

  Ops.push_back(N->getOperand(OpIdx++)); // base
  if (Wrapping)
    Ops.push_back(N->getOperand(OpIdx++)); // limit

  SDValue ImmOp = N->getOperand(OpIdx++); // step
  int ImmValue = ImmOp->getAsZExtVal();
  Ops.push_back(getI32Imm(ImmValue, Loc));

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive);
  else
    AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
}
2879
/// Select a dual-register CDE (Custom Datapath Extension) instruction. The
/// intrinsic has two i32 results, while the machine instruction produces a
/// single untyped register pair; the two GPR halves are extracted afterwards
/// and substituted for the intrinsic's results.
void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
                                     size_t NumExtraOps, bool HasAccum) {
  bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
  SDLoc Loc(N);
  // NOTE(review): the declaration of Ops is not visible in this excerpt;
  // confirm it precedes the push_backs in the full source.

  unsigned OpIdx = 1;

  // Convert and append the immediate operand designating the coprocessor.
  SDValue ImmCorpoc = N->getOperand(OpIdx++);
  uint32_t ImmCoprocVal = ImmCorpoc->getAsZExtVal();
  Ops.push_back(getI32Imm(ImmCoprocVal, Loc));

  // For accumulating variants copy the low and high order parts of the
  // accumulator into a register pair and add it to the operand vector.
  if (HasAccum) {
    SDValue AccLo = N->getOperand(OpIdx++);
    SDValue AccHi = N->getOperand(OpIdx++);
    // Big-endian targets keep the halves in the opposite order within the
    // register pair.
    if (IsBigEndian)
      std::swap(AccLo, AccHi);
    Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));
  }

  // Copy extra operands as-is.
  for (size_t I = 0; I < NumExtraOps; I++)
    Ops.push_back(N->getOperand(OpIdx++));

  // Convert and append the immediate operand
  SDValue Imm = N->getOperand(OpIdx);
  uint32_t ImmVal = Imm->getAsZExtVal();
  Ops.push_back(getI32Imm(ImmVal, Loc));

  // Accumulating variants are IT-predicable, add predicate operands.
  if (HasAccum) {
    SDValue Pred = getAL(CurDAG, Loc);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    Ops.push_back(Pred);
    Ops.push_back(PredReg);
  }

  // Create the CDE instruction
  SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);
  SDValue ResultPair = SDValue(InstrNode, 0);

  // The original intrinsic had two outputs, and the output of the dual-register
  // CDE instruction is a register pair. We need to extract the two subregisters
  // and replace all uses of the original outputs with the extracted
  // subregisters.
  uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
  if (IsBigEndian)
    std::swap(SubRegs[0], SubRegs[1]);

  // Only emit an extract for results that are actually used.
  for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
    if (SDValue(N, ResIdx).use_empty())
      continue;
    SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc,
                                                    MVT::i32, ResultPair);
    ReplaceUses(SDValue(N, ResIdx), SubReg);
  }

  CurDAG->RemoveDeadNode(N);
}
2942
/// Select a NEON vld[1-4]-dup operation: load one element (or a small group)
/// from memory and duplicate it into all lanes of NumVecs destination
/// vectors, optionally with address writeback.
void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
                                   bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  // Intrinsic nodes have (chain, intrinsic-id, addr, ...); other nodes have
  // (chain, addr, ...).
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();

  // Normalize the alignment operand: clamp to the total access size, drop
  // values below 8 bytes, and force a power of two (0 meaning "default").
  // NOTE(review): the NumVecs == 3 case keeps Alignment at 0 — presumably
  // because the vld3-dup encodings only accept the default alignment;
  // confirm.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = Align->getAsZExtVal();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Map the value type onto an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v4i16:
  case MVT::v8i16:
  case MVT::v4f16:
  case MVT::v8f16:
  case MVT::v4bf16:
  case MVT::v8bf16:
    OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32:
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  }

  // The machine node's result is one wide vector covering all destination
  // registers (NumVecs == 3 is rounded up to 4).
  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  if (!is64BitVector)
    ResTyElts *= 2;
  EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);

  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32); // written-back address
  ResTys.push_back(MVT::Other); // chain

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  // NOTE(review): the declaration of Ops is not visible in this excerpt;
  // confirm it precedes the push_backs in the full source.
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex]
                 : (NumVecs == 1) ? QOpcodes0[OpcodeIndex]
                                  : QOpcodes1[OpcodeIndex];
  if (isUpdating) {
    SDValue Inc = N->getOperand(2);
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    if (IsImmUpdate) {
      if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    } else {
      // NOTE(review): a statement appears to be missing from this excerpt
      // between the isVLDfixed check and the push_back (upstream this is
      // expected to switch Opc to its register-update form); confirm against
      // the full source.
      if (isVLDfixed(Opc))
        Ops.push_back(Inc);
    }
  }
  if (is64BitVector || NumVecs == 1) {
    // Double registers and VLD1 quad registers are directly supported.
  } else {
    // Multi-vector quad-register dups are built in two steps: an initial
    // QOpcodes0 instruction loads into an undef wide register, and its
    // result is fed as an extra input to the final (QOpcodes1) instruction.
    SDValue ImplDef = SDValue(
        CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
                                          MVT::Other, OpsA);
    Ops.push_back(SDValue(VLdA, 0));
    Chain = SDValue(VLdA, 1);
  }

  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});

  // Extract the subregisters.
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
  } else {
    SDValue SuperReg = SDValue(VLdDup, 0);
    static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
    unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
    for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
      ReplaceUses(SDValue(N, Vec),
                  CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
    }
  }
  // Chain, then (for updating forms) the written-back address.
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  CurDAG->RemoveDeadNode(N);
}
3068
/// Try to combine a pair of adjacent v8i16/v8f16 INSERT_VECTOR_ELT nodes
/// into a single f32 subregister move or a VINS/VMOVX sequence.
/// Returns true (and replaces N) on success.
bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
  if (!Subtarget->hasMVEIntegerOps())
    return false;

  SDLoc dl(N);

  // We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and
  // extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
  // inserts of the correct type:
  SDValue Ins1 = SDValue(N, 0);
  SDValue Ins2 = N->getOperand(0);
  EVT VT = Ins1.getValueType();
  if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||
      !isa<ConstantSDNode>(Ins1.getOperand(2)) ||
      !isa<ConstantSDNode>(Ins2.getOperand(2)) ||
      (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))
    return false;

  // The inner insert (Ins2) must write an even lane and the outer one (Ins1)
  // the odd lane immediately above it.
  unsigned Lane1 = Ins1.getConstantOperandVal(2);
  unsigned Lane2 = Ins2.getConstantOperandVal(2);
  if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)
    return false;

  // If the inserted values will be able to use T/B already, leave it to the
  // existing tablegen patterns. For example VCVTT/VCVTB.
  SDValue Val1 = Ins1.getOperand(1);
  SDValue Val2 = Ins2.getOperand(1);
  if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)
    return false;

  // Check if the inserted values are both extracts.
  // NOTE(review): this condition is not fully visible in this excerpt — the
  // parenthesization implies missing lines (presumably the matching
  // EXTRACT_VECTOR_ELT alternative for Val2 and constant-index checks);
  // confirm against the full source.
  if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
       Val1.getOpcode() == ARMISD::VGETLANEu) &&
       Val2.getOpcode() == ARMISD::VGETLANEu) &&
      (Val1.getOperand(0).getValueType() == MVT::v8f16 ||
       Val1.getOperand(0).getValueType() == MVT::v8i16) &&
      (Val2.getOperand(0).getValueType() == MVT::v8f16 ||
       Val2.getOperand(0).getValueType() == MVT::v8i16)) {
    unsigned ExtractLane1 = Val1.getConstantOperandVal(1);
    unsigned ExtractLane2 = Val2.getConstantOperandVal(1);

    // If the two extracted lanes are from the same place and adjacent, this
    // simplifies into a f32 lane move.
    if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 &&
        ExtractLane1 == ExtractLane2 + 1) {
      SDValue NewExt = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0));
      SDValue NewIns = CurDAG->getTargetInsertSubreg(
          ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0),
          NewExt);
      ReplaceUses(Ins1, NewIns);
      return true;
    }

    // Else v8i16 pattern of an extract and an insert, with a optional vmovx for
    // extracting odd lanes.
    if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) {
      SDValue Inp1 = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0));
      SDValue Inp2 = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0));
      // Odd source lanes sit in the top half of the s-register; VMOVH is
      // presumably used to bring them down — confirm its exact semantics.
      if (ExtractLane1 % 2 != 0)
        Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1), 0);
      if (ExtractLane2 % 2 != 0)
        Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2), 0);
      SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1);
      SDValue NewIns =
          CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
                                        Ins2.getOperand(0), SDValue(VINS, 0));
      ReplaceUses(Ins1, NewIns);
      return true;
    }
  }

  // The inserted values are not extracted - if they are f16 then insert them
  // directly using a VINS.
  if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) {
    SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1);
    SDValue NewIns =
        CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
                                      Ins2.getOperand(0), SDValue(VINS, 0));
    ReplaceUses(Ins1, NewIns);
    return true;
  }

  return false;
}
3159
/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
/// variable shift/rotate instructions: since the hardware only uses the low
/// 5 bits of the amount, (x + 32k), (32k - x) and (32k+31 - x) can be
/// replaced by cheaper equivalents before selecting the rotate.
bool ARMDAGToDAGISel::tryShiftAmountMod(SDNode *N) {
  EVT VT = N->getValueType(0);
  if (VT != MVT::i32)
    return false;
  // On ARM we intentionally do this only for ROTR. Unlike AArch64, variable
  // SHL/SRL/SRA do not all have the same modulo-shift semantics we can exploit.
  // Select ROR by register; in ARM state this is modeled as MOVsr with a ROR
  // shifter operand, while in Thumb we use tROR/t2RORrr directly.

  SDValue ShiftAmt = N->getOperand(1);
  SDLoc DL(N);
  SDValue NewShiftAmt;
  // Emit a single unary instruction (RSB #0 for negate, or MVN for
  // bitwise-not) producing the replacement shift amount in the current
  // instruction set. Thumb1 forms take CPSR as an extra leading operand.
  auto emitUnary = [&](unsigned Opc, SDValue Src, bool IsRSB) {
    if (Subtarget->isThumb2() || !Subtarget->isThumb()) {
      SDValue Ops[] = {Src};
      if (IsRSB) {
        // RSB additionally needs the immediate 0 operand (0 - Src).
        SDValue ZeroImm = CurDAG->getTargetConstant(0, DL, MVT::i32);
        SDValue FullOps[] = {Src, ZeroImm, getAL(CurDAG, DL),
                             CurDAG->getRegister(0, MVT::i32),
                             CurDAG->getRegister(0, MVT::i32)};
        MachineSDNode *Unary =
            CurDAG->getMachineNode(Opc, DL, MVT::i32, FullOps);
        return SDValue(Unary, 0);
      }
      SDValue FullOps[] = {Ops[0], getAL(CurDAG, DL),
                           CurDAG->getRegister(0, MVT::i32),
                           CurDAG->getRegister(0, MVT::i32)};
      MachineSDNode *Unary = CurDAG->getMachineNode(Opc, DL, MVT::i32, FullOps);
      return SDValue(Unary, 0);
    }
    SDValue Thumb1Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
                           getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32)};
    MachineSDNode *Unary = CurDAG->getMachineNode(Opc, DL, MVT::i32, Thumb1Ops);
    return SDValue(Unary, 0);
  };

  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
    SDValue Add0 = ShiftAmt->getOperand(0);
    SDValue Add1 = ShiftAmt->getOperand(1);
    unsigned Add0Imm;
    unsigned Add1Imm;
    // (x op 32k) & 31 == x & 31, so the add/sub of a multiple of 32 can be
    // dropped entirely.
    if (isInt32Immediate(Add1, Add1Imm) && ((Add1Imm & 31) == 0)) {
      NewShiftAmt = Add0;
    } else if (ShiftAmt->getOpcode() == ISD::SUB &&
               isInt32Immediate(Add0, Add0Imm) && Add0Imm != 0 &&
               ((Add0Imm & 31) == 0)) {
      // (32k - x) & 31 == (-x) & 31: negate with RSB #0.
      // NOTE(review): this branch tests hasThumb2() while the MVN branch
      // below tests isThumb2() — confirm the asymmetry is intentional.
      unsigned NegOpc =
          Subtarget->isThumb()
              ? (Subtarget->hasThumb2() ? ARM::t2RSBri : ARM::tRSB)
              : ARM::RSBri;
      NewShiftAmt = emitUnary(NegOpc, Add1, /*IsRSB=*/true);
    } else if (ShiftAmt->getOpcode() == ISD::SUB &&
               isInt32Immediate(Add0, Add0Imm) && ((Add0Imm & 31) == 31)) {
      // (32k+31 - x) & 31 == (~x) & 31: use MVN.
      unsigned NotOpc = Subtarget->isThumb()
                            ? (Subtarget->isThumb2() ? ARM::t2MVNr : ARM::tMVN)
                            : ARM::MVNr;
      NewShiftAmt = emitUnary(NotOpc, Add1, /*IsRSB=*/false);
    } else {
      return false;
    }
  } else {
    return false;
  }

  // Select the rotate itself with the simplified amount.
  if (Subtarget->isThumb()) {
    if (Subtarget->isThumb1Only()) {
      SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
                       N->getOperand(0), NewShiftAmt, getAL(CurDAG, DL),
                       CurDAG->getRegister(0, MVT::i32)};
      CurDAG->SelectNodeTo(N, ARM::tROR, VT, Ops);
    } else {
      SDValue Ops[] = {N->getOperand(0), NewShiftAmt, getAL(CurDAG, DL),
                       CurDAG->getRegister(0, MVT::i32),
                       CurDAG->getRegister(0, MVT::i32)};
      CurDAG->SelectNodeTo(N, ARM::t2RORrr, VT, Ops);
    }
  } else {
    SDValue BaseReg = N->getOperand(0);
    SDValue ShReg = NewShiftAmt;
    SDValue OpcEnc = CurDAG->getTargetConstant(
        ARM_AM::getSORegOpc(ARM_AM::ror, 0), DL, MVT::i32);
    SDValue Ops[] = {BaseReg,
                     ShReg,
                     OpcEnc,
                     getAL(CurDAG, DL),
                     CurDAG->getRegister(0, MVT::i32),
                     CurDAG->getRegister(0, MVT::i32)};
    CurDAG->SelectNodeTo(N, ARM::MOVsr, VT, Ops);
  }
  return true;
}
3253
/// Turn an FMUL by a power-of-two constant feeding (or fed by) an int<->float
/// conversion into a single MVE fixed-point VCVT with the corresponding
/// number of fractional bits. Returns true (and replaces N) on success.
bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N,
                                                            SDNode *FMul,
                                                            bool IsUnsigned,
                                                            bool FixedToFloat) {
  auto Type = N->getValueType(0);
  unsigned ScalarBits = Type.getScalarSizeInBits();
  if (ScalarBits > 32)
    return false;

  SDNodeFlags FMulFlags = FMul->getFlags();
  // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
  // allowed in 16 bit unsigned floats
  if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned)
    return false;

  SDValue ImmNode = FMul->getOperand(1);
  SDValue VecVal = FMul->getOperand(0);
  // Look through the conversion to find the raw integer vector.
  if (VecVal->getOpcode() == ISD::UINT_TO_FP ||
      VecVal->getOpcode() == ISD::SINT_TO_FP)
    VecVal = VecVal->getOperand(0);

  if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits)
    return false;

  // Look through a same-width bitcast of the constant operand.
  if (ImmNode.getOpcode() == ISD::BITCAST) {
    if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
      return false;
    ImmNode = ImmNode.getOperand(0);
  }

  if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
    return false;

  // Recover the splatted floating-point constant as an APFloat.
  APFloat ImmAPF(0.0f);
  switch (ImmNode.getOpcode()) {
  case ARMISD::VMOVIMM:
  case ARMISD::VDUP: {
    if (!isa<ConstantSDNode>(ImmNode.getOperand(0)))
      return false;
    unsigned Imm = ImmNode.getConstantOperandVal(0);
    if (ImmNode.getOpcode() == ARMISD::VMOVIMM)
      Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits);
    ImmAPF =
        APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(),
                APInt(ScalarBits, Imm));
    break;
  }
  case ARMISD::VMOVFPIMM: {
    // NOTE(review): the statement extracting ImmAPF from the FP immediate is
    // not visible in this excerpt; confirm against the full source.
    break;
  }
  default:
    return false;
  }

  // Where n is the number of fractional bits, multiplying by 2^n will convert
  // from float to fixed and multiplying by 2^-n will convert from fixed to
  // float. Taking log2 of the factor (after taking the inverse in the case of
  // float to fixed) will give n.
  APFloat ToConvert = ImmAPF;
  if (FixedToFloat) {
    if (!ImmAPF.getExactInverse(&ToConvert))
      return false;
  }
  APSInt Converted(64, false);
  bool IsExact;
  // NOTE(review): the call converting ToConvert into Converted (presumably
  // APFloat::convertToInteger) is only partially visible in this excerpt;
  // confirm against the full source.
      &IsExact);
  if (!IsExact || !Converted.isPowerOf2())
    return false;

  unsigned FracBits = Converted.logBase2();
  if (FracBits > ScalarBits)
    return false;

  // NOTE(review): the opening of the Ops initializer (presumably a
  // SmallVector<SDValue> declaration) is not visible in this excerpt.
      VecVal, CurDAG->getConstant(FracBits, SDLoc(N), MVT::i32)};
  AddEmptyMVEPredicateToOps(Ops, SDLoc(N), Type);

  unsigned int Opcode;
  switch (ScalarBits) {
  case 16:
    if (FixedToFloat)
      Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix;
    else
      Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
    break;
  case 32:
    if (FixedToFloat)
      Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix;
    else
      Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
    break;
  default:
    llvm_unreachable("unexpected number of scalar bits");
    break;
  }

  ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), Type, Ops));
  return true;
}
3355
3356bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {
3357 // Transform a floating-point to fixed-point conversion to a VCVT
3358 if (!Subtarget->hasMVEFloatOps())
3359 return false;
3360 EVT Type = N->getValueType(0);
3361 if (!Type.isVector())
3362 return false;
3363 unsigned int ScalarBits = Type.getScalarSizeInBits();
3364
3365 bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT ||
3366 N->getOpcode() == ISD::FP_TO_UINT_SAT;
3367 SDNode *Node = N->getOperand(0).getNode();
3368
3369 // floating-point to fixed-point with one fractional bit gets turned into an
3370 // FP_TO_[U|S]INT(FADD (x, x)) rather than an FP_TO_[U|S]INT(FMUL (x, y))
3371 if (Node->getOpcode() == ISD::FADD) {
3372 if (Node->getOperand(0) != Node->getOperand(1))
3373 return false;
3374 SDNodeFlags Flags = Node->getFlags();
3375 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3376 // allowed in 16 bit unsigned floats
3377 if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned)
3378 return false;
3379
3380 unsigned Opcode;
3381 switch (ScalarBits) {
3382 case 16:
3383 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3384 break;
3385 case 32:
3386 Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3387 break;
3388 }
3389 SmallVector<SDValue, 3> Ops{Node->getOperand(0),
3390 CurDAG->getConstant(1, dl, MVT::i32)};
3391 AddEmptyMVEPredicateToOps(Ops, dl, Type);
3392
3393 ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops));
3394 return true;
3395 }
3396
3397 if (Node->getOpcode() != ISD::FMUL)
3398 return false;
3399
3400 return transformFixedFloatingPointConversion(N, Node, IsUnsigned, false);
3401}
3402
3403bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {
3404 // Transform a fixed-point to floating-point conversion to a VCVT
3405 if (!Subtarget->hasMVEFloatOps())
3406 return false;
3407 auto Type = N->getValueType(0);
3408 if (!Type.isVector())
3409 return false;
3410
3411 auto LHS = N->getOperand(0);
3412 if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP)
3413 return false;
3414
3415 return transformFixedFloatingPointConversion(
3416 N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true);
3417}
3418
/// Try to select an AND-of-shift, shift-of-shift, shift-of-AND or
/// SIGN_EXTEND_INREG node as a single UBFX/SBFX bitfield extract (or a plain
/// shift when the field reaches the top bit). Returns true on success.
bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
  if (!Subtarget->hasV6T2Ops())
    return false;

  unsigned Opc = isSigned
    ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
    : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
  SDLoc dl(N);

  // For unsigned extracts, check for a shift right and mask
  unsigned And_imm = 0;
  if (N->getOpcode() == ISD::AND) {
    if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {

      // The immediate is a mask of the low bits iff imm & (imm+1) == 0
      if (And_imm & (And_imm + 1))
        return false;

      unsigned Srl_imm = 0;
      if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
                                Srl_imm)) {
        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");

        // Mask off the unnecessary bits of the AND immediate; normally
        // DAGCombine will do this, but that might not happen if
        // targetShrinkDemandedConstant chooses a different immediate.
        And_imm &= -1U >> Srl_imm;

        // Note: The width operand is encoded as width-1.
        unsigned Width = llvm::countr_one(And_imm) - 1;
        unsigned LSB = Srl_imm;

        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

        // If the field extends to bit 31, a plain (arithmetic or logical)
        // right shift is enough and cheaper than a bitfield extract.
        if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
          // It's cheaper to use a right shift to extract the top bits.
          if (Subtarget->isThumb()) {
            Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
            SDValue Ops[] = { N->getOperand(0).getOperand(0),
                              CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                              getAL(CurDAG, dl), Reg0, Reg0 };
            CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
            return true;
          }

          // ARM models shift instructions as MOVsi with shifter operand.
          // NOTE(review): the declaration of ShOpcVal (the ASR/LSR shift
          // kind passed to getSORegOpc) is not visible in this excerpt;
          // confirm against the full source.
          SDValue ShOpc =
            CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
                                      MVT::i32);
          SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
                            getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
          return true;
        }

        assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
        SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                          CurDAG->getTargetConstant(Width, dl, MVT::i32),
                          getAL(CurDAG, dl), Reg0 };
        CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
        return true;
      }
    }
    return false;
  }

  // Otherwise, we're looking for a shift of a shift
  unsigned Shl_imm = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
    unsigned Srl_imm = 0;
    if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      // Note: The width operand is encoded as width-1.
      unsigned Width = 32 - Srl_imm - 1;
      // (x << Shl) >> Srl extracts the field starting at Srl - Shl; a
      // negative LSB would mean the field starts below bit 0.
      int LSB = Srl_imm - Shl_imm;
      if (LSB < 0)
        return false;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Or we are looking for a shift of an and, with a mask operand
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
      isShiftedMask_32(And_imm)) {
    unsigned Srl_imm = 0;
    unsigned LSB = llvm::countr_zero(And_imm);
    // Shift must be the same as the ands lsb
    if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      unsigned MSB = llvm::Log2_32(And_imm);
      // Note: The width operand is encoded as width-1.
      unsigned Width = MSB - LSB;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Finally, sext_inreg of a right shift becomes an SBFX/UBFX directly.
  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    unsigned LSB = 0;
    if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
        !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
      return false;

    if (LSB + Width > 32)
      return false;

    SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
    assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
    SDValue Ops[] = { N->getOperand(0).getOperand(0),
                      CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                      CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
                      getAL(CurDAG, dl), Reg0 };
    CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
    return true;
  }

  return false;
}
3554
3555/// We've got special pseudo-instructions for these
3556void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3557 unsigned Opcode;
3558 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
3559 if (MemTy == MVT::i8)
3560 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
3561 else if (MemTy == MVT::i16)
3562 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
3563 else if (MemTy == MVT::i32)
3564 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32;
3565 else
3566 llvm_unreachable("Unknown AtomicCmpSwap type");
3567
3568 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
3569 N->getOperand(0)};
3570 SDNode *CmpSwap = CurDAG->getMachineNode(
3571 Opcode, SDLoc(N),
3572 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
3573
3574 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
3575 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
3576
3577 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
3578 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
3579 CurDAG->RemoveDeadNode(N);
3580}
3581
3582static std::optional<std::pair<unsigned, unsigned>>
3584 unsigned FirstOne = A.getBitWidth() - A.countl_zero() - 1;
3585 unsigned LastOne = A.countr_zero();
3586 if (A.popcount() != (FirstOne - LastOne + 1))
3587 return std::nullopt;
3588 return std::make_pair(FirstOne, LastOne);
3589}
3590
/// Try to fold the operand of (CMPZ (and X, C), #0), where C is a contiguous
/// bit mask, into one or two flag-setting Thumb shifts (LSLS/LSRS) so the
/// mask constant never needs to be materialized. When the mask is a single
/// bit, the bit is shifted into the sign position and SwitchEQNEToPLMI is set
/// so the caller rewrites its EQ/NE condition code into PL/MI.
void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
  assert(N->getOpcode() == ARMISD::CMPZ);
  SwitchEQNEToPLMI = false;

  if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
    // LSR don't exist as standalone instructions - they need the barrel shifter.
    return;

  // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
  SDValue And = N->getOperand(0);
  // Only transform when the AND has no other users; its result is replaced
  // by the flag-setting shift(s) below.
  if (!And->hasOneUse())
    return;

  // The comparison must be against zero and the operand must be an AND with
  // a constant mask whose set bits are contiguous.
  SDValue Zero = N->getOperand(1);
  if (!isNullConstant(Zero) || And->getOpcode() != ISD::AND)
    return;
  SDValue X = And.getOperand(0);
  auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));

  if (!C)
    return;
  // Range is (MSB index, LSB index) of the contiguous mask.
  auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
  if (!Range)
    return;

  // There are several ways to lower this:
  SDNode *NewN;
  SDLoc dl(N);

  // Emit a flag-setting shift. In Thumb2 the tLSLri/tLSRri opcodes are
  // remapped to their t2 forms (which take an extra cc_out-style operand);
  // in Thumb1 the CPSR def is the first explicit operand instead.
  auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
    if (Subtarget->isThumb2()) {
      Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
      SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    } else {
      SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
                       CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                       getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    }
  };

  if (Range->second == 0) {
    //  1. Mask includes the LSB -> Simply shift the top N bits off
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == 31) {
    //  2. Mask includes the MSB -> Simply shift the bottom N bits off
    NewN = EmitShift(ARM::tLSRri, X, Range->second);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == Range->second) {
    //  3. Only one bit is set. We can shift this into the sign bit and use a
    //     PL/MI comparison. This is not safe if CMPZ has multiple uses because
    //     only one of them (the one currently being selected) will be switched
    //     to use the new condition code.
    if (!N->hasOneUse())
      return;
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);

    SwitchEQNEToPLMI = true;
  } else if (!Subtarget->hasV6T2Ops()) {
    //  4. Do a double shift to clear bottom and top bits, but only in
    //     thumb-1 mode as in thumb-2 we can use UBFX.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
                     Range->second + (31 - Range->first));
    ReplaceNode(And.getNode(), NewN);
  }
}
3664
3665static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3],
3666 unsigned Opc128[3]) {
3667 assert((VT.is64BitVector() || VT.is128BitVector()) &&
3668 "Unexpected vector shuffle length");
3669 switch (VT.getScalarSizeInBits()) {
3670 default:
3671 llvm_unreachable("Unexpected vector shuffle element size");
3672 case 8:
3673 return VT.is64BitVector() ? Opc64[0] : Opc128[0];
3674 case 16:
3675 return VT.is64BitVector() ? Opc64[1] : Opc128[1];
3676 case 32:
3677 return VT.is64BitVector() ? Opc64[2] : Opc128[2];
3678 }
3679}
3680
3681void ARMDAGToDAGISel::Select(SDNode *N) {
3682 SDLoc dl(N);
3683
3684 if (N->isMachineOpcode()) {
3685 N->setNodeId(-1);
3686 return; // Already selected.
3687 }
3688
3689 switch (N->getOpcode()) {
3690 default: break;
3691 case ISD::STORE: {
3692 // For Thumb1, match an sp-relative store in C++. This is a little
3693 // unfortunate, but I don't think I can make the chain check work
3694 // otherwise. (The chain of the store has to be the same as the chain
3695 // of the CopyFromReg, or else we can't replace the CopyFromReg with
3696 // a direct reference to "SP".)
3697 //
3698 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
3699 // a different addressing mode from other four-byte stores.
3700 //
3701 // This pattern usually comes up with call arguments.
3702 StoreSDNode *ST = cast<StoreSDNode>(N);
3703 SDValue Ptr = ST->getBasePtr();
3704 if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
3705 int RHSC = 0;
3706 if (Ptr.getOpcode() == ISD::ADD &&
3707 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
3708 Ptr = Ptr.getOperand(0);
3709
3710 if (Ptr.getOpcode() == ISD::CopyFromReg &&
3711 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
3712 Ptr.getOperand(0) == ST->getChain()) {
3713 SDValue Ops[] = {ST->getValue(),
3714 CurDAG->getRegister(ARM::SP, MVT::i32),
3715 CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
3716 getAL(CurDAG, dl),
3717 CurDAG->getRegister(0, MVT::i32),
3718 ST->getChain()};
3719 MachineSDNode *ResNode =
3720 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
3721 MachineMemOperand *MemOp = ST->getMemOperand();
3722 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3723 ReplaceNode(N, ResNode);
3724 return;
3725 }
3726 }
3727 break;
3728 }
3730 if (tryWriteRegister(N))
3731 return;
3732 break;
3733 case ISD::READ_REGISTER:
3734 if (tryReadRegister(N))
3735 return;
3736 break;
3737 case ISD::INLINEASM:
3738 case ISD::INLINEASM_BR:
3739 if (tryInlineAsm(N))
3740 return;
3741 break;
3742 case ISD::Constant: {
3743 unsigned Val = N->getAsZExtVal();
3744 // If we can't materialize the constant we need to use a literal pool
3745 if (ConstantMaterializationCost(Val, Subtarget) > 2 &&
3746 !Subtarget->genExecuteOnly()) {
3747 SDValue CPIdx = CurDAG->getTargetConstantPool(
3748 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
3749 TLI->getPointerTy(CurDAG->getDataLayout()));
3750
3751 SDNode *ResNode;
3752 if (Subtarget->isThumb()) {
3753 SDValue Ops[] = {
3754 CPIdx,
3755 getAL(CurDAG, dl),
3756 CurDAG->getRegister(0, MVT::i32),
3757 CurDAG->getEntryNode()
3758 };
3759 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
3760 Ops);
3761 } else {
3762 SDValue Ops[] = {
3763 CPIdx,
3764 CurDAG->getTargetConstant(0, dl, MVT::i32),
3765 getAL(CurDAG, dl),
3766 CurDAG->getRegister(0, MVT::i32),
3767 CurDAG->getEntryNode()
3768 };
3769 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
3770 Ops);
3771 }
3772 // Annotate the Node with memory operand information so that MachineInstr
3773 // queries work properly. This e.g. gives the register allocation the
3774 // required information for rematerialization.
3775 MachineFunction& MF = CurDAG->getMachineFunction();
3776 MachineMemOperand *MemOp =
3779
3780 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3781
3782 ReplaceNode(N, ResNode);
3783 return;
3784 }
3785
3786 // Other cases are autogenerated.
3787 break;
3788 }
3789 case ISD::FrameIndex: {
3790 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
3791 int FI = cast<FrameIndexSDNode>(N)->getIndex();
3792 SDValue TFI = CurDAG->getTargetFrameIndex(
3793 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
3794 if (Subtarget->isThumb1Only()) {
3795 // Set the alignment of the frame object to 4, to avoid having to generate
3796 // more than one ADD
3797 MachineFrameInfo &MFI = MF->getFrameInfo();
3798 if (MFI.getObjectAlign(FI) < Align(4))
3799 MFI.setObjectAlignment(FI, Align(4));
3800 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
3801 CurDAG->getTargetConstant(0, dl, MVT::i32));
3802 return;
3803 } else {
3804 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
3805 ARM::t2ADDri : ARM::ADDri);
3806 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
3807 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3808 CurDAG->getRegister(0, MVT::i32) };
3809 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3810 return;
3811 }
3812 }
3814 if (tryInsertVectorElt(N))
3815 return;
3816 break;
3817 }
3818 case ISD::SRL:
3819 if (tryV6T2BitfieldExtractOp(N, false))
3820 return;
3821 break;
3823 case ISD::SRA:
3824 if (tryV6T2BitfieldExtractOp(N, true))
3825 return;
3826 break;
3827 case ISD::ROTR:
3828 if (tryShiftAmountMod(N))
3829 return;
3830 break;
3831 case ISD::FP_TO_UINT:
3832 case ISD::FP_TO_SINT:
3835 if (tryFP_TO_INT(N, dl))
3836 return;
3837 break;
3838 case ISD::FMUL:
3839 if (tryFMULFixed(N, dl))
3840 return;
3841 break;
3842 case ISD::MUL:
3843 if (Subtarget->isThumb1Only())
3844 break;
3845 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
3846 unsigned RHSV = C->getZExtValue();
3847 if (!RHSV) break;
3848 if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
3849 unsigned ShImm = Log2_32(RHSV-1);
3850 if (ShImm >= 32)
3851 break;
3852 SDValue V = N->getOperand(0);
3853 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3854 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3855 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3856 if (Subtarget->isThumb()) {
3857 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3858 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
3859 return;
3860 } else {
3861 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3862 Reg0 };
3863 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
3864 return;
3865 }
3866 }
3867 if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
3868 unsigned ShImm = Log2_32(RHSV+1);
3869 if (ShImm >= 32)
3870 break;
3871 SDValue V = N->getOperand(0);
3872 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3873 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3874 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3875 if (Subtarget->isThumb()) {
3876 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3877 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
3878 return;
3879 } else {
3880 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3881 Reg0 };
3882 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
3883 return;
3884 }
3885 }
3886 }
3887 break;
3888 case ISD::AND: {
3889 // Check for unsigned bitfield extract
3890 if (tryV6T2BitfieldExtractOp(N, false))
3891 return;
3892
3893 // If an immediate is used in an AND node, it is possible that the immediate
3894 // can be more optimally materialized when negated. If this is the case we
3895 // can negate the immediate and use a BIC instead.
3896 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
3897 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
3898 uint32_t Imm = (uint32_t) N1C->getZExtValue();
3899
3900 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
3901 // immediate can be negated and fit in the immediate operand of
3902 // a t2BIC, don't do any manual transform here as this can be
3903 // handled by the generic ISel machinery.
3904 bool PreferImmediateEncoding =
3905 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
3906 if (!PreferImmediateEncoding &&
3907 ConstantMaterializationCost(Imm, Subtarget) >
3908 ConstantMaterializationCost(~Imm, Subtarget)) {
3909 // The current immediate costs more to materialize than a negated
3910 // immediate, so negate the immediate and use a BIC.
3911 SDValue NewImm = CurDAG->getConstant(~Imm, dl, MVT::i32);
3912 // If the new constant didn't exist before, reposition it in the topological
3913 // ordering so it is just before N. Otherwise, don't touch its location.
3914 if (NewImm->getNodeId() == -1)
3915 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
3916
3917 if (!Subtarget->hasThumb2()) {
3918 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
3919 N->getOperand(0), NewImm, getAL(CurDAG, dl),
3920 CurDAG->getRegister(0, MVT::i32)};
3921 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
3922 return;
3923 } else {
3924 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
3925 CurDAG->getRegister(0, MVT::i32),
3926 CurDAG->getRegister(0, MVT::i32)};
3927 ReplaceNode(N,
3928 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
3929 return;
3930 }
3931 }
3932 }
3933
3934 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
3935 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
3936 // are entirely contributed by c2 and lower 16-bits are entirely contributed
3937 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
3938 // Select it to: "movt x, ((c1 & 0xffff) >> 16)
3939 EVT VT = N->getValueType(0);
3940 if (VT != MVT::i32)
3941 break;
3942 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
3943 ? ARM::t2MOVTi16
3944 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
3945 if (!Opc)
3946 break;
3947 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3948 N1C = dyn_cast<ConstantSDNode>(N1);
3949 if (!N1C)
3950 break;
3951 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
3952 SDValue N2 = N0.getOperand(1);
3953 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
3954 if (!N2C)
3955 break;
3956 unsigned N1CVal = N1C->getZExtValue();
3957 unsigned N2CVal = N2C->getZExtValue();
3958 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
3959 (N1CVal & 0xffffU) == 0xffffU &&
3960 (N2CVal & 0xffffU) == 0x0U) {
3961 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
3962 dl, MVT::i32);
3963 SDValue Ops[] = { N0.getOperand(0), Imm16,
3964 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
3965 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
3966 return;
3967 }
3968 }
3969
3970 break;
3971 }
3972 case ARMISD::UMAAL: {
3973 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3974 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3975 N->getOperand(2), N->getOperand(3),
3976 getAL(CurDAG, dl),
3977 CurDAG->getRegister(0, MVT::i32) };
3978 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
3979 return;
3980 }
3981 case ARMISD::UMLAL:{
3982 if (Subtarget->isThumb()) {
3983 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3984 N->getOperand(3), getAL(CurDAG, dl),
3985 CurDAG->getRegister(0, MVT::i32)};
3986 ReplaceNode(
3987 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
3988 return;
3989 }else{
3990 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3991 N->getOperand(3), getAL(CurDAG, dl),
3992 CurDAG->getRegister(0, MVT::i32),
3993 CurDAG->getRegister(0, MVT::i32) };
3994 ReplaceNode(N, CurDAG->getMachineNode(
3995 Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3996 MVT::i32, MVT::i32, Ops));
3997 return;
3998 }
3999 }
4000 case ARMISD::SMLAL:{
4001 if (Subtarget->isThumb()) {
4002 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
4003 N->getOperand(3), getAL(CurDAG, dl),
4004 CurDAG->getRegister(0, MVT::i32)};
4005 ReplaceNode(
4006 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
4007 return;
4008 }else{
4009 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
4010 N->getOperand(3), getAL(CurDAG, dl),
4011 CurDAG->getRegister(0, MVT::i32),
4012 CurDAG->getRegister(0, MVT::i32) };
4013 ReplaceNode(N, CurDAG->getMachineNode(
4014 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
4015 MVT::i32, MVT::i32, Ops));
4016 return;
4017 }
4018 }
4019 case ARMISD::SUBE: {
4020 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
4021 break;
4022 // Look for a pattern to match SMMLS
4023 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
4024 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
4025 N->getOperand(2).getOpcode() != ARMISD::SUBC ||
4026 !SDValue(N, 1).use_empty())
4027 break;
4028
4029 if (Subtarget->isThumb())
4030 assert(Subtarget->hasThumb2() &&
4031 "This pattern should not be generated for Thumb");
4032
4033 SDValue SmulLoHi = N->getOperand(1);
4034 SDValue Subc = N->getOperand(2);
4035 SDValue Zero = Subc.getOperand(0);
4036
4037 if (!isNullConstant(Zero) || Subc.getOperand(1) != SmulLoHi.getValue(0) ||
4038 N->getOperand(1) != SmulLoHi.getValue(1) ||
4039 N->getOperand(2) != Subc.getValue(1))
4040 break;
4041
4042 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
4043 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
4044 N->getOperand(0), getAL(CurDAG, dl),
4045 CurDAG->getRegister(0, MVT::i32) };
4046 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
4047 return;
4048 }
4049 case ISD::LOAD: {
4050 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
4051 return;
4052 if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
4053 if (tryT2IndexedLoad(N))
4054 return;
4055 } else if (Subtarget->isThumb()) {
4056 if (tryT1IndexedLoad(N))
4057 return;
4058 } else if (tryARMIndexedLoad(N))
4059 return;
4060 // Other cases are autogenerated.
4061 break;
4062 }
4063 case ISD::MLOAD:
4064 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
4065 return;
4066 // Other cases are autogenerated.
4067 break;
4068 case ARMISD::LDRD: {
4069 if (Subtarget->isThumb2())
4070 break; // TableGen handles isel in this case.
4071 SDValue Base, RegOffset, ImmOffset;
4072 const SDValue &Chain = N->getOperand(0);
4073 const SDValue &Addr = N->getOperand(1);
4074 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
4075 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
4076 // The register-offset variant of LDRD mandates that the register
4077 // allocated to RegOffset is not reused in any of the remaining operands.
4078 // This restriction is currently not enforced. Therefore emitting this
4079 // variant is explicitly avoided.
4080 Base = Addr;
4081 RegOffset = CurDAG->getRegister(0, MVT::i32);
4082 }
4083 SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
4084 SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
4085 {MVT::Untyped, MVT::Other}, Ops);
4086 SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4087 SDValue(New, 0));
4088 SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4089 SDValue(New, 0));
4090 transferMemOperands(N, New);
4091 ReplaceUses(SDValue(N, 0), Lo);
4092 ReplaceUses(SDValue(N, 1), Hi);
4093 ReplaceUses(SDValue(N, 2), SDValue(New, 1));
4094 CurDAG->RemoveDeadNode(N);
4095 return;
4096 }
4097 case ARMISD::STRD: {
4098 if (Subtarget->isThumb2())
4099 break; // TableGen handles isel in this case.
4100 SDValue Base, RegOffset, ImmOffset;
4101 const SDValue &Chain = N->getOperand(0);
4102 const SDValue &Addr = N->getOperand(3);
4103 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
4104 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
4105 // The register-offset variant of STRD mandates that the register
4106 // allocated to RegOffset is not reused in any of the remaining operands.
4107 // This restriction is currently not enforced. Therefore emitting this
4108 // variant is explicitly avoided.
4109 Base = Addr;
4110 RegOffset = CurDAG->getRegister(0, MVT::i32);
4111 }
4112 SDNode *RegPair =
4113 createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2));
4114 SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
4115 SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops);
4116 transferMemOperands(N, New);
4117 ReplaceUses(SDValue(N, 0), SDValue(New, 0));
4118 CurDAG->RemoveDeadNode(N);
4119 return;
4120 }
4121 case ARMISD::BRCOND: {
4122 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4123 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4124 // Pattern complexity = 6 cost = 1 size = 0
4125
4126 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4127 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
4128 // Pattern complexity = 6 cost = 1 size = 0
4129
4130 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4131 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4132 // Pattern complexity = 6 cost = 1 size = 0
4133
4134 unsigned Opc = Subtarget->isThumb() ?
4135 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
4136 SDValue Chain = N->getOperand(0);
4137 SDValue N1 = N->getOperand(1);
4138 SDValue N2 = N->getOperand(2);
4139 SDValue Flags = N->getOperand(3);
4142
4143 unsigned CC = (unsigned)N2->getAsZExtVal();
4144
4145 if (Flags.getOpcode() == ARMISD::CMPZ) {
4146 if (Flags.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
4147 SDValue Int = Flags.getOperand(0);
4148 uint64_t ID = Int->getConstantOperandVal(1);
4149
4150 // Handle low-overhead loops.
4151 if (ID == Intrinsic::loop_decrement_reg) {
4152 SDValue Elements = Int.getOperand(2);
4153 SDValue Size = CurDAG->getTargetConstant(Int.getConstantOperandVal(3),
4154 dl, MVT::i32);
4155
4156 SDValue Args[] = { Elements, Size, Int.getOperand(0) };
4157 SDNode *LoopDec =
4158 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
4159 CurDAG->getVTList(MVT::i32, MVT::Other),
4160 Args);
4161 ReplaceUses(Int.getNode(), LoopDec);
4162
4163 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
4164 SDNode *LoopEnd =
4165 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
4166
4167 ReplaceUses(N, LoopEnd);
4168 CurDAG->RemoveDeadNode(N);
4169 CurDAG->RemoveDeadNode(Flags.getNode());
4170 CurDAG->RemoveDeadNode(Int.getNode());
4171 return;
4172 }
4173 }
4174
4175 bool SwitchEQNEToPLMI;
4176 SelectCMPZ(Flags.getNode(), SwitchEQNEToPLMI);
4177 Flags = N->getOperand(3);
4178
4179 if (SwitchEQNEToPLMI) {
4180 switch ((ARMCC::CondCodes)CC) {
4181 default: llvm_unreachable("CMPZ must be either NE or EQ!");
4182 case ARMCC::NE:
4183 CC = (unsigned)ARMCC::MI;
4184 break;
4185 case ARMCC::EQ:
4186 CC = (unsigned)ARMCC::PL;
4187 break;
4188 }
4189 }
4190 }
4191
4192 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
4193 Chain = CurDAG->getCopyToReg(Chain, dl, ARM::CPSR, Flags, SDValue());
4194 SDValue Ops[] = {N1, Tmp2, CurDAG->getRegister(ARM::CPSR, MVT::i32), Chain,
4195 Chain.getValue(1)};
4196 CurDAG->SelectNodeTo(N, Opc, MVT::Other, Ops);
4197 return;
4198 }
4199
4200 case ARMISD::CMPZ: {
4201 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
4202 // This allows us to avoid materializing the expensive negative constant.
4203 // The CMPZ #0 is useless and will be peepholed away but we need to keep
4204 // it for its flags output.
4205 SDValue X = N->getOperand(0);
4206 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
4207 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
4208 int64_t Addend = -C->getSExtValue();
4209
4210 SDNode *Add = nullptr;
4211 // ADDS can be better than CMN if the immediate fits in a
4212 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
4213 // Outside that range we can just use a CMN which is 32-bit but has a
4214 // 12-bit immediate range.
4215 if (Addend < 1<<8) {
4216 if (Subtarget->isThumb2()) {
4217 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
4218 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
4219 CurDAG->getRegister(0, MVT::i32) };
4220 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
4221 } else {
4222 unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
4223 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
4224 CurDAG->getTargetConstant(Addend, dl, MVT::i32),
4225 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
4226 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
4227 }
4228 }
4229 if (Add) {
4230 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
4231 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, N->getVTList(), Ops2);
4232 }
4233 }
4234 // Other cases are autogenerated.
4235 break;
4236 }
4237
4238 case ARMISD::CMOV: {
4239 SDValue Flags = N->getOperand(3);
4240
4241 if (Flags.getOpcode() == ARMISD::CMPZ) {
4242 bool SwitchEQNEToPLMI;
4243 SelectCMPZ(Flags.getNode(), SwitchEQNEToPLMI);
4244
4245 if (SwitchEQNEToPLMI) {
4246 SDValue ARMcc = N->getOperand(2);
4248
4249 switch (CC) {
4250 default: llvm_unreachable("CMPZ must be either NE or EQ!");
4251 case ARMCC::NE:
4252 CC = ARMCC::MI;
4253 break;
4254 case ARMCC::EQ:
4255 CC = ARMCC::PL;
4256 break;
4257 }
4258 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
4259 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
4260 N->getOperand(3)};
4261 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
4262 }
4263 }
4264 // Other cases are autogenerated.
4265 break;
4266 }
4267 case ARMISD::VZIP: {
4268 EVT VT = N->getValueType(0);
4269 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4270 unsigned Opc64[] = {ARM::VZIPd8, ARM::VZIPd16, ARM::VTRNd32};
4271 unsigned Opc128[] = {ARM::VZIPq8, ARM::VZIPq16, ARM::VZIPq32};
4272 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4273 SDValue Pred = getAL(CurDAG, dl);
4274 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4275 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4276 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4277 return;
4278 }
4279 case ARMISD::VUZP: {
4280 EVT VT = N->getValueType(0);
4281 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4282 unsigned Opc64[] = {ARM::VUZPd8, ARM::VUZPd16, ARM::VTRNd32};
4283 unsigned Opc128[] = {ARM::VUZPq8, ARM::VUZPq16, ARM::VUZPq32};
4284 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4285 SDValue Pred = getAL(CurDAG, dl);
4286 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4287 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4288 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4289 return;
4290 }
4291 case ARMISD::VTRN: {
4292 EVT VT = N->getValueType(0);
4293 unsigned Opc64[] = {ARM::VTRNd8, ARM::VTRNd16, ARM::VTRNd32};
4294 unsigned Opc128[] = {ARM::VTRNq8, ARM::VTRNq16, ARM::VTRNq32};
4295 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4296 SDValue Pred = getAL(CurDAG, dl);
4297 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4298 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4299 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4300 return;
4301 }
4302 case ARMISD::BUILD_VECTOR: {
4303 EVT VecVT = N->getValueType(0);
4304 EVT EltVT = VecVT.getVectorElementType();
4305 unsigned NumElts = VecVT.getVectorNumElements();
4306 if (EltVT == MVT::f64) {
4307 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
4308 ReplaceNode(
4309 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
4310 return;
4311 }
4312 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
4313 if (NumElts == 2) {
4314 ReplaceNode(
4315 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
4316 return;
4317 }
4318 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
4319 ReplaceNode(N,
4320 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
4321 N->getOperand(2), N->getOperand(3)));
4322 return;
4323 }
4324
4325 case ARMISD::VLD1DUP: {
4326 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
4327 ARM::VLD1DUPd32 };
4328 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
4329 ARM::VLD1DUPq32 };
4330 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
4331 return;
4332 }
4333
4334 case ARMISD::VLD2DUP: {
4335 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4336 ARM::VLD2DUPd32 };
4337 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
4338 return;
4339 }
4340
4341 case ARMISD::VLD3DUP: {
4342 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
4343 ARM::VLD3DUPd16Pseudo,
4344 ARM::VLD3DUPd32Pseudo };
4345 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
4346 return;
4347 }
4348
4349 case ARMISD::VLD4DUP: {
4350 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
4351 ARM::VLD4DUPd16Pseudo,
4352 ARM::VLD4DUPd32Pseudo };
4353 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
4354 return;
4355 }
4356
4357 case ARMISD::VLD1DUP_UPD: {
4358 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
4359 ARM::VLD1DUPd16wb_fixed,
4360 ARM::VLD1DUPd32wb_fixed };
4361 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
4362 ARM::VLD1DUPq16wb_fixed,
4363 ARM::VLD1DUPq32wb_fixed };
4364 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
4365 return;
4366 }
4367
4368 case ARMISD::VLD2DUP_UPD: {
4369 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed,
4370 ARM::VLD2DUPd16wb_fixed,
4371 ARM::VLD2DUPd32wb_fixed,
4372 ARM::VLD1q64wb_fixed };
4373 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4374 ARM::VLD2DUPq16EvenPseudo,
4375 ARM::VLD2DUPq32EvenPseudo };
4376 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed,
4377 ARM::VLD2DUPq16OddPseudoWB_fixed,
4378 ARM::VLD2DUPq32OddPseudoWB_fixed };
4379 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0, QOpcodes1);
4380 return;
4381 }
4382
4383 case ARMISD::VLD3DUP_UPD: {
4384 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
4385 ARM::VLD3DUPd16Pseudo_UPD,
4386 ARM::VLD3DUPd32Pseudo_UPD,
4387 ARM::VLD1d64TPseudoWB_fixed };
4388 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4389 ARM::VLD3DUPq16EvenPseudo,
4390 ARM::VLD3DUPq32EvenPseudo };
4391 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD,
4392 ARM::VLD3DUPq16OddPseudo_UPD,
4393 ARM::VLD3DUPq32OddPseudo_UPD };
4394 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4395 return;
4396 }
4397
4398 case ARMISD::VLD4DUP_UPD: {
4399 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
4400 ARM::VLD4DUPd16Pseudo_UPD,
4401 ARM::VLD4DUPd32Pseudo_UPD,
4402 ARM::VLD1d64QPseudoWB_fixed };
4403 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4404 ARM::VLD4DUPq16EvenPseudo,
4405 ARM::VLD4DUPq32EvenPseudo };
4406 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD,
4407 ARM::VLD4DUPq16OddPseudo_UPD,
4408 ARM::VLD4DUPq32OddPseudo_UPD };
4409 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4410 return;
4411 }
4412
4413 case ARMISD::VLD1_UPD: {
4414 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
4415 ARM::VLD1d16wb_fixed,
4416 ARM::VLD1d32wb_fixed,
4417 ARM::VLD1d64wb_fixed };
4418 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
4419 ARM::VLD1q16wb_fixed,
4420 ARM::VLD1q32wb_fixed,
4421 ARM::VLD1q64wb_fixed };
4422 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
4423 return;
4424 }
4425
4426 case ARMISD::VLD2_UPD: {
4427 if (Subtarget->hasNEON()) {
4428 static const uint16_t DOpcodes[] = {
4429 ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
4430 ARM::VLD1q64wb_fixed};
4431 static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
4432 ARM::VLD2q16PseudoWB_fixed,
4433 ARM::VLD2q32PseudoWB_fixed};
4434 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
4435 } else {
4436 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
4437 ARM::MVE_VLD21_8_wb};
4438 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
4439 ARM::MVE_VLD21_16_wb};
4440 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
4441 ARM::MVE_VLD21_32_wb};
4442 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4443 SelectMVE_VLD(N, 2, Opcodes, true);
4444 }
4445 return;
4446 }
4447
4448 case ARMISD::VLD3_UPD: {
4449 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
4450 ARM::VLD3d16Pseudo_UPD,
4451 ARM::VLD3d32Pseudo_UPD,
4452 ARM::VLD1d64TPseudoWB_fixed};
4453 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4454 ARM::VLD3q16Pseudo_UPD,
4455 ARM::VLD3q32Pseudo_UPD };
4456 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
4457 ARM::VLD3q16oddPseudo_UPD,
4458 ARM::VLD3q32oddPseudo_UPD };
4459 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4460 return;
4461 }
4462
4463 case ARMISD::VLD4_UPD: {
4464 if (Subtarget->hasNEON()) {
4465 static const uint16_t DOpcodes[] = {
4466 ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD,
4467 ARM::VLD1d64QPseudoWB_fixed};
4468 static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
4469 ARM::VLD4q16Pseudo_UPD,
4470 ARM::VLD4q32Pseudo_UPD};
4471 static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
4472 ARM::VLD4q16oddPseudo_UPD,
4473 ARM::VLD4q32oddPseudo_UPD};
4474 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4475 } else {
4476 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
4477 ARM::MVE_VLD42_8,
4478 ARM::MVE_VLD43_8_wb};
4479 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
4480 ARM::MVE_VLD42_16,
4481 ARM::MVE_VLD43_16_wb};
4482 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
4483 ARM::MVE_VLD42_32,
4484 ARM::MVE_VLD43_32_wb};
4485 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4486 SelectMVE_VLD(N, 4, Opcodes, true);
4487 }
4488 return;
4489 }
4490
4491 case ARMISD::VLD1x2_UPD: {
4492 if (Subtarget->hasNEON()) {
4493 static const uint16_t DOpcodes[] = {
4494 ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed,
4495 ARM::VLD1q64wb_fixed};
4496 static const uint16_t QOpcodes[] = {
4497 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4498 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4499 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
4500 return;
4501 }
4502 break;
4503 }
4504
4505 case ARMISD::VLD1x3_UPD: {
4506 if (Subtarget->hasNEON()) {
4507 static const uint16_t DOpcodes[] = {
4508 ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed,
4509 ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed};
4510 static const uint16_t QOpcodes0[] = {
4511 ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,
4512 ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};
4513 static const uint16_t QOpcodes1[] = {
4514 ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD,
4515 ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD};
4516 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4517 return;
4518 }
4519 break;
4520 }
4521
4522 case ARMISD::VLD1x4_UPD: {
4523 if (Subtarget->hasNEON()) {
4524 static const uint16_t DOpcodes[] = {
4525 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4526 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4527 static const uint16_t QOpcodes0[] = {
4528 ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,
4529 ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};
4530 static const uint16_t QOpcodes1[] = {
4531 ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD,
4532 ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD};
4533 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4534 return;
4535 }
4536 break;
4537 }
4538
4539 case ARMISD::VLD2LN_UPD: {
4540 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
4541 ARM::VLD2LNd16Pseudo_UPD,
4542 ARM::VLD2LNd32Pseudo_UPD };
4543 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
4544 ARM::VLD2LNq32Pseudo_UPD };
4545 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
4546 return;
4547 }
4548
4549 case ARMISD::VLD3LN_UPD: {
4550 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
4551 ARM::VLD3LNd16Pseudo_UPD,
4552 ARM::VLD3LNd32Pseudo_UPD };
4553 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
4554 ARM::VLD3LNq32Pseudo_UPD };
4555 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
4556 return;
4557 }
4558
4559 case ARMISD::VLD4LN_UPD: {
4560 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
4561 ARM::VLD4LNd16Pseudo_UPD,
4562 ARM::VLD4LNd32Pseudo_UPD };
4563 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
4564 ARM::VLD4LNq32Pseudo_UPD };
4565 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
4566 return;
4567 }
4568
4569 case ARMISD::VST1_UPD: {
4570 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
4571 ARM::VST1d16wb_fixed,
4572 ARM::VST1d32wb_fixed,
4573 ARM::VST1d64wb_fixed };
4574 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
4575 ARM::VST1q16wb_fixed,
4576 ARM::VST1q32wb_fixed,
4577 ARM::VST1q64wb_fixed };
4578 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
4579 return;
4580 }
4581
4582 case ARMISD::VST2_UPD: {
4583 if (Subtarget->hasNEON()) {
4584 static const uint16_t DOpcodes[] = {
4585 ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
4586 ARM::VST1q64wb_fixed};
4587 static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
4588 ARM::VST2q16PseudoWB_fixed,
4589 ARM::VST2q32PseudoWB_fixed};
4590 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
4591 return;
4592 }
4593 break;
4594 }
4595
4596 case ARMISD::VST3_UPD: {
4597 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
4598 ARM::VST3d16Pseudo_UPD,
4599 ARM::VST3d32Pseudo_UPD,
4600 ARM::VST1d64TPseudoWB_fixed};
4601 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
4602 ARM::VST3q16Pseudo_UPD,
4603 ARM::VST3q32Pseudo_UPD };
4604 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
4605 ARM::VST3q16oddPseudo_UPD,
4606 ARM::VST3q32oddPseudo_UPD };
4607 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4608 return;
4609 }
4610
4611 case ARMISD::VST4_UPD: {
4612 if (Subtarget->hasNEON()) {
4613 static const uint16_t DOpcodes[] = {
4614 ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,
4615 ARM::VST1d64QPseudoWB_fixed};
4616 static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
4617 ARM::VST4q16Pseudo_UPD,
4618 ARM::VST4q32Pseudo_UPD};
4619 static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
4620 ARM::VST4q16oddPseudo_UPD,
4621 ARM::VST4q32oddPseudo_UPD};
4622 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4623 return;
4624 }
4625 break;
4626 }
4627
4628 case ARMISD::VST1x2_UPD: {
4629 if (Subtarget->hasNEON()) {
4630 static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed,
4631 ARM::VST1q16wb_fixed,
4632 ARM::VST1q32wb_fixed,
4633 ARM::VST1q64wb_fixed};
4634 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4635 ARM::VST1d16QPseudoWB_fixed,
4636 ARM::VST1d32QPseudoWB_fixed,
4637 ARM::VST1d64QPseudoWB_fixed };
4638 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
4639 return;
4640 }
4641 break;
4642 }
4643
4644 case ARMISD::VST1x3_UPD: {
4645 if (Subtarget->hasNEON()) {
4646 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed,
4647 ARM::VST1d16TPseudoWB_fixed,
4648 ARM::VST1d32TPseudoWB_fixed,
4649 ARM::VST1d64TPseudoWB_fixed };
4650 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
4651 ARM::VST1q16LowTPseudo_UPD,
4652 ARM::VST1q32LowTPseudo_UPD,
4653 ARM::VST1q64LowTPseudo_UPD };
4654 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD,
4655 ARM::VST1q16HighTPseudo_UPD,
4656 ARM::VST1q32HighTPseudo_UPD,
4657 ARM::VST1q64HighTPseudo_UPD };
4658 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4659 return;
4660 }
4661 break;
4662 }
4663
4664 case ARMISD::VST1x4_UPD: {
4665 if (Subtarget->hasNEON()) {
4666 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4667 ARM::VST1d16QPseudoWB_fixed,
4668 ARM::VST1d32QPseudoWB_fixed,
4669 ARM::VST1d64QPseudoWB_fixed };
4670 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
4671 ARM::VST1q16LowQPseudo_UPD,
4672 ARM::VST1q32LowQPseudo_UPD,
4673 ARM::VST1q64LowQPseudo_UPD };
4674 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD,
4675 ARM::VST1q16HighQPseudo_UPD,
4676 ARM::VST1q32HighQPseudo_UPD,
4677 ARM::VST1q64HighQPseudo_UPD };
4678 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4679 return;
4680 }
4681 break;
4682 }
4683 case ARMISD::VST2LN_UPD: {
4684 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
4685 ARM::VST2LNd16Pseudo_UPD,
4686 ARM::VST2LNd32Pseudo_UPD };
4687 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
4688 ARM::VST2LNq32Pseudo_UPD };
4689 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
4690 return;
4691 }
4692
4693 case ARMISD::VST3LN_UPD: {
4694 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
4695 ARM::VST3LNd16Pseudo_UPD,
4696 ARM::VST3LNd32Pseudo_UPD };
4697 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
4698 ARM::VST3LNq32Pseudo_UPD };
4699 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
4700 return;
4701 }
4702
4703 case ARMISD::VST4LN_UPD: {
4704 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
4705 ARM::VST4LNd16Pseudo_UPD,
4706 ARM::VST4LNd32Pseudo_UPD };
4707 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
4708 ARM::VST4LNq32Pseudo_UPD };
4709 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
4710 return;
4711 }
4712
4715 unsigned IntNo = N->getConstantOperandVal(1);
4716 switch (IntNo) {
4717 default:
4718 break;
4719
4720 case Intrinsic::arm_mrrc:
4721 case Intrinsic::arm_mrrc2: {
4722 SDLoc dl(N);
4723 SDValue Chain = N->getOperand(0);
4724 unsigned Opc;
4725
4726 if (Subtarget->isThumb())
4727 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
4728 else
4729 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
4730
4732 Ops.push_back(getI32Imm(N->getConstantOperandVal(2), dl)); /* coproc */
4733 Ops.push_back(getI32Imm(N->getConstantOperandVal(3), dl)); /* opc */
4734 Ops.push_back(getI32Imm(N->getConstantOperandVal(4), dl)); /* CRm */
4735
      // The mrrc2 instruction in ARM doesn't allow predicates; the top 4 bits
      // of the encoded instruction will always be '1111'. It is possible in
      // assembly language to specify AL as a predicate to mrrc2, but that
      // makes no difference to the encoded instruction.
4739 if (Opc != ARM::MRRC2) {
4740 Ops.push_back(getAL(CurDAG, dl));
4741 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4742 }
4743
4744 Ops.push_back(Chain);
4745
4746 // Writes to two registers.
4747 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
4748
4749 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
4750 return;
4751 }
4752 case Intrinsic::arm_ldaexd:
4753 case Intrinsic::arm_ldrexd: {
4754 SDLoc dl(N);
4755 SDValue Chain = N->getOperand(0);
4756 SDValue MemAddr = N->getOperand(2);
4757 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
4758
4759 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
4760 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
4761 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
4762
      // arm_ldrexd returns an i64 value in {i32, i32}
4764 std::vector<EVT> ResTys;
4765 if (isThumb) {
4766 ResTys.push_back(MVT::i32);
4767 ResTys.push_back(MVT::i32);
4768 } else
4769 ResTys.push_back(MVT::Untyped);
4770 ResTys.push_back(MVT::Other);
4771
4772 // Place arguments in the right order.
4773 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
4774 CurDAG->getRegister(0, MVT::i32), Chain};
4775 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4776 // Transfer memoperands.
4777 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4778 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4779
4780 // Remap uses.
4781 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
4782 if (!SDValue(N, 0).use_empty()) {
4784 if (isThumb)
4785 Result = SDValue(Ld, 0);
4786 else {
4787 SDValue SubRegIdx =
4788 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
4789 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4790 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4791 Result = SDValue(ResNode,0);
4792 }
4793 ReplaceUses(SDValue(N, 0), Result);
4794 }
4795 if (!SDValue(N, 1).use_empty()) {
4797 if (isThumb)
4798 Result = SDValue(Ld, 1);
4799 else {
4800 SDValue SubRegIdx =
4801 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
4802 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4803 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4804 Result = SDValue(ResNode,0);
4805 }
4806 ReplaceUses(SDValue(N, 1), Result);
4807 }
4808 ReplaceUses(SDValue(N, 2), OutChain);
4809 CurDAG->RemoveDeadNode(N);
4810 return;
4811 }
4812 case Intrinsic::arm_stlexd:
4813 case Intrinsic::arm_strexd: {
4814 SDLoc dl(N);
4815 SDValue Chain = N->getOperand(0);
4816 SDValue Val0 = N->getOperand(2);
4817 SDValue Val1 = N->getOperand(3);
4818 SDValue MemAddr = N->getOperand(4);
4819
4820 // Store exclusive double return a i32 value which is the return status
4821 // of the issued store.
4822 const EVT ResTys[] = {MVT::i32, MVT::Other};
4823
4824 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
4825 // Place arguments in the right order.
4827 if (isThumb) {
4828 Ops.push_back(Val0);
4829 Ops.push_back(Val1);
4830 } else
4831 // arm_strexd uses GPRPair.
4832 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
4833 Ops.push_back(MemAddr);
4834 Ops.push_back(getAL(CurDAG, dl));
4835 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4836 Ops.push_back(Chain);
4837
4838 bool IsRelease = IntNo == Intrinsic::arm_stlexd;
4839 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
4840 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
4841
4842 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4843 // Transfer memoperands.
4844 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4845 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4846
4847 ReplaceNode(N, St);
4848 return;
4849 }
4850
4851 case Intrinsic::arm_neon_vld1: {
4852 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
4853 ARM::VLD1d32, ARM::VLD1d64 };
4854 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4855 ARM::VLD1q32, ARM::VLD1q64};
4856 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
4857 return;
4858 }
4859
4860 case Intrinsic::arm_neon_vld1x2: {
4861 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4862 ARM::VLD1q32, ARM::VLD1q64 };
4863 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
4864 ARM::VLD1d16QPseudo,
4865 ARM::VLD1d32QPseudo,
4866 ARM::VLD1d64QPseudo };
4867 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
4868 return;
4869 }
4870
4871 case Intrinsic::arm_neon_vld1x3: {
4872 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
4873 ARM::VLD1d16TPseudo,
4874 ARM::VLD1d32TPseudo,
4875 ARM::VLD1d64TPseudo };
4876 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
4877 ARM::VLD1q16LowTPseudo_UPD,
4878 ARM::VLD1q32LowTPseudo_UPD,
4879 ARM::VLD1q64LowTPseudo_UPD };
4880 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
4881 ARM::VLD1q16HighTPseudo,
4882 ARM::VLD1q32HighTPseudo,
4883 ARM::VLD1q64HighTPseudo };
4884 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4885 return;
4886 }
4887
4888 case Intrinsic::arm_neon_vld1x4: {
4889 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
4890 ARM::VLD1d16QPseudo,
4891 ARM::VLD1d32QPseudo,
4892 ARM::VLD1d64QPseudo };
4893 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
4894 ARM::VLD1q16LowQPseudo_UPD,
4895 ARM::VLD1q32LowQPseudo_UPD,
4896 ARM::VLD1q64LowQPseudo_UPD };
4897 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
4898 ARM::VLD1q16HighQPseudo,
4899 ARM::VLD1q32HighQPseudo,
4900 ARM::VLD1q64HighQPseudo };
4901 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4902 return;
4903 }
4904
4905 case Intrinsic::arm_neon_vld2: {
4906 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
4907 ARM::VLD2d32, ARM::VLD1q64 };
4908 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
4909 ARM::VLD2q32Pseudo };
4910 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
4911 return;
4912 }
4913
4914 case Intrinsic::arm_neon_vld3: {
4915 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
4916 ARM::VLD3d16Pseudo,
4917 ARM::VLD3d32Pseudo,
4918 ARM::VLD1d64TPseudo };
4919 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4920 ARM::VLD3q16Pseudo_UPD,
4921 ARM::VLD3q32Pseudo_UPD };
4922 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
4923 ARM::VLD3q16oddPseudo,
4924 ARM::VLD3q32oddPseudo };
4925 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4926 return;
4927 }
4928
4929 case Intrinsic::arm_neon_vld4: {
4930 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
4931 ARM::VLD4d16Pseudo,
4932 ARM::VLD4d32Pseudo,
4933 ARM::VLD1d64QPseudo };
4934 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
4935 ARM::VLD4q16Pseudo_UPD,
4936 ARM::VLD4q32Pseudo_UPD };
4937 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
4938 ARM::VLD4q16oddPseudo,
4939 ARM::VLD4q32oddPseudo };
4940 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4941 return;
4942 }
4943
4944 case Intrinsic::arm_neon_vld2dup: {
4945 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4946 ARM::VLD2DUPd32, ARM::VLD1q64 };
4947 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4948 ARM::VLD2DUPq16EvenPseudo,
4949 ARM::VLD2DUPq32EvenPseudo };
4950 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
4951 ARM::VLD2DUPq16OddPseudo,
4952 ARM::VLD2DUPq32OddPseudo };
4953 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
4954 DOpcodes, QOpcodes0, QOpcodes1);
4955 return;
4956 }
4957
4958 case Intrinsic::arm_neon_vld3dup: {
4959 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
4960 ARM::VLD3DUPd16Pseudo,
4961 ARM::VLD3DUPd32Pseudo,
4962 ARM::VLD1d64TPseudo };
4963 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4964 ARM::VLD3DUPq16EvenPseudo,
4965 ARM::VLD3DUPq32EvenPseudo };
4966 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
4967 ARM::VLD3DUPq16OddPseudo,
4968 ARM::VLD3DUPq32OddPseudo };
4969 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
4970 DOpcodes, QOpcodes0, QOpcodes1);
4971 return;
4972 }
4973
4974 case Intrinsic::arm_neon_vld4dup: {
4975 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
4976 ARM::VLD4DUPd16Pseudo,
4977 ARM::VLD4DUPd32Pseudo,
4978 ARM::VLD1d64QPseudo };
4979 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4980 ARM::VLD4DUPq16EvenPseudo,
4981 ARM::VLD4DUPq32EvenPseudo };
4982 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
4983 ARM::VLD4DUPq16OddPseudo,
4984 ARM::VLD4DUPq32OddPseudo };
4985 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
4986 DOpcodes, QOpcodes0, QOpcodes1);
4987 return;
4988 }
4989
4990 case Intrinsic::arm_neon_vld2lane: {
4991 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
4992 ARM::VLD2LNd16Pseudo,
4993 ARM::VLD2LNd32Pseudo };
4994 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
4995 ARM::VLD2LNq32Pseudo };
4996 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
4997 return;
4998 }
4999
5000 case Intrinsic::arm_neon_vld3lane: {
5001 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
5002 ARM::VLD3LNd16Pseudo,
5003 ARM::VLD3LNd32Pseudo };
5004 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
5005 ARM::VLD3LNq32Pseudo };
5006 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
5007 return;
5008 }
5009
5010 case Intrinsic::arm_neon_vld4lane: {
5011 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
5012 ARM::VLD4LNd16Pseudo,
5013 ARM::VLD4LNd32Pseudo };
5014 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
5015 ARM::VLD4LNq32Pseudo };
5016 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
5017 return;
5018 }
5019
5020 case Intrinsic::arm_neon_vst1: {
5021 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
5022 ARM::VST1d32, ARM::VST1d64 };
5023 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
5024 ARM::VST1q32, ARM::VST1q64 };
5025 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
5026 return;
5027 }
5028
5029 case Intrinsic::arm_neon_vst1x2: {
5030 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
5031 ARM::VST1q32, ARM::VST1q64 };
5032 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
5033 ARM::VST1d16QPseudo,
5034 ARM::VST1d32QPseudo,
5035 ARM::VST1d64QPseudo };
5036 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
5037 return;
5038 }
5039
5040 case Intrinsic::arm_neon_vst1x3: {
5041 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
5042 ARM::VST1d16TPseudo,
5043 ARM::VST1d32TPseudo,
5044 ARM::VST1d64TPseudo };
5045 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
5046 ARM::VST1q16LowTPseudo_UPD,
5047 ARM::VST1q32LowTPseudo_UPD,
5048 ARM::VST1q64LowTPseudo_UPD };
5049 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
5050 ARM::VST1q16HighTPseudo,
5051 ARM::VST1q32HighTPseudo,
5052 ARM::VST1q64HighTPseudo };
5053 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
5054 return;
5055 }
5056
5057 case Intrinsic::arm_neon_vst1x4: {
5058 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
5059 ARM::VST1d16QPseudo,
5060 ARM::VST1d32QPseudo,
5061 ARM::VST1d64QPseudo };
5062 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
5063 ARM::VST1q16LowQPseudo_UPD,
5064 ARM::VST1q32LowQPseudo_UPD,
5065 ARM::VST1q64LowQPseudo_UPD };
5066 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
5067 ARM::VST1q16HighQPseudo,
5068 ARM::VST1q32HighQPseudo,
5069 ARM::VST1q64HighQPseudo };
5070 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
5071 return;
5072 }
5073
5074 case Intrinsic::arm_neon_vst2: {
5075 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
5076 ARM::VST2d32, ARM::VST1q64 };
5077 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
5078 ARM::VST2q32Pseudo };
5079 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
5080 return;
5081 }
5082
5083 case Intrinsic::arm_neon_vst3: {
5084 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
5085 ARM::VST3d16Pseudo,
5086 ARM::VST3d32Pseudo,
5087 ARM::VST1d64TPseudo };
5088 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
5089 ARM::VST3q16Pseudo_UPD,
5090 ARM::VST3q32Pseudo_UPD };
5091 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
5092 ARM::VST3q16oddPseudo,
5093 ARM::VST3q32oddPseudo };
5094 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
5095 return;
5096 }
5097
5098 case Intrinsic::arm_neon_vst4: {
5099 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
5100 ARM::VST4d16Pseudo,
5101 ARM::VST4d32Pseudo,
5102 ARM::VST1d64QPseudo };
5103 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
5104 ARM::VST4q16Pseudo_UPD,
5105 ARM::VST4q32Pseudo_UPD };
5106 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
5107 ARM::VST4q16oddPseudo,
5108 ARM::VST4q32oddPseudo };
5109 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
5110 return;
5111 }
5112
5113 case Intrinsic::arm_neon_vst2lane: {
5114 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
5115 ARM::VST2LNd16Pseudo,
5116 ARM::VST2LNd32Pseudo };
5117 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
5118 ARM::VST2LNq32Pseudo };
5119 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
5120 return;
5121 }
5122
5123 case Intrinsic::arm_neon_vst3lane: {
5124 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
5125 ARM::VST3LNd16Pseudo,
5126 ARM::VST3LNd32Pseudo };
5127 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
5128 ARM::VST3LNq32Pseudo };
5129 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
5130 return;
5131 }
5132
5133 case Intrinsic::arm_neon_vst4lane: {
5134 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
5135 ARM::VST4LNd16Pseudo,
5136 ARM::VST4LNd32Pseudo };
5137 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
5138 ARM::VST4LNq32Pseudo };
5139 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
5140 return;
5141 }
5142
5143 case Intrinsic::arm_mve_vldr_gather_base_wb:
5144 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
5145 static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
5146 ARM::MVE_VLDRDU64_qi_pre};
5147 SelectMVE_WB(N, Opcodes,
5148 IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
5149 return;
5150 }
5151
5152 case Intrinsic::arm_mve_vld2q: {
5153 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
5154 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
5155 ARM::MVE_VLD21_16};
5156 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
5157 ARM::MVE_VLD21_32};
5158 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5159 SelectMVE_VLD(N, 2, Opcodes, false);
5160 return;
5161 }
5162
5163 case Intrinsic::arm_mve_vld4q: {
5164 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
5165 ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
5166 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
5167 ARM::MVE_VLD42_16,
5168 ARM::MVE_VLD43_16};
5169 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
5170 ARM::MVE_VLD42_32,
5171 ARM::MVE_VLD43_32};
5172 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5173 SelectMVE_VLD(N, 4, Opcodes, false);
5174 return;
5175 }
5176 }
5177 break;
5178 }
5179
5181 unsigned IntNo = N->getConstantOperandVal(0);
5182 switch (IntNo) {
5183 default:
5184 break;
5185
5186 // Scalar f32 -> bf16
5187 case Intrinsic::arm_neon_vcvtbfp2bf: {
5188 SDLoc dl(N);
5189 const SDValue &Src = N->getOperand(1);
5190 llvm::EVT DestTy = N->getValueType(0);
5191 SDValue Pred = getAL(CurDAG, dl);
5192 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
5193 SDValue Ops[] = { Src, Src, Pred, Reg0 };
5194 CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops);
5195 return;
5196 }
5197
5198 // Vector v4f32 -> v4bf16
5199 case Intrinsic::arm_neon_vcvtfp2bf: {
5200 SDLoc dl(N);
5201 const SDValue &Src = N->getOperand(1);
5202 SDValue Pred = getAL(CurDAG, dl);
5203 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
5204 SDValue Ops[] = { Src, Pred, Reg0 };
5205 CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops);
5206 return;
5207 }
5208
5209 case Intrinsic::arm_mve_urshrl:
5210 SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
5211 return;
5212 case Intrinsic::arm_mve_uqshll:
5213 SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false);
5214 return;
5215 case Intrinsic::arm_mve_srshrl:
5216 SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false);
5217 return;
5218 case Intrinsic::arm_mve_sqshll:
5219 SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false);
5220 return;
5221 case Intrinsic::arm_mve_uqrshll:
5222 SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true);
5223 return;
5224 case Intrinsic::arm_mve_sqrshrl:
5225 SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
5226 return;
5227
5228 case Intrinsic::arm_mve_vadc:
5229 case Intrinsic::arm_mve_vadc_predicated:
5230 SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
5231 IntNo == Intrinsic::arm_mve_vadc_predicated);
5232 return;
5233 case Intrinsic::arm_mve_vsbc:
5234 case Intrinsic::arm_mve_vsbc_predicated:
5235 SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, false,
5236 IntNo == Intrinsic::arm_mve_vsbc_predicated);
5237 return;
5238 case Intrinsic::arm_mve_vshlc:
5239 case Intrinsic::arm_mve_vshlc_predicated:
5240 SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated);
5241 return;
5242
5243 case Intrinsic::arm_mve_vmlldava:
5244 case Intrinsic::arm_mve_vmlldava_predicated: {
5245 static const uint16_t OpcodesU[] = {
5246 ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,
5247 ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,
5248 };
5249 static const uint16_t OpcodesS[] = {
5250 ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,
5251 ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,
5252 ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,
5253 ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
5254 ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
5255 ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
5256 ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
5257 ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
5258 };
5259 SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
5260 OpcodesS, OpcodesU);
5261 return;
5262 }
5263
5264 case Intrinsic::arm_mve_vrmlldavha:
5265 case Intrinsic::arm_mve_vrmlldavha_predicated: {
5266 static const uint16_t OpcodesU[] = {
5267 ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
5268 };
5269 static const uint16_t OpcodesS[] = {
5270 ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
5271 ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
5272 ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
5273 ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
5274 };
5275 SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
5276 OpcodesS, OpcodesU);
5277 return;
5278 }
5279
5280 case Intrinsic::arm_mve_vidup:
5281 case Intrinsic::arm_mve_vidup_predicated: {
5282 static const uint16_t Opcodes[] = {
5283 ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
5284 };
5285 SelectMVE_VxDUP(N, Opcodes, false,
5286 IntNo == Intrinsic::arm_mve_vidup_predicated);
5287 return;
5288 }
5289
5290 case Intrinsic::arm_mve_vddup:
5291 case Intrinsic::arm_mve_vddup_predicated: {
5292 static const uint16_t Opcodes[] = {
5293 ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
5294 };
5295 SelectMVE_VxDUP(N, Opcodes, false,
5296 IntNo == Intrinsic::arm_mve_vddup_predicated);
5297 return;
5298 }
5299
5300 case Intrinsic::arm_mve_viwdup:
5301 case Intrinsic::arm_mve_viwdup_predicated: {
5302 static const uint16_t Opcodes[] = {
5303 ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
5304 };
5305 SelectMVE_VxDUP(N, Opcodes, true,
5306 IntNo == Intrinsic::arm_mve_viwdup_predicated);
5307 return;
5308 }
5309
5310 case Intrinsic::arm_mve_vdwdup:
5311 case Intrinsic::arm_mve_vdwdup_predicated: {
5312 static const uint16_t Opcodes[] = {
5313 ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
5314 };
5315 SelectMVE_VxDUP(N, Opcodes, true,
5316 IntNo == Intrinsic::arm_mve_vdwdup_predicated);
5317 return;
5318 }
5319
5320 case Intrinsic::arm_cde_cx1d:
5321 case Intrinsic::arm_cde_cx1da:
5322 case Intrinsic::arm_cde_cx2d:
5323 case Intrinsic::arm_cde_cx2da:
5324 case Intrinsic::arm_cde_cx3d:
5325 case Intrinsic::arm_cde_cx3da: {
5326 bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||
5327 IntNo == Intrinsic::arm_cde_cx2da ||
5328 IntNo == Intrinsic::arm_cde_cx3da;
5329 size_t NumExtraOps;
5330 uint16_t Opcode;
5331 switch (IntNo) {
5332 case Intrinsic::arm_cde_cx1d:
5333 case Intrinsic::arm_cde_cx1da:
5334 NumExtraOps = 0;
5335 Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
5336 break;
5337 case Intrinsic::arm_cde_cx2d:
5338 case Intrinsic::arm_cde_cx2da:
5339 NumExtraOps = 1;
5340 Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
5341 break;
5342 case Intrinsic::arm_cde_cx3d:
5343 case Intrinsic::arm_cde_cx3da:
5344 NumExtraOps = 2;
5345 Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
5346 break;
5347 default:
5348 llvm_unreachable("Unexpected opcode");
5349 }
5350 SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
5351 return;
5352 }
5353 }
5354 break;
5355 }
5356
5358 SelectCMP_SWAP(N);
5359 return;
5360 }
5361
5362 SelectCode(N);
5363}
5364
5365// Inspect a register string of the form
5366// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
5367// cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
5368// and obtain the integer operands from them, adding these operands to the
5369// provided vector.
5371 SelectionDAG *CurDAG,
5372 const SDLoc &DL,
5373 std::vector<SDValue> &Ops) {
5375 RegString.split(Fields, ':');
5376
5377 if (Fields.size() > 1) {
5378 bool AllIntFields = true;
5379
5380 for (StringRef Field : Fields) {
5381 // Need to trim out leading 'cp' characters and get the integer field.
5382 unsigned IntField;
5383 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
5384 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
5385 }
5386
5387 assert(AllIntFields &&
5388 "Unexpected non-integer value in special register string.");
5389 (void)AllIntFields;
5390 }
5391}
5392
5393// Maps a Banked Register string to its mask value. The mask value returned is
5394// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
5395// mask operand, which expresses which register is to be used, e.g. r8, and in
5396// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
5397// was invalid.
5398static inline int getBankedRegisterMask(StringRef RegString) {
5399 auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
5400 if (!TheReg)
5401 return -1;
5402 return TheReg->Encoding;
5403}
5404
5405// The flags here are common to those allowed for apsr in the A class cores and
5406// those allowed for the special registers in the M class cores. Returns a
5407// value representing which flags were present, -1 if invalid.
5408static inline int getMClassFlagsMask(StringRef Flags) {
5409 return StringSwitch<int>(Flags)
5410 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
5411 // correct when flags are not permitted
5412 .Case("g", 0x1)
5413 .Case("nzcvq", 0x2)
5414 .Case("nzcvqg", 0x3)
5415 .Default(-1);
5416}
5417
5418// Maps MClass special registers string to its value for use in the
5419// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
5420// Returns -1 to signify that the string was invalid.
5421static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
5422 auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
5423 const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
5424 if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
5425 return -1;
5426 return (int)(TheReg->Encoding & 0xFFF); // SYSm value
5427}
5428
5430 // The mask operand contains the special register (R Bit) in bit 4, whether
5431 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
5432 // bits 3-0 contains the fields to be accessed in the special register, set by
5433 // the flags provided with the register.
5434 int Mask = 0;
5435 if (Reg == "apsr") {
5436 // The flags permitted for apsr are the same flags that are allowed in
5437 // M class registers. We get the flag value and then shift the flags into
5438 // the correct place to combine with the mask.
5439 Mask = getMClassFlagsMask(Flags);
5440 if (Mask == -1)
5441 return -1;
5442 return Mask << 2;
5443 }
5444
5445 if (Reg != "cpsr" && Reg != "spsr") {
5446 return -1;
5447 }
5448
5449 // This is the same as if the flags were "fc"
5450 if (Flags.empty() || Flags == "all")
5451 return Mask | 0x9;
5452
5453 // Inspect the supplied flags string and set the bits in the mask for
5454 // the relevant and valid flags allowed for cpsr and spsr.
5455 for (char Flag : Flags) {
5456 int FlagVal;
5457 switch (Flag) {
5458 case 'c':
5459 FlagVal = 0x1;
5460 break;
5461 case 'x':
5462 FlagVal = 0x2;
5463 break;
5464 case 's':
5465 FlagVal = 0x4;
5466 break;
5467 case 'f':
5468 FlagVal = 0x8;
5469 break;
5470 default:
5471 FlagVal = 0;
5472 }
5473
5474 // This avoids allowing strings where the same flag bit appears twice.
5475 if (!FlagVal || (Mask & FlagVal))
5476 return -1;
5477 Mask |= FlagVal;
5478 }
5479
5480 // If the register is spsr then we need to set the R bit.
5481 if (Reg == "spsr")
5482 Mask |= 0x10;
5483
5484 return Mask;
5485}
5486
// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
// Returns true if the node was replaced; false means the register string was
// not recognised, so the caller falls back to default selection.
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
  // Operand 1 is the metadata node that holds the register-name string.
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  // First try to interpret the string as a coprocessor-register description
  // (cp<n>:<opc1>:c<CRn>:c<CRm>:<opc2> or cp<n>:<opc1>:c<CRm>); if it is one,
  // Ops receives the decoded integer fields.
  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MRC node (32 bit) or
    // MRRC node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5){
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    // All these instructions are predicable: append the AL condition code,
    // the null predicate register, and finally the incoming chain.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }

  // Not a coprocessor description; match against named special registers
  // (case-insensitively).
  std::string SpecialReg = RegString->getString().lower();

  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMRS)
                        .Case("fpexc", ARM::VMRS_FPEXC)
                        .Case("fpsid", ARM::VMRS_FPSID)
                        .Case("mvfr0", ARM::VMRS_MVFR0)
                        .Case("mvfr1", ARM::VMRS_MVFR1)
                        .Case("mvfr2", ARM::VMRS_MVFR2)
                        .Case("fpinst", ARM::VMRS_FPINST)
                        .Case("fpinst2", ARM::VMRS_FPINST2)
                        .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    // Reading VFP system registers requires at least VFPv2; MVFR2 in
    // particular only exists from FP-ARMv8 onwards.
    if (!Subtarget->hasVFP2Base())
      return false;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
      return false;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // If the target is M Class then need to validate that the register string
  // is an acceptable value, so check that a mask can be constructed from the
  // string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M Class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
                                  MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Unknown register string: let default selection handle (or reject) it.
  return false;
}
5601
// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes
// Returns true if the node was replaced; false means the register string was
// not recognised, so the caller falls back to default selection.
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
  // Operand 1 is the metadata node holding the register-name string; operand 2
  // (and operand 3 for 64-bit writes) carry the value(s) to be written.
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  // First try to interpret the string as a coprocessor-register description;
  // if it is one, Ops receives the decoded integer fields.
  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MCR node (32 bit) or
    // MCRR node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      // The value to write goes between opc1 and CRn.
      Ops.insert(Ops.begin()+2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      // 64-bit write: both halves of the value go between opc1 and CRm.
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
    }

    // All these instructions are predicable: append the AL condition code,
    // the null predicate register, and finally the incoming chain.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  // Not a coprocessor description; match against named special registers
  // (case-insensitively).
  std::string SpecialReg = RegString->getString().lower();
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMSR)
                        .Case("fpexc", ARM::VMSR_FPEXC)
                        .Case("fpsid", ARM::VMSR_FPSID)
                        .Case("fpinst", ARM::VMSR_FPINST)
                        .Case("fpinst2", ARM::VMSR_FPINST2)
                        .Default(0);

  if (Opcode) {
    // Writing VFP system registers requires at least VFPv2.
    if (!Subtarget->hasVFP2Base())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  // Split an optional "_<flags>" suffix (e.g. "spsr_fc") off the register
  // name; the flags select which fields of the PSR get written.
  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;

  // If the target was M Class then need to validate the special register value
  // and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These values
  // are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  // Unknown register string: let default selection handle (or reject) it.
  return false;
}
5705
5706bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
5707 std::vector<SDValue> AsmNodeOperands;
5708 InlineAsm::Flag Flag;
5709 bool Changed = false;
5710 unsigned NumOps = N->getNumOperands();
5711
5712 // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
5713 // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
5714 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
5715 // respectively. Since there is no constraint to explicitly specify a
5716 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
5717 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
5718 // them into a GPRPair.
5719
5720 SDLoc dl(N);
5721 SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps - 1) : SDValue();
5722
5723 SmallVector<bool, 8> OpChanged;
5724 // Glue node will be appended late.
5725 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
5726 SDValue op = N->getOperand(i);
5727 AsmNodeOperands.push_back(op);
5728
5730 continue;
5731
5732 if (const auto *C = dyn_cast<ConstantSDNode>(N->getOperand(i)))
5733 Flag = InlineAsm::Flag(C->getZExtValue());
5734 else
5735 continue;
5736
5737 // Immediate operands to inline asm in the SelectionDAG are modeled with
5738 // two operands. The first is a constant of value InlineAsm::Kind::Imm, and
5739 // the second is a constant with the value of the immediate. If we get here
5740 // and we have a Kind::Imm, skip the next operand, and continue.
5741 if (Flag.isImmKind()) {
5742 SDValue op = N->getOperand(++i);
5743 AsmNodeOperands.push_back(op);
5744 continue;
5745 }
5746
5747 const unsigned NumRegs = Flag.getNumOperandRegisters();
5748 if (NumRegs)
5749 OpChanged.push_back(false);
5750
5751 unsigned DefIdx = 0;
5752 bool IsTiedToChangedOp = false;
5753 // If it's a use that is tied with a previous def, it has no
5754 // reg class constraint.
5755 if (Changed && Flag.isUseOperandTiedToDef(DefIdx))
5756 IsTiedToChangedOp = OpChanged[DefIdx];
5757
5758 // Memory operands to inline asm in the SelectionDAG are modeled with two
5759 // operands: a constant of value InlineAsm::Kind::Mem followed by the input
5760 // operand. If we get here and we have a Kind::Mem, skip the next operand
5761 // (so it doesn't get misinterpreted), and continue. We do this here because
5762 // it's important to update the OpChanged array correctly before moving on.
5763 if (Flag.isMemKind()) {
5764 SDValue op = N->getOperand(++i);
5765 AsmNodeOperands.push_back(op);
5766 continue;
5767 }
5768
5769 if (!Flag.isRegUseKind() && !Flag.isRegDefKind() &&
5770 !Flag.isRegDefEarlyClobberKind())
5771 continue;
5772
5773 unsigned RC;
5774 const bool HasRC = Flag.hasRegClassConstraint(RC);
5775 if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
5776 || NumRegs != 2)
5777 continue;
5778
5779 assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
5780 SDValue V0 = N->getOperand(i+1);
5781 SDValue V1 = N->getOperand(i+2);
5782 Register Reg0 = cast<RegisterSDNode>(V0)->getReg();
5783 Register Reg1 = cast<RegisterSDNode>(V1)->getReg();
5784 SDValue PairedReg;
5785 MachineRegisterInfo &MRI = MF->getRegInfo();
5786
5787 if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) {
5788 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
5789 // the original GPRs.
5790
5791 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
5792 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
5793 SDValue Chain = SDValue(N,0);
5794
5795 SDNode *GU = N->getGluedUser();
5796 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
5797 Chain.getValue(1));
5798
5799 // Extract values from a GPRPair reg and copy to the original GPR reg.
5800 SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
5801 RegCopy);
5802 SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
5803 RegCopy);
5804 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
5805 RegCopy.getValue(1));
5806 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
5807
5808 // Update the original glue user.
5809 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
5810 Ops.push_back(T1.getValue(1));
5811 CurDAG->UpdateNodeOperands(GU, Ops);
5812 } else {
5813 // For Kind == InlineAsm::Kind::RegUse, we first copy two GPRs into a
5814 // GPRPair and then pass the GPRPair to the inline asm.
5815 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
5816
5817 // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
5818 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
5819 Chain.getValue(1));
5820 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
5821 T0.getValue(1));
5822 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
5823
5824 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
5825 // i32 VRs of inline asm with it.
5826 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
5827 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
5828 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
5829
5830 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
5831 Glue = Chain.getValue(1);
5832 }
5833
5834 Changed = true;
5835
5836 if(PairedReg.getNode()) {
5837 OpChanged[OpChanged.size() -1 ] = true;
5838 Flag = InlineAsm::Flag(Flag.getKind(), 1 /* RegNum*/);
5839 if (IsTiedToChangedOp)
5840 Flag.setMatchingOp(DefIdx);
5841 else
5842 Flag.setRegClass(ARM::GPRPairRegClassID);
5843 // Replace the current flag.
5844 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
5845 Flag, dl, MVT::i32);
5846 // Add the new register node and skip the original two GPRs.
5847 AsmNodeOperands.push_back(PairedReg);
5848 // Skip the next two GPRs.
5849 i += 2;
5850 }
5851 }
5852
5853 if (Glue.getNode())
5854 AsmNodeOperands.push_back(Glue);
5855 if (!Changed)
5856 return false;
5857
5858 SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
5859 CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
5860 New->setNodeId(-1);
5861 ReplaceNode(N, New.getNode());
5862 return true;
5863}
5864
5865bool ARMDAGToDAGISel::SelectInlineAsmMemoryOperand(
5866 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
5867 std::vector<SDValue> &OutOps) {
5868 switch(ConstraintID) {
5869 default:
5870 llvm_unreachable("Unexpected asm memory constraint");
5871 case InlineAsm::ConstraintCode::m:
5872 case InlineAsm::ConstraintCode::o:
5873 case InlineAsm::ConstraintCode::Q:
5874 case InlineAsm::ConstraintCode::Um:
5875 case InlineAsm::ConstraintCode::Un:
5876 case InlineAsm::ConstraintCode::Uq:
5877 case InlineAsm::ConstraintCode::Us:
5878 case InlineAsm::ConstraintCode::Ut:
5879 case InlineAsm::ConstraintCode::Uv:
5880 case InlineAsm::ConstraintCode::Uy:
5881 // Require the address to be in a register. That is safe for all ARM
5882 // variants and it is hard to do anything much smarter without knowing
5883 // how the operand is used.
5884 OutOps.push_back(Op);
5885 return false;
5886 }
5887 return true;
5888}
5889
5890/// createARMISelDag - This pass converts a legalized DAG into a
5891/// ARM-specific DAG, ready for instruction scheduling.
5892///
5894 CodeGenOptLevel OptLevel) {
5895 return new ARMDAGToDAGISelLegacy(TM, OptLevel);
5896}
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
return SDValue()
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isThumb(const MCSubtargetInfo &STI)
static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3], unsigned Opc128[3])
static int getBankedRegisterMask(StringRef RegString)
static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs)
Returns true if the given increment is a Constant known to be equal to the access size performed by a...
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc)
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm)
static bool isVSTfixed(unsigned Opc)
static bool isVLDfixed(unsigned Opc)
static bool isInt32Immediate(SDNode *N, unsigned &Imm)
isInt32Immediate - This method tests to see if the node is a 32-bit constant operand.
static std::optional< std::pair< unsigned, unsigned > > getContiguousRangeOfSetBits(const APInt &A)
static void getIntOperandsFromRegisterString(StringRef RegString, SelectionDAG *CurDAG, const SDLoc &DL, std::vector< SDValue > &Ops)
static int getARClassRegisterMask(StringRef Reg, StringRef Flags)
static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget)
static cl::opt< bool > DisableShifterOp("disable-shifter-op", cl::Hidden, cl::desc("Disable isel of shifter-op"), cl::init(false))
static SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl)
getAL - Returns a ARMCC::AL immediate node.
static bool shouldUseZeroOffsetLdSt(SDValue N)
static int getMClassFlagsMask(StringRef Flags)
static bool SDValueToConstBool(SDValue SDVal)
static bool isScaledConstantInRange(SDValue Node, int Scale, int RangeMin, int RangeMax, int &ScaledConstant)
Check whether a particular node is a constant value representable as (N * Scale) where (N in [RangeMi...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define DEBUG_TYPE
#define op(i)
const HexagonInstrInfo * TII
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T1
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
OptimizedStructLayoutField Field
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
This file describes how to lower LLVM code to machine code.
#define PASS_NAME
Value * RHS
Value * LHS
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1387
Class for arbitrary precision integers.
Definition APInt.h:78
bool isSwift() const
bool isThumb1Only() const
bool hasFPARMv8Base() const
bool isThumb2() const
bool isLikeA9() const
bool hasVFP2Base() const
bool isLittle() const
bool isMClass() const
uint64_t getZExtValue() const
Container class for subtarget features.
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
bool mayStore() const
Return true if this instruction could possibly modify memory.
unsigned getOpcode() const
Return the opcode number for this descriptor.
SimpleValueType SimpleTy
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineOperand & getOperand(unsigned i) const
@ MOLoad
The memory access reads data.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Align getAlign() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
int getNodeId() const
Return the unique node id.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
op_iterator op_end() const
op_iterator op_begin() const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
LLVM_ABI std::string lower() const
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
LLVM Value Representation.
Definition Value.h:75
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
static ShiftOpc getShiftOpcForNode(unsigned Opcode)
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into an shifter_operand immed...
uint64_t decodeVMOVModImm(unsigned ModImm, unsigned &EltBits)
decodeVMOVModImm - Decode a NEON/MVE modified immediate value into the element value and the element ...
float getFPImmFloat(unsigned Imm)
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset)
getAM5Opc - This function encodes the addrmode5 opc field.
unsigned getAM5FP16Opc(AddrOpc Opc, unsigned char Offset)
getAM5FP16Opc - This function encodes the addrmode5fp16 opc field.
unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, unsigned IdxMode=0)
getAM3Opc - This function encodes the addrmode3 opc field.
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ TargetConstantPool
Definition ISDOpcodes.h:189
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ TargetExternalSymbol
Definition ISDOpcodes.h:190
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:230
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition ISDOpcodes.h:185
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:139
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:959
@ INLINEASM
INLINEASM - Represents an inline asm block.
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:945
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ TargetGlobalTLSAddress
Definition ISDOpcodes.h:186
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
@ ARM
Windows AXP64.
Definition MCAsmInfo.h:47
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:315
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
FunctionPass * createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOptLevel OptLevel)
createARMISelDag - This pass converts a legalized DAG into a ARM-specific DAG, ready for instruction ...
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition MathExtras.h:267
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:204
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
unsigned ConstantMaterializationCost(unsigned Val, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns the number of instructions required to materialize the given constant in a register,...
@ FMul
Product of floats.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
@ NearestTiesToEven
roundTiesToEven.
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
#define NC
Definition regutils.h:42
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Extended Value Type.
Definition ValueTypes.h:35
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:215
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition ValueTypes.h:210
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.