LLVM 22.0.0git
ARMISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the ARM target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ARM.h"
14#include "ARMBaseInstrInfo.h"
15#include "ARMTargetMachine.h"
17#include "Utils/ARMBaseInfo.h"
18#include "llvm/ADT/APSInt.h"
27#include "llvm/IR/Constants.h"
29#include "llvm/IR/Function.h"
30#include "llvm/IR/Intrinsics.h"
31#include "llvm/IR/IntrinsicsARM.h"
32#include "llvm/IR/LLVMContext.h"
36#include <optional>
37
38using namespace llvm;
39
40#define DEBUG_TYPE "arm-isel"
41#define PASS_NAME "ARM Instruction Selection"
42
// Hidden debugging option: when set, complex shifter-operand matching is
// skipped during instruction selection.
static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
                 cl::desc("Disable isel of shifter-op"),
                 cl::init(false));
47
48//===--------------------------------------------------------------------===//
49/// ARMDAGToDAGISel - ARM specific code to select ARM machine
50/// instructions for SelectionDAG operations.
51///
52namespace {
53
54class ARMDAGToDAGISel : public SelectionDAGISel {
55 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
56 /// make the right decision when generating code for different targets.
57 const ARMSubtarget *Subtarget;
58
59public:
60 ARMDAGToDAGISel() = delete;
61
62 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
63 : SelectionDAGISel(tm, OptLevel) {}
64
65 bool runOnMachineFunction(MachineFunction &MF) override {
66 // Reset the subtarget each time through.
67 Subtarget = &MF.getSubtarget<ARMSubtarget>();
69 return true;
70 }
71
72 void PreprocessISelDAG() override;
73
74 /// getI32Imm - Return a target constant of type i32 with the specified
75 /// value.
76 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
77 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
78 }
79
80 void Select(SDNode *N) override;
81
82 /// Return true as some complex patterns, like those that call
83 /// canExtractShiftFromMul can modify the DAG inplace.
84 bool ComplexPatternFuncMutatesDAG() const override { return true; }
85
86 bool hasNoVMLxHazardUse(SDNode *N) const;
87 bool isShifterOpProfitable(const SDValue &Shift,
88 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
89 bool SelectRegShifterOperand(SDValue N, SDValue &A,
90 SDValue &B, SDValue &C,
91 bool CheckProfitability = true);
92 bool SelectImmShifterOperand(SDValue N, SDValue &A,
93 SDValue &B, bool CheckProfitability = true);
94 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
95 SDValue &C) {
96 // Don't apply the profitability check
97 return SelectRegShifterOperand(N, A, B, C, false);
98 }
99 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
100 // Don't apply the profitability check
101 return SelectImmShifterOperand(N, A, B, false);
102 }
103 bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
104 if (!N.hasOneUse())
105 return false;
106 return SelectImmShifterOperand(N, A, B, false);
107 }
108
109 bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);
110
111 bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
112 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
113
114 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
116 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
118 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
120 bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
121 bool SelectAddrMode3(SDValue N, SDValue &Base,
123 bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
125 bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
126 bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
127 bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
128 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
129 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
130
131 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
132
133 // Thumb Addressing Modes:
134 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
135 bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
136 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
137 SDValue &OffImm);
138 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
139 SDValue &OffImm);
140 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
141 SDValue &OffImm);
142 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
143 SDValue &OffImm);
144 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
145 template <unsigned Shift>
146 bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
147
148 // Thumb 2 Addressing Modes:
149 bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
150 template <unsigned Shift>
151 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
152 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
153 SDValue &OffImm);
154 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
155 SDValue &OffImm);
156 template <unsigned Shift>
157 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
158 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
159 unsigned Shift);
160 template <unsigned Shift>
161 bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
162 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
163 SDValue &OffReg, SDValue &ShImm);
164 bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
165
166 template<int Min, int Max>
167 bool SelectImmediateInRange(SDValue N, SDValue &OffImm);
168
169 inline bool is_so_imm(unsigned Imm) const {
170 return ARM_AM::getSOImmVal(Imm) != -1;
171 }
172
173 inline bool is_so_imm_not(unsigned Imm) const {
174 return ARM_AM::getSOImmVal(~Imm) != -1;
175 }
176
177 inline bool is_t2_so_imm(unsigned Imm) const {
178 return ARM_AM::getT2SOImmVal(Imm) != -1;
179 }
180
181 inline bool is_t2_so_imm_not(unsigned Imm) const {
182 return ARM_AM::getT2SOImmVal(~Imm) != -1;
183 }
184
185 // Include the pieces autogenerated from the target description.
186#include "ARMGenDAGISel.inc"
187
188private:
189 void transferMemOperands(SDNode *Src, SDNode *Dst);
190
191 /// Indexed (pre/post inc/dec) load matching code for ARM.
192 bool tryARMIndexedLoad(SDNode *N);
193 bool tryT1IndexedLoad(SDNode *N);
194 bool tryT2IndexedLoad(SDNode *N);
195 bool tryMVEIndexedLoad(SDNode *N);
196 bool tryFMULFixed(SDNode *N, SDLoc dl);
197 bool tryFP_TO_INT(SDNode *N, SDLoc dl);
198 bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul,
199 bool IsUnsigned,
200 bool FixedToFloat);
201
202 /// SelectVLD - Select NEON load intrinsics. NumVecs should be
203 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
204 /// loads of D registers and even subregs and odd subregs of Q registers.
205 /// For NumVecs <= 2, QOpcodes1 is not used.
206 void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
207 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
208 const uint16_t *QOpcodes1);
209
210 /// SelectVST - Select NEON store intrinsics. NumVecs should
211 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
212 /// stores of D registers and even subregs and odd subregs of Q registers.
213 /// For NumVecs <= 2, QOpcodes1 is not used.
214 void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
215 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
216 const uint16_t *QOpcodes1);
217
218 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
219 /// be 2, 3 or 4. The opcode arrays specify the instructions used for
220 /// load/store of D registers and Q registers.
221 void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
222 unsigned NumVecs, const uint16_t *DOpcodes,
223 const uint16_t *QOpcodes);
224
225 /// Helper functions for setting up clusters of MVE predication operands.
226 template <typename SDValueVector>
227 void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
228 SDValue PredicateMask);
229 template <typename SDValueVector>
230 void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
231 SDValue PredicateMask, SDValue Inactive);
232
233 template <typename SDValueVector>
234 void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
235 template <typename SDValueVector>
236 void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);
237
238 /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
239 void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);
240
241 /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
242 void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
243 bool HasSaturationOperand);
244
245 /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
246 void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
247 uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);
248
249 /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
250 /// vector lanes.
251 void SelectMVE_VSHLC(SDNode *N, bool Predicated);
252
253 /// Select long MVE vector reductions with two vector operands
254 /// Stride is the number of vector element widths the instruction can operate
255 /// on:
256 /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
257 /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
258 /// Stride is used when addressing the OpcodesS array which contains multiple
259 /// opcodes for each element width.
260 /// TySize is the index into the list of element types listed above
261 void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
262 const uint16_t *OpcodesS, const uint16_t *OpcodesU,
263 size_t Stride, size_t TySize);
264
265 /// Select a 64-bit MVE vector reduction with two vector operands
266 /// arm_mve_vmlldava_[predicated]
267 void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
268 const uint16_t *OpcodesU);
269 /// Select a 72-bit MVE vector rounding reduction with two vector operands
270 /// int_arm_mve_vrmlldavha[_predicated]
271 void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
272 const uint16_t *OpcodesU);
273
274 /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
275 /// should be 2 or 4. The opcode array specifies the instructions
276 /// used for 8, 16 and 32-bit lane sizes respectively, and each
277 /// pointer points to a set of NumVecs sub-opcodes used for the
278 /// different stages (e.g. VLD20 versus VLD21) of each load family.
279 void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
280 const uint16_t *const *Opcodes, bool HasWriteback);
281
282 /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
283 /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
284 void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
285 bool Wrapping, bool Predicated);
286
287 /// Select SelectCDE_CXxD - Select CDE dual-GPR instruction (one of CX1D,
288 /// CX1DA, CX2D, CX2DA, CX3, CX3DA).
289 /// \arg \c NumExtraOps number of extra operands besides the coprocossor,
290 /// the accumulator and the immediate operand, i.e. 0
291 /// for CX1*, 1 for CX2*, 2 for CX3*
292 /// \arg \c HasAccum whether the instruction has an accumulator operand
293 void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
294 bool HasAccum);
295
296 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
297 /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
298 /// for loading D registers.
299 void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
300 unsigned NumVecs, const uint16_t *DOpcodes,
301 const uint16_t *QOpcodes0 = nullptr,
302 const uint16_t *QOpcodes1 = nullptr);
303
304 /// Try to select SBFX/UBFX instructions for ARM.
305 bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
306
307 bool tryInsertVectorElt(SDNode *N);
308
309 bool tryReadRegister(SDNode *N);
310 bool tryWriteRegister(SDNode *N);
311
312 bool tryInlineAsm(SDNode *N);
313
314 void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
315
316 void SelectCMP_SWAP(SDNode *N);
317
318 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
319 /// inline asm expressions.
320 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
321 InlineAsm::ConstraintCode ConstraintID,
322 std::vector<SDValue> &OutOps) override;
323
324 // Form pairs of consecutive R, S, D, or Q registers.
325 SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
326 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
327 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
328 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
329
330 // Form sequences of 4 consecutive S, D, or Q registers.
331 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
332 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
333 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
334
335 // Get the alignment operand for a NEON VLD or VST instruction.
336 SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
337 bool is64BitVector);
338
339 /// Checks if N is a multiplication by a constant where we can extract out a
340 /// power of two from the constant so that it can be used in a shift, but only
341 /// if it simplifies the materialization of the constant. Returns true if it
342 /// is, and assigns to PowerOfTwo the power of two that should be extracted
343 /// out and to NewMulConst the new constant to be multiplied by.
344 bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
345 unsigned &PowerOfTwo, SDValue &NewMulConst) const;
346
347 /// Replace N with M in CurDAG, in a way that also ensures that M gets
348 /// selected when N would have been selected.
349 void replaceDAGValue(const SDValue &N, SDValue M);
350};
351
// Legacy pass-manager wrapper: owns an ARMDAGToDAGISel and exposes it as a
// MachineFunction pass.
class ARMDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
public:
  static char ID;
  ARMDAGToDAGISelLegacy(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
      : SelectionDAGISelLegacy(
            ID, std::make_unique<ARMDAGToDAGISel>(tm, OptLevel)) {}
};
359}
360
// Unique pass ID for the legacy pass manager.
char ARMDAGToDAGISelLegacy::ID = 0;

// Register the pass under the "arm-isel" debug-type / pass name.
INITIALIZE_PASS(ARMDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
364
365/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
366/// operand. If so Imm will receive the 32-bit value.
367static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
368 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
369 Imm = N->getAsZExtVal();
370 return true;
371 }
372 return false;
373}
374
// isInt32Immediate - Convenience overload of the above that tests whether the
// SDValue is a 32-bit constant operand. If so Imm will receive the 32 bit
// value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}
380
381// isOpcWithIntImmediate - This method tests to see if the node is a specific
382// opcode and that it has a immediate integer right operand.
383// If so Imm will receive the 32 bit value.
384static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
385 return N->getOpcode() == Opc &&
386 isInt32Immediate(N->getOperand(1).getNode(), Imm);
387}
388
389/// Check whether a particular node is a constant value representable as
390/// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
391///
392/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
393static bool isScaledConstantInRange(SDValue Node, int Scale,
394 int RangeMin, int RangeMax,
395 int &ScaledConstant) {
396 assert(Scale > 0 && "Invalid scale!");
397
398 // Check that this is a constant.
400 if (!C)
401 return false;
402
403 ScaledConstant = (int) C->getZExtValue();
404 if ((ScaledConstant % Scale) != 0)
405 return false;
406
407 ScaledConstant /= Scale;
408 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
409}
410
// Pre-selection DAG rewrite: reshape (add X1, (and (srl X2, c1), c2)) so that
// the AND/SRL become a UBFX and the scaling folds into the add's shifter
// operand. Only worthwhile when v6T2 bitfield instructions are available.
void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  // NOTE(review): named isThumb2 but queries isThumb(); since v6T2 ops are
  // present here, a Thumb target implies Thumb2 — confirm against Subtarget.
  bool isThumb2 = Subtarget->isThumb();
  // We use make_early_inc_range to avoid invalidation issues.
  for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
    if (N.getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as an shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).

    SDValue N0 = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    unsigned And_imm = 0;
    // Canonicalize so that the AND (if present) ends up in N1.
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = llvm::countr_zero(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, left shifter operand of 1 / 2 for free but others are not.
      // e.g.
      //  ubfx   r3, r1, #16, #8
      //  ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //  mov.w  r9, #1020
      //  and.w  r2, r9, r1, lsr #14
      //  ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    // After stripping the trailing zeros the mask must be contiguous ones.
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(&N, N0, N1);
  }
}
491
/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  // At -O0 we don't try to avoid scheduling hazards.
  if (OptLevel == CodeGenOptLevel::None)
    return true;

  // Targets without the hazard never need to avoid VMLx.
  if (!Subtarget->hasVMLxHazards())
    return true;

  // With more than one use we can't reason about the single consumer below.
  if (!N->hasOneUse())
    return false;

  SDNode *User = *N->user_begin();
  if (User->getOpcode() == ISD::CopyToReg)
    return true;
  if (User->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    // vmla
    // vmla (stall 8 cycles)
    //
    // vmul (5 cycles)
    // vadd (5 cycles)
    // vmla
    // This adds up to about 18 - 19 cycles.
    //
    // vmla
    // vmul (stall 4 cycles)
    // vadd adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}
536
537bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
538 ARM_AM::ShiftOpc ShOpcVal,
539 unsigned ShAmt) {
540 if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
541 return true;
542 if (Shift.hasOneUse())
543 return true;
544 // R << 2 is free.
545 return ShOpcVal == ARM_AM::lsl &&
546 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
547}
548
549bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
550 unsigned MaxShift,
551 unsigned &PowerOfTwo,
552 SDValue &NewMulConst) const {
553 assert(N.getOpcode() == ISD::MUL);
554 assert(MaxShift > 0);
555
556 // If the multiply is used in more than one place then changing the constant
557 // will make other uses incorrect, so don't.
558 if (!N.hasOneUse()) return false;
559 // Check if the multiply is by a constant
560 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
561 if (!MulConst) return false;
562 // If the constant is used in more than one place then modifying it will mean
563 // we need to materialize two constants instead of one, which is a bad idea.
564 if (!MulConst->hasOneUse()) return false;
565 unsigned MulConstVal = MulConst->getZExtValue();
566 if (MulConstVal == 0) return false;
567
568 // Find the largest power of 2 that MulConstVal is a multiple of
569 PowerOfTwo = MaxShift;
570 while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
571 --PowerOfTwo;
572 if (PowerOfTwo == 0) return false;
573 }
574
575 // Only optimise if the new cost is better
576 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
577 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
578 unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
579 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
580 return NewCost < OldCost;
581}
582
/// Replace N with M in CurDAG, in a way that also ensures that M gets
/// selected when N would have been selected.
void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  // Move M to N's position in the node list before redirecting uses —
  // presumably so the selector still visits M in N's place (TODO confirm).
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}
587
588bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
589 SDValue &BaseReg,
590 SDValue &Opc,
591 bool CheckProfitability) {
593 return false;
594
595 // If N is a multiply-by-constant and it's profitable to extract a shift and
596 // use it in a shifted operand do so.
597 if (N.getOpcode() == ISD::MUL) {
598 unsigned PowerOfTwo = 0;
599 SDValue NewMulConst;
600 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
601 HandleSDNode Handle(N);
602 SDLoc Loc(N);
603 replaceDAGValue(N.getOperand(1), NewMulConst);
604 BaseReg = Handle.getValue();
605 Opc = CurDAG->getTargetConstant(
606 ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
607 return true;
608 }
609 }
610
611 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
612
613 // Don't match base register only case. That is matched to a separate
614 // lower complexity pattern with explicit register operand.
615 if (ShOpcVal == ARM_AM::no_shift) return false;
616
617 BaseReg = N.getOperand(0);
618 unsigned ShImmVal = 0;
619 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
620 if (!RHS) return false;
621 ShImmVal = RHS->getZExtValue() & 31;
622 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
623 SDLoc(N), MVT::i32);
624 return true;
625}
626
627bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
628 SDValue &BaseReg,
629 SDValue &ShReg,
630 SDValue &Opc,
631 bool CheckProfitability) {
633 return false;
634
635 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
636
637 // Don't match base register only case. That is matched to a separate
638 // lower complexity pattern with explicit register operand.
639 if (ShOpcVal == ARM_AM::no_shift) return false;
640
641 BaseReg = N.getOperand(0);
642 unsigned ShImmVal = 0;
643 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
644 if (RHS) return false;
645
646 ShReg = N.getOperand(1);
647 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
648 return false;
649 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
650 SDLoc(N), MVT::i32);
651 return true;
652}
653
// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  // When the two operands share no set bits, OR and ADD are equivalent.
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}
661
662
/// Match an address of the form base register plus a signed 12-bit immediate
/// offset. Always succeeds, falling back to "base only" with a zero offset.
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    // Strip an ARMISD::Wrapper, but not when it wraps one of the Target*
    // address nodes listed below.
    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    // Treat SUB as an ADD of the negated offset.
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
713
714
715
716bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
717 SDValue &Opc) {
718 if (N.getOpcode() == ISD::MUL &&
719 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
720 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
721 // X * [3,5,9] -> X + X * [2,4,8] etc.
722 int RHSC = (int)RHS->getZExtValue();
723 if (RHSC & 1) {
724 RHSC = RHSC & ~1;
726 if (RHSC < 0) {
728 RHSC = - RHSC;
729 }
730 if (isPowerOf2_32(RHSC)) {
731 unsigned ShAmt = Log2_32(RHSC);
732 Base = Offset = N.getOperand(0);
733 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
735 SDLoc(N), MVT::i32);
736 return true;
737 }
738 }
739 }
740 }
741
742 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
743 // ISD::OR that is equivalent to an ISD::ADD.
744 !CurDAG->isBaseWithConstantOffset(N))
745 return false;
746
747 // Leave simple R +/- imm12 operands for LDRi12
748 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
749 int RHSC;
750 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
751 -0x1000+1, 0x1000, RHSC)) // 12 bits.
752 return false;
753 }
754
755 // Otherwise this is R +/- [possibly shifted] R.
757 ARM_AM::ShiftOpc ShOpcVal =
758 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
759 unsigned ShAmt = 0;
760
761 Base = N.getOperand(0);
762 Offset = N.getOperand(1);
763
764 if (ShOpcVal != ARM_AM::no_shift) {
765 // Check to see if the RHS of the shift is a constant, if not, we can't fold
766 // it.
767 if (ConstantSDNode *Sh =
768 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
769 ShAmt = Sh->getZExtValue();
770 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
771 Offset = N.getOperand(1).getOperand(0);
772 else {
773 ShAmt = 0;
774 ShOpcVal = ARM_AM::no_shift;
775 }
776 } else {
777 ShOpcVal = ARM_AM::no_shift;
778 }
779 }
780
781 // Try matching (R shl C) + (R).
782 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
783 !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
784 N.getOperand(0).hasOneUse())) {
785 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
786 if (ShOpcVal != ARM_AM::no_shift) {
787 // Check to see if the RHS of the shift is a constant, if not, we can't
788 // fold it.
789 if (ConstantSDNode *Sh =
790 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
791 ShAmt = Sh->getZExtValue();
792 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
793 Offset = N.getOperand(0).getOperand(0);
794 Base = N.getOperand(1);
795 } else {
796 ShAmt = 0;
797 ShOpcVal = ARM_AM::no_shift;
798 }
799 } else {
800 ShOpcVal = ARM_AM::no_shift;
801 }
802 }
803 }
804
805 // If Offset is a multiply-by-constant and it's profitable to extract a shift
806 // and use it in a shifted operand do so.
807 if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
808 unsigned PowerOfTwo = 0;
809 SDValue NewMulConst;
810 if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
811 HandleSDNode Handle(Offset);
812 replaceDAGValue(Offset.getOperand(1), NewMulConst);
813 Offset = Handle.getValue();
814 ShAmt = PowerOfTwo;
815 ShOpcVal = ARM_AM::lsl;
816 }
817 }
818
819 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
820 SDLoc(N), MVT::i32);
821 return true;
822}
823
824bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
826 unsigned Opcode = Op->getOpcode();
827 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
828 ? cast<LoadSDNode>(Op)->getAddressingMode()
829 : cast<StoreSDNode>(Op)->getAddressingMode();
831 ? ARM_AM::add : ARM_AM::sub;
832 int Val;
833 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
834 return false;
835
836 Offset = N;
837 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
838 unsigned ShAmt = 0;
839 if (ShOpcVal != ARM_AM::no_shift) {
840 // Check to see if the RHS of the shift is a constant, if not, we can't fold
841 // it.
842 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
843 ShAmt = Sh->getZExtValue();
844 if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
845 Offset = N.getOperand(0);
846 else {
847 ShAmt = 0;
848 ShOpcVal = ARM_AM::no_shift;
849 }
850 } else {
851 ShOpcVal = ARM_AM::no_shift;
852 }
853 }
854
855 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
856 SDLoc(N), MVT::i32);
857 return true;
858}
859
860bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
862 unsigned Opcode = Op->getOpcode();
863 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
864 ? cast<LoadSDNode>(Op)->getAddressingMode()
865 : cast<StoreSDNode>(Op)->getAddressingMode();
867 ? ARM_AM::add : ARM_AM::sub;
868 int Val;
869 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
870 if (AddSub == ARM_AM::sub) Val *= -1;
871 Offset = CurDAG->getRegister(0, MVT::i32);
872 Opc = CurDAG->getSignedTargetConstant(Val, SDLoc(Op), MVT::i32);
873 return true;
874 }
875
876 return false;
877}
878
879
880bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
882 unsigned Opcode = Op->getOpcode();
883 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
884 ? cast<LoadSDNode>(Op)->getAddressingMode()
885 : cast<StoreSDNode>(Op)->getAddressingMode();
887 ? ARM_AM::add : ARM_AM::sub;
888 int Val;
889 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
890 Offset = CurDAG->getRegister(0, MVT::i32);
891 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
893 SDLoc(Op), MVT::i32);
894 return true;
895 }
896
897 return false;
898}
899
/// Accept any address as-is: the instruction form takes a bare base register
/// with no offset operand.
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}
904
905bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
907 SDValue &Opc) {
908 if (N.getOpcode() == ISD::SUB) {
909 // X - C is canonicalize to X + -C, no need to handle it here.
910 Base = N.getOperand(0);
911 Offset = N.getOperand(1);
912 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
913 MVT::i32);
914 return true;
915 }
916
917 if (!CurDAG->isBaseWithConstantOffset(N)) {
918 Base = N;
919 if (N.getOpcode() == ISD::FrameIndex) {
920 int FI = cast<FrameIndexSDNode>(N)->getIndex();
921 Base = CurDAG->getTargetFrameIndex(
922 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
923 }
924 Offset = CurDAG->getRegister(0, MVT::i32);
925 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
926 MVT::i32);
927 return true;
928 }
929
930 // If the RHS is +/- imm8, fold into addr mode.
931 int RHSC;
932 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
933 -256 + 1, 256, RHSC)) { // 8 bits.
934 Base = N.getOperand(0);
935 if (Base.getOpcode() == ISD::FrameIndex) {
936 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
937 Base = CurDAG->getTargetFrameIndex(
938 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
939 }
940 Offset = CurDAG->getRegister(0, MVT::i32);
941
943 if (RHSC < 0) {
945 RHSC = -RHSC;
946 }
947 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
948 MVT::i32);
949 return true;
950 }
951
952 Base = N.getOperand(0);
953 Offset = N.getOperand(1);
954 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
955 MVT::i32);
956 return true;
957}
958
959bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
961 unsigned Opcode = Op->getOpcode();
962 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
963 ? cast<LoadSDNode>(Op)->getAddressingMode()
964 : cast<StoreSDNode>(Op)->getAddressingMode();
966 ? ARM_AM::add : ARM_AM::sub;
967 int Val;
968 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
969 Offset = CurDAG->getRegister(0, MVT::i32);
970 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
971 MVT::i32);
972 return true;
973 }
974
975 Offset = N;
976 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
977 MVT::i32);
978 return true;
979}
980
981bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
982 bool FP16) {
983 if (!CurDAG->isBaseWithConstantOffset(N)) {
984 Base = N;
985 if (N.getOpcode() == ISD::FrameIndex) {
986 int FI = cast<FrameIndexSDNode>(N)->getIndex();
987 Base = CurDAG->getTargetFrameIndex(
988 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
989 } else if (N.getOpcode() == ARMISD::Wrapper &&
990 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
991 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
992 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
993 Base = N.getOperand(0);
994 }
995 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
996 SDLoc(N), MVT::i32);
997 return true;
998 }
999
1000 // If the RHS is +/- imm8, fold into addr mode.
1001 int RHSC;
1002 const int Scale = FP16 ? 2 : 4;
1003
1004 if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
1005 Base = N.getOperand(0);
1006 if (Base.getOpcode() == ISD::FrameIndex) {
1007 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1008 Base = CurDAG->getTargetFrameIndex(
1009 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1010 }
1011
1013 if (RHSC < 0) {
1015 RHSC = -RHSC;
1016 }
1017
1018 if (FP16)
1019 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
1020 SDLoc(N), MVT::i32);
1021 else
1022 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
1023 SDLoc(N), MVT::i32);
1024
1025 return true;
1026 }
1027
1028 Base = N;
1029
1030 if (FP16)
1031 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
1032 SDLoc(N), MVT::i32);
1033 else
1034 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1035 SDLoc(N), MVT::i32);
1036
1037 return true;
1038}
1039
1040bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
1042 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
1043}
1044
1045bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
1047 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
1048}
1049
1050bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
1051 SDValue &Align) {
1052 Addr = N;
1053
1054 unsigned Alignment = 0;
1055
1056 MemSDNode *MemN = cast<MemSDNode>(Parent);
1057
1058 if (isa<LSBaseSDNode>(MemN) ||
1059 ((MemN->getOpcode() == ARMISD::VST1_UPD ||
1060 MemN->getOpcode() == ARMISD::VLD1_UPD) &&
1061 MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
1062 // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1063 // The maximum alignment is equal to the memory size being referenced.
1064 llvm::Align MMOAlign = MemN->getAlign();
1065 unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
1066 if (MMOAlign.value() >= MemSize && MemSize > 1)
1067 Alignment = MemSize;
1068 } else {
1069 // All other uses of addrmode6 are for intrinsics. For now just record
1070 // the raw alignment value; it will be refined later based on the legal
1071 // alignment operands for the intrinsic.
1072 Alignment = MemN->getAlign().value();
1073 }
1074
1075 Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1076 return true;
1077}
1078
1079bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1080 SDValue &Offset) {
1081 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1083 if (AM != ISD::POST_INC)
1084 return false;
1085 Offset = N;
1086 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1087 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1088 Offset = CurDAG->getRegister(0, MVT::i32);
1089 }
1090 return true;
1091}
1092
1093bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1094 SDValue &Offset, SDValue &Label) {
1095 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1096 Offset = N.getOperand(0);
1097 SDValue N1 = N.getOperand(1);
1098 Label = CurDAG->getTargetConstant(N1->getAsZExtVal(), SDLoc(N), MVT::i32);
1099 return true;
1100 }
1101
1102 return false;
1103}
1104
1105
1106//===----------------------------------------------------------------------===//
1107// Thumb Addressing Modes
1108//===----------------------------------------------------------------------===//
1109
1111 // Negative numbers are difficult to materialise in thumb1. If we are
1112 // selecting the add of a negative, instead try to select ri with a zero
1113 // offset, so create the add node directly which will become a sub.
1114 if (N.getOpcode() != ISD::ADD)
1115 return false;
1116
1117 // Look for an imm which is not legal for ld/st, but is legal for sub.
1118 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1119 return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
1120
1121 return false;
1122}
1123
1124bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
1125 SDValue &Offset) {
1126 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1127 if (!isNullConstant(N))
1128 return false;
1129
1130 Base = Offset = N;
1131 return true;
1132 }
1133
1134 Base = N.getOperand(0);
1135 Offset = N.getOperand(1);
1136 return true;
1137}
1138
1139bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
1140 SDValue &Offset) {
1142 return false; // Select ri instead
1143 return SelectThumbAddrModeRRSext(N, Base, Offset);
1144}
1145
1146bool
1147ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1148 SDValue &Base, SDValue &OffImm) {
1150 Base = N;
1151 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1152 return true;
1153 }
1154
1155 if (!CurDAG->isBaseWithConstantOffset(N)) {
1156 if (N.getOpcode() == ISD::ADD) {
1157 return false; // We want to select register offset instead
1158 } else if (N.getOpcode() == ARMISD::Wrapper &&
1159 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1160 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1161 N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
1162 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1163 Base = N.getOperand(0);
1164 } else {
1165 Base = N;
1166 }
1167
1168 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1169 return true;
1170 }
1171
1172 // If the RHS is + imm5 * scale, fold into addr mode.
1173 int RHSC;
1174 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1175 Base = N.getOperand(0);
1176 OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
1177 return true;
1178 }
1179
1180 // Offset is too large, so use register offset instead.
1181 return false;
1182}
1183
/// Thumb base + imm5 addressing for word accesses (offset scaled by 4).
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}
1189
/// Thumb base + imm5 addressing for halfword accesses (offset scaled by 2).
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}
1195
/// Thumb base + imm5 addressing for byte accesses (offset unscaled).
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}
1201
/// Thumb SP-relative addressing: a frame index, optionally plus an imm8
/// scaled by 4. May raise the alignment of the referenced frame object to 4
/// as a side effect (the encoding only supports word-aligned offsets).
bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlign(FI) < Align(4))
      MFI.setObjectAlignment(FI, Align(4));
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4))
          MFI.setObjectAlignment(FI, Align(4));
        // Fixed objects may keep a smaller alignment, in which case we bail.
        if (MFI.getObjectAlign(FI) >= Align(4)) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}
1247
1248template <unsigned Shift>
1249bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
1250 SDValue &OffImm) {
1251 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1252 int RHSC;
1253 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
1254 RHSC)) {
1255 Base = N.getOperand(0);
1256 if (N.getOpcode() == ISD::SUB)
1257 RHSC = -RHSC;
1258 OffImm = CurDAG->getSignedTargetConstant(RHSC * (1 << Shift), SDLoc(N),
1259 MVT::i32);
1260 return true;
1261 }
1262 }
1263
1264 // Base only.
1265 Base = N;
1266 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1267 return true;
1268}
1269
1270
1271//===----------------------------------------------------------------------===//
1272// Thumb 2 Addressing Modes
1273//===----------------------------------------------------------------------===//
1274
1275
/// Thumb2 base + unsigned imm12 addressing (t2LDRi12 and friends). Returns
/// false when the address should be handled by a different form: negative
/// offsets go to the imm8 variant, constant pools to t2LDRpci.
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    // Strip a Wrapper whose payload is a plain target address (but not
    // globals/symbols/TLS, which need relocation-aware selection).
    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false; // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
1331
1332template <unsigned Shift>
1333bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
1334 SDValue &OffImm) {
1335 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1336 int RHSC;
1337 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
1338 Base = N.getOperand(0);
1339 if (Base.getOpcode() == ISD::FrameIndex) {
1340 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1341 Base = CurDAG->getTargetFrameIndex(
1342 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1343 }
1344
1345 if (N.getOpcode() == ISD::SUB)
1346 RHSC = -RHSC;
1347 OffImm = CurDAG->getSignedTargetConstant(RHSC * (1 << Shift), SDLoc(N),
1348 MVT::i32);
1349 return true;
1350 }
1351 }
1352
1353 // Base only.
1354 Base = N;
1355 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1356 return true;
1357}
1358
1359bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1360 SDValue &Base, SDValue &OffImm) {
1361 // Match simple R - imm8 operands.
1362 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1363 !CurDAG->isBaseWithConstantOffset(N))
1364 return false;
1365
1366 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1367 int RHSC = (int)RHS->getSExtValue();
1368 if (N.getOpcode() == ISD::SUB)
1369 RHSC = -RHSC;
1370
1371 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1372 Base = N.getOperand(0);
1373 if (Base.getOpcode() == ISD::FrameIndex) {
1374 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1375 Base = CurDAG->getTargetFrameIndex(
1376 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1377 }
1378 OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
1379 return true;
1380 }
1381 }
1382
1383 return false;
1384}
1385
1386bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1387 SDValue &OffImm){
1388 unsigned Opcode = Op->getOpcode();
1389 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1390 ? cast<LoadSDNode>(Op)->getAddressingMode()
1391 : cast<StoreSDNode>(Op)->getAddressingMode();
1392 int RHSC;
1393 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1394 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1395 ? CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32)
1396 : CurDAG->getSignedTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1397 return true;
1398 }
1399
1400 return false;
1401}
1402
/// Thumb2/MVE base + signed imm7 addressing, with the immediate scaled by
/// 1 << Shift. Always succeeds; falls back to base-only with a zero offset.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      // Lower a raw frame index to its target form.
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      // A SUB node carries a positive RHS; negate to get the real offset.
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm = CurDAG->getSignedTargetConstant(RHSC * (1 << Shift), SDLoc(N),
                                               MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
1430
/// Template front-end for the runtime-Shift overload below, used by the
/// TableGen-generated matcher.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
}
1436
1437bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1438 SDValue &OffImm,
1439 unsigned Shift) {
1440 unsigned Opcode = Op->getOpcode();
1442 switch (Opcode) {
1443 case ISD::LOAD:
1444 AM = cast<LoadSDNode>(Op)->getAddressingMode();
1445 break;
1446 case ISD::STORE:
1447 AM = cast<StoreSDNode>(Op)->getAddressingMode();
1448 break;
1449 case ISD::MLOAD:
1450 AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
1451 break;
1452 case ISD::MSTORE:
1453 AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
1454 break;
1455 default:
1456 llvm_unreachable("Unexpected Opcode for Imm7Offset");
1457 }
1458
1459 int RHSC;
1460 // 7 bit constant, shifted by Shift.
1461 if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
1462 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1463 ? CurDAG->getSignedTargetConstant(RHSC * (1 << Shift),
1464 SDLoc(N), MVT::i32)
1465 : CurDAG->getSignedTargetConstant(-RHSC * (1 << Shift),
1466 SDLoc(N), MVT::i32);
1467 return true;
1468 }
1469 return false;
1470}
1471
1472template <int Min, int Max>
1473bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
1474 int Val;
1475 if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
1476 OffImm = CurDAG->getSignedTargetConstant(Val, SDLoc(N), MVT::i32);
1477 return true;
1478 }
1479 return false;
1480}
1481
1482bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1483 SDValue &Base,
1484 SDValue &OffReg, SDValue &ShImm) {
1485 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1486 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1487 return false;
1488
1489 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1490 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1491 int RHSC = (int)RHS->getZExtValue();
1492 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1493 return false;
1494 else if (RHSC < 0 && RHSC >= -255) // 8 bits
1495 return false;
1496 }
1497
1498 // Look for (R + R) or (R + (R << [1,2,3])).
1499 unsigned ShAmt = 0;
1500 Base = N.getOperand(0);
1501 OffReg = N.getOperand(1);
1502
1503 // Swap if it is ((R << c) + R).
1505 if (ShOpcVal != ARM_AM::lsl) {
1506 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1507 if (ShOpcVal == ARM_AM::lsl)
1508 std::swap(Base, OffReg);
1509 }
1510
1511 if (ShOpcVal == ARM_AM::lsl) {
1512 // Check to see if the RHS of the shift is a constant, if not, we can't fold
1513 // it.
1514 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1515 ShAmt = Sh->getZExtValue();
1516 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1517 OffReg = OffReg.getOperand(0);
1518 else {
1519 ShAmt = 0;
1520 }
1521 }
1522 }
1523
1524 // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1525 // and use it in a shifted operand do so.
1526 if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1527 unsigned PowerOfTwo = 0;
1528 SDValue NewMulConst;
1529 if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
1530 HandleSDNode Handle(OffReg);
1531 replaceDAGValue(OffReg.getOperand(1), NewMulConst);
1532 OffReg = Handle.getValue();
1533 ShAmt = PowerOfTwo;
1534 }
1535 }
1536
1537 ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1538
1539 return true;
1540}
1541
1542bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1543 SDValue &OffImm) {
1544 // This *must* succeed since it's used for the irreplaceable ldrex and strex
1545 // instructions.
1546 Base = N;
1547 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1548
1549 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1550 return true;
1551
1552 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1553 if (!RHS)
1554 return true;
1555
1556 uint32_t RHSC = (int)RHS->getZExtValue();
1557 if (RHSC > 1020 || RHSC % 4 != 0)
1558 return true;
1559
1560 Base = N.getOperand(0);
1561 if (Base.getOpcode() == ISD::FrameIndex) {
1562 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1563 Base = CurDAG->getTargetFrameIndex(
1564 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1565 }
1566
1567 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1568 return true;
1569}
1570
1571//===--------------------------------------------------------------------===//
1572
/// getAL - Returns an ARMCC::AL (always-execute) condition-code immediate
/// node, used as the predicate operand on unconditional machine instrs.
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}
1577
/// Copy the memory operand from the source memory node \p N onto the newly
/// created machine node \p Result, preserving aliasing/volatility info.
void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}
1582
/// Try to select an ARM-mode pre/post-indexed load for \p N. Picks an opcode
/// by matching the loaded type and offset against the addrmode2 (i32/i8
/// zext) or addrmode3 (i16, sext i8) offset forms, then replaces the load
/// with the corresponding machine node. Returns false if nothing matched.
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  // Immediate forms are preferred over the register form where they apply.
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
             SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
             SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
                 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
                 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    // Sign-extending byte loads use addrmode3; zero-extending use addrmode2.
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    // The *_PRE_IMM forms take no separate offset-register operand; all other
    // forms do.
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}
1661
/// Try to select a Thumb1 post-indexed i32 load. Only the exact pattern
/// "non-extending word load, post-incremented by 4" can be expressed (as a
/// single-register LDM); anything else returns false.
bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
    return false;

  // The increment must be the constant 4 (one word).
  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load to
  // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
  // ISel.
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
                   CurDAG->getRegister(0, MVT::i32), Chain };
  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceNode(N, New);
  return true;
}
1688
1689bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1690 LoadSDNode *LD = cast<LoadSDNode>(N);
1691 ISD::MemIndexedMode AM = LD->getAddressingMode();
1692 if (AM == ISD::UNINDEXED)
1693 return false;
1694
1695 EVT LoadedVT = LD->getMemoryVT();
1696 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1698 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1699 unsigned Opcode = 0;
1700 bool Match = false;
1701 if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1702 switch (LoadedVT.getSimpleVT().SimpleTy) {
1703 case MVT::i32:
1704 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1705 break;
1706 case MVT::i16:
1707 if (isSExtLd)
1708 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1709 else
1710 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1711 break;
1712 case MVT::i8:
1713 case MVT::i1:
1714 if (isSExtLd)
1715 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1716 else
1717 Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1718 break;
1719 default:
1720 return false;
1721 }
1722 Match = true;
1723 }
1724
1725 if (Match) {
1726 SDValue Chain = LD->getChain();
1727 SDValue Base = LD->getBasePtr();
1728 SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1729 CurDAG->getRegister(0, MVT::i32), Chain };
1730 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1731 MVT::Other, Ops);
1732 transferMemOperands(N, New);
1733 ReplaceNode(N, New);
1734 return true;
1735 }
1736
1737 return false;
1738}
1739
/// Try to select an MVE pre/post-indexed vector load (plain or masked).
/// Matches the memory type, alignment, and imm7 offset against the available
/// VLDR{B,H,W} widening/non-widening forms, then replaces the node, rewiring
/// the value/writeback/chain results (note the result-order swap below).
bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
  EVT LoadedVT;
  unsigned Opcode = 0;
  bool isSExtLd, isPre;
  Align Alignment;
  ARMVCC::VPTCodes Pred;
  SDValue PredReg;
  SDValue Chain, Base, Offset;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    // Unmasked loads execute unconditionally.
    Pred = ARMVCC::None;
    PredReg = CurDAG->getRegister(0, MVT::i32);
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    // Masked loads become VPT-predicated ("then") instructions.
    Pred = ARMVCC::Then;
    PredReg = LD->getMask();
  } else
    llvm_unreachable("Expected a Load or a Masked Load!");

  // We allow LE non-masked loads to change the type (for example use a vldrb.8
  // as opposed to a vldrw.32). This can allow extra addressing modes or
  // alignments for what is otherwise an equivalent instruction.
  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);

  // Widening forms first, then full-width forms, each gated on the offset
  // fitting the imm7 field at the element's scale.
  SDValue NewOffset;
  if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
      SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
  } else if (LoadedVT == MVT::v8i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
  } else if (LoadedVT == MVT::v4i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
  } else if (Alignment >= Align(4) &&
             (CanChangeType || LoadedVT == MVT::v4i32 ||
              LoadedVT == MVT::v4f32) &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
    Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
  else if (Alignment >= Align(2) &&
           (CanChangeType || LoadedVT == MVT::v8i16 ||
            LoadedVT == MVT::v8f16) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
    Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
  else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
    Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
  else
    return false;

  SDValue Ops[] = {Base,
                   NewOffset,
                   CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32),
                   PredReg,
                   CurDAG->getRegister(0, MVT::i32), // tp_reg
                   Chain};
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                       N->getValueType(0), MVT::Other, Ops);
  transferMemOperands(N, New);
  // Machine node results are (writeback, value, chain) while the ISD load's
  // are (value, writeback, chain) — swap the first two when rewiring.
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}
1839
1840/// Form a GPRPair pseudo register from a pair of GPR regs.
1841SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1842 SDLoc dl(V0.getNode());
1843 SDValue RegClass =
1844 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1845 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1846 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1847 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1848 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1849}
1850
1851/// Form a D register from a pair of S registers.
1852SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1853 SDLoc dl(V0.getNode());
1854 SDValue RegClass =
1855 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1856 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1857 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1858 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1859 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1860}
1861
1862/// Form a quad register from a pair of D registers.
1863SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1864 SDLoc dl(V0.getNode());
1865 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1866 MVT::i32);
1867 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1868 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1869 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1870 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1871}
1872
1873/// Form 4 consecutive D registers from a pair of Q registers.
1874SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1875 SDLoc dl(V0.getNode());
1876 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1877 MVT::i32);
1878 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1879 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1880 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1881 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1882}
1883
1884/// Form 4 consecutive S registers.
1885SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1886 SDValue V2, SDValue V3) {
1887 SDLoc dl(V0.getNode());
1888 SDValue RegClass =
1889 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1890 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1891 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1892 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1893 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1894 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1895 V2, SubReg2, V3, SubReg3 };
1896 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1897}
1898
1899/// Form 4 consecutive D registers.
1900SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1901 SDValue V2, SDValue V3) {
1902 SDLoc dl(V0.getNode());
1903 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1904 MVT::i32);
1905 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1906 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1907 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1908 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1909 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1910 V2, SubReg2, V3, SubReg3 };
1911 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1912}
1913
1914/// Form 4 consecutive Q registers.
1915SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1916 SDValue V2, SDValue V3) {
1917 SDLoc dl(V0.getNode());
1918 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1919 MVT::i32);
1920 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1921 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1922 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1923 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1924 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1925 V2, SubReg2, V3, SubReg3 };
1926 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1927}
1928
1929/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1930/// of a NEON VLD or VST instruction. The supported values depend on the
1931/// number of registers being loaded.
1932SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1933 unsigned NumVecs, bool is64BitVector) {
1934 unsigned NumRegs = NumVecs;
1935 if (!is64BitVector && NumVecs < 3)
1936 NumRegs *= 2;
1937
1938 unsigned Alignment = Align->getAsZExtVal();
1939 if (Alignment >= 32 && NumRegs == 4)
1940 Alignment = 32;
1941 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1942 Alignment = 16;
1943 else if (Alignment >= 8)
1944 Alignment = 8;
1945 else
1946 Alignment = 0;
1947
1948 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1949}
1950
1951static bool isVLDfixed(unsigned Opc)
1952{
1953 switch (Opc) {
1954 default: return false;
1955 case ARM::VLD1d8wb_fixed : return true;
1956 case ARM::VLD1d16wb_fixed : return true;
1957 case ARM::VLD1d64Qwb_fixed : return true;
1958 case ARM::VLD1d32wb_fixed : return true;
1959 case ARM::VLD1d64wb_fixed : return true;
1960 case ARM::VLD1d8TPseudoWB_fixed : return true;
1961 case ARM::VLD1d16TPseudoWB_fixed : return true;
1962 case ARM::VLD1d32TPseudoWB_fixed : return true;
1963 case ARM::VLD1d64TPseudoWB_fixed : return true;
1964 case ARM::VLD1d8QPseudoWB_fixed : return true;
1965 case ARM::VLD1d16QPseudoWB_fixed : return true;
1966 case ARM::VLD1d32QPseudoWB_fixed : return true;
1967 case ARM::VLD1d64QPseudoWB_fixed : return true;
1968 case ARM::VLD1q8wb_fixed : return true;
1969 case ARM::VLD1q16wb_fixed : return true;
1970 case ARM::VLD1q32wb_fixed : return true;
1971 case ARM::VLD1q64wb_fixed : return true;
1972 case ARM::VLD1DUPd8wb_fixed : return true;
1973 case ARM::VLD1DUPd16wb_fixed : return true;
1974 case ARM::VLD1DUPd32wb_fixed : return true;
1975 case ARM::VLD1DUPq8wb_fixed : return true;
1976 case ARM::VLD1DUPq16wb_fixed : return true;
1977 case ARM::VLD1DUPq32wb_fixed : return true;
1978 case ARM::VLD2d8wb_fixed : return true;
1979 case ARM::VLD2d16wb_fixed : return true;
1980 case ARM::VLD2d32wb_fixed : return true;
1981 case ARM::VLD2q8PseudoWB_fixed : return true;
1982 case ARM::VLD2q16PseudoWB_fixed : return true;
1983 case ARM::VLD2q32PseudoWB_fixed : return true;
1984 case ARM::VLD2DUPd8wb_fixed : return true;
1985 case ARM::VLD2DUPd16wb_fixed : return true;
1986 case ARM::VLD2DUPd32wb_fixed : return true;
1987 case ARM::VLD2DUPq8OddPseudoWB_fixed: return true;
1988 case ARM::VLD2DUPq16OddPseudoWB_fixed: return true;
1989 case ARM::VLD2DUPq32OddPseudoWB_fixed: return true;
1990 }
1991}
1992
1993static bool isVSTfixed(unsigned Opc)
1994{
1995 switch (Opc) {
1996 default: return false;
1997 case ARM::VST1d8wb_fixed : return true;
1998 case ARM::VST1d16wb_fixed : return true;
1999 case ARM::VST1d32wb_fixed : return true;
2000 case ARM::VST1d64wb_fixed : return true;
2001 case ARM::VST1q8wb_fixed : return true;
2002 case ARM::VST1q16wb_fixed : return true;
2003 case ARM::VST1q32wb_fixed : return true;
2004 case ARM::VST1q64wb_fixed : return true;
2005 case ARM::VST1d8TPseudoWB_fixed : return true;
2006 case ARM::VST1d16TPseudoWB_fixed : return true;
2007 case ARM::VST1d32TPseudoWB_fixed : return true;
2008 case ARM::VST1d64TPseudoWB_fixed : return true;
2009 case ARM::VST1d8QPseudoWB_fixed : return true;
2010 case ARM::VST1d16QPseudoWB_fixed : return true;
2011 case ARM::VST1d32QPseudoWB_fixed : return true;
2012 case ARM::VST1d64QPseudoWB_fixed : return true;
2013 case ARM::VST2d8wb_fixed : return true;
2014 case ARM::VST2d16wb_fixed : return true;
2015 case ARM::VST2d32wb_fixed : return true;
2016 case ARM::VST2q8PseudoWB_fixed : return true;
2017 case ARM::VST2q16PseudoWB_fixed : return true;
2018 case ARM::VST2q32PseudoWB_fixed : return true;
2019 }
2020}
2021
2022// Get the register stride update opcode of a VLD/VST instruction that
2023// is otherwise equivalent to the given fixed stride updating instruction.
2024static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
2026 && "Incorrect fixed stride updating instruction.");
2027 switch (Opc) {
2028 default: break;
2029 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
2030 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
2031 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
2032 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
2033 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
2034 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
2035 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
2036 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
2037 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
2038 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
2039 case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
2040 case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
2041 case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
2042 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
2043 case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
2044 case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
2045 case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
2046 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
2047 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
2048 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
2049 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
2050 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
2051 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
2052 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
2053 case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;
2054 case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;
2055 case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;
2056
2057 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
2058 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
2059 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
2060 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
2061 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
2062 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
2063 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
2064 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
2065 case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;
2066 case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;
2067 case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;
2068 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
2069 case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;
2070 case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;
2071 case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;
2072 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
2073
2074 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
2075 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
2076 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
2077 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
2078 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
2079 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
2080
2081 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
2082 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
2083 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
2084 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
2085 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
2086 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
2087
2088 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
2089 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
2090 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
2091 }
2092 return Opc; // If not one we handle, return it unchanged.
2093}
2094
2095/// Returns true if the given increment is a Constant known to be equal to the
2096/// access size performed by a NEON load/store. This means the "[rN]!" form can
2097/// be used.
2098static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
2099 auto C = dyn_cast<ConstantSDNode>(Inc);
2100 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
2101}
2102
2103void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
2104 const uint16_t *DOpcodes,
2105 const uint16_t *QOpcodes0,
2106 const uint16_t *QOpcodes1) {
2107 assert(Subtarget->hasNEON());
2108 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
2109 SDLoc dl(N);
2110
2111 SDValue MemAddr, Align;
2112 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2113 // nodes are not intrinsics.
2114 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2115 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2116 return;
2117
2118 SDValue Chain = N->getOperand(0);
2119 EVT VT = N->getValueType(0);
2120 bool is64BitVector = VT.is64BitVector();
2121 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2122
2123 unsigned OpcodeIndex;
2124 switch (VT.getSimpleVT().SimpleTy) {
2125 default: llvm_unreachable("unhandled vld type");
2126 // Double-register operations:
2127 case MVT::v8i8: OpcodeIndex = 0; break;
2128 case MVT::v4f16:
2129 case MVT::v4bf16:
2130 case MVT::v4i16: OpcodeIndex = 1; break;
2131 case MVT::v2f32:
2132 case MVT::v2i32: OpcodeIndex = 2; break;
2133 case MVT::v1i64: OpcodeIndex = 3; break;
2134 // Quad-register operations:
2135 case MVT::v16i8: OpcodeIndex = 0; break;
2136 case MVT::v8f16:
2137 case MVT::v8bf16:
2138 case MVT::v8i16: OpcodeIndex = 1; break;
2139 case MVT::v4f32:
2140 case MVT::v4i32: OpcodeIndex = 2; break;
2141 case MVT::v2f64:
2142 case MVT::v2i64: OpcodeIndex = 3; break;
2143 }
2144
2145 EVT ResTy;
2146 if (NumVecs == 1)
2147 ResTy = VT;
2148 else {
2149 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2150 if (!is64BitVector)
2151 ResTyElts *= 2;
2152 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
2153 }
2154 std::vector<EVT> ResTys;
2155 ResTys.push_back(ResTy);
2156 if (isUpdating)
2157 ResTys.push_back(MVT::i32);
2158 ResTys.push_back(MVT::Other);
2159
2160 SDValue Pred = getAL(CurDAG, dl);
2161 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2162 SDNode *VLd;
2164
2165 // Double registers and VLD1/VLD2 quad registers are directly supported.
2166 if (is64BitVector || NumVecs <= 2) {
2167 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2168 QOpcodes0[OpcodeIndex]);
2169 Ops.push_back(MemAddr);
2170 Ops.push_back(Align);
2171 if (isUpdating) {
2172 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2173 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2174 if (!IsImmUpdate) {
2175 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
2176 // check for the opcode rather than the number of vector elements.
2177 if (isVLDfixed(Opc))
2179 Ops.push_back(Inc);
2180 // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
2181 // the operands if not such an opcode.
2182 } else if (!isVLDfixed(Opc))
2183 Ops.push_back(Reg0);
2184 }
2185 Ops.push_back(Pred);
2186 Ops.push_back(Reg0);
2187 Ops.push_back(Chain);
2188 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2189
2190 } else {
2191 // Otherwise, quad registers are loaded with two separate instructions,
2192 // where one loads the even registers and the other loads the odd registers.
2193 EVT AddrTy = MemAddr.getValueType();
2194
2195 // Load the even subregs. This is always an updating load, so that it
2196 // provides the address to the second load for the odd subregs.
2197 SDValue ImplDef =
2198 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
2199 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
2200 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2201 ResTy, AddrTy, MVT::Other, OpsA);
2202 Chain = SDValue(VLdA, 2);
2203
2204 // Load the odd subregs.
2205 Ops.push_back(SDValue(VLdA, 1));
2206 Ops.push_back(Align);
2207 if (isUpdating) {
2208 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2210 "only constant post-increment update allowed for VLD3/4");
2211 (void)Inc;
2212 Ops.push_back(Reg0);
2213 }
2214 Ops.push_back(SDValue(VLdA, 0));
2215 Ops.push_back(Pred);
2216 Ops.push_back(Reg0);
2217 Ops.push_back(Chain);
2218 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
2219 }
2220
2221 // Transfer memoperands.
2222 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2223 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});
2224
2225 if (NumVecs == 1) {
2226 ReplaceNode(N, VLd);
2227 return;
2228 }
2229
2230 // Extract out the subregisters.
2231 SDValue SuperReg = SDValue(VLd, 0);
2232 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2233 ARM::qsub_3 == ARM::qsub_0 + 3,
2234 "Unexpected subreg numbering");
2235 unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
2236 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2237 ReplaceUses(SDValue(N, Vec),
2238 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2239 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
2240 if (isUpdating)
2241 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
2242 CurDAG->RemoveDeadNode(N);
2243}
2244
2245void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
2246 const uint16_t *DOpcodes,
2247 const uint16_t *QOpcodes0,
2248 const uint16_t *QOpcodes1) {
2249 assert(Subtarget->hasNEON());
2250 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
2251 SDLoc dl(N);
2252
2253 SDValue MemAddr, Align;
2254 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2255 // nodes are not intrinsics.
2256 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2257 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2258 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2259 return;
2260
2261 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2262
2263 SDValue Chain = N->getOperand(0);
2264 EVT VT = N->getOperand(Vec0Idx).getValueType();
2265 bool is64BitVector = VT.is64BitVector();
2266 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2267
2268 unsigned OpcodeIndex;
2269 switch (VT.getSimpleVT().SimpleTy) {
2270 default: llvm_unreachable("unhandled vst type");
2271 // Double-register operations:
2272 case MVT::v8i8: OpcodeIndex = 0; break;
2273 case MVT::v4f16:
2274 case MVT::v4bf16:
2275 case MVT::v4i16: OpcodeIndex = 1; break;
2276 case MVT::v2f32:
2277 case MVT::v2i32: OpcodeIndex = 2; break;
2278 case MVT::v1i64: OpcodeIndex = 3; break;
2279 // Quad-register operations:
2280 case MVT::v16i8: OpcodeIndex = 0; break;
2281 case MVT::v8f16:
2282 case MVT::v8bf16:
2283 case MVT::v8i16: OpcodeIndex = 1; break;
2284 case MVT::v4f32:
2285 case MVT::v4i32: OpcodeIndex = 2; break;
2286 case MVT::v2f64:
2287 case MVT::v2i64: OpcodeIndex = 3; break;
2288 }
2289
2290 std::vector<EVT> ResTys;
2291 if (isUpdating)
2292 ResTys.push_back(MVT::i32);
2293 ResTys.push_back(MVT::Other);
2294
2295 SDValue Pred = getAL(CurDAG, dl);
2296 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2298
2299 // Double registers and VST1/VST2 quad registers are directly supported.
2300 if (is64BitVector || NumVecs <= 2) {
2301 SDValue SrcReg;
2302 if (NumVecs == 1) {
2303 SrcReg = N->getOperand(Vec0Idx);
2304 } else if (is64BitVector) {
2305 // Form a REG_SEQUENCE to force register allocation.
2306 SDValue V0 = N->getOperand(Vec0Idx + 0);
2307 SDValue V1 = N->getOperand(Vec0Idx + 1);
2308 if (NumVecs == 2)
2309 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2310 else {
2311 SDValue V2 = N->getOperand(Vec0Idx + 2);
2312 // If it's a vst3, form a quad D-register and leave the last part as
2313 // an undef.
2314 SDValue V3 = (NumVecs == 3)
2315 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2316 : N->getOperand(Vec0Idx + 3);
2317 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2318 }
2319 } else {
2320 // Form a QQ register.
2321 SDValue Q0 = N->getOperand(Vec0Idx);
2322 SDValue Q1 = N->getOperand(Vec0Idx + 1);
2323 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2324 }
2325
2326 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2327 QOpcodes0[OpcodeIndex]);
2328 Ops.push_back(MemAddr);
2329 Ops.push_back(Align);
2330 if (isUpdating) {
2331 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2332 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2333 if (!IsImmUpdate) {
2334 // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
2335 // check for the opcode rather than the number of vector elements.
2336 if (isVSTfixed(Opc))
2338 Ops.push_back(Inc);
2339 }
2340 // VST1/VST2 fixed increment does not need Reg0 so only include it in
2341 // the operands if not such an opcode.
2342 else if (!isVSTfixed(Opc))
2343 Ops.push_back(Reg0);
2344 }
2345 Ops.push_back(SrcReg);
2346 Ops.push_back(Pred);
2347 Ops.push_back(Reg0);
2348 Ops.push_back(Chain);
2349 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2350
2351 // Transfer memoperands.
2352 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});
2353
2354 ReplaceNode(N, VSt);
2355 return;
2356 }
2357
2358 // Otherwise, quad registers are stored with two separate instructions,
2359 // where one stores the even registers and the other stores the odd registers.
2360
2361 // Form the QQQQ REG_SEQUENCE.
2362 SDValue V0 = N->getOperand(Vec0Idx + 0);
2363 SDValue V1 = N->getOperand(Vec0Idx + 1);
2364 SDValue V2 = N->getOperand(Vec0Idx + 2);
2365 SDValue V3 = (NumVecs == 3)
2366 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2367 : N->getOperand(Vec0Idx + 3);
2368 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2369
2370 // Store the even D registers. This is always an updating store, so that it
2371 // provides the address to the second store for the odd subregs.
2372 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2373 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2374 MemAddr.getValueType(),
2375 MVT::Other, OpsA);
2376 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
2377 Chain = SDValue(VStA, 1);
2378
2379 // Store the odd D registers.
2380 Ops.push_back(SDValue(VStA, 0));
2381 Ops.push_back(Align);
2382 if (isUpdating) {
2383 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2385 "only constant post-increment update allowed for VST3/4");
2386 (void)Inc;
2387 Ops.push_back(Reg0);
2388 }
2389 Ops.push_back(RegSeq);
2390 Ops.push_back(Pred);
2391 Ops.push_back(Reg0);
2392 Ops.push_back(Chain);
2393 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2394 Ops);
2395 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
2396 ReplaceNode(N, VStB);
2397}
2398
2399void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2400 unsigned NumVecs,
2401 const uint16_t *DOpcodes,
2402 const uint16_t *QOpcodes) {
2403 assert(Subtarget->hasNEON());
2404 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2405 SDLoc dl(N);
2406
2407 SDValue MemAddr, Align;
2408 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2409 // nodes are not intrinsics.
2410 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2411 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2412 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2413 return;
2414
2415 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2416
2417 SDValue Chain = N->getOperand(0);
2418 unsigned Lane = N->getConstantOperandVal(Vec0Idx + NumVecs);
2419 EVT VT = N->getOperand(Vec0Idx).getValueType();
2420 bool is64BitVector = VT.is64BitVector();
2421
2422 unsigned Alignment = 0;
2423 if (NumVecs != 3) {
2424 Alignment = Align->getAsZExtVal();
2425 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2426 if (Alignment > NumBytes)
2427 Alignment = NumBytes;
2428 if (Alignment < 8 && Alignment < NumBytes)
2429 Alignment = 0;
2430 // Alignment must be a power of two; make sure of that.
2431 Alignment = (Alignment & -Alignment);
2432 if (Alignment == 1)
2433 Alignment = 0;
2434 }
2435 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2436
2437 unsigned OpcodeIndex;
2438 switch (VT.getSimpleVT().SimpleTy) {
2439 default: llvm_unreachable("unhandled vld/vst lane type");
2440 // Double-register operations:
2441 case MVT::v8i8: OpcodeIndex = 0; break;
2442 case MVT::v4f16:
2443 case MVT::v4bf16:
2444 case MVT::v4i16: OpcodeIndex = 1; break;
2445 case MVT::v2f32:
2446 case MVT::v2i32: OpcodeIndex = 2; break;
2447 // Quad-register operations:
2448 case MVT::v8f16:
2449 case MVT::v8bf16:
2450 case MVT::v8i16: OpcodeIndex = 0; break;
2451 case MVT::v4f32:
2452 case MVT::v4i32: OpcodeIndex = 1; break;
2453 }
2454
2455 std::vector<EVT> ResTys;
2456 if (IsLoad) {
2457 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2458 if (!is64BitVector)
2459 ResTyElts *= 2;
2460 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2461 MVT::i64, ResTyElts));
2462 }
2463 if (isUpdating)
2464 ResTys.push_back(MVT::i32);
2465 ResTys.push_back(MVT::Other);
2466
2467 SDValue Pred = getAL(CurDAG, dl);
2468 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2469
2471 Ops.push_back(MemAddr);
2472 Ops.push_back(Align);
2473 if (isUpdating) {
2474 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2475 bool IsImmUpdate =
2476 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2477 Ops.push_back(IsImmUpdate ? Reg0 : Inc);
2478 }
2479
2480 SDValue SuperReg;
2481 SDValue V0 = N->getOperand(Vec0Idx + 0);
2482 SDValue V1 = N->getOperand(Vec0Idx + 1);
2483 if (NumVecs == 2) {
2484 if (is64BitVector)
2485 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2486 else
2487 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2488 } else {
2489 SDValue V2 = N->getOperand(Vec0Idx + 2);
2490 SDValue V3 = (NumVecs == 3)
2491 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2492 : N->getOperand(Vec0Idx + 3);
2493 if (is64BitVector)
2494 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2495 else
2496 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2497 }
2498 Ops.push_back(SuperReg);
2499 Ops.push_back(getI32Imm(Lane, dl));
2500 Ops.push_back(Pred);
2501 Ops.push_back(Reg0);
2502 Ops.push_back(Chain);
2503
2504 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2505 QOpcodes[OpcodeIndex]);
2506 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2507 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
2508 if (!IsLoad) {
2509 ReplaceNode(N, VLdLn);
2510 return;
2511 }
2512
2513 // Extract the subregisters.
2514 SuperReg = SDValue(VLdLn, 0);
2515 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2516 ARM::qsub_3 == ARM::qsub_0 + 3,
2517 "Unexpected subreg numbering");
2518 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2519 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2520 ReplaceUses(SDValue(N, Vec),
2521 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2522 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2523 if (isUpdating)
2524 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2525 CurDAG->RemoveDeadNode(N);
2526}
2527
2528template <typename SDValueVector>
2529void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2530 SDValue PredicateMask) {
2531 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2532 Ops.push_back(PredicateMask);
2533 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2534}
2535
2536template <typename SDValueVector>
2537void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2538 SDValue PredicateMask,
2539 SDValue Inactive) {
2540 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2541 Ops.push_back(PredicateMask);
2542 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2543 Ops.push_back(Inactive);
2544}
2545
2546template <typename SDValueVector>
2547void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
2548 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2549 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2550 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2551}
2552
2553template <typename SDValueVector>
2554void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2555 EVT InactiveTy) {
2556 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2557 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2558 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2559 Ops.push_back(SDValue(
2560 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
2561}
2562
2563void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
2564 bool Predicated) {
2565 SDLoc Loc(N);
2567
2568 uint16_t Opcode;
2569 switch (N->getValueType(1).getVectorElementType().getSizeInBits()) {
2570 case 32:
2571 Opcode = Opcodes[0];
2572 break;
2573 case 64:
2574 Opcode = Opcodes[1];
2575 break;
2576 default:
2577 llvm_unreachable("bad vector element size in SelectMVE_WB");
2578 }
2579
2580 Ops.push_back(N->getOperand(2)); // vector of base addresses
2581
2582 int32_t ImmValue = N->getConstantOperandVal(3);
2583 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset
2584
2585 if (Predicated)
2586 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
2587 else
2588 AddEmptyMVEPredicateToOps(Ops, Loc);
2589
2590 Ops.push_back(N->getOperand(0)); // chain
2591
2593 VTs.push_back(N->getValueType(1));
2594 VTs.push_back(N->getValueType(0));
2595 VTs.push_back(N->getValueType(2));
2596
2597 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops);
2598 ReplaceUses(SDValue(N, 0), SDValue(New, 1));
2599 ReplaceUses(SDValue(N, 1), SDValue(New, 0));
2600 ReplaceUses(SDValue(N, 2), SDValue(New, 2));
2601 transferMemOperands(N, New);
2602 CurDAG->RemoveDeadNode(N);
2603}
2604
2605void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
2606 bool Immediate,
2607 bool HasSaturationOperand) {
2608 SDLoc Loc(N);
2610
2611 // Two 32-bit halves of the value to be shifted
2612 Ops.push_back(N->getOperand(1));
2613 Ops.push_back(N->getOperand(2));
2614
2615 // The shift count
2616 if (Immediate) {
2617 int32_t ImmValue = N->getConstantOperandVal(3);
2618 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
2619 } else {
2620 Ops.push_back(N->getOperand(3));
2621 }
2622
2623 // The immediate saturation operand, if any
2624 if (HasSaturationOperand) {
2625 int32_t SatOp = N->getConstantOperandVal(4);
2626 int SatBit = (SatOp == 64 ? 0 : 1);
2627 Ops.push_back(getI32Imm(SatBit, Loc));
2628 }
2629
2630 // MVE scalar shifts are IT-predicable, so include the standard
2631 // predicate arguments.
2632 Ops.push_back(getAL(CurDAG, Loc));
2633 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2634
2635 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2636}
2637
2638void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
2639 uint16_t OpcodeWithNoCarry,
2640 bool Add, bool Predicated) {
2641 SDLoc Loc(N);
2643 uint16_t Opcode;
2644
2645 unsigned FirstInputOp = Predicated ? 2 : 1;
2646
2647 // Two input vectors and the input carry flag
2648 Ops.push_back(N->getOperand(FirstInputOp));
2649 Ops.push_back(N->getOperand(FirstInputOp + 1));
2650 SDValue CarryIn = N->getOperand(FirstInputOp + 2);
2651 ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn);
2652 uint32_t CarryMask = 1 << 29;
2653 uint32_t CarryExpected = Add ? 0 : CarryMask;
2654 if (CarryInConstant &&
2655 (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
2656 Opcode = OpcodeWithNoCarry;
2657 } else {
2658 Ops.push_back(CarryIn);
2659 Opcode = OpcodeWithCarry;
2660 }
2661
2662 if (Predicated)
2663 AddMVEPredicateToOps(Ops, Loc,
2664 N->getOperand(FirstInputOp + 3), // predicate
2665 N->getOperand(FirstInputOp - 1)); // inactive
2666 else
2667 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
2668
2669 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2670}
2671
2672void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
2673 SDLoc Loc(N);
2675
2676 // One vector input, followed by a 32-bit word of bits to shift in
2677 // and then an immediate shift count
2678 Ops.push_back(N->getOperand(1));
2679 Ops.push_back(N->getOperand(2));
2680 int32_t ImmValue = N->getConstantOperandVal(3);
2681 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
2682
2683 if (Predicated)
2684 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
2685 else
2686 AddEmptyMVEPredicateToOps(Ops, Loc);
2687
2688 CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), ArrayRef(Ops));
2689}
2690
2691static bool SDValueToConstBool(SDValue SDVal) {
2692 assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
2693 ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
2694 uint64_t Value = SDValConstant->getZExtValue();
2695 assert((Value == 0 || Value == 1) && "expected value 0 or 1");
2696 return Value;
2697}
2698
2699void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
2700 const uint16_t *OpcodesS,
2701 const uint16_t *OpcodesU,
2702 size_t Stride, size_t TySize) {
2703 assert(TySize < Stride && "Invalid TySize");
2704 bool IsUnsigned = SDValueToConstBool(N->getOperand(1));
2705 bool IsSub = SDValueToConstBool(N->getOperand(2));
2706 bool IsExchange = SDValueToConstBool(N->getOperand(3));
2707 if (IsUnsigned) {
2708 assert(!IsSub &&
2709 "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
2710 assert(!IsExchange &&
2711 "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
2712 }
2713
2714 auto OpIsZero = [N](size_t OpNo) {
2715 return isNullConstant(N->getOperand(OpNo));
2716 };
2717
2718 // If the input accumulator value is not zero, select an instruction with
2719 // accumulator, otherwise select an instruction without accumulator
2720 bool IsAccum = !(OpIsZero(4) && OpIsZero(5));
2721
2722 const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
2723 if (IsSub)
2724 Opcodes += 4 * Stride;
2725 if (IsExchange)
2726 Opcodes += 2 * Stride;
2727 if (IsAccum)
2728 Opcodes += Stride;
2729 uint16_t Opcode = Opcodes[TySize];
2730
2731 SDLoc Loc(N);
2733 // Push the accumulator operands, if they are used
2734 if (IsAccum) {
2735 Ops.push_back(N->getOperand(4));
2736 Ops.push_back(N->getOperand(5));
2737 }
2738 // Push the two vector operands
2739 Ops.push_back(N->getOperand(6));
2740 Ops.push_back(N->getOperand(7));
2741
2742 if (Predicated)
2743 AddMVEPredicateToOps(Ops, Loc, N->getOperand(8));
2744 else
2745 AddEmptyMVEPredicateToOps(Ops, Loc);
2746
2747 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2748}
2749
2750void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
2751 const uint16_t *OpcodesS,
2752 const uint16_t *OpcodesU) {
2753 EVT VecTy = N->getOperand(6).getValueType();
2754 size_t SizeIndex;
2755 switch (VecTy.getVectorElementType().getSizeInBits()) {
2756 case 16:
2757 SizeIndex = 0;
2758 break;
2759 case 32:
2760 SizeIndex = 1;
2761 break;
2762 default:
2763 llvm_unreachable("bad vector element size");
2764 }
2765
2766 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex);
2767}
2768
2769void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
2770 const uint16_t *OpcodesS,
2771 const uint16_t *OpcodesU) {
2772 assert(
2773 N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
2774 32 &&
2775 "bad vector element size");
2776 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);
2777}
2778
// Select an MVE structured load (MVE_VLDn, per the comment below): emits
// NumVecs machine instructions that all read the same pointer operand and
// thread a single wide tuple value through, with the final instruction
// optionally producing the pointer writeback.
void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                                    const uint16_t *const *Opcodes,
                                    bool HasWriteback) {
  EVT VT = N->getValueType(0);
  SDLoc Loc(N);

  // Pick the per-element-size opcode table (one row each for 8/16/32-bit
  // elements).
  const uint16_t *OurOpcodes;
  switch (VT.getVectorElementType().getSizeInBits()) {
  case 8:
    OurOpcodes = Opcodes[0];
    break;
  case 16:
    OurOpcodes = Opcodes[1];
    break;
  case 32:
    OurOpcodes = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VLD");
  }

  // The stages accumulate into one value of NumVecs * 2 x i64, i.e. NumVecs
  // 128-bit q-registers viewed as a single tuple (see the qsub_0 + i
  // extraction below).
  EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
  SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
  // The pointer is operand 1 for writeback forms, operand 2 otherwise.
  unsigned PtrOperand = HasWriteback ? 1 : 2;

  // Seed the tuple with IMPLICIT_DEF; each stage fills in more of it.
  auto Data = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
  SDValue Chain = N->getOperand(0);
  // Add a MVE_VLDn instruction for each Vec, except the last
  for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
    SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
    auto LoadInst =
        CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
    Data = SDValue(LoadInst, 0);
    Chain = SDValue(LoadInst, 1);
    transferMemOperands(N, LoadInst);
  }
  // The last may need a writeback on it
  if (HasWriteback)
    ResultTys = {DataTy, MVT::i32, MVT::Other};
  SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
  auto LoadInst =
      CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops);
  transferMemOperands(N, LoadInst);

  // Replace N's results: NumVecs vector values extracted from the tuple,
  // then (for writeback forms) the updated pointer, then the chain.
  unsigned i;
  for (i = 0; i < NumVecs; i++)
    ReplaceUses(SDValue(N, i),
                CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT,
                                               SDValue(LoadInst, 0)));
  if (HasWriteback)
    ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1));
  ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1));
  CurDAG->RemoveDeadNode(N);
}
2834
2835void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
2836 bool Wrapping, bool Predicated) {
2837 EVT VT = N->getValueType(0);
2838 SDLoc Loc(N);
2839
2840 uint16_t Opcode;
2841 switch (VT.getScalarSizeInBits()) {
2842 case 8:
2843 Opcode = Opcodes[0];
2844 break;
2845 case 16:
2846 Opcode = Opcodes[1];
2847 break;
2848 case 32:
2849 Opcode = Opcodes[2];
2850 break;
2851 default:
2852 llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
2853 }
2854
2856 unsigned OpIdx = 1;
2857
2858 SDValue Inactive;
2859 if (Predicated)
2860 Inactive = N->getOperand(OpIdx++);
2861
2862 Ops.push_back(N->getOperand(OpIdx++)); // base
2863 if (Wrapping)
2864 Ops.push_back(N->getOperand(OpIdx++)); // limit
2865
2866 SDValue ImmOp = N->getOperand(OpIdx++); // step
2867 int ImmValue = ImmOp->getAsZExtVal();
2868 Ops.push_back(getI32Imm(ImmValue, Loc));
2869
2870 if (Predicated)
2871 AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive);
2872 else
2873 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
2874
2875 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2876}
2877
2878void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
2879 size_t NumExtraOps, bool HasAccum) {
2880 bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
2881 SDLoc Loc(N);
2883
2884 unsigned OpIdx = 1;
2885
2886 // Convert and append the immediate operand designating the coprocessor.
2887 SDValue ImmCorpoc = N->getOperand(OpIdx++);
2888 uint32_t ImmCoprocVal = ImmCorpoc->getAsZExtVal();
2889 Ops.push_back(getI32Imm(ImmCoprocVal, Loc));
2890
2891 // For accumulating variants copy the low and high order parts of the
2892 // accumulator into a register pair and add it to the operand vector.
2893 if (HasAccum) {
2894 SDValue AccLo = N->getOperand(OpIdx++);
2895 SDValue AccHi = N->getOperand(OpIdx++);
2896 if (IsBigEndian)
2897 std::swap(AccLo, AccHi);
2898 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));
2899 }
2900
2901 // Copy extra operands as-is.
2902 for (size_t I = 0; I < NumExtraOps; I++)
2903 Ops.push_back(N->getOperand(OpIdx++));
2904
2905 // Convert and append the immediate operand
2906 SDValue Imm = N->getOperand(OpIdx);
2907 uint32_t ImmVal = Imm->getAsZExtVal();
2908 Ops.push_back(getI32Imm(ImmVal, Loc));
2909
2910 // Accumulating variants are IT-predicable, add predicate operands.
2911 if (HasAccum) {
2912 SDValue Pred = getAL(CurDAG, Loc);
2913 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2914 Ops.push_back(Pred);
2915 Ops.push_back(PredReg);
2916 }
2917
2918 // Create the CDE intruction
2919 SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);
2920 SDValue ResultPair = SDValue(InstrNode, 0);
2921
2922 // The original intrinsic had two outputs, and the output of the dual-register
2923 // CDE instruction is a register pair. We need to extract the two subregisters
2924 // and replace all uses of the original outputs with the extracted
2925 // subregisters.
2926 uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
2927 if (IsBigEndian)
2928 std::swap(SubRegs[0], SubRegs[1]);
2929
2930 for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
2931 if (SDValue(N, ResIdx).use_empty())
2932 continue;
2933 SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc,
2934 MVT::i32, ResultPair);
2935 ReplaceUses(SDValue(N, ResIdx), SubReg);
2936 }
2937
2938 CurDAG->RemoveDeadNode(N);
2939}
2940
2941void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
2942 bool isUpdating, unsigned NumVecs,
2943 const uint16_t *DOpcodes,
2944 const uint16_t *QOpcodes0,
2945 const uint16_t *QOpcodes1) {
2946 assert(Subtarget->hasNEON());
2947 assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2948 SDLoc dl(N);
2949
2950 SDValue MemAddr, Align;
2951 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2952 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2953 return;
2954
2955 SDValue Chain = N->getOperand(0);
2956 EVT VT = N->getValueType(0);
2957 bool is64BitVector = VT.is64BitVector();
2958
2959 unsigned Alignment = 0;
2960 if (NumVecs != 3) {
2961 Alignment = Align->getAsZExtVal();
2962 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2963 if (Alignment > NumBytes)
2964 Alignment = NumBytes;
2965 if (Alignment < 8 && Alignment < NumBytes)
2966 Alignment = 0;
2967 // Alignment must be a power of two; make sure of that.
2968 Alignment = (Alignment & -Alignment);
2969 if (Alignment == 1)
2970 Alignment = 0;
2971 }
2972 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2973
2974 unsigned OpcodeIndex;
2975 switch (VT.getSimpleVT().SimpleTy) {
2976 default: llvm_unreachable("unhandled vld-dup type");
2977 case MVT::v8i8:
2978 case MVT::v16i8: OpcodeIndex = 0; break;
2979 case MVT::v4i16:
2980 case MVT::v8i16:
2981 case MVT::v4f16:
2982 case MVT::v8f16:
2983 case MVT::v4bf16:
2984 case MVT::v8bf16:
2985 OpcodeIndex = 1; break;
2986 case MVT::v2f32:
2987 case MVT::v2i32:
2988 case MVT::v4f32:
2989 case MVT::v4i32: OpcodeIndex = 2; break;
2990 case MVT::v1f64:
2991 case MVT::v1i64: OpcodeIndex = 3; break;
2992 }
2993
2994 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2995 if (!is64BitVector)
2996 ResTyElts *= 2;
2997 EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
2998
2999 std::vector<EVT> ResTys;
3000 ResTys.push_back(ResTy);
3001 if (isUpdating)
3002 ResTys.push_back(MVT::i32);
3003 ResTys.push_back(MVT::Other);
3004
3005 SDValue Pred = getAL(CurDAG, dl);
3006 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3007
3009 Ops.push_back(MemAddr);
3010 Ops.push_back(Align);
3011 unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex]
3012 : (NumVecs == 1) ? QOpcodes0[OpcodeIndex]
3013 : QOpcodes1[OpcodeIndex];
3014 if (isUpdating) {
3015 SDValue Inc = N->getOperand(2);
3016 bool IsImmUpdate =
3017 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
3018 if (IsImmUpdate) {
3019 if (!isVLDfixed(Opc))
3020 Ops.push_back(Reg0);
3021 } else {
3022 if (isVLDfixed(Opc))
3024 Ops.push_back(Inc);
3025 }
3026 }
3027 if (is64BitVector || NumVecs == 1) {
3028 // Double registers and VLD1 quad registers are directly supported.
3029 } else {
3030 SDValue ImplDef = SDValue(
3031 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
3032 const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};
3033 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
3034 MVT::Other, OpsA);
3035 Ops.push_back(SDValue(VLdA, 0));
3036 Chain = SDValue(VLdA, 1);
3037 }
3038
3039 Ops.push_back(Pred);
3040 Ops.push_back(Reg0);
3041 Ops.push_back(Chain);
3042
3043 SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
3044
3045 // Transfer memoperands.
3046 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3047 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});
3048
3049 // Extract the subregisters.
3050 if (NumVecs == 1) {
3051 ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
3052 } else {
3053 SDValue SuperReg = SDValue(VLdDup, 0);
3054 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
3055 unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
3056 for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
3057 ReplaceUses(SDValue(N, Vec),
3058 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
3059 }
3060 }
3061 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
3062 if (isUpdating)
3063 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
3064 CurDAG->RemoveDeadNode(N);
3065}
3066
3067bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
3068 if (!Subtarget->hasMVEIntegerOps())
3069 return false;
3070
3071 SDLoc dl(N);
3072
3073 // We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and
3074 // extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
3075 // inserts of the correct type:
3076 SDValue Ins1 = SDValue(N, 0);
3077 SDValue Ins2 = N->getOperand(0);
3078 EVT VT = Ins1.getValueType();
3079 if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||
3080 !isa<ConstantSDNode>(Ins1.getOperand(2)) ||
3081 !isa<ConstantSDNode>(Ins2.getOperand(2)) ||
3082 (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))
3083 return false;
3084
3085 unsigned Lane1 = Ins1.getConstantOperandVal(2);
3086 unsigned Lane2 = Ins2.getConstantOperandVal(2);
3087 if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)
3088 return false;
3089
3090 // If the inserted values will be able to use T/B already, leave it to the
3091 // existing tablegen patterns. For example VCVTT/VCVTB.
3092 SDValue Val1 = Ins1.getOperand(1);
3093 SDValue Val2 = Ins2.getOperand(1);
3094 if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)
3095 return false;
3096
3097 // Check if the inserted values are both extracts.
3098 if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
3099 Val1.getOpcode() == ARMISD::VGETLANEu) &&
3101 Val2.getOpcode() == ARMISD::VGETLANEu) &&
3104 (Val1.getOperand(0).getValueType() == MVT::v8f16 ||
3105 Val1.getOperand(0).getValueType() == MVT::v8i16) &&
3106 (Val2.getOperand(0).getValueType() == MVT::v8f16 ||
3107 Val2.getOperand(0).getValueType() == MVT::v8i16)) {
3108 unsigned ExtractLane1 = Val1.getConstantOperandVal(1);
3109 unsigned ExtractLane2 = Val2.getConstantOperandVal(1);
3110
3111 // If the two extracted lanes are from the same place and adjacent, this
3112 // simplifies into a f32 lane move.
3113 if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 &&
3114 ExtractLane1 == ExtractLane2 + 1) {
3115 SDValue NewExt = CurDAG->getTargetExtractSubreg(
3116 ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0));
3117 SDValue NewIns = CurDAG->getTargetInsertSubreg(
3118 ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0),
3119 NewExt);
3120 ReplaceUses(Ins1, NewIns);
3121 return true;
3122 }
3123
3124 // Else v8i16 pattern of an extract and an insert, with a optional vmovx for
3125 // extracting odd lanes.
3126 if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) {
3127 SDValue Inp1 = CurDAG->getTargetExtractSubreg(
3128 ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0));
3129 SDValue Inp2 = CurDAG->getTargetExtractSubreg(
3130 ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0));
3131 if (ExtractLane1 % 2 != 0)
3132 Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1), 0);
3133 if (ExtractLane2 % 2 != 0)
3134 Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2), 0);
3135 SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1);
3136 SDValue NewIns =
3137 CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
3138 Ins2.getOperand(0), SDValue(VINS, 0));
3139 ReplaceUses(Ins1, NewIns);
3140 return true;
3141 }
3142 }
3143
3144 // The inserted values are not extracted - if they are f16 then insert them
3145 // directly using a VINS.
3146 if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) {
3147 SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1);
3148 SDValue NewIns =
3149 CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
3150 Ins2.getOperand(0), SDValue(VINS, 0));
3151 ReplaceUses(Ins1, NewIns);
3152 return true;
3153 }
3154
3155 return false;
3156}
3157
3158bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N,
3159 SDNode *FMul,
3160 bool IsUnsigned,
3161 bool FixedToFloat) {
3162 auto Type = N->getValueType(0);
3163 unsigned ScalarBits = Type.getScalarSizeInBits();
3164 if (ScalarBits > 32)
3165 return false;
3166
3167 SDNodeFlags FMulFlags = FMul->getFlags();
3168 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3169 // allowed in 16 bit unsigned floats
3170 if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned)
3171 return false;
3172
3173 SDValue ImmNode = FMul->getOperand(1);
3174 SDValue VecVal = FMul->getOperand(0);
3175 if (VecVal->getOpcode() == ISD::UINT_TO_FP ||
3176 VecVal->getOpcode() == ISD::SINT_TO_FP)
3177 VecVal = VecVal->getOperand(0);
3178
3179 if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits)
3180 return false;
3181
3182 if (ImmNode.getOpcode() == ISD::BITCAST) {
3183 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3184 return false;
3185 ImmNode = ImmNode.getOperand(0);
3186 }
3187
3188 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3189 return false;
3190
3191 APFloat ImmAPF(0.0f);
3192 switch (ImmNode.getOpcode()) {
3193 case ARMISD::VMOVIMM:
3194 case ARMISD::VDUP: {
3195 if (!isa<ConstantSDNode>(ImmNode.getOperand(0)))
3196 return false;
3197 unsigned Imm = ImmNode.getConstantOperandVal(0);
3198 if (ImmNode.getOpcode() == ARMISD::VMOVIMM)
3199 Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits);
3200 ImmAPF =
3201 APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(),
3202 APInt(ScalarBits, Imm));
3203 break;
3204 }
3205 case ARMISD::VMOVFPIMM: {
3207 break;
3208 }
3209 default:
3210 return false;
3211 }
3212
3213 // Where n is the number of fractional bits, multiplying by 2^n will convert
3214 // from float to fixed and multiplying by 2^-n will convert from fixed to
3215 // float. Taking log2 of the factor (after taking the inverse in the case of
3216 // float to fixed) will give n.
3217 APFloat ToConvert = ImmAPF;
3218 if (FixedToFloat) {
3219 if (!ImmAPF.getExactInverse(&ToConvert))
3220 return false;
3221 }
3222 APSInt Converted(64, false);
3223 bool IsExact;
3225 &IsExact);
3226 if (!IsExact || !Converted.isPowerOf2())
3227 return false;
3228
3229 unsigned FracBits = Converted.logBase2();
3230 if (FracBits > ScalarBits)
3231 return false;
3232
3234 VecVal, CurDAG->getConstant(FracBits, SDLoc(N), MVT::i32)};
3235 AddEmptyMVEPredicateToOps(Ops, SDLoc(N), Type);
3236
3237 unsigned int Opcode;
3238 switch (ScalarBits) {
3239 case 16:
3240 if (FixedToFloat)
3241 Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix;
3242 else
3243 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3244 break;
3245 case 32:
3246 if (FixedToFloat)
3247 Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix;
3248 else
3249 Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3250 break;
3251 default:
3252 llvm_unreachable("unexpected number of scalar bits");
3253 break;
3254 }
3255
3256 ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), Type, Ops));
3257 return true;
3258}
3259
3260bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {
3261 // Transform a floating-point to fixed-point conversion to a VCVT
3262 if (!Subtarget->hasMVEFloatOps())
3263 return false;
3264 EVT Type = N->getValueType(0);
3265 if (!Type.isVector())
3266 return false;
3267 unsigned int ScalarBits = Type.getScalarSizeInBits();
3268
3269 bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT ||
3270 N->getOpcode() == ISD::FP_TO_UINT_SAT;
3271 SDNode *Node = N->getOperand(0).getNode();
3272
3273 // floating-point to fixed-point with one fractional bit gets turned into an
3274 // FP_TO_[U|S]INT(FADD (x, x)) rather than an FP_TO_[U|S]INT(FMUL (x, y))
3275 if (Node->getOpcode() == ISD::FADD) {
3276 if (Node->getOperand(0) != Node->getOperand(1))
3277 return false;
3278 SDNodeFlags Flags = Node->getFlags();
3279 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3280 // allowed in 16 bit unsigned floats
3281 if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned)
3282 return false;
3283
3284 unsigned Opcode;
3285 switch (ScalarBits) {
3286 case 16:
3287 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3288 break;
3289 case 32:
3290 Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3291 break;
3292 }
3293 SmallVector<SDValue, 3> Ops{Node->getOperand(0),
3294 CurDAG->getConstant(1, dl, MVT::i32)};
3295 AddEmptyMVEPredicateToOps(Ops, dl, Type);
3296
3297 ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops));
3298 return true;
3299 }
3300
3301 if (Node->getOpcode() != ISD::FMUL)
3302 return false;
3303
3304 return transformFixedFloatingPointConversion(N, Node, IsUnsigned, false);
3305}
3306
3307bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {
3308 // Transform a fixed-point to floating-point conversion to a VCVT
3309 if (!Subtarget->hasMVEFloatOps())
3310 return false;
3311 auto Type = N->getValueType(0);
3312 if (!Type.isVector())
3313 return false;
3314
3315 auto LHS = N->getOperand(0);
3316 if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP)
3317 return false;
3318
3319 return transformFixedFloatingPointConversion(
3320 N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true);
3321}
3322
3323bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
3324 if (!Subtarget->hasV6T2Ops())
3325 return false;
3326
3327 unsigned Opc = isSigned
3328 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
3329 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
3330 SDLoc dl(N);
3331
3332 // For unsigned extracts, check for a shift right and mask
3333 unsigned And_imm = 0;
3334 if (N->getOpcode() == ISD::AND) {
3335 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
3336
3337 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
3338 if (And_imm & (And_imm + 1))
3339 return false;
3340
3341 unsigned Srl_imm = 0;
3342 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
3343 Srl_imm)) {
3344 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3345
3346 // Mask off the unnecessary bits of the AND immediate; normally
3347 // DAGCombine will do this, but that might not happen if
3348 // targetShrinkDemandedConstant chooses a different immediate.
3349 And_imm &= -1U >> Srl_imm;
3350
3351 // Note: The width operand is encoded as width-1.
3352 unsigned Width = llvm::countr_one(And_imm) - 1;
3353 unsigned LSB = Srl_imm;
3354
3355 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3356
3357 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
3358 // It's cheaper to use a right shift to extract the top bits.
3359 if (Subtarget->isThumb()) {
3360 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
3361 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3362 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3363 getAL(CurDAG, dl), Reg0, Reg0 };
3364 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3365 return true;
3366 }
3367
3368 // ARM models shift instructions as MOVsi with shifter operand.
3370 SDValue ShOpc =
3371 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
3372 MVT::i32);
3373 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
3374 getAL(CurDAG, dl), Reg0, Reg0 };
3375 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
3376 return true;
3377 }
3378
3379 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3380 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3381 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3382 CurDAG->getTargetConstant(Width, dl, MVT::i32),
3383 getAL(CurDAG, dl), Reg0 };
3384 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3385 return true;
3386 }
3387 }
3388 return false;
3389 }
3390
3391 // Otherwise, we're looking for a shift of a shift
3392 unsigned Shl_imm = 0;
3393 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
3394 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
3395 unsigned Srl_imm = 0;
3396 if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
3397 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3398 // Note: The width operand is encoded as width-1.
3399 unsigned Width = 32 - Srl_imm - 1;
3400 int LSB = Srl_imm - Shl_imm;
3401 if (LSB < 0)
3402 return false;
3403 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3404 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3405 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3406 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3407 CurDAG->getTargetConstant(Width, dl, MVT::i32),
3408 getAL(CurDAG, dl), Reg0 };
3409 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3410 return true;
3411 }
3412 }
3413
3414 // Or we are looking for a shift of an and, with a mask operand
3415 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
3416 isShiftedMask_32(And_imm)) {
3417 unsigned Srl_imm = 0;
3418 unsigned LSB = llvm::countr_zero(And_imm);
3419 // Shift must be the same as the ands lsb
3420 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
3421 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3422 unsigned MSB = llvm::Log2_32(And_imm);
3423 // Note: The width operand is encoded as width-1.
3424 unsigned Width = MSB - LSB;
3425 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3426 assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3427 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3428 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
3429 CurDAG->getTargetConstant(Width, dl, MVT::i32),
3430 getAL(CurDAG, dl), Reg0 };
3431 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3432 return true;
3433 }
3434 }
3435
3436 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
3437 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
3438 unsigned LSB = 0;
3439 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
3440 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
3441 return false;
3442
3443 if (LSB + Width > 32)
3444 return false;
3445
3446 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3447 assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
3448 SDValue Ops[] = { N->getOperand(0).getOperand(0),
3449 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3450 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
3451 getAL(CurDAG, dl), Reg0 };
3452 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3453 return true;
3454 }
3455
3456 return false;
3457}
3458
3459/// We've got special pseudo-instructions for these
3460void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3461 unsigned Opcode;
3462 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
3463 if (MemTy == MVT::i8)
3464 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
3465 else if (MemTy == MVT::i16)
3466 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
3467 else if (MemTy == MVT::i32)
3468 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32;
3469 else
3470 llvm_unreachable("Unknown AtomicCmpSwap type");
3471
3472 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
3473 N->getOperand(0)};
3474 SDNode *CmpSwap = CurDAG->getMachineNode(
3475 Opcode, SDLoc(N),
3476 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
3477
3478 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
3479 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
3480
3481 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
3482 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
3483 CurDAG->RemoveDeadNode(N);
3484}
3485
3486static std::optional<std::pair<unsigned, unsigned>>
3488 unsigned FirstOne = A.getBitWidth() - A.countl_zero() - 1;
3489 unsigned LastOne = A.countr_zero();
3490 if (A.popcount() != (FirstOne - LastOne + 1))
3491 return std::nullopt;
3492 return std::make_pair(FirstOne, LastOne);
3493}
3494
// Try to replace (cmpz (and X, C), #0), where C's set bits form one
// contiguous run, with one or two flag-setting Thumb shifts that move the
// tested bits off the top/bottom of the register. If the single-set-bit
// form is used, the bit ends up in the sign bit, and \p SwitchEQNEToPLMI
// tells the caller to rewrite its EQ/NE condition code to PL/MI.
void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
  assert(N->getOpcode() == ARMISD::CMPZ);
  SwitchEQNEToPLMI = false;

  if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
    // LSR don't exist as standalone instructions - they need the barrel shifter.
    return;

  // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
  SDValue And = N->getOperand(0);
  if (!And->hasOneUse())
    return;

  SDValue Zero = N->getOperand(1);
  if (!isNullConstant(Zero) || And->getOpcode() != ISD::AND)
    return;
  SDValue X = And.getOperand(0);
  auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));

  if (!C)
    return;
  // Range = (index of highest set bit, index of lowest set bit), or nullopt
  // if the mask is not one contiguous run of ones.
  auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
  if (!Range)
    return;

  // There are several ways to lower this:
  SDNode *NewN;
  SDLoc dl(N);

  // Emit a flag-setting tLSLri/tLSRri by Imm (or, in Thumb2 mode, the
  // corresponding t2 shift). Note the Thumb1 form takes CPSR as an explicit
  // first operand.
  auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
    if (Subtarget->isThumb2()) {
      Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
      SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    } else {
      SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
                       CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                       getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    }
  };

  if (Range->second == 0) {
    //  1. Mask includes the LSB -> Simply shift the top N bits off
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == 31) {
    //  2. Mask includes the MSB -> Simply shift the bottom N bits off
    NewN = EmitShift(ARM::tLSRri, X, Range->second);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == Range->second) {
    //  3. Only one bit is set. We can shift this into the sign bit and use a
    //     PL/MI comparison. This is not safe if CMPZ has multiple uses because
    //     only one of them (the one currently being selected) will be switched
    //     to use the new condition code.
    if (!N->hasOneUse())
      return;
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);

    SwitchEQNEToPLMI = true;
  } else if (!Subtarget->hasV6T2Ops()) {
    //  4. Do a double shift to clear bottom and top bits, but only in
    //     thumb-1 mode as in thumb-2 we can use UBFX.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
                     Range->second + (31 - Range->first));
    ReplaceNode(And.getNode(), NewN);
  }
}
3568
3569static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3],
3570 unsigned Opc128[3]) {
3571 assert((VT.is64BitVector() || VT.is128BitVector()) &&
3572 "Unexpected vector shuffle length");
3573 switch (VT.getScalarSizeInBits()) {
3574 default:
3575 llvm_unreachable("Unexpected vector shuffle element size");
3576 case 8:
3577 return VT.is64BitVector() ? Opc64[0] : Opc128[0];
3578 case 16:
3579 return VT.is64BitVector() ? Opc64[1] : Opc128[1];
3580 case 32:
3581 return VT.is64BitVector() ? Opc64[2] : Opc128[2];
3582 }
3583}
3584
3585void ARMDAGToDAGISel::Select(SDNode *N) {
3586 SDLoc dl(N);
3587
3588 if (N->isMachineOpcode()) {
3589 N->setNodeId(-1);
3590 return; // Already selected.
3591 }
3592
3593 switch (N->getOpcode()) {
3594 default: break;
3595 case ISD::STORE: {
3596 // For Thumb1, match an sp-relative store in C++. This is a little
3597 // unfortunate, but I don't think I can make the chain check work
3598 // otherwise. (The chain of the store has to be the same as the chain
3599 // of the CopyFromReg, or else we can't replace the CopyFromReg with
3600 // a direct reference to "SP".)
3601 //
3602 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
3603 // a different addressing mode from other four-byte stores.
3604 //
3605 // This pattern usually comes up with call arguments.
3606 StoreSDNode *ST = cast<StoreSDNode>(N);
3607 SDValue Ptr = ST->getBasePtr();
3608 if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
3609 int RHSC = 0;
3610 if (Ptr.getOpcode() == ISD::ADD &&
3611 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
3612 Ptr = Ptr.getOperand(0);
3613
3614 if (Ptr.getOpcode() == ISD::CopyFromReg &&
3615 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
3616 Ptr.getOperand(0) == ST->getChain()) {
3617 SDValue Ops[] = {ST->getValue(),
3618 CurDAG->getRegister(ARM::SP, MVT::i32),
3619 CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
3620 getAL(CurDAG, dl),
3621 CurDAG->getRegister(0, MVT::i32),
3622 ST->getChain()};
3623 MachineSDNode *ResNode =
3624 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
3625 MachineMemOperand *MemOp = ST->getMemOperand();
3626 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3627 ReplaceNode(N, ResNode);
3628 return;
3629 }
3630 }
3631 break;
3632 }
3634 if (tryWriteRegister(N))
3635 return;
3636 break;
3637 case ISD::READ_REGISTER:
3638 if (tryReadRegister(N))
3639 return;
3640 break;
3641 case ISD::INLINEASM:
3642 case ISD::INLINEASM_BR:
3643 if (tryInlineAsm(N))
3644 return;
3645 break;
3646 case ISD::Constant: {
3647 unsigned Val = N->getAsZExtVal();
3648 // If we can't materialize the constant we need to use a literal pool
3649 if (ConstantMaterializationCost(Val, Subtarget) > 2 &&
3650 !Subtarget->genExecuteOnly()) {
3651 SDValue CPIdx = CurDAG->getTargetConstantPool(
3652 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
3653 TLI->getPointerTy(CurDAG->getDataLayout()));
3654
3655 SDNode *ResNode;
3656 if (Subtarget->isThumb()) {
3657 SDValue Ops[] = {
3658 CPIdx,
3659 getAL(CurDAG, dl),
3660 CurDAG->getRegister(0, MVT::i32),
3661 CurDAG->getEntryNode()
3662 };
3663 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
3664 Ops);
3665 } else {
3666 SDValue Ops[] = {
3667 CPIdx,
3668 CurDAG->getTargetConstant(0, dl, MVT::i32),
3669 getAL(CurDAG, dl),
3670 CurDAG->getRegister(0, MVT::i32),
3671 CurDAG->getEntryNode()
3672 };
3673 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
3674 Ops);
3675 }
3676 // Annotate the Node with memory operand information so that MachineInstr
3677 // queries work properly. This e.g. gives the register allocation the
3678 // required information for rematerialization.
3679 MachineFunction& MF = CurDAG->getMachineFunction();
3680 MachineMemOperand *MemOp =
3683
3684 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3685
3686 ReplaceNode(N, ResNode);
3687 return;
3688 }
3689
3690 // Other cases are autogenerated.
3691 break;
3692 }
3693 case ISD::FrameIndex: {
3694 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
3695 int FI = cast<FrameIndexSDNode>(N)->getIndex();
3696 SDValue TFI = CurDAG->getTargetFrameIndex(
3697 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
3698 if (Subtarget->isThumb1Only()) {
3699 // Set the alignment of the frame object to 4, to avoid having to generate
3700 // more than one ADD
3701 MachineFrameInfo &MFI = MF->getFrameInfo();
3702 if (MFI.getObjectAlign(FI) < Align(4))
3703 MFI.setObjectAlignment(FI, Align(4));
3704 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
3705 CurDAG->getTargetConstant(0, dl, MVT::i32));
3706 return;
3707 } else {
3708 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
3709 ARM::t2ADDri : ARM::ADDri);
3710 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
3711 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3712 CurDAG->getRegister(0, MVT::i32) };
3713 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3714 return;
3715 }
3716 }
3718 if (tryInsertVectorElt(N))
3719 return;
3720 break;
3721 }
3722 case ISD::SRL:
3723 if (tryV6T2BitfieldExtractOp(N, false))
3724 return;
3725 break;
3727 case ISD::SRA:
3728 if (tryV6T2BitfieldExtractOp(N, true))
3729 return;
3730 break;
3731 case ISD::FP_TO_UINT:
3732 case ISD::FP_TO_SINT:
3735 if (tryFP_TO_INT(N, dl))
3736 return;
3737 break;
3738 case ISD::FMUL:
3739 if (tryFMULFixed(N, dl))
3740 return;
3741 break;
3742 case ISD::MUL:
3743 if (Subtarget->isThumb1Only())
3744 break;
3745 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
3746 unsigned RHSV = C->getZExtValue();
3747 if (!RHSV) break;
3748 if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
3749 unsigned ShImm = Log2_32(RHSV-1);
3750 if (ShImm >= 32)
3751 break;
3752 SDValue V = N->getOperand(0);
3753 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3754 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3755 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3756 if (Subtarget->isThumb()) {
3757 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3758 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
3759 return;
3760 } else {
3761 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3762 Reg0 };
3763 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
3764 return;
3765 }
3766 }
3767 if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
3768 unsigned ShImm = Log2_32(RHSV+1);
3769 if (ShImm >= 32)
3770 break;
3771 SDValue V = N->getOperand(0);
3772 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3773 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3774 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3775 if (Subtarget->isThumb()) {
3776 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3777 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
3778 return;
3779 } else {
3780 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3781 Reg0 };
3782 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
3783 return;
3784 }
3785 }
3786 }
3787 break;
3788 case ISD::AND: {
3789 // Check for unsigned bitfield extract
3790 if (tryV6T2BitfieldExtractOp(N, false))
3791 return;
3792
3793 // If an immediate is used in an AND node, it is possible that the immediate
3794 // can be more optimally materialized when negated. If this is the case we
3795 // can negate the immediate and use a BIC instead.
3796 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
3797 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
3798 uint32_t Imm = (uint32_t) N1C->getZExtValue();
3799
3800 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
3801 // immediate can be negated and fit in the immediate operand of
3802 // a t2BIC, don't do any manual transform here as this can be
3803 // handled by the generic ISel machinery.
3804 bool PreferImmediateEncoding =
3805 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
3806 if (!PreferImmediateEncoding &&
3807 ConstantMaterializationCost(Imm, Subtarget) >
3808 ConstantMaterializationCost(~Imm, Subtarget)) {
3809 // The current immediate costs more to materialize than a negated
3810 // immediate, so negate the immediate and use a BIC.
3811 SDValue NewImm = CurDAG->getConstant(~Imm, dl, MVT::i32);
3812 // If the new constant didn't exist before, reposition it in the topological
3813 // ordering so it is just before N. Otherwise, don't touch its location.
3814 if (NewImm->getNodeId() == -1)
3815 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
3816
3817 if (!Subtarget->hasThumb2()) {
3818 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
3819 N->getOperand(0), NewImm, getAL(CurDAG, dl),
3820 CurDAG->getRegister(0, MVT::i32)};
3821 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
3822 return;
3823 } else {
3824 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
3825 CurDAG->getRegister(0, MVT::i32),
3826 CurDAG->getRegister(0, MVT::i32)};
3827 ReplaceNode(N,
3828 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
3829 return;
3830 }
3831 }
3832 }
3833
3834 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
3835 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
3836 // are entirely contributed by c2 and lower 16-bits are entirely contributed
3837 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
3838 // Select it to: "movt x, ((c1 & 0xffff) >> 16)
3839 EVT VT = N->getValueType(0);
3840 if (VT != MVT::i32)
3841 break;
3842 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
3843 ? ARM::t2MOVTi16
3844 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
3845 if (!Opc)
3846 break;
3847 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3848 N1C = dyn_cast<ConstantSDNode>(N1);
3849 if (!N1C)
3850 break;
3851 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
3852 SDValue N2 = N0.getOperand(1);
3853 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
3854 if (!N2C)
3855 break;
3856 unsigned N1CVal = N1C->getZExtValue();
3857 unsigned N2CVal = N2C->getZExtValue();
3858 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
3859 (N1CVal & 0xffffU) == 0xffffU &&
3860 (N2CVal & 0xffffU) == 0x0U) {
3861 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
3862 dl, MVT::i32);
3863 SDValue Ops[] = { N0.getOperand(0), Imm16,
3864 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
3865 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
3866 return;
3867 }
3868 }
3869
3870 break;
3871 }
3872 case ARMISD::UMAAL: {
3873 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3874 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3875 N->getOperand(2), N->getOperand(3),
3876 getAL(CurDAG, dl),
3877 CurDAG->getRegister(0, MVT::i32) };
3878 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
3879 return;
3880 }
3881 case ARMISD::UMLAL:{
3882 if (Subtarget->isThumb()) {
3883 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3884 N->getOperand(3), getAL(CurDAG, dl),
3885 CurDAG->getRegister(0, MVT::i32)};
3886 ReplaceNode(
3887 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
3888 return;
3889 }else{
3890 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3891 N->getOperand(3), getAL(CurDAG, dl),
3892 CurDAG->getRegister(0, MVT::i32),
3893 CurDAG->getRegister(0, MVT::i32) };
3894 ReplaceNode(N, CurDAG->getMachineNode(
3895 Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3896 MVT::i32, MVT::i32, Ops));
3897 return;
3898 }
3899 }
3900 case ARMISD::SMLAL:{
3901 if (Subtarget->isThumb()) {
3902 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3903 N->getOperand(3), getAL(CurDAG, dl),
3904 CurDAG->getRegister(0, MVT::i32)};
3905 ReplaceNode(
3906 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3907 return;
3908 }else{
3909 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3910 N->getOperand(3), getAL(CurDAG, dl),
3911 CurDAG->getRegister(0, MVT::i32),
3912 CurDAG->getRegister(0, MVT::i32) };
3913 ReplaceNode(N, CurDAG->getMachineNode(
3914 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3915 MVT::i32, MVT::i32, Ops));
3916 return;
3917 }
3918 }
3919 case ARMISD::SUBE: {
3920 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
3921 break;
3922 // Look for a pattern to match SMMLS
3923 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
3924 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
3925 N->getOperand(2).getOpcode() != ARMISD::SUBC ||
3926 !SDValue(N, 1).use_empty())
3927 break;
3928
3929 if (Subtarget->isThumb())
3930 assert(Subtarget->hasThumb2() &&
3931 "This pattern should not be generated for Thumb");
3932
3933 SDValue SmulLoHi = N->getOperand(1);
3934 SDValue Subc = N->getOperand(2);
3935 SDValue Zero = Subc.getOperand(0);
3936
3937 if (!isNullConstant(Zero) || Subc.getOperand(1) != SmulLoHi.getValue(0) ||
3938 N->getOperand(1) != SmulLoHi.getValue(1) ||
3939 N->getOperand(2) != Subc.getValue(1))
3940 break;
3941
3942 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3943 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
3944 N->getOperand(0), getAL(CurDAG, dl),
3945 CurDAG->getRegister(0, MVT::i32) };
3946 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
3947 return;
3948 }
3949 case ISD::LOAD: {
3950 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3951 return;
3952 if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
3953 if (tryT2IndexedLoad(N))
3954 return;
3955 } else if (Subtarget->isThumb()) {
3956 if (tryT1IndexedLoad(N))
3957 return;
3958 } else if (tryARMIndexedLoad(N))
3959 return;
3960 // Other cases are autogenerated.
3961 break;
3962 }
3963 case ISD::MLOAD:
3964 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3965 return;
3966 // Other cases are autogenerated.
3967 break;
3968 case ARMISD::LDRD: {
3969 if (Subtarget->isThumb2())
3970 break; // TableGen handles isel in this case.
3971 SDValue Base, RegOffset, ImmOffset;
3972 const SDValue &Chain = N->getOperand(0);
3973 const SDValue &Addr = N->getOperand(1);
3974 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
3975 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
3976 // The register-offset variant of LDRD mandates that the register
3977 // allocated to RegOffset is not reused in any of the remaining operands.
3978 // This restriction is currently not enforced. Therefore emitting this
3979 // variant is explicitly avoided.
3980 Base = Addr;
3981 RegOffset = CurDAG->getRegister(0, MVT::i32);
3982 }
3983 SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
3984 SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
3985 {MVT::Untyped, MVT::Other}, Ops);
3986 SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
3987 SDValue(New, 0));
3988 SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
3989 SDValue(New, 0));
3990 transferMemOperands(N, New);
3991 ReplaceUses(SDValue(N, 0), Lo);
3992 ReplaceUses(SDValue(N, 1), Hi);
3993 ReplaceUses(SDValue(N, 2), SDValue(New, 1));
3994 CurDAG->RemoveDeadNode(N);
3995 return;
3996 }
3997 case ARMISD::STRD: {
3998 if (Subtarget->isThumb2())
3999 break; // TableGen handles isel in this case.
4000 SDValue Base, RegOffset, ImmOffset;
4001 const SDValue &Chain = N->getOperand(0);
4002 const SDValue &Addr = N->getOperand(3);
4003 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
4004 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
4005 // The register-offset variant of STRD mandates that the register
4006 // allocated to RegOffset is not reused in any of the remaining operands.
4007 // This restriction is currently not enforced. Therefore emitting this
4008 // variant is explicitly avoided.
4009 Base = Addr;
4010 RegOffset = CurDAG->getRegister(0, MVT::i32);
4011 }
4012 SDNode *RegPair =
4013 createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2));
4014 SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
4015 SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops);
4016 transferMemOperands(N, New);
4017 ReplaceUses(SDValue(N, 0), SDValue(New, 0));
4018 CurDAG->RemoveDeadNode(N);
4019 return;
4020 }
4021 case ARMISD::BRCOND: {
4022 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4023 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4024 // Pattern complexity = 6 cost = 1 size = 0
4025
4026 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4027 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
4028 // Pattern complexity = 6 cost = 1 size = 0
4029
4030 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4031 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4032 // Pattern complexity = 6 cost = 1 size = 0
4033
4034 unsigned Opc = Subtarget->isThumb() ?
4035 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
4036 SDValue Chain = N->getOperand(0);
4037 SDValue N1 = N->getOperand(1);
4038 SDValue N2 = N->getOperand(2);
4039 SDValue Flags = N->getOperand(3);
4042
4043 unsigned CC = (unsigned)N2->getAsZExtVal();
4044
4045 if (Flags.getOpcode() == ARMISD::CMPZ) {
4046 if (Flags.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
4047 SDValue Int = Flags.getOperand(0);
4048 uint64_t ID = Int->getConstantOperandVal(1);
4049
4050 // Handle low-overhead loops.
4051 if (ID == Intrinsic::loop_decrement_reg) {
4052 SDValue Elements = Int.getOperand(2);
4053 SDValue Size = CurDAG->getTargetConstant(Int.getConstantOperandVal(3),
4054 dl, MVT::i32);
4055
4056 SDValue Args[] = { Elements, Size, Int.getOperand(0) };
4057 SDNode *LoopDec =
4058 CurDAG->getMachineNode(ARM::t2LoopDec, dl,
4059 CurDAG->getVTList(MVT::i32, MVT::Other),
4060 Args);
4061 ReplaceUses(Int.getNode(), LoopDec);
4062
4063 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
4064 SDNode *LoopEnd =
4065 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
4066
4067 ReplaceUses(N, LoopEnd);
4068 CurDAG->RemoveDeadNode(N);
4069 CurDAG->RemoveDeadNode(Flags.getNode());
4070 CurDAG->RemoveDeadNode(Int.getNode());
4071 return;
4072 }
4073 }
4074
4075 bool SwitchEQNEToPLMI;
4076 SelectCMPZ(Flags.getNode(), SwitchEQNEToPLMI);
4077 Flags = N->getOperand(3);
4078
4079 if (SwitchEQNEToPLMI) {
4080 switch ((ARMCC::CondCodes)CC) {
4081 default: llvm_unreachable("CMPZ must be either NE or EQ!");
4082 case ARMCC::NE:
4083 CC = (unsigned)ARMCC::MI;
4084 break;
4085 case ARMCC::EQ:
4086 CC = (unsigned)ARMCC::PL;
4087 break;
4088 }
4089 }
4090 }
4091
4092 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
4093 Chain = CurDAG->getCopyToReg(Chain, dl, ARM::CPSR, Flags, SDValue());
4094 SDValue Ops[] = {N1, Tmp2, CurDAG->getRegister(ARM::CPSR, MVT::i32), Chain,
4095 Chain.getValue(1)};
4096 CurDAG->SelectNodeTo(N, Opc, MVT::Other, Ops);
4097 return;
4098 }
4099
4100 case ARMISD::CMPZ: {
4101 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
4102 // This allows us to avoid materializing the expensive negative constant.
4103 // The CMPZ #0 is useless and will be peepholed away but we need to keep
4104 // it for its flags output.
4105 SDValue X = N->getOperand(0);
4106 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
4107 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
4108 int64_t Addend = -C->getSExtValue();
4109
4110 SDNode *Add = nullptr;
4111 // ADDS can be better than CMN if the immediate fits in a
4112 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
4113 // Outside that range we can just use a CMN which is 32-bit but has a
4114 // 12-bit immediate range.
4115 if (Addend < 1<<8) {
4116 if (Subtarget->isThumb2()) {
4117 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
4118 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
4119 CurDAG->getRegister(0, MVT::i32) };
4120 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
4121 } else {
4122 unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
4123 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
4124 CurDAG->getTargetConstant(Addend, dl, MVT::i32),
4125 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
4126 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
4127 }
4128 }
4129 if (Add) {
4130 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
4131 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, N->getVTList(), Ops2);
4132 }
4133 }
4134 // Other cases are autogenerated.
4135 break;
4136 }
4137
4138 case ARMISD::CMOV: {
4139 SDValue Flags = N->getOperand(3);
4140
4141 if (Flags.getOpcode() == ARMISD::CMPZ) {
4142 bool SwitchEQNEToPLMI;
4143 SelectCMPZ(Flags.getNode(), SwitchEQNEToPLMI);
4144
4145 if (SwitchEQNEToPLMI) {
4146 SDValue ARMcc = N->getOperand(2);
4148
4149 switch (CC) {
4150 default: llvm_unreachable("CMPZ must be either NE or EQ!");
4151 case ARMCC::NE:
4152 CC = ARMCC::MI;
4153 break;
4154 case ARMCC::EQ:
4155 CC = ARMCC::PL;
4156 break;
4157 }
4158 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
4159 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
4160 N->getOperand(3)};
4161 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
4162 }
4163 }
4164 // Other cases are autogenerated.
4165 break;
4166 }
4167 case ARMISD::VZIP: {
4168 EVT VT = N->getValueType(0);
4169 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4170 unsigned Opc64[] = {ARM::VZIPd8, ARM::VZIPd16, ARM::VTRNd32};
4171 unsigned Opc128[] = {ARM::VZIPq8, ARM::VZIPq16, ARM::VZIPq32};
4172 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4173 SDValue Pred = getAL(CurDAG, dl);
4174 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4175 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4176 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4177 return;
4178 }
4179 case ARMISD::VUZP: {
4180 EVT VT = N->getValueType(0);
4181 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4182 unsigned Opc64[] = {ARM::VUZPd8, ARM::VUZPd16, ARM::VTRNd32};
4183 unsigned Opc128[] = {ARM::VUZPq8, ARM::VUZPq16, ARM::VUZPq32};
4184 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4185 SDValue Pred = getAL(CurDAG, dl);
4186 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4187 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4188 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4189 return;
4190 }
4191 case ARMISD::VTRN: {
4192 EVT VT = N->getValueType(0);
4193 unsigned Opc64[] = {ARM::VTRNd8, ARM::VTRNd16, ARM::VTRNd32};
4194 unsigned Opc128[] = {ARM::VTRNq8, ARM::VTRNq16, ARM::VTRNq32};
4195 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4196 SDValue Pred = getAL(CurDAG, dl);
4197 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4198 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4199 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4200 return;
4201 }
4202 case ARMISD::BUILD_VECTOR: {
4203 EVT VecVT = N->getValueType(0);
4204 EVT EltVT = VecVT.getVectorElementType();
4205 unsigned NumElts = VecVT.getVectorNumElements();
4206 if (EltVT == MVT::f64) {
4207 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
4208 ReplaceNode(
4209 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
4210 return;
4211 }
4212 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
4213 if (NumElts == 2) {
4214 ReplaceNode(
4215 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
4216 return;
4217 }
4218 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
4219 ReplaceNode(N,
4220 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
4221 N->getOperand(2), N->getOperand(3)));
4222 return;
4223 }
4224
4225 case ARMISD::VLD1DUP: {
4226 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
4227 ARM::VLD1DUPd32 };
4228 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
4229 ARM::VLD1DUPq32 };
4230 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
4231 return;
4232 }
4233
4234 case ARMISD::VLD2DUP: {
4235 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4236 ARM::VLD2DUPd32 };
4237 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
4238 return;
4239 }
4240
4241 case ARMISD::VLD3DUP: {
4242 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
4243 ARM::VLD3DUPd16Pseudo,
4244 ARM::VLD3DUPd32Pseudo };
4245 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
4246 return;
4247 }
4248
4249 case ARMISD::VLD4DUP: {
4250 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
4251 ARM::VLD4DUPd16Pseudo,
4252 ARM::VLD4DUPd32Pseudo };
4253 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
4254 return;
4255 }
4256
4257 case ARMISD::VLD1DUP_UPD: {
4258 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
4259 ARM::VLD1DUPd16wb_fixed,
4260 ARM::VLD1DUPd32wb_fixed };
4261 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
4262 ARM::VLD1DUPq16wb_fixed,
4263 ARM::VLD1DUPq32wb_fixed };
4264 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
4265 return;
4266 }
4267
4268 case ARMISD::VLD2DUP_UPD: {
4269 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed,
4270 ARM::VLD2DUPd16wb_fixed,
4271 ARM::VLD2DUPd32wb_fixed,
4272 ARM::VLD1q64wb_fixed };
4273 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4274 ARM::VLD2DUPq16EvenPseudo,
4275 ARM::VLD2DUPq32EvenPseudo };
4276 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed,
4277 ARM::VLD2DUPq16OddPseudoWB_fixed,
4278 ARM::VLD2DUPq32OddPseudoWB_fixed };
4279 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0, QOpcodes1);
4280 return;
4281 }
4282
4283 case ARMISD::VLD3DUP_UPD: {
4284 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
4285 ARM::VLD3DUPd16Pseudo_UPD,
4286 ARM::VLD3DUPd32Pseudo_UPD,
4287 ARM::VLD1d64TPseudoWB_fixed };
4288 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4289 ARM::VLD3DUPq16EvenPseudo,
4290 ARM::VLD3DUPq32EvenPseudo };
4291 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD,
4292 ARM::VLD3DUPq16OddPseudo_UPD,
4293 ARM::VLD3DUPq32OddPseudo_UPD };
4294 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4295 return;
4296 }
4297
4298 case ARMISD::VLD4DUP_UPD: {
4299 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
4300 ARM::VLD4DUPd16Pseudo_UPD,
4301 ARM::VLD4DUPd32Pseudo_UPD,
4302 ARM::VLD1d64QPseudoWB_fixed };
4303 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4304 ARM::VLD4DUPq16EvenPseudo,
4305 ARM::VLD4DUPq32EvenPseudo };
4306 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD,
4307 ARM::VLD4DUPq16OddPseudo_UPD,
4308 ARM::VLD4DUPq32OddPseudo_UPD };
4309 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4310 return;
4311 }
4312
4313 case ARMISD::VLD1_UPD: {
4314 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
4315 ARM::VLD1d16wb_fixed,
4316 ARM::VLD1d32wb_fixed,
4317 ARM::VLD1d64wb_fixed };
4318 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
4319 ARM::VLD1q16wb_fixed,
4320 ARM::VLD1q32wb_fixed,
4321 ARM::VLD1q64wb_fixed };
4322 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
4323 return;
4324 }
4325
4326 case ARMISD::VLD2_UPD: {
4327 if (Subtarget->hasNEON()) {
4328 static const uint16_t DOpcodes[] = {
4329 ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
4330 ARM::VLD1q64wb_fixed};
4331 static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
4332 ARM::VLD2q16PseudoWB_fixed,
4333 ARM::VLD2q32PseudoWB_fixed};
4334 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
4335 } else {
4336 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
4337 ARM::MVE_VLD21_8_wb};
4338 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
4339 ARM::MVE_VLD21_16_wb};
4340 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
4341 ARM::MVE_VLD21_32_wb};
4342 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4343 SelectMVE_VLD(N, 2, Opcodes, true);
4344 }
4345 return;
4346 }
4347
4348 case ARMISD::VLD3_UPD: {
4349 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
4350 ARM::VLD3d16Pseudo_UPD,
4351 ARM::VLD3d32Pseudo_UPD,
4352 ARM::VLD1d64TPseudoWB_fixed};
4353 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4354 ARM::VLD3q16Pseudo_UPD,
4355 ARM::VLD3q32Pseudo_UPD };
4356 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
4357 ARM::VLD3q16oddPseudo_UPD,
4358 ARM::VLD3q32oddPseudo_UPD };
4359 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4360 return;
4361 }
4362
4363 case ARMISD::VLD4_UPD: {
4364 if (Subtarget->hasNEON()) {
4365 static const uint16_t DOpcodes[] = {
4366 ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD,
4367 ARM::VLD1d64QPseudoWB_fixed};
4368 static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
4369 ARM::VLD4q16Pseudo_UPD,
4370 ARM::VLD4q32Pseudo_UPD};
4371 static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
4372 ARM::VLD4q16oddPseudo_UPD,
4373 ARM::VLD4q32oddPseudo_UPD};
4374 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4375 } else {
4376 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
4377 ARM::MVE_VLD42_8,
4378 ARM::MVE_VLD43_8_wb};
4379 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
4380 ARM::MVE_VLD42_16,
4381 ARM::MVE_VLD43_16_wb};
4382 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
4383 ARM::MVE_VLD42_32,
4384 ARM::MVE_VLD43_32_wb};
4385 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4386 SelectMVE_VLD(N, 4, Opcodes, true);
4387 }
4388 return;
4389 }
4390
4391 case ARMISD::VLD1x2_UPD: {
4392 if (Subtarget->hasNEON()) {
4393 static const uint16_t DOpcodes[] = {
4394 ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed,
4395 ARM::VLD1q64wb_fixed};
4396 static const uint16_t QOpcodes[] = {
4397 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4398 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4399 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
4400 return;
4401 }
4402 break;
4403 }
4404
4405 case ARMISD::VLD1x3_UPD: {
4406 if (Subtarget->hasNEON()) {
4407 static const uint16_t DOpcodes[] = {
4408 ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed,
4409 ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed};
4410 static const uint16_t QOpcodes0[] = {
4411 ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,
4412 ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};
4413 static const uint16_t QOpcodes1[] = {
4414 ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD,
4415 ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD};
4416 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4417 return;
4418 }
4419 break;
4420 }
4421
4422 case ARMISD::VLD1x4_UPD: {
4423 if (Subtarget->hasNEON()) {
4424 static const uint16_t DOpcodes[] = {
4425 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4426 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4427 static const uint16_t QOpcodes0[] = {
4428 ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,
4429 ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};
4430 static const uint16_t QOpcodes1[] = {
4431 ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD,
4432 ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD};
4433 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4434 return;
4435 }
4436 break;
4437 }
4438
4439 case ARMISD::VLD2LN_UPD: {
4440 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
4441 ARM::VLD2LNd16Pseudo_UPD,
4442 ARM::VLD2LNd32Pseudo_UPD };
4443 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
4444 ARM::VLD2LNq32Pseudo_UPD };
4445 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
4446 return;
4447 }
4448
4449 case ARMISD::VLD3LN_UPD: {
4450 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
4451 ARM::VLD3LNd16Pseudo_UPD,
4452 ARM::VLD3LNd32Pseudo_UPD };
4453 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
4454 ARM::VLD3LNq32Pseudo_UPD };
4455 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
4456 return;
4457 }
4458
4459 case ARMISD::VLD4LN_UPD: {
4460 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
4461 ARM::VLD4LNd16Pseudo_UPD,
4462 ARM::VLD4LNd32Pseudo_UPD };
4463 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
4464 ARM::VLD4LNq32Pseudo_UPD };
4465 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
4466 return;
4467 }
4468
4469 case ARMISD::VST1_UPD: {
4470 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
4471 ARM::VST1d16wb_fixed,
4472 ARM::VST1d32wb_fixed,
4473 ARM::VST1d64wb_fixed };
4474 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
4475 ARM::VST1q16wb_fixed,
4476 ARM::VST1q32wb_fixed,
4477 ARM::VST1q64wb_fixed };
4478 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
4479 return;
4480 }
4481
4482 case ARMISD::VST2_UPD: {
4483 if (Subtarget->hasNEON()) {
4484 static const uint16_t DOpcodes[] = {
4485 ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
4486 ARM::VST1q64wb_fixed};
4487 static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
4488 ARM::VST2q16PseudoWB_fixed,
4489 ARM::VST2q32PseudoWB_fixed};
4490 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
4491 return;
4492 }
4493 break;
4494 }
4495
4496 case ARMISD::VST3_UPD: {
4497 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
4498 ARM::VST3d16Pseudo_UPD,
4499 ARM::VST3d32Pseudo_UPD,
4500 ARM::VST1d64TPseudoWB_fixed};
4501 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
4502 ARM::VST3q16Pseudo_UPD,
4503 ARM::VST3q32Pseudo_UPD };
4504 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
4505 ARM::VST3q16oddPseudo_UPD,
4506 ARM::VST3q32oddPseudo_UPD };
4507 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4508 return;
4509 }
4510
4511 case ARMISD::VST4_UPD: {
4512 if (Subtarget->hasNEON()) {
4513 static const uint16_t DOpcodes[] = {
4514 ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,
4515 ARM::VST1d64QPseudoWB_fixed};
4516 static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
4517 ARM::VST4q16Pseudo_UPD,
4518 ARM::VST4q32Pseudo_UPD};
4519 static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
4520 ARM::VST4q16oddPseudo_UPD,
4521 ARM::VST4q32oddPseudo_UPD};
4522 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4523 return;
4524 }
4525 break;
4526 }
4527
4528 case ARMISD::VST1x2_UPD: {
4529 if (Subtarget->hasNEON()) {
4530 static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed,
4531 ARM::VST1q16wb_fixed,
4532 ARM::VST1q32wb_fixed,
4533 ARM::VST1q64wb_fixed};
4534 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4535 ARM::VST1d16QPseudoWB_fixed,
4536 ARM::VST1d32QPseudoWB_fixed,
4537 ARM::VST1d64QPseudoWB_fixed };
4538 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
4539 return;
4540 }
4541 break;
4542 }
4543
4544 case ARMISD::VST1x3_UPD: {
4545 if (Subtarget->hasNEON()) {
4546 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed,
4547 ARM::VST1d16TPseudoWB_fixed,
4548 ARM::VST1d32TPseudoWB_fixed,
4549 ARM::VST1d64TPseudoWB_fixed };
4550 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
4551 ARM::VST1q16LowTPseudo_UPD,
4552 ARM::VST1q32LowTPseudo_UPD,
4553 ARM::VST1q64LowTPseudo_UPD };
4554 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD,
4555 ARM::VST1q16HighTPseudo_UPD,
4556 ARM::VST1q32HighTPseudo_UPD,
4557 ARM::VST1q64HighTPseudo_UPD };
4558 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4559 return;
4560 }
4561 break;
4562 }
4563
4564 case ARMISD::VST1x4_UPD: {
4565 if (Subtarget->hasNEON()) {
4566 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4567 ARM::VST1d16QPseudoWB_fixed,
4568 ARM::VST1d32QPseudoWB_fixed,
4569 ARM::VST1d64QPseudoWB_fixed };
4570 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
4571 ARM::VST1q16LowQPseudo_UPD,
4572 ARM::VST1q32LowQPseudo_UPD,
4573 ARM::VST1q64LowQPseudo_UPD };
4574 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD,
4575 ARM::VST1q16HighQPseudo_UPD,
4576 ARM::VST1q32HighQPseudo_UPD,
4577 ARM::VST1q64HighQPseudo_UPD };
4578 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4579 return;
4580 }
4581 break;
4582 }
4583 case ARMISD::VST2LN_UPD: {
4584 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
4585 ARM::VST2LNd16Pseudo_UPD,
4586 ARM::VST2LNd32Pseudo_UPD };
4587 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
4588 ARM::VST2LNq32Pseudo_UPD };
4589 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
4590 return;
4591 }
4592
4593 case ARMISD::VST3LN_UPD: {
4594 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
4595 ARM::VST3LNd16Pseudo_UPD,
4596 ARM::VST3LNd32Pseudo_UPD };
4597 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
4598 ARM::VST3LNq32Pseudo_UPD };
4599 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
4600 return;
4601 }
4602
4603 case ARMISD::VST4LN_UPD: {
4604 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
4605 ARM::VST4LNd16Pseudo_UPD,
4606 ARM::VST4LNd32Pseudo_UPD };
4607 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
4608 ARM::VST4LNq32Pseudo_UPD };
4609 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
4610 return;
4611 }
4612
4615 unsigned IntNo = N->getConstantOperandVal(1);
4616 switch (IntNo) {
4617 default:
4618 break;
4619
4620 case Intrinsic::arm_mrrc:
4621 case Intrinsic::arm_mrrc2: {
4622 SDLoc dl(N);
4623 SDValue Chain = N->getOperand(0);
4624 unsigned Opc;
4625
4626 if (Subtarget->isThumb())
4627 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
4628 else
4629 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
4630
4632 Ops.push_back(getI32Imm(N->getConstantOperandVal(2), dl)); /* coproc */
4633 Ops.push_back(getI32Imm(N->getConstantOperandVal(3), dl)); /* opc */
4634 Ops.push_back(getI32Imm(N->getConstantOperandVal(4), dl)); /* CRm */
4635
4636 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
4637 // instruction will always be '1111' but it is possible in assembly language to specify
4638 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
4639 if (Opc != ARM::MRRC2) {
4640 Ops.push_back(getAL(CurDAG, dl));
4641 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4642 }
4643
4644 Ops.push_back(Chain);
4645
4646 // Writes to two registers.
4647 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
4648
4649 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
4650 return;
4651 }
4652 case Intrinsic::arm_ldaexd:
4653 case Intrinsic::arm_ldrexd: {
4654 SDLoc dl(N);
4655 SDValue Chain = N->getOperand(0);
4656 SDValue MemAddr = N->getOperand(2);
4657 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
4658
4659 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
4660 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
4661 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
4662
4663 // arm_ldrexd returns a i64 value in {i32, i32}
4664 std::vector<EVT> ResTys;
4665 if (isThumb) {
4666 ResTys.push_back(MVT::i32);
4667 ResTys.push_back(MVT::i32);
4668 } else
4669 ResTys.push_back(MVT::Untyped);
4670 ResTys.push_back(MVT::Other);
4671
4672 // Place arguments in the right order.
4673 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
4674 CurDAG->getRegister(0, MVT::i32), Chain};
4675 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4676 // Transfer memoperands.
4677 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4678 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4679
4680 // Remap uses.
4681 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
4682 if (!SDValue(N, 0).use_empty()) {
4684 if (isThumb)
4685 Result = SDValue(Ld, 0);
4686 else {
4687 SDValue SubRegIdx =
4688 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
4689 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4690 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4691 Result = SDValue(ResNode,0);
4692 }
4693 ReplaceUses(SDValue(N, 0), Result);
4694 }
4695 if (!SDValue(N, 1).use_empty()) {
4697 if (isThumb)
4698 Result = SDValue(Ld, 1);
4699 else {
4700 SDValue SubRegIdx =
4701 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
4702 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4703 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4704 Result = SDValue(ResNode,0);
4705 }
4706 ReplaceUses(SDValue(N, 1), Result);
4707 }
4708 ReplaceUses(SDValue(N, 2), OutChain);
4709 CurDAG->RemoveDeadNode(N);
4710 return;
4711 }
4712 case Intrinsic::arm_stlexd:
4713 case Intrinsic::arm_strexd: {
4714 SDLoc dl(N);
4715 SDValue Chain = N->getOperand(0);
4716 SDValue Val0 = N->getOperand(2);
4717 SDValue Val1 = N->getOperand(3);
4718 SDValue MemAddr = N->getOperand(4);
4719
4720 // Store exclusive double return a i32 value which is the return status
4721 // of the issued store.
4722 const EVT ResTys[] = {MVT::i32, MVT::Other};
4723
4724 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
4725 // Place arguments in the right order.
4727 if (isThumb) {
4728 Ops.push_back(Val0);
4729 Ops.push_back(Val1);
4730 } else
4731 // arm_strexd uses GPRPair.
4732 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
4733 Ops.push_back(MemAddr);
4734 Ops.push_back(getAL(CurDAG, dl));
4735 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4736 Ops.push_back(Chain);
4737
4738 bool IsRelease = IntNo == Intrinsic::arm_stlexd;
4739 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
4740 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
4741
4742 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4743 // Transfer memoperands.
4744 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4745 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4746
4747 ReplaceNode(N, St);
4748 return;
4749 }
4750
4751 case Intrinsic::arm_neon_vld1: {
4752 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
4753 ARM::VLD1d32, ARM::VLD1d64 };
4754 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4755 ARM::VLD1q32, ARM::VLD1q64};
4756 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
4757 return;
4758 }
4759
4760 case Intrinsic::arm_neon_vld1x2: {
4761 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4762 ARM::VLD1q32, ARM::VLD1q64 };
4763 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
4764 ARM::VLD1d16QPseudo,
4765 ARM::VLD1d32QPseudo,
4766 ARM::VLD1d64QPseudo };
4767 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
4768 return;
4769 }
4770
4771 case Intrinsic::arm_neon_vld1x3: {
4772 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
4773 ARM::VLD1d16TPseudo,
4774 ARM::VLD1d32TPseudo,
4775 ARM::VLD1d64TPseudo };
4776 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
4777 ARM::VLD1q16LowTPseudo_UPD,
4778 ARM::VLD1q32LowTPseudo_UPD,
4779 ARM::VLD1q64LowTPseudo_UPD };
4780 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
4781 ARM::VLD1q16HighTPseudo,
4782 ARM::VLD1q32HighTPseudo,
4783 ARM::VLD1q64HighTPseudo };
4784 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4785 return;
4786 }
4787
4788 case Intrinsic::arm_neon_vld1x4: {
4789 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
4790 ARM::VLD1d16QPseudo,
4791 ARM::VLD1d32QPseudo,
4792 ARM::VLD1d64QPseudo };
4793 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
4794 ARM::VLD1q16LowQPseudo_UPD,
4795 ARM::VLD1q32LowQPseudo_UPD,
4796 ARM::VLD1q64LowQPseudo_UPD };
4797 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
4798 ARM::VLD1q16HighQPseudo,
4799 ARM::VLD1q32HighQPseudo,
4800 ARM::VLD1q64HighQPseudo };
4801 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4802 return;
4803 }
4804
4805 case Intrinsic::arm_neon_vld2: {
4806 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
4807 ARM::VLD2d32, ARM::VLD1q64 };
4808 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
4809 ARM::VLD2q32Pseudo };
4810 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
4811 return;
4812 }
4813
4814 case Intrinsic::arm_neon_vld3: {
4815 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
4816 ARM::VLD3d16Pseudo,
4817 ARM::VLD3d32Pseudo,
4818 ARM::VLD1d64TPseudo };
4819 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4820 ARM::VLD3q16Pseudo_UPD,
4821 ARM::VLD3q32Pseudo_UPD };
4822 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
4823 ARM::VLD3q16oddPseudo,
4824 ARM::VLD3q32oddPseudo };
4825 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4826 return;
4827 }
4828
4829 case Intrinsic::arm_neon_vld4: {
4830 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
4831 ARM::VLD4d16Pseudo,
4832 ARM::VLD4d32Pseudo,
4833 ARM::VLD1d64QPseudo };
4834 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
4835 ARM::VLD4q16Pseudo_UPD,
4836 ARM::VLD4q32Pseudo_UPD };
4837 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
4838 ARM::VLD4q16oddPseudo,
4839 ARM::VLD4q32oddPseudo };
4840 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4841 return;
4842 }
4843
4844 case Intrinsic::arm_neon_vld2dup: {
4845 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4846 ARM::VLD2DUPd32, ARM::VLD1q64 };
4847 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4848 ARM::VLD2DUPq16EvenPseudo,
4849 ARM::VLD2DUPq32EvenPseudo };
4850 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
4851 ARM::VLD2DUPq16OddPseudo,
4852 ARM::VLD2DUPq32OddPseudo };
4853 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
4854 DOpcodes, QOpcodes0, QOpcodes1);
4855 return;
4856 }
4857
4858 case Intrinsic::arm_neon_vld3dup: {
4859 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
4860 ARM::VLD3DUPd16Pseudo,
4861 ARM::VLD3DUPd32Pseudo,
4862 ARM::VLD1d64TPseudo };
4863 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4864 ARM::VLD3DUPq16EvenPseudo,
4865 ARM::VLD3DUPq32EvenPseudo };
4866 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
4867 ARM::VLD3DUPq16OddPseudo,
4868 ARM::VLD3DUPq32OddPseudo };
4869 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
4870 DOpcodes, QOpcodes0, QOpcodes1);
4871 return;
4872 }
4873
4874 case Intrinsic::arm_neon_vld4dup: {
4875 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
4876 ARM::VLD4DUPd16Pseudo,
4877 ARM::VLD4DUPd32Pseudo,
4878 ARM::VLD1d64QPseudo };
4879 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4880 ARM::VLD4DUPq16EvenPseudo,
4881 ARM::VLD4DUPq32EvenPseudo };
4882 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
4883 ARM::VLD4DUPq16OddPseudo,
4884 ARM::VLD4DUPq32OddPseudo };
4885 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
4886 DOpcodes, QOpcodes0, QOpcodes1);
4887 return;
4888 }
4889
4890 case Intrinsic::arm_neon_vld2lane: {
4891 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
4892 ARM::VLD2LNd16Pseudo,
4893 ARM::VLD2LNd32Pseudo };
4894 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
4895 ARM::VLD2LNq32Pseudo };
4896 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
4897 return;
4898 }
4899
4900 case Intrinsic::arm_neon_vld3lane: {
4901 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
4902 ARM::VLD3LNd16Pseudo,
4903 ARM::VLD3LNd32Pseudo };
4904 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
4905 ARM::VLD3LNq32Pseudo };
4906 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
4907 return;
4908 }
4909
4910 case Intrinsic::arm_neon_vld4lane: {
4911 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
4912 ARM::VLD4LNd16Pseudo,
4913 ARM::VLD4LNd32Pseudo };
4914 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
4915 ARM::VLD4LNq32Pseudo };
4916 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
4917 return;
4918 }
4919
4920 case Intrinsic::arm_neon_vst1: {
4921 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
4922 ARM::VST1d32, ARM::VST1d64 };
4923 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
4924 ARM::VST1q32, ARM::VST1q64 };
4925 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
4926 return;
4927 }
4928
4929 case Intrinsic::arm_neon_vst1x2: {
4930 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
4931 ARM::VST1q32, ARM::VST1q64 };
4932 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
4933 ARM::VST1d16QPseudo,
4934 ARM::VST1d32QPseudo,
4935 ARM::VST1d64QPseudo };
4936 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
4937 return;
4938 }
4939
4940 case Intrinsic::arm_neon_vst1x3: {
4941 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
4942 ARM::VST1d16TPseudo,
4943 ARM::VST1d32TPseudo,
4944 ARM::VST1d64TPseudo };
4945 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
4946 ARM::VST1q16LowTPseudo_UPD,
4947 ARM::VST1q32LowTPseudo_UPD,
4948 ARM::VST1q64LowTPseudo_UPD };
4949 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
4950 ARM::VST1q16HighTPseudo,
4951 ARM::VST1q32HighTPseudo,
4952 ARM::VST1q64HighTPseudo };
4953 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4954 return;
4955 }
4956
4957 case Intrinsic::arm_neon_vst1x4: {
4958 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
4959 ARM::VST1d16QPseudo,
4960 ARM::VST1d32QPseudo,
4961 ARM::VST1d64QPseudo };
4962 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
4963 ARM::VST1q16LowQPseudo_UPD,
4964 ARM::VST1q32LowQPseudo_UPD,
4965 ARM::VST1q64LowQPseudo_UPD };
4966 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
4967 ARM::VST1q16HighQPseudo,
4968 ARM::VST1q32HighQPseudo,
4969 ARM::VST1q64HighQPseudo };
4970 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4971 return;
4972 }
4973
4974 case Intrinsic::arm_neon_vst2: {
4975 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
4976 ARM::VST2d32, ARM::VST1q64 };
4977 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
4978 ARM::VST2q32Pseudo };
4979 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
4980 return;
4981 }
4982
4983 case Intrinsic::arm_neon_vst3: {
4984 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
4985 ARM::VST3d16Pseudo,
4986 ARM::VST3d32Pseudo,
4987 ARM::VST1d64TPseudo };
4988 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
4989 ARM::VST3q16Pseudo_UPD,
4990 ARM::VST3q32Pseudo_UPD };
4991 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
4992 ARM::VST3q16oddPseudo,
4993 ARM::VST3q32oddPseudo };
4994 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4995 return;
4996 }
4997
4998 case Intrinsic::arm_neon_vst4: {
4999 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
5000 ARM::VST4d16Pseudo,
5001 ARM::VST4d32Pseudo,
5002 ARM::VST1d64QPseudo };
5003 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
5004 ARM::VST4q16Pseudo_UPD,
5005 ARM::VST4q32Pseudo_UPD };
5006 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
5007 ARM::VST4q16oddPseudo,
5008 ARM::VST4q32oddPseudo };
5009 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
5010 return;
5011 }
5012
5013 case Intrinsic::arm_neon_vst2lane: {
5014 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
5015 ARM::VST2LNd16Pseudo,
5016 ARM::VST2LNd32Pseudo };
5017 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
5018 ARM::VST2LNq32Pseudo };
5019 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
5020 return;
5021 }
5022
5023 case Intrinsic::arm_neon_vst3lane: {
5024 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
5025 ARM::VST3LNd16Pseudo,
5026 ARM::VST3LNd32Pseudo };
5027 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
5028 ARM::VST3LNq32Pseudo };
5029 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
5030 return;
5031 }
5032
5033 case Intrinsic::arm_neon_vst4lane: {
5034 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
5035 ARM::VST4LNd16Pseudo,
5036 ARM::VST4LNd32Pseudo };
5037 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
5038 ARM::VST4LNq32Pseudo };
5039 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
5040 return;
5041 }
5042
5043 case Intrinsic::arm_mve_vldr_gather_base_wb:
5044 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
5045 static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
5046 ARM::MVE_VLDRDU64_qi_pre};
5047 SelectMVE_WB(N, Opcodes,
5048 IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
5049 return;
5050 }
5051
5052 case Intrinsic::arm_mve_vld2q: {
5053 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
5054 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
5055 ARM::MVE_VLD21_16};
5056 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
5057 ARM::MVE_VLD21_32};
5058 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5059 SelectMVE_VLD(N, 2, Opcodes, false);
5060 return;
5061 }
5062
5063 case Intrinsic::arm_mve_vld4q: {
5064 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
5065 ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
5066 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
5067 ARM::MVE_VLD42_16,
5068 ARM::MVE_VLD43_16};
5069 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
5070 ARM::MVE_VLD42_32,
5071 ARM::MVE_VLD43_32};
5072 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5073 SelectMVE_VLD(N, 4, Opcodes, false);
5074 return;
5075 }
5076 }
5077 break;
5078 }
5079
5081 unsigned IntNo = N->getConstantOperandVal(0);
5082 switch (IntNo) {
5083 default:
5084 break;
5085
5086 // Scalar f32 -> bf16
5087 case Intrinsic::arm_neon_vcvtbfp2bf: {
5088 SDLoc dl(N);
5089 const SDValue &Src = N->getOperand(1);
5090 llvm::EVT DestTy = N->getValueType(0);
5091 SDValue Pred = getAL(CurDAG, dl);
5092 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
5093 SDValue Ops[] = { Src, Src, Pred, Reg0 };
5094 CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops);
5095 return;
5096 }
5097
5098 // Vector v4f32 -> v4bf16
5099 case Intrinsic::arm_neon_vcvtfp2bf: {
5100 SDLoc dl(N);
5101 const SDValue &Src = N->getOperand(1);
5102 SDValue Pred = getAL(CurDAG, dl);
5103 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
5104 SDValue Ops[] = { Src, Pred, Reg0 };
5105 CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops);
5106 return;
5107 }
5108
5109 case Intrinsic::arm_mve_urshrl:
5110 SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
5111 return;
5112 case Intrinsic::arm_mve_uqshll:
5113 SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false);
5114 return;
5115 case Intrinsic::arm_mve_srshrl:
5116 SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false);
5117 return;
5118 case Intrinsic::arm_mve_sqshll:
5119 SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false);
5120 return;
5121 case Intrinsic::arm_mve_uqrshll:
5122 SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true);
5123 return;
5124 case Intrinsic::arm_mve_sqrshrl:
5125 SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
5126 return;
5127
5128 case Intrinsic::arm_mve_vadc:
5129 case Intrinsic::arm_mve_vadc_predicated:
5130 SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
5131 IntNo == Intrinsic::arm_mve_vadc_predicated);
5132 return;
5133 case Intrinsic::arm_mve_vsbc:
5134 case Intrinsic::arm_mve_vsbc_predicated:
5135 SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, false,
5136 IntNo == Intrinsic::arm_mve_vsbc_predicated);
5137 return;
5138 case Intrinsic::arm_mve_vshlc:
5139 case Intrinsic::arm_mve_vshlc_predicated:
5140 SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated);
5141 return;
5142
5143 case Intrinsic::arm_mve_vmlldava:
5144 case Intrinsic::arm_mve_vmlldava_predicated: {
5145 static const uint16_t OpcodesU[] = {
5146 ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,
5147 ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,
5148 };
5149 static const uint16_t OpcodesS[] = {
5150 ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,
5151 ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,
5152 ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,
5153 ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
5154 ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
5155 ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
5156 ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
5157 ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
5158 };
5159 SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
5160 OpcodesS, OpcodesU);
5161 return;
5162 }
5163
5164 case Intrinsic::arm_mve_vrmlldavha:
5165 case Intrinsic::arm_mve_vrmlldavha_predicated: {
5166 static const uint16_t OpcodesU[] = {
5167 ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
5168 };
5169 static const uint16_t OpcodesS[] = {
5170 ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
5171 ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
5172 ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
5173 ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
5174 };
5175 SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
5176 OpcodesS, OpcodesU);
5177 return;
5178 }
5179
5180 case Intrinsic::arm_mve_vidup:
5181 case Intrinsic::arm_mve_vidup_predicated: {
5182 static const uint16_t Opcodes[] = {
5183 ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
5184 };
5185 SelectMVE_VxDUP(N, Opcodes, false,
5186 IntNo == Intrinsic::arm_mve_vidup_predicated);
5187 return;
5188 }
5189
5190 case Intrinsic::arm_mve_vddup:
5191 case Intrinsic::arm_mve_vddup_predicated: {
5192 static const uint16_t Opcodes[] = {
5193 ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
5194 };
5195 SelectMVE_VxDUP(N, Opcodes, false,
5196 IntNo == Intrinsic::arm_mve_vddup_predicated);
5197 return;
5198 }
5199
5200 case Intrinsic::arm_mve_viwdup:
5201 case Intrinsic::arm_mve_viwdup_predicated: {
5202 static const uint16_t Opcodes[] = {
5203 ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
5204 };
5205 SelectMVE_VxDUP(N, Opcodes, true,
5206 IntNo == Intrinsic::arm_mve_viwdup_predicated);
5207 return;
5208 }
5209
5210 case Intrinsic::arm_mve_vdwdup:
5211 case Intrinsic::arm_mve_vdwdup_predicated: {
5212 static const uint16_t Opcodes[] = {
5213 ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
5214 };
5215 SelectMVE_VxDUP(N, Opcodes, true,
5216 IntNo == Intrinsic::arm_mve_vdwdup_predicated);
5217 return;
5218 }
5219
5220 case Intrinsic::arm_cde_cx1d:
5221 case Intrinsic::arm_cde_cx1da:
5222 case Intrinsic::arm_cde_cx2d:
5223 case Intrinsic::arm_cde_cx2da:
5224 case Intrinsic::arm_cde_cx3d:
5225 case Intrinsic::arm_cde_cx3da: {
5226 bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||
5227 IntNo == Intrinsic::arm_cde_cx2da ||
5228 IntNo == Intrinsic::arm_cde_cx3da;
5229 size_t NumExtraOps;
5230 uint16_t Opcode;
5231 switch (IntNo) {
5232 case Intrinsic::arm_cde_cx1d:
5233 case Intrinsic::arm_cde_cx1da:
5234 NumExtraOps = 0;
5235 Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
5236 break;
5237 case Intrinsic::arm_cde_cx2d:
5238 case Intrinsic::arm_cde_cx2da:
5239 NumExtraOps = 1;
5240 Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
5241 break;
5242 case Intrinsic::arm_cde_cx3d:
5243 case Intrinsic::arm_cde_cx3da:
5244 NumExtraOps = 2;
5245 Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
5246 break;
5247 default:
5248 llvm_unreachable("Unexpected opcode");
5249 }
5250 SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
5251 return;
5252 }
5253 }
5254 break;
5255 }
5256
5257 case ISD::ATOMIC_CMP_SWAP:
5258 SelectCMP_SWAP(N);
5259 return;
5260 }
5261
5262 SelectCode(N);
5263}
5264
5265// Inspect a register string of the form
5266// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
5267// cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
5268// and obtain the integer operands from them, adding these operands to the
5269// provided vector.
5271 SelectionDAG *CurDAG,
5272 const SDLoc &DL,
5273 std::vector<SDValue> &Ops) {
5275 RegString.split(Fields, ':');
5276
  // A single-field string is a named register handled elsewhere; only a
  // multi-field "cp<n>:<opc1>:..." form carries integer operands to extract.
  if (Fields.size() > 1) {
    bool AllIntFields = true;

    for (StringRef Field : Fields) {
      // Need to trim out leading 'cp' characters and get the integer field.
      // Note: getAsInteger returns true on failure, hence the negation when
      // accumulating the all-fields-parsed flag.
      unsigned IntField;
      AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
      Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
    }

    assert(AllIntFields &&
            "Unexpected non-integer value in special register string.");
    // Silence the unused-variable warning in release (NDEBUG) builds, where
    // the assert compiles away.
    (void)AllIntFields;
  }
5291}
5292
5293// Maps a Banked Register string to its mask value. The mask value returned is
5294// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
5295// mask operand, which expresses which register is to be used, e.g. r8, and in
5296// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
5297// was invalid.
5298static inline int getBankedRegisterMask(StringRef RegString) {
5299 auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
5300 if (!TheReg)
5301 return -1;
5302 return TheReg->Encoding;
5303}
5304
5305// The flags here are common to those allowed for apsr in the A class cores and
5306// those allowed for the special registers in the M class cores. Returns a
5307// value representing which flags were present, -1 if invalid.
5308static inline int getMClassFlagsMask(StringRef Flags) {
5309 return StringSwitch<int>(Flags)
5310 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
5311 // correct when flags are not permitted
5312 .Case("g", 0x1)
5313 .Case("nzcvq", 0x2)
5314 .Case("nzcvqg", 0x3)
5315 .Default(-1);
5316}
5317
5318// Maps MClass special registers string to its value for use in the
5319// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
5320// Returns -1 to signify that the string was invalid.
5321static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
5322 auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
5323 const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
5324 if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
5325 return -1;
5326 return (int)(TheReg->Encoding & 0xFFF); // SYSm value
5327}
5328
  // The mask operand contains the special register (R Bit) in bit 4, whether
  // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
  // bits 3-0 contains the fields to be accessed in the special register, set by
  // the flags provided with the register.
  // Returns -1 if the register name or flag string is invalid.
  int Mask = 0;
  if (Reg == "apsr") {
    // The flags permitted for apsr are the same flags that are allowed in
    // M class registers. We get the flag value and then shift the flags into
    // the correct place to combine with the mask.
    Mask = getMClassFlagsMask(Flags);
    if (Mask == -1)
      return -1;
    return Mask << 2;
  }

  if (Reg != "cpsr" && Reg != "spsr") {
    return -1;
  }

  // This is the same as if the flags were "fc" (0x8 | 0x1 = 0x9).
  // "all" is accepted as an alias for the empty flag string.
  if (Flags.empty() || Flags == "all")
    return Mask | 0x9;

  // Inspect the supplied flags string and set the bits in the mask for
  // the relevant and valid flags allowed for cpsr and spsr.
  // Each character maps to one field bit: c=0x1, x=0x2, s=0x4, f=0x8.
  for (char Flag : Flags) {
    int FlagVal;
    switch (Flag) {
    case 'c':
      FlagVal = 0x1;
      break;
    case 'x':
      FlagVal = 0x2;
      break;
    case 's':
      FlagVal = 0x4;
      break;
    case 'f':
      FlagVal = 0x8;
      break;
    default:
      // Unknown character; FlagVal == 0 is rejected below.
      FlagVal = 0;
    }

    // This avoids allowing strings where the same flag bit appears twice.
    if (!FlagVal || (Mask & FlagVal))
      return -1;
    Mask |= FlagVal;
  }

  // If the register is spsr then we need to set the R bit.
  if (Reg == "spsr")
    Mask |= 0x10;

  return Mask;
}
5386
// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
// Returns true if the node was replaced, false to fall back to default
// selection.
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
  // Operand 1 carries the metadata node whose first operand is the register
  // name string given to llvm.read_register.
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  // If the string names a coprocessor register by fields, this fills Ops
  // with the parsed integer operands; otherwise Ops stays empty.
  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MRC node (32 bit) or
    // MRRC node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5){
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
              "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      // 64-bit read produces two i32 results plus the chain.
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    // Append the common trailing operands: AL condition code, a null
    // register, and the incoming chain (operand 0 of the intrinsic node).
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }

  // Register names are matched case-insensitively from here on.
  std::string SpecialReg = RegString->getString().lower();

  // Banked registers are read via MRSbanked with their encoded mask.
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMRS)
                        .Case("fpexc", ARM::VMRS_FPEXC)
                        .Case("fpsid", ARM::VMRS_FPSID)
                        .Case("mvfr0", ARM::VMRS_MVFR0)
                        .Case("mvfr1", ARM::VMRS_MVFR1)
                        .Case("mvfr2", ARM::VMRS_MVFR2)
                        .Case("fpinst", ARM::VMRS_FPINST)
                        .Case("fpinst2", ARM::VMRS_FPINST2)
                        .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    // All of these require at least a VFP2 base; MVFR2 additionally
    // requires ARMv8 FP.
    if (!Subtarget->hasVFP2Base())
      return false;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
      return false;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // If the target is M Class then need to validate that the register string
  // is an acceptable value, so check that a mask can be constructed from the
  // string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M Class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
                                  MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Unrecognised register string: selection falls through.
  return false;
}
5501
// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes.
// Returns true if the node was replaced, false to fall back to default
// selection.
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
  // Operand 1 is the metadata node holding the register name string;
  // operand 2 (and operand 3 for the 64-bit form) carries the value to
  // write. Operand 0 is the chain.
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MCR node (32 bit) or
    // MCRR node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      // Insert the single 32-bit write value after the first two fields.
      Ops.insert(Ops.begin()+2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      // 64-bit write supplies two GPR values.
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
    }

    // Common trailing operands: AL condition code, null register, chain.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  // Register names are matched case-insensitively from here on.
  std::string SpecialReg = RegString->getString().lower();
  // Banked registers are written via MSRbanked with their encoded mask.
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMSR)
                        .Case("fpexc", ARM::VMSR_FPEXC)
                        .Case("fpsid", ARM::VMSR_FPSID)
                        .Case("fpinst", ARM::VMSR_FPINST)
                        .Case("fpinst2", ARM::VMSR_FPINST2)
                        .Default(0);

  if (Opcode) {
    // VFP system registers require at least a VFP2 base.
    if (!Subtarget->hasVFP2Base())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  // Split a name such as "cpsr_fc" into the register part ("cpsr") and the
  // flags part ("fc") around the last underscore.
  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;

  // If the target was M Class then need to validate the special register value
  // and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These values
  // are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  // Unrecognised register string: selection falls through.
  return false;
}
5605
5606bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
5607 std::vector<SDValue> AsmNodeOperands;
5608 InlineAsm::Flag Flag;
5609 bool Changed = false;
5610 unsigned NumOps = N->getNumOperands();
5611
5612 // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
5613 // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
5614 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
5615 // respectively. Since there is no constraint to explicitly specify a
5616 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
5617 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
5618 // them into a GPRPair.
5619
5620 SDLoc dl(N);
5621 SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps - 1) : SDValue();
5622
5623 SmallVector<bool, 8> OpChanged;
5624 // Glue node will be appended late.
5625 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
5626 SDValue op = N->getOperand(i);
5627 AsmNodeOperands.push_back(op);
5628
5630 continue;
5631
5632 if (const auto *C = dyn_cast<ConstantSDNode>(N->getOperand(i)))
5633 Flag = InlineAsm::Flag(C->getZExtValue());
5634 else
5635 continue;
5636
5637 // Immediate operands to inline asm in the SelectionDAG are modeled with
5638 // two operands. The first is a constant of value InlineAsm::Kind::Imm, and
5639 // the second is a constant with the value of the immediate. If we get here
5640 // and we have a Kind::Imm, skip the next operand, and continue.
5641 if (Flag.isImmKind()) {
5642 SDValue op = N->getOperand(++i);
5643 AsmNodeOperands.push_back(op);
5644 continue;
5645 }
5646
5647 const unsigned NumRegs = Flag.getNumOperandRegisters();
5648 if (NumRegs)
5649 OpChanged.push_back(false);
5650
5651 unsigned DefIdx = 0;
5652 bool IsTiedToChangedOp = false;
5653 // If it's a use that is tied with a previous def, it has no
5654 // reg class constraint.
5655 if (Changed && Flag.isUseOperandTiedToDef(DefIdx))
5656 IsTiedToChangedOp = OpChanged[DefIdx];
5657
5658 // Memory operands to inline asm in the SelectionDAG are modeled with two
5659 // operands: a constant of value InlineAsm::Kind::Mem followed by the input
5660 // operand. If we get here and we have a Kind::Mem, skip the next operand
5661 // (so it doesn't get misinterpreted), and continue. We do this here because
5662 // it's important to update the OpChanged array correctly before moving on.
5663 if (Flag.isMemKind()) {
5664 SDValue op = N->getOperand(++i);
5665 AsmNodeOperands.push_back(op);
5666 continue;
5667 }
5668
5669 if (!Flag.isRegUseKind() && !Flag.isRegDefKind() &&
5670 !Flag.isRegDefEarlyClobberKind())
5671 continue;
5672
5673 unsigned RC;
5674 const bool HasRC = Flag.hasRegClassConstraint(RC);
5675 if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
5676 || NumRegs != 2)
5677 continue;
5678
5679 assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
5680 SDValue V0 = N->getOperand(i+1);
5681 SDValue V1 = N->getOperand(i+2);
5682 Register Reg0 = cast<RegisterSDNode>(V0)->getReg();
5683 Register Reg1 = cast<RegisterSDNode>(V1)->getReg();
5684 SDValue PairedReg;
5685 MachineRegisterInfo &MRI = MF->getRegInfo();
5686
5687 if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) {
5688 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
5689 // the original GPRs.
5690
5691 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
5692 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
5693 SDValue Chain = SDValue(N,0);
5694
5695 SDNode *GU = N->getGluedUser();
5696 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
5697 Chain.getValue(1));
5698
5699 // Extract values from a GPRPair reg and copy to the original GPR reg.
5700 SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
5701 RegCopy);
5702 SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
5703 RegCopy);
5704 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
5705 RegCopy.getValue(1));
5706 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
5707
5708 // Update the original glue user.
5709 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
5710 Ops.push_back(T1.getValue(1));
5711 CurDAG->UpdateNodeOperands(GU, Ops);
5712 } else {
5713 // For Kind == InlineAsm::Kind::RegUse, we first copy two GPRs into a
5714 // GPRPair and then pass the GPRPair to the inline asm.
5715 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
5716
5717 // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
5718 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
5719 Chain.getValue(1));
5720 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
5721 T0.getValue(1));
5722 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
5723
5724 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
5725 // i32 VRs of inline asm with it.
5726 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
5727 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
5728 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
5729
5730 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
5731 Glue = Chain.getValue(1);
5732 }
5733
5734 Changed = true;
5735
5736 if(PairedReg.getNode()) {
5737 OpChanged[OpChanged.size() -1 ] = true;
5738 Flag = InlineAsm::Flag(Flag.getKind(), 1 /* RegNum*/);
5739 if (IsTiedToChangedOp)
5740 Flag.setMatchingOp(DefIdx);
5741 else
5742 Flag.setRegClass(ARM::GPRPairRegClassID);
5743 // Replace the current flag.
5744 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
5745 Flag, dl, MVT::i32);
5746 // Add the new register node and skip the original two GPRs.
5747 AsmNodeOperands.push_back(PairedReg);
5748 // Skip the next two GPRs.
5749 i += 2;
5750 }
5751 }
5752
5753 if (Glue.getNode())
5754 AsmNodeOperands.push_back(Glue);
5755 if (!Changed)
5756 return false;
5757
5758 SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
5759 CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
5760 New->setNodeId(-1);
5761 ReplaceNode(N, New.getNode());
5762 return true;
5763}
5764
5765bool ARMDAGToDAGISel::SelectInlineAsmMemoryOperand(
5766 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
5767 std::vector<SDValue> &OutOps) {
5768 switch(ConstraintID) {
5769 default:
5770 llvm_unreachable("Unexpected asm memory constraint");
5771 case InlineAsm::ConstraintCode::m:
5772 case InlineAsm::ConstraintCode::o:
5773 case InlineAsm::ConstraintCode::Q:
5774 case InlineAsm::ConstraintCode::Um:
5775 case InlineAsm::ConstraintCode::Un:
5776 case InlineAsm::ConstraintCode::Uq:
5777 case InlineAsm::ConstraintCode::Us:
5778 case InlineAsm::ConstraintCode::Ut:
5779 case InlineAsm::ConstraintCode::Uv:
5780 case InlineAsm::ConstraintCode::Uy:
5781 // Require the address to be in a register. That is safe for all ARM
5782 // variants and it is hard to do anything much smarter without knowing
5783 // how the operand is used.
5784 OutOps.push_back(Op);
5785 return false;
5786 }
5787 return true;
5788}
5789
5790/// createARMISelDag - This pass converts a legalized DAG into a
5791/// ARM-specific DAG, ready for instruction scheduling.
5792///
5794 CodeGenOptLevel OptLevel) {
5795 return new ARMDAGToDAGISelLegacy(TM, OptLevel);
5796}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
return SDValue()
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isThumb(const MCSubtargetInfo &STI)
static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3], unsigned Opc128[3])
static int getBankedRegisterMask(StringRef RegString)
static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs)
Returns true if the given increment is a Constant known to be equal to the access size performed by a...
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc)
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm)
static bool isVSTfixed(unsigned Opc)
static bool isVLDfixed(unsigned Opc)
static bool isInt32Immediate(SDNode *N, unsigned &Imm)
isInt32Immediate - This method tests to see if the node is a 32-bit constant operand.
static std::optional< std::pair< unsigned, unsigned > > getContiguousRangeOfSetBits(const APInt &A)
static void getIntOperandsFromRegisterString(StringRef RegString, SelectionDAG *CurDAG, const SDLoc &DL, std::vector< SDValue > &Ops)
static int getARClassRegisterMask(StringRef Reg, StringRef Flags)
static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget)
static cl::opt< bool > DisableShifterOp("disable-shifter-op", cl::Hidden, cl::desc("Disable isel of shifter-op"), cl::init(false))
static SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl)
getAL - Returns a ARMCC::AL immediate node.
static bool shouldUseZeroOffsetLdSt(SDValue N)
static int getMClassFlagsMask(StringRef Flags)
static bool SDValueToConstBool(SDValue SDVal)
static bool isScaledConstantInRange(SDValue Node, int Scale, int RangeMin, int RangeMax, int &ScaledConstant)
Check whether a particular node is a constant value representable as (N * Scale) where (N in [RangeMi...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool isSigned(unsigned int Opcode)
#define DEBUG_TYPE
#define op(i)
const HexagonInstrInfo * TII
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T1
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
OptimizedStructLayoutField Field
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
#define PASS_NAME
Value * RHS
Value * LHS
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1314
Class for arbitrary precision integers.
Definition APInt.h:78
bool isSwift() const
bool isThumb1Only() const
bool hasFPARMv8Base() const
bool isThumb2() const
bool isLikeA9() const
bool hasVFP2Base() const
bool isLittle() const
bool isMClass() const
uint64_t getZExtValue() const
Container class for subtarget features.
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
bool mayStore() const
Return true if this instruction could possibly modify memory.
unsigned getOpcode() const
Return the opcode number for this descriptor.
SimpleValueType SimpleTy
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineOperand & getOperand(unsigned i) const
@ MOLoad
The memory access reads data.
Align getAlign() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
int getNodeId() const
Return the unique node id.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
op_iterator op_end() const
op_iterator op_begin() const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:702
LLVM_ABI std::string lower() const
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
LLVM Value Representation.
Definition Value.h:75
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
static ShiftOpc getShiftOpcForNode(unsigned Opcode)
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into an shifter_operand immed...
uint64_t decodeVMOVModImm(unsigned ModImm, unsigned &EltBits)
decodeVMOVModImm - Decode a NEON/MVE modified immediate value into the element value and the element ...
float getFPImmFloat(unsigned Imm)
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset)
getAM5Opc - This function encodes the addrmode5 opc field.
unsigned getAM5FP16Opc(AddrOpc Opc, unsigned char Offset)
getAM5FP16Opc - This function encodes the addrmode5fp16 opc field.
unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, unsigned IdxMode=0)
getAM3Opc - This function encodes the addrmode3 opc field.
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ TargetConstantPool
Definition ISDOpcodes.h:184
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:868
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ TargetExternalSymbol
Definition ISDOpcodes.h:185
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:225
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition ISDOpcodes.h:180
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:134
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:569
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:914
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:558
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:947
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:933
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ TargetGlobalTLSAddress
Definition ISDOpcodes.h:181
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
@ ARM
Windows AXP64.
Definition MCAsmInfo.h:47
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
FunctionPass * createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOptLevel OptLevel)
createARMISelDag - This pass converts a legalized DAG into a ARM-specific DAG, ready for instruction ...
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition MathExtras.h:267
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
unsigned ConstantMaterializationCost(unsigned Val, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns the number of instructions required to materialize the given constant in a register,...
@ FMul
Product of floats.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
@ NearestTiesToEven
roundTiesToEven.
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
#define NC
Definition regutils.h:42
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Extended Value Type.
Definition ValueTypes.h:35
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition ValueTypes.h:202
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.